Commit 6d50bc71 authored by Axelle Loriot's avatar Axelle Loriot
Browse files

Auto-saving for axelle.loriot on branch master from commit 38f4ae17

parent 38f4ae17
Pipeline #90457 passed with stage
in 1 minute and 30 seconds
library(tidyverse)
rWSBIM2122::prepare_shell()
# Assemble the gene-by-sample count table from the per-sample count files.
count_dir <- "wsbim2122_data/count_data"
# `pattern` is a regular expression, not a glob: match the literal
# ".tsv.gz" suffix only.
samples <- list.files(count_dir, pattern = "\\.tsv\\.gz$")
if (length(samples) == 0) {
  stop("No '.tsv.gz' count files found in ", count_dir, call. = FALSE)
}
# Read every file exactly once; the gene identifiers and the count column(s)
# (names ending in ".bam") are both taken from these tables.
count_tables <- lapply(file.path(count_dir, samples), read_tsv)
Geneids <- dplyr::select(count_tables[[1]], Geneid)
# Columns are bound by position, so every file must list the same genes in
# the same order as the first file; otherwise rows would be silently mixed up.
same_genes <- vapply(
  count_tables,
  function(tbl) identical(tbl$Geneid, Geneids$Geneid),
  logical(1)
)
if (!all(same_genes)) {
  stop(
    "Gene identifiers differ from the first count file in: ",
    paste(samples[!same_genes], collapse = ", "),
    call. = FALSE
  )
}
counts <- lapply(
  count_tables,
  function(tbl) dplyr::select(tbl, ends_with(".bam"))
)
# Bind all samples in a single call instead of growing the table in a loop;
# a plain data.frame is needed to carry gene ids as row names.
counts <- as.data.frame(do.call(cbind, counts))
# Clean the sample names: drop the ".bam" suffix (dot escaped) and the
# BAM directory prefix (matched literally).
names(counts) <- sub(pattern = "\\.bam$", "", names(counts))
names(counts) <- sub(
  pattern = "../processed_data/bam/", "", names(counts),
  fixed = TRUE
)
rownames(counts) <- Geneids$Geneid
library(DESeq2)
library(tidyverse)
# Load the saved objects; the code below expects them to provide `counts`
# (count table) and `coldata` (sample annotation with a `Condition` column).
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
# Create dds object
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ Condition
)
# Inspect counts distribution: one histogram of log2(count + 1) per sample.
# pivot_longer() replaces the superseded gather(); the plot is unchanged.
as_tibble(assay(dds)) %>%
  pivot_longer(everything(), names_to = "sample", values_to = "counts") %>%
  ggplot(aes(x = log2(counts + 1), fill = sample)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
# PCA with plotPCA, computed on rlog-transformed counts
rld <- rlogTransformation(dds)
# Effect of the rlog transformation: compare raw counts, log2 counts and
# rlog values for the same gene.
# picked a very highly expressed gene
assay(dds["ENSG00000198804"])
log2(assay(dds["ENSG00000198804"]))
assay(rld["ENSG00000198804"])
# picked a lowly expressed gene
assay(dds["ENSG00000248671"])
log2(assay(dds["ENSG00000248671"]))
assay(rld["ENSG00000248671"])
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot. The argument name is spelled
# out (`returnData`) instead of relying on partial matching of `return`.
x <- plotPCA(rld, intgroup = "Condition", returnData = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
  geom_point() +
  geom_text()
# Size factors stored in the dds object
sizeFactors(dds)
# Compare them with the sequencing depth (column sums of the raw counts):
# one bar chart per quantity, one bar per sample.
SF <- ggplot(
  enframe(sizeFactors(dds), name = "sample", value = "Size_Factor"),
  aes(x = sample, y = Size_Factor)
) +
  geom_col()
SD <- ggplot(
  enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads"),
  aes(x = sample, y = n_reads)
) +
  geom_col()
library("patchwork")
# patchwork: stack the size-factor plot on top of the depth plot
SF / SD
# Available results
resultsNames(dds)
# Make "mock" the reference level and refit, so that the coefficient
# extracted below is "Condition_KD_vs_mock".
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds <- DESeq(dds)
res <- results(dds, name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercises
# 1. Inspect the results table and identify the 5 "best genes" showing the
# lowest adjusted p-value.
res_tbl %>%
  arrange(padj) %>%
  head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `counts()`. Compare with baseMean values.
best_genes <- res_tbl %>%
  arrange(padj) %>%
  head(5) %>%
  pull(ENSEMBL)
best_genes
# `normalized` is spelled out in full rather than partially matched.
counts(dds[best_genes], normalized = TRUE)
rowMeans(counts(dds[best_genes], normalized = TRUE))
# 3. Extract the beta coefficients of these 5 "best genes" from the GLM
# using the function `coefficients()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. Using the function `counts()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with beta coefficients.
# Samples are selected by their Condition label, not by column position, so
# the result stays correct whatever the sample order is.
is_mock <- dds$Condition == "mock"
log2(rowMeans(counts(dds[best_genes, is_mock], normalized = TRUE)))
# 5. Calculate the expression levels (in log2)
# of these 5 "best genes" in KD cells. Compare with beta coefficients.
is_kd <- dds$Condition == "KD"
log2(rowMeans(counts(dds[best_genes, is_kd], normalized = TRUE)))
# log2 expression levels in KD cells evaluated from the beta coefficients
# (first column + second column of the coefficient matrix)
best_coef <- coefficients(dds[best_genes])
best_coef[, 1] + best_coef[, 2]
# 6. How many genes have no adjusted p-value? Why?
summary(res$padj)
# filter genes with no adjusted p-value
res_tbl %>%
  filter(is.na(padj))
# Independent filtering threshold used
metadata(res)$filterThreshold
# The quantile (theta) chosen by the independent filtering is read from the
# results metadata instead of being hard-coded, so it follows the data.
filter_theta <- metadata(res)$filterTheta
quantile(res$baseMean, filter_theta)
head(metadata(res)$filterNumRej)
# Number of rejections as a function of theta; the red line marks the
# theta that was selected.
as_tibble(metadata(res)$filterNumRej) %>%
  ggplot(aes(x = theta, y = numRej)) +
  geom_point() +
  geom_vline(xintercept = filter_theta, color = "red")
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with baseMean == 0
res_tbl %>%
  filter(baseMean == 0) %>%
  nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
  filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
  nrow()
# Re-run results() on the same dds object with independent filtering
# switched off, then count the genes left without an adjusted p-value.
res_no_IF <- results(
  dds,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
sum(is.na(res_no_IF$padj))
# Another way of filtering genes with very low counts: drop them before
# fitting the model.
filtering_thr <- 5
# keep genes with a normalized count of at least `filtering_thr`
# in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >= 3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(
  dds_bis,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
# Genes still lacking an adjusted p-value after the manual filter
sum(is.na(res_bis$padj))
# => Number of genes kept
sum(!is.na(res_bis$padj))
# Compare with the previous analysis, where the independent filtering
# threshold was fixed by DESeq2
sum(!is.na(res$padj))
# Histogram of p-values over all genes
hist(res_tbl$pvalue)
# A few genes taken from the 0.80-0.85 p-value range
res_tbl %>%
  filter(pvalue > 0.8, pvalue < 0.85) %>%
  head(10)
# Same histogram restricted to genes above the independent filtering
# threshold
res_tbl %>%
  filter(baseMean > metadata(res)$filterThreshold) %>%
  pull(pvalue) %>%
  hist()
# MA plot
plotMA(res)
# Volcano plot: genes with padj < 0.05 and |log2FC| > 1 are drawn in red;
# guide lines mark both cut-offs.
ggplot(
  filter(res_tbl, !is.na(padj)),
  aes(
    x = log2FoldChange, y = -log10(padj),
    color = padj < 0.05 & abs(log2FoldChange) > 1
  )
) +
  scale_colour_manual(values = c("gray", "red")) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = -log10(0.05)) +
  geom_vline(xintercept = 1) +
  geom_vline(xintercept = -1)
# Bar plot of the normalized counts of a set of genes: one facet per gene,
# one bar per sample, filled by condition.
#   dds      DESeqDataSet the counts are taken from
#   genes    character vector of gene identifiers used to subset `dds`
#   coldata  sample annotation; its row names are joined to the sample names
#            and it must contain a `Condition` column
# Returns the ggplot object.
plot_gene_counts <- function(dds, genes, coldata) {
  as_tibble(counts(dds[genes, ], normalized = TRUE), rownames = "ENSEMBL") %>%
    pivot_longer(-ENSEMBL, names_to = "sample", values_to = "counts") %>%
    # explicit key: join on the sample name only
    left_join(as_tibble(coldata, rownames = "sample"), by = "sample") %>%
    ggplot(aes(x = sample, y = counts, fill = Condition)) +
    geom_bar(stat = "identity", color = "gray30") +
    facet_wrap(~ ENSEMBL, scales = "free", ncol = 3) +
    theme(
      axis.text.x = element_text(size = 7, angle = 90),
      axis.title.x = element_blank(),
      legend.position = "right",
      legend.text = element_text(size = 7),
      legend.title = element_text(size = 7)
    )
}
# plot counts of 6 best genes
best_genes <- res_tbl %>%
  arrange(padj) %>%
  head(6)
plot_gene_counts(dds, best_genes$ENSEMBL, coldata)
# Identify and inspect counts of the genes plotted in red in the volcano plot.
# These genes have a very large log2FC (|log2FC| > 5) but are far from bearing
# the lowest adjusted p-value (their adjusted p-value is between 1e-5 and
# 0.05).
selected_genes <- res_tbl %>%
  filter(padj < 0.05 & padj > 1e-5 & abs(log2FoldChange) > 5)
plot_gene_counts(dds, selected_genes$ENSEMBL, coldata)
# Using the dispersions() function, compare dispersion values for both groups
# of genes
dispersions(dds[best_genes$ENSEMBL, ])
dispersions(dds[selected_genes$ENSEMBL, ])
# Add gene names
library("biomaRt")
library("org.Hs.eg.db")
# Load homo sapiens ensembl dataset
mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
# Attributes define the values we are interested to retrieve.
listAttributes(mart)
ensembl_to_geneName <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name", "entrezgene_id"),
  mart = mart
)
ensembl_to_geneName
names(ensembl_to_geneName) <- c("ENSEMBL", "gene", "ENTREZID")
# Join on the Ensembl identifier explicitly.
# NOTE(review): if one Ensembl id maps to several Entrez ids, this join
# duplicates the corresponding result rows - confirm that is intended.
res_tbl <- res_tbl %>%
  left_join(ensembl_to_geneName, by = "ENSEMBL") %>%
  arrange(padj)
# saveRDS() does not create missing directories, so make sure the output
# directory exists first.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)
saveRDS(dds, file = "./data/dds.rds")
saveRDS(res_tbl, file = "./data/res_tbl.rds")
saveRDS(ensembl_to_geneName, file = "./data/ensembl_to_geneName.rds")
# The original path was truncated ("./data/en"), which matches no saved file.
# NOTE(review): completed to the mapping saved just above - confirm this is
# the file that was meant.
readRDS(file = "./data/ensembl_to_geneName.rds")
\ No newline at end of file
File added
library(DESeq2)
library(tidyverse)
# Load the saved objects; the code below expects them to provide `counts`
# (count table) and `coldata` (sample annotation with a `Condition` column).
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)
# Create dds object
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ Condition
)
# Look at the three parts of the object: counts, sample data, gene data
head(assay(dds))
colData(dds)
rowData(dds)
# Inspect counts distribution: one histogram of log2(count + 1) per sample.
# pivot_longer() replaces the superseded gather(); the plot is unchanged.
as_tibble(assay(dds)) %>%
  pivot_longer(everything(), names_to = "sample", values_to = "counts") %>%
  ggplot(aes(x = log2(counts + 1), fill = sample)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
# PCA with plotPCA, computed on rlog-transformed counts
rld <- rlogTransformation(dds)
# `rld` must exist before it is inspected: the dim() call was moved after
# the transformation (it previously failed in a fresh session).
dim(rld)
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot. The argument name is spelled
# out (`returnData`) instead of relying on partial matching of `return`.
x <- plotPCA(rld, intgroup = "Condition", returnData = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
  geom_point() +
  geom_text()
# Scatter plot of the first two principal components, coloured by condition.
#   pca      result of prcomp(); its score matrix `pca$x` has samples as rows
#   pve      proportion of variance explained by each component
#   coldata  sample annotation; its row names are joined to the sample names
#            and it must contain a `Condition` column
# Returns the ggplot object.
plot_pca_scores <- function(pca, pve, coldata) {
  as_tibble(pca$x, rownames = "Sample") %>%
    dplyr::select(Sample, PC1, PC2) %>%
    # explicit key: join on the sample name only
    left_join(as_tibble(coldata, rownames = "Sample"), by = "Sample") %>%
    ggplot(aes(x = PC1, y = PC2, color = Condition)) +
    geom_point() +
    xlab(paste0("PC1:", round(pve[1] * 100), " % variance")) +
    ylab(paste0("PC2:", round(pve[2] * 100), " % variance"))
}
# PCA with prcomp() on all genes (samples as rows)
pca <- prcomp(t(assay(rld)))
summary(pca)
# Named `pc_var` rather than `var` to avoid shadowing stats::var()
pc_var <- pca$sdev^2
pve <- pc_var / sum(pc_var) # proportion of variance explained
plot_pca_scores(pca, pve, coldata)
# PCA with prcomp() taking the 500 genes with the highest variance
rv <- rowVars(assay(rld))
# Named `top_var_genes` rather than `select` to avoid shadowing
# dplyr::select()
top_var_genes <- order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(rld)[top_var_genes, ]))
pve <- pca$sdev^2 / sum(pca$sdev^2)
plot_pca_scores(pca, pve, coldata)
# Size factors stored in the dds object
sizeFactors(dds)
# Compare them with the sequencing depth (column sums of the raw counts):
# one bar chart per quantity, one bar per sample.
SF <- ggplot(
  enframe(sizeFactors(dds), name = "sample", value = "Size_Factor"),
  aes(x = sample, y = Size_Factor)
) +
  geom_col()
SD <- ggplot(
  enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads"),
  aes(x = sample, y = n_reads)
) +
  geom_col()
library("patchwork")
# patchwork: stack the size-factor plot on top of the depth plot
SF / SD
# Available results
resultsNames(dds)
dds$Condition
# Make "mock" the reference level and refit, so that the coefficient
# extracted below is "Condition_KD_vs_mock".
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds, name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercises
# 1. Inspect the results table and identify the 5 "best genes" showing the
# lowest adjusted p-value.
res_tbl %>%
  arrange(padj) %>%
  head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `counts()`. Compare with baseMean values.
best_genes <- res_tbl %>%
  arrange(padj) %>%
  head(5) %>%
  pull(ENSEMBL)
# Show the per-sample normalized counts, then their mean (the rowMeans()
# call was previously duplicated). `normalized` is spelled out in full.
counts(dds[best_genes], normalized = TRUE)
rowMeans(counts(dds[best_genes], normalized = TRUE))
# 3. Extract the beta coefficients of these 5 "best genes" from the GLM
# using the function `coefficients()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. Using the function `counts()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with beta coefficients.
# Samples are selected by their Condition label, not by column position, so
# the result stays correct whatever the sample order is.
is_mock <- dds$Condition == "mock"
log2(rowMeans(counts(dds[best_genes, is_mock], normalized = TRUE)))
# 5. Calculate the expression levels (in log2)
# of these 5 "best genes" in KD cells. Compare with beta coefficients.
is_kd <- dds$Condition == "KD"
log2(rowMeans(counts(dds[best_genes, is_kd], normalized = TRUE)))
# log2 expression levels in KD cells evaluated from the beta coefficients
# (first column + second column of the coefficient matrix)
best_coef <- coefficients(dds[best_genes])
best_coef[, 1] + best_coef[, 2]
# 6. How many genes have no adjusted p-value? Why?
summary(res$padj)
# filter genes with no adjusted p-value
res_tbl %>%
  filter(is.na(padj))
head(metadata(res)$filterNumRej)
# Number of rejections as a function of theta. The red line marks the theta
# selected by the independent filtering; it is read from the results
# metadata instead of being hard-coded, so it follows the data.
as_tibble(metadata(res)$filterNumRej) %>%
  ggplot(aes(x = theta, y = numRej)) +
  geom_point() +
  geom_vline(xintercept = metadata(res)$filterTheta, color = "red")
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with baseMean == 0
res_tbl %>%
  filter(baseMean == 0) %>%
  nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
  filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
  nrow()
# Re-run results() on the same dds object with independent filtering
# switched off, then count the genes left without an adjusted p-value.
res_no_IF <- results(
  dds,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
sum(is.na(res_no_IF$padj))
# Another way of filtering genes with very low counts: drop them before
# fitting the model.
filtering_thr <- 5
# keep genes with a normalized count of at least `filtering_thr`
# in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >= 3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(
  dds_bis,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
# Genes without / with an adjusted p-value after the manual filter
sum(is.na(res_bis$padj))
sum(!is.na(res_bis$padj))
# Histogram of p-values over all genes
hist(res_tbl$pvalue)
# A few genes taken from the 0.80-0.85 p-value range
res_tbl %>%
  filter(pvalue > 0.8, pvalue < 0.85) %>%
  head(10)
# Same histogram restricted to genes above the independent filtering
# threshold
res_tbl %>%
  filter(baseMean > metadata(res)$filterThreshold) %>%
  pull(pvalue) %>%
  hist()
# MA plot, with the significance level set to 0.05
plotMA(res, alpha = 0.05)
# Volcano plot of the genes that have an adjusted p-value
ggplot(
  filter(res_tbl, !is.na(padj)),
  aes(x = log2FoldChange, y = -log10(padj))
) +
  geom_point(size = 0.5)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
#!genome-build GRCh38.p12
#!genome-version GRCh38
#!genome-date 2013-12
#!genome-build-accession NCBI:GCA_000001405.27
#!genebuild-last-updated 2018-07
1 havana gene 11869 14409 . + . gene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";
1 havana transcript 11869 14409 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";
1 havana exon 11869 12227 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana exon 12613 12721 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana exon 13221 14409 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002312635"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana transcript 12010 13670 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
1 havana exon 12010 12057 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 12179 12227 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1 havana exon 12613 12697 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1 havana exon 12975 13052 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001799933"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1 havana exon 13221 13374 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001746346"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1 havana exon 13453 13670 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "6"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001863096"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana gene 14404 29570 . - . gene_id "ENSG00000227232"; gene_version "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
1 havana transcript 14404 29570 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
1 havana exon 29534 29570 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "1"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001890219"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 24738 24891 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "2"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003507205"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 18268 18366 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "3"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003477500"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 17915 18061 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "4"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003565697"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 17606 17742 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003475637"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 17233 17368 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "6"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003502542"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 16858 17055 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "7"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003553898"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 16607 16765 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "8"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003621279"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 15796 15947 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "9"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00002030414"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 15005 15038 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "10"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001935574"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana exon 14404 14501 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "11"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001843071"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 mirbase gene 17369 17436 . - . gene_id "ENSG00000278267"; gene_version "1"; gene_name "MIR6859-1"; gene_source "mirbase"; gene_biotype "miRNA";
1 mirbase transcript 17369 17436 . - . gene_id "ENSG00000278267"; gene_version "1"; transcript_id "ENST00000619216"; transcript_version "1"; gene_name "MIR6859-1"; gene_source "mirbase"; gene_biotype "miRNA"; transcript_name "MIR6859-1-201"; transcript_source "mirbase"; transcript_biotype "miRNA"; tag "basic"; transcript_support_level "NA";
1 mirbase exon 17369 17436 . - . gene_id "ENSG00000278267"; gene_version "1"; transcript_id "ENST00000619216"; transcript_version "1"; exon_number "1"; gene_name "MIR6859-1"; gene_source "mirbase"; gene_biotype "miRNA"; transcript_name "MIR6859-1-201"; transcript_source "mirbase"; transcript_biotype "miRNA"; exon_id "ENSE00003746039"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana gene 29554 31109 . + . gene_id "ENSG00000243485"; gene_version "5"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA";
1 havana transcript 29554 31097 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000473358"; transcript_version "1"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-202"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";
1 havana exon 29554 30039 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000473358"; transcript_version "1"; exon_number "1"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-202"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001947070"; exon_version "1"; tag "basic"; transcript_support_level "5";
1 havana exon 30564 30667 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000473358"; transcript_version "1"; exon_number "2"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-202"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001922571"; exon_version "1"; tag "basic"; transcript_support_level "5";
1 havana exon 30976 31097 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000473358"; transcript_version "1"; exon_number "3"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-202"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001827679"; exon_version "1"; tag "basic"; transcript_support_level "5";
1 havana transcript 30267 31109 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000469289"; transcript_version "1"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";
1 havana exon 30267 30667 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000469289"; transcript_version "1"; exon_number "1"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001841699"; exon_version "1"; tag "basic"; transcript_support_level "5";
1 havana exon 30976 31109 . + . gene_id "ENSG00000243485"; gene_version "5"; transcript_id "ENST00000469289"; transcript_version "1"; exon_number "2"; gene_name "MIR1302-2HG"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "MIR1302-2HG-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001890064"; exon_version "1"; tag "basic"; transcript_support_level "5";
1 mirbase gene 30366 30503 . + . gene_id "ENSG00000284332"; gene_version "1"; gene_name "MIR1302-2"; gene_source "mirbase"; gene_biotype "miRNA";
1 mirbase transcript 30366 30503 . + . gene_id "ENSG00000284332"; gene_version "1"; transcript_id "ENST00000607096"; transcript_version "1"; gene_name "MIR1302-2"; gene_source "mirbase"; gene_biotype "miRNA"; transcript_name "MIR1302-2-201"; transcript_source "mirbase"; transcript_biotype "miRNA"; tag "basic"; transcript_support_level "NA";
1 mirbase exon 30366 30503 . + . gene_id "ENSG00000284332"; gene_version "1"; transcript_id "ENST00000607096"; transcript_version "1"; exon_number "1"; gene_name "MIR1302-2"; gene_source "mirbase"; gene_biotype "miRNA"; transcript_name "MIR1302-2-201"; transcript_source "mirbase"; transcript_biotype "miRNA"; exon_id "ENSE00003695741"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana gene 34554 36081 . - . gene_id "ENSG00000237613"; gene_version "2"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA";
1 havana transcript 34554 36081 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000417324"; transcript_version "1"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "1";
1 havana exon 35721 36081 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000417324"; transcript_version "1"; exon_number "1"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001656588"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana exon 35277 35481 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000417324"; transcript_version "1"; exon_number "2"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001669267"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana exon 34554 35174 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000417324"; transcript_version "1"; exon_number "3"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001727627"; exon_version "1"; tag "basic"; transcript_support_level "1";
1 havana transcript 35245 36073 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000461467"; transcript_version "1"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-202"; transcript_source "havana"; transcript_biotype "lincRNA"; transcript_support_level "3";
1 havana exon 35721 36073 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000461467"; transcript_version "1"; exon_number "1"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-202"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001618781"; exon_version "2"; transcript_support_level "3";
1 havana exon 35245 35481 . - . gene_id "ENSG00000237613"; gene_version "2"; transcript_id "ENST00000461467"; transcript_version "1"; exon_number "2"; gene_name "FAM138A"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "FAM138A-202"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001874421"; exon_version "1"; transcript_support_level "3";
1 havana gene 52473 53312 . + . gene_id "ENSG00000268020"; gene_version "3"; gene_name "OR4G4P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
1 havana transcript 52473 53312 . + . gene_id "ENSG00000268020"; gene_version "3"; transcript_id "ENST00000606857"; transcript_version "1"; gene_name "OR4G4P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "OR4G4P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
1 havana exon 52473 53312 . + . gene_id "ENSG00000268020"; gene_version "3"; transcript_id "ENST00000606857"; transcript_version "1"; exon_number "1"; gene_name "OR4G4P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "OR4G4P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003698237"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1 havana gene 57598 64116 . + . gene_id "ENSG00000240361"; gene_version "2"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";
1 havana transcript 57598 64116 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000642116"; transcript_version "1"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic";
1 havana exon 57598 57653 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000642116"; transcript_version "1"; exon_number "1"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003812686"; exon_version "1"; tag "basic";
1 havana exon 58700 58856 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000642116"; transcript_version "1"; exon_number "2"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003812505"; exon_version "1"; tag "basic";
1 havana exon 62916 64116 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000642116"; transcript_version "1"; exon_number "3"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003811818"; exon_version "1"; tag "basic";
1 havana transcript 62949 63887 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000492842"; transcript_version "2"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA (assigned to previous version 1)";
1 havana exon 62949 63887 . + . gene_id "ENSG00000240361"; gene_version "2"; transcript_id "ENST00000492842"; transcript_version "2"; exon_number "1"; gene_name "OR4G11P"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "OR4G11P-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001830178"; exon_version "2"; tag "basic"; transcript_support_level "NA (assigned to previous version 1)";
1 ensembl_havana gene 65419 71585 . + . gene_id "ENSG00000186092"; gene_version "6"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
1 havana transcript 65419 71585 . + . gene_id "ENSG00000186092"; gene_version "6"; transcript_id "ENST00000641515"; transcript_version "2"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "OR4F5-202"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";