Commit a383e533 authored by Axelle Loriot's avatar Axelle Loriot
Browse files

Auto-saving for axelle.loriot on branch master from commit 38f4ae17

parent 38f4ae17
Pipeline #87942 passed with stage
in 25 seconds
library(tidyverse)
rWSBIM2122::prepare_shell()
samples <- list.files("wsbim2122_data/count_data/", pattern = "*tsv.gz")
counts <- read_tsv(file = paste0("wsbim2122_data/count_data/", samples[1])) %>%
select(ends_with('.bam'))
for (sample in samples[-1]){
tmp <- read_tsv(file = paste0("wsbim2122_data/count_data/", sample)) %>%
select(ends_with('.bam'))
counts <- cbind(counts, tmp)
}
names(counts) <- sub(pattern = ".bam$", '', names(counts))
names(counts) <- sub(pattern = '../processed_data/bam/', '', names(counts))
Geneids <- read_tsv(file = paste0("wsbim2122_data/count_data/", samples[1])) %>%
dplyr::select(Geneid)
rownames(counts) <- Geneids$Geneid
library(DESeq2)
library(tidyverse)
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
# Create dds object
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~ Condition)
# Inspect counts distribution
as_tibble(assay(dds)) %>%
gather(sample, value = counts) %>%
ggplot(aes(x = log2(counts + 1), fill = sample)) +
geom_histogram(bins = 20) +
facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
#PCA with plotPCA
rld <- rlogTransformation(dds)
# effect of the rlog transformation
# picked a very highly expressed gene
assay(dds["ENSG00000198804"])
log2(assay(dds["ENSG00000198804"]))
assay(rld["ENSG00000198804"])
# picked a lowly expressed gene
assay(dds["ENSG00000248671"])
log2(assay(dds["ENSG00000248671"]))
assay(rld["ENSG00000248671"])
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot
x <- plotPCA(rld, intgroup = "Condition", return = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
geom_point() +
geom_text()
# Size Factors
sizeFactors(dds)
# Compare with sequencing depths
SF <- enframe(sizeFactors(dds), name = "sample", value = "Size_Factor") %>%
ggplot(aes(x = sample, y = Size_Factor)) +
geom_bar(stat = "identity")
SD <- enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads") %>%
ggplot(aes(x = sample, y = n_reads)) +
geom_bar(stat = "identity")
library("patchwork")
SF / SD
# Available results
resultsNames(dds)
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds <- DESeq(dds)
res <- results(dds,
name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl %>%
arrange(padj) %>%
head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes <- res_tbl %>%
arrange(padj) %>%
head(5) %>% pull(ENSEMBL)
best_genes
counts(dds[best_genes], normalize = TRUE)
rowMeans(counts(dds[best_genes], normalize = TRUE))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 1:3], normalize = TRUE)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 4:6], normalize = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients(dds[best_genes])[,1] + coefficients(dds[best_genes])[,2]
# 6. How many genes have no padjusted value? Why?
summary(res$padj)
# filter genes with no padjusted values
res_tbl %>%
filter(is.na(padj))
# Independant filtering threshold used
metadata(res)$filterThreshold
quantile(res$baseMean, 0.7169)
head(metadata(res)$filterNumRej)
as_tibble(metadata(res)$filterNumRej) %>%
ggplot(aes(x = theta, y = numRej)) +
geom_point() +
geom_vline(xintercept = 0.7169,
color = 'red')
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl %>%
filter(baseMean == 0) %>%
nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
nrow()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(dds,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr <- 5
# keep genes with counts > 5 in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >=3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(dds_bis,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# => Number of genes kept
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Compare with previous anaylsis with the independant filtering thr fixed by deseq2
as_tibble(res, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Histogram of pvalues
hist(res_tbl$pvalue)
res_tbl %>%
filter(pvalue > 0.8 & pvalue < 0.85) %>%
head(10)
res_tbl %>%
filter(baseMean > metadata(res)$filterThreshold) %>%
pull(pvalue) %>%
hist()
# plot MA
plotMA(res)
# Volcano plot
res_tbl %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = log2FoldChange, y = -log10(padj),
color = padj < 0.05 & abs(log2FoldChange) > 1)) +
scale_colour_manual(values = c("gray", "red")) +
geom_point(size = 0.5) +
geom_hline(yintercept = -log10(0.05)) +
geom_vline(xintercept = 1) +
geom_vline(xintercept = -1)
# plot counts of 6 best genes
best_genes <- res_tbl %>%
arrange(padj) %>%
head(6)
as_tibble(counts(dds[best_genes$ENSEMBL, ], normalize = T),
rownames = 'ENSEMBL') %>%
gather(sample, counts, -ENSEMBL) %>%
left_join(as_tibble(coldata, rownames = "sample")) %>%
ggplot(aes(x = sample, y = counts, fill = Condition)) +
geom_bar(stat = 'identity', color = "gray30") +
facet_wrap( ~ ENSEMBL, scales = "free", ncol = 3) +
theme(axis.text.x = element_text(size = 7, angle = 90),
axis.title.x = element_blank(),
legend.position = "right",
legend.text = element_text(size = 7),
legend.title = element_text(size = 7))
#Identify and inspect counts of the genes plotted in red in the volcano-plot.
#These genes have a very large log2FC (|log2FC| > 5) but are far from bearing
#the lowest padjusted value (their padjusted value is between 0.05 and 1e-5).
selected_genes <- res_tbl %>%
filter(padj < 0.05 & padj > 1e-5 & abs(log2FoldChange) > 5)
as_tibble(counts(dds[selected_genes$ENSEMBL, ], normalize = T),
rownames = 'ENSEMBL') %>%
gather(sample, counts, -ENSEMBL) %>%
left_join(as_tibble(coldata, rownames = "sample")) %>%
ggplot(aes(x = sample, y = counts, fill = Condition)) +
geom_bar(stat = 'identity', color = "gray30") +
facet_wrap( ~ ENSEMBL, scales = "free", ncol = 3) +
theme(axis.text.x = element_text(size = 7, angle = 90),
axis.title.x = element_blank(),
legend.position = "right",
legend.text = element_text(size = 7),
legend.title = element_text(size = 7))
# Using dispersion() function, compare dispersion values for both group
#of genes
dispersions(dds[best_genes$ENSEMBL,])
dispersions(dds[selected_genes$ENSEMBL,])
# Add genes names
library("biomaRt")
library("org.Hs.eg.db")
# Load homo sapiens ensembl dataset
mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#Attributes define the values we are interested to retrieve.
listAttributes(mart)
ensembl_to_geneName <- getBM(
attributes = c("ensembl_gene_id", "external_gene_name", "entrezgene_id"),
mart = mart)
ensembl_to_geneName
names(ensembl_to_geneName) <- c("ENSEMBL", "gene", "ENTREZID")
res_tbl <- res_tbl %>%
left_join(ensembl_to_geneName) %>%
arrange(padj)
saveRDS(dds, file = "./data/dds.rds")
saveRDS(res_tbl, file = "./data/res_tbl.rds")
saveRDS(ensembl_to_geneName, file = "./data/ensembl_to_geneName.rds")
readRDS(file = "./data/en")
\ No newline at end of file
File added
library(DESeq2)
library(tidyverse)
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)
# Create dds object
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~ Condition)
head(assay(dds))
colData(dds)
rowData(dds)
# Inspect counts distribution
as_tibble(assay(dds)) %>%
gather(sample, value = counts) %>%
ggplot(aes(x = log2(counts + 1), fill = sample)) +
geom_histogram(bins = 20) +
facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
#PCA with plotPCA
dim(rld)
rld <- rlogTransformation(dds)
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot
x <- plotPCA(rld, intgroup = "Condition", return = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
geom_point() +
geom_text()
# PCA with prcomp()
pca <- prcomp(t(assay(rld)))
summary(pca)
var <- pca$sdev^2
pve <- var/sum(var) # % var explained
as_tibble(pca$x, rownames = "Sample") %>%
select(Sample, PC1, PC2) %>%
left_join(as_tibble(coldata, rownames = "Sample")) %>%
ggplot(aes(x = PC1, y = PC2, color = Condition)) +
geom_point() +
xlab(paste0("PC1:", round(pve[1]*100), " % variance")) +
ylab(paste0("PC2:", round(pve[2]*100), " % variance"))
# PCA with prcomp() taking 500 genes with higher variance
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(rld)[select, ]))
pve <- pca$sdev^2/sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
select(Sample, PC1, PC2) %>%
left_join(as_tibble(coldata, rownames = "Sample")) %>%
ggplot(aes(x = PC1, y = PC2, color = Condition)) +
geom_point() +
xlab(paste0("PC1:", round(pve[1]*100), " % variance")) +
ylab(paste0("PC2:", round(pve[2]*100), " % variance"))
# Size Factors
sizeFactors(dds)
# Compare with sequencing depths
SF <- enframe(sizeFactors(dds), name = "sample", value = "Size_Factor") %>%
ggplot(aes(x = sample, y = Size_Factor)) +
geom_bar(stat = "identity")
SD <- enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads") %>%
ggplot(aes(x = sample, y = n_reads)) +
geom_bar(stat = "identity")
library("patchwork")
SF / SD
# Available results
resultsNames(dds)
dds$Condition
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds,
name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl %>%
arrange(padj) %>%
head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes <- res_tbl %>%
arrange(padj) %>%
head(5) %>% pull(ENSEMBL)
rowMeans(counts(dds[best_genes], normalize = TRUE))
rowMeans(counts(dds[best_genes], normalize = TRUE))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 1:3], normalize = TRUE)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 4:6], normalize = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients(dds[best_genes])[,1] + coefficients(dds[best_genes])[,2]
# 6. How many genes have no padjusted value? Why?
summary(res$padj)
# filter genes with no padjusted values
res_tbl %>%
filter(is.na(padj))
head(metadata(res)$filterNumRej)
as_tibble(metadata(res)$filterNumRej) %>%
ggplot(aes(x = theta, y = numRej)) +
geom_point() +
geom_vline(xintercept = 0.7169,
color = 'red')
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl %>%
filter(baseMean == 0) %>%
nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
nrow()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(dds,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr <- 5
# keep genes with counts > 5 in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >=3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(dds_bis,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Histogram of pvalues
hist(res_tbl$pvalue)
res_tbl %>%
filter(pvalue > 0.8 & pvalue < 0.85) %>%
head(10)
res_tbl %>%
filter(baseMean > metadata(res)$filterThreshold) %>%
pull(pvalue) %>%
hist()
# plot MA
plotMA(res, alpha = 0.05)
# Volcano plot
res_tbl %>%
filter(!is.na(padj)) %>%
ggplot(aes(x= log2FoldChange, y = -log10(padj))) +
geom_point(size = 0.5)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
This diff is collapsed.
@DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
NTGANGCTTAGGTAGGGGTAGGGAGACAGGGAAGTAACAGAGGTACTGGACATGCCAGGCAGAAGAAACAGCAGTACAGGACATGGAAGCAAACAACATGG
+DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
#0;9#4=9@<><>?????@=@???@???????<?>?=?=?<?????????????????????<@<???=?<=?<::<==<;<<==<;<<<<==<<<<<<=<
@DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
CCTGNACCCAGTAGAGAAAGCCCTTCGAGATGCCAAACTAGACAAGTCACAGATTCATGATATTGTCCTGGTTGGTGGTTCTACTCGTATCCCCAAGATTC
+DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
@@@D#2ADHHHAAF<CFHIGHIIGGIHIECGFAHDHIIIGGGIBGG?FHHHGGEHIH=FEGDDGI@GIIGIHHEHEDDBACCDCECB@BCBCCBCBBBCC@
DRR016707.26395247 147 1 19626495 60 101M = 19626490 -106 GTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGG DDDDDDDDDDDDDEEEEEEEFFFFFFHCHHHJJJHHJJJJJJJJJJJJIHFJJIHJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJHHHHHFFFFFCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.26395247 99 1 19626490 60 101M = 19626495 106 CTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTG CCCFFFFFHHHHHJIJJJJIJJJJJJJJJJIJJJJJJJJJJJJJJJHHIHIJJJIJJHIJJJIIIIIIJGIJJJJDHJJJHHHHHHHFFFFFFEDACEEDD AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 163 1 19626487 60 101M = 19626493 107 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAG CC@FFFFFHFHFFHGGJCIFJGHIIIIGIIIIJIIJJJIGIIIIIIGII?:BFHGHIIIIGEHHHGIIDGGH@GGIGHJGHHHHCEHDEFDDFFEEECCEC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 83 1 19626493 60 101M = 19626487 -107 CTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAA CDDDDDDDDDDDDECEEEEEFFFFD?=HHHHGIIGEGIGIIIHIGDIHHGGGIIHGHIIIIGIIIGIGEIIGGGIIIIGIIIIGHDIIFHHHHFFBDD?C@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.42561021 161 1 19626495 60 100M1S = 19626494 0 GTAACAATGTTATCAGTAATGCTTTAAACTAAAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGA ?8?B1:BDD,,2<:C<2A<<,+A?CBFFAE<+<+2A?EEBD99:D<*?B:*:D9B:DD/BDED@)=8=B;@=)).=(=@;7A)7.==>;3);@AAA>A@3; AS:i:-8 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:30C0C68 YT:Z:UP NH:i:1
DRR016707.42561021 81 1 19626494 60 101M = 19626495 0 TGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGACACCAAGTCTGTTTCTTGTTTTGTATTTTCGCTCTGGAAGTTGTAAG (9(??==3??>==;5(55((6.((9>;..;67>)>).5(A;;>=/))8.=>9)((?AB?92=00*;=::1)=:3=74=A7<)0<+<AAAB?=<+4AA<;== AS:i:-5 ZS:i:-19 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:54A29T16 YT:Z:UP NH:i:1
DRR016707.43465443 163 1 19626483 60 101M = 19626486 104 TGCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG B@@FAFFFHHHHHHIIIIJJJHJJIJJIIJJJJJJJFJJJJJJJJJJJJJHIJFHJJIJJJIJJIIJIJJHIHHIIIJJJJJHEHHHFEFFFFFEDEEEEE AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:-1 YT:Z:CP NH:i:1
DRR016707.43465443 83 1 19626486 60 100M1S = 19626483 -104 CATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAG DDDDDDCDDDDCCDEEEEDFFFFFDBHHHHHGJJGHDJIJJIGIHJGGJJJJJJJJJJJJJHG?JJJJJJJJJJJIIJJJJIJJJJJIGHGHHFFFFFCCC AS:i:-1 ZS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 147 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG DDDDDCDDDDDDEEDEEEEFFFFFFHHHHHHJJIJIGHFIHJJJIHFIHHFJJJJJIJJJJJJJIHJJJJJIJJJJJJJJJJJJJJJJHHHHHFFFFFBB@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 99 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG CCCFFFFFHHHHHJJJJJJJIJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJDGIJJJJJJJJCGIJJJIIHHJJJHHIJJJHHHHHDFFFFFFEEEEEED AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.68628658 163 1 19626487 60 97M4S = 19626487 -105 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGAGAT CCCFFFFFGHHHHJJJJJJIJJJJJJIJJJJJJJJJIJGJIIEIIJJJIE:DGIJJJJJJJJJJJIJIHIJGGHJJJJJHJHHHHHHFFFFFFFEEEDEDC AS:i:-4 ZS:i:-17 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
DRR016707.68628658 83 1 19626487 60 4S97M = 19626487 105 ATCTATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG EDDDDDDDDCDEEEEEEEFFFEEFHCHHHHHIGJJJJIHCIJJJJJIHIJJJIFIJJIJJJJJIIHFJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC AS:i:-4 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
> SEQUENCE 1 (Fri Oct 9 07:31:51 2020)
TGCAAGGCGGGTACTCGGTCCATAACTGTGTAGCGCCGTATTTTGCTTCG
ACCCACGCCAAGTCTAAGATCGCCTTCGGGTTATTATGGTTTGGAGGGCC
GCGGTTACCTTGCAGTGAAGATTACTGAGAAGACTCGCTCTCTTTCAATT
TGTTCGGACCGGAGCCCGATCAGTGCTGGGCTCCCGCATCGTGGTTATCC
CTCGTCATGAGCCGCAGTATGGCTGATTGAAACAAACGTTGAAGGATAGC
ATGGAAAGCACCGAAAATGCTTCATGGCAACCACTTCGGCTAATTCTCGT
TGACACCACTCGGGACGCTTGTGAAAACCGGCAAACGCCCACGTACAAGG
TGTTATCCACCTAGCAACTCCTGAAGAGGAACGTGTACGATTCGCTGAAC
GCAGATCTGTGGGCTCTGAGCCATTTTGTAGTCAGTTCAACCATCATGCT
ATAATAGGGAGCAGCGGCTTAAGTTTCGCCTTGAGAATTTGGGCATCCCC
CCCGGGCACCCAATTCTAGGGGTTATCGTCTGTGAGTGGACCCGCGGCAG
ATAAAAGTAACATGGAGCGAAGACACCCCGTAGTCCTTCCTAAACAACAG
AATAAGACTTCGTACCCTAGGATGCAATCATGGGCTCAATGTTGGCACGC
CAAAATATTCACATATGACATATAAAGATTGGTTCCACGGCAATCCACCA
GAAAGAGAGCGACGGTGAAACGGCACGTCATAGGACTATTACCTGGTCCA
CTTGGTCTTAATCGTACGGCCAACCGGCGTAGCAGGGATGTAACGCGAAT
GACAAGTAGAAACGATGTACCGGTGCTAAGTAGTCGTACTCTAATTGAGA
AACACCACGCCTGTCAAGCTGACGCATGAAGGATTAGATAAGAATCCCTG
TAAGCCGCCACGGCAACACTTACACGGAGGTAAAAGTTATAGGCTTGAAC
GGAGTTAGGTCCCCACGAGGTCTATGGGATCGAGAGTGGAATCTGTCGCG
CTCCTTCTCCATGCCTCGCTGGCAGGCTCGTCTGGGACGGATAGCTGTTC
GTTCTTGATGTACGTCTTGGTAGGTAATTTTTGGCGAGGTGAATCAGTAA
TGTCCATGTACAGACGCTGCATGCAAGTTCCTGCTGCGCGTGCCCGAGTT
AAAAAGAAAGGCTCTAGACGATGTCGGTGAATGTATGATAGCAGGGTGAA
GGTTTTAACCATCTTTCCGTAATTTTACTTGATACTTCCCTATCCTAACA
TGCGCCTGTAACTTACAATGAACATACATATCAACCAGCAGGATAATGTT
GTCGAGCTTATTTACTCTGGTCTTCTAGTAAGACCTGATTAGGTGGCGCC
AGACTCCATCCGATCCCTCTCTGAAGACAGTGAAGTCCTAAGTCGAAGGG
GCGTCCGGTCGAGACTCTGCCTCGAGTATGGCGAAGTTGTGTATTAGGCC
TCCATGATCGGACACGCCGACGTTAAAAGCGACATTTCCATTCTACTACT
CCAGGACGATGGGTTGGTATGCACACTGGTCCACCCGGGGTGGTTACACT
CCGGTCCAAACAAGGTAATCGAGCAATACCTTCGAATACTAGCACATAGA
ACCTTAACTTAGGGCCCGGGTCATGAAGCCGTTTCTCCGCTCAACTCTGT
CTGTCTCTCCTTGCCCCATTCACGGTCACCAAATGAATGAGATGATGGCG
GGCACACCCTTGGCATTGCCAACACGTCCTGCTCAGTTTATGTATGCGAC
TAGGTATTCCACGAAAGACACCCGAATTATGAGGCGTCTCGAGGCTTAGA
CCGAAAACCAAGTATGAGGTTGTAGCGGTAGCCATGTCTGGTTATTTTTG
CGACTATCTCCTGCGGTTCCTTCCTATAGGAAGACATCTCTCACAATAAA
CCCAGACGCTAGTTATACCAGCAAAATCTACTTTATCTTCACTAGCTGCA
TAGAGATGCGATCGCCTAGGGCCAGGGTGGCAGTTGATATTTATGCAGGA
GTGCGAGGCAGTAGAGTAAGACTATACCAAACAAGCATTTTCACAAAAGG
TCAATGCGTCGATGCCATAGACAAAGCTACCCCGGGCGGTTGATATCTAT
ACACACGTCGTCAACCTCATCGTGAGGTAAAGACAGCATACGCAGATACT
TATAACTCGTCGATGGTTATGCTAGTCGAAGCCGTGTCCCCCTATGGAGG
TGAGAGGGCGGAACTCAGCATCCCCAGCTAGGAACCGCGGTCTAGAGCTC
GGCTTAACGCTAGGTTTAGGTCCGGTGGCGACTTAGCAGCGTTACGATGA
TGCGTGAGTTAGCGGTGAGCATACTGGCCATGAGAATTCACACGCAATAA