Commit ade3c494 authored by Axelle Loriot's avatar Axelle Loriot
Browse files

Auto-saving for axelle.loriot on branch master from commit 38f4ae17

parent 38f4ae17
Pipeline #80010 passed with stage
in 21 seconds
library(DESeq2)
library(tidyverse)
rWSBIM2122::prepare_shell()
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)
# Create dds object
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~ Condition)
head(assay(dds))
colData(dds)
rowData(dds)
# Inspect counts distribution
as_tibble(assay(dds)) %>%
gather(sample, value = counts) %>%
ggplot(aes(x = log2(counts + 1), fill = sample)) +
geom_histogram(bins = 20) +
facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
#PCA with plotPCA
rld <- rlogTransformation(dds)
# effect of the rlog transformation
# picked a very highly expressed gene
assay(dds["ENSG00000198804"])
log2(assay(dds["ENSG00000198804"]))
assay(rld["ENSG00000198804"])
# picked a lowly expressed gene
assay(dds["ENSG00000248671"])
log2(assay(dds["ENSG00000248671"]))
assay(rld["ENSG00000248671"])
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot
x <- plotPCA(rld, intgroup = "Condition", return = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
geom_point() +
geom_text()
# Size Factors
sizeFactors(dds)
# Compare with sequencing depths
enframe(sizeFactors(dds), name = "sample", value = "Size_Factor")
SF <- enframe(sizeFactors(dds), name = "sample", value = "Size_Factor") %>%
ggplot(aes(x = sample, y = Size_Factor)) +
geom_bar(stat = "identity")
SD <- enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads") %>%
ggplot(aes(x = sample, y = n_reads)) +
geom_bar(stat = "identity")
library("patchwork")
SF / SD
# Available results
resultsNames(dds)
dds$Condition
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds,
name = "Condition_KD_vs_mock")
res
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl %>%
arrange(padj) %>%
head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes <- res_tbl %>%
arrange(padj) %>%
head(5) %>% pull(ENSEMBL)
best_genes
rowMeans(counts(dds[best_genes], normalize = TRUE))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 1:3], normalize = TRUE)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 4:6], normalize = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients(dds[best_genes])[,1] + coefficients(dds[best_genes])[,2]
# 6. How many genes have no padjusted value? Why?
summary(res$padj)
# filter genes with no padjusted values
res_tbl %>%
filter(is.na(padj))
metadata(res)
metadata(res)$filterThreshold
quantile(res$baseMean, 0.7169)
metadata(res)$filterNumRej
as_tibble(metadata(res)$filterNumRej) %>%
ggplot(aes(x = theta, y = numRej)) +
geom_point() +
geom_vline(xintercept = 0.7169,
color = 'red')
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl %>%
filter(baseMean == 0) %>%
nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
nrow()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(dds,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr <- 10
# keep genes with counts > 5 in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >=3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(dds_bis,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# => Number of genes kept
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Compare with previous anaylsis with the independant filtering thr fixed by deseq2
as_tibble(res, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Histogram of pvalues
hist(res_tbl$pvalue)
res_tbl %>%
filter(pvalue > 0.8 & pvalue < 0.85) %>%
head(10)
res_tbl %>%
filter(baseMean > metadata(res)$filterThreshold) %>%
pull(pvalue) %>%
hist()
# plot MA
plotMA(res)
# Volcano plot
res_tbl %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = log2FoldChange, y = -log10(padj),
color = padj < 0.05 & abs(log2FoldChange) > 1)) +
scale_colour_manual(values = c("gray", "red")) +
geom_point(size = 0.5) +
geom_hline(yintercept = -log10(0.05)) +
geom_vline(xintercept = 1) +
geom_vline(xintercept = -1)
# plot counts of 6 best genes
best_genes <- res_tbl %>%
arrange(padj) %>%
head(6)
as_tibble(counts(dds[best_genes$ENSEMBL, ], normalize = T),
rownames = 'ENSEMBL') %>%
gather(sample, counts, -ENSEMBL) %>%
left_join(as_tibble(coldata, rownames = "sample")) %>%
ggplot(aes(x = sample, y = counts, fill = Condition)) +
geom_bar(stat = 'identity', color = "gray30") +
facet_wrap( ~ ENSEMBL, scales = "free", ncol = 3) +
theme(axis.text.x = element_text(size = 7, angle = 90),
axis.title.x = element_blank(),
legend.position = "right",
legend.text = element_text(size = 7),
legend.title = element_text(size = 7))
#Identify and inspect counts of the genes plotted in red in the volcano-plot.
#These genes have a very large log2FC (|log2FC| > 5) but are far from bearing
#the lowest padjusted value (their padjusted value is between 0.05 and 1e-5).
selected_genes <- res_tbl %>%
filter(padj < 0.05 & padj > 1e-5 & abs(log2FoldChange) > 5)
as_tibble(counts(dds[selected_genes$ENSEMBL, ], normalize = T),
rownames = 'ENSEMBL') %>%
gather(sample, counts, -ENSEMBL) %>%
left_join(as_tibble(coldata, rownames = "sample")) %>%
ggplot(aes(x = sample, y = counts, fill = Condition)) +
geom_bar(stat = 'identity', color = "gray30") +
facet_wrap( ~ ENSEMBL, scales = "free", ncol = 3) +
theme(axis.text.x = element_text(size = 7, angle = 90),
axis.title.x = element_blank(),
legend.position = "right",
legend.text = element_text(size = 7),
legend.title = element_text(size = 7))
# Using dispersion() function, compare dispersion values for both group
#of genes
dispersions(dds[best_genes$ENSEMBL,])
dispersions(dds[selected_genes$ENSEMBL,])
# Add genes names
library("biomaRt")
library("org.Hs.eg.db")
# Load homo sapiens ensembl dataset
mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#Attributes define the values we are interested to retrieve.
#listAttributes(mart)
ensembl_to_geneName <- getBM(attributes = c("ensembl_gene_id", "external_gene_name"), mart = mart)
ensembl_to_geneName
names(ensembl_to_geneName) <- c("ENSEMBL", "gene")
res_tbl <- res_tbl %>%
left_join(ensembl_to_geneName) %>%
dplyr::select(ENSEMBL, gene, everything()) %>%
arrange(padj)
library(DESeq2)
library(tidyverse)
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)
# Create dds object
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~ Condition)
head(assay(dds))
colData(dds)
rowData(dds)
# Inspect counts distribution
as_tibble(assay(dds)) %>%
gather(sample, value = counts) %>%
ggplot(aes(x = log2(counts + 1), fill = sample)) +
geom_histogram(bins = 20) +
facet_wrap(~ sample)
# Run DESeq2
dds <- DESeq(dds)
#PCA with plotPCA
dim(rld)
rld <- rlogTransformation(dds)
plotPCA(rld, intgroup = "Condition")
# Values can be extracted to customise the plot
x <- plotPCA(rld, intgroup = "Condition", return = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
geom_point() +
geom_text()
# PCA with prcomp()
pca <- prcomp(t(assay(rld)))
summary(pca)
var <- pca$sdev^2
pve <- var/sum(var) # % var explained
as_tibble(pca$x, rownames = "Sample") %>%
select(Sample, PC1, PC2) %>%
left_join(as_tibble(coldata, rownames = "Sample")) %>%
ggplot(aes(x = PC1, y = PC2, color = Condition)) +
geom_point() +
xlab(paste0("PC1:", round(pve[1]*100), " % variance")) +
ylab(paste0("PC2:", round(pve[2]*100), " % variance"))
# PCA with prcomp() taking 500 genes with higher variance
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(rld)[select, ]))
pve <- pca$sdev^2/sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
select(Sample, PC1, PC2) %>%
left_join(as_tibble(coldata, rownames = "Sample")) %>%
ggplot(aes(x = PC1, y = PC2, color = Condition)) +
geom_point() +
xlab(paste0("PC1:", round(pve[1]*100), " % variance")) +
ylab(paste0("PC2:", round(pve[2]*100), " % variance"))
# Size Factors
sizeFactors(dds)
# Compare with sequencing depths
SF <- enframe(sizeFactors(dds), name = "sample", value = "Size_Factor") %>%
ggplot(aes(x = sample, y = Size_Factor)) +
geom_bar(stat = "identity")
SD <- enframe(colSums(assay(dds, "counts")), name = "sample", value = "n_reads") %>%
ggplot(aes(x = sample, y = n_reads)) +
geom_bar(stat = "identity")
library("patchwork")
SF / SD
# Available results
resultsNames(dds)
dds$Condition
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds,
name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl %>%
arrange(padj) %>%
head(5)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes <- res_tbl %>%
arrange(padj) %>%
head(5) %>% pull(ENSEMBL)
rowMeans(counts(dds[best_genes], normalize = TRUE))
rowMeans(counts(dds[best_genes], normalize = TRUE))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients(dds[best_genes])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 1:3], normalize = TRUE)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2(rowMeans(counts(dds[best_genes, 4:6], normalize = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients(dds[best_genes])[,1] + coefficients(dds[best_genes])[,2]
# 6. How many genes have no padjusted value? Why?
summary(res$padj)
# filter genes with no padjusted values
res_tbl %>%
filter(is.na(padj))
head(metadata(res)$filterNumRej)
as_tibble(metadata(res)$filterNumRej) %>%
ggplot(aes(x = theta, y = numRej)) +
geom_point() +
geom_vline(xintercept = 0.7169,
color = 'red')
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl %>%
filter(baseMean == 0) %>%
nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
nrow()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(dds,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr <- 5
# keep genes with counts > 5 in 3 or more samples
keep <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr) >=3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(dds_bis,
name = "Condition_KD_vs_mock",
independentFiltering = FALSE)
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(is.na(padj)) %>% nrow()
as_tibble(res_bis, rownames = "ENSEMBL") %>%
filter(!is.na(padj)) %>% nrow()
# Histogram of pvalues
hist(res_tbl$pvalue)
res_tbl %>%
filter(pvalue > 0.8 & pvalue < 0.85) %>%
head(10)
res_tbl %>%
filter(baseMean > metadata(res)$filterThreshold) %>%
pull(pvalue) %>%
hist()
# plot MA
plotMA(res, alpha = 0.05)
# Volcano plot
res_tbl %>%
filter(!is.na(padj)) %>%
ggplot(aes(x= log2FoldChange, y = -log10(padj))) +
geom_point(size = 0.5)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
This diff is collapsed.
@DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
NTGANGCTTAGGTAGGGGTAGGGAGACAGGGAAGTAACAGAGGTACTGGACATGCCAGGCAGAAGAAACAGCAGTACAGGACATGGAAGCAAACAACATGG
+DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
#0;9#4=9@<><>?????@=@???@???????<?>?=?=?<?????????????????????<@<???=?<=?<::<==<;<<==<;<<<<==<<<<<<=<
@DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
CCTGNACCCAGTAGAGAAAGCCCTTCGAGATGCCAAACTAGACAAGTCACAGATTCATGATATTGTCCTGGTTGGTGGTTCTACTCGTATCCCCAAGATTC
+DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
@@@D#2ADHHHAAF<CFHIGHIIGGIHIECGFAHDHIIIGGGIBGG?FHHHGGEHIH=FEGDDGI@GIIGIHHEHEDDBACCDCECB@BCBCCBCBBBCC@
DRR016707.26395247 147 1 19626495 60 101M = 19626490 -106 GTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGG DDDDDDDDDDDDDEEEEEEEFFFFFFHCHHHJJJHHJJJJJJJJJJJJIHFJJIHJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJHHHHHFFFFFCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.26395247 99 1 19626490 60 101M = 19626495 106 CTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTG CCCFFFFFHHHHHJIJJJJIJJJJJJJJJJIJJJJJJJJJJJJJJJHHIHIJJJIJJHIJJJIIIIIIJGIJJJJDHJJJHHHHHHHFFFFFFEDACEEDD AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 163 1 19626487 60 101M = 19626493 107 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAG CC@FFFFFHFHFFHGGJCIFJGHIIIIGIIIIJIIJJJIGIIIIIIGII?:BFHGHIIIIGEHHHGIIDGGH@GGIGHJGHHHHCEHDEFDDFFEEECCEC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 83 1 19626493 60 101M = 19626487 -107 CTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAA CDDDDDDDDDDDDECEEEEEFFFFD?=HHHHGIIGEGIGIIIHIGDIHHGGGIIHGHIIIIGIIIGIGEIIGGGIIIIGIIIIGHDIIFHHHHFFBDD?C@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.42561021 161 1 19626495 60 100M1S = 19626494 0 GTAACAATGTTATCAGTAATGCTTTAAACTAAAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGA ?8?B1:BDD,,2<:C<2A<<,+A?CBFFAE<+<+2A?EEBD99:D<*?B:*:D9B:DD/BDED@)=8=B;@=)).=(=@;7A)7.==>;3);@AAA>A@3; AS:i:-8 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:30C0C68 YT:Z:UP NH:i:1
DRR016707.42561021 81 1 19626494 60 101M = 19626495 0 TGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGACACCAAGTCTGTTTCTTGTTTTGTATTTTCGCTCTGGAAGTTGTAAG (9(??==3??>==;5(55((6.((9>;..;67>)>).5(A;;>=/))8.=>9)((?AB?92=00*;=::1)=:3=74=A7<)0<+<AAAB?=<+4AA<;== AS:i:-5 ZS:i:-19 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:54A29T16 YT:Z:UP NH:i:1
DRR016707.43465443 163 1 19626483 60 101M = 19626486 104 TGCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG B@@FAFFFHHHHHHIIIIJJJHJJIJJIIJJJJJJJFJJJJJJJJJJJJJHIJFHJJIJJJIJJIIJIJJHIHHIIIJJJJJHEHHHFEFFFFFEDEEEEE AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:-1 YT:Z:CP NH:i:1
DRR016707.43465443 83 1 19626486 60 100M1S = 19626483 -104 CATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAG DDDDDDCDDDDCCDEEEEDFFFFFDBHHHHHGJJGHDJIJJIGIHJGGJJJJJJJJJJJJJHG?JJJJJJJJJJJIIJJJJIJJJJJIGHGHHFFFFFCCC AS:i:-1 ZS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 147 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG DDDDDCDDDDDDEEDEEEEFFFFFFHHHHHHJJIJIGHFIHJJJIHFIHHFJJJJJIJJJJJJJIHJJJJJIJJJJJJJJJJJJJJJJHHHHHFFFFFBB@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 99 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG CCCFFFFFHHHHHJJJJJJJIJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJDGIJJJJJJJJCGIJJJIIHHJJJHHIJJJHHHHHDFFFFFFEEEEEED AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.68628658 163 1 19626487 60 97M4S = 19626487 -105 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGAGAT CCCFFFFFGHHHHJJJJJJIJJJJJJIJJJJJJJJJIJGJIIEIIJJJIE:DGIJJJJJJJJJJJIJIHIJGGHJJJJJHJHHHHHHFFFFFFFEEEDEDC AS:i:-4 ZS:i:-17 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
DRR016707.68628658 83 1 19626487 60 4S97M = 19626487 105 ATCTATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG EDDDDDDDDCDEEEEEEEFFFEEFHCHHHHHIGJJJJIHCIJJJJJIHIJJJIFIJJIJJJJJIIHFJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC AS:i:-4 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
> SEQUENCE 1 (Fri Sep 18 13:05:43 2020)
GCGCCACCTTCACCTGTTGTCGAGTCCGCCGACACCAAGAACCTACCGCA
CCCTATTCCTTGGGTTCATTATAGTGTCAAGCGACACGACAAACGTAGCG
GAGATGGTGTCTCGAGGAATGATCTTCAGATATGATCTCAGAGAGGCGGT
CCGTCTTGGAAACTGGCACGGGTAACGCTGCTTCTTCCCGGCAACGAGCA
TCAAAATCACTCGGGGGATCATCCTGGGCCGTCCTCTTTAATTTAGTGGC
CATGGCGCAATATCACAGGGTACAGTCGCAACGATTGCTAAAGGAGTGAG
GAGATTTTCGAGTCGGCATAGATGCTAATACCATGCCACGGAGCGTAATC
TTGCCGCATCCCTGGTTACTTCCATTTCTCCAGCCATGCGACGGAAATTT
GCGCTGACCCTGAGGCATTATCAACTTCCGGTAGAACAGCTATAGCATCG
CCTCGTGCGCCTTGCCTTTTTAGCTATGAAACCTTCGTCATCACAGCGAG
CTGCCATGCTGGCGCCACTGTACAATCCTTGTCGCGCAGCCTACGTGCAC
AAAGCCGCGCGTAACCACTCACTTGCTGTCTGTTCTGTTCAACTGCAAGG
AAACTCGGAACGTGGACAACCCGGAGGTATGGCAAGCTGCCTTAATACCT
GAAAGAAGCTTTTGACACAAAAAGACTGGGCATATTGTCTGGGGTATATC
GCGCCAGCAGCCCCACGTGAAACACGCAGCATAAGTGAGCTGTGACGGTA
GGAAAGACGGAACTCACAGCGTCTCCGATCCAAGGAAGAGAGTTAATATG
TACGAGTTGTCAGGAATTACGTGCACTCGCACTATTACGTAATAACCTAC
TTATCGTTGAGGTTATCTGTGTCGCTCGGAAGAGTCATCAATTGGTGGTT
GCGGACCCGGCTCCTCAGGATCTTACCACTTTGTATACAGTATCAAAACA
GGTGCACCGTAATTATTCGTGACGAGAGGAGGCTAAAACTCTGACTGCCA
AGACCCCTGGGGCCGTTCTAACGGCGCATGCGGCTACCGTTGCTAAGTTG
AACATCGGTCACAAGAGATCGTTAAAGTCAGAGGTACTCGACGGCTCATT
GGGCCCCGACTGTTACTACCAAGACATCGGCGGTTTTGCACTATATAGTC
ACCGAGATTCTAGAGCCAGCGATGGGTGCCCCCAACCTGGGCCGGATTTA
AGAATTGATCGACAAATTATTCATCACATCGCTGTCATATTCGTGGACAT
AGTGTGGTATACCTTGCCTACATGCACTCGTTCCATAATAATTTCTTTCC
TGGCAATTCGGGCTTGTTTCACGATAGGTCGACCCATCTCAAGTTCTGAA
ATGATTGTTCTTGGGGACCGGTACCTTAGTGTATAGATACTTCTTCACCC
AATGCAGCACGGTACAATAGGTCCTAGGGAATGGGTGGTAAAAGGCTTCT
ATCAACACGCGCTTTCCTAGGGACCCATGTGCTCCGCATAGTGGAATGAA
ACTTGCTGATCTTTACACTATGTAAGCCTCTAACTGTGGATTGGAGTAAT
TTGGGGTGGTGCGCTAGTATACCCTCCGCACGTAACAACAACTACGGTGT
CTAGTACGGGCACCTCACCGAGAGCTGTTTCGGTTTGAGCCGGCTTCTTG
AATAGTAGCGAGATGGCGCTAAATTGATCCCGGTCTCCCAATCCGTAGTC
CAGCATGGCCGAGGTAAGAGGGAGAACCGGCTCTTTCGCAGTCAAGCGGT
GAAACTACAGATCCAGCTAAGTCATACCTGGCAATTCTAGGGTCCGTCGC
ACCCTCAATTAAGTACGAGGTTTGTGACCAGACTTCAGCATCACCATTTA
TCTGGGAGTTGGTACGTCAATCTGGTACCATCAGTCGCAATTTCTCAGAA
TAATACGCTGGCAAACCAGCTACTCCACTACACATGTCCCCCCCTCAAGG
GTCTCAAATTTGTAGGTTATTGTATTTGCGCGTGTGGGCACTACAGTGAA
CTTGTCGCTTGTGCGTCTAGGTGTTTCTGATTGAACTAATTACTGCACCA
AGTTCGCTGATAGTCAGCGACAGGCTCACGTAGCATGCCAAGACCGACAA
AAACGCCTTCTCTTAGGTTACATATAATCGGCGTGCCCGAAGAAGGCTGA
CACCTTCTCCGCCGCCTAAGAATGGGCCATTATAGAAAGCGCGATATCTA
CAGCCGTCCGTTCTCATCGGATGGCTGCAGAACATTCGGGTGAGTCTGGT
ATCTCCAAAAGTTTTCTCCTGTTCGATCTCCTGTAACTCAGGCCCTCAAT
CCGTCTGTCACCCAGTTTGGACTCAAATGTGGGGCTAAACCTGTCGGTAC
CCATACACATCTCAATGCCCCGAAGGTTACACTGACACGCCCTAAAGGCA
AATGTACAATTTCGAGTTACAAGCGGCCCCTTGCTTTGACAGATTCCGGT
TTGACAGAGTAAATTTGTGCGGTCGGTTTTATCGCTTGTAGTCGATTAGC
> SEQUENCE 10 (Fri Sep 18 13:05:43 2020)
ATACACTTTTTGGAGTTCGCCTCATAGGTGAGCAGACTCGTCAAACAAGT
CCTAGATGAGGCCGAATCTGACTGCAGCCATAGTTACCTGTACATCGCTT
ATGCTTCGAAGGCCCAATGCTTCACGAACACAGTATGGCAGCAGTTTTGT
TCCTGACGCGAAGGTCAGGCCGTAGTACCGGTTCGTTGTAGTGTCCTGTG
TGCTCCTGTCTCGACCCTTATCGCAAGAACACCTCGTCGGTGCGTCGATG
GGCGCAGAACTATCTTTCTTTGTAGTTTCTGACGAACCCAGAGAATTAAA
CTGGGTGAGAAGATAGCACTAGTTTTTTGCCTAAGAAAGATAACCACAGC
CTCACGTACTTAGTTCGCATTTAACAATAAATTGGGGATTTGAGATCGTA
ATTTCGCCCCATAACCTCCGCCAGTCGTAAAGGTAAAAGTGCACCTGCTG
TCATGAATGCAGTCTGCTTCGGATGCCCGGCAACTGTAGGGGCATTAAGC
CTTTAGTGGATACACGCTACCCCATTATAGGCTGGTCAAAGCTGTTCCGA
GCCCATCCAAGCTGTCTGAACTACCCTCTCCAGATGTTCATGATAGTTGC
TCAGATAAGATCGACACTCTCCCCGAGTGTGCTAGTTAGCGGATTCTGCG
TCCAAGGCCTCATGTTTTATAAATTGTGGGAAAGAAAGGACCTTAAGCCG
GGGGACAGTTTTAGGCGAACTCTAATCAACTTCACGTGCCTCCCTCACCC
GACGCTGTTCGGAATTCTGCCCGAGGTTTTGGGCCTTACATACACAAAGG
AGACACCACTACGCGCCTCTCCCAATGACGGGTGGGAGCCCATGGGTGAA