Commit af5e96bf authored by Axelle Loriot's avatar Axelle Loriot
Browse files

Auto-saving for axelle.loriot on branch master from commit 38f4ae17

parent 38f4ae17
Pipeline #79941 passed with stage
in 29 seconds
library(DESeq2)
library(tidyverse)
# Load data ----
# The two .rda files are expected to define `counts` and `coldata`,
# the objects used below.
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)

# Create dds object ----
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ Condition
)
head(assay(dds))
colData(dds)
rowData(dds)

# Inspect counts distribution ----
# One histogram of log2(count + 1) per sample. The assay matrix is reshaped to
# long format (one row per gene/sample pair) with pivot_longer(), which
# replaces the superseded gather().
as_tibble(assay(dds)) %>%
  pivot_longer(everything(), names_to = "sample", values_to = "counts") %>%
  ggplot(aes(x = log2(counts + 1), fill = sample)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ sample)
# Run DESeq2 ----
dds <- DESeq(dds)

# PCA with plotPCA ----
rld <- rlogTransformation(dds)
plotPCA(rld, intgroup = "Condition")

# Values can be extracted to customise the plot.
# The argument is spelled out in full (`returnData`) instead of relying on
# partial matching of `return`.
x <- plotPCA(rld, intgroup = "Condition", returnData = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
  geom_point() +
  geom_text()

# PCA with prcomp() ----
# prcomp() expects observations in rows, hence the transpose.
pca <- prcomp(t(assay(rld)))
summary(pca)
# Proportion of variance explained by each principal component
pve <- pca$sdev^2 / sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
  select(Sample, PC1, PC2) %>%
  left_join(as_tibble(coldata, rownames = "Sample"), by = "Sample") %>%
  ggplot(aes(x = PC1, y = PC2, color = Condition)) +
  geom_point() +
  xlab(paste0("PC1:", round(pve[1] * 100), " % variance")) +
  ylab(paste0("PC2:", round(pve[2] * 100), " % variance"))

# PCA with prcomp() taking 500 genes with higher variance ----
# The index vector is deliberately not called `select`, so it does not shadow
# dplyr::select() used in the pipeline below.
gene_vars <- rowVars(assay(rld))
top_var_genes <- order(gene_vars, decreasing = TRUE)[
  seq_len(min(500, length(gene_vars)))
]
pca <- prcomp(t(assay(rld)[top_var_genes, ]))
pve <- pca$sdev^2 / sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
  select(Sample, PC1, PC2) %>%
  left_join(as_tibble(coldata, rownames = "Sample"), by = "Sample") %>%
  ggplot(aes(x = PC1, y = PC2, color = Condition)) +
  geom_point() +
  xlab(paste0("PC1:", round(pve[1] * 100), " % variance")) +
  ylab(paste0("PC2:", round(pve[2] * 100), " % variance"))
# Size Factors ----
sizeFactors(dds)

# Compare with sequencing depths ----
# Bar chart of the size factor of each sample
SF <- sizeFactors(dds) %>%
  enframe(name = "sample", value = "Size_Factor") %>%
  ggplot(aes(x = sample, y = Size_Factor)) +
  geom_col()

# Bar chart of the column sums of the raw count matrix, one bar per sample
SD <- colSums(counts(dds)) %>%
  enframe(name = "sample", value = "n_reads") %>%
  ggplot(aes(x = sample, y = n_reads)) +
  geom_col()

# Stack the two charts vertically (patchwork provides `/` for ggplot objects)
library("patchwork")
SF / SD
# Available results ----
resultsNames(dds)
dds$Condition
# Make "mock" the reference level, then re-fit so that the model
# coefficients are expressed relative to it.
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds, name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")

## Exercises ----
# 1. Inspect the results table and identify the 5 “best genes” showing the
#    lowest adjusted p-value.
res_tbl %>%
  arrange(padj) %>%
  head(5)

# 2. Calculate the mean expression level of these 5 "best genes" using
#    the function `counts()`. Compare with baseMean values.
best_genes <- res_tbl %>%
  arrange(padj) %>%
  head(5) %>%
  pull(ENSEMBL)
# `normalized` is spelled out in full rather than partially matched.
rowMeans(counts(dds[best_genes, ], normalized = TRUE))

# 3. Extract the ß coefficients of these 5 "best genes" from the GLM
#    using the function `coefficients()`. Compare with log2FoldChange values.
best_coefs <- coefficients(dds[best_genes, ])
best_coefs

# 4. Using the function `counts()`, calculate the expression levels (in log2)
#    of these 5 "best genes" in mock cells. Compare with ß coefficients.
# Samples are selected by their Condition label rather than by hard-coded
# column positions, so the result does not depend on the column order.
is_mock <- dds$Condition == "mock"
log2(rowMeans(counts(dds[best_genes, is_mock], normalized = TRUE)))

# 5. Calculate the expression levels (in log2)
#    of these 5 "best genes" in KD cells. Compare with ß coefficients.
is_kd <- dds$Condition == "KD"
log2(rowMeans(counts(dds[best_genes, is_kd], normalized = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients:
# intercept (mock level) + KD-vs-mock effect
best_coefs[, "Intercept"] + best_coefs[, "Condition_KD_vs_mock"]
# 6. How many genes have no adjusted p-value? Why?
summary(res$padj)
# Genes with no adjusted p-value
res_tbl %>%
  filter(is.na(padj))

# Number of rejections obtained for each candidate filtering quantile (theta)
head(metadata(res)$filterNumRej)
# The red line marks the quantile actually chosen by results(); it is read
# from the results metadata instead of being hard-coded.
as_tibble(metadata(res)$filterNumRej) %>%
  ggplot(aes(x = theta, y = numRej)) +
  geom_point() +
  geom_vline(xintercept = metadata(res)$filterTheta, color = "red")

# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with baseMean == 0
res_tbl %>%
  filter(baseMean == 0) %>%
  nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
  filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
  nrow()

# Re-run the results() function on the same dds object,
# but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(
  dds,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
  filter(is.na(padj)) %>%
  nrow()
# Imagine another way of filtering genes with very low counts ----
# Genes with few counts are removed before re-fitting the model.
filtering_thr <- 5
# Keep genes whose normalized count is >= filtering_thr in 3 or more samples
n_samples_above_thr <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr)
keep <- n_samples_above_thr >= 3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(
  dds_bis,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
# Number of genes without, then with, an adjusted p-value
sum(is.na(res_bis$padj))
sum(!is.na(res_bis$padj))
# Histogram of p-values ----
hist(res_tbl$pvalue)
# Look at a few genes with p-values between 0.8 and 0.85
res_tbl %>%
  filter(pvalue > 0.8, pvalue < 0.85) %>%
  head(10)
# Same histogram, restricted to genes whose baseMean is above the
# filterThreshold stored in the results metadata
res_tbl %>%
  filter(baseMean > metadata(res)$filterThreshold) %>%
  pull(pvalue) %>%
  hist()

# MA plot ----
plotMA(res, alpha = 0.05)

# Volcano plot ----
# Genes without an adjusted p-value are dropped before plotting.
res_tbl %>%
  drop_na(padj) %>%
  ggplot(aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(size = 0.5)
library(DESeq2)
library(tidyverse)
# Load data ----
# The two .rda files are expected to define `counts` and `coldata`,
# the objects used below.
load("wsbim2122_data/deseq2/counts.rda")
load("wsbim2122_data/deseq2/coldata.rda")
coldata
head(counts)

# Create dds object ----
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ Condition
)
head(assay(dds))
colData(dds)
rowData(dds)

# Inspect counts distribution ----
# One histogram of log2(count + 1) per sample. The assay matrix is reshaped to
# long format (one row per gene/sample pair) with pivot_longer(), which
# replaces the superseded gather().
as_tibble(assay(dds)) %>%
  pivot_longer(everything(), names_to = "sample", values_to = "counts") %>%
  ggplot(aes(x = log2(counts + 1), fill = sample)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ sample)
# Run DESeq2 ----
dds <- DESeq(dds)

# PCA with plotPCA ----
rld <- rlogTransformation(dds)
# Inspect the dimensions only once `rld` exists (the call used to precede the
# assignment, which fails in a fresh session).
dim(rld)
plotPCA(rld, intgroup = "Condition")

# Values can be extracted to customise the plot.
# The argument is spelled out in full (`returnData`) instead of relying on
# partial matching of `return`.
x <- plotPCA(rld, intgroup = "Condition", returnData = TRUE)
ggplot(x, aes(x = PC1, y = PC2, color = Condition, label = name)) +
  geom_point() +
  geom_text()

# PCA with prcomp() ----
# prcomp() expects observations in rows, hence the transpose.
pca <- prcomp(t(assay(rld)))
summary(pca)
# Proportion of variance explained by each principal component
pve <- pca$sdev^2 / sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
  select(Sample, PC1, PC2) %>%
  left_join(as_tibble(coldata, rownames = "Sample"), by = "Sample") %>%
  ggplot(aes(x = PC1, y = PC2, color = Condition)) +
  geom_point() +
  xlab(paste0("PC1:", round(pve[1] * 100), " % variance")) +
  ylab(paste0("PC2:", round(pve[2] * 100), " % variance"))

# PCA with prcomp() taking 500 genes with higher variance ----
# The index vector is deliberately not called `select`, so it does not shadow
# dplyr::select() used in the pipeline below.
gene_vars <- rowVars(assay(rld))
top_var_genes <- order(gene_vars, decreasing = TRUE)[
  seq_len(min(500, length(gene_vars)))
]
pca <- prcomp(t(assay(rld)[top_var_genes, ]))
pve <- pca$sdev^2 / sum(pca$sdev^2)
as_tibble(pca$x, rownames = "Sample") %>%
  select(Sample, PC1, PC2) %>%
  left_join(as_tibble(coldata, rownames = "Sample"), by = "Sample") %>%
  ggplot(aes(x = PC1, y = PC2, color = Condition)) +
  geom_point() +
  xlab(paste0("PC1:", round(pve[1] * 100), " % variance")) +
  ylab(paste0("PC2:", round(pve[2] * 100), " % variance"))
# Size Factors ----
sizeFactors(dds)

# Compare with sequencing depths ----
# Bar chart of the size factor of each sample
SF <- sizeFactors(dds) %>%
  enframe(name = "sample", value = "Size_Factor") %>%
  ggplot(aes(x = sample, y = Size_Factor)) +
  geom_col()

# Bar chart of the column sums of the raw count matrix, one bar per sample
SD <- colSums(counts(dds)) %>%
  enframe(name = "sample", value = "n_reads") %>%
  ggplot(aes(x = sample, y = n_reads)) +
  geom_col()

# Stack the two charts vertically (patchwork provides `/` for ggplot objects)
library("patchwork")
SF / SD
# Available results ----
resultsNames(dds)
dds$Condition
# Make "mock" the reference level, then re-fit so that the model
# coefficients are expressed relative to it.
dds$Condition <- relevel(dds$Condition, ref = "mock")
dds$Condition
dds <- DESeq(dds)
resultsNames(dds)
res <- results(dds, name = "Condition_KD_vs_mock")
res_tbl <- as_tibble(res, rownames = "ENSEMBL")

## Exercises ----
# 1. Inspect the results table and identify the 5 “best genes” showing the
#    lowest adjusted p-value.
res_tbl %>%
  arrange(padj) %>%
  head(5)

# 2. Calculate the mean expression level of these 5 "best genes" using
#    the function `counts()`. Compare with baseMean values.
best_genes <- res_tbl %>%
  arrange(padj) %>%
  head(5) %>%
  pull(ENSEMBL)
# `normalized` is spelled out in full rather than partially matched.
rowMeans(counts(dds[best_genes, ], normalized = TRUE))

# 3. Extract the ß coefficients of these 5 "best genes" from the GLM
#    using the function `coefficients()`. Compare with log2FoldChange values.
best_coefs <- coefficients(dds[best_genes, ])
best_coefs

# 4. Using the function `counts()`, calculate the expression levels (in log2)
#    of these 5 "best genes" in mock cells. Compare with ß coefficients.
# Samples are selected by their Condition label rather than by hard-coded
# column positions, so the result does not depend on the column order.
is_mock <- dds$Condition == "mock"
log2(rowMeans(counts(dds[best_genes, is_mock], normalized = TRUE)))

# 5. Calculate the expression levels (in log2)
#    of these 5 "best genes" in KD cells. Compare with ß coefficients.
is_kd <- dds$Condition == "KD"
log2(rowMeans(counts(dds[best_genes, is_kd], normalized = TRUE)))
# log2 expression levels in KD cells evaluated from ß coefficients:
# intercept (mock level) + KD-vs-mock effect
best_coefs[, "Intercept"] + best_coefs[, "Condition_KD_vs_mock"]
# 6. How many genes have no adjusted p-value? Why?
summary(res$padj)
# Genes with no adjusted p-value
res_tbl %>%
  filter(is.na(padj))

# Number of rejections obtained for each candidate filtering quantile (theta)
head(metadata(res)$filterNumRej)
# The red line marks the quantile actually chosen by results(); it is read
# from the results metadata instead of being hard-coded.
as_tibble(metadata(res)$filterNumRej) %>%
  ggplot(aes(x = theta, y = numRej)) +
  geom_point() +
  geom_vline(xintercept = metadata(res)$filterTheta, color = "red")

# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with baseMean == 0
res_tbl %>%
  filter(baseMean == 0) %>%
  nrow()
# Number of genes filtered by the independent filtering procedure
res_tbl %>%
  filter(baseMean > 0 & baseMean < metadata(res)$filterThreshold) %>%
  nrow()

# Re-run the results() function on the same dds object,
# but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF <- results(
  dds,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
as_tibble(res_no_IF, rownames = "ENSEMBL") %>%
  filter(is.na(padj)) %>%
  nrow()
# Imagine another way of filtering genes with very low counts ----
# Genes with few counts are removed before re-fitting the model.
filtering_thr <- 5
# Keep genes whose normalized count is >= filtering_thr in 3 or more samples
n_samples_above_thr <- rowSums(counts(dds, normalized = TRUE) >= filtering_thr)
keep <- n_samples_above_thr >= 3
dds_bis <- DESeq(dds[keep, ])
res_bis <- results(
  dds_bis,
  name = "Condition_KD_vs_mock",
  independentFiltering = FALSE
)
# Number of genes without, then with, an adjusted p-value
sum(is.na(res_bis$padj))
sum(!is.na(res_bis$padj))
# Histogram of p-values ----
hist(res_tbl$pvalue)
# Look at a few genes with p-values between 0.8 and 0.85
res_tbl %>%
  filter(pvalue > 0.8, pvalue < 0.85) %>%
  head(10)
# Same histogram, restricted to genes whose baseMean is above the
# filterThreshold stored in the results metadata
res_tbl %>%
  filter(baseMean > metadata(res)$filterThreshold) %>%
  pull(pvalue) %>%
  hist()

# MA plot ----
plotMA(res, alpha = 0.05)

# Volcano plot ----
# Genes without an adjusted p-value are dropped before plotting.
res_tbl %>%
  drop_na(padj) %>%
  ggplot(aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(size = 0.5)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
This diff is collapsed.
@DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
NTGANGCTTAGGTAGGGGTAGGGAGACAGGGAAGTAACAGAGGTACTGGACATGCCAGGCAGAAGAAACAGCAGTACAGGACATGGAAGCAAACAACATGG
+DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
#0;9#4=9@<><>?????@=@???@???????<?>?=?=?<?????????????????????<@<???=?<=?<::<==<;<<==<;<<<<==<<<<<<=<
@DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
CCTGNACCCAGTAGAGAAAGCCCTTCGAGATGCCAAACTAGACAAGTCACAGATTCATGATATTGTCCTGGTTGGTGGTTCTACTCGTATCCCCAAGATTC
+DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
@@@D#2ADHHHAAF<CFHIGHIIGGIHIECGFAHDHIIIGGGIBGG?FHHHGGEHIH=FEGDDGI@GIIGIHHEHEDDBACCDCECB@BCBCCBCBBBCC@
DRR016707.26395247 147 1 19626495 60 101M = 19626490 -106 GTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGG DDDDDDDDDDDDDEEEEEEEFFFFFFHCHHHJJJHHJJJJJJJJJJJJIHFJJIHJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJHHHHHFFFFFCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.26395247 99 1 19626490 60 101M = 19626495 106 CTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTG CCCFFFFFHHHHHJIJJJJIJJJJJJJJJJIJJJJJJJJJJJJJJJHHIHIJJJIJJHIJJJIIIIIIJGIJJJJDHJJJHHHHHHHFFFFFFEDACEEDD AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 163 1 19626487 60 101M = 19626493 107 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAG CC@FFFFFHFHFFHGGJCIFJGHIIIIGIIIIJIIJJJIGIIIIIIGII?:BFHGHIIIIGEHHHGIIDGGH@GGIGHJGHHHHCEHDEFDDFFEEECCEC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 83 1 19626493 60 101M = 19626487 -107 CTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAA CDDDDDDDDDDDDECEEEEEFFFFD?=HHHHGIIGEGIGIIIHIGDIHHGGGIIHGHIIIIGIIIGIGEIIGGGIIIIGIIIIGHDIIFHHHHFFBDD?C@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.42561021 161 1 19626495 60 100M1S = 19626494 0 GTAACAATGTTATCAGTAATGCTTTAAACTAAAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGA ?8?B1:BDD,,2<:C<2A<<,+A?CBFFAE<+<+2A?EEBD99:D<*?B:*:D9B:DD/BDED@)=8=B;@=)).=(=@;7A)7.==>;3);@AAA>A@3; AS:i:-8 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:30C0C68 YT:Z:UP NH:i:1
DRR016707.42561021 81 1 19626494 60 101M = 19626495 0 TGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGACACCAAGTCTGTTTCTTGTTTTGTATTTTCGCTCTGGAAGTTGTAAG (9(??==3??>==;5(55((6.((9>;..;67>)>).5(A;;>=/))8.=>9)((?AB?92=00*;=::1)=:3=74=A7<)0<+<AAAB?=<+4AA<;== AS:i:-5 ZS:i:-19 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:54A29T16 YT:Z:UP NH:i:1
DRR016707.43465443 163 1 19626483 60 101M = 19626486 104 TGCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG B@@FAFFFHHHHHHIIIIJJJHJJIJJIIJJJJJJJFJJJJJJJJJJJJJHIJFHJJIJJJIJJIIJIJJHIHHIIIJJJJJHEHHHFEFFFFFEDEEEEE AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:-1 YT:Z:CP NH:i:1
DRR016707.43465443 83 1 19626486 60 100M1S = 19626483 -104 CATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAG DDDDDDCDDDDCCDEEEEDFFFFFDBHHHHHGJJGHDJIJJIGIHJGGJJJJJJJJJJJJJHG?JJJJJJJJJJJIIJJJJIJJJJJIGHGHHFFFFFCCC AS:i:-1 ZS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 147 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG DDDDDCDDDDDDEEDEEEEFFFFFFHHHHHHJJIJIGHFIHJJJIHFIHHFJJJJJIJJJJJJJIHJJJJJIJJJJJJJJJJJJJJJJHHHHHFFFFFBB@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 99 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG CCCFFFFFHHHHHJJJJJJJIJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJDGIJJJJJJJJCGIJJJIIHHJJJHHIJJJHHHHHDFFFFFFEEEEEED AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.68628658 163 1 19626487 60 97M4S = 19626487 -105 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGAGAT CCCFFFFFGHHHHJJJJJJIJJJJJJIJJJJJJJJJIJGJIIEIIJJJIE:DGIJJJJJJJJJJJIJIHIJGGHJJJJJHJHHHHHHFFFFFFFEEEDEDC AS:i:-4 ZS:i:-17 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
DRR016707.68628658 83 1 19626487 60 4S97M = 19626487 105 ATCTATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG EDDDDDDDDCDEEEEEEEFFFEEFHCHHHHHIGJJJJIHCIJJJJJIHIJJJIFIJJIJJJJJIIHFJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC AS:i:-4 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
> SEQUENCE 1 (Fri Sep 18 13:05:43 2020)
GCGCCACCTTCACCTGTTGTCGAGTCCGCCGACACCAAGAACCTACCGCA
CCCTATTCCTTGGGTTCATTATAGTGTCAAGCGACACGACAAACGTAGCG
GAGATGGTGTCTCGAGGAATGATCTTCAGATATGATCTCAGAGAGGCGGT
CCGTCTTGGAAACTGGCACGGGTAACGCTGCTTCTTCCCGGCAACGAGCA
TCAAAATCACTCGGGGGATCATCCTGGGCCGTCCTCTTTAATTTAGTGGC
CATGGCGCAATATCACAGGGTACAGTCGCAACGATTGCTAAAGGAGTGAG
GAGATTTTCGAGTCGGCATAGATGCTAATACCATGCCACGGAGCGTAATC
TTGCCGCATCCCTGGTTACTTCCATTTCTCCAGCCATGCGACGGAAATTT
GCGCTGACCCTGAGGCATTATCAACTTCCGGTAGAACAGCTATAGCATCG
CCTCGTGCGCCTTGCCTTTTTAGCTATGAAACCTTCGTCATCACAGCGAG
CTGCCATGCTGGCGCCACTGTACAATCCTTGTCGCGCAGCCTACGTGCAC
AAAGCCGCGCGTAACCACTCACTTGCTGTCTGTTCTGTTCAACTGCAAGG
AAACTCGGAACGTGGACAACCCGGAGGTATGGCAAGCTGCCTTAATACCT
GAAAGAAGCTTTTGACACAAAAAGACTGGGCATATTGTCTGGGGTATATC
GCGCCAGCAGCCCCACGTGAAACACGCAGCATAAGTGAGCTGTGACGGTA
GGAAAGACGGAACTCACAGCGTCTCCGATCCAAGGAAGAGAGTTAATATG
TACGAGTTGTCAGGAATTACGTGCACTCGCACTATTACGTAATAACCTAC
TTATCGTTGAGGTTATCTGTGTCGCTCGGAAGAGTCATCAATTGGTGGTT
GCGGACCCGGCTCCTCAGGATCTTACCACTTTGTATACAGTATCAAAACA
GGTGCACCGTAATTATTCGTGACGAGAGGAGGCTAAAACTCTGACTGCCA
AGACCCCTGGGGCCGTTCTAACGGCGCATGCGGCTACCGTTGCTAAGTTG
AACATCGGTCACAAGAGATCGTTAAAGTCAGAGGTACTCGACGGCTCATT
GGGCCCCGACTGTTACTACCAAGACATCGGCGGTTTTGCACTATATAGTC
ACCGAGATTCTAGAGCCAGCGATGGGTGCCCCCAACCTGGGCCGGATTTA
AGAATTGATCGACAAATTATTCATCACATCGCTGTCATATTCGTGGACAT
AGTGTGGTATACCTTGCCTACATGCACTCGTTCCATAATAATTTCTTTCC
TGGCAATTCGGGCTTGTTTCACGATAGGTCGACCCATCTCAAGTTCTGAA
ATGATTGTTCTTGGGGACCGGTACCTTAGTGTATAGATACTTCTTCACCC
AATGCAGCACGGTACAATAGGTCCTAGGGAATGGGTGGTAAAAGGCTTCT
ATCAACACGCGCTTTCCTAGGGACCCATGTGCTCCGCATAGTGGAATGAA
ACTTGCTGATCTTTACACTATGTAAGCCTCTAACTGTGGATTGGAGTAAT
TTGGGGTGGTGCGCTAGTATACCCTCCGCACGTAACAACAACTACGGTGT
CTAGTACGGGCACCTCACCGAGAGCTGTTTCGGTTTGAGCCGGCTTCTTG
AATAGTAGCGAGATGGCGCTAAATTGATCCCGGTCTCCCAATCCGTAGTC
CAGCATGGCCGAGGTAAGAGGGAGAACCGGCTCTTTCGCAGTCAAGCGGT
GAAACTACAGATCCAGCTAAGTCATACCTGGCAATTCTAGGGTCCGTCGC
ACCCTCAATTAAGTACGAGGTTTGTGACCAGACTTCAGCATCACCATTTA
TCTGGGAGTTGGTACGTCAATCTGGTACCATCAGTCGCAATTTCTCAGAA
TAATACGCTGGCAAACCAGCTACTCCACTACACATGTCCCCCCCTCAAGG
GTCTCAAATTTGTAGGTTATTGTATTTGCGCGTGTGGGCACTACAGTGAA
CTTGTCGCTTGTGCGTCTAGGTGTTTCTGATTGAACTAATTACTGCACCA
AGTTCGCTGATAGTCAGCGACAGGCTCACGTAGCATGCCAAGACCGACAA
AAACGCCTTCTCTTAGGTTACATATAATCGGCGTGCCCGAAGAAGGCTGA
CACCTTCTCCGCCGCCTAAGAATGGGCCATTATAGAAAGCGCGATATCTA
CAGCCGTCCGTTCTCATCGGATGGCTGCAGAACATTCGGGTGAGTCTGGT
ATCTCCAAAAGTTTTCTCCTGTTCGATCTCCTGTAACTCAGGCCCTCAAT
CCGTCTGTCACCCAGTTTGGACTCAAATGTGGGGCTAAACCTGTCGGTAC
CCATACACATCTCAATGCCCCGAAGGTTACACTGACACGCCCTAAAGGCA
AATGTACAATTTCGAGTTACAAGCGGCCCCTTGCTTTGACAGATTCCGGT
TTGACAGAGTAAATTTGTGCGGTCGGTTTTATCGCTTGTAGTCGATTAGC
> SEQUENCE 10 (Fri Sep 18 13:05:43 2020)
ATACACTTTTTGGAGTTCGCCTCATAGGTGAGCAGACTCGTCAAACAAGT
CCTAGATGAGGCCGAATCTGACTGCAGCCATAGTTACCTGTACATCGCTT
ATGCTTCGAAGGCCCAATGCTTCACGAACACAGTATGGCAGCAGTTTTGT
TCCTGACGCGAAGGTCAGGCCGTAGTACCGGTTCGTTGTAGTGTCCTGTG
TGCTCCTGTCTCGACCCTTATCGCAAGAACACCTCGTCGGTGCGTCGATG
GGCGCAGAACTATCTTTCTTTGTAGTTTCTGACGAACCCAGAGAATTAAA
CTGGGTGAGAAGATAGCACTAGTTTTTTGCCTAAGAAAGATAACCACAGC
CTCACGTACTTAGTTCGCATTTAACAATAAATTGGGGATTTGAGATCGTA
ATTTCGCCCCATAACCTCCGCCAGTCGTAAAGGTAAAAGTGCACCTGCTG
TCATGAATGCAGTCTGCTTCGGATGCCCGGCAACTGTAGGGGCATTAAGC
CTTTAGTGGATACACGCTACCCCATTATAGGCTGGTCAAAGCTGTTCCGA
GCCCATCCAAGCTGTCTGAACTACCCTCTCCAGATGTTCATGATAGTTGC
TCAGATAAGATCGACACTCTCCCCGAGTGTGCTAGTTAGCGGATTCTGCG
TCCAAGGCCTCATGTTTTATAAATTGTGGGAAAGAAAGGACCTTAAGCCG
GGGGACAGTTTTAGGCGAACTCTAATCAACTTCACGTGCCTCCCTCACCC
GACGCTGTTCGGAATTCTGCCCGAGGTTTTGGGCCTTACATACACAAAGG
AGACACCACTACGCGCCTCTCCCAATGACGGGTGGGAGCCCATGGGTGAA
TTATTTTGTCGCAGCTCTCAATTAAGGCGATGCATATAGGTCATACTAGG
GCCGAGGCGCCCGATTACAGGATGAAAACCGTTGACAACCTCCAGAGAGC
TCTAGGAAATGGGTATTGAATAGCAGTATGCAACACCCTACATTCCGAAC
TTAGTAGGATTAACTTCGATCAGCAACGACGTGTATCACCACAGCCGACG
TTCGGACCTTTATAAGAGTCAAATTGCCAGAATTCGTTCCGTGAAGTTCA
AAATCACTGACAAAACGTTCGCGACAAATGGTGCGTGTGTCGACGAATTG
AGTTGTCACACGCCAACAATATAACACCTTTGGTGTATACTCGATGACAC
GCAAAATCTTCTGATCGTTTTGACGATGTTTGACATCAAACTCACTGCAT
AACGCCGGGTATCACGGCTTACAGGGGATTACTACCTCTCATAACGGGAG
ATACCTGAAGTGGCCTCGGGTAATTTATACTAGCACTATACAGGGAGACT
ACTAAAGCTCCTAGGGGGCCATTTATGTTAACCGAATGCCTCGTTATACC
CAACGGGGCCGTTACAACTACTCACGCACCGCCAGACGACATGGCACCCT
TCCTGCACTTAGTGCACCCTAGGACGCTCCTCTGGACGAACTCCCCGTAA
CCGGCTTTCGAGGCGAACAATAATCCCTGCGTCAGCGGGTTCGTCCACGG
CTAATTGACGGGACGGTCACCAGGGCGTACGTTCCCCCCAGTTCCTATAT
AAGAGGTCAAACTGTGAACTGCGCACAGTGTCTCAGAGCTCTTGTTTAGC
TAACCTCATACTGCGAAAGCAGCCACCGGCATTATGCATAGGTGAGAGCT
ATAAGTCATGACCAAAGTGAGCTAATATTGTGGCATAAAGGATTAGGAAA
GACTCGTGTCTCATATGCACTGAGCTATCTTCGAGAAAATCATTTTCAAC
GCCACTTGCGCATTCCTTCCACCCACTCCTCCATATTTTAGGGAAGGATA
CCGCGGAGGATCAAGCCGGAGAGTCACAGATGTGTCACCGACGTGATACT
TTAACGTCGACAAACTTTTCAGGATGAGAGTTCTGGCTTCCTCGTCTGGG
GATAATGGCTCCTAGTGTTGCCCCACTTGGGCTAACCTTGAGACTGACAG
AAAGTCTCGCGCTTCAACGGCCGATCTGTAGGATGGCGGTAGTGGAATTA
ACTCCTAGAGACTCGTGCTCTCAAGTACAGCTTCTACGAGCTATTTCCTC
ACTCAGCATCCCATTGCCCATGTATATTTAGCGTATCGTTTGGTTAATCG
GAAAGGGCCCATTTCTCGCTAGGATGACGACCCCGTCTAAAAATCGTAGA
TAAATCCGCGCTGCTACATTTATACCATGCATTAGGGCCACCTAGGGACT
CCACAAGGTACCTTGGGTCACATATGGGTCGAAATTCCCCATTATAGCTC
TACTCAGCGGTTTCCGCAGAAACGGCTAGGTTTCATCGCTCGACTTACAA
TTGGTCTCGGACCACTCTTAGACTAGGTATAAGCGAAATAATCGGGACCT
GGCCCTCCTCCGTGCGCCCTGAACCATCTACTGTTCGAGGGCAGTAGTGT
GTTGACTCAGGCGATGGAAGATGTACTATAGGGGGATCCCCAAAAACAGT
> SEQUENCE 100 (Fri Sep 18 13:05:43 2020)
CGTTTCATGCAAGTATTGGGGTTTAGGCGTAGAGCACGGCATACCGAGGT
GAGGTATGCCTAGACCGGGTCTTCTTGGATCCCGGTGCAACCCAAAGAGG
TCCCTAACGGCGCACGACTGATCTATTCTATATATGCATGTTTGAGCTCA
AAGTCGCTGAGAATTAAGTGGAATAGGGGGGGATGTAAAACTGCTGCCAG
GAAGCATCGCTGGTTAAGCGTCAAATCGACGGTGAGGATGACTCGCGAAC
TCGCTGAAGGCCATGACGTAAGACGTATCTAGAGGTTAGTCAGCAAGCTC
ATAGACTGAAAAAAATGTTGTACCCACTTTCCTCGTTAAACGCGCTGTGG
CTGACAGCCCACATCCGCCATGAAAGGATTCCAGGTGAGATTGTACAGTG
GCTAGTGTACGGCTCGTACCGGAAAGACTCGTTACGCCGCGCCACCTAAA
GGTTCAAACGACAGTTCGGCCAGCTTTTTACGCTCAAGGGGAAAATACCC
TGCTGTAACTTCATTGTTCGAAGCGCCTCTTAAAAACTGGCCCATAAGAG
CCACGTGAACCACTTAGACATCTGTAACCACATTATAGGAACTAACTCAG
CACCCTGTCATGTACTCACAGCCCTGGCCTATCCGACCGGAGCCCGAATA
CTTCCTTACTTCGACGACCGGAATGAGGAGAGGGGTTGGCCTTTAGCACT
CTGTCATGGTGGGGAGAGTTGGAGTATCAGTTACTGTTACCTTCCACTGA
TCATCCTGTAGTTTTAGCCGGTCCTGGGGGTAAGCATAGATGTGAGAATC
TATGAAGGACGGCCTGTCTCATGGAGGAAAGAGTTTACTTACGAGTCGGA
ACAAGCCCTACGTATGAGACCAGCGAGCACCCAACTAATTCGGCGAACCT
ATTTTTAAAGCCGCAACTGTCGGGCCTAGAACGTATCTAAAACAACTCGC
CCTGTATACCAGTGGTACCAAGGGGTGTGCAGTGATCAGGTATCTCACAT
TCTGTAAGTGGTATGGTAGTATCTCCTGGATTCCAAACTTAGTGTGGGTT
GCCAACTCTAGCCGCGACCTATCGGATGCTCTCGCGCGACAGTTCCGACT
CGATAACGGCGCTAACTGATTTTTCGCATCCGGTAACTCGTCGATAACCG
AAATCTACCGTTGATAGGTGGAGGACAAAGCAAAGACCTATCCTCAATTG
AAGGTGTCAGCTAACATACTTGTCGGCCGCCACTAAACAATCGCTTTGGT
GTCCCGGATATTGTTCAGCGTCAGTATTCCGGATCACGGGGGTTCTACTG
AACTCTGTTGCGCTAGAACATATAGATTGCCAAGCCTCATGACAACGCCA
AGGGTGAGGTCGTCGGTGTTTGGGCAGAAGCAGGTACCCGATATGCCCGA
CGCGTCTTTGATCGCCAGCCAACCATGAGAGTACTAGTTGCGAACGCGTG
GTCGCTACCAACGAAAGTGGTCATTAATAGCGTCAGATCCTTCCCCGTTT
AAACCGGCGATCGGTTGTCCACCTTTACATCAGTTTTGACAGTAAATTAT
ACGACTCACATCAAGGTTCAGAAGCCGCTCCCAGGCCTGGTCCACGACGG
CTCGCCGGCTACCGTGCTCAAAAACTGAGGGAGACATAGTCGGCAGAATT
TTCTAAAGGACACACTTTTCCCTATGGTAGGCGATAGCATCTAAAAGTTT
TAAGTGCATTCTGCCTCGAACCGAGATTCGGCTAAAAAGGAGTAGATGAA
AGACATGCGTACGAAGCAGGTTATAAGTTTCCGAATAATGATTCCAACCA
AATCTTTGAGCGGGCGCCACCATTACTAGATTGGGTCAGCAACAGGAAAT