Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Axelle Loriot
WSBIM2122-20200917
Commits
af5e96bf
Commit
af5e96bf
authored
Oct 01, 2020
by
Axelle Loriot
Browse files
Auto-saving for axelle.loriot on branch master from commit
38f4ae17
parent
38f4ae17
Pipeline
#79941
passed with stage
in 29 seconds
Changes
211
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
R01_deseq2.R
0 → 100644
View file @
af5e96bf
library
(
DESeq2
)
library
(
tidyverse
)
load
(
"wsbim2122_data/deseq2/counts.rda"
)
load
(
"wsbim2122_data/deseq2/coldata.rda"
)
coldata
head
(
counts
)
# Create dds object
dds
<-
DESeqDataSetFromMatrix
(
countData
=
counts
,
colData
=
coldata
,
design
=
~
Condition
)
head
(
assay
(
dds
))
colData
(
dds
)
rowData
(
dds
)
# Inspect counts distribution
as_tibble
(
assay
(
dds
))
%>%
gather
(
sample
,
value
=
counts
)
%>%
ggplot
(
aes
(
x
=
log2
(
counts
+
1
),
fill
=
sample
))
+
geom_histogram
(
bins
=
20
)
+
facet_wrap
(
~
sample
)
# Run DESeq2
dds
<-
DESeq
(
dds
)
#PCA with plotPCA
rld
<-
rlogTransformation
(
dds
)
plotPCA
(
rld
,
intgroup
=
"Condition"
)
# Values can be extracted to customise the plot
x
<-
plotPCA
(
rld
,
intgroup
=
"Condition"
,
return
=
TRUE
)
ggplot
(
x
,
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
,
label
=
name
))
+
geom_point
()
+
geom_text
()
# PCA with prcomp()
pca
<-
prcomp
(
t
(
assay
(
rld
)))
summary
(
pca
)
var
<-
pca
$
sdev
^
2
pve
<-
var
/
sum
(
var
)
# % var explained
as_tibble
(
pca
$
x
,
rownames
=
"Sample"
)
%>%
select
(
Sample
,
PC1
,
PC2
)
%>%
left_join
(
as_tibble
(
coldata
,
rownames
=
"Sample"
))
%>%
ggplot
(
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
))
+
geom_point
()
+
xlab
(
paste0
(
"PC1:"
,
round
(
pve
[
1
]
*
100
),
" % variance"
))
+
ylab
(
paste0
(
"PC2:"
,
round
(
pve
[
2
]
*
100
),
" % variance"
))
# PCA with prcomp() taking 500 genes with higher variance
rv
<-
rowVars
(
assay
(
rld
))
select
<-
order
(
rv
,
decreasing
=
TRUE
)[
seq_len
(
min
(
500
,
length
(
rv
)))]
pca
<-
prcomp
(
t
(
assay
(
rld
)[
select
,
]))
pve
<-
pca
$
sdev
^
2
/
sum
(
pca
$
sdev
^
2
)
as_tibble
(
pca
$
x
,
rownames
=
"Sample"
)
%>%
select
(
Sample
,
PC1
,
PC2
)
%>%
left_join
(
as_tibble
(
coldata
,
rownames
=
"Sample"
))
%>%
ggplot
(
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
))
+
geom_point
()
+
xlab
(
paste0
(
"PC1:"
,
round
(
pve
[
1
]
*
100
),
" % variance"
))
+
ylab
(
paste0
(
"PC2:"
,
round
(
pve
[
2
]
*
100
),
" % variance"
))
# Size Factors
sizeFactors
(
dds
)
# Compare with sequencing depths
SF
<-
enframe
(
sizeFactors
(
dds
),
name
=
"sample"
,
value
=
"Size_Factor"
)
%>%
ggplot
(
aes
(
x
=
sample
,
y
=
Size_Factor
))
+
geom_bar
(
stat
=
"identity"
)
SD
<-
enframe
(
colSums
(
assay
(
dds
,
"counts"
)),
name
=
"sample"
,
value
=
"n_reads"
)
%>%
ggplot
(
aes
(
x
=
sample
,
y
=
n_reads
))
+
geom_bar
(
stat
=
"identity"
)
library
(
"patchwork"
)
SF
/
SD
# Available results
resultsNames
(
dds
)
dds
$
Condition
dds
$
Condition
<-
relevel
(
dds
$
Condition
,
ref
=
"mock"
)
dds
$
Condition
dds
<-
DESeq
(
dds
)
resultsNames
(
dds
)
res
<-
results
(
dds
,
name
=
"Condition_KD_vs_mock"
)
res_tbl
<-
as_tibble
(
res
,
rownames
=
"ENSEMBL"
)
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl
%>%
arrange
(
padj
)
%>%
head
(
5
)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes
<-
res_tbl
%>%
arrange
(
padj
)
%>%
head
(
5
)
%>%
pull
(
ENSEMBL
)
rowMeans
(
counts
(
dds
[
best_genes
],
normalize
=
TRUE
))
rowMeans
(
counts
(
dds
[
best_genes
],
normalize
=
TRUE
))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients
(
dds
[
best_genes
])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2
(
rowMeans
(
counts
(
dds
[
best_genes
,
1
:
3
],
normalize
=
TRUE
)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2
(
rowMeans
(
counts
(
dds
[
best_genes
,
4
:
6
],
normalize
=
TRUE
)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients
(
dds
[
best_genes
])[,
1
]
+
coefficients
(
dds
[
best_genes
])[,
2
]
# 6. How many genes have no padjusted value? Why?
summary
(
res
$
padj
)
# filter genes with no padjusted values
res_tbl
%>%
filter
(
is.na
(
padj
))
head
(
metadata
(
res
)
$
filterNumRej
)
as_tibble
(
metadata
(
res
)
$
filterNumRej
)
%>%
ggplot
(
aes
(
x
=
theta
,
y
=
numRej
))
+
geom_point
()
+
geom_vline
(
xintercept
=
0.7169
,
color
=
'red'
)
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl
%>%
filter
(
baseMean
==
0
)
%>%
nrow
()
# Number of genes filtered by the independent filtering procedure
res_tbl
%>%
filter
(
baseMean
>
0
&
baseMean
<
metadata
(
res
)
$
filterThreshold
)
%>%
nrow
()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF
<-
results
(
dds
,
name
=
"Condition_KD_vs_mock"
,
independentFiltering
=
FALSE
)
as_tibble
(
res_no_IF
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
is.na
(
padj
))
%>%
nrow
()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr
<-
5
# keep genes with counts > 5 in 3 or more samples
keep
<-
rowSums
(
counts
(
dds
,
normalized
=
TRUE
)
>=
filtering_thr
)
>=
3
dds_bis
<-
DESeq
(
dds
[
keep
,
])
res_bis
<-
results
(
dds_bis
,
name
=
"Condition_KD_vs_mock"
,
independentFiltering
=
FALSE
)
as_tibble
(
res_bis
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
is.na
(
padj
))
%>%
nrow
()
as_tibble
(
res_bis
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
!
is.na
(
padj
))
%>%
nrow
()
# Histogram of pvalues
hist
(
res_tbl
$
pvalue
)
res_tbl
%>%
filter
(
pvalue
>
0.8
&
pvalue
<
0.85
)
%>%
head
(
10
)
res_tbl
%>%
filter
(
baseMean
>
metadata
(
res
)
$
filterThreshold
)
%>%
pull
(
pvalue
)
%>%
hist
()
# plot MA
plotMA
(
res
,
alpha
=
0.05
)
# Volcano plot
res_tbl
%>%
filter
(
!
is.na
(
padj
))
%>%
ggplot
(
aes
(
x
=
log2FoldChange
,
y
=
-
log10
(
padj
)))
+
geom_point
(
size
=
0.5
)
test.R
0 → 100644
View file @
af5e96bf
library
(
DESeq2
)
library
(
tidyverse
)
load
(
"wsbim2122_data/deseq2/counts.rda"
)
load
(
"wsbim2122_data/deseq2/coldata.rda"
)
coldata
head
(
counts
)
# Create dds object
dds
<-
DESeqDataSetFromMatrix
(
countData
=
counts
,
colData
=
coldata
,
design
=
~
Condition
)
head
(
assay
(
dds
))
colData
(
dds
)
rowData
(
dds
)
# Inspect counts distribution
as_tibble
(
assay
(
dds
))
%>%
gather
(
sample
,
value
=
counts
)
%>%
ggplot
(
aes
(
x
=
log2
(
counts
+
1
),
fill
=
sample
))
+
geom_histogram
(
bins
=
20
)
+
facet_wrap
(
~
sample
)
# Run DESeq2
dds
<-
DESeq
(
dds
)
#PCA with plotPCA
dim
(
rld
)
rld
<-
rlogTransformation
(
dds
)
plotPCA
(
rld
,
intgroup
=
"Condition"
)
# Values can be extracted to customise the plot
x
<-
plotPCA
(
rld
,
intgroup
=
"Condition"
,
return
=
TRUE
)
ggplot
(
x
,
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
,
label
=
name
))
+
geom_point
()
+
geom_text
()
# PCA with prcomp()
pca
<-
prcomp
(
t
(
assay
(
rld
)))
summary
(
pca
)
var
<-
pca
$
sdev
^
2
pve
<-
var
/
sum
(
var
)
# % var explained
as_tibble
(
pca
$
x
,
rownames
=
"Sample"
)
%>%
select
(
Sample
,
PC1
,
PC2
)
%>%
left_join
(
as_tibble
(
coldata
,
rownames
=
"Sample"
))
%>%
ggplot
(
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
))
+
geom_point
()
+
xlab
(
paste0
(
"PC1:"
,
round
(
pve
[
1
]
*
100
),
" % variance"
))
+
ylab
(
paste0
(
"PC2:"
,
round
(
pve
[
2
]
*
100
),
" % variance"
))
# PCA with prcomp() taking 500 genes with higher variance
rv
<-
rowVars
(
assay
(
rld
))
select
<-
order
(
rv
,
decreasing
=
TRUE
)[
seq_len
(
min
(
500
,
length
(
rv
)))]
pca
<-
prcomp
(
t
(
assay
(
rld
)[
select
,
]))
pve
<-
pca
$
sdev
^
2
/
sum
(
pca
$
sdev
^
2
)
as_tibble
(
pca
$
x
,
rownames
=
"Sample"
)
%>%
select
(
Sample
,
PC1
,
PC2
)
%>%
left_join
(
as_tibble
(
coldata
,
rownames
=
"Sample"
))
%>%
ggplot
(
aes
(
x
=
PC1
,
y
=
PC2
,
color
=
Condition
))
+
geom_point
()
+
xlab
(
paste0
(
"PC1:"
,
round
(
pve
[
1
]
*
100
),
" % variance"
))
+
ylab
(
paste0
(
"PC2:"
,
round
(
pve
[
2
]
*
100
),
" % variance"
))
# Size Factors
sizeFactors
(
dds
)
# Compare with sequencing depths
SF
<-
enframe
(
sizeFactors
(
dds
),
name
=
"sample"
,
value
=
"Size_Factor"
)
%>%
ggplot
(
aes
(
x
=
sample
,
y
=
Size_Factor
))
+
geom_bar
(
stat
=
"identity"
)
SD
<-
enframe
(
colSums
(
assay
(
dds
,
"counts"
)),
name
=
"sample"
,
value
=
"n_reads"
)
%>%
ggplot
(
aes
(
x
=
sample
,
y
=
n_reads
))
+
geom_bar
(
stat
=
"identity"
)
library
(
"patchwork"
)
SF
/
SD
# Available results
resultsNames
(
dds
)
dds
$
Condition
dds
$
Condition
<-
relevel
(
dds
$
Condition
,
ref
=
"mock"
)
dds
$
Condition
dds
<-
DESeq
(
dds
)
resultsNames
(
dds
)
res
<-
results
(
dds
,
name
=
"Condition_KD_vs_mock"
)
res_tbl
<-
as_tibble
(
res
,
rownames
=
"ENSEMBL"
)
## Exercices
# 1. Inspect the results table and identify the 5 “best genes” showing the lowest padjusted value.
res_tbl
%>%
arrange
(
padj
)
%>%
head
(
5
)
# 2. Calculate the mean expression level of these 5 "best genes" using
# the function `count()`. Compare with baseMean values.
best_genes
<-
res_tbl
%>%
arrange
(
padj
)
%>%
head
(
5
)
%>%
pull
(
ENSEMBL
)
rowMeans
(
counts
(
dds
[
best_genes
],
normalize
=
TRUE
))
rowMeans
(
counts
(
dds
[
best_genes
],
normalize
=
TRUE
))
# 3. Extract the ß coefficient of these 5 "best genes" from the GLM
# using the function `coefficient()`. Compare with log2FoldChange values.
coefficients
(
dds
[
best_genes
])
# 4. using the function `count()`, calculate the expression levels (in log2)
# of these 5 "best genes" in mock cells. Compare with ß coefficients.
log2
(
rowMeans
(
counts
(
dds
[
best_genes
,
1
:
3
],
normalize
=
TRUE
)))
# 5. Calculate the expression levels (in log2)
#of these 5 "best genes" in KD cells. Compare with ß coefficients.
log2
(
rowMeans
(
counts
(
dds
[
best_genes
,
4
:
6
],
normalize
=
TRUE
)))
# log2 expression levels in KD cells evaluated from ß coefficients
coefficients
(
dds
[
best_genes
])[,
1
]
+
coefficients
(
dds
[
best_genes
])[,
2
]
# 6. How many genes have no padjusted value? Why?
summary
(
res
$
padj
)
# filter genes with no padjusted values
res_tbl
%>%
filter
(
is.na
(
padj
))
head
(
metadata
(
res
)
$
filterNumRej
)
as_tibble
(
metadata
(
res
)
$
filterNumRej
)
%>%
ggplot
(
aes
(
x
=
theta
,
y
=
numRej
))
+
geom_point
()
+
geom_vline
(
xintercept
=
0.7169
,
color
=
'red'
)
# Evaluate how many genes were really filtered by the independent
# filtering procedure.
# Number of genes with basemean == 0
res_tbl
%>%
filter
(
baseMean
==
0
)
%>%
nrow
()
# Number of genes filtered by the independent filtering procedure
res_tbl
%>%
filter
(
baseMean
>
0
&
baseMean
<
metadata
(
res
)
$
filterThreshold
)
%>%
nrow
()
# Re-run the results() function on the same dds object,
#but set the independent filtering parameter to FALSE.
# Check how many genes have no padj?
res_no_IF
<-
results
(
dds
,
name
=
"Condition_KD_vs_mock"
,
independentFiltering
=
FALSE
)
as_tibble
(
res_no_IF
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
is.na
(
padj
))
%>%
nrow
()
# Imagine another way of filtering genes with very low counts
# filter the data to remove genes with few counts
filtering_thr
<-
5
# keep genes with counts > 5 in 3 or more samples
keep
<-
rowSums
(
counts
(
dds
,
normalized
=
TRUE
)
>=
filtering_thr
)
>=
3
dds_bis
<-
DESeq
(
dds
[
keep
,
])
res_bis
<-
results
(
dds_bis
,
name
=
"Condition_KD_vs_mock"
,
independentFiltering
=
FALSE
)
as_tibble
(
res_bis
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
is.na
(
padj
))
%>%
nrow
()
as_tibble
(
res_bis
,
rownames
=
"ENSEMBL"
)
%>%
filter
(
!
is.na
(
padj
))
%>%
nrow
()
# Histogram of pvalues
hist
(
res_tbl
$
pvalue
)
res_tbl
%>%
filter
(
pvalue
>
0.8
&
pvalue
<
0.85
)
%>%
head
(
10
)
res_tbl
%>%
filter
(
baseMean
>
metadata
(
res
)
$
filterThreshold
)
%>%
pull
(
pvalue
)
%>%
hist
()
# plot MA
plotMA
(
res
,
alpha
=
0.05
)
# Volcano plot
res_tbl
%>%
filter
(
!
is.na
(
padj
))
%>%
ggplot
(
aes
(
x
=
log2FoldChange
,
y
=
-
log10
(
padj
)))
+
geom_point
(
size
=
0.5
)
wsbim2122-20200917.Rproj
0 → 100644
View file @
af5e96bf
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
wsbim2122_data/RNAseq_pipeline/Homo_sapiens.GRCh38.94.gtf
0 → 100644
View file @
af5e96bf
This diff is collapsed.
Click to expand it.
wsbim2122_data/RNAseq_pipeline/OD01_10_featurecounts.tsv.gz
0 → 100644
View file @
af5e96bf
File added
wsbim2122_data/RNAseq_pipeline/example.fastq
0 → 100644
View file @
af5e96bf
@DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
NTGANGCTTAGGTAGGGGTAGGGAGACAGGGAAGTAACAGAGGTACTGGACATGCCAGGCAGAAGAAACAGCAGTACAGGACATGGAAGCAAACAACATGG
+DRR016707.1 DGTKZQN1:354:C2B0UACXX:5:1101:1199:1908 length=101
#0;9#4=9@<><>?????@=@???@???????<?>?=?=?<?????????????????????<@<???=?<=?<::<==<;<<==<;<<<<==<<<<<<=<
@DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
CCTGNACCCAGTAGAGAAAGCCCTTCGAGATGCCAAACTAGACAAGTCACAGATTCATGATATTGTCCTGGTTGGTGGTTCTACTCGTATCCCCAAGATTC
+DRR016707.2 DGTKZQN1:354:C2B0UACXX:5:1101:1245:1922 length=101
@@@D#2ADHHHAAF<CFHIGHIIGGIHIECGFAHDHIIIGGGIBGG?FHHHGGEHIH=FEGDDGI@GIIGIHHEHEDDBACCDCECB@BCBCCBCBBBCC@
wsbim2122_data/RNAseq_pipeline/example.sam
0 → 100644
View file @
af5e96bf
DRR016707.26395247 147 1 19626495 60 101M = 19626490 -106 GTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGG DDDDDDDDDDDDDEEEEEEEFFFFFFHCHHHJJJHHJJJJJJJJJJJJIHFJJIHJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJHHHHHFFFFFCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.26395247 99 1 19626490 60 101M = 19626495 106 CTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTG CCCFFFFFHHHHHJIJJJJIJJJJJJJJJJIJJJJJJJJJJJJJJJHHIHIJJJIJJHIJJJIIIIIIJGIJJJJDHJJJHHHHHHHFFFFFFEDACEEDD AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 163 1 19626487 60 101M = 19626493 107 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAG CC@FFFFFHFHFFHGGJCIFJGHIIIIGIIIIJIIJJJIGIIIIIIGII?:BFHGHIIIIGEHHHGIIDGGH@GGIGHJGHHHHCEHDEFDDFFEEECCEC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.34969377 83 1 19626493 60 101M = 19626487 -107 CTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAA CDDDDDDDDDDDDECEEEEEFFFFD?=HHHHGIIGEGIGIIIHIGDIHHGGGIIHGHIIIIGIIIGIGEIIGGGIIIIGIIIIGHDIIFHHHHFFBDD?C@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.42561021 161 1 19626495 60 100M1S = 19626494 0 GTAACAATGTTATCAGTAATGCTTTAAACTAAAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAAGTTGTAAGA ?8?B1:BDD,,2<:C<2A<<,+A?CBFFAE<+<+2A?EEBD99:D<*?B:*:D9B:DD/BDED@)=8=B;@=)).=(=@;7A)7.==>;3);@AAA>A@3; AS:i:-8 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:30C0C68 YT:Z:UP NH:i:1
DRR016707.42561021 81 1 19626494 60 101M = 19626495 0 TGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGACACCAAGTCTGTTTCTTGTTTTGTATTTTCGCTCTGGAAGTTGTAAG (9(??==3??>==;5(55((6.((9>;..;67>)>).5(A;;>=/))8.=>9)((?AB?92=00*;=::1)=:3=74=A7<)0<+<AAAB?=<+4AA<;== AS:i:-5 ZS:i:-19 XN:i:0 XM:i:2 XO:i:0 XG:i:0 NM:i:2 MD:Z:54A29T16 YT:Z:UP NH:i:1
DRR016707.43465443 163 1 19626483 60 101M = 19626486 104 TGCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG B@@FAFFFHHHHHHIIIIJJJHJJIJJIIJJJJJJJFJJJJJJJJJJJJJHIJFHJJIJJJIJJIIJIJJHIHHIIIJJJJJHEHHHFEFFFFFEDEEEEE AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:-1 YT:Z:CP NH:i:1
DRR016707.43465443 83 1 19626486 60 100M1S = 19626483 -104 CATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGGAG DDDDDDCDDDDCCDEEEEDFFFFFDBHHHHHGJJGHDJIJJIGIHJGGJJJJJJJJJJJJJHG?JJJJJJJJJJJIIJJJJIJJJJJIGHGHHFFFFFCCC AS:i:-1 ZS:i:-12 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 147 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG DDDDDCDDDDDDEEDEEEEFFFFFFHHHHHHJJIJIGHFIHJJJIHFIHHFJJJJJIJJJJJJJIHJJJJJIJJJJJJJJJJJJJJJJHHHHHFFFFFBB@ AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.64407509 99 1 19626484 60 101M = 19626484 -101 GCCATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGG CCCFFFFFHHHHHJJJJJJJIJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJDGIJJJJJJJJCGIJJJIIHHJJJHHIJJJHHHHHDFFFFFFEEEEEED AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:101 YS:i:0 YT:Z:CP NH:i:1
DRR016707.68628658 163 1 19626487 60 97M4S = 19626487 -105 ATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTGAGAT CCCFFFFFGHHHHJJJJJJIJJJJJJIJJJJJJJJJIJGJIIEIIJJJIE:DGIJJJJJJJJJJJIJIHIJGGHJJJJJHJHHHHHHFFFFFFFEEEDEDC AS:i:-4 ZS:i:-17 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
DRR016707.68628658 83 1 19626487 60 4S97M = 19626487 105 ATCTATTCTTCTGTAACAATGTTATCAGTAATGCTTTAAACTCCAGCACCTGGTTATGCATTTGAAACCAAGTCTGTTTCTTGTTTTGTATTTTCTCTCTG EDDDDDDDDCDEEEEEEEFFFEEFHCHHHHHIGJJJJIHCIJJJJJIHIJJJIFIJJIJJJJJIIHFJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC AS:i:-4 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:97 YS:i:-4 YT:Z:CP NH:i:1
wsbim2122_data/data/seq_1.fas
0 → 100644
View file @
af5e96bf
> SEQUENCE 1 (Fri Sep 18 13:05:43 2020)
GCGCCACCTTCACCTGTTGTCGAGTCCGCCGACACCAAGAACCTACCGCA
CCCTATTCCTTGGGTTCATTATAGTGTCAAGCGACACGACAAACGTAGCG
GAGATGGTGTCTCGAGGAATGATCTTCAGATATGATCTCAGAGAGGCGGT
CCGTCTTGGAAACTGGCACGGGTAACGCTGCTTCTTCCCGGCAACGAGCA
TCAAAATCACTCGGGGGATCATCCTGGGCCGTCCTCTTTAATTTAGTGGC
CATGGCGCAATATCACAGGGTACAGTCGCAACGATTGCTAAAGGAGTGAG
GAGATTTTCGAGTCGGCATAGATGCTAATACCATGCCACGGAGCGTAATC
TTGCCGCATCCCTGGTTACTTCCATTTCTCCAGCCATGCGACGGAAATTT
GCGCTGACCCTGAGGCATTATCAACTTCCGGTAGAACAGCTATAGCATCG
CCTCGTGCGCCTTGCCTTTTTAGCTATGAAACCTTCGTCATCACAGCGAG
CTGCCATGCTGGCGCCACTGTACAATCCTTGTCGCGCAGCCTACGTGCAC
AAAGCCGCGCGTAACCACTCACTTGCTGTCTGTTCTGTTCAACTGCAAGG
AAACTCGGAACGTGGACAACCCGGAGGTATGGCAAGCTGCCTTAATACCT
GAAAGAAGCTTTTGACACAAAAAGACTGGGCATATTGTCTGGGGTATATC
GCGCCAGCAGCCCCACGTGAAACACGCAGCATAAGTGAGCTGTGACGGTA
GGAAAGACGGAACTCACAGCGTCTCCGATCCAAGGAAGAGAGTTAATATG
TACGAGTTGTCAGGAATTACGTGCACTCGCACTATTACGTAATAACCTAC
TTATCGTTGAGGTTATCTGTGTCGCTCGGAAGAGTCATCAATTGGTGGTT
GCGGACCCGGCTCCTCAGGATCTTACCACTTTGTATACAGTATCAAAACA
GGTGCACCGTAATTATTCGTGACGAGAGGAGGCTAAAACTCTGACTGCCA
AGACCCCTGGGGCCGTTCTAACGGCGCATGCGGCTACCGTTGCTAAGTTG
AACATCGGTCACAAGAGATCGTTAAAGTCAGAGGTACTCGACGGCTCATT
GGGCCCCGACTGTTACTACCAAGACATCGGCGGTTTTGCACTATATAGTC
ACCGAGATTCTAGAGCCAGCGATGGGTGCCCCCAACCTGGGCCGGATTTA
AGAATTGATCGACAAATTATTCATCACATCGCTGTCATATTCGTGGACAT
AGTGTGGTATACCTTGCCTACATGCACTCGTTCCATAATAATTTCTTTCC
TGGCAATTCGGGCTTGTTTCACGATAGGTCGACCCATCTCAAGTTCTGAA
ATGATTGTTCTTGGGGACCGGTACCTTAGTGTATAGATACTTCTTCACCC
AATGCAGCACGGTACAATAGGTCCTAGGGAATGGGTGGTAAAAGGCTTCT
ATCAACACGCGCTTTCCTAGGGACCCATGTGCTCCGCATAGTGGAATGAA
ACTTGCTGATCTTTACACTATGTAAGCCTCTAACTGTGGATTGGAGTAAT
TTGGGGTGGTGCGCTAGTATACCCTCCGCACGTAACAACAACTACGGTGT
CTAGTACGGGCACCTCACCGAGAGCTGTTTCGGTTTGAGCCGGCTTCTTG
AATAGTAGCGAGATGGCGCTAAATTGATCCCGGTCTCCCAATCCGTAGTC
CAGCATGGCCGAGGTAAGAGGGAGAACCGGCTCTTTCGCAGTCAAGCGGT
GAAACTACAGATCCAGCTAAGTCATACCTGGCAATTCTAGGGTCCGTCGC
ACCCTCAATTAAGTACGAGGTTTGTGACCAGACTTCAGCATCACCATTTA
TCTGGGAGTTGGTACGTCAATCTGGTACCATCAGTCGCAATTTCTCAGAA
TAATACGCTGGCAAACCAGCTACTCCACTACACATGTCCCCCCCTCAAGG
GTCTCAAATTTGTAGGTTATTGTATTTGCGCGTGTGGGCACTACAGTGAA
CTTGTCGCTTGTGCGTCTAGGTGTTTCTGATTGAACTAATTACTGCACCA
AGTTCGCTGATAGTCAGCGACAGGCTCACGTAGCATGCCAAGACCGACAA
AAACGCCTTCTCTTAGGTTACATATAATCGGCGTGCCCGAAGAAGGCTGA
CACCTTCTCCGCCGCCTAAGAATGGGCCATTATAGAAAGCGCGATATCTA
CAGCCGTCCGTTCTCATCGGATGGCTGCAGAACATTCGGGTGAGTCTGGT
ATCTCCAAAAGTTTTCTCCTGTTCGATCTCCTGTAACTCAGGCCCTCAAT
CCGTCTGTCACCCAGTTTGGACTCAAATGTGGGGCTAAACCTGTCGGTAC
CCATACACATCTCAATGCCCCGAAGGTTACACTGACACGCCCTAAAGGCA
AATGTACAATTTCGAGTTACAAGCGGCCCCTTGCTTTGACAGATTCCGGT
TTGACAGAGTAAATTTGTGCGGTCGGTTTTATCGCTTGTAGTCGATTAGC
wsbim2122_data/data/seq_10.fas
0 → 100644
View file @
af5e96bf
> SEQUENCE 10 (Fri Sep 18 13:05:43 2020)
ATACACTTTTTGGAGTTCGCCTCATAGGTGAGCAGACTCGTCAAACAAGT
CCTAGATGAGGCCGAATCTGACTGCAGCCATAGTTACCTGTACATCGCTT
ATGCTTCGAAGGCCCAATGCTTCACGAACACAGTATGGCAGCAGTTTTGT
TCCTGACGCGAAGGTCAGGCCGTAGTACCGGTTCGTTGTAGTGTCCTGTG
TGCTCCTGTCTCGACCCTTATCGCAAGAACACCTCGTCGGTGCGTCGATG
GGCGCAGAACTATCTTTCTTTGTAGTTTCTGACGAACCCAGAGAATTAAA
CTGGGTGAGAAGATAGCACTAGTTTTTTGCCTAAGAAAGATAACCACAGC
CTCACGTACTTAGTTCGCATTTAACAATAAATTGGGGATTTGAGATCGTA
ATTTCGCCCCATAACCTCCGCCAGTCGTAAAGGTAAAAGTGCACCTGCTG
TCATGAATGCAGTCTGCTTCGGATGCCCGGCAACTGTAGGGGCATTAAGC
CTTTAGTGGATACACGCTACCCCATTATAGGCTGGTCAAAGCTGTTCCGA
GCCCATCCAAGCTGTCTGAACTACCCTCTCCAGATGTTCATGATAGTTGC
TCAGATAAGATCGACACTCTCCCCGAGTGTGCTAGTTAGCGGATTCTGCG
TCCAAGGCCTCATGTTTTATAAATTGTGGGAAAGAAAGGACCTTAAGCCG
GGGGACAGTTTTAGGCGAACTCTAATCAACTTCACGTGCCTCCCTCACCC
GACGCTGTTCGGAATTCTGCCCGAGGTTTTGGGCCTTACATACACAAAGG
AGACACCACTACGCGCCTCTCCCAATGACGGGTGGGAGCCCATGGGTGAA
TTATTTTGTCGCAGCTCTCAATTAAGGCGATGCATATAGGTCATACTAGG
GCCGAGGCGCCCGATTACAGGATGAAAACCGTTGACAACCTCCAGAGAGC
TCTAGGAAATGGGTATTGAATAGCAGTATGCAACACCCTACATTCCGAAC
TTAGTAGGATTAACTTCGATCAGCAACGACGTGTATCACCACAGCCGACG
TTCGGACCTTTATAAGAGTCAAATTGCCAGAATTCGTTCCGTGAAGTTCA
AAATCACTGACAAAACGTTCGCGACAAATGGTGCGTGTGTCGACGAATTG
AGTTGTCACACGCCAACAATATAACACCTTTGGTGTATACTCGATGACAC
GCAAAATCTTCTGATCGTTTTGACGATGTTTGACATCAAACTCACTGCAT
AACGCCGGGTATCACGGCTTACAGGGGATTACTACCTCTCATAACGGGAG
ATACCTGAAGTGGCCTCGGGTAATTTATACTAGCACTATACAGGGAGACT
ACTAAAGCTCCTAGGGGGCCATTTATGTTAACCGAATGCCTCGTTATACC
CAACGGGGCCGTTACAACTACTCACGCACCGCCAGACGACATGGCACCCT
TCCTGCACTTAGTGCACCCTAGGACGCTCCTCTGGACGAACTCCCCGTAA
CCGGCTTTCGAGGCGAACAATAATCCCTGCGTCAGCGGGTTCGTCCACGG
CTAATTGACGGGACGGTCACCAGGGCGTACGTTCCCCCCAGTTCCTATAT
AAGAGGTCAAACTGTGAACTGCGCACAGTGTCTCAGAGCTCTTGTTTAGC
TAACCTCATACTGCGAAAGCAGCCACCGGCATTATGCATAGGTGAGAGCT
ATAAGTCATGACCAAAGTGAGCTAATATTGTGGCATAAAGGATTAGGAAA
GACTCGTGTCTCATATGCACTGAGCTATCTTCGAGAAAATCATTTTCAAC
GCCACTTGCGCATTCCTTCCACCCACTCCTCCATATTTTAGGGAAGGATA
CCGCGGAGGATCAAGCCGGAGAGTCACAGATGTGTCACCGACGTGATACT
TTAACGTCGACAAACTTTTCAGGATGAGAGTTCTGGCTTCCTCGTCTGGG
GATAATGGCTCCTAGTGTTGCCCCACTTGGGCTAACCTTGAGACTGACAG
AAAGTCTCGCGCTTCAACGGCCGATCTGTAGGATGGCGGTAGTGGAATTA
ACTCCTAGAGACTCGTGCTCTCAAGTACAGCTTCTACGAGCTATTTCCTC
ACTCAGCATCCCATTGCCCATGTATATTTAGCGTATCGTTTGGTTAATCG
GAAAGGGCCCATTTCTCGCTAGGATGACGACCCCGTCTAAAAATCGTAGA
TAAATCCGCGCTGCTACATTTATACCATGCATTAGGGCCACCTAGGGACT
CCACAAGGTACCTTGGGTCACATATGGGTCGAAATTCCCCATTATAGCTC
TACTCAGCGGTTTCCGCAGAAACGGCTAGGTTTCATCGCTCGACTTACAA
TTGGTCTCGGACCACTCTTAGACTAGGTATAAGCGAAATAATCGGGACCT
GGCCCTCCTCCGTGCGCCCTGAACCATCTACTGTTCGAGGGCAGTAGTGT
GTTGACTCAGGCGATGGAAGATGTACTATAGGGGGATCCCCAAAAACAGT
wsbim2122_data/data/seq_100.fas
0 → 100644
View file @
af5e96bf
> SEQUENCE 100 (Fri Sep 18 13:05:43 2020)
CGTTTCATGCAAGTATTGGGGTTTAGGCGTAGAGCACGGCATACCGAGGT
GAGGTATGCCTAGACCGGGTCTTCTTGGATCCCGGTGCAACCCAAAGAGG
TCCCTAACGGCGCACGACTGATCTATTCTATATATGCATGTTTGAGCTCA
AAGTCGCTGAGAATTAAGTGGAATAGGGGGGGATGTAAAACTGCTGCCAG
GAAGCATCGCTGGTTAAGCGTCAAATCGACGGTGAGGATGACTCGCGAAC
TCGCTGAAGGCCATGACGTAAGACGTATCTAGAGGTTAGTCAGCAAGCTC
ATAGACTGAAAAAAATGTTGTACCCACTTTCCTCGTTAAACGCGCTGTGG
CTGACAGCCCACATCCGCCATGAAAGGATTCCAGGTGAGATTGTACAGTG
GCTAGTGTACGGCTCGTACCGGAAAGACTCGTTACGCCGCGCCACCTAAA
GGTTCAAACGACAGTTCGGCCAGCTTTTTACGCTCAAGGGGAAAATACCC
TGCTGTAACTTCATTGTTCGAAGCGCCTCTTAAAAACTGGCCCATAAGAG
CCACGTGAACCACTTAGACATCTGTAACCACATTATAGGAACTAACTCAG
CACCCTGTCATGTACTCACAGCCCTGGCCTATCCGACCGGAGCCCGAATA
CTTCCTTACTTCGACGACCGGAATGAGGAGAGGGGTTGGCCTTTAGCACT
CTGTCATGGTGGGGAGAGTTGGAGTATCAGTTACTGTTACCTTCCACTGA
TCATCCTGTAGTTTTAGCCGGTCCTGGGGGTAAGCATAGATGTGAGAATC
TATGAAGGACGGCCTGTCTCATGGAGGAAAGAGTTTACTTACGAGTCGGA
ACAAGCCCTACGTATGAGACCAGCGAGCACCCAACTAATTCGGCGAACCT
ATTTTTAAAGCCGCAACTGTCGGGCCTAGAACGTATCTAAAACAACTCGC
CCTGTATACCAGTGGTACCAAGGGGTGTGCAGTGATCAGGTATCTCACAT
TCTGTAAGTGGTATGGTAGTATCTCCTGGATTCCAAACTTAGTGTGGGTT
GCCAACTCTAGCCGCGACCTATCGGATGCTCTCGCGCGACAGTTCCGACT
CGATAACGGCGCTAACTGATTTTTCGCATCCGGTAACTCGTCGATAACCG
AAATCTACCGTTGATAGGTGGAGGACAAAGCAAAGACCTATCCTCAATTG
AAGGTGTCAGCTAACATACTTGTCGGCCGCCACTAAACAATCGCTTTGGT
GTCCCGGATATTGTTCAGCGTCAGTATTCCGGATCACGGGGGTTCTACTG
AACTCTGTTGCGCTAGAACATATAGATTGCCAAGCCTCATGACAACGCCA
AGGGTGAGGTCGTCGGTGTTTGGGCAGAAGCAGGTACCCGATATGCCCGA
CGCGTCTTTGATCGCCAGCCAACCATGAGAGTACTAGTTGCGAACGCGTG
GTCGCTACCAACGAAAGTGGTCATTAATAGCGTCAGATCCTTCCCCGTTT
AAACCGGCGATCGGTTGTCCACCTTTACATCAGTTTTGACAGTAAATTAT
ACGACTCACATCAAGGTTCAGAAGCCGCTCCCAGGCCTGGTCCACGACGG
CTCGCCGGCTACCGTGCTCAAAAACTGAGGGAGACATAGTCGGCAGAATT
TTCTAAAGGACACACTTTTCCCTATGGTAGGCGATAGCATCTAAAAGTTT
TAAGTGCATTCTGCCTCGAACCGAGATTCGGCTAAAAAGGAGTAGATGAA
AGACATGCGTACGAAGCAGGTTATAAGTTTCCGAATAATGATTCCAACCA
AATCTTTGAGCGGGCGCCACCATTACTAGATTGGGTCAGCAACAGGAAAT