Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
R version 4.0.4 (2021-02-15) -- "Lost Library Book"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> #### ---------- dataset import ----------- ####
>
> ## Dataset info file
> # The dataset json file should have AT LEAST the following information
> # link, tissue, n_cells, n_genes (added automatically later), description
> info_list <- list(
+ "link" = "link to study",
+ "tissue" = "tissue",
+ "description" = "A complete description about the experimental design,\n
+ for e.g. the treatment, condition, specificities, etc.",
+ "note" = "Any comment on the importance of this dataset for the benchmark,\n
+ e.g., 'example of unbalanced sample sizes'.",
+ "last.update" = Sys.time() + 3600 * 2
+ )
>
> suppressPackageStartupMessages({
+ library("BiocManager")
+ library('SingleCellExperiment')
+ library('jsonlite')
+ library('Matrix')
+ library("R.utils")
+ })
> source("src/r_utils.R")
>
> if (interactive()){
+ dataset_name <- gsub("\\/work\\/", "", getwd())
+ out_path <- paste0("data/", dataset_name)
+ dir.create(out_path, showWarnings = FALSE )
+ } else {
+ args <- (commandArgs(trailingOnly = TRUE))
+ for (i in seq_len(length(args))) {
+ eval(parse(text = args[[i]]))
+ }
+ }
>
> print(out_path)
[1] "data/some_data_test"
> print(dataset_name)
[1] "some_data_test"
>
>
>
> ### -------------------------------------------- ###
> ## ------------ Format the data ----------------- ##
> ### -------------------------------------------- ###
>
> #############
> # YOUR CODE #
> #############
>
> # Example of what the data can look like:
> sce <- dummy_data()
> # you can also check what the data files should look like:
> # sce <- dummy_data(write_data = TRUE)
>
> ### -------------------------------------------- ###
> ## --------- Control and save data -------------- ##
> ### -------------------------------------------- ###
>
> # Check that the data are in the correct form
> check_input_data(dat_counts = counts(sce),
+ meta_features = as.data.frame(rowData(sce)),
+ meta_cells = as.data.frame(colData(sce)))
>
> ## The last steps should always be in this form:
> # Save counts as gzipped mtx
> matrix_out <- paste0(out_path, "/counts_", dataset_name, ".mtx")
> writeMM(obj = counts(sce), matrix_out)
NULL
> gzip(matrix_out, overwrite=TRUE)
>
> # save cell and features meta
> jsonlite::write_json(as.data.frame(colData(sce)), paste0(out_path, "/meta_", dataset_name, ".json"),
+ matrix = "columnmajor")
>
> jsonlite::write_json(as.data.frame(rowData(sce)), paste0(out_path, "/feature_", dataset_name, ".json"),
+ matrix = "columnmajor")
>
> # Add final info
> info_list$n_cells <- ncol(sce)
> info_list$n_genes <- nrow(sce)
>
> jsonlite::write_json(info_list, paste0(out_path, "/data_info_", dataset_name, ".json"))
>
> sessionInfo()
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS
Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] R.utils_2.10.1 R.oo_1.24.0
[3] R.methodsS3_1.8.1 Matrix_1.3-2
[5] jsonlite_1.7.2 SingleCellExperiment_1.12.0
[7] SummarizedExperiment_1.20.0 Biobase_2.50.0
[9] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7
[11] IRanges_2.24.1 S4Vectors_0.28.1
[13] BiocGenerics_0.36.1 MatrixGenerics_1.2.1
[15] matrixStats_0.58.0 BiocManager_1.30.12
loaded via a namespace (and not attached):
[1] XVector_0.30.0 zlibbioc_1.36.0 lattice_0.20-41
[4] tools_4.0.4 grid_4.0.4 GenomeInfoDbData_1.2.4
[7] bitops_1.0-6 RCurl_1.98-1.3 DelayedArray_0.16.3
[10] compiler_4.0.4
>
>
>
> proc.time()
user system elapsed