R version 4.0.4 (2021-02-15) -- "Lost Library Book" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #### ---------- dataset import ----------- #### > > ## Dataset info file > # The dataset json file should have AT LEAST the following information > # link, tissue, n_cells, n_genes (added automatically latter), description > info_list <- list( + "link" = "link to study", + "tissue" = "tissue", + "description" = "A complete description about the experimental design,\n + for e.g. the treatment, condition, specificities, etc.", + "note" = "Any comment on the importance of this dataset for the benchmark,\n + e.g., 'example of unbalanced sample sizes'.", + "last.update" = Sys.time() + 3600 * 2 + ) > > suppressPackageStartupMessages({ + library("BiocManager") + library('SingleCellExperiment') + library('jsonlite') + library('Matrix') + library("R.utils") + }) > source("src/r_utils.R") > > if (interactive()){ + dataset_name <- gsub("\\/work\\/", "", getwd()) + out_path <- paste0("data/", dataset_name) + dir.create(out_path, showWarnings = FALSE ) + } else { + args <- (commandArgs(trailingOnly = TRUE)) + for (i in seq_len(length(args))) { + eval(parse(text = args[[i]])) + } + } > > print(out_path) [1] "data/some_data_test" > print(dataset_name) [1] "some_data_test" > > > > ### -------------------------------------------- ### > ## ------------ Format the data ----------------- ## > ### -------------------------------------------- ### > > ############# > # YOUR CODE # > ############# > > # Example of how the data can look like: > sce <- dummy_data() > # you can also check how the data files should look like: > # sce <- dummy_data(write_data = TRUE) > > ### -------------------------------------------- ### > ## --------- Control and save data -------------- ## > ### -------------------------------------------- ### > > # Check that the data are in the correct form > check_input_data(dat_counts = counts(sce), + meta_features = as.data.frame(rowData(sce)), + meta_cells = as.data.frame(colData(sce))) > > ## The lasts steps should always be in this form: > # Save counts as gziped mtx > matrix_out <- paste0(out_path, "/counts_", dataset_name, ".mtx") > writeMM(obj = counts(sce), matrix_out) NULL > gzip(matrix_out, overwrite=TRUE) > > # save cell and features meta > jsonlite::write_json(as.data.frame(colData(sce)), paste0(out_path, "/meta_", dataset_name, ".json"), + matrix = "columnmajor") > > jsonlite::write_json(as.data.frame(rowData(sce)), paste0(out_path, "/feature_", dataset_name, ".json"), + matrix = "columnmajor") > > # Adding final infos > info_list$n_cells <- ncol(sce) > info_list$n_genes <- nrow(sce) > > jsonlite::write_json(info_list, paste0(out_path, "/data_info_", dataset_name, ".json")) > > sessionInfo() R version 4.0.4 (2021-02-15) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 20.04.2 LTS Matrix products: default BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base other attached packages: [1] R.utils_2.10.1 R.oo_1.24.0 [3] R.methodsS3_1.8.1 Matrix_1.3-2 [5] jsonlite_1.7.2 SingleCellExperiment_1.12.0 [7] SummarizedExperiment_1.20.0 Biobase_2.50.0 [9] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7 [11] IRanges_2.24.1 S4Vectors_0.28.1 [13] BiocGenerics_0.36.1 MatrixGenerics_1.2.1 [15] matrixStats_0.58.0 BiocManager_1.30.12 loaded via a namespace (and not attached): [1] XVector_0.30.0 zlibbioc_1.36.0 lattice_0.20-41 [4] tools_4.0.4 grid_4.0.4 GenomeInfoDbData_1.2.4 [7] bitops_1.0-6 RCurl_1.98-1.3 DelayedArray_0.16.3 [10] compiler_4.0.4 > > > > proc.time() user system elapsed 9.015 1.358 11.409