Skip to content
generate_some_data_test.Rout 4.91 KiB
Newer Older

R version 4.0.4 (2021-02-15) -- "Lost Library Book"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #### ----------  dataset import ----------- #### 
> 
> ## Dataset info file
> # The dataset json file should have AT LEAST the following information
> # link, tissue, n_cells and n_genes (both added automatically later), description
> info_list <- list(
+   "link" = "link to study",
+   "tissue" = "tissue",
+   "description" = "A complete description about the experimental design,\n
+                      for e.g. the treatment, condition, specificities, etc.", 
+   "note" = "Any comment on the importance of this dataset for the benchmark,\n
+               e.g., 'example of unbalanced sample sizes'.", 
+   "last.update" = Sys.time() + 3600 * 2
+ )
> 
> suppressPackageStartupMessages({
+   library("BiocManager")
+   library('SingleCellExperiment')
+   library('jsonlite')
+   library('Matrix')
+   library("R.utils")
+ })
> source("src/r_utils.R")
> 
> if (interactive()){
+   dataset_name <- gsub("\\/work\\/", "", getwd())
+   out_path <- paste0("data/", dataset_name)
+   dir.create(out_path, showWarnings = FALSE )
+ } else {
+   args <- (commandArgs(trailingOnly = TRUE))
+   for (i in seq_len(length(args))) {
+     eval(parse(text = args[[i]]))
+   }
+ }
> 
> print(out_path)
[1] "data/some_data_test"
> print(dataset_name)
[1] "some_data_test"
> 
> 
> 
> ### -------------------------------------------- ###
> ## ------------ Format the data ----------------- ##
> ### -------------------------------------------- ###
> 
> #############
> # YOUR CODE #
> #############
> 
> # Example of what the data can look like:
> sce <- dummy_data()
> # you can also check what the data files should look like: 
> # sce <- dummy_data(write_data = TRUE)
> 
> ### -------------------------------------------- ###
> ## --------- Control and save data -------------- ##
> ### -------------------------------------------- ###
> 
> # Check that the data are in the correct form
> check_input_data(dat_counts = counts(sce), 
+                  meta_features = as.data.frame(rowData(sce)), 
+                  meta_cells = as.data.frame(colData(sce)))
> 
> ## The last steps should always be in this form:
> # Save counts as gzipped mtx
> matrix_out <- paste0(out_path, "/counts_", dataset_name, ".mtx")
> writeMM(obj = counts(sce), matrix_out)
NULL
> gzip(matrix_out, overwrite=TRUE)
> 
> # save cell and features meta
> jsonlite::write_json(as.data.frame(colData(sce)), paste0(out_path, "/meta_", dataset_name, ".json"), 
+                      matrix = "columnmajor")
> 
> jsonlite::write_json(as.data.frame(rowData(sce)), paste0(out_path, "/feature_", dataset_name, ".json"), 
+                      matrix = "columnmajor")
> 
> # Adding final infos
> info_list$n_cells <- ncol(sce)
> info_list$n_genes <- nrow(sce)
> 
> jsonlite::write_json(info_list, paste0(out_path, "/data_info_", dataset_name, ".json"))
> 
> sessionInfo()
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] R.utils_2.10.1              R.oo_1.24.0                
 [3] R.methodsS3_1.8.1           Matrix_1.3-2               
 [5] jsonlite_1.7.2              SingleCellExperiment_1.12.0
 [7] SummarizedExperiment_1.20.0 Biobase_2.50.0             
 [9] GenomicRanges_1.42.0        GenomeInfoDb_1.26.7        
[11] IRanges_2.24.1              S4Vectors_0.28.1           
[13] BiocGenerics_0.36.1         MatrixGenerics_1.2.1       
[15] matrixStats_0.58.0          BiocManager_1.30.12        

loaded via a namespace (and not attached):
 [1] XVector_0.30.0         zlibbioc_1.36.0        lattice_0.20-41       
 [4] tools_4.0.4            grid_4.0.4             GenomeInfoDbData_1.2.4
 [7] bitops_1.0-6           RCurl_1.98-1.3         DelayedArray_0.16.3   
[10] compiler_4.0.4        
> 
> 
> 
> proc.time()
   user  system elapsed 
  9.861   1.175  10.867