#### ---------- dataset import ----------- ####

## Dataset info file
# The dataset json file should have AT LEAST the following information
# link, tissue, n_cells and n_genes (both added automatically later), description
# Template of the dataset description that is written to the data_info JSON
# file at the end of this script. The string values below are placeholders to
# be replaced with the real study information; `n_cells` and `n_genes` are
# appended further down, just before the list is written.
# NOTE(review): `last.update` is the current time plus 3600 * 2 seconds (two
# hours) -- presumably a manual timezone offset; confirm it is still wanted.
info_list <- list(
+     "link" = "link to study",
+     "tissue" = "tissue",
+     "description" = "A complete description about the experimental design,\n
+     for e.g. the treatment, condition, specificities, etc.",
+     "note" = "Any comment on the importance of this dataset for the benchmark,\n
+     e.g., 'example of unbalanced sample sizes'.",
+     "last.update" = Sys.time() + 3600 * 2
+ )

suppressPackageStartupMessages({
+     library("BiocManager")
+     library('SingleCellExperiment')
+     library('jsonlite')
+     library('Matrix')
+     library("R.utils")
+ })
source("src/r_utils.R")

# Resolve `dataset_name` and `out_path`, then make sure `out_path` exists.
# - Interactive session: both are derived from the working directory (the
#   "/work/" prefix is stripped from getwd() to obtain the dataset name).
# - Batch run: every trailing command-line argument is evaluated as an R
#   expression (e.g. dataset_name="some_data"), which is expected to define
#   `dataset_name` and `out_path`.
#   NOTE(review): eval(parse()) runs arbitrary code taken from the command
#   line; it is kept for compatibility with existing callers and must only be
#   used with trusted arguments.
if (interactive()) {
+     dataset_name <- gsub("\\/work\\/", "", getwd())
+     out_path <- paste0("data/", dataset_name)
+ } else {
+     args <- commandArgs(trailingOnly = TRUE)
+     for (i in seq_along(args)) {
+         eval(parse(text = args[[i]]))
+     }
+     # Fail early, with a clear message, when a required variable was not
+     # supplied instead of hitting "object not found" further down.
+     missing_args <- setdiff(c("dataset_name", "out_path"), ls())
+     if (length(missing_args) > 0) {
+         stop("Missing required argument(s): ",
+              paste(missing_args, collapse = ", "), call. = FALSE)
+     }
+ }
# Create the output directory in both modes, including missing parents; this
# is a no-op when the directory already exists.
dir.create(out_path, showWarnings = FALSE, recursive = TRUE)

# Echo the resolved output directory and dataset name into the log
print(out_path)
[1] "data/some_data_test"
print(dataset_name)
[1] "some_data_test"



### -------------------------------------------- ###
## ------------ Format the data ----------------- ##
### -------------------------------------------- ###

#############
# YOUR CODE #
#############

# Example of what the data can look like.
# NOTE(review): dummy_data() is not defined in this file; presumably it comes
# from the sourced src/r_utils.R -- confirm.
sce <- dummy_data()
# You can also check what the data files should look like:
# sce <- dummy_data(write_data = TRUE)

### -------------------------------------------- ###
## --------- Control and save data -------------- ##
### -------------------------------------------- ###

# Check that the data are in the correct form before anything is written:
# the count matrix, the feature (row) metadata and the cell (column) metadata
# of `sce` are passed to check_input_data().
# NOTE(review): check_input_data() is defined outside this file; how it reacts
# to invalid input (error vs. warning) cannot be seen from here -- confirm.
check_input_data(dat_counts = counts(sce),
+                  meta_features = as.data.frame(rowData(sce)),
+                  meta_cells = as.data.frame(colData(sce)))

## The last steps should always be in this form:
# Write the count matrix to a MatrixMarket (.mtx) file, then gzip that file
matrix_out <- file.path(out_path, paste0("counts_", dataset_name, ".mtx"))
writeMM(obj = counts(sce), file = matrix_out)
NULL
gzip(matrix_out, overwrite = TRUE)

# Save the cell metadata and the feature metadata as JSON files
jsonlite::write_json(
+     x = as.data.frame(colData(sce)),
+     path = file.path(out_path, paste0("meta_", dataset_name, ".json")),
+     matrix = "columnmajor"
+ )

jsonlite::write_json(
+     x = as.data.frame(rowData(sce)),
+     path = file.path(out_path, paste0("feature_", dataset_name, ".json")),
+     matrix = "columnmajor"
+ )

# Complete the dataset info with the final dimensions, then write it to JSON
info_list[["n_cells"]] <- ncol(sce)
info_list[["n_genes"]] <- nrow(sce)

jsonlite::write_json(
+     x = info_list,
+     path = file.path(out_path, paste0("data_info_", dataset_name, ".json"))
+ )

# Record the R version, platform and loaded packages in the log
sessionInfo()
R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] R.utils_2.10.1              R.oo_1.24.0                
 [3] R.methodsS3_1.8.1           Matrix_1.3-2               
 [5] jsonlite_1.7.2              SingleCellExperiment_1.12.0
 [7] SummarizedExperiment_1.20.0 Biobase_2.50.0             
 [9] GenomicRanges_1.42.0        GenomeInfoDb_1.26.7        
[11] IRanges_2.24.1              S4Vectors_0.28.1           
[13] BiocGenerics_0.36.1         MatrixGenerics_1.2.1       
[15] matrixStats_0.58.0          BiocManager_1.30.12        

loaded via a namespace (and not attached):
[1] XVector_0.30.0          zlibbioc_1.36.0         lattice_0.20-41        
[4] tools_4.0.4             grid_4.0.4              GenomeInfoDbData_1.2.4 
[7] bitops_1.0-6            RCurl_1.98-1.3          DelayedArray_0.16.3    
[10] compiler_4.0.4         



# Report the user, system and elapsed time used by this R process
proc.time()
   user  system elapsed 
  9.015   1.358  11.409