Update 11/12/2020

8eb9b1fa · Jeffrey Pullin · ce84d92d · 8eb9b1fa · 8eb9b1fa · 8eb9b1fa
Commit 8eb9b1fa authored Jan 27, 2021 by Jeffrey Pullin
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@
 config
 data
 logs
+results
--- a/Snakefile
+++ b/Snakefile
@@ -11,20 +11,21 @@ Jeffrey Pullin 2020/2021

 import json

-onstart:
-	shell("Rscript code/simulation-pars.R")
-  shell("Rscript code/method-pars.R")
-
 configfile: "config.yaml"
-
 R = config["R"]

+onstart:
+  shell("Rscript code/simulation-pars.R") # simulation parameters
+  shell("Rscript code/method-pars.R")
+
 sim_ids = json.loads(open(config["sim_ids_path"]).read())
-meth_ids = json.loads(open(config["meth_ids_path"]).read())
+method_ids = json.loads(open(config["method_ids_path"]).read())

 rule all:
  input:
-      expand(config["sim_data_folder"] + "{sim_id}.rds", sim_id=sim_ids)
+      expand(config["sim_data_folder"] + "{sim_id}.rds", sim_id = sim_ids),
+      expand(config["results_folder"] + "{sim_id}-{method_id}.rds", sim_id = sim_ids,
+      method_id = method_ids)

 rule simulate_data:
  priority: 100
@@ -39,15 +40,15 @@ rule simulate_data:

 rule run_method:
 	priority: 99
-	input:	script = config["code_folder"] + "run_method.R",
+	input:	script = config["code_folder"] + "run-method.R",
 			sim = config["sim_data_folder"] + "{sim_id}.rds",
-			meth_pars = config["meth_pars_folder"] + "{meth_id}.json",
-			fun = lambda wc: config["scripts"] + "run_" + mids.loc[wc.mid, "type"] + ".R"
-	output: res = config["results"] + "{sid}-{mid}.rds"
-	log:	config["logs_folder"] + "run_meth-{sid}-{mid}.Rout"
+			method_pars = config["method_pars_folder"] + "{method_id}.json",
+			fun = lambda wc: config["code_folder"] + "run-" + wc.method_id.split('_')[0] + ".R"
+	output: res = config["results_folder"] + "{sim_id}-{method_id}.rds"
+	log:	config["logs_folder"] + "run_method-{sim_id}-{method_id}.Rout"
 	shell:	'''{R} CMD BATCH --no-restore --no-save\
 		"--args sim={input.sim} fun={input.fun}\
-		meth_pars={input.meth_pars} res={output.res}"\
+		meth_pars={input.method_pars} res={output.res}"\
 		{input.script} {log}'''

 onsuccess:

--- a/code/method-pars.R
+++ b/code/method-pars.R
 # This file specifies the marker genes methods (and their parameters) which
 # will be assessed.

-suppressMessages({
-  library(tidyr)
-  library(yaml)
-  library(here)
-})
-
 # Read the overall config file.
-config <- read_yaml(here("config.yaml"))
+config <- yaml::read_yaml(here::here("config.yaml"))

 # For now we will start with {scran} (with default arguments) only.

@@ -16,10 +10,30 @@ config <- read_yaml(here("config.yaml"))
 methods <- c("scran")

 # Using expand_grid because it has nicer as.list behavior.
-scran <- expand_grid(method = "scran",
-                     test.type = c("t"),
-                     pval.type = "any")
+scran <- expand.grid(method = "scran",
+                     test.type = c("t", "wilcox"),
+                     pval.type = c("any", "all"))
+
+method_pars <- scran
+
+method_ids <- apply(method_pars, 1, function(x) paste0(x, collapse = "_"))
+
+# We want each row to be a list.
+method_pars <- apply(method_pars, 1, function(x) as.list(x))
+names(method_pars) <- method_ids
+
+# TODO: Use csv or stick to JSON?

+# Write out the method ids to a (JSON) file.
+write(jsonlite::toJSON(method_ids), config$method_ids_path)

-# TODO
+# Construct the paths of the method id files.
+# TODO: This could be made simpler...
+method_paths <- here::here(config$method_pars_folder, method_ids)
+method_paths <- paste0(method_paths, ".json")
+names(method_paths) <- method_ids

+# Write all the parameters of each method to file.
+for (id in method_ids) {
+  write(jsonlite::toJSON(method_pars[[id]], null = "null"), method_paths[[id]])
+}
--- a/code/run-method.R
+++ b/code/run-method.R
@@ -14,14 +14,16 @@ print(args)
 # Extract the elements of the passed argument.
 sim_data <- readRDS(args$sim)
 meth_pars <- as.list(fromJSON(args$meth_pars))
-method_file <- args$method_file
-result_file <- args$result_file
+# TODO: Improve argument names. These come from the snakefile.
+method_file <- args$fun
+result_file <- args$res

 # In this project the file run_{method} will contain a function `run_{method}`
 # which takes two arguments: a SingleCellExperiment object and a named list of
 # parameters/arguments for the method.
 source(method_file)
-func <- gsub("(.R)", "", basename(fun))
+func <- gsub("(.R)", "", basename(method_file))
+func <- gsub("-", "_", func)
 result <- get(func)(sim_data, meth_pars)

 # TODO: Add more context the the result.

--- a/code/run-scran.R
+++ b/code/run-scran.R
@@ -21,7 +21,7 @@ run_scran <- function(sce, pars) {
  )

  # Get the elapsed time.
-  time <- time[[3]]
+  time <- times[[3]]

  list(time = time, result = result)
 }

--- a/code/simulate-data.R
+++ b/code/simulate-data.R
@@ -68,8 +68,14 @@ colLabels(sim) <- colData(sim)$Group

 # Add information about the locations of the DE genes in each group.
 sim <- SingleCellExperiment(
-  sim,
-  metadata = list(de_inds = de_inds)
+  assays = assays(sim),
+  colData = colData(sim),
+  rowData = rowData(sim),
+  metadata = list(de_inds = de_inds),
+  altExps = altExps(sim),
+  reducedDims = reducedDims(sim),
+  rowPairs = rowPairs(sim),
+  colPairs = colPairs(sim)
 )

 # For debugging purposes.

--- a/config.yaml
+++ b/config.yaml
@@ -4,10 +4,11 @@ code_folder: "code/"
 logs_folder: "logs/"

 sim_ids_path: "config/sim_ids.json"
-meth_ids_path: "confgi/meth_ids.json"
+method_ids_path: "config/method_ids.json"

 sim_pars_folder: "config/sim_pars/"
-meth_pars_folder: "config/meth_pars/"
+method_pars_folder: "config/method_pars/"

 sim_data_folder: "data/sim_data/"

+results_folder: "results/"
--- a/notes/thinking.md
+++ b/notes/thinking.md
@@ -18,4 +18,5 @@ What is the true number of marker genes in a real dataset?

 Is pval.type = "all" the same as one vs rest?

+onstart not running