splatter.Rmd

title: "Introduction to Splatter"
author: "Luke Zappia"
date: "`r Sys.Date()`"
output:
    BiocStyle::html_document:
        toc: true
vignette: >
  %\VignetteIndexEntry{An introduction to the Splatter package}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
# Global knitr chunk options for this vignette.
# For an HTML build that works nicely with GitHub and web pages, run:
#   rmarkdown::render("vignettes/splatter.Rmd", "all")
knitr::opts_chunk$set(
  fig.align = "center",
  fig.width = 6,
  fig.height = 5,
  dev = "png"
)
# Install splatter from Bioconductor.
# The biocLite.R bootstrap script and biocLite() have been retired by
# Bioconductor; BiocManager is the supported installer, so make sure it is
# available before using it.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("splatter")

# Install splatter from the Oshlack/splatter GitHub repository, pulling in
# dependencies and building the vignettes.
# NOTE: BiocManager hands "user/repo" installs to the remotes package, which
# therefore needs to be installed for this call to succeed.
BiocManager::install("Oshlack/splatter", dependencies = TRUE,
                     build_vignettes = TRUE)
# Attach splatter together with scater, then load the example counts
library(splatter)
library(scater)
data("sc_example_counts")

# Quick start: estimate simulation parameters from the example counts ...
params <- splatEstimate(sc_example_counts)
# ... and simulate from those parameters with dropout.present switched off
sim <- splatSimulate(
  params,
  dropout.present = FALSE
)
# Create a fresh parameters object and print it
params <- newSplatParams()
params

# Read a single parameter, overwrite it, then read it back
getParam(params, "nGenes")
params <- setParam(params, "nGenes", 5000)
getParam(params, "nGenes")

# Several parameters can be updated in one call by passing a named list
params <- setParams(
  params,
  update = list(nGenes = 8000, mean.rate = 0.5)
)
# Several parameters can also be read back together; the result is a list
getParams(params, c("nGenes", "mean.rate", "mean.shape"))
# setParams() equally accepts the new values as extra named arguments
params <- setParams(params, mean.shape = 0.5, de.prob = 0.2)
params

# Values may also be supplied when the parameters object is created
params <- newSplatParams(lib.loc = 12, lib.scale = 0.6)
getParams(params, c("lib.loc", "lib.scale"))
# Confirm that sc_example_counts is a matrix holding integers
class(sc_example_counts)
typeof(sc_example_counts)
# Dimensions: one row per gene, one column per cell
dim(sc_example_counts)
# Peek at the top-left 5 x 5 corner
sc_example_counts[seq_len(5), seq_len(5)]

# Estimate parameters from the example counts, then simulate with nGenes
# overridden to 1000 and dropout.present switched off
params <- splatEstimate(sc_example_counts)
sim <- splatSimulate(
  params,
  nGenes = 1000,
  dropout.present = FALSE
)
# Print the simulation object
sim

# Simulated counts (top-left 5 x 5 corner)
counts(sim)[seq_len(5), seq_len(5)]
# Per-gene information
head(rowData(sim))
# Per-cell information
head(colData(sim))
# Names of the gene-by-cell matrices stored as assays
names(assays(sim))
# One of those matrices, the cell means (top-left 5 x 5 corner)
assays(sim)[["CellMeans"]][seq_len(5), seq_len(5)]
# Have scater compute logcounts for the simulation
sim <- normalise(sim)
# Draw a PCA plot of the simulated cells
plotPCA(sim)
# Groups: method = "groups" with group probabilities of 0.5 each,
# coloured by group on the PCA
sim.groups <- splatSimulate(
  group.prob = c(0.5, 0.5),
  method = "groups",
  verbose = FALSE
)
sim.groups <- normalise(sim.groups)
plotPCA(sim.groups, colour_by = "Group")

# Paths: method = "paths", coloured by step on the PCA
sim.paths <- splatSimulate(
  method = "paths",
  verbose = FALSE
)
sim.paths <- normalise(sim.paths)
plotPCA(sim.paths, colour_by = "Step")

# Batches: batchCells = c(50, 50), coloured by batch on the PCA
sim.batches <- splatSimulate(
  batchCells = c(50, 50),
  verbose = FALSE
)
sim.batches <- normalise(sim.batches)
plotPCA(sim.batches, colour_by = "Batch")
# Combine batches and groups in a single simulation
sim.groups <- splatSimulate(batchCells = c(50, 50), group.prob = c(0.5, 0.5),
                            method = "groups", verbose = FALSE)
sim.groups <- normalise(sim.groups)
# Plot from plotPCA()'s default expression values, as the other PCA plots in
# this file do. Forcing exprs_values = "counts" here made the plot ignore the
# values that normalise() had just added. Batch sets the point shape and
# Group sets the colour.
plotPCA(sim.groups, shape_by = "Batch", colour_by = "Group")