Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
sirplus
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
BioCellGen-public
sirplus
Commits
0e4f7aa2
Commit
0e4f7aa2
authored
8 years ago
by
Luke Zappia
Browse files
Options
Downloads
Patches
Plain Diff
Rename n.genes, n.cells, n.groups for consistency
parent
63d2b234
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
DESCRIPTION
+1
-1
1 addition, 1 deletion
DESCRIPTION
R/params.R
+11
-11
11 additions, 11 deletions
R/params.R
R/simulate.R
+41
-41
41 additions, 41 deletions
R/simulate.R
with
53 additions
and
53 deletions
DESCRIPTION
+
1
−
1
View file @
0e4f7aa2
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 0.3.1
7
Version: 0.3.1
8
Date: 2016-10-11
Author: Luke Zappia
Authors@R: as.person(c(
...
...
This diff is collapsed.
Click to expand it.
R/params.R
+
11
−
11
View file @
0e4f7aa2
...
...
@@ -336,7 +336,7 @@ checkParams <- function(params) {
# Define which parameters are allowed to be vectors
vectors
<-
c
(
"groupCells"
,
"path.from"
,
"path.length"
,
"path.skew"
)
n
.g
roups
<-
length
(
getParams
(
params
,
"groupCells"
))
n
G
roups
<-
length
(
getParams
(
params
,
"groupCells"
))
for
(
idx
in
seq_along
(
types
))
{
name
<-
names
(
types
)[
idx
]
...
...
@@ -349,7 +349,7 @@ checkParams <- function(params) {
if
(
name
%in%
vectors
)
{
if
(
any
(
is.na
(
value
)))
{
stop
(
name
,
" is a vector and contains NA values"
)
}
else
if
(
length
(
value
)
!=
n
.g
roups
)
{
}
else
if
(
length
(
value
)
!=
n
G
roups
)
{
stop
(
"length of "
,
name
,
" must be 1 or the length of "
,
"the groupCells parameter"
)
}
...
...
@@ -385,11 +385,11 @@ checkParams <- function(params) {
}
# Check groupCells matches nCells, nGroups
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
.g
roups
<-
getParams
(
params
,
"nGroups"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
n
G
roups
<-
getParams
(
params
,
"nGroups"
)
group.cells
<-
getParams
(
params
,
"groupCells"
)
if
(
!
all
(
is.na
(
group.cells
))
&&
(
n
.c
ells
!=
sum
(
group.cells
)
||
n
.g
roups
!=
length
(
group.cells
)))
{
(
n
C
ells
!=
sum
(
group.cells
)
||
n
G
roups
!=
length
(
group.cells
)))
{
stop
(
"nCells, nGroups and groupCells are not consistent"
)
}
...
...
@@ -399,9 +399,9 @@ checkParams <- function(params) {
if
(
!
all
(
is.na
(
path.from
)))
{
if
(
!
(
0
%in%
path.from
))
{
stop
(
"origin must be specified in path.from"
)
}
else
if
(
any
(
path.from
>
n
.g
roups
))
{
}
else
if
(
any
(
path.from
>
n
G
roups
))
{
stop
(
"values in path.from cannot be greater than number of paths"
)
}
else
if
(
any
(
path.from
==
1
:
n
.g
roups
))
{
}
else
if
(
any
(
path.from
==
1
:
n
G
roups
))
{
stop
(
"path cannot begin at itself"
)
}
}
...
...
@@ -486,21 +486,21 @@ defaultParams <- function() {
#' }
expandPathParams
<-
function
(
params
)
{
n
.g
roups
<-
getParams
(
params
,
"nGroups"
)
n
G
roups
<-
getParams
(
params
,
"nGroups"
)
path.from
<-
getParams
(
params
,
"path.from"
)
path.length
<-
getParams
(
params
,
"path.length"
)
path.skew
<-
getParams
(
params
,
"path.skew"
)
if
(
length
(
path.from
)
==
1
)
{
params
<-
setParams
(
params
,
path.from
=
rep
(
path.from
,
n
.g
roups
))
params
<-
setParams
(
params
,
path.from
=
rep
(
path.from
,
n
G
roups
))
}
if
(
length
(
path.length
)
==
1
)
{
params
<-
setParams
(
params
,
path.length
=
rep
(
path.length
,
n
.g
roups
))
params
<-
setParams
(
params
,
path.length
=
rep
(
path.length
,
n
G
roups
))
}
if
(
length
(
path.skew
)
==
1
)
{
params
<-
setParams
(
params
,
path.skew
=
rep
(
path.skew
,
n
.g
roups
))
params
<-
setParams
(
params
,
path.skew
=
rep
(
path.skew
,
n
G
roups
))
}
return
(
params
)
...
...
This diff is collapsed.
Click to expand it.
R/simulate.R
+
41
−
41
View file @
0e4f7aa2
...
...
@@ -116,22 +116,22 @@ splat <- function(params = defaultParams(), method = c("groups", "paths"),
params
<-
expandPathParams
(
params
)
# Get the parameters we are going to use
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
.g
roups
<-
getParams
(
params
,
"nGroups"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
n
G
roups
<-
getParams
(
params
,
"nGroups"
)
group.cells
<-
getParams
(
params
,
"groupCells"
)
# Set up name vectors
cell.names
<-
paste0
(
"Cell"
,
1
:
n
.c
ells
)
gene.names
<-
paste0
(
"Gene"
,
1
:
n
.g
enes
)
cell.names
<-
paste0
(
"Cell"
,
1
:
n
C
ells
)
gene.names
<-
paste0
(
"Gene"
,
1
:
n
G
enes
)
if
(
method
==
"groups"
)
{
group.names
<-
paste0
(
"Group"
,
1
:
n
.g
roups
)
group.names
<-
paste0
(
"Group"
,
1
:
n
G
roups
)
}
else
if
(
method
==
"paths"
)
{
group.names
<-
paste0
(
"Path"
,
1
:
n
.g
roups
)
group.names
<-
paste0
(
"Path"
,
1
:
n
G
roups
)
}
# Create SCESet with dummy counts to store simulation
dummy.counts
<-
matrix
(
1
,
ncol
=
n
.c
ells
,
nrow
=
n
.g
enes
)
dummy.counts
<-
matrix
(
1
,
ncol
=
n
C
ells
,
nrow
=
n
G
enes
)
rownames
(
dummy.counts
)
<-
gene.names
colnames
(
dummy.counts
)
<-
cell.names
phenos
<-
new
(
"AnnotatedDataFrame"
,
data
=
data.frame
(
Cell
=
cell.names
))
...
...
@@ -143,7 +143,7 @@ splat <- function(params = defaultParams(), method = c("groups", "paths"),
# Make groups vector which is the index of param$groupCells repeated
# params$groupCells[index] times
groups
<-
lapply
(
1
:
n
.g
roups
,
function
(
i
,
g
)
{
rep
(
i
,
g
[
i
])},
groups
<-
lapply
(
1
:
n
G
roups
,
function
(
i
,
g
)
{
rep
(
i
,
g
[
i
])},
g
=
group.cells
)
groups
<-
unlist
(
groups
)
pData
(
sim
)
$
Group
<-
group.names
[
groups
]
...
...
@@ -205,11 +205,11 @@ splatPaths <- function(params = defaultParams(), verbose = TRUE, ...) {
#' @importFrom stats rlnorm
simLibSizes
<-
function
(
sim
,
params
)
{
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
lib.loc
<-
getParams
(
params
,
"lib.loc"
)
lib.scale
<-
getParams
(
params
,
"lib.scale"
)
exp.lib.sizes
<-
rlnorm
(
n
.c
ells
,
lib.loc
,
lib.scale
)
exp.lib.sizes
<-
rlnorm
(
n
C
ells
,
lib.loc
,
lib.scale
)
pData
(
sim
)
$
ExpLibSize
<-
exp.lib.sizes
return
(
sim
)
...
...
@@ -230,7 +230,7 @@ simLibSizes <- function(sim, params) {
#' @importFrom stats rgamma median
simGeneMeans
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
mean.shape
<-
getParams
(
params
,
"mean.shape"
)
mean.rate
<-
getParams
(
params
,
"mean.rate"
)
out.prob
<-
getParams
(
params
,
"out.prob"
)
...
...
@@ -239,10 +239,10 @@ simGeneMeans <- function(sim, params) {
out.facScale
<-
getParams
(
params
,
"out.facScale"
)
# Simulate base gene means
base.means.gene
<-
rgamma
(
n
.g
enes
,
shape
=
mean.shape
,
rate
=
mean.rate
)
base.means.gene
<-
rgamma
(
n
G
enes
,
shape
=
mean.shape
,
rate
=
mean.rate
)
# Add expression outliers
outlier.facs
<-
getLNormFactors
(
n
.g
enes
,
out.prob
,
out.loProb
,
out.facLoc
,
outlier.facs
<-
getLNormFactors
(
n
G
enes
,
out.prob
,
out.loProb
,
out.facLoc
,
out.facScale
)
median.means.gene
<-
median
(
base.means.gene
)
outlier.means
<-
median.means.gene
*
outlier.facs
...
...
@@ -271,7 +271,7 @@ simGeneMeans <- function(sim, params) {
#' @importFrom Biobase fData pData
simGroupDE
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
de.prob
<-
getParams
(
params
,
"de.prob"
)
de.downProb
<-
getParams
(
params
,
"de.downProb"
)
de.facLoc
<-
getParams
(
params
,
"de.facLoc"
)
...
...
@@ -280,7 +280,7 @@ simGroupDE <- function(sim, params) {
group.names
<-
unique
(
pData
(
sim
)
$
Group
)
for
(
group.name
in
group.names
)
{
de.facs
<-
getLNormFactors
(
n
.g
enes
,
de.prob
,
de.downProb
,
de.facLoc
,
de.facs
<-
getLNormFactors
(
n
G
enes
,
de.prob
,
de.downProb
,
de.facLoc
,
de.facScale
)
group.means.gene
<-
means.gene
*
de.facs
fData
(
sim
)[[
paste0
(
"DEFac"
,
group.name
)]]
<-
de.facs
...
...
@@ -304,7 +304,7 @@ simGroupDE <- function(sim, params) {
#' @importFrom Biobase fData pData
simPathDE
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
de.prob
<-
getParams
(
params
,
"de.prob"
)
de.downProb
<-
getParams
(
params
,
"de.downProb"
)
de.facLoc
<-
getParams
(
params
,
"de.facLoc"
)
...
...
@@ -320,7 +320,7 @@ simPathDE <- function(sim, params) {
}
else
{
means.gene
<-
fData
(
sim
)[[
paste0
(
"GeneMeanPath"
,
from
)]]
}
de.facs
<-
getLNormFactors
(
n
.g
enes
,
de.prob
,
de.downProb
,
de.facLoc
,
de.facs
<-
getLNormFactors
(
n
G
enes
,
de.prob
,
de.downProb
,
de.facLoc
,
de.facScale
)
path.means.gene
<-
means.gene
*
de.facs
fData
(
sim
)[[
paste0
(
"DEFacPath"
,
path
)]]
<-
de.facs
...
...
@@ -344,7 +344,7 @@ simPathDE <- function(sim, params) {
#' @importFrom Biobase fData pData assayData assayData<-
simGroupCellMeans
<-
function
(
sim
,
params
)
{
n
.g
roups
<-
getParams
(
params
,
"nGroups"
)
n
G
roups
<-
getParams
(
params
,
"nGroups"
)
cell.names
<-
pData
(
sim
)
$
Cell
gene.names
<-
fData
(
sim
)
$
Gene
groups
<-
pData
(
sim
)
$
Group
...
...
@@ -352,7 +352,7 @@ simGroupCellMeans <- function(sim, params) {
exp.lib.sizes
<-
pData
(
sim
)
$
ExpLibSize
group.means.gene
<-
fData
(
sim
)[,
paste0
(
"GeneMean"
,
group.names
)]
if
(
n
.g
roups
==
1
)
{
if
(
n
G
roups
==
1
)
{
group.means.gene
<-
matrix
(
group.means.gene
)
colnames
(
group.means.gene
)
<-
"GeneMeanGroup1"
}
...
...
@@ -383,8 +383,8 @@ simGroupCellMeans <- function(sim, params) {
#' @importFrom stats rbinom
simPathCellMeans
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
.g
roups
<-
getParams
(
params
,
"nGroups"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
n
G
roups
<-
getParams
(
params
,
"nGroups"
)
group.cells
<-
getParams
(
params
,
"groupCells"
)
path.from
<-
getParams
(
params
,
"path.from"
)
path.length
<-
getParams
(
params
,
"path.length"
)
...
...
@@ -398,7 +398,7 @@ simPathCellMeans <- function(sim, params) {
exp.lib.sizes
<-
pData
(
sim
)
$
ExpLibSize
# Generate paths. Each path is a matrix with path.length columns and
# n
.g
enes rows where the expression from each genes changes along the path.
# n
G
enes rows where the expression from each genes changes along the path.
path.steps
<-
lapply
(
seq_along
(
path.from
),
function
(
idx
)
{
from
<-
path.from
[
idx
]
# Find the means at the starting position
...
...
@@ -411,8 +411,8 @@ simPathCellMeans <- function(sim, params) {
means.end
<-
fData
(
sim
)[[
paste0
(
"GeneMeanPath"
,
idx
)]]
# Select genes to follow a non-linear path
is.nonlinear
<-
as.logical
(
rbinom
(
n
.g
enes
,
1
,
path.nonlinearProb
))
sigma.facs
<-
rep
(
0
,
n
.g
enes
)
is.nonlinear
<-
as.logical
(
rbinom
(
n
G
enes
,
1
,
path.nonlinearProb
))
sigma.facs
<-
rep
(
0
,
n
G
enes
)
sigma.facs
[
is.nonlinear
]
<-
path.sigmaFac
# Build Brownian bridges from start to end
steps
<-
buildBridges
(
means.start
,
means.end
,
n
=
path.length
[
idx
],
...
...
@@ -424,7 +424,7 @@ simPathCellMeans <- function(sim, params) {
})
# Randomly assign a position in the appropriate path to each cell
cell.steps
<-
lapply
(
1
:
n
.g
roups
,
function
(
idx
)
{
cell.steps
<-
lapply
(
1
:
n
G
roups
,
function
(
idx
)
{
path.probs
<-
seq
(
path.skew
[
idx
],
1
-
path.skew
[
idx
],
length
=
path.length
[
idx
])
path.probs
<-
path.probs
/
sum
(
path.probs
)
...
...
@@ -435,7 +435,7 @@ simPathCellMeans <- function(sim, params) {
})
# Collect the underlying expression levels for each cell
cell.means.gene
<-
lapply
(
1
:
n
.g
roups
,
function
(
idx
)
{
cell.means.gene
<-
lapply
(
1
:
n
G
roups
,
function
(
idx
)
{
cell.means
<-
path.steps
[[
idx
]][,
cell.steps
[[
idx
]]]
return
(
cell.means
)
})
...
...
@@ -468,8 +468,8 @@ simPathCellMeans <- function(sim, params) {
#' @importFrom stats rchisq rgamma
simBCVMeans
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
bcv.common
<-
getParams
(
params
,
"bcv.common"
)
bcv.DF
<-
getParams
(
params
,
"bcv.DF"
)
cell.names
<-
pData
(
sim
)
$
Cell
...
...
@@ -477,11 +477,11 @@ simBCVMeans <- function(sim, params) {
base.means.cell
<-
assayData
(
sim
)
$
BaseCellMeans
bcv
<-
(
bcv.common
+
(
1
/
sqrt
(
base.means.cell
)))
*
sqrt
(
bcv.DF
/
rchisq
(
n
.g
enes
,
df
=
bcv.DF
))
sqrt
(
bcv.DF
/
rchisq
(
n
G
enes
,
df
=
bcv.DF
))
means.cell
<-
matrix
(
rgamma
(
n
.g
enes
*
n
.c
ells
,
shape
=
1
/
(
bcv
^
2
),
means.cell
<-
matrix
(
rgamma
(
n
G
enes
*
n
C
ells
,
shape
=
1
/
(
bcv
^
2
),
scale
=
base.means.cell
*
(
bcv
^
2
)),
nrow
=
n
.g
enes
,
ncol
=
n
.c
ells
)
nrow
=
n
G
enes
,
ncol
=
n
C
ells
)
colnames
(
bcv
)
<-
cell.names
rownames
(
bcv
)
<-
gene.names
...
...
@@ -509,14 +509,14 @@ simBCVMeans <- function(sim, params) {
#' @importFrom stats rpois
simTrueCounts
<-
function
(
sim
,
params
)
{
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
cell.names
<-
pData
(
sim
)
$
Cell
gene.names
<-
fData
(
sim
)
$
Gene
cell.means
<-
assayData
(
sim
)
$
CellMeans
true.counts
<-
matrix
(
rpois
(
n
.g
enes
*
n
.c
ells
,
lambda
=
cell.means
),
nrow
=
n
.g
enes
,
ncol
=
n
.c
ells
)
true.counts
<-
matrix
(
rpois
(
n
G
enes
*
n
C
ells
,
lambda
=
cell.means
),
nrow
=
n
G
enes
,
ncol
=
n
C
ells
)
colnames
(
true.counts
)
<-
cell.names
rownames
(
true.counts
)
<-
gene.names
...
...
@@ -546,8 +546,8 @@ simDropout <- function(sim, params) {
true.counts
<-
assayData
(
sim
)
$
TrueCounts
if
(
dropout.present
)
{
n
.c
ells
<-
getParams
(
params
,
"nCells"
)
n
.g
enes
<-
getParams
(
params
,
"nGenes"
)
n
C
ells
<-
getParams
(
params
,
"nCells"
)
n
G
enes
<-
getParams
(
params
,
"nGenes"
)
dropout.mid
<-
getParams
(
params
,
"dropout.mid"
)
dropout.shape
<-
getParams
(
params
,
"dropout.shape"
)
cell.names
<-
pData
(
sim
)
$
Cell
...
...
@@ -557,14 +557,14 @@ simDropout <- function(sim, params) {
# Generate probabilites based on expression
lib.sizes
<-
colSums
(
true.counts
)
cell.facs
<-
log
(
lib.sizes
)
/
median
(
lib.sizes
)
drop.prob
<-
sapply
(
1
:
n
.c
ells
,
function
(
idx
)
{
drop.prob
<-
sapply
(
1
:
n
C
ells
,
function
(
idx
)
{
eta
<-
cell.facs
[
idx
]
*
(
log
(
cell.means
[,
idx
]))
return
(
logistic
(
eta
,
x0
=
dropout.mid
,
k
=
dropout.shape
))
})
# Decide which counts to keep
keep
<-
matrix
(
rbinom
(
n
.c
ells
*
n
.g
enes
,
1
,
1
-
drop.prob
),
nrow
=
n
.g
enes
,
ncol
=
n
.c
ells
)
keep
<-
matrix
(
rbinom
(
n
C
ells
*
n
G
enes
,
1
,
1
-
drop.prob
),
nrow
=
n
G
enes
,
ncol
=
n
C
ells
)
counts
<-
true.counts
*
keep
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment