Skip to content
Snippets Groups Projects
splatter.html 60.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • Luke Zappia's avatar
    Luke Zappia committed
    <!DOCTYPE html>
    <!-- Generated by pkgdown: do not edit by hand --><html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    
    Luke Zappia's avatar
    Luke Zappia committed
    <title>Introduction to Splatter • Splatter</title>
    <!-- jquery --><script src="https://code.jquery.com/jquery-3.1.0.min.js" integrity="sha384-nrOSfDHtoPMzJHjVTdCopGqIqeYETSXhZDFyniQ8ZHcVy08QesyHcnOUpMpqnmWq" crossorigin="anonymous"></script><!-- Bootstrap --><link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/cosmo/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous">
    
    Luke Zappia's avatar
    Luke Zappia committed
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script><!-- Font Awesome icons --><link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" integrity="sha384-T8Gy5hrqNKT+hzMclPo118YTQO6cYprQmhrYwIiQ/3axmI1hQomh7Ud2hPOy8SP1" crossorigin="anonymous">
    <!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet">
    <script src="../jquery.sticky-kit.min.js"></script><script src="../pkgdown.js"></script><!-- mathjax --><script src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    
    Luke Zappia's avatar
    Luke Zappia committed
    <![endif]--><!-- Google analytics --><script>
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
    
      ga('create', 'UA-52309538-4', 'auto');
      ga('send', 'pageview');
    
    </script>
    
    Luke Zappia's avatar
    Luke Zappia committed
    25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
    </head>
    <body>
        <div class="container template-vignette">
          <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
      <div class="container">
        <div class="navbar-header">
          <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>
          <a class="navbar-brand" href="../index.html">splatter</a>
        </div>
        <div id="navbar" class="navbar-collapse collapse">
          <ul class="nav navbar-nav">
    <li>
      <a href="../index.html">
        <span class="fa fa-home fa-lg"></span>
         
      </a>
    </li>
    <li>
      <a href="../articles/splatter.html">Get Started</a>
    </li>
    <li>
      <a href="../reference/index.html">Reference</a>
    </li>
    <li>
      <a href="../news/index.html">News</a>
    </li>
          </ul>
    <ul class="nav navbar-nav navbar-right">
    <li>
      <a href="https://github.com/Oshlack/splatter">
        <span class="fa fa-github fa-lg"></span>
         
      </a>
    </li>
          </ul>
    </div>
    <!--/.nav-collapse -->
      </div>
    <!--/.container -->
    </div>
    <!--/.navbar -->
    
          
          </header><div class="row">
      <div class="col-md-9">
        <div class="page-header toc-ignore">
          <h1>Introduction to Splatter</h1>
                            <h4 class="author">Luke Zappia</h4>
                
                <h4 class="date">2017-08-04</h4>
              </div>
    
        
        
    <div class="contents">
    <div class="figure">
    <img src="splatter-logo-small.png" alt="Splatter logo"><p class="caption">Splatter logo</p>
    </div>
    <p>Welcome to Splatter! Splatter is an R package for the simple simulation of single-cell RNA sequencing data. This vignette gives an overview and introduction to Splatter’s functionality.</p>
    <div id="installation" class="section level1">
    <h1 class="hasAnchor">
    <a href="#installation" class="anchor"></a>Installation</h1>
    <p>Splatter can be installed from Bioconductor:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">source</span>(<span class="st">"https://bioconductor.org/biocLite.R"</span>)
    <span class="kw">biocLite</span>(<span class="st">"splatter"</span>)</code></pre></div>
    <p>To install the most recent development version from Github use:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">biocLite</span>(<span class="st">"Oshlack/splatter"</span>, <span class="dt">dependencies =</span> <span class="ot">TRUE</span>, 
             <span class="dt">build_vignettes =</span> <span class="ot">TRUE</span>)</code></pre></div>
    </div>
    <div id="quickstart" class="section level1">
    <h1 class="hasAnchor">
    <a href="#quickstart" class="anchor"></a>Quickstart</h1>
    <p>Assuming you already have a matrix of count data similar to that you wish to simulate there are two simple steps to creating a simulated data set with Splatter. Here is an example using the example dataset in the <code>scater</code> package:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Load package</span>
    <span class="kw">library</span>(splatter)</code></pre></div>
    <pre><code>## Loading required package: scater</code></pre>
    <pre><code>## Loading required package: Biobase</code></pre>
    <pre><code>## Loading required package: BiocGenerics</code></pre>
    <pre><code>## Loading required package: parallel</code></pre>
    <pre><code>## 
    ## Attaching package: 'BiocGenerics'</code></pre>
    <pre><code>## The following objects are masked from 'package:parallel':
    ## 
    ##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    ##     clusterExport, clusterMap, parApply, parCapply, parLapply,
    ##     parLapplyLB, parRapply, parSapply, parSapplyLB</code></pre>
    <pre><code>## The following objects are masked from 'package:stats':
    ## 
    ##     IQR, mad, sd, var, xtabs</code></pre>
    <pre><code>## The following objects are masked from 'package:base':
    ## 
    ##     anyDuplicated, append, as.data.frame, cbind, colMeans,
    ##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
    ##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
    ##     lengths, Map, mapply, match, mget, order, paste, pmax,
    ##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
    ##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
    ##     tapply, union, unique, unsplit, which, which.max, which.min</code></pre>
    <pre><code>## Welcome to Bioconductor
    ## 
    ##     Vignettes contain introductory material; view with
    ##     'browseVignettes()'. To cite Bioconductor, see
    ##     'citation("Biobase")', and for packages 'citation("pkgname")'.</code></pre>
    <pre><code>## Loading required package: ggplot2</code></pre>
    <pre><code>## 
    ## Attaching package: 'scater'</code></pre>
    <pre><code>## The following object is masked from 'package:stats':
    ## 
    ##     filter</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Load example data</span>
    <span class="kw">data</span>(<span class="st">"sc_example_counts"</span>)
    <span class="co"># Estimate parameters from example data</span>
    params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatEstimate.html">splatEstimate</a></span>(sc_example_counts)
    <span class="co"># Simulate data using estimated parameters</span>
    sim &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(params, <span class="dt">dropout.present =</span> <span class="ot">FALSE</span>)</code></pre></div>
    <pre><code>## Getting parameters...</code></pre>
    <pre><code>## Creating simulation object...</code></pre>
    <pre><code>## Simulating library sizes...</code></pre>
    <pre><code>## Simulating gene means...</code></pre>
    <pre><code>## Simulating BCV...</code></pre>
    <pre><code>## Simulating counts..</code></pre>
    <pre><code>## Simulating dropout (if needed)...</code></pre>
    <pre><code>## Creating final SCESet...</code></pre>
    <pre><code>## Done!</code></pre>
    <p>These steps will be explained in detail in the following sections but briefly the first step takes a dataset and estimates simulation parameters from it and the second step takes those parameters and simulates a new dataset.</p>
    </div>
    <div id="the-splat-simulation" class="section level1">
    <h1 class="hasAnchor">
    <a href="#the-splat-simulation" class="anchor"></a>The Splat simulation</h1>
    <p>Before we look at how we estimate parameters let’s first look at how Splatter simulates data and what those parameters are. We use the term ‘Splat’ to refer to Splatter’s own simulation and to differentiate it from the package itself. The core of the Splat model is a gamma-Poisson distribution used to generate a gene by cell matrix of counts. Mean expression levels for each gene are simulated from a <a href="https://en.wikipedia.org/wiki/Gamma_distribution">gamma distribution</a> and the Biological Coefficient of Variation is used to enforce a mean-variance trend before counts are simulated from a <a href="https://en.wikipedia.org/wiki/Poisson_distribution">Poisson distribution</a>. Splat also allows you to simulate expression outlier genes (genes with mean expression outside the gamma distribution) and dropout (random knock out of counts based on mean expression). Each cell is given an expected library size (simulated from a log-normal distribution) that makes it easier to match to a given dataset.</p>
    <p>Splat can also simulate differential expression between groups of different types of cells or differentiation paths between different cell types where expression changes in a continuous way. These are described further in the <a href="#simulating-counts">simulating counts</a> section.</p>
    <div id="parameters" class="section level2">
    <h2 class="hasAnchor">
    <a href="#parameters" class="anchor"></a>Parameters</h2>
    <p>The parameters required for the Splat simulation are briefly described here:</p>
    <ul>
    <li>
    <strong>Global parameters</strong>
    <ul>
    <li>
    <code>nGenes</code> - The number of genes to simulate.</li>
    <li>
    <code>nCells</code> - The number of cells to simulate.</li>
    <li>
    <code>seed</code> - Seed to use for generating random numbers.</li>
    </ul>
    </li>
    <li>
    <strong>Batch parameters</strong>
    <ul>
    <li>
    <code>nBatches</code> - The number of batches to simulate.</li>
    <li>
    <code>batchCells</code> - The number of cells in each batch.</li>
    <li>
    <code>batch.facLoc</code> - Location (meanlog) parameter for the batch effects factor log-normal distribution.</li>
    <li>
    <code>batch.facScale</code> - Scale (sdlog) parameter for the batch effects factor log-normal distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Mean parameters</strong>
    <ul>
    <li>
    <code>mean.shape</code> - Shape parameter for the mean gamma distribution.</li>
    <li>
    <code>mean.rate</code> - Rate parameter for the mean gamma distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Library size parameters</strong>
    <ul>
    <li>
    <code>lib.loc</code> - Location (meanlog) parameter for the library size log-normal distribution.</li>
    <li>
    <code>lib.scale</code> - Scale (sdlog) parameter for the library size log-normal distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Expression outlier parameters</strong>
    <ul>
    <li>
    <code>out.prob</code> - Probability that a gene is an expression outlier.</li>
    <li>
    <code>out.facLoc</code> - Location (meanlog) parameter for the expression outlier factor log-normal distribution.</li>
    <li>
    <code>out.facScale</code> - Scale (sdlog) parameter for the expression outlier factor log-normal distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Group parameters</strong>
    <ul>
    <li>
    <code>nGroups</code> - The number of groups or paths to simulate.</li>
    <li>
    <code>group.prob</code> - The probabilities that cells come from particular groups.</li>
    </ul>
    </li>
    <li>
    <strong>Differential expression parameters</strong>
    <ul>
    <li>
    <code>de.prob</code> - Probability that a gene is differentially expressed in each group or path.</li>
    <li>
    <code>de.loProb</code> - Probability that a differentially expressed gene is down-regulated.</li>
    <li>
    <code>de.facLoc</code> - Location (meanlog) parameter for the differential expression factor log-normal distribution.</li>
    <li>
    <code>de.facScale</code> - Scale (sdlog) parameter for the differential expression factor log-normal distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Biological Coefficient of Variation parameters</strong>
    <ul>
    <li>
    <code>bcv.common</code> - Underlying common dispersion across all genes.</li>
    <li>
    <code>bcv.df</code> - Degrees of Freedom for the BCV inverse chi-squared distribution.</li>
    </ul>
    </li>
    <li>
    <strong>Dropout parameters</strong>
    <ul>
    <li>
    <code>dropout.present</code> - Logical. Whether to simulate dropout.</li>
    <li>
    <code>dropout.mid</code> - Midpoint parameter for the dropout logistic function.</li>
    <li>
    <code>dropout.shape</code> - Shape parameter for the dropout logistic function.</li>
    </ul>
    </li>
    <li>
    <strong>Differentiation path parameters</strong>
    <ul>
    <li>
    <code>path.from</code> - Vector giving the originating point of each path.</li>
    <li>
    <code>path.length</code> - Vector giving the number of steps to simulate along each path.</li>
    <li>
    <code>path.skew</code> - Vector giving the skew of each path.</li>
    <li>
    <code>path.nonlinearProb</code> - Probability that a gene changes expression in a non-linear way along the differentiation path.</li>
    <li>
    <code>path.sigmaFac</code> - Sigma factor for non-linear gene paths.</li>
    </ul>
    </li>
    </ul>
    <p>While this may look like a lot of parameters Splatter attempts to make it easy for the user, both by providing sensible defaults and making it easy to estimate many of the parameters from real data. For more details on the parameters see <code><a href="../reference/SplatParams.html">?SplatParams</a></code>.</p>
    </div>
    </div>
    <div id="the-splatparams-object" class="section level1">
    <h1 class="hasAnchor">
    <a href="#the-splatparams-object" class="anchor"></a>The <code>SplatParams</code> object</h1>
    <p>All the parameters for the Splat simulation are stored in a <code>SplatParams</code> object. Let’s create a new one and see what it looks like.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/newParams.html">newSplatParams</a></span>()
    params</code></pre></div>
    <pre><code>## A Params object of class SplatParams 
    ## Parameters can be (estimable) or [not estimable], 'Default' or 'NOT DEFAULT'. 
    ## 
    ## Global: 
    ## (Genes)  (Cells)   [Seed]  
    ##   10000      100    98513  
    ## 
    ## 27 additional parameters 
    ## 
    ## Batches: 
    ##     [Batches]  [Batch Cells]     [Location]        [Scale]  
    ##             1            100            0.1            0.1  
    ## 
    ## Mean: 
    ##  (Rate)  (Shape)  
    ##     0.3      0.6  
    ## 
    ## Library size: 
    ## (Location)     (Scale)  
    ##         11         0.2  
    ## 
    ## Exprs outliers: 
    ## (Probability)     (Location)        (Scale)  
    ##          0.05              4            0.5  
    ## 
    ## Groups: 
    ##      [Groups]  [Group Probs]  
    ##             1              1  
    ## 
    ## Diff expr: 
    ## [Probability]    [Down Prob]     [Location]        [Scale]  
    ##           0.1            0.5            0.1            0.4  
    ## 
    ## BCV: 
    ## (Common Disp)          (DoF)  
    ##           0.1             60  
    ## 
    ## Dropout: 
    ##  [Present]  (Midpoint)     (Shape)  
    ##      FALSE           0          -1  
    ## 
    ## Paths: 
    ##         [From]        [Length]          [Skew]    [Non-linear]  
    ##              0             100             0.5             0.1  
    ## [Sigma Factor]  
    ##            0.8</code></pre>
    <p>As well as telling us what type of object we have (“A <code>Params</code> object of class <code>SplatParams</code>”) and showing us the values of the parameters, this output gives us some extra information. We can see which parameters can be estimated by the <code>splatEstimate</code> function (those in parentheses), which can’t be estimated (those in brackets) and which have been changed from their default values (those in ALL CAPS).</p>
    <div id="getting-and-setting" class="section level2">
    <h2 class="hasAnchor">
    <a href="#getting-and-setting" class="anchor"></a>Getting and setting</h2>
    <p>If we want to look at a particular parameter, for example the number of genes to simulate, we can extract it using the <code>getParam</code> function:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw"><a href="../reference/getParam.html">getParam</a></span>(params, <span class="st">"nGenes"</span>)</code></pre></div>
    <pre><code>## [1] 10000</code></pre>
    <p>Alternatively, to give a parameter a new value we can use the <code>setParam</code> function:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/setParam.html">setParam</a></span>(params, <span class="st">"nGenes"</span>, <span class="dv">5000</span>)
    <span class="kw"><a href="../reference/getParam.html">getParam</a></span>(params, <span class="st">"nGenes"</span>)</code></pre></div>
    <pre><code>## [1] 5000</code></pre>
    <p>If we want to extract multiple parameters (as a list) or set multiple parameters we can use the <code>getParams</code> or <code>setParams</code> functions:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Set multiple parameters at once (using a list)</span>
    params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/setParams.html">setParams</a></span>(params, <span class="dt">update =</span> <span class="kw">list</span>(<span class="dt">nGenes =</span> <span class="dv">8000</span>, <span class="dt">mean.rate =</span> <span class="fl">0.5</span>))
    <span class="co"># Extract multiple parameters as a list</span>
    <span class="kw"><a href="../reference/getParams.html">getParams</a></span>(params, <span class="kw">c</span>(<span class="st">"nGenes"</span>, <span class="st">"mean.rate"</span>, <span class="st">"mean.shape"</span>))</code></pre></div>
    <pre><code>## $nGenes
    ## [1] 8000
    ## 
    ## $mean.rate
    ## [1] 0.5
    ## 
    ## $mean.shape
    ## [1] 0.6</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Set multiple parameters at once (using additional arguments)</span>
    params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/setParams.html">setParams</a></span>(params, <span class="dt">mean.shape =</span> <span class="fl">0.5</span>, <span class="dt">de.prob =</span> <span class="fl">0.2</span>)
    params</code></pre></div>
    <pre><code>## A Params object of class SplatParams 
    ## Parameters can be (estimable) or [not estimable], 'Default' or 'NOT DEFAULT'. 
    ## 
    ## Global: 
    ## (GENES)  (Cells)   [Seed]  
    ##    8000      100    98513  
    ## 
    ## 27 additional parameters 
    ## 
    ## Batches: 
    ##     [Batches]  [Batch Cells]     [Location]        [Scale]  
    ##             1            100            0.1            0.1  
    ## 
    ## Mean: 
    ##  (RATE)  (SHAPE)  
    ##     0.5      0.5  
    ## 
    ## Library size: 
    ## (Location)     (Scale)  
    ##         11         0.2  
    ## 
    ## Exprs outliers: 
    ## (Probability)     (Location)        (Scale)  
    ##          0.05              4            0.5  
    ## 
    ## Groups: 
    ##      [Groups]  [Group Probs]  
    ##             1              1  
    ## 
    ## Diff expr: 
    ## [PROBABILITY]    [Down Prob]     [Location]        [Scale]  
    ##           0.2            0.5            0.1            0.4  
    ## 
    ## BCV: 
    ## (Common Disp)          (DoF)  
    ##           0.1             60  
    ## 
    ## Dropout: 
    ##  [Present]  (Midpoint)     (Shape)  
    ##      FALSE           0          -1  
    ## 
    ## Paths: 
    ##         [From]        [Length]          [Skew]    [Non-linear]  
    ##              0             100             0.5             0.1  
    ## [Sigma Factor]  
    ##            0.8</code></pre>
    <p>The parameters which have changed are now shown in ALL CAPS to indicate that they have been changed from the default.</p>
    <p>We can also set parameters directly when we call <code>newSplatParams</code>:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/newParams.html">newSplatParams</a></span>(<span class="dt">lib.loc =</span> <span class="dv">12</span>, <span class="dt">lib.scale =</span> <span class="fl">0.6</span>)
    <span class="kw"><a href="../reference/getParams.html">getParams</a></span>(params, <span class="kw">c</span>(<span class="st">"lib.loc"</span>, <span class="st">"lib.scale"</span>))</code></pre></div>
    <pre><code>## $lib.loc
    ## [1] 12
    ## 
    ## $lib.scale
    ## [1] 0.6</code></pre>
    </div>
    </div>
    <div id="estimating-parameters" class="section level1">
    <h1 class="hasAnchor">
    <a href="#estimating-parameters" class="anchor"></a>Estimating parameters</h1>
    <p>Splat allows you to estimate many of its parameters from a data set containing counts using the <code>splatEstimate</code> function.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Check that sc_example counts is an integer matrix</span>
    <span class="kw">class</span>(sc_example_counts)</code></pre></div>
    <pre><code>## [1] "matrix"</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">typeof</span>(sc_example_counts)</code></pre></div>
    <pre><code>## [1] "integer"</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Check the dimensions, each row is a gene, each column is a cell</span>
    <span class="kw">dim</span>(sc_example_counts)</code></pre></div>
    <pre><code>## [1] 2000   40</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Show the first few entries</span>
    sc_example_counts[<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
    <pre><code>##           Cell_001 Cell_002 Cell_003 Cell_004 Cell_005
    ## Gene_0001        0      123        2        0        0
    ## Gene_0002      575       65        3     1561     2311
    ## Gene_0003        0        0        0        0     1213
    ## Gene_0004        0        1        0        0        0
    ## Gene_0005        0        0       11        0        0</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">params &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatEstimate.html">splatEstimate</a></span>(sc_example_counts)</code></pre></div>
    <p>Here we estimated parameters from a counts matrix but <code>splatEstimate</code> can also take an <code>SCESet</code> object from the <code>scater</code> package. The estimation process has the following steps:</p>
    <ol style="list-style-type: decimal">
    <li>Mean parameters are estimated by fitting a gamma distribution to the mean expression levels.</li>
    <li>Library size parameters are estimated by fitting a log-normal distribution to the library sizes.</li>
    <li>Expression outlier parameters are estimated by determining the number of outliers and fitting a log-normal distribution to their difference from the median.</li>
    <li>BCV parameters are estimated using the <code>estimateDisp</code> function from the <code>edgeR</code> package.</li>
    <li>Dropout parameters are estimated by checking if dropout is present and fitting a logistic function to the relationship between mean expression and proportion of zeros.</li>
    </ol>
    <p>For more details of the estimation procedures see <code><a href="../reference/splatEstimate.html">?splatEstimate</a></code>.</p>
    </div>
    <div id="simulating-counts" class="section level1">
    <h1 class="hasAnchor">
    <a href="#simulating-counts" class="anchor"></a>Simulating counts</h1>
    <p>Once we have a set of parameters we are happy with we can use <code>splatSimulate</code> to simulate counts. If we want to make small adjustments to the parameters we can provide them as additional arguments, alternatively if we don’t supply any parameters the defaults will be used:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(params, <span class="dt">nGenes =</span> <span class="dv">1000</span>, <span class="dt">dropout.present =</span> <span class="ot">FALSE</span>)</code></pre></div>
    <pre><code>## Getting parameters...</code></pre>
    <pre><code>## Creating simulation object...</code></pre>
    <pre><code>## Simulating library sizes...</code></pre>
    <pre><code>## Simulating gene means...</code></pre>
    <pre><code>## Simulating BCV...</code></pre>
    <pre><code>## Simulating counts..</code></pre>
    <pre><code>## Simulating dropout (if needed)...</code></pre>
    <pre><code>## Creating final SCESet...</code></pre>
    <pre><code>## Done!</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim</code></pre></div>
    <pre><code>## SCESet (storageMode: lockedEnvironment)
    ## assayData: 1000 features, 40 samples 
    ##   element names: BaseCellMeans, BatchCellMeans, BCV, CellMeans, counts, exprs, TrueCounts 
    ## protocolData: none
    ## phenoData
    ##   sampleNames: Cell1 Cell2 ... Cell40 (40 total)
    ##   varLabels: Cell Batch ExpLibSize
    ##   varMetadata: labelDescription
    ## featureData
    ##   featureNames: Gene1 Gene2 ... Gene1000 (1000 total)
    ##   fvarLabels: Gene BaseGeneMean OutlierFactor GeneMean
    ##   fvarMetadata: labelDescription
    ## experimentData: use 'experimentData(object)'
    ## Annotation:</code></pre>
    <p>Looking at the output of <code>splatSimulate</code> we can see that <code>sim</code> is an <code>SCESet</code> object with 1000 features (genes) and 40 samples (cells). The main part of this object is a features by samples matrix containing the simulated counts (accessed using <code>counts</code>), although it can also hold other expression measures such as FPKM or TPM. Additionally, an <code>SCESet</code> contains phenotype information about each cell (accessed using <code>pData</code>) and feature information about each gene (accessed using <code>fData</code>). Splatter uses these slots to store information about the intermediate values of the simulation.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Access the counts</span>
    <span class="kw">counts</span>(sim)[<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
    <pre><code>##       Cell1 Cell2 Cell3 Cell4 Cell5
    ## Gene1    71    18     0     0     0
    ## Gene2     0   202    97     0    13
    ## Gene3    76     0    70     0  4352
    ## Gene4    13     0     0     0     0
    ## Gene5   563    89    77     0    17</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Information about genes</span>
    <span class="kw">head</span>(<span class="kw">fData</span>(sim))</code></pre></div>
    <pre><code>##        Gene BaseGeneMean OutlierFactor  GeneMean
    ## Gene1 Gene1     12.15713             1  12.15713
    ## Gene2 Gene2     24.31063             1  24.31063
    ## Gene3 Gene3    297.25887             1 297.25887
    ## Gene4 Gene4     59.50166             1  59.50166
    ## Gene5 Gene5     22.53820             1  22.53820
    ## Gene6 Gene6     20.61026             1  20.61026</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Information about cells</span>
    <span class="kw">head</span>(<span class="kw">pData</span>(sim))</code></pre></div>
    <pre><code>##        Cell  Batch ExpLibSize
    ## Cell1 Cell1 Batch1   95909.41
    ## Cell2 Cell2 Batch1  244118.47
    ## Cell3 Cell3 Batch1  240747.25
    ## Cell4 Cell4 Batch1  271459.87
    ## Cell5 Cell5 Batch1  424160.08
    ## Cell6 Cell6 Batch1  228110.31</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Gene by cell matrices</span>
    <span class="kw">names</span>(<span class="kw">assayData</span>(sim))</code></pre></div>
    <pre><code>## [1] "TrueCounts"     "BaseCellMeans"  "BatchCellMeans" "counts"        
    ## [5] "BCV"            "CellMeans"      "exprs"</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Example of cell means matrix</span>
    <span class="kw">get_exprs</span>(sim, <span class="st">"CellMeans"</span>)[<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
    <pre><code>##             Cell1        Cell2        Cell3        Cell4        Cell5
    ## Gene1  65.5696590 1.391352e+01   0.06193530 2.453784e-02 1.355897e-03
    ## Gene2   0.4790134 2.126821e+02 102.46301607 3.225135e-09 9.960240e+00
    ## Gene3  67.4445673 4.695054e-05  71.05222310 6.950506e-11 4.334285e+03
    ## Gene4  10.4052334 9.127828e-08   0.01250279 1.721067e-07 1.163755e-16
    ## Gene5 556.8580925 8.854150e+01  67.68883291 1.097594e-06 1.449732e+01</code></pre>
    <p>An additional (big) advantage of outputting an <code>SCESet</code> is that we get immediate access to all of the <code>scater</code> functions. For example we can make a PCA plot:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plotPCA</span>(sim)</code></pre></div>
    <p><img src="splatter_files/figure-html/pca-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>(<strong>NOTE:</strong> Your values and plots may look different as the simulation is random and produces different results each time it is run.)</p>
    <p>For more details of the <code>SCESet</code> and what you can do with <code>scater</code> refer to the <code>scater</code> documentation and <a href="https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/vignette.html">vignette</a>.</p>
    <p>The <code>splatSimulate</code> function outputs the following additional information about the simulation:</p>
    <ul>
    <li>
    <strong>Cell information (<code>pData</code>)</strong>
    <ul>
    <li>
    <code>Cell</code> - Unique cell identifier.</li>
    <li>
    <code>Group</code> - The group or path the cell belongs to.</li>
    <li>
    <code>ExpLibSize</code> - The expected library size for that cell.</li>
    <li>
    <code>Step</code> (paths only) - How far along the path each cell is.</li>
    </ul>
    </li>
    <li>
    <strong>Gene information (<code>fData</code>)</strong>
    <ul>
    <li>
    <code>Gene</code> - Unique gene identifier.</li>
    <li>
    <code>BaseGeneMean</code> - The base expression level for that gene.</li>
    <li>
    <code>OutlierFactor</code> - Expression outlier factor for that gene (1 is not an outlier).</li>
    <li>
    <code>GeneMean</code> - Expression level after applying outlier factors.</li>
    <li>
    <code>DEFac[Group]</code> - The differential expression factor for each gene in a particular group (1 is not differentially expressed).</li>
    <li>
    <code>GeneMean[Group]</code> - Expression level of a gene in a particular group after applying differential expression factors.</li>
    </ul>
    </li>
    <li>
    <strong>Gene by cell information (<code>assayData</code>)</strong>
    <ul>
    <li>
    <code>BaseCellMeans</code> - The expression of genes in each cell adjusted for expected library size.</li>
    <li>
    <code>BCV</code> - The Biological Coefficient of Variation for each gene in each cell.</li>
    <li>
    <code>CellMeans</code> - The expression level of genes in each cell adjusted for BCV.</li>
    <li>
    <code>TrueCounts</code> - The simulated counts before dropout.</li>
    <li>
    <code>Dropout</code> - Logical matrix showing which counts have been dropped in which cells.</li>
    </ul>
    </li>
    </ul>
    <p>Values that have been added by Splatter are named using <code>UpperCamelCase</code> to separate them from the <code>underscore_naming</code> used by <code>scater</code>. For more information on the simulation see <code><a href="../reference/splatSimulate.html">?splatSimulate</a></code>.</p>
    <div id="simulating-groups" class="section level2">
    <h2 class="hasAnchor">
    <a href="#simulating-groups" class="anchor"></a>Simulating groups</h2>
    <p>So far we have only simulated a single population of cells but often we are interested in investigating a mixed population of cells and looking to see what cell types are present or what differences there are between them. Splatter is able to simulate these situations by changing the <code>method</code> argument. Here we are going to simulate two groups by specifying the <code>group.prob</code> parameter and setting the <code>method</code> parameter to <code>"groups"</code>:</p>
    <p>(<strong>NOTE:</strong> We have also set the <code>verbose</code> argument to <code>FALSE</code> to stop Splatter printing progress messages.)</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim.groups &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(<span class="dt">group.prob =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>), <span class="dt">method =</span> <span class="st">"groups"</span>,
                                <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    <span class="kw">plotPCA</span>(sim.groups, <span class="dt">colour_by =</span> <span class="st">"Group"</span>)</code></pre></div>
    <p><img src="splatter_files/figure-html/groups-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>As we have set both the group probabilities to 0.5 we should get approximately equal numbers of cells in each group (around 50 in this case). If we wanted uneven groups we could set <code>group.prob</code> to any set of probabilities that sum to 1.</p>
    </div>
    <div id="simulating-paths" class="section level2">
    <h2 class="hasAnchor">
    <a href="#simulating-paths" class="anchor"></a>Simulating paths</h2>
    <p>The other situation that is often of interest is a differentiation process where one cell type is changing into another. Splatter approximates this process by simulating a series of steps between two groups and randomly assigning each cell to a step. We can create this kind of simulation using the <code>"paths"</code> method.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim.paths &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(<span class="dt">method =</span> <span class="st">"paths"</span>, <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    <span class="kw">plotPCA</span>(sim.paths, <span class="dt">colour_by =</span> <span class="st">"Step"</span>)</code></pre></div>
    <p><img src="splatter_files/figure-html/paths-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>Here the colours represent the “step” of each cell or how far along the differentiation path it is. We can see that the cells with dark colours are more similar to the originating cell type and the light coloured cells are closer to the final, differentiated, cell type. By setting additional parameters it is possible to simulate more complex processes (for example multiple mature cell types from a single progenitor).</p>
    </div>
    <div id="batch-effects" class="section level2">
    <h2 class="hasAnchor">
    <a href="#batch-effects" class="anchor"></a>Batch effects</h2>
    <p>Another factor that is important in the analysis of any sequencing experiment is batch effects, technical variation that is common to a set of samples processed at the same time. We apply batch effects by telling Splatter how many cells are in each batch:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim.batches &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(<span class="dt">batchCells =</span> <span class="kw">c</span>(<span class="dv">50</span>, <span class="dv">50</span>), <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    <span class="kw">plotPCA</span>(sim.batches, <span class="dt">colour_by =</span> <span class="st">"Batch"</span>)</code></pre></div>
    <p><img src="splatter_files/figure-html/batches-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>This looks a lot like when we simulated groups and that is because the process is very similar. The difference is that batch effects are applied to all genes, not just those that are differentially expressed, and the effects are usually smaller. By combining groups and batches we can simulate both unwanted variation that we aren’t interested in (batch) and the wanted variation we are looking for (group):</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim.groups &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(<span class="dt">batchCells =</span> <span class="kw">c</span>(<span class="dv">50</span>, <span class="dv">50</span>), <span class="dt">group.prob =</span> <span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>),
                                <span class="dt">method =</span> <span class="st">"groups"</span>, <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    <span class="kw">plotPCA</span>(sim.groups, <span class="dt">shape_by =</span> <span class="st">"Batch"</span>, <span class="dt">colour_by =</span> <span class="st">"Group"</span>)</code></pre></div>
    <p><img src="splatter_files/figure-html/batch-groups-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>Here we see that the effects of the group (first component) are stronger than the batch effects (second component) but by adjusting the parameters we could make the batch effects dominate.</p>
    </div>
    <div id="convenience-functions" class="section level2">
    <h2 class="hasAnchor">
    <a href="#convenience-functions" class="anchor"></a>Convenience functions</h2>
    <p>Each of the Splatter simulation methods has its own convenience function. To simulate a single population use <code><a href="../reference/splatSimulate.html">splatSimulateSingle()</a></code> (equivalent to <code><a href="../reference/splatSimulate.html">splatSimulate(method = "single")</a></code>), to simulate groups use <code><a href="../reference/splatSimulate.html">splatSimulateGroups()</a></code> (equivalent to <code><a href="../reference/splatSimulate.html">splatSimulate(method = "groups")</a></code>) or to simulate paths use <code><a href="../reference/splatSimulate.html">splatSimulatePaths()</a></code> (equivalent to <code><a href="../reference/splatSimulate.html">splatSimulate(method = "paths")</a></code>).</p>
    </div>
    </div>
    <div id="other-simulations" class="section level1">
    <h1 class="hasAnchor">
    <a href="#other-simulations" class="anchor"></a>Other simulations</h1>
    <p>As well as its own Splat simulation method the Splatter package contains implementations of other single-cell RNA-seq simulations that have been published or wrappers around simulations included in other packages. To see all the available simulations run the <code><a href="../reference/listSims.html">listSims()</a></code> function:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw"><a href="../reference/listSims.html">listSims</a></span>()</code></pre></div>
    <pre><code>## Splatter currently contains 8 simulations 
    ## 
    ## Splat (splat) 
    ## DOI:      Github:  
    ## The Splat simulation generates means from a gamma distribution, adjusts them for BCV and generates counts from a gamma-poisson. Dropout can be optionally added. 
    ## 
    ## Splat Single (splatSingle) 
    ## DOI:      Github:  
    ## The Splat simulation with a single population. 
    ## 
    ## Splat Groups (splatGroups) 
    ## DOI:      Github:  
    ## The Splat simulation with multiple groups. Each group can have it's own differential expression probability and fold change distribution. 
    ## 
    ## Splat Paths (splatPaths) 
    ## DOI:      Github:  
    ## The Splat simulation with differentiation paths. Each path can have it's own length, skew and probability. Genes can change in non-linear ways. 
    ## 
    ## Simple (simple) 
    ## DOI:      Github:  
    ## A simple simulation with gamma means and negative binomial counts. 
    ## 
    ## Lun (lun) 
    ## DOI: 10.1186/s13059-016-0947-7    Github: MarioniLab/Deconvolution2016 
    ## Gamma distributed means and negative binomial counts. Cells are given a size factor and differential expression can be simulated with fixed fold changes. 
    ## 
    ## Lun 2 (lun2) 
    ## DOI: 10.1101/073973   Github: MarioniLab/PlateEffects2016 
    ## Negative binomial counts where the means and dispersions have been sampled from a real dataset. The core feature of the Lun 2 simulation is the addition of plate effects. Differential expression can be added between two groups of plates and optionally a zero-inflated negative-binomial can be used. 
    ## 
    ## scDD (scDD) 
    ## DOI: 10.1186/s13059-016-1077-y    Github: kdkorthauer/scDD 
    ## The scDD simulation samples a given dataset and can simulate differentially expressed and differentially distributed genes between two conditions.</code></pre>
    <p>(or more conveniently for the vignette as a table)</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">knitr<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/knitr/topics/kable">kable</a></span>(<span class="kw"><a href="../reference/listSims.html">listSims</a></span>(<span class="dt">print =</span> <span class="ot">FALSE</span>))</code></pre></div>
    <table class="table">
    <thead><tr class="header">
    <th align="left">Name</th>
    <th align="left">Prefix</th>
    <th align="left">DOI</th>
    <th align="left">Github</th>
    <th align="left">Description</th>
    </tr></thead>
    <tbody>
    <tr class="odd">
    <td align="left">Splat</td>
    <td align="left">splat</td>
    <td align="left"></td>
    <td align="left"></td>
    <td align="left">The Splat simulation generates means from a gamma distribution, adjusts them for BCV and generates counts from a gamma-poisson. Dropout can be optionally added.</td>
    </tr>
    <tr class="even">
    <td align="left">Splat Single</td>
    <td align="left">splatSingle</td>
    <td align="left"></td>
    <td align="left"></td>
    <td align="left">The Splat simulation with a single population.</td>
    </tr>
    <tr class="odd">
    <td align="left">Splat Groups</td>
    <td align="left">splatGroups</td>
    <td align="left"></td>
    <td align="left"></td>
    <td align="left">The Splat simulation with multiple groups. Each group can have it’s own differential expression probability and fold change distribution.</td>
    </tr>
    <tr class="even">
    <td align="left">Splat Paths</td>
    <td align="left">splatPaths</td>
    <td align="left"></td>
    <td align="left"></td>
    <td align="left">The Splat simulation with differentiation paths. Each path can have it’s own length, skew and probability. Genes can change in non-linear ways.</td>
    </tr>
    <tr class="odd">
    <td align="left">Simple</td>
    <td align="left">simple</td>
    <td align="left"></td>
    <td align="left"></td>
    <td align="left">A simple simulation with gamma means and negative binomial counts.</td>
    </tr>
    <tr class="even">
    <td align="left">Lun</td>
    <td align="left">lun</td>
    <td align="left">10.1186/s13059-016-0947-7</td>
    <td align="left">MarioniLab/Deconvolution2016</td>
    <td align="left">Gamma distributed means and negative binomial counts. Cells are given a size factor and differential expression can be simulated with fixed fold changes.</td>
    </tr>
    <tr class="odd">
    <td align="left">Lun 2</td>
    <td align="left">lun2</td>
    <td align="left">10.1101/073973</td>
    <td align="left">MarioniLab/PlateEffects2016</td>
    <td align="left">Negative binomial counts where the means and dispersions have been sampled from a real dataset. The core feature of the Lun 2 simulation is the addition of plate effects. Differential expression can be added between two groups of plates and optionally a zero-inflated negative-binomial can be used.</td>
    </tr>
    <tr class="even">
    <td align="left">scDD</td>
    <td align="left">scDD</td>
    <td align="left">10.1186/s13059-016-1077-y</td>
    <td align="left">kdkorthauer/scDD</td>
    <td align="left">The scDD simulation samples a given dataset and can simulate differentially expressed and differentially distributed genes between two conditions.</td>
    </tr>
    </tbody>
    </table>
    <p>Each simulation has its own prefix which gives the name of the functions associated with that simulation. For example the prefix for the simple simulation is <code>simple</code> so it would store its parameters in a <code>SimpleParams</code> object that can be created using <code><a href="../reference/newParams.html">newSimpleParams()</a></code> or estimated from real data using <code><a href="../reference/simpleEstimate.html">simpleEstimate()</a></code>. To simulate data using that simulation you would use <code><a href="../reference/simpleSimulate.html">simpleSimulate()</a></code>. Each simulation returns an <code>SCESet</code> object with intermediate values similar to that returned by <code><a href="../reference/splatSimulate.html">splatSimulate()</a></code>. For more detailed information on each simulation see the appropriate help page (e.g. <code><a href="../reference/simpleSimulate.html">?simpleSimulate</a></code> for information on how the simple simulation works or <code><a href="../reference/lun2Estimate.html">?lun2Estimate</a></code> for details of how the Lun 2 simulation estimates parameters) or refer to the appropriate paper or package.</p>
    </div>
    <div id="other-expression-values" class="section level1">
    <h1 class="hasAnchor">
    <a href="#other-expression-values" class="anchor"></a>Other expression values</h1>
    <p>Splatter is designed to simulate count data but some analysis methods expect other expression values, particularly length-normalised values such as TPM or FPKM. The <code>scater</code> package has functions for adding these values to an <code>SCESet</code> object but they require a length for each gene. The <code>addGeneLengths</code> function can be used to simulate these lengths:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim &lt;-<span class="st"> </span><span class="kw"><a href="../reference/simpleSimulate.html">simpleSimulate</a></span>(<span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    sim &lt;-<span class="st"> </span><span class="kw"><a href="../reference/addGeneLengths.html">addGeneLengths</a></span>(sim)
    <span class="kw">head</span>(<span class="kw">fData</span>(sim))</code></pre></div>
    <pre><code>##        Gene     GeneMean Length
    ## Gene1 Gene1 0.0002067453   1694
    ## Gene2 Gene2 0.2420681809   1908
    ## Gene3 Gene3 1.1117145796   6058
    ## Gene4 Gene4 0.1920623085  15962
    ## Gene5 Gene5 0.1872010591   3664
    ## Gene6 Gene6 0.2867576121  12947</code></pre>
    <p>We can then use <code>scater</code> to calculate TPM:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">tpm</span>(sim) &lt;-<span class="st"> </span><span class="kw">calculateTPM</span>(sim, <span class="kw">fData</span>(sim)<span class="op">$</span>Length)
    <span class="kw">tpm</span>(sim)[<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>, <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
    <pre><code>##          Cell1 Cell2    Cell3    Cell4 Cell5
    ## Gene1  0.00000     0  0.00000  0.00000     0
    ## Gene2  0.00000     0  0.00000  0.00000     0
    ## Gene3 53.00124     0 26.08949 53.40399     0
    ## Gene4  0.00000     0  0.00000  0.00000     0
    ## Gene5 43.81571     0 43.13596  0.00000     0</code></pre>
    <p>The default method used by <code>addGeneLengths</code> to simulate lengths is to generate values from a log-normal distribution which are then rounded to give an integer length. The parameters for this distribution are based on human protein coding genes but can be adjusted if needed (for example for other species). Alternatively lengths can be sampled from a provided vector (see <code><a href="../reference/addGeneLengths.html">?addGeneLengths</a></code> for details and an example).</p>
    </div>
    <div id="comparing-simulations-and-real-data" class="section level1">
    <h1 class="hasAnchor">
    <a href="#comparing-simulations-and-real-data" class="anchor"></a>Comparing simulations and real data</h1>
    <p>One thing you might like to do after simulating data is to compare it to a real dataset, or compare simulations with different parameters or models. Splatter provides a function <code>compareSCESets</code> that aims to make these comparisons easier. As the name suggests this function takes a list of <code>SCESet</code> objects, combines the datasets and produces some plots comparing them. Let’s make two small simulations and see how they compare.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">sim1 &lt;-<span class="st"> </span><span class="kw"><a href="../reference/splatSimulate.html">splatSimulate</a></span>(<span class="dt">nGenes =</span> <span class="dv">1000</span>, <span class="dt">batchCells =</span> <span class="dv">20</span>, <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    sim2 &lt;-<span class="st"> </span><span class="kw"><a href="../reference/simpleSimulate.html">simpleSimulate</a></span>(<span class="dt">nGenes =</span> <span class="dv">1000</span>, <span class="dt">nCells =</span> <span class="dv">20</span>, <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
    comparison &lt;-<span class="st"> </span><span class="kw"><a href="../reference/compareSCESets.html">compareSCESets</a></span>(<span class="kw">list</span>(<span class="dt">Splat =</span> sim1, <span class="dt">Simple =</span> sim2))
    
    <span class="kw">names</span>(comparison)</code></pre></div>
    <pre><code>## [1] "FeatureData" "PhenoData"   "Plots"</code></pre>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">names</span>(comparison<span class="op">$</span>Plots)</code></pre></div>
    <pre><code>## [1] "Means"        "Variances"    "MeanVar"      "LibrarySizes"
    ## [5] "ZerosGene"    "ZerosCell"    "MeanZeros"</code></pre>
    <p>The returned list has three items. The first two are the combined datasets by gene (<code>FeatureData</code>) and by cell (<code>PhenoData</code>) and the third contains some comparison plots (produced using <code>ggplot2</code>), for example a plot of the distribution of means:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">comparison<span class="op">$</span>Plots<span class="op">$</span>Means</code></pre></div>
    <p><img src="splatter_files/figure-html/comparison-means-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>These are only a few of the plots you might want to consider but it should be easy to make more using the returned data. For example, we could plot the number of expressed genes against the library size:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"ggplot2"</span>)
    <span class="kw"><a href="http://www.rdocumentation.org/packages/ggplot2/topics/ggplot">ggplot</a></span>(comparison<span class="op">$</span>PhenoData,
           <span class="kw"><a href="http://www.rdocumentation.org/packages/ggplot2/topics/aes">aes</a></span>(<span class="dt">x =</span> total_counts, <span class="dt">y =</span> total_features, <span class="dt">colour =</span> Dataset)) <span class="op">+</span>
    <span class="st">    </span><span class="kw"><a href="http://www.rdocumentation.org/packages/ggplot2/topics/geom_point">geom_point</a></span>()</code></pre></div>
    <p><img src="splatter_files/figure-html/comparison-libsize-features-1.png" width="576" style="display: block; margin: auto;"></p>
    <div id="comparing-differences" class="section level2">
    <h2 class="hasAnchor">
    <a href="#comparing-differences" class="anchor"></a>Comparing differences</h2>
    <p>Sometimes instead of visually comparing datasets it may be more interesting to look at the differences between them. We can do this using the <code>diffSCESets</code> function. Similar to <code>compareSCESets</code> this function takes a list of <code>SCESet</code> objects but now we also specify one to be a reference. A series of similar plots are returned but instead of showing the overall distributions they demonstrate differences from the reference.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">difference &lt;-<span class="st"> </span><span class="kw"><a href="../reference/diffSCESets.html">diffSCESets</a></span>(<span class="kw">list</span>(<span class="dt">Splat =</span> sim1, <span class="dt">Simple =</span> sim2), <span class="dt">ref =</span> <span class="st">"Simple"</span>)
    difference<span class="op">$</span>Plots<span class="op">$</span>Means</code></pre></div>
    <p><img src="splatter_files/figure-html/difference-1.png" width="576" style="display: block; margin: auto;"></p>
    <p>We also get a series of Quantile-Quantile plots that can be used to compare distributions.</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">difference<span class="op">$</span>QQPlots<span class="op">$</span>Means</code></pre></div>
    <p><img src="splatter_files/figure-html/difference-qq-1.png" width="576" style="display: block; margin: auto;"></p>
    </div>
    <div id="making-panels" class="section level2">
    <h2 class="hasAnchor">
    <a href="#making-panels" class="anchor"></a>Making panels</h2>
    <p>Each of these comparisons makes several plots which can be a lot to look at. To make this easier, or to produce figures for publications, you can make use of the functions <code>makeCompPanel</code>, <code>makeDiffPanel</code> and <code>makeOverallPanel</code>.</p>
    <p>These functions combine the plots into a single panel using the <code>cowplot</code> package. The panels can be quite large and hard to view (for example in RStudio’s plot viewer) so it can be better to output the panels and view them separately. Luckily <code>cowplot</code> provides a convenient function for saving the images. Here are some suggested parameters for outputting each of the panels:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># This code is just an example and is not run</span>
    panel &lt;-<span class="st"> </span><span class="kw"><a href="../reference/makeCompPanel.html">makeCompPanel</a></span>(comparison)
    cowplot<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/cowplot/topics/save_plot">save_plot</a></span>(<span class="st">"comp_panel.png"</span>, panel, <span class="dt">nrow =</span> <span class="dv">4</span>, <span class="dt">ncol =</span> <span class="dv">3</span>)
    
    panel &lt;-<span class="st"> </span><span class="kw"><a href="../reference/makeDiffPanel.html">makeDiffPanel</a></span>(difference)
    cowplot<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/cowplot/topics/save_plot">save_plot</a></span>(<span class="st">"diff_panel.png"</span>, panel, <span class="dt">nrow =</span> <span class="dv">3</span>, <span class="dt">ncol =</span> <span class="dv">5</span>)
    
    panel &lt;-<span class="st"> </span><span class="kw"><a href="../reference/makeOverallPanel.html">makeOverallPanel</a></span>(comparison, difference)
    cowplot<span class="op">::</span><span class="kw"><a href="http://www.rdocumentation.org/packages/cowplot/topics/save_plot">save_plot</a></span>(<span class="st">"overall_panel.png"</span>, panel, <span class="dt">ncol =</span> <span class="dv">4</span>, <span class="dt">nrow =</span> <span class="dv">7</span>)</code></pre></div>
    </div>
    </div>
    <div id="citing-splatter" class="section level1">
    <h1 class="hasAnchor">
    <a href="#citing-splatter" class="anchor"></a>Citing Splatter</h1>
    <p>If you use Splatter in your work please cite our paper:</p>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">citation</span>(<span class="st">"splatter"</span>)</code></pre></div>
    <pre><code>## 
    ##   Zappia L, Phipson B, Oshlack A. Splatter: Simulation Of
    ##   Single-Cell RNA Sequencing Data. bioRxiv. 2017;
    ##   doi:10.1101/133173
    ## 
    ## A BibTeX entry for LaTeX users is
    ## 
    ##   @Article{,
    ##     author = {Luke Zappia and Belinda Phipson and Alicia Oshlack},
    ##     title = {Splatter: Simulation Of Single-Cell RNA Sequencing Data},
    ##     journal = {bioRxiv},
    ##     year = {2017},
    ##     url = {http://dx.doi.org/10.1101/133173},
    ##     doi = {10.1101/133173},
    ##   }</code></pre>
    </div>
    <div id="session-information" class="section level1 unnumbered">
    <h1 class="hasAnchor">
    <a href="#session-information" class="anchor"></a>Session information</h1>
    <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sessionInfo</span>()</code></pre></div>
    <pre><code>## R version 3.4.1 (2017-06-30)
    ## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    ## Running under: macOS Sierra 10.12.5
    ## 
    ## Matrix products: default
    ## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
    ## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
    ## 
    ## locale:
    ## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
    ## 
    ## attached base packages:
    ## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
    ## [8] base     
    ## 
    ## other attached packages:
    ## [1] splatter_1.1.4      scater_1.4.0        ggplot2_2.2.1      
    ## [4] Biobase_2.36.2      BiocGenerics_0.22.0
    ## 
    ## loaded via a namespace (and not attached):
    ##  [1] viridis_0.4.0              edgeR_3.18.1              
    ##  [3] splines_3.4.1              bit64_0.9-7               
    ##  [5] viridisLite_0.2.0          shiny_1.0.3               
    ##  [7] assertthat_0.2.0           highr_0.6                 
    ##  [9] sp_1.2-5                   stats4_3.4.1              
    ## [11] blob_1.1.0                 GenomeInfoDbData_0.99.0   
    ## [13] vipor_0.4.5                yaml_2.1.14               
    ## [15] RSQLite_2.0                backports_1.1.0           
    ## [17] lattice_0.20-35            glue_1.1.1                
    ## [19] limma_3.32.4               digest_0.6.12             
    ## [21] XVector_0.16.0             GenomicRanges_1.28.4      
    ## [23] checkmate_1.8.3            colorspace_1.3-2          
    ## [25] cowplot_0.7.0              htmltools_0.3.6           
    ## [27] httpuv_1.3.5               Matrix_1.2-10             
    ## [29] plyr_1.8.4                 XML_3.98-1.9              
    ## [31] pkgconfig_2.0.1            biomaRt_2.32.1            
    ## [33] zlibbioc_1.22.0            xtable_1.8-2              
    ## [35] scales_0.4.1               BiocParallel_1.10.1       
    ## [37] tibble_1.3.3               IRanges_2.10.2            
    ## [39] SummarizedExperiment_1.6.3 lazyeval_0.2.0            
    ## [41] survival_2.41-3            magrittr_1.5              
    ## [43] mime_0.5                   memoise_1.1.0             
    ## [45] evaluate_0.10.1            MASS_7.3-47               
    ## [47] beeswarm_0.2.3             shinydashboard_0.6.1      
    ## [49] fitdistrplus_1.0-9         tools_3.4.1               
    ## [51] data.table_1.10.4          matrixStats_0.52.2        
    ## [53] stringr_1.2.0              S4Vectors_0.14.3          
    ## [55] munsell_0.4.3              locfit_1.5-9.1            
    ## [57] DelayedArray_0.2.7         AnnotationDbi_1.38.1      
    ## [59] bindrcpp_0.2               akima_0.6-2               
    ## [61] compiler_3.4.1             GenomeInfoDb_1.12.2       
    ## [63] rlang_0.1.1.9000           rhdf5_2.20.0              
    ## [65] grid_3.4.1                 RCurl_1.95-4.8            
    ## [67] tximport_1.4.0             rjson_0.2.15              
    ## [69] labeling_0.3               bitops_1.0-6              
    ## [71] rmarkdown_1.6              gtable_0.2.0              
    ## [73] DBI_0.7                    reshape2_1.4.2            
    ## [75] R6_2.2.2                   gridExtra_2.2.1           
    ## [77] knitr_1.16                 dplyr_0.7.2               
    ## [79] bit_1.1-12                 bindr_0.1                 
    ## [81] rprojroot_1.2              stringi_1.1.5             
    ## [83] ggbeeswarm_0.5.3           Rcpp_0.12.12</code></pre>
    </div>
    </div>
      </div>
    
      <div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
            <div id="tocnav">
          <h2 class="hasAnchor">
    <a href="#tocnav" class="anchor"></a>Contents</h2>
          <ul class="nav nav-pills nav-stacked">
    <li><a href="#installation">Installation</a></li>
          <li><a href="#quickstart">Quickstart</a></li>
          <li>
    <a href="#the-splat-simulation">The Splat simulation</a><ul class="nav nav-pills nav-stacked">
    <li><a href="#parameters">Parameters</a></li>
          </ul>
    </li>
          <li>
    <a href="#the-splatparams-object">The <code>SplatParams</code> object</a><ul class="nav nav-pills nav-stacked">
    <li><a href="#getting-and-setting">Getting and setting</a></li>
          </ul>
    </li>
          <li><a href="#estimating-parameters">Estimating parameters</a></li>
          <li>
    <a href="#simulating-counts">Simulating counts</a><ul class="nav nav-pills nav-stacked">
    <li><a href="#simulating-groups">Simulating groups</a></li>
          <li><a href="#simulating-paths">Simulating paths</a></li>
          <li><a href="#batch-effects">Batch effects</a></li>
          <li><a href="#convenience-functions">Convenience functions</a></li>
          </ul>
    </li>
          <li><a href="#other-simulations">Other simulations</a></li>
          <li><a href="#other-expression-values">Other expression values</a></li>
          <li>
    <a href="#comparing-simulations-and-real-data">Comparing simulations and real data</a><ul class="nav nav-pills nav-stacked">
    <li><a href="#comparing-differences">Comparing differences</a></li>
          <li><a href="#making-panels">Making panels</a></li>
          </ul>
    </li>
          <li><a href="#citing-splatter">Citing Splatter</a></li>
          <li><a href="#session-information">Session information</a></li>
          </ul>
    </div>
          </div>
    
    </div>
    
    
          <footer><div class="copyright">
      <p>Developed by Luke Zappia, Belinda Phipson, Alicia Oshlack.</p>
    </div>
    
    <div class="pkgdown">
      <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a>.</p>
    </div>
    
          </footer>
    </div>
    
      </body>
    </html>