From 094d3002cbc4c25e579d542d80df42577c6e2b37 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Tue, 12 Jun 2018 15:51:43 +1000
Subject: [PATCH] Fix errors in splatEstLib normality test

Fixes #48
---
 DESCRIPTION        |  4 ++--
 NEWS.md            | 10 ++++++++++
 R/splat-estimate.R | 15 ++++++++++++---
 man/splatEstLib.Rd |  2 +-
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c4e7d36..988ada9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.4.0
-Date: 2018-04-30
+Version: 1.4.1
+Date: 2018-06-12
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 0ef8719..a4fdfa7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,13 @@
+## Version 1.4.1 (2018-06-12)
+
+* Fix normality testing error in splatEstLib
+* Correct p-value cutoff in normality test
+* Sample library sizes for normality testing if > 5000 cells
+
+# Version 1.4.0 (2018-05-02)
+
+* Bioconductor 3.7 release
+
 ## Version 1.3.6 (2018-04-30)
 
 * Set seed in sparseDCEstimate tests
diff --git a/R/splat-estimate.R b/R/splat-estimate.R
index 5af4646..0af1ced 100644
--- a/R/splat-estimate.R
+++ b/R/splat-estimate.R
@@ -101,7 +101,7 @@ splatEstMean <- function(norm.counts, params) {
 
 #' Estimate Splat library size parameters
 #'
-#' The Shapiro-Wilk test is used to determine if the library sizes are
+#' The Shapiro-Wilk test is used to determine if the library sizes are
 #' normally distributed. If so a normal distribution is fitted to the library
 #' sizes, if not (most cases) a log-normal distribution is fitted and the
 #' estimated parameters are added to the params object. See
@@ -116,8 +116,17 @@ splatEstMean <- function(norm.counts, params) {
 splatEstLib <- function(counts, params) {
 
     lib.sizes <- colSums(counts)
-    norm.test <- shapiro.test(lib.sizes)
-    lib.norm <- norm.test$p.value < 0.05
+
+    if (length(lib.sizes) > 5000) {
+        message("NOTE: More than 5000 cells provided. ",
+                "5000 sampled library sizes will be used to test normality.")
+        lib.sizes.sampled <- sample(lib.sizes, 5000, replace = FALSE)
+    } else {
+        lib.sizes.sampled <- lib.sizes
+    }
+
+    norm.test <- shapiro.test(lib.sizes.sampled)
+    lib.norm <- norm.test$p.value > 0.2
 
     if (lib.norm) {
         fit <- fitdistrplus::fitdist(lib.sizes, "norm")
diff --git a/man/splatEstLib.Rd b/man/splatEstLib.Rd
index c7eda13..30df262 100644
--- a/man/splatEstLib.Rd
+++ b/man/splatEstLib.Rd
@@ -15,7 +15,7 @@ splatEstLib(counts, params)
 splatParams object with estimated values.
 }
 \description{
-The Shapiro-Wilk test is used to determine if the library sizes are
+The Shapiro-Wilk test is used to determine if the library sizes are
 normally distributed. If so a normal distribution is fitted to the library
 sizes, if not (most cases) a log-normal distribution is fitted and the
 estimated parameters are added to the params object. See
-- 
GitLab