pachterlab · warrenmcg · Jun 27, 2018 · Jun 27, 2018 · Jun 28, 2018 · Jun 21, 2018
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sleuth
 Title: Tools for investigating RNA-Seq
-Version: 0.30.0
+Version: 0.30.0-1
 Authors@R: c(
   person("Harold", "Pimentel", , "[email protected]", role = c("aut", "cre")),
   person("Warren", "McGee", , "[email protected]", role = "aut"))
@@ -21,16 +21,18 @@ Imports:
     tidyr,
     reshape2,
     rhdf5,
+    rlang,
     parallel,
     lazyeval,
     matrixStats,
     pheatmap,
     shiny,
-    aggregation
+    aggregation,
+    limma
 Suggests:
     MASS,
     lintr,
     testthat,
     knitr
 VignetteBuilder: knitr
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.0
diff --git a/NAMESPACE b/NAMESPACE
@@ -24,18 +24,21 @@ S3method(transform_status,sleuth_model)
 export("transform_fun_counts<-")
 export("transform_fun_tpm<-")
 export(basic_filter)
+export(basic_shrink_fun)
 export(bias_table)
 export(counts_to_fpkm)
 export(counts_to_tpm)
 export(design_matrix)
 export(enclosed_brush)
 export(excluded_ids)
 export(extract_model)
+export(gene_from_gene)
 export(get_bootstrap_summary)
 export(get_bootstraps)
 export(get_quantile)
 export(is_kallisto_subset)
 export(kallisto_table)
+export(limma_shrink_fun)
 export(log_transform)
 export(melt_bootstrap_sleuth)
 export(models)
@@ -84,5 +87,7 @@ importFrom(data.table,fread)
 importFrom(dplyr,"%>%")
 importFrom(lazyeval,interp)
 importFrom(lazyeval,lazy)
+importFrom(limma,squeezeVar)
 importFrom(rhdf5,h5write)
 importFrom(rhdf5,h5write.default)
+importFrom(rlang,eval_tidy)
diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -380,7 +380,7 @@ dcast_bootstrap.kallisto <- function(obj, units, nsamples = NULL) {
 
 # Function to process bootstraps for parallelization
 process_bootstrap <- function(i, samp_name, kal_path,
-                              num_transcripts, est_count_sf,
+                              num_transcripts, est_count_sf, est_tpm_sf,
                               read_bootstrap_tpm, gene_mode,
                               extra_bootstrap_summary,
                               target_id, mappings, which_ids,
@@ -402,12 +402,11 @@ process_bootstrap <- function(i, samp_name, kal_path,
   eff_len <- rhdf5::h5read(kal_path$path, "aux/eff_lengths")
   bs_mat <- read_bootstrap_mat(fname = kal_path$path,
                                num_bootstraps = num_bootstrap,
-                               num_transcripts = num_transcripts,
-                               est_count_sf = est_count_sf)
+                               num_transcripts = num_transcripts)
 
   if (read_bootstrap_tpm) {
-    bs_tpm <- aperm(apply(bs_mat, 1, counts_to_tpm,
-                                eff_len))
+    bs_tpm <- t(apply(bs_mat, 1, counts_to_tpm,
+                      eff_len))
     colnames(bs_tpm) <- colnames(bs_mat)
 
     # gene level code is analogous here to below code
@@ -438,9 +437,9 @@ process_bootstrap <- function(i, samp_name, kal_path,
       bs_tpm <- as.matrix(bs_tpm[, -1])
       rm(tidy_tpm) # these tables are very large
     }
-    bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids])
-    bs_quant_tpm <- aperm(apply(bs_tpm, 2,
-                                quantile))
+    bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids], sf = est_tpm_sf)
+    bs_quant_tpm <- t(apply(bs_tpm, 2,
+                            quantile))
     colnames(bs_quant_tpm) <- c("min", "lower", "mid",
                                 "upper", "max")
     bs_quants$tpm <- bs_quant_tpm
@@ -493,10 +492,10 @@ process_bootstrap <- function(i, samp_name, kal_path,
     rm(tidy_bs, scaled_bs)
   }
 
-  bs_mat <- transform_fun_counts(bs_mat[, which_ids])
+  bs_mat <- transform_fun_counts(bs_mat[, which_ids], sf = est_count_sf)
   if (extra_bootstrap_summary) {
-    bs_quant_est_counts <- aperm(apply(bs_mat, 2,
-                                       quantile))
+    bs_quant_est_counts <- t(apply(bs_mat, 2,
+                                   quantile))
     colnames(bs_quant_est_counts) <- c("min", "lower",
                                        "mid", "upper", "max")
     bs_quants$est_counts <- bs_quant_est_counts

diff --git a/R/likelihood.R b/R/likelihood.R
@@ -4,34 +4,39 @@ compute_likelihood <- function(obj, which_model) {
   stopifnot(is(obj, "sleuth"))
   model_exists(obj, which_model)
 
-  # we basically do lapply on all of the models
-  #
   # the fitted values are here:
-  #   obj$fits[[which_model]]$models$ols_fit$fitted.values
+  #   obj$fits[[which_model]]$models$fitted.values
   #
   # the observations can be recovered by:
-  #   obj$fits$full$models[[1]]$ols_fit$residuals + fitted values
-
-  # TODO: move this elsewhere
-  obj$fits[[which_model]]$summary <- obj$fits[[which_model]]$summary[
-    match(names(obj$fits[[which_model]]$models),
-      obj$fits[[which_model]]$summary$target_id), ]
-
-  all_likelihood <- sapply(seq_along(obj$fits[[which_model]]$models),
-    function( i ) {
-      cur_model <- obj$fits[[which_model]]$models[[ i ]]
-      cur_mu <- cur_model$ols_fit$fitted.values
-      obs <- cur_model$ols_fit$residuals + cur_mu
-
-      cur_summary <- obj$fits[[which_model]]$summary
-
-      cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] +
-        cur_summary[i, "sigma_q_sq"]
-
-      sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
+  #   obj$fits[[which_model]]$models$residuals + fitted values
+
+  models <- obj$fits[[which_model]]$models
+  if (names(models)[1] == "coefficients") {
+    cur_model <- models
+    cur_mu <- cur_model$fitted.values
+    obs <- cur_model$residuals + cur_mu
+    cur_summary <- obj$fits[[which_model]]$summary
+    cur_var <- cur_summary$smooth_sigma_sq_pmax + cur_summary$sigma_q_sq
+    cur_var <- matrix(rep(cur_var, nrow(obs)), nrow = nrow(obs), byrow = TRUE)
+
+    all_likelihood <- colSums(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
+  } else {
+    # This is retained for backward compatibility with older versions of sleuth
+    all_likelihood <- sapply(seq_along(models),
+      function(i) {
+        cur_model <- models[[i]]
+        cur_mu <- cur_model$ols_fit$fitted.values
+        obs <- cur_model$ols_fit$residuals + cur_mu
+
+        cur_summary <- obj$fits[[which_model]]$summary
+
+        cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] +
+          cur_summary[i, "sigma_q_sq"]
+
+        sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
     })
-
-  names(all_likelihood) <- names(obj$fits[[which_model]]$models)
+    names(all_likelihood) <- names(models)
+  }
 
   obj$fits[[which_model]]$likelihood <- all_likelihood
 
@@ -91,16 +96,20 @@ sleuth_lrt <- function(obj, null_model, alt_model) {
 
   test_statistic <- 2 * (a_ll - n_ll)
 
-  degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual -
-    obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual
+  if (names(obj$fits[[alt_model]]$models)[1] == "coefficients") {
+    degrees_free <- obj$fits[[null_model]]$models$df.residual -
+      obj$fits[[alt_model]]$models$df.residual
+  } else {
+    degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual -
+      obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual
+  }
 
   # P(chisq > test_statistic)
   p_value <- pchisq(test_statistic, degrees_free, lower.tail = FALSE)
   result <- adf(target_id = names(obj$fits[[alt_model]]$likelihood),
     test_stat = test_statistic, pval = p_value)
   result <- dplyr::mutate(result, qval = p.adjust(pval, method = "BH"))
   model_info <- data.table::data.table(obj$fits[[null_model]]$summary)
-  model_info <- dplyr::select(model_info, -c(iqr))
   result <- dplyr::left_join(
     data.table::data.table(result),
     model_info,

diff --git a/R/matrix.R b/R/matrix.R
@@ -24,8 +24,9 @@
 #' @param which_df character vector of length one. Which type of data to use
 #' ("obs_norm" or "obs_raw")
 #' @param which_units character vector of length one. Which units to use ("tpm"
-#' or "est_counts")
-#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units}
+#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))
+#' @return a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
+#'   Note that this currently does not support returning raw values for gene-level counts or TPMs.
 #' @examples
 #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
 #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name