Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9acf16e
add back in deleted 'gene_from_gene' method to fix #190
warrenmcg Jun 27, 2018
50b35d0
fix outdated documentation for 'sleuth_to_matrix'
warrenmcg Jun 27, 2018
5b4a330
fix bug found by @lmigueel in #190; switch condition to just check fo…
warrenmcg Jun 28, 2018
4da1c6c
speed up measurement model estimation:
warrenmcg Jun 21, 2018
a289b4d
remove redundant code; this is taken care of in sleuth_fit
warrenmcg Jun 21, 2018
b7cdbc8
change sleuth_lrt, sleuth_wt, and extract_model to handle the new sle…
warrenmcg Jun 21, 2018
0a8034c
update how beta_covars is calculated and retrieved for sleuth_model o…
warrenmcg Jul 24, 2018
4176b7a
move sleuth shrinkage procedure to a separate function:
warrenmcg Jun 27, 2018
a90ac31
add sanity checks for shrink fun
warrenmcg Jun 29, 2018
73c61ff
modify how the shrinkage function is called to prevent warnings:
warrenmcg Jul 17, 2018
a02423d
add a variance shrinkage function to use limma's procedure
warrenmcg Jun 28, 2018
55e061d
add limma to imports list to reflect adding the squeezeVar function
warrenmcg Jun 29, 2018
f211f2f
fix bug that occurs when calculating degrees of freedom between two m…
warrenmcg Jun 29, 2018
d33e620
remove a line referencing a basic_shrink_fun specific column
warrenmcg Jun 29, 2018
243716b
fix memory leak from formulas that have captured external environments
warrenmcg Jul 24, 2018
f12f6ad
replace outdated aperm with t
warrenmcg Aug 13, 2018
17242ba
add sanity check for adding off diagonal beta covariances
warrenmcg Aug 13, 2018
0d7b5cb
change API for transform functions to explicitly normalize within the…
warrenmcg Aug 13, 2018
79e99cf
fix bug where sleuth_model summary table was in a different order tha…
warrenmcg Aug 20, 2018
d733061
fix eval statements for scatter plot on shiny:
warrenmcg Aug 21, 2018
d1c2d86
Merge branch 'issue194' into speedy_fit
warrenmcg Aug 21, 2018
653908d
update sanity check for gene_mode and pval_aggregate in sleuth_results
warrenmcg Nov 5, 2018
e8df07a
Merge branch 'issue202' into speedy_fit
warrenmcg Nov 5, 2018
4dc4355
fix bug where obs_to_matrix was using obs_raw rather than obs_norm
warrenmcg Mar 16, 2019
b3b1c58
small version bump
warrenmcg Mar 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sleuth
Title: Tools for investigating RNA-Seq
Version: 0.30.0
Version: 0.30.0-1
Authors@R: c(
person("Harold", "Pimentel", , "[email protected]", role = c("aut", "cre")),
person("Warren", "McGee", , "[email protected]", role = "aut"))
Expand All @@ -21,16 +21,18 @@ Imports:
tidyr,
reshape2,
rhdf5,
rlang,
parallel,
lazyeval,
matrixStats,
pheatmap,
shiny,
aggregation
aggregation,
limma
Suggests:
MASS,
lintr,
testthat,
knitr
VignetteBuilder: knitr
RoxygenNote: 6.0.1
RoxygenNote: 6.1.0
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,21 @@ S3method(transform_status,sleuth_model)
export("transform_fun_counts<-")
export("transform_fun_tpm<-")
export(basic_filter)
export(basic_shrink_fun)
export(bias_table)
export(counts_to_fpkm)
export(counts_to_tpm)
export(design_matrix)
export(enclosed_brush)
export(excluded_ids)
export(extract_model)
export(gene_from_gene)
export(get_bootstrap_summary)
export(get_bootstraps)
export(get_quantile)
export(is_kallisto_subset)
export(kallisto_table)
export(limma_shrink_fun)
export(log_transform)
export(melt_bootstrap_sleuth)
export(models)
Expand Down Expand Up @@ -84,5 +87,7 @@ importFrom(data.table,fread)
importFrom(dplyr,"%>%")
importFrom(lazyeval,interp)
importFrom(lazyeval,lazy)
importFrom(limma,squeezeVar)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this used anywhere? I don't see it

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just kidding, see it now. hm. I'd like to not depend on yet another package, ideally

importFrom(rhdf5,h5write)
importFrom(rhdf5,h5write.default)
importFrom(rlang,eval_tidy)
21 changes: 10 additions & 11 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ dcast_bootstrap.kallisto <- function(obj, units, nsamples = NULL) {

# Function to process bootstraps for parallelization
process_bootstrap <- function(i, samp_name, kal_path,
num_transcripts, est_count_sf,
num_transcripts, est_count_sf, est_tpm_sf,
read_bootstrap_tpm, gene_mode,
extra_bootstrap_summary,
target_id, mappings, which_ids,
Expand All @@ -402,12 +402,11 @@ process_bootstrap <- function(i, samp_name, kal_path,
eff_len <- rhdf5::h5read(kal_path$path, "aux/eff_lengths")
bs_mat <- read_bootstrap_mat(fname = kal_path$path,
num_bootstraps = num_bootstrap,
num_transcripts = num_transcripts,
est_count_sf = est_count_sf)
num_transcripts = num_transcripts)

if (read_bootstrap_tpm) {
bs_tpm <- aperm(apply(bs_mat, 1, counts_to_tpm,
eff_len))
bs_tpm <- t(apply(bs_mat, 1, counts_to_tpm,
eff_len))
colnames(bs_tpm) <- colnames(bs_mat)

# gene level code is analogous here to below code
Expand Down Expand Up @@ -438,9 +437,9 @@ process_bootstrap <- function(i, samp_name, kal_path,
bs_tpm <- as.matrix(bs_tpm[, -1])
rm(tidy_tpm) # these tables are very large
}
bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids])
bs_quant_tpm <- aperm(apply(bs_tpm, 2,
quantile))
bs_tpm <- transform_fun_tpm(bs_tpm[, which_ids], sf = est_tpm_sf)
bs_quant_tpm <- t(apply(bs_tpm, 2,
quantile))
colnames(bs_quant_tpm) <- c("min", "lower", "mid",
"upper", "max")
bs_quants$tpm <- bs_quant_tpm
Expand Down Expand Up @@ -493,10 +492,10 @@ process_bootstrap <- function(i, samp_name, kal_path,
rm(tidy_bs, scaled_bs)
}

bs_mat <- transform_fun_counts(bs_mat[, which_ids])
bs_mat <- transform_fun_counts(bs_mat[, which_ids], sf = est_count_sf)
if (extra_bootstrap_summary) {
bs_quant_est_counts <- aperm(apply(bs_mat, 2,
quantile))
bs_quant_est_counts <- t(apply(bs_mat, 2,
quantile))
colnames(bs_quant_est_counts) <- c("min", "lower",
"mid", "upper", "max")
bs_quants$est_counts <- bs_quant_est_counts
Expand Down
63 changes: 36 additions & 27 deletions R/likelihood.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,39 @@ compute_likelihood <- function(obj, which_model) {
stopifnot(is(obj, "sleuth"))
model_exists(obj, which_model)

# we basically do lapply on all of the models
#
# the fitted values are here:
# obj$fits[[which_model]]$models$ols_fit$fitted.values
# obj$fits[[which_model]]$models$fitted.values
#
# the observations can be recovered by:
# obj$fits$full$models[[1]]$ols_fit$residuals + fitted values

# TODO: move this elsewhere
obj$fits[[which_model]]$summary <- obj$fits[[which_model]]$summary[
match(names(obj$fits[[which_model]]$models),
obj$fits[[which_model]]$summary$target_id), ]

all_likelihood <- sapply(seq_along(obj$fits[[which_model]]$models),
function( i ) {
cur_model <- obj$fits[[which_model]]$models[[ i ]]
cur_mu <- cur_model$ols_fit$fitted.values
obs <- cur_model$ols_fit$residuals + cur_mu

cur_summary <- obj$fits[[which_model]]$summary

cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] +
cur_summary[i, "sigma_q_sq"]

sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
# obj$fits[[which_model]]$models$residuals + fitted values

models <- obj$fits[[which_model]]$models
if (names(models)[1] == "coefficients") {
cur_model <- models
cur_mu <- cur_model$fitted.values
obs <- cur_model$residuals + cur_mu
cur_summary <- obj$fits[[which_model]]$summary
cur_var <- cur_summary$smooth_sigma_sq_pmax + cur_summary$sigma_q_sq
cur_var <- matrix(rep(cur_var, nrow(obs)), nrow = nrow(obs), byrow = TRUE)

all_likelihood <- colSums(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
} else {
# This is retained for backward compatibility with older versions of sleuth
all_likelihood <- sapply(seq_along(models),
function(i) {
cur_model <- models[[i]]
cur_mu <- cur_model$ols_fit$fitted.values
obs <- cur_model$ols_fit$residuals + cur_mu

cur_summary <- obj$fits[[which_model]]$summary

cur_var <- cur_summary[i, "smooth_sigma_sq_pmax"] +
cur_summary[i, "sigma_q_sq"]

sum(dnorm(obs, mean = cur_mu, sd = sqrt(cur_var), log = TRUE))
})

names(all_likelihood) <- names(obj$fits[[which_model]]$models)
names(all_likelihood) <- names(models)
}

obj$fits[[which_model]]$likelihood <- all_likelihood

Expand Down Expand Up @@ -91,16 +96,20 @@ sleuth_lrt <- function(obj, null_model, alt_model) {

test_statistic <- 2 * (a_ll - n_ll)

degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual -
obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual
if (names(obj$fits[[alt_model]]$models)[1] == "coefficients") {
degrees_free <- obj$fits[[null_model]]$models$df.residual -
obj$fits[[alt_model]]$models$df.residual
} else {
degrees_free <- obj$fits[[null_model]]$models[[1]]$ols_fit$df.residual -
obj$fits[[alt_model]]$models[[1]]$ols_fit$df.residual
}

# P(chisq > test_statistic)
p_value <- pchisq(test_statistic, degrees_free, lower.tail = FALSE)
result <- adf(target_id = names(obj$fits[[alt_model]]$likelihood),
test_stat = test_statistic, pval = p_value)
result <- dplyr::mutate(result, qval = p.adjust(pval, method = "BH"))
model_info <- data.table::data.table(obj$fits[[null_model]]$summary)
model_info <- dplyr::select(model_info, -c(iqr))
result <- dplyr::left_join(
data.table::data.table(result),
model_info,
Expand Down
5 changes: 3 additions & 2 deletions R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
#' @param which_df character vector of length one. Which type of data to use
#' ("obs_norm" or "obs_raw")
#' @param which_units character vector of length one. Which units to use ("tpm"
#' or "est_counts")
#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units}
#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))
#' @return a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}.
#' Note this currently does not support returning raw values for gene-level counts or TPMs.
#' @examples
#' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm')
#' head(sleuth_matrix) # look at first 5 transcripts, sorted by name
Expand Down
Loading