diff --git a/DESCRIPTION b/DESCRIPTION index 2a567bc..3a2e896 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,6 @@ Imports: dplyr, data.table, tidyr, - reshape2, rhdf5, parallel, lazyeval, diff --git a/NAMESPACE b/NAMESPACE index 01d495b..03f118b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(design_matrix) export(enclosed_brush) export(excluded_ids) export(extract_model) +export(gene_from_gene) export(get_bootstrap_summary) export(get_bootstraps) export(get_quantile) @@ -40,6 +41,7 @@ export(log_transform) export(melt_bootstrap_sleuth) export(models) export(norm_factors) +export(head) export(plot_bootstrap) export(plot_fld) export(plot_group_density) @@ -86,3 +88,4 @@ importFrom(lazyeval,interp) importFrom(lazyeval,lazy) importFrom(rhdf5,h5write) importFrom(rhdf5,h5write.default) +importFrom(utils, head) diff --git a/R/bootstrap.R b/R/bootstrap.R index 6e1d11f..67edaac 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -485,6 +485,7 @@ process_bootstrap <- function(i, samp_name, kal_path, mappings) # this step undoes the tidying to get back a matrix format # target_ids here are now the aggregation column ids + scaled_bs <- data.table::as.data.table(scaled_bs) bs_mat <- data.table::dcast(scaled_bs, sample ~ target_id, value.var = "scaled_reads_per_base") # this now has the same format as the transcript matrix diff --git a/R/fix_head.R b/R/fix_head.R new file mode 100644 index 0000000..65fdc0d --- /dev/null +++ b/R/fix_head.R @@ -0,0 +1,6 @@ + +## ---- head +#' Export \code{head} as an alias of \code{utils::head} (fixes the unexported head error). +#' +#' @export head +head <- utils::head diff --git a/R/matrix.R b/R/matrix.R index 4ffecf3..ee17e41 100644 --- a/R/matrix.R +++ b/R/matrix.R @@ -24,12 +24,14 @@ #' @param which_df character vector of length one. Which type of data to use #' ("obs_norm" or "obs_raw") #' @param which_units character vector of length one. 
Which units to use ("tpm" -#' or "est_counts") -#' @return a matrix which contains a matrix of target_ids and transcript expression in \code{which_units} +#' or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses)) +#' @return a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}. +#' Note this currently does not support returning raw values for gene-level counts or TPMs. #' @examples #' sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm') #' head(sleuth_matrix) # look at first 5 transcripts, sorted by name #' @export +#' @importFrom utils head sleuth_to_matrix <- function(obj, which_df, which_units) { if ( !(which_df %in% c("obs_norm", "obs_raw")) ) { stop("Invalid object") diff --git a/R/plots.R b/R/plots.R index 5077857..5d6d0e1 100644 --- a/R/plots.R +++ b/R/plots.R @@ -1035,13 +1035,13 @@ plot_transcript_heatmap <- function(obj, if (units == 'tpm') { tabd_df <- dplyr::select(tabd_df, target_id, sample, tpm) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'tpm') + tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'tpm') } else if (units == 'est_counts') { tabd_df <- dplyr::select(tabd_df, target_id, sample, est_counts) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, value.var = 'est_counts') + tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'est_counts') } else if (units == 'scaled_reads_per_base') { tabd_df <- dplyr::select(tabd_df, target_id, sample, scaled_reads_per_base) - tabd_df <- reshape2::dcast(tabd_df, target_id ~sample, + tabd_df <- data.table::dcast(tabd_df, target_id ~sample, value.var = 'scaled_reads_per_base') } else { stop("Didn't recognize the following unit: ", units) diff --git a/R/sleuth.R b/R/sleuth.R index fea2305..d387818 100644 --- a/R/sleuth.R +++ b/R/sleuth.R @@ -1007,7 +1007,8 @@ kallisto_table <- function(obj, # @return a matrix with the appropriate names 
obs_to_matrix <- function(obj, value_name) { - obs_counts <- reshape2::dcast(obj$obs_norm, target_id ~ sample, + obj$obs_norm <- data.table::as.data.table(obj$obs_norm) + obs_counts <- data.table::dcast(obj$obs_norm, target_id ~ sample, value.var = value_name) obs_counts <- as.data.frame(obs_counts) @@ -1091,6 +1092,7 @@ summary.sleuth <- function(obj, covariates = TRUE) { #' head(sleuth_genes) # show info for first 5 genes #' sleuth_genes[1:5, 6] # show transcripts for first 5 genes #' @export +#' @importFrom utils head sleuth_gene_table <- function(obj, test, test_type = 'lrt', which_model = 'full', which_group = 'ens_gene') { if (is.null(obj$target_mapping)) { @@ -1150,6 +1152,52 @@ transcripts_from_gene <- function(obj, test, test_type, table$target_id[table[, 2] == gene_name] } +#' Get the gene ID using other gene identifiers +#' +#' Get the \code{target_id} of a gene using other gene identifiers. +#' The identifiers found under the \code{obj$gene_column} are often +#' difficult to remember (e.g. ensembl gene ID, ENSG00000111640). +#' This function allows a user to find that difficult-to-remember +#' identifier using more-easily-remembered identifiers, such as +#' gene symbol (e.g. "GAPDH"). +#' +#' @param obj a \code{sleuth} object +#' @param gene_colname the name of the column containing 'gene_name'. +#' This parameter refers to the name of the column that the gene you are searching for appears in. +#' Check the column names using \code{colnames(obj$target_mapping)}. +#' @param gene_name a string containing the name of the gene you are interested in. +#' @return a character vector containing the \code{target_id} of the gene, found under +#' \code{obj$gene_column} within \code{obj$target_mapping}. +#' If the column name provided is the same as \code{obj$gene_column}, and the +#' gene_name used is found, that gene_name will be returned. 
+#' @examples +#' \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")} +#' @export +gene_from_gene <- function(obj, gene_colname, gene_name) { + + if (!obj$gene_mode) { + stop("this sleuth object is in transcript mode. Please use 'transcripts_from_gene' instead.") + } + + table <- as.data.frame(obj$target_mapping) + if (gene_colname == obj$gene_column) { + if (!(gene_name %in% table[[obj$gene_column]])) { + stop("Couldn't find gene ", gene_name) + } else { + return(gene_name) + } + } + + table <- unique(table[, c(obj$gene_column, gene_colname), drop = FALSE]) + if (!(gene_name %in% table[, 2])) { + stop("Couldn't find gene ", gene_name) + } + hits <- unique(table[table[, 2] %in% gene_name, 1]) # %in% (unlike ==) never returns NA, so NA identifiers cannot leak into hits + if (length(hits) > 1) { + warning("there was more than one gene ID that matched this identifier; taking the first one") + } + hits[1] +} #' Change sleuth transform counts function #' diff --git a/man/gene_from_gene.Rd b/man/gene_from_gene.Rd new file mode 100644 index 0000000..0f7077e --- /dev/null +++ b/man/gene_from_gene.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sleuth.R +\name{gene_from_gene} +\alias{gene_from_gene} +\title{Get the gene ID using other gene identifiers} +\usage{ +gene_from_gene(obj, gene_colname, gene_name) +} +\arguments{ +\item{obj}{a \code{sleuth} object} + +\item{gene_colname}{the name of the column containing 'gene_name'. +This parameter refers to the name of the column that the gene you are searching for appears in. +Check the column names using \code{colnames(obj$target_mapping)}.} + +\item{gene_name}{a string containing the name of the gene you are interested in.} +} +\value{ +a character vector containing the \code{target_id} of the gene, found under + \code{obj$gene_column} within \code{obj$target_mapping}. + If the column name provided is the same as \code{obj$gene_column}, and the + gene_name used is found, that gene_name will be returned. 
+} +\description{ +Get the \code{target_id} of a gene using other gene identifiers. +The identifiers found under the \code{obj$gene_column} are often +difficult to remember (e.g. ensembl gene ID, ENSG00000111640). +This function allows a user to find that difficult-to-remember +identifier using more-easily-remembered identifiers, such as +gene symbol (e.g. "GAPDH"). +} +\examples{ + \dontrun{gene_from_gene(obj, "gene_symbol", "GAPDH")} +} diff --git a/man/sleuth_to_matrix.Rd b/man/sleuth_to_matrix.Rd index bc4bc30..c5e0f6d 100644 --- a/man/sleuth_to_matrix.Rd +++ b/man/sleuth_to_matrix.Rd @@ -13,16 +13,16 @@ sleuth_to_matrix(obj, which_df, which_units) ("obs_norm" or "obs_raw")} \item{which_units}{character vector of length one. Which units to use ("tpm" -or "est_counts")} +or "est_counts" (for transcript-level analyses) or "scaled_reads_per_base" (for gene-level analyses))} } \value{ -a \code{list} with an attribute 'data', which contains a matrix of target_ids - and transcript expression in \code{which_units} +a matrix which contains a matrix of target_ids and transcript (or gene) expression in \code{which_units}. + Note this currently does not support returning raw values for gene-level counts or TPMs. } \description{ Convert a sleuth object to a matrix with the condition names. } \examples{ sleuth_matrix <- sleuth_to_matrix(sleuth_obj, 'obs_norm', 'tpm') -head(sleuth_matrix$data) # look at first 5 transcripts, sorted by name +head(sleuth_matrix) # look at first 5 transcripts, sorted by name }