Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
62b5917
Add functions to extract column and row data from mutated tibbles, an…
stemangiola Aug 30, 2025
338de95
Refactor mutate functionality in SummarizedExperiment by renaming mut…
stemangiola Aug 31, 2025
7a2a77c
adapt rename
stemangiola Aug 31, 2025
3ee1298
Add dependency analysis for query scope in analyze_query_scope_mutate
stemangiola Aug 31, 2025
65433ab
Implement mixed scope handling in mutate operations
stemangiola Aug 31, 2025
fbc3800
Refactor mutate.SummarizedExperiment to streamline scope handling
stemangiola Aug 31, 2025
92072d7
Enhance mutate functionality with new decomposition and analysis feat…
stemangiola Aug 31, 2025
0fbadb6
Add comprehensive tests for mutate function in SummarizedExperiment
stemangiola Aug 31, 2025
4fa1e02
Enhance mutate.SummarizedExperiment to record latest mutate scope
stemangiola Aug 31, 2025
b3cd45a
version UP
stemangiola Aug 31, 2025
9bd3589
version UP
stemangiola Aug 31, 2025
fd8e9db
Add benchmark vignette for `mutate()` performance comparison
stemangiola Aug 31, 2025
2b97883
Enhance filter functionality with scope analysis and decomposition
stemangiola Aug 31, 2025
3c4d1ad
Add analyze_query_scope_filter function and integrate into filter.Sum…
stemangiola Aug 31, 2025
7d35644
Add rename functionality
stemangiola Aug 31, 2025
b7e5f7f
Remove filter and join methods from dplyr_methods.R
stemangiola Aug 31, 2025
eb90a12
Refactor join methods for SummarizedExperiment and add new join utili…
stemangiola Aug 31, 2025
29019fd
Refactor join functions
stemangiola Aug 31, 2025
79e6445
Add select functionality with scope analysis for SummarizedExperiment
stemangiola Sep 1, 2025
5a9de75
Rename scoping vignette
stemangiola Sep 1, 2025
9ae788d
Remove deprecated select.SummarizedExperiment function and associated…
stemangiola Sep 1, 2025
94cdc85
Add left_join documentation and examples for SummarizedExperiment
stemangiola Sep 1, 2025
9487269
fix CHECKS
stemangiola Sep 1, 2025
6ccebeb
Add caching mechanism for benchmark results in the scoping vignette
stemangiola Sep 1, 2025
ee7aacd
Merge branch 'master' into query-to-slot-routines
stemangiola Sep 1, 2025
5d4de8b
Moving benchmark vignette to dev
stemangiola Oct 2, 2025
b0b7bab
Update installation instructions and clarify usage of slice function …
stemangiola Oct 2, 2025
caa3fcb
Solve error for count() in vignette
stemangiola Oct 2, 2025
6b36fb7
Update website links in introduction vignette to reflect new tidyomic…
stemangiola Oct 2, 2025
728ece1
Clarify usage of rename function in introduction vignette by specifyi…
stemangiola Oct 2, 2025
33d2504
Add 'airway' to DESCRIPTION suggests for improved functionality
stemangiola Oct 2, 2025
a10d0f0
Add NEWS
stemangiola Oct 2, 2025
79dcfd9
Remove deprecated tests for old vocabulary from the test suite to str…
stemangiola Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: tidySummarizedExperiment
Title: Brings SummarizedExperiment to the Tidyverse
Version: 1.19.6
Version: 1.19.7
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
role = c("aut", "cre")) )
Author: Stefano Mangiola [aut, cre] <[email protected]>
Expand Down Expand Up @@ -40,7 +40,8 @@ Imports:
cli,
fansi,
stats,
pkgconfig
pkgconfig,
plyxp
Suggests:
BiocStyle,
testthat,
Expand All @@ -49,7 +50,8 @@ Suggests:
rmarkdown,
plotly,
rbibutils,
prettydoc
prettydoc,
airway
VignetteBuilder:
knitr
RdMacros:
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ S3method(tidy,SummarizedExperiment)
S3method(unite,SummarizedExperiment)
S3method(unnest,tidySummarizedExperiment_nested)
export("%>%")
export(mutate_features)
export(mutate_samples)
export(tidy)
export(unnest_summarized_experiment)
importFrom(S4Vectors,"metadata<-")
Expand Down Expand Up @@ -115,6 +113,7 @@ importFrom(rlang,is_spliced)
importFrom(rlang,quo_is_null)
importFrom(rlang,quo_name)
importFrom(rlang,quo_squash)
importFrom(stats,setNames)
importFrom(stringr,regex)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace)
Expand All @@ -131,6 +130,8 @@ importFrom(tidyr,separate)
importFrom(tidyr,spread)
importFrom(tidyr,unite)
importFrom(tidyr,unnest)
importFrom(tidyselect,all_of)
importFrom(tidyselect,any_of)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,one_of)
importFrom(ttservice,append_samples)
Expand Down
245 changes: 245 additions & 0 deletions R/decompose_tidy_operations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
#' Test whether a dplyr-style call carries more than one expression
#'
#' A query is "composed" when two or more expressions are supplied through
#' `...`; such a query can be split into one call per expression. Zero or one
#' expression counts as a simple query.
#'
#' @param operation Character string naming the dplyr operation
#'   (e.g., "mutate", "filter"). It is accepted for interface symmetry and is
#'   not inspected.
#' @param ... The expressions passed to the dplyr function. They are captured
#'   unevaluated, so bare column names are fine.
#' @return `TRUE` when more than one expression is supplied, `FALSE` otherwise.
#'
#' @examples
#' \dontrun{
#' library(airway)
#' data(airway)
#'
#' # One expression: simple
#' is_composed("mutate", new_col = dex) # FALSE
#'
#' # Two expressions: composed
#' is_composed("mutate", col1 = dex, col2 = cell) # TRUE
#'
#' # Two filter conditions: composed
#' is_composed("filter", dex == "trt", cell == "N61311") # TRUE
#' }
#'
#' @keywords internal
#' @noRd
is_composed <- function(operation, ...) {
  # Defuse the arguments so they are counted without being evaluated
  n_expressions <- length(rlang::enquos(...))

  # Zero or one expression is a simple query; anything above is composed
  n_expressions > 1
}

#' Function factory for quosure-based query decomposition
#'
#' Creates a decomposition function that captures its arguments with rlang::enquos() to transform:
#' fx(a = x, b = y, c = z, .preserve = FALSE) into:
#' fx(a = x, .preserve = FALSE) |> fx(b = y, .preserve = FALSE) |> fx(c = z, .preserve = FALSE)
#'
#' Handles additional arguments (like .preserve, .by, etc.) by passing them to each decomposed step.
#'
#' Can be used in two ways:
#' 1. Get decomposition info: decompose_tidy_operation("mutate", x = a, y = b)
#' 2. Create executable function: decompose_tidy_operation("mutate", x = a, y = b)(se)
#'
#' @param fx_name Character name of the function (e.g., "mutate", "filter", "select")
#' @param ... Main expressions to decompose AND additional named arguments
#' @return Function that can be applied to a SummarizedExperiment; the decomposition info is attached to it as attributes (e.g. "decomposed", "function_name", "additional_args")
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(airway)
#' data(airway)
#'
#' # Basic mutate decomposition
#' mutate_fn <- decompose_tidy_operation("mutate", new_dex = dex, new_cell = cell)
#' attr(mutate_fn, "pipeline_text")
#' # "mutate(new_dex = dex) |> mutate(new_cell = cell)"
Comment on lines +62 to +71
Copy link
Preview

Copilot AI Sep 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The examples use \dontrun{} but these appear to be valid executable examples that would help users understand the functionality. Consider using \donttest{} instead if the concern is execution time.

Copilot uses AI. Check for mistakes.

#'
#' # Execute the decomposed pipeline
#' result_se <- mutate_fn(airway)
#'
#' # One-liner execution
#' se_result <- decompose_tidy_operation("mutate", dex_1 = dex, dex_2 = dex)(airway)
#'
#' # With additional arguments
#' filter_preserve <- decompose_tidy_operation("filter",
#' dex == "trt",
#' cell == "N61311",
#' .preserve = FALSE)
#' attr(filter_preserve, "pipeline_text")
#' # "filter(dex == \"trt\", .preserve = FALSE) |> filter(cell == \"N61311\", .preserve = FALSE)"
#'
#' # Multiple additional arguments
#' mutate_with_args <- decompose_tidy_operation("mutate",
#' log_counts = log2(counts + 1),
#' is_treated = dex == "trt",
#' .keep = "used")
#'
#' # Universal pattern works with any dplyr operation
#' select_fn <- decompose_tidy_operation("select", dex, cell, counts)
#' group_by_fn <- decompose_tidy_operation("group_by", dex, cell)
#' summarise_fn <- decompose_tidy_operation("summarise",
#' mean_counts = mean(counts),
#' n_samples = n(),
#' .groups = "drop")
#'
#' # Access metadata
#' attr(mutate_fn, "decomposed") # TRUE
#' attr(mutate_fn, "function_name") # "mutate"
#' attr(mutate_fn, "individual_calls") # Character vector of each step
#' attr(mutate_fn, "additional_args") # List of .* arguments
#' }
#'
#' @keywords internal
#' @noRd
decompose_tidy_operation <- function(fx_name, ...) {
  # Build a one-argument function that applies `fx_name` once per main
  # expression found in `...`, in the order given.
  #
  # Arguments in `...` whose name starts with "." (e.g. `.preserve`, `.keep`,
  # `.by`) are "additional arguments": they are not decomposed and are passed
  # unchanged to every step. All other arguments, named or not, are the main
  # expressions.
  #
  # The returned function carries its metadata as attributes:
  #   "decomposed", "function_name", "additional_args" (always);
  #   "expressions" (zero or one main expression);
  #   "pipeline_text", "individual_calls", "individual_expressions"
  #   (two or more main expressions).

  # Capture everything unevaluated, each with its own environment
  dots <- rlang::enquos(...)

  dot_names <- names(dots)
  if (is.null(dot_names)) {
    dot_names <- rep("", length(dots))
  }

  # Unnamed arguments have the name "" and are therefore main expressions
  is_additional <- startsWith(dot_names, ".")

  # Plain named list of quosures; a repeated name keeps its last value
  additional_args <- list()
  for (i in which(is_additional)) {
    additional_args[[dot_names[i]]] <- dots[[i]]
  }

  main_expressions <- rlang::as_quosures(
    dots[!is_additional],
    env = parent.frame()
  )

  # Run one step. The call is assembled with call2() so the additional
  # arguments become real named arguments of the call and are matched to the
  # verb's formal parameters. Splicing them with `!!!` inside the verb call
  # would only be resolved when the verb captures `...`, i.e. after argument
  # matching, leaving them among the main expressions. eval_tidy() lets the
  # inlined quosures evaluate in their own environments.
  run_step <- function(data, step_exprs, caller_env) {
    # mutate/filter/select are resolved lexically; any other verb is looked
    # up by name in the environment of whoever called the returned function.
    verb <- switch(fx_name,
      mutate = mutate,
      filter = filter,
      select = select,
      fx_name
    )
    op_call <- rlang::call2(verb, data, !!!step_exprs, !!!additional_args)
    rlang::eval_tidy(op_call, env = caller_env)
  }

  if (length(main_expressions) <= 1) {
    # Nothing to decompose, but still hand back an executable function
    single_function <- function(se) {
      caller_env <- parent.frame()
      run_step(se, main_expressions, caller_env)
    }

    attr(single_function, "decomposed") <- FALSE
    attr(single_function, "function_name") <- fx_name
    attr(single_function, "expressions") <- main_expressions
    attr(single_function, "additional_args") <- additional_args

    return(single_function)
  }

  # Text of the additional arguments, repeated in every displayed call
  additional_args_text <- ""
  if (length(additional_args) > 0) {
    additional_values <- vapply(
      additional_args,
      rlang::quo_text,
      character(1),
      USE.NAMES = FALSE
    )
    additional_parts <- paste0(names(additional_args), " = ", additional_values)
    additional_args_text <- paste0(
      ", ",
      paste(additional_parts, collapse = ", ")
    )
  }

  step_index <- seq_along(main_expressions)
  step_names <- names(main_expressions)
  if (is.null(step_names)) {
    step_names <- rep("", length(main_expressions))
  }
  is_named <- nzchar(step_names)

  # Unnamed expressions get a positional label, used only to index metadata
  display_names <- ifelse(is_named, step_names, paste0("expr_", step_index))

  expr_text <- vapply(
    step_index,
    function(i) rlang::quo_text(main_expressions[[i]]),
    character(1)
  )

  # One displayed call per step: "fx(name = expr, .arg = value)"
  name_prefix <- ifelse(is_named, paste0(step_names, " = "), "")
  individual_calls <- paste0(
    fx_name, "(", name_prefix, expr_text, additional_args_text, ")"
  )
  names(individual_calls) <- display_names

  # Each step keeps a length-one quosure list so its original name, or lack
  # of one, is what gets spliced into the call
  individual_expressions <- lapply(step_index, function(i) main_expressions[i])
  names(individual_expressions) <- display_names

  pipeline_text <- paste(individual_calls, collapse = " |> ")

  pipeline_function <- function(se) {
    caller_env <- parent.frame()

    # Feed the result of each step into the next
    result_se <- se
    for (step_expr in individual_expressions) {
      result_se <- run_step(result_se, step_expr, caller_env)
    }

    result_se
  }

  attr(pipeline_function, "decomposed") <- TRUE
  attr(pipeline_function, "function_name") <- fx_name
  attr(pipeline_function, "pipeline_text") <- pipeline_text
  attr(pipeline_function, "individual_calls") <- individual_calls
  attr(pipeline_function, "individual_expressions") <- individual_expressions
  attr(pipeline_function, "additional_args") <- additional_args

  pipeline_function
}
Loading
Loading