Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 1.2.0.8
Version: 1.2.0.9
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ S3method(as.double,parameters_smoothness)
S3method(as.numeric,parameters_kurtosis)
S3method(as.numeric,parameters_skewness)
S3method(as.numeric,parameters_smoothness)
S3method(as.prop.table,datawizard_crosstab)
S3method(as.prop.table,datawizard_crosstabs)
S3method(as.table,datawizard_crosstab)
S3method(as.table,datawizard_crosstabs)
S3method(as.table,datawizard_table)
Expand Down Expand Up @@ -230,6 +232,7 @@ S3method(winsorize,factor)
S3method(winsorize,logical)
S3method(winsorize,numeric)
export(adjust)
export(as.prop.table)
export(assign_labels)
export(categorize)
export(center)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ BREAKING CHANGES

CHANGES

* `data_tabulate()` now saves the table of proportions for crosstables as
attribute, accessible via the new `as.prop.table()` method (#656).

* Due to changes in the package `insight`, `data_tabulate()` no longer prints
decimals when all values in a column are integers (#641).

Expand Down
91 changes: 91 additions & 0 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
#' frequencies of the variable. This is useful for further statistical analysis,
#' e.g. for using `chisq.test()` on the frequency table. See 'Examples'.
#'
#' Finally, the `as.prop.table()` method returns the proportions of the
#' crosstable as a table object, if `by` was supplied to `data_tabulate()`.
#'
#' @section Crosstables:
#' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`
#' (missing) values by default. The first column indicates values of `x`, the
Expand Down Expand Up @@ -675,6 +678,94 @@ as.table.datawizard_crosstabs <- function(
out
}

#' @rdname data_tabulate
#' @export
as.prop.table <- function(x, ...) {
UseMethod("as.prop.table")
}

#' @rdname data_tabulate
#' @export
as.prop.table.datawizard_crosstab <- function(
x,
remove_na = TRUE,
simplify = FALSE,
verbose = TRUE,
...
) {
# sanity check - the `.data.frame` method returns a list, but not the
# default method
if (!is.data.frame(x)) {
x <- x[[1]]
}
prop_table <- attributes(x)$prop_table

if (is.null(prop_table)) {
insight::format_warning("No proportions available.")
return(NULL)
}

if (remove_na) {
if (
verbose &&
("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table))
) {
insight::format_alert("Removing NA values from frequency table.")
}
if (!is.null(prop_table[["NA"]])) {
prop_table[["NA"]] <- NULL
}
if ("NA" %in% rownames(prop_table)) {
prop_table <- prop_table[rownames(prop_table) != "NA", ]
}
}
# coerce to table
result <- as.table(as.matrix(prop_table))
# if we don't want to simplify the table, we wrap it into a list
if (!simplify) {
result <- list(result)
}

result
}

#' @export
as.prop.table.datawizard_crosstabs <- function(
x,
remove_na = TRUE,
simplify = FALSE,
verbose = TRUE,
...
) {
# only show message once we set `verbose = FALSE` in the lapply()
if (remove_na && verbose) {
prop_table <- attributes(x[[1]])$prop_table
if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) {
insight::format_alert("Removing NA values from frequency table.")
}
}

out <- insight::compact_list(lapply(
x,
as.prop.table.datawizard_crosstab,
remove_na = remove_na,
simplify = TRUE,
verbose = FALSE,
...
))

# if no proportions found, return NULL
if (!length(out)) {
return(NULL)
}

# if only one table is returned, "unlist"
if (length(out) == 1 && simplify) {
out <- out[[1]]
}
out
}


.is_grouped_df_xtab <- function(x) {
if (!is.data.frame(x)) {
Expand Down
147 changes: 99 additions & 48 deletions R/data_xtabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,100 @@
attr(out, "proportions") <- proportions
attr(out, "varname") <- obj_name
attr(out, "grouped_df") <- !is.null(group_variable)
attr(out, "prop_table") <- .prop_table(out)

class(out) <- c("datawizard_crosstab", "data.frame")

out
}

# Helper function to calculate a table of proportions from a frequency table
.prop_table <- function(x) {
# Extract the "proportions" attribute, which determines the type of calculation (row, column, or full)
props <- attributes(x)$proportions
out <- NULL

# Proceed only if the "proportions" attribute is set
if (!is.null(props)) {
# Identify numeric columns, as proportions are only calculated for these
numeric_columns <- vapply(x, is.numeric, logical(1))
# Get the total count from the attributes, used for "full" proportions
total_n <- attributes(x)$total_n

# Use a switch to perform the calculation based on the "props" value
out <- switch(
props,
# Calculate row-wise proportions
row = lapply(seq_len(nrow(x)), function(i) {
# Sum of the current row's numeric values
row_sum <- sum(x[i, numeric_columns], na.rm = TRUE)
# Avoid division by zero; if row sum is 0, return a row of zeros
if (row_sum == 0) {
tmp <- as.data.frame(as.list(rep(0, sum(numeric_columns))))
# for later rbind, we need identical column names
colnames(tmp) <- colnames(x)[numeric_columns]
tmp
} else {
x[i, numeric_columns] / row_sum
}
}),
# Calculate column-wise proportions
column = lapply(seq_len(ncol(x))[numeric_columns], function(i) {
# Sum of the current column's values
col_sum <- sum(x[, i], na.rm = TRUE)
# Avoid division by zero; if column sum is 0, return a vector of zeros
if (col_sum == 0) {
rep(0, nrow(x))
} else {
x[, i] / col_sum
}
}),
# Calculate proportions relative to the total count of the entire table
full = lapply(seq_len(ncol(x))[numeric_columns], function(i) {
# Avoid division by zero; if total is 0, return a vector of zeros
if (total_n == 0) {
rep(0, nrow(x))
} else {
x[, i] / total_n
}
})
)
}

# If a proportion table was calculated, format it into a data frame
if (!is.null(out)) {
# The output of the switch is a list. We need to bind it into a data frame.
# For row proportions, we bind rows. For column/full, we bind columns.
out <- switch(
props,
row = as.data.frame(do.call(rbind, out)),
as.data.frame(do.call(cbind, out))
)
# Set the column names of the new proportion table
colnames(out) <- colnames(x)[numeric_columns]

# Check if the dimensions are consistent before setting row names
if (nrow(out) == nrow(x)) {
# If the first column of the original data is not numeric, it's likely a
# label column. Use these labels as row names in the output for better
# readability. This is useful for identifying rows, especially when NAs
# are present.
if (isFALSE(numeric_columns[1])) {
r_names <- x[[1]]
r_names <- as.character(r_names)
# Replace NA in labels with the string "NA", else we cannot set rownames
r_names[is.na(r_names)] <- "NA"
rownames(out) <- r_names
} else {
# Otherwise, just use the original row names
rownames(out) <- rownames(x)
}
}
}

out
}


# methods ---------------------

Expand All @@ -113,7 +201,7 @@ format.datawizard_crosstab <- function(
x <- as.data.frame(x)

# find numeric columns, only for these we need row/column sums
numeric_columns <- vapply(x, is.numeric, logical(1))
numeric_columns <- which(vapply(x, is.numeric, logical(1)))

# compute total N for rows and columns
total_n <- attributes(x)$total_n
Expand All @@ -122,55 +210,18 @@ format.datawizard_crosstab <- function(

# proportions?
props <- attributes(x)$proportions

if (!is.null(props)) {
# we copy x to tmp, because when we create strings with "sprintf()", the
# variable is coerced to character, and in subsequent iterations of the loop,
# mathemathical operations are not possible anymore
tmp <- x
if (identical(props, "row")) {
for (i in seq_len(nrow(x))) {
row_sum <- sum(x[i, numeric_columns], na.rm = TRUE)
if (row_sum == 0) {
row_sum_string <- "(0%)"
} else {
row_sum_string <- sprintf(
"(%.*f%%)",
digits,
100 * x[i, numeric_columns] / row_sum
)
}
tmp[i, numeric_columns] <- paste(
format(x[i, numeric_columns]),
format(row_sum_string, justify = "right")
prop_table <- attributes(x)$prop_table

if (!is.null(props) && !is.null(prop_table)) {
for (i in seq_len(ncol(prop_table))) {
x[, numeric_columns[i]] <- paste(
format(x[, numeric_columns[i]]),
format(
sprintf("(%.*f%%)", digits, 100 * prop_table[, i]),
justify = "right"
)
}
} else if (identical(props, "column")) {
for (i in seq_len(ncol(x))[numeric_columns]) {
col_sum <- sum(x[, i], na.rm = TRUE)
if (col_sum == 0) {
col_sum_string <- "(0%)"
} else {
col_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[, i] / col_sum)
}
tmp[, i] <- paste(
format(x[, i]),
format(col_sum_string, justify = "right")
)
}
} else if (identical(props, "full")) {
for (i in seq_len(ncol(x))[numeric_columns]) {
tmp[, i] <- paste(
format(x[, i]),
format(
sprintf("(%.*f%%)", digits, 100 * x[, i] / total_n),
justify = "right"
)
)
}
)
}
# copy back final result
x <- tmp
}

x[] <- lapply(x, as.character)
Expand Down
10 changes: 5 additions & 5 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@
for (nm in setdiff(names(custom_attr), names(attributes(data.frame())))) {
attr(data, which = nm) <- custom_attr[[nm]]
}
return(data)
data
}


Expand Down Expand Up @@ -241,7 +241,7 @@
}


#' Taken from https://github.com/coolbutuseless/gluestick [licence: MIT]
#' Taken from https://github.com/coolbutuseless/gluestick (licence: MIT)
#' Same functionality as `{glue}`
#'
#' @noRd
Expand Down Expand Up @@ -292,15 +292,15 @@

# Evaluate
if (eval) {
args <- lapply(exprs, function(expr) {
fun_args <- lapply(exprs, function(expr) {
eval(parse(text = expr), envir = src)
})
} else {
args <- unname(mget(exprs, envir = as.environment(src)))
fun_args <- unname(mget(exprs, envir = as.environment(src)))
}

# Create the string(s)
do.call(sprintf, c(list(fmt_sprintf), args))
do.call(sprintf, c(list(fmt_sprintf), fun_args))
}


Expand Down
9 changes: 9 additions & 0 deletions man/data_tabulate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading