diff --git a/.dev/compare_branches.R b/.dev/compare_branches.R index bb10e15548..b1c5ae9273 100755 --- a/.dev/compare_branches.R +++ b/.dev/compare_branches.R @@ -17,22 +17,16 @@ # And then compare the results found in the new CSV file in .dev # TODO -# - make sure this works for comparing tags to facilitate release testing # - handle the case when working directory is not the lintr directory # - support an interface for ad hoc download of packages to support running # the script without needing a CRAN mirror more easily/friendly -# TODO -# - make sure this works for comparing tags to facilitate release testing -# - handle the case when working directory is not the lintr directory - suppressPackageStartupMessages({ library(optparse) - library(data.table, include.only = "fwrite") + library(data.table, include.only = c("fwrite", "rbindlist", "data.table", "dcast")) library(dplyr) library(purrr) library(tibble) - library(usethis) library(gert) library(pkgload) }) @@ -48,41 +42,43 @@ if (!file.exists("lintr.Rproj")) { # all current edits must be checked in before running) # named lintr_repo in case this script happens to be run against # a tar of lintr itself... +# TODO(michaelchirico): use git clone instead to just clone a single commit to tempdir() temp_repo <- file.path(tempdir(), "lintr_repo") dir.create(temp_repo) invisible(file.copy(".", temp_repo, recursive = TRUE)) message("Executing from copy of repo at ", temp_repo) old_wd <- setwd(temp_repo) -if (!interactive()) { - .Last <- function() { - setwd(old_wd) - unlink(temp_repo, recursive = TRUE) - } +# ensure no accidental pushes to any remote from this temp repo, so we can abuse it safely +remotes <- system2("git", "remote", stdout = TRUE) +for (remote in remotes) system2("git", c("remote", "remove", remote)) +.Last <- function() { + setwd(old_wd) + unlink(temp_repo, recursive = TRUE) } param_list <- list( optparse::make_option( "--linters", default = if (interactive()) { - readline("Provide a comma-separated list of linters to compare: ") + readline("Provide a comma-separated list of linters to compare (skip to include all linters on each branch): ") }, help = "Run the comparison for these linter(s) (comma-separated)" ), optparse::make_option( "--base_branch", default = if (interactive()) { - readline("Name a branch to use as base (skip to use main): ") + readline("Name a branch/tag to use as base (skip to use main): ") } else { "main" }, - help = "Compare to this branch" + help = "Compare to this branch/tag" ), optparse::make_option( "--branch", default = if (interactive()) { - readline("Name a branch to compare to the base branch (or skip to enter a PR# or to run only on base_branch): ") + readline("Name a branch/tag to compare to base_branch (or skip to enter a PR# or to run only on base_branch): ") }, - help = "Run the comparison for base vs. this branch" + help = "Run the comparison for base vs. this branch/tag" ), optparse::make_option( "--pr", @@ -91,7 +87,7 @@ param_list <- list( readline("Name a PR # to compare to the base branch (skip if you've entered a branch or to run only on base_branch): ") }, type = "integer", - help = "Run the comparison for base vs. this PR" + help = "Run the comparison for base_branch vs. this PR" ), optparse::make_option( "--pkg_dir", @@ -123,6 +119,14 @@ param_list <- list( "--outfile", default = file.path(".dev", sprintf("lintr_compare_branches_%d.csv", as.integer(Sys.time()))), help = "Destination file to which to write the output" + ), + optparse::make_option( + "--benchmark", + default = if (interactive()) { + askYesNo("Benchmark the timing of linter execution?") + }, + type = "logical", + help = "Whether to run performance diagnostics of the branch(es)" ) ) @@ -131,16 +135,22 @@ params$outdir <- dirname(params$outfile) # treat any skipped arguments from the prompt as missing if (interactive()) { - for (opt in c("branch", "pr", "packages", "pkg_dir", "sample_size")) { + for (opt in c("linters", "branch", "pr", "packages", "pkg_dir", "sample_size")) { # typed arguments get cast even when missing, probably to NA if (isTRUE(is.na(params[[opt]]) || params[[opt]] == "")) params[[opt]] <- NULL } if (isTRUE(is.na(params$base_branch) || params$base_branch == "")) params$base_branch <- "main" } -linter_names <- strsplit(params$linters, ",", fixed = TRUE)[[1L]] -if (length(linter_names) == 0L) { - stop("Please supply linters (--linters)") +if (params$benchmark) { + library(microbenchmark) + recorded_timings <- new.env() +} + +if (is.null(params$linters)) { + linter_names <- "_all_" +} else { + linter_names <- strsplit(params$linters, ",", fixed = TRUE)[[1L]] } base_branch <- params$base_branch @@ -210,20 +220,21 @@ if (is.null(params$sample_size)) { # test if nchar(., "chars") works as intended # for all files in dir (see #541) test_encoding <- function(dir) { - tryCatch( - { - lapply( - list.files(dir, pattern = "(?i)\\.r(?:md)?$", recursive = TRUE, full.names = TRUE), - function(x) { - con <- file(x, encoding = lintr:::find_default_encoding(x) %||% "UTF-8") - on.exit(close(con)) - nchar(readLines(con, warn = FALSE)) - } - ) - TRUE + !inherits(what = "error", tryCatch( + for (r_file in list.files(dir, pattern = "(?i)\\.r(?:md)?$", recursive = TRUE, full.names = TRUE)) { + # lintr has better encoding support since 8cd6ad~linter>2.0.1~Jul 2021; use + # the accompanying helper if possible. clunkier default otherwise. + encoding <- tryCatch(lintr:::find_default_encoding(r_file), error = function(...) NULL) + local({ + con <- file(r_file, encoding = encoding %||% "UTF-8") + on.exit(close(con)) + lines <- readLines(con, warn = FALSE) + nchar(lines) + nchar(lines, "chars") + }) }, - error = function(x) FALSE - ) + error = identity + )) } # read Depends from DESCRIPTION @@ -242,6 +253,10 @@ get_deps <- function(pkg) { lint_one_package <- function(package, linters, out_dir, check_deps) { package_is_dir <- file.info(package)$isdir package_name <- gsub("_.*", "", basename(package)) + if (params$benchmark) { + recorded_timings$current_package <- package_name + on.exit(rm("current_package", envir = recorded_timings)) + } if (!package_is_dir) { tmp <- file.path(tempdir(), package_name) @@ -253,7 +268,7 @@ lint_one_package <- function(package, linters, out_dir, check_deps) { # --strip-components makes sure the output structure is # /path/to/tmp/pkg/ instead of /path/to/tmp/pkg/pkg utils::untar(package, exdir = tmp, extras = "--strip-components=1") - on.exit(unlink(tmp, recursive = TRUE)) + on.exit(unlink(tmp, recursive = TRUE), add = TRUE) package <- tmp } if (!test_encoding(package)) { @@ -291,11 +306,117 @@ lint_one_package <- function(package, linters, out_dir, check_deps) { } } - lints <- as.data.frame(lint_dir(package, linters = linters, parse_settings = FALSE)) + # terminal_blank_lines_linter() started suppressing terminal newline warning + # in d20768a~lintr>2.0.1~Feb 2021; prior to that, we get a ton of those warnings. + # ignore them because they're innocuous. + # also ignore lintr's deprecations to make including all linters easier + suppressMessages(withCallingHandlers( + lints <- as.data.frame(lint_dir(package, linters = linters, parse_settings = FALSE)), + warning = function(cond) { + if (!grepl("ncomplete final line found|was deprecated in lintr", cond$message)) { + warning(cond$message, call. = FALSE) + } + invokeRestart("muffleWarning") + } + )) if (nrow(lints) > 0L) data.table::fwrite(lints, file.path(out_dir, paste0(package_name, ".csv"))) TRUE } +# available_linters is the preferred way to do this, but only +# available since aafba4~lintr>2.0.1~Mar 2022. relying on +# 'exports ending in _linter' seems to work for v2.0.1, +# but not clear how robust it is in general. +get_all_linters <- function() { + lintr_exports <- getNamespaceExports("lintr") + if ("available_linters" %in% lintr_exports) { + linter_names <- available_linters(tags = NULL)$linter + } else { + linter_names <- grep("_linter$", lintr_exports, value = TRUE) + } +} + +# since 2d76469~lintr>2.0.1~Feb 2021, all linters are function factories. +# but we also want to support the earlier 'simple' linters for robustness +get_linter_from_name <- function(linter_name) { + tryCatch( + # apparently v2.0.1 does not have a default because somehow a0fff5~linter 1L) { + warning("Matched more than one tag! Selecting the first of: ", toString(tag_metadata$name)) + tag_hash <- tag_hash[1L] + } + # no way to checkout a commit directly, so create a branch based to it instead -- gert#147 + # also don't have checkout --force, so just reset to prevent that from blocking -- gert#177 + gert::git_reset_hard() + gert::git_branch_create(paste(sample(letters), collapse = ""), ref = tag_hash) + } + ) +} + run_workflow <- function(what, packages, linter_names, branch, number) { t0 <- Sys.time() old_branch <- gert::git_branch() @@ -307,24 +428,33 @@ run_workflow <- function(what, packages, linter_names, branch, number) { # safe to use force=TRUE because we're in temp_repo if (what == "pr") { - # pr_fetch doesn't expose this so use this to reset gert::git_branch_checkout("main", force = TRUE) - usethis::pr_fetch(number) + gert::git_checkout_pull_request(number) } else { - gert::git_branch_checkout(branch, force = TRUE) + switch_to_ref(branch) } - pkgload::load_all() + pkgload::load_all(export_all = FALSE) + if (identical(linter_names, "_all_")) { + linter_names <- get_all_linters() + } check_deps <- any(c("object_usage_linter", "object_name_linter") %in% linter_names) - linters <- lapply(linter_names, function(linter_name) eval(call(linter_name))) + + if (params$benchmark) { + recorded_timings[[branch]] <- list() + linters <- lapply(linter_names, get_benchmarked_linter, branch) + } else { + linters <- lapply(linter_names, get_linter_from_name) + } + # accumulate results sequentially to allow for interruptions of long-running executions without losing progress out_temp_dir <- file.path(old_wd, params$outdir, ".partial", if (what == "pr") paste0("pr", number) else branch) dir.create(out_temp_dir, recursive = TRUE, showWarnings = FALSE) linted_packages <- 0L package_i <- 0L - pkgs_width <- as.integer(ceiling(log10(length(packages)))) - done_width <- as.integer(ceiling(log10(n_packages))) + pkgs_width <- max_digits(length(packages)) + done_width <- max_digits(n_packages) stdout_width <- getOption("width") # given how common it can be to skip packages (e.g. due to uninstalled # dependencies), use a while loop to try and reach n_packages instead @@ -337,6 +467,7 @@ run_workflow <- function(what, packages, linter_names, branch, number) { package_str <- gsub("_.*", "", basename(package)) success <- lint_one_package(package, linters, out_temp_dir, check_deps) linted_packages <- linted_packages + success + # TODO(michaelchirico): this stopped working interactively (only dumps at the end of the loop) -- why? cat(sprintf( "\r[%0*s : %0*s / %d] %s%s", pkgs_width, package_i, done_width, linted_packages, n_packages, package_str, @@ -372,7 +503,8 @@ if (length(packages) > 50L) { ) } else { message( - "Comparing output of lint_dir run for the following packages: ", + "Comparing output ", if (params$benchmark) "and performance ", + "of lint_dir run for the following packages: ", toString(basename(packages)) ) } @@ -398,7 +530,7 @@ if (has_target) { } setwd(old_wd) -message("Writing output to ", params$outfile) +message("Writing lint comparison output to ", params$outfile) load_partial_results <- function(target, is_branch) { directory <- file.path(params$outdir, ".partial", if (is_branch) target else paste0("pr", target)) @@ -419,6 +551,39 @@ if (has_target) { unlink(file.path(params$outdir, ".partial"), recursive = TRUE) data.table::fwrite(lints, params$outfile, row.names = FALSE) +if (params$benchmark) { + benchmark_file <- gsub("\\.csv$", "_benchmark_timings.csv", params$outfile) + message("Writing benchmark timing output to ", benchmark_file) + # lots of nesting, fun + timings_data <- rbindlist( + idcol = "branch", + lapply( + recorded_timings, + function(branch) rbindlist( + idcol = "linter", + lapply( + branch, + function(linter) rbindlist( + idcol = "package", + lapply( + linter, + function(package) data.table::data.table(filename = names(package), duration = unlist(package)) + ) + ) + ) + ) + ) + ) + # delete noisy/redundant information from filename + timings_data[, filename := sub(file.path(tempdir(), .BY$package, ""), "", filename), by = package] + # save data in wide format to save some space (data gets saved as column names) + timings_data[, + data.table::dcast(.SD, linter + package + filename ~ branch, value.var = "duration") + ][, + data.table::fwrite(timings_data, benchmark_file) + ] +} + if (interactive()) { unlink(temp_repo, recursive = TRUE) } else {