Merged

132 commits
ef12f41
start with naive conversion using feather
javierluraschi Jul 19, 2018
d1bb682
use scala helper to load binary rdd holding arrow data
javierluraschi Jul 19, 2018
bad70b1
support arrowconverters using public spark api
javierluraschi Jul 20, 2018
d8cf360
make use of converters from arrow_copy_to
javierluraschi Jul 20, 2018
9e5ff64
fix typos while calling arrow converter from r
javierluraschi Jul 20, 2018
fda084e
add interface for python arrow serializer to compare with ease
javierluraschi Jul 20, 2018
596fb28
reuse known schema instead of relying on arrow's file schema
javierluraschi Jul 20, 2018
f0a52de
use proper arrow batch writer since batches are expected in java arro…
javierluraschi Jul 21, 2018
a33ae58
avoid __index_level_0__ while converting to arrow
javierluraschi Jul 23, 2018
a223578
use internal rows to match python's arrow converter
javierluraschi Jul 23, 2018
b64e4ed
use sparklyr's invoke to properly match method arguments
javierluraschi Jul 24, 2018
348493c
fix reticulate reference in arrow poc
javierluraschi Jul 24, 2018
5963031
add arrow as remote
javierluraschi Oct 2, 2018
d7c0ff8
make use of new arrow serializer and default to this
javierluraschi Oct 2, 2018
a6d1b2b
simplify arrow serializers to only use the R serializer
javierluraschi Oct 2, 2018
7ef97b1
enable arrow upstream serialization in sdf_copy_to, dplyr and dbi
javierluraschi Oct 2, 2018
12a22ad
add tobatchiterator for arrowconverters
javierluraschi Oct 3, 2018
0c8858b
complete arrow serialization to enable raw collection
javierluraschi Oct 4, 2018
eadfec3
add message headers with schema to arrow collection
javierluraschi Oct 4, 2018
004e24a
make use of arrows record_batch() pr to improve performance
javierluraschi Oct 10, 2018
5c37842
support for collecting using arrow
javierluraschi Oct 10, 2018
e716469
enable arrow collection
javierluraschi Oct 10, 2018
23c353a
support for arrow collection in spark_apply scala codebase
javierluraschi Oct 12, 2018
e2ff0ae
support for arrow collection in spark_apply R codebase
javierluraschi Oct 12, 2018
8917bb3
rebuild docs and sources
javierluraschi Oct 12, 2018
85830e3
fix null pointer exception while processing distributed map due to se…
javierluraschi Oct 12, 2018
45fa427
fix typo in refactored schema check
javierluraschi Oct 12, 2018
f452a19
fix schema retrieval from arrow refactoring
javierluraschi Oct 12, 2018
a3e3461
fix missing sc exception in R worker script
javierluraschi Oct 12, 2018
2d35dd8
fix additional typos from spark_apply() refactoring
javierluraschi Oct 12, 2018
a693501
proper support for passing timezone into spark_apply with arrow
javierluraschi Oct 12, 2018
b719a5e
support enabling arrow in r worker
javierluraschi Oct 12, 2018
6562dcf
use worker invokes in workers code
javierluraschi Oct 12, 2018
610fdae
fix parameter call order in r worker with arrow
javierluraschi Oct 12, 2018
ee3a052
use entire record batch while streaming bindings are implemented
javierluraschi Oct 13, 2018
e12add8
fix schema retrieval while using spark_apply with arrow
javierluraschi Oct 13, 2018
29049a4
disable arrow for int64 while type not implemented in arrow
javierluraschi Oct 13, 2018
f49d0e0
support for returning data using arrow in spark_apply()
javierluraschi Oct 16, 2018
a678bb0
fix r cmd check warnings
javierluraschi Oct 16, 2018
8ce9468
fix connection issue using older sparklyr package with newer spark ve…
javierluraschi Oct 17, 2018
e1c86c1
support for turning on/off arrow and jit in spark_apply
javierluraschi Oct 17, 2018
af76e45
transition to use new streaming bindings pr
javierluraschi Oct 17, 2018
de62edb
remove remote to avoid travis dependencies
javierluraschi Oct 17, 2018
a73cc39
fix r cmd check warnings
javierluraschi Oct 18, 2018
cf20011
support for factors in copy_to and apply within arrow
javierluraschi Oct 18, 2018
dc1d4f1
support in spark_apply() for groupby using arrow in scala
javierluraschi Oct 18, 2018
a632c52
support in spark_apply() for groupby using arrow in R
javierluraschi Oct 18, 2018
f299bd2
enable groupby over arrow only on spark 2.3 or newer
javierluraschi Oct 18, 2018
79fc721
fix r cmd warning
javierluraschi Oct 18, 2018
36da73f
add missing livy arrow sources
javierluraschi Oct 19, 2018
cff9863
private classes not compatible with livy connections
javierluraschi Oct 19, 2018
f1bc255
add support for arrow in travis
javierluraschi Oct 18, 2018
70727b6
use addons to install arrow binaries
javierluraschi Oct 19, 2018
6da62af
fix typos and clean settings
javierluraschi Oct 19, 2018
b410560
correct spacing for travis apt source line
javierluraschi Oct 19, 2018
5c9eed9
fix script check in travis arrow installer
javierluraschi Oct 19, 2018
dc3f02b
one more fix to arrow travis installer
javierluraschi Oct 19, 2018
a9d3248
install devtools package
javierluraschi Oct 19, 2018
0e664c1
fix typo while installing devtools for arrow in travis
javierluraschi Oct 19, 2018
7ced2d3
enable arrow in travis tests
javierluraschi Oct 19, 2018
7011854
fix iris copy test under arrow
javierluraschi Oct 19, 2018
eccd998
qualify utils class for livy
javierluraschi Oct 19, 2018
120aedf
split arrowbatchstreamwriter into its own file
javierluraschi Oct 20, 2018
409c706
enable arrow transfer with livy
javierluraschi Oct 20, 2018
afacd9b
Revert "qualify utils class for livy"
javierluraschi Oct 20, 2018
9832c89
make arrow library call in tests dynamic to avoid suggests
javierluraschi Oct 20, 2018
945133f
fix livy connections under spark_apply() in spark 2.3
javierluraschi Oct 20, 2018
85d18e5
better label for arrow travis environment
javierluraschi Oct 20, 2018
7610a7f
enable copy_to and collect using arrow and livy
javierluraschi Oct 22, 2018
1fbc963
disable spark apply packages distribution by default in livy
javierluraschi Oct 22, 2018
792d21e
add log entry to connections pane under livy
javierluraschi Oct 22, 2018
cdcd615
support for spark_apply in livy with arrow
javierluraschi Oct 22, 2018
945fb55
no need to use null checks with ARROW-3547 resolved
javierluraschi Oct 22, 2018
2858053
support for debugging and profiling schema infer spark apply queries
javierluraschi Oct 23, 2018
e690f1d
refactor dupe code retrieving arrow batch in spark apply
javierluraschi Oct 23, 2018
c7dad52
apply schema limits before grouping to properly reduce row size and f…
javierluraschi Oct 23, 2018
fba17d7
add group by column while using arrow in spark apply
javierluraschi Oct 23, 2018
2ec9697
consistent prefixes for spark worker helper functions
javierluraschi Oct 23, 2018
0c4b8d4
properly clean factors from grouped columns under spark apply
javierluraschi Oct 23, 2018
e3f0548
mark spark apply data frames to not use factors
javierluraschi Oct 23, 2018
22f7ed1
map integer64 to longtype in scala
javierluraschi Oct 23, 2018
643e154
ignore local tests output
javierluraschi Oct 24, 2018
9cae7bf
repartition tests under arrow change final repartitions
javierluraschi Oct 24, 2018
6a664e5
default to 32 bit integers in sdf index creation functions
javierluraschi Oct 25, 2018
46b5f73
couple fixes for sdf 32bit indexes
javierluraschi Oct 25, 2018
2ec641a
disable collection of nested data while ARROW-2969 is implemented
javierluraschi Oct 25, 2018
3b66fb9
disable collection of nested arrays while ARROW-2969 is implemented
javierluraschi Oct 25, 2018
476b329
ml_find_synonyms() is not deterministic under arrow
javierluraschi Oct 25, 2018
73fab79
fix typos in debug_string()
javierluraschi Oct 25, 2018
0684a59
disable arrow for timestamps while ARROW-2969 and #1733 are implemented
javierluraschi Oct 25, 2018
1fd2ec0
also disable arrow with StructType until ARROW-2969 is implemented
javierluraschi Oct 25, 2018
b4e623e
fix broom tests under arrow
javierluraschi Oct 26, 2018
dc3d9e9
cast dplyr tests to integers for arrow
javierluraschi Oct 26, 2018
b1c754f
cast pivot tests to integers for arrow
javierluraschi Oct 26, 2018
76782e0
NaN in R arrow bindings require ARROW-3615
javierluraschi Oct 26, 2018
85c7ccf
attempt to enable all tests under arrow
javierluraschi Oct 22, 2018
bd5b857
couple test fixes
javierluraschi Oct 26, 2018
24ee6ce
fix livy from workerutils refactor
javierluraschi Oct 26, 2018
3034e52
fix dplyr top_n test
javierluraschi Oct 26, 2018
69a7c80
disable test while #1736 is investigated
javierluraschi Oct 26, 2018
2b04013
disable test while #1736 is investigated
javierluraschi Oct 26, 2018
650b958
adjust tolerance and factors in tests
javierluraschi Oct 26, 2018
9b9837d
support to disable arrow in spark_apply() based on schema
javierluraschi Oct 26, 2018
5c2bda9
skip bisecting kmeans over iris in arrow
javierluraschi Oct 26, 2018
6df0cee
skip on travis failure that can't be reproduced locally
javierluraschi Oct 26, 2018
3ed795c
explicit cast on dplyr top_n() test
javierluraschi Oct 26, 2018
2bad77f
longer wait time for livy start
javierluraschi Oct 26, 2018
bc60178
skip top_n() test under arrow
javierluraschi Oct 26, 2018
9a5fc43
test improvements for arrow support
javierluraschi Oct 26, 2018
a041be0
clean up travis script to print arrow results
javierluraschi Oct 26, 2018
014001a
adjust package installation paths in travis
javierluraschi Oct 26, 2018
0937a04
troubleshoot arrow
javierluraschi Oct 26, 2018
48b2966
implement performance reporter for testthat
javierluraschi Oct 26, 2018
7ebad73
support to print summaries and failures
javierluraschi Oct 27, 2018
4240507
better printing for performance results
javierluraschi Oct 27, 2018
a994e90
improve performance results formatting
javierluraschi Oct 27, 2018
d846aa4
add missing r6 suggest
javierluraschi Oct 27, 2018
de7726b
fix perf printing order
javierluraschi Oct 27, 2018
70874dd
verbose livy to avoid travis timeout
javierluraschi Oct 27, 2018
6c2be97
resilient checks while printing perf results
javierluraschi Oct 27, 2018
7d1210c
log start of livy install
javierluraschi Oct 27, 2018
b30d9fb
properly disable code coverage
javierluraschi Oct 27, 2018
1be357f
livy memory usage improvements
javierluraschi Oct 27, 2018
9a48865
fix performance reporter formatting
javierluraschi Oct 27, 2018
31181ec
split kmeans and spark apply test to run subset in livy
javierluraschi Oct 27, 2018
0a7b757
use repartitions parameter in arrow to improve perf in master
javierluraschi Oct 29, 2018
8145dcc
re-enable pivot test with arrow with upstream fix
javierluraschi Oct 30, 2018
a74a19c
use arrow branch with bit64 description fix
javierluraschi Oct 30, 2018
22b0d78
fix remote in description
javierluraschi Oct 30, 2018
cc18a00
arrow installed from travis helper
javierluraschi Oct 31, 2018
2deea96
revert remote branch since ARROW-3657 is merged
javierluraschi Nov 2, 2018
ed1413f
rebuild jars, sources and docs
javierluraschi Nov 2, 2018
5 changes: 5 additions & 0 deletions .gitignore
@@ -48,3 +48,8 @@ flights_model
flights_pipeline
new_flights_model
/checkpoints
+tests/testthat/test.csv
+tests/testthat/test.json
+tests/testthat/batch.csv/
+tests/testthat/iris-in/
+tests/testthat/iris-out/
7 changes: 6 additions & 1 deletion .travis.R
@@ -3,13 +3,18 @@ args <- commandArgs(trailingOnly=TRUE)
if (length(args) == 0) {
stop("Missing arguments")
} else if (args[[1]] == "--testthat") {
parent_dir <- dir("../", full.names = TRUE)
parent_dir <- dir(".", full.names = TRUE)
sparklyr_package <- parent_dir[grepl("sparklyr_", parent_dir)]
install.packages(sparklyr_package, repos = NULL, type = "source")

+on.exit(setwd(".."))
+setwd("tests")
source("testthat.R")
} else if (args[[1]] == "--coverage") {
covr::codecov()
} else if (args[[1]] == "--arrow") {
install.packages("devtools")
devtools::install_github("apache/arrow", subdir = "r")
} else {
stop("Unsupported arguments")
}
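For reference, the three branches above give this helper script its CI-facing interface; a sketch of the invocations (the actual Travis usage appears in .travis.yml below):

Rscript .travis.R --testthat   # install the built sparklyr tarball, then run tests/testthat.R
Rscript .travis.R --coverage   # upload code coverage via covr::codecov()
Rscript .travis.R --arrow      # install the Arrow R bindings from apache/arrow with devtools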
30 changes: 22 additions & 8 deletions .travis.yml
@@ -33,31 +33,45 @@ matrix:
env:
- SPARK_VERSION="2.3.2"
- JAVA_VERSION=openjdk8
- name: "Livy 0.5.0 (R release, openjdk8)"
- name: "Livy 0.5.0 (R release, openjdk8, Spark 2.3.0)"
r: release
env:
- LIVY_VERSION="0.5.0"
- SPARK_VERSION="2.3.0"
- JAVA_VERSION=openjdk8
- name: "Arrow (R release, openjdk8)"
r: release
env:
- R_ARROW="true"
- JAVA_VERSION=openjdk8
addons:
apt:
sources:
- sourceline: deb https://packages.red-data-tools.org/ubuntu/ trusty universe
key_url: https://packages.red-data-tools.org/ubuntu/red-data-tools-keyring.gpg
packages:
- apt-transport-https
- lsb-release
- libarrow-dev
- libarrow-glib-dev

before_install:
- jdk_switcher use $JAVA_VERSION
- echo $JAVA_HOME
+- if [[ $R_ARROW == "true" ]]; then Rscript .travis.R --arrow; fi

script:
- |
R CMD build .
export SPARKLYR_LOG_FILE=/tmp/sparklyr.log
-if [[ $SPARK_VERSION == "2.3.0" ]]; then
+if [[ $CODE_COVERAGE == "true" ]]; then
R CMD check --no-build-vignettes --no-manual --no-tests sparklyr*tar.gz
-travis_wait 45 Rscript .travis.R --coverage
+Rscript .travis.R --coverage
else
-travis_wait 35 R CMD check --no-build-vignettes --no-manual sparklyr*tar.gz
+travis_wait 35 R CMD check --no-build-vignettes --no-tests --no-manual sparklyr*tar.gz
+Rscript .travis.R --testthat
fi

after_failure:
- |
-cd tests
-export NOT_CRAN=true
-travis_wait 35 Rscript ../.travis.R --testthat
-sleep 2
+grep -B 10 -A 20 ERROR /tmp/sparklyr.log
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -59,6 +59,7 @@ Suggests:
mlbench,
nnet,
nycflights13,
+R6,
RCurl,
reshape2,
testthat
10 changes: 10 additions & 0 deletions NEWS.md
@@ -1,5 +1,11 @@
# Sparklyr 0.9.3 (unreleased)

+- Improved memory use in Livy by using string builders and avoiding print
+backs.
+
+- `sdf_len()`, `sdf_along()` and `sdf_seq()` default to 32-bit integers
+but support 64 bits through the `bits` parameter.
+
- Removed `stream_read_jdbc()` and `stream_write_jdbc()` since they are
not yet implemented in Spark.

@@ -25,6 +31,10 @@
- Fix new connection RStudio selectors colors when running
under OS X Mojave.

+- Support for launching Livy logs from the connections pane.

# Sparklyr 0.9.2

- Removed `overwrite` parameter in `spark_read_table()` (#1698).

- Fix regression preventing using R 3.2 (#1695).
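A minimal sketch of the new `bits` parameter from the changelog entry above (local connection assumed; per the "map integer64 to longtype in scala" commit in this PR, 64-bit values map to Spark's LongType via integer64):

library(sparklyr)
sc <- spark_connect(master = "local")

sdf_len(sc, 10)             # default: 32-bit IntegerType id column
sdf_len(sc, 10, bits = 64)  # opt-in: 64-bit LongType id column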
105 changes: 105 additions & 0 deletions R/arrow_data.R
@@ -0,0 +1,105 @@
arrow_enabled <- function(sc, object) {
enabled <- spark_config_value(sc, "sparklyr.arrow", "package:arrow" %in% search())
if (!enabled) {
enabled
}
else {
arrow_enabled_object(object)
}
}

arrow_enabled_object <- function(object) {
UseMethod("arrow_enabled_object")
}

arrow_enabled_object.tbl_spark <- function(object) {
sdf <- spark_dataframe(object)
arrow_enabled_object(sdf)
}

arrow_enabled_object.spark_jobj <- function(object) {
unsupported_expr <- ".Vector|ArrayType|TimestampType|StructType"
unsupported <- object %>%
sdf_schema() %>%
Filter(function(x) grepl(unsupported_expr, x$type), .)
enabled <- length(unsupported) == 0
if (!enabled) warning("Arrow disabled due to columns: ", paste(names(unsupported), collapse = ", "))

enabled
}

arrow_enabled_dataframe_schema <- function(types) {
unsupported_expr <- "POSIXct"
unsupported <- Filter(function(e) grepl(unsupported_expr , e), types)

enabled <- length(unsupported) == 0
if (!enabled) warning("Arrow disabled due to columns: ", paste(names(unsupported), collapse = ", "))

enabled
}

arrow_enabled_object.data.frame <- function(object) {
arrow_enabled_dataframe_schema(sapply(object, function(e) class(e)[[1]]))
}

arrow_batch <- function(df)
{
record_batch <- get("record_batch", envir = as.environment(asNamespace("arrow")))
write_record_batch <- get("write_record_batch", envir = as.environment(asNamespace("arrow")))

record <- record_batch(df)
write_record_batch(record, raw())
}

arrow_read_stream <- function(stream)
{
record_batch_stream_reader <- get("record_batch_stream_reader", envir = as.environment(asNamespace("arrow")))
read_record_batch <- get("read_record_batch", envir = as.environment(asNamespace("arrow")))

reader <- record_batch_stream_reader(stream)
record_entry <- read_record_batch(reader)

entries <- list()
while (!is.null(record_entry)) {
entries[[length(entries) + 1]] <- tibble::as_tibble(record_entry)
record_entry <- read_record_batch(reader)
}

entries
}

arrow_copy_to <- function(sc, df, parallelism)
{
# replace factors with characters
if (any(sapply(df, is.factor))) {
df <- dplyr::as_data_frame(lapply(df, function(x) if(is.factor(x)) as.character(x) else x))
}

# serialize to arrow
bytes <- arrow_batch(df)

# create batches data frame
batches <- list(bytes)

# build schema
schema <- spark_data_build_types(sc, lapply(df, class))

# load arrow file in scala
rdd <- invoke_static(sc, "sparklyr.ArrowHelper", "javaRddFromBinaryBatches", spark_context(sc), batches, parallelism)
sdf <- invoke_static(sc, "sparklyr.ArrowConverters", "toDataFrame", rdd, schema, spark_session(sc))

sdf
}

arrow_collect <- function(tbl, ...)
{
sc <- spark_connection(tbl)
sdf <- spark_dataframe(tbl)
session <- spark_session(sc)

time_zone <- spark_session(sc) %>% invoke("sessionState") %>% invoke("conf") %>% invoke("sessionLocalTimeZone")

invoke_static(sc, "sparklyr.ArrowConverters", "toArrowBatchRdd", sdf, session, time_zone) %>%
arrow_read_stream() %>%
dplyr::bind_rows()
}
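Taken together, these helpers make Arrow transparent to callers: copying and collecting route through arrow_copy_to() and arrow_collect() whenever arrow_enabled() returns TRUE, which by default happens when the arrow package is attached. A minimal sketch (local connection assumed):

library(sparklyr)
library(arrow)  # attaching arrow flips the default checked by arrow_enabled()

sc <- spark_connect(master = "local")
mtcars_tbl <- sdf_copy_to(sc, mtcars, overwrite = TRUE)  # serialized via arrow_copy_to()
collected <- dplyr::collect(mtcars_tbl)                  # deserialized via arrow_collect()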
1 change: 1 addition & 0 deletions R/config_settings.R
@@ -8,6 +8,7 @@ spark_config_settings <- function() {
sparklyr.apply.packages = "Configures default value for packages parameter in spark_apply().",
sparklyr.apply.rlang = "Experimental feature. Turns on improved serialization for spark_apply().",
sparklyr.apply.schema.infer = "Number of rows collected to infer schema when column types specified in spark_apply().",
sparklyr.arrow = "Use Apache Arrow to serialize data?",
sparklyr.backend.interval = "Total seconds sparklyr will check on a backend operation.",
sparklyr.backend.timeout = "Total seconds before sparklyr will give up waiting for a backend operation to complete.",
sparklyr.connect.aftersubmit = "R function to call after spark-submit executes.",
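For example, Arrow serialization can be turned off for a session even when the arrow package is loaded; a sketch using the setting documented above:

config <- spark_config()
config$sparklyr.arrow <- FALSE  # force the CSV serializers instead
sc <- spark_connect(master = "local", config = config)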
6 changes: 6 additions & 0 deletions R/connection_viewer.R
@@ -23,6 +23,12 @@ spark_actions <- function(scon) {
callback = function() {
utils::browseURL(file.path(scon$master, "ui"))
}
),
"Log" = list(
icon = file.path(icons, "spark-log.png"),
callback = function() {
utils::browseURL(file.path(scon$master, "ui", "session", scon$sessionId, "log"))
}
)
)
)
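For a local Livy connection, the new action would open a log URL of roughly this shape (host, port, and session id illustrative):

# file.path(scon$master, "ui", "session", scon$sessionId, "log")
# e.g. http://localhost:8998/ui/session/0/log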
4 changes: 3 additions & 1 deletion R/core_worker_config.R
@@ -5,6 +5,7 @@ worker_config_serialize <- function(config) {
spark_config_value(config, "sparklyr.worker.gateway.address", "localhost"),
if (isTRUE(config$profile)) "TRUE" else "FALSE",
if (isTRUE(config$schema)) "TRUE" else "FALSE",
+if (isTRUE(config$arrow)) "TRUE" else "FALSE",
sep = ";"
)
}
@@ -17,6 +18,7 @@ worker_config_deserialize <- function(raw) {
sparklyr.gateway.port = as.integer(parts[[2]]),
sparklyr.gateway.address = parts[[3]],
profile = as.logical(parts[[4]]),
-schema = as.logical(parts[[5]])
+schema = as.logical(parts[[5]]),
+arrow = as.logical(parts[[6]])
)
}
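The worker config therefore travels as a single ";"-separated string, with the new arrow flag appended as the sixth field. A sketch of the deserializing side (field values illustrative; the earlier fields are elided in the hunk above):

raw <- "FALSE;8880;localhost;FALSE;TRUE;TRUE"
parts <- strsplit(raw, ";")[[1]]
as.logical(parts[[6]])  # was arrow enabled for this worker?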
32 changes: 24 additions & 8 deletions R/data_copy.R
@@ -115,12 +115,20 @@ spark_serialize_csv_scala <- function(sc, df, columns, repartition) {
invoke(hive_context(sc), "createDataFrame", rdd, structType)
}

+spark_serialize_arrow <- function(sc, df, columns, repartition) {
+arrow_copy_to(
+sc,
+df,
+as.integer(if (repartition <= 0) 1 else repartition)
+)
+}
+
spark_data_copy <- function(
sc,
df,
name,
repartition,
-serializer = getOption("sparklyr.copy.serializer", "csv_file")) {
+serializer = NULL) {

if (!is.numeric(repartition)) {
stop("The repartition parameter must be an integer")
@@ -130,12 +138,19 @@ spark_data_copy <- function(
stop("Using a local file to copy data is not supported for remote clusters")
}

-serializer <- ifelse(is.null(serializer),
-ifelse(spark_connection_is_local(sc) ||
-spark_connection_is_yarn_client(sc),
-"csv_file_scala",
-"csv_string"),
-serializer)
+serializer <- ifelse(
+is.null(serializer),
+ifelse(
+arrow_enabled(sc, df),
+"arrow",
+ifelse(
+spark_connection_is_local(sc) || spark_connection_is_yarn_client(sc),
+"csv_file_scala",
+getOption("sparklyr.copy.serializer", "csv_string")
+)
+),
+serializer
+)

# Spark unfortunately has a number of issues with '.'s in column names, e.g.
#
@@ -159,7 +174,8 @@
serializers <- list(
"csv_file" = spark_serialize_csv_file,
"csv_string" = spark_serialize_csv_string,
"csv_file_scala" = spark_serialize_csv_scala
"csv_file_scala" = spark_serialize_csv_scala,
"arrow" = spark_serialize_arrow
)

df <- serializers[[serializer]](sc, df, columns, repartition)
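The nested ifelse() above encodes a three-step fallback for picking a serializer. Rewritten as plain if/else for readability (choose_serializer is a hypothetical name, not part of the package):

choose_serializer <- function(sc, df, serializer = NULL) {
  if (!is.null(serializer)) return(serializer)  # caller's explicit choice wins
  if (arrow_enabled(sc, df)) return("arrow")    # Arrow when loaded and column types are supported
  if (spark_connection_is_local(sc) || spark_connection_is_yarn_client(sc)) {
    return("csv_file_scala")                    # local file handoff to the scala backend
  }
  getOption("sparklyr.copy.serializer", "csv_string")  # remote clusters
}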
2 changes: 1 addition & 1 deletion R/dbi_spark_table.R
@@ -30,7 +30,7 @@ setMethod("dbReadTable", c("spark_connection", "character"),


setMethod("dbListTables", "spark_connection", function(conn) {
df <- df_from_sql(conn, "SHOW TABLES")
df <- df_from_sql(conn, "SHOW TABLES", arrow = FALSE)

tableNames <- df$tableName
filtered <- grep("^sparklyr_tmp_", tableNames, invert = TRUE, value = TRUE)
3 changes: 1 addition & 2 deletions R/install_spark_versions.R
@@ -124,7 +124,7 @@ spark_versions <- function(latest = TRUE) {
if (dir.exists(maybeDir)) {
fileName <- basename(maybeDir)
m <- regmatches(fileName, regexec(spark_versions_file_pattern(), fileName))[[1]]
-if (length(m) > 2) list(spark = m[[2]], hadoop = m[[3]]) else NULL
+if (length(m) > 2) list(spark = m[[2]], hadoop = m[[3]], pattern = fileName) else NULL
}
})
),
@@ -134,7 +134,6 @@

newRow <- c(row, installed = TRUE)
newRow$base <- ""
-newRow$pattern <- ""
newRow$download <- ""
newRow$default <- FALSE
newRow$hadoop_default <- FALSE