From e8c64156e468105a4323f59d1ece87c8fb6662f4 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Tue, 16 Feb 2016 10:14:40 -0800
Subject: [PATCH 1/2] Added parameter validations for colnames<-

---
 R/pkg/R/DataFrame.R                       | 22 +++++++++++++++++++++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  7 +++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 3b7b8250b94f7..50655e9382325 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -303,8 +303,28 @@ setMethod("colnames",
 #' @rdname columns
 #' @name colnames<-
 setMethod("colnames<-",
-          signature(x = "DataFrame", value = "character"),
+          signature(x = "DataFrame"),
           function(x, value) {
+            # Validate `value` before it is passed to toDF() below.
+            # It must be a character vector ...
+            if (!is.character(value)) {
+              stop("Invalid column names.")
+            }
+            # ... with exactly one name per column ...
+            if (length(value) != ncol(x)) {
+              stop(
+                "Column names must have the same length as the number of columns in the dataset.")
+            }
+            # ... none of which may be missing ...
+            if (any(is.na(value))) {
+              stop("Column names cannot be NA.")
+            }
+            # ... or contain '.'. grepl() is vectorized, so every name is
+            # checked, not only the first one.
+            if (any(grepl(".", value, fixed = TRUE))) {
+              stop("Column names cannot contain the '.' symbol.")
+            }
+
             sdf <- callJMethod(x@sdf, "toDF", as.list(value))
             dataFrame(sdf)
           })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7b5713720df87..e14028048c9d7 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -691,6 +691,13 @@ test_that("names() colnames() set the column names", {
   colnames(df) <- c("col3", "col4")
   expect_equal(names(df)[1], "col3")
 
+  expect_error(colnames(df) <- c("sepal_length", "sepal.width"),
+               "Column names cannot contain the '.' symbol.")
+  expect_error(colnames(df) <- c(1, 2), "Invalid column names.")
+  expect_error(colnames(df) <- c("a"),
+               "Column names must have the same length as the number of columns in the dataset.")
+  expect_error(colnames(df) <- c("1", NA), "Column names cannot be NA.")
+
   # Test base::colnames base::names
   m2 <- cbind(1, 1:4)
   expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2"))

From 07e541b7e55a322ea7c74e230ee897ebe9584197 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Mon, 22 Feb 2016 10:16:21 -0800
Subject: [PATCH 2/2] Added one test for replacing . with _ in column names assignment

---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index e14028048c9d7..efea70ef60b44 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -698,6 +698,10 @@ test_that("names() colnames() set the column names", {
                "Column names must have the same length as the number of columns in the dataset.")
   expect_error(colnames(df) <- c("1", NA), "Column names cannot be NA.")
 
+  # Note: if this test is broken, remove check for "." character on colnames<- method
+  irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
+  expect_equal(names(irisDF)[1], "Sepal_Length")
+
   # Test base::colnames base::names
   m2 <- cbind(1, 1:4)
   expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2"))