-
Notifications
You must be signed in to change notification settings - Fork 323
[SparkR-239] buildSchema
and field
functions
#235
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f7e88ba
162e76f
b50000d
483506a
eb728b1
79d4876
6b404df
0ab9862
c2bb246
af21482
c3be1ed
0c241bf
243df0d
921d64f
afb38cd
50f4c90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# A set of S3 classes and methods that support the SparkSQL `StructType` and `StructField` | |
# datatypes. These are used to create and interact with DataFrame schemas. | ||
|
||
#' structType | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A one line comment on top with a description of what is in this file would be good There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good call. Fixing that now. |
||
#' | ||
#' Create a structType object that contains the metadata for a DataFrame. Intended for | ||
#' use with createDataFrame and toDF. | ||
#' | ||
#' @param x a structField object (created with the structField() function) | |
#' @param ... additional structField objects | ||
#' @return a structType object | ||
#' @export | ||
#' @examples | ||
#'\dontrun{ | ||
#' sc <- sparkR.init() | ||
#' sqlCtx <- sparkRSQL.init(sc) | ||
#' rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) | ||
#' schema <- structType(structField("a", "integer"), structField("b", "string")) | ||
#' df <- createDataFrame(sqlCtx, rdd, schema) | ||
#' } | ||
structType <- function(x, ...) {
  # S3 generic: the method is selected from the class of the first
  # argument (`x`); any further arguments are forwarded to that method.
  UseMethod("structType")
}
|
||
# structType method for a backend object handle (`jobj`).
#
# Wraps `x` in a list of class "structType" with two elements:
#   jobj   - the handle that was passed in
#   fields - a closure that calls the "fields" method on the handle via
#            callJMethod() and wraps each returned element with structField()
#
# `...` is accepted only so the signature matches the structType generic;
# it is not used.
structType.jobj <- function(x, ...) {
  obj <- structure(list(), class = "structType")
  obj$jobj <- x
  # The closure looks `obj` up lazily, so it sees the fully built object.
  obj$fields <- function() {
    lapply(callJMethod(obj$jobj, "fields"), structField)
  }
  obj
}
|
||
# structType method for structField input.
#
# Gathers `x` and every object passed through `...`, checks that each one
# inherits from "structField", and hands their `jobj` elements to the static
# "createStructType" method of SQLUtils. The result is passed back through
# structType() so that it is wrapped as a structType object.
structType.structField <- function(x, ...) {
  candidates <- list(x, ...)
  isField <- vapply(candidates, inherits, logical(1), what = "structField")
  if (any(!isField)) {
    stop("All arguments must be structField objects.")
  }
  # Extract the handle stored on each field wrapper.
  handles <- lapply(candidates, function(f) f$jobj)
  structType(callJStatic("edu.berkeley.cs.amplab.sparkr.SQLUtils",
                         "createStructType",
                         listToSeq(handles)))
}
|
||
#' Print a Spark StructType.
#'
#' Writes a "StructType" header followed by one line per field, giving the
#' field's name, the string form of its data type and its nullable flag.
#' The fields are obtained by calling the object's `fields()` closure.
#'
#' @param x A structType object
#' @param ... unused; accepted for compatibility with the print generic
#' @return `x`, invisibly, as is conventional for print methods
print.structType <- function(x, ...) {
  # vapply() keeps the result a character vector even when there are no fields.
  fieldLines <- vapply(x$fields(), function(field) {
    paste0("|-", "name = \"", field$name(),
           "\", type = \"", field$dataType.toString(),
           "\", nullable = ", field$nullable(), "\n")
  }, character(1))
  cat("StructType\n", fieldLines, sep = "")
  invisible(x)
}
|
||
#' structField
#'
#' Build a structField object describing one field (column) of a schema:
#' its name, its data type and whether it may hold missing values.
#'
#' @param x The name of the field
#' @param type The data type of the field
#' @param nullable A logical vector indicating whether or not the field is nullable
#' @return a structField object
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
#' field1 <- structField("a", "integer", TRUE)
#' field2 <- structField("b", "string", TRUE)
#' schema <- structType(field1, field2)
#' df <- createDataFrame(sqlCtx, rdd, schema)
#' }
structField <- function(x, ...) {
  # S3 generic: the method is selected from the class of the first
  # argument (`x`); any further arguments are forwarded to that method.
  UseMethod("structField")
}
|
||
# structField method for a backend object handle (`jobj`).
#
# Wraps `x` in a list of class "structField". Besides the handle itself
# (`jobj`), the list carries accessor closures that each forward to
# callJMethod():
#   name()                  - calls "name" on the handle
#   dataType()              - calls "dataType" on the handle
#   dataType.toString()     - calls "toString" on the result of dataType()
#   dataType.simpleString() - calls "simpleString" on the result of dataType()
#   nullable()              - calls "nullable" on the handle
#
# `...` is accepted only so the signature matches the structField generic;
# it is not used.
structField.jobj <- function(x, ...) {
  obj <- structure(list(), class = "structField")
  obj$jobj <- x
  obj$name <- function() { callJMethod(x, "name") }
  obj$dataType <- function() { callJMethod(x, "dataType") }
  # `[[` is used for the self-lookups so they never rely on the partial
  # matching that `$` performs on lists.
  obj$dataType.toString <- function() {
    callJMethod(obj[["dataType"]](), "toString")
  }
  obj$dataType.simpleString <- function() {
    callJMethod(obj[["dataType"]](), "simpleString")
  }
  obj$nullable <- function() { callJMethod(x, "nullable") }
  obj
}
|
||
# structField method for a field described by name and type strings.
#
# Arguments:
#   x        - the field name; must be a single character string
#   type     - the field's data type; must be a single character string and
#              one of the supported type names listed in the body
#   nullable - TRUE or FALSE (default TRUE)
#
# Validates the arguments, then calls the static "createStructField" method
# of SQLUtils and passes the result back through structField() so that it is
# wrapped as a structField object.
#
# Stops with an error when an argument has the wrong kind or length, when
# `nullable` is NA, or when `type` is not a supported type name.
structField.character <- function(x, type, nullable = TRUE) {
  # is.character()/is.logical() are used instead of comparing class() to a
  # string, and the length checks keep vectors out of the scalar tests below.
  isString <- function(value) is.character(value) && length(value) == 1
  if (!isString(x)) {
    stop("Field name must be a string.")
  }
  if (!isString(type)) {
    stop("Field type must be a string.")
  }
  if (!(is.logical(nullable) && length(nullable) == 1 && !is.na(nullable))) {
    stop("nullable must be either TRUE or FALSE")
  }
  # Named so that it does not mask base::options inside this function.
  supportedTypes <- c("byte",
                      "integer",
                      "double",
                      "numeric",
                      "character",
                      "string",
                      "binary",
                      "raw",
                      "logical",
                      "boolean",
                      "timestamp",
                      "date")
  if (!(type %in% supportedTypes)) {
    stop(paste("Unsupported type for Dataframe:", type))
  }
  sfObj <- callJStatic("edu.berkeley.cs.amplab.sparkr.SQLUtils",
                       "createStructField",
                       x,
                       type,
                       nullable)
  structField(sfObj)
}
|
||
#' Print a Spark StructField.
#'
#' Writes a single `StructField(...)` description containing the field's
#' name, the string form of its data type and its nullable flag, each read
#' through the corresponding accessor closure on `x`. No trailing newline is
#' written.
#'
#' @param x A structField object
#' @param ... unused; accepted for compatibility with the print generic
#' @return `x`, invisibly, as is conventional for print methods
print.structField <- function(x, ...) {
  cat("StructField(name = \"", x$name(),
      "\", type = \"", x$dataType.toString(),
      "\", nullable = ", x$nullable(),
      ")",
      sep = "")
  invisible(x)
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just curious: does the `structField` constructor fail if we don't export `structField.jobj`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does in some cases. For example, when you call the `fields()` closure of a pre-existing `structType` object, at that point you're trying to explicitly create a `structField` from a `jobj` directly and it won't be able to find the correct method: