@@ -1314,9 +1314,8 @@ setMethod("except",
13141314# ' write.df(df, "myfile", "parquet", "overwrite")
13151315# ' }
13161316setMethod ("write.df ",
1317- signature(df = " DataFrame" , path = ' character' , source = ' character' ,
1318- mode = ' character' ),
1319- function (df , path = NULL , source = NULL , mode = " append" , ... ){
1317+ signature(df = " DataFrame" , path = ' character' ),
1318+ function (df , path , source = NULL , mode = " append" , ... ){
13201319 if (is.null(source )) {
13211320 sqlContext <- get(" .sparkRSQLsc" , envir = .sparkREnv )
13221321 source <- callJMethod(sqlContext , " getConf" , " spark.sql.sources.default" ,
@@ -1338,9 +1337,8 @@ setMethod("write.df",
#' @aliases saveDF
#' @export
setMethod("saveDF",
          signature(df = "DataFrame", path = 'character'),
          function(df, path, source = NULL, mode = "append", ...) {
            # saveDF is a synonym for write.df; forward every argument
            # unchanged so both entry points behave identically.
            write.df(df = df, path = path, source = source, mode = mode, ...)
          })
13461344
@@ -1431,3 +1429,128 @@ setMethod("describe",
14311429 sdf <- callJMethod(x @ sdf , " describe" , listToSeq(colList ))
14321430 dataFrame(sdf )
14331431 })
1432+
#' dropna
#'
#' Returns a new DataFrame omitting rows with null values.
#'
#' @param x A SparkSQL DataFrame.
#' @param how "any" or "all".
#'            if "any", drop a row if it contains any nulls.
#'            if "all", drop a row only if all its values are null.
#'            if minNonNulls is specified, how is ignored.
#' @param minNonNulls If specified, drop rows that have less than
#'                    minNonNulls non-null values.
#'                    This overwrites the how parameter.
#' @param cols Optional list of column names to consider.
#' @return A DataFrame
#'
#' @rdname nafunctions
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' dropna(df)
#' }
setMethod("dropna",
          signature(x = "DataFrame"),
          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
            how <- match.arg(how)

            # Default to considering every column when none are given.
            targetCols <- if (is.null(cols)) columns(x) else cols

            # Translate `how` into a non-null count threshold. An explicit
            # minNonNulls from the caller takes precedence over `how`.
            if (is.null(minNonNulls)) {
              minNonNulls <- if (how == "any") length(targetCols) else 1
            }

            # Delegate to the JVM-side DataFrameNaFunctions.drop.
            naFunctions <- callJMethod(x@sdf, "na")
            sdf <- callJMethod(naFunctions, "drop",
                               as.integer(minNonNulls),
                               listToSeq(as.list(targetCols)))
            dataFrame(sdf)
          })
1474+
#' @aliases dropna
#' @export
setMethod("na.omit",
          signature(x = "DataFrame"),
          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
            # na.omit is the base-R-style alias for dropna; forward all
            # arguments by name so defaults stay in sync with dropna.
            dropna(x, how = how, minNonNulls = minNonNulls, cols = cols)
          })
1482+
#' fillna
#'
#' Replace null values.
#'
#' @param x A SparkSQL DataFrame.
#' @param value Value to replace null values with.
#'              Should be an integer, numeric, character or named list.
#'              If the value is a named list, then cols is ignored and
#'              value must be a mapping from column name (character) to
#'              replacement value. The replacement value must be an
#'              integer, numeric or character.
#' @param cols optional list of column names to consider.
#'             Columns specified in cols that do not have matching data
#'             type are ignored. For example, if value is a character, and
#'             subset contains a non-character column, then the non-character
#'             column is simply ignored.
#' @return A DataFrame
#'
#' @rdname nafunctions
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' fillna(df, 1)
#' fillna(df, list("age" = 20, "name" = "unknown"))
#' }
setMethod("fillna",
          signature(x = "DataFrame"),
          function(x, value, cols = NULL) {
            if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
              # Fixed typo in user-facing message: "charactor" -> "character"
              stop("value should be an integer, numeric, character or named list.")
            }

            if (class(value) == "list") {
              # Every entry of the named list must carry a (non-empty) column name.
              colNames <- names(value)
              if (length(colNames) == 0 || !all(colNames != "")) {
                # Fixed grammar: "an a named list" -> "a named list"
                stop("value should be a named list with each name being a column name.")
              }

              # Convert the named list to an environment to be passed to the JVM
              valueMap <- new.env()
              for (col in colNames) {
                # Check each item in the named list is of a valid type
                v <- value[[col]]
                if (!(class(v) %in% c("integer", "numeric", "character"))) {
                  # Fixed typo: "charactor" -> "character"
                  stop("Each item in value should be an integer, numeric or character.")
                }
                valueMap[[col]] <- v
              }

              # When value is a named list, the caller is expected not to pass cols;
              # warn (not error) and ignore cols for backward compatibility.
              if (!is.null(cols)) {
                warning("When value is a named list, cols is ignored!")
                cols <- NULL
              }

              value <- valueMap
            } else if (is.integer(value)) {
              # The JVM-side fill overload expects a double; cast integer to numeric.
              value <- as.numeric(value)
            }

            # Delegate to the JVM-side DataFrameNaFunctions.fill, with or
            # without an explicit column subset.
            naFunctions <- callJMethod(x@sdf, "na")
            sdf <- if (length(cols) == 0) {
              callJMethod(naFunctions, "fill", value)
            } else {
              callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
            }
            dataFrame(sdf)
          })
0 commit comments