@@ -1314,9 +1314,8 @@ setMethod("except",
13141314# ' write.df(df, "myfile", "parquet", "overwrite")
13151315# ' }
13161316setMethod ("write.df ",
1317-           signature(df  =  " DataFrame" path  =  ' character' source  =  ' character' 
1318-                     mode  =  ' character' 
1319-           function (df , path  =  NULL , source  =  NULL , mode  =  " append" ... ){
1317+           signature(df  =  " DataFrame" path  =  ' character' 
1318+           function (df , path , source  =  NULL , mode  =  " append" ... ){
13201319            if  (is.null(source )) {
13211320              sqlContext  <-  get(" .sparkRSQLsc" envir  =  .sparkREnv )
13221321              source  <-  callJMethod(sqlContext , " getConf" " spark.sql.sources.default" 
@@ -1338,9 +1337,8 @@ setMethod("write.df",
13381337# ' @aliases saveDF
13391338# ' @export
#' @aliases saveDF
#' @export
setMethod("saveDF",
          # Only df and path are dispatched on; source and mode fall through
          # as ordinary optional arguments (matches the relaxed write.df
          # signature so callers need not always supply source/mode).
          signature(df = "DataFrame", path = 'character'),
          function(df, path, source = NULL, mode = "append", ...) {
            # saveDF is a thin alias: delegate everything to write.df,
            # forwarding any extra source-specific options via ...
            write.df(df, path, source, mode, ...)
          })
13461344
@@ -1431,3 +1429,128 @@ setMethod("describe",
14311429            sdf  <-  callJMethod(x @ sdf , " describe" colList ))
14321430            dataFrame(sdf )
14331431          })
1432+ 
#' dropna
#'
#' Returns a new DataFrame omitting rows with null values.
#'
#' @param x A SparkSQL DataFrame.
#' @param how "any" or "all".
#'            if "any", drop a row if it contains any nulls.
#'            if "all", drop a row only if all its values are null.
#'            if minNonNulls is specified, how is ignored.
#' @param minNonNulls If specified, drop rows that have less than
#'                    minNonNulls non-null values.
#'                    This overwrites the how parameter.
#' @param cols Optional list of column names to consider.
#' @return A DataFrame
#'
#' @rdname nafunctions
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' dropna(df)
#' }
setMethod("dropna",
          signature(x = "DataFrame"),
          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
            how <- match.arg(how)
            # Default to considering every column of the DataFrame.
            if (is.null(cols)) {
              cols <- columns(x)
            }
            # Translate "how" into a minimum non-null count:
            # "any" -> all columns must be non-null (drop if any null),
            # "all" -> at least one column must be non-null.
            if (is.null(minNonNulls)) {
              minNonNulls <- if (how == "any") { length(cols) } else { 1 }
            }

            # Delegate to the JVM-side DataFrameNaFunctions.drop().
            naFunctions <- callJMethod(x@sdf, "na")
            sdf <- callJMethod(naFunctions, "drop",
                               as.integer(minNonNulls), listToSeq(as.list(cols)))
            dataFrame(sdf)
          })
1474+ 
#' @aliases dropna
#' @export
setMethod("na.omit",
          signature(x = "DataFrame"),
          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
            # na.omit is the base-R-flavored alias for dropna; same contract.
            dropna(x, how, minNonNulls, cols)
          })
1482+ 
#' fillna
#'
#' Replace null values.
#'
#' @param x A SparkSQL DataFrame.
#' @param value Value to replace null values with.
#'              Should be an integer, numeric, character or named list.
#'              If the value is a named list, then cols is ignored and
#'              value must be a mapping from column name (character) to
#'              replacement value. The replacement value must be an
#'              integer, numeric or character.
#' @param cols optional list of column names to consider.
#'             Columns specified in cols that do not have matching data
#'             type are ignored. For example, if value is a character, and
#'             subset contains a non-character column, then the non-character
#'             column is simply ignored.
#' @return A DataFrame
#'
#' @rdname nafunctions
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' fillna(df, 1)
#' fillna(df, list("age" = 20, "name" = "unknown"))
#' }
setMethod("fillna",
          signature(x = "DataFrame"),
          function(x, value, cols = NULL) {
            if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
              stop("value should be an integer, numeric, character or named list.")
            }

            if (class(value) == "list") {
              # Check column names in the named list
              colNames <- names(value)
              if (length(colNames) == 0 || !all(colNames != "")) {
                stop("value should be a named list with each name being a column name.")
              }

              # Convert the named list to an environment to be passed to JVM
              valueMap <- new.env()
              for (col in colNames) {
                # Check each item in the named list is of valid type
                v <- value[[col]]
                if (!(class(v) %in% c("integer", "numeric", "character"))) {
                  stop("Each item in value should be an integer, numeric or character.")
                }
                valueMap[[col]] <- v
              }

              # When value is a named list, caller is expected not to pass in cols
              if (!is.null(cols)) {
                warning("When value is a named list, cols is ignored!")
                cols <- NULL
              }

              value <- valueMap
            } else if (is.integer(value)) {
              # Cast an integer to a numeric so the JVM overload resolution
              # sees a double, matching the Scala fill(Double) signature.
              value <- as.numeric(value)
            }

            # Delegate to JVM-side DataFrameNaFunctions.fill(), with or
            # without a column subset.
            naFunctions <- callJMethod(x@sdf, "na")
            sdf <- if (length(cols) == 0) {
              callJMethod(naFunctions, "fill", value)
            } else {
              callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
            }
            dataFrame(sdf)
          })
0 commit comments