Skip to content

Commit 28395ff

Browse files
committed
Add isEmpty to Java, Python
1 parent 7dd04b7 commit 28395ff

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -435,6 +435,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
435 435
*/
436 436
def first(): T = rdd.first()
437 437

438+
/**
439+
* @return true if and only if the RDD contains no elements at all. Note that an RDD
440+
* may be empty even when it has at least 1 partition.
441+
*/
442+
def isEmpty(): Boolean = rdd.isEmpty()
443+
438 444
/**
439 445
* Save this RDD as a text file, using string representations of elements.
440 446
*/

core/src/test/java/org/apache/spark/JavaAPISuite.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -606,6 +606,13 @@ public void take() {
606 606
rdd.takeSample(false, 2, 42);
607 607
}
608 608

609+
@Test
610+
public void isEmpty() {
611+
Assert.assertTrue(sc.emptyRDD().isEmpty());
612+
Assert.assertTrue(sc.parallelize(new ArrayList<Integer>()).isEmpty());
613+
Assert.assertFalse(sc.parallelize(Arrays.asList(1)).isEmpty());
614+
}
615+
609 616
@Test
610 617
public void cartesian() {
611 618
JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));

python/pyspark/rdd.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1130,6 +1130,18 @@ def first(self):
1130 1130
return rs[0]
1131 1131
raise ValueError("RDD is empty")
1132 1132

1133+
def isEmpty(self):
1134+
"""
1135+
Returns true if and only if the RDD contains no elements at all. Note that an RDD
1136+
may be empty even when it has at least 1 partition.
1137+
1138+
>>> sc.parallelize([]).isEmpty()
1139+
True
1140+
>>> sc.parallelize([1]).isEmpty()
1141+
False
1142+
"""
1143+
return self._jrdd.partitions().size() == 0 or len(self.take(1)) == 0
1144+
1133 1145
def saveAsNewAPIHadoopDataset(self, conf, keyConverter=None, valueConverter=None):
1134 1146
"""
1135 1147
Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file

0 commit comments

Comments
 (0)