@@ -2004,20 +2004,20 @@ def keyBy(self, f):
20042004
def repartition(self, numPartitions):
    """
    Redistribute the data of this RDD over exactly numPartitions
    partitions and return the result as a new RDD.

    The partition count may go up or down relative to this RDD.
    The data is redistributed with a shuffle, so when the goal is
    only to reduce the number of partitions, `coalesce` is worth
    considering because it can avoid performing a shuffle.

    >>> rdd = sc.parallelize([1,2,3,4,5,6,7], 4)
    >>> sorted(rdd.glom().collect())
    [[1], [2, 3], [4, 5], [6, 7]]
    >>> len(rdd.repartition(2).glom().collect())
    2
    >>> len(rdd.repartition(10).glom().collect())
    10
    """
    # Delegate the repartitioning to the underlying `_jrdd` object, then
    # wrap its result in a new RDD that reuses this RDD's context and
    # deserializer.
    repartitioned = self._jrdd.repartition(numPartitions)
    return RDD(repartitioned, self.ctx, self._jrdd_deserializer)
0 commit comments