@@ -758,6 +758,32 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
758758 rdd.saveAsHadoopDataset(conf)
759759 }
760760
761+ /**
762+ * Repartition the RDD according to the given partitioner and, within each resulting partition,
763+ * sort records by their keys.
764+ *
765+ * This is more efficient than calling `repartition` and then sorting within each partition
766+ * because it can push the sorting down into the shuffle machinery.
767+ */
def repartitionAndSortWithinPartition(partitioner: Partitioner): JavaPairRDD[K, V] = {
  // No comparator was supplied, so fall back to Guava's natural ordering. The cast is
  // unchecked: K carries no Comparable bound here, so this presumably only works when the
  // keys are actually Comparable at runtime.
  val naturalOrder: Comparator[K] =
    com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[K]]
  // Delegate to the overload that takes an explicit comparator.
  repartitionAndSortWithinPartition(partitioner, naturalOrder)
}
772+
773+ /**
774+ * Repartition the RDD according to the given partitioner and, within each resulting partition,
775+ * sort records by their keys.
776+ *
777+ * This is more efficient than calling `repartition` and then sorting within each partition
778+ * because it can push the sorting down into the shuffle machinery.
779+ */
def repartitionAndSortWithinPartition(partitioner: Partitioner, comp: Comparator[K])
  : JavaPairRDD[K, V] = {
  // `comp` is typed as Comparator[K] (a bare `Comparator` is not a valid Scala type) so it
  // lines up with the key type and with the overload that passes a Comparator[K].
  //
  // Putting the comparator in implicit scope allows the implicit conversion of Comparator to
  // Ordering, which supplies the key ordering that OrderedRDDFunctions needs.
  implicit val ordering: Comparator[K] = comp
  val sortedRdd =
    new OrderedRDDFunctions[K, V, (K, V)](rdd).repartitionAndSortWithinPartition(partitioner)
  // Wrap the resulting Scala RDD back into the Java-friendly pair RDD.
  fromRDD(sortedRdd)
}
786+
761787 /**
762788 * Sort the RDD by key, so that each partition contains a sorted range of the elements in
763789 * ascending order. Calling `collect` or `save` on the resulting RDD will return or output an
0 commit comments