Commit 1fbcb6e

wyaron authored and sarutak committed
[SPARK-12517] add default RDD name for one created via sc.textFile

The feature was first added at commit 7b877b2 but was later removed (probably by mistake) at commit fc8b581. This change sets the default name of RDDs created via sc.textFile(...) to the path argument.

Here is the symptom.

Using spark-1.5.2-bin-hadoop2.6:

    scala> sc.textFile("/home/root/.bashrc").name
    res5: String = null

    scala> sc.binaryFiles("/home/root/.bashrc").name
    res6: String = /home/root/.bashrc

While using Spark 1.3.1:

    scala> sc.textFile("/home/root/.bashrc").name
    res0: String = /home/root/.bashrc

    scala> sc.binaryFiles("/home/root/.bashrc").name
    res1: String = /home/root/.bashrc

Author: Yaron Weinsberg <[email protected]>
Author: yaron <[email protected]>

Closes #10456 from wyaron/master.

(cherry picked from commit 73b70f0)
Signed-off-by: Kousuke Saruta <[email protected]>
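
The fix can be a one-liner per method because RDD.setName returns the receiver, so it can simply be appended to an existing call chain. A paraphrased sketch of that contract in Scala (illustrative only, not the exact Spark source):

    // Paraphrase of the relevant slice of org.apache.spark.rdd.RDD.
    abstract class SketchRDD {
      // Name shown in the Spark UI; null until assigned.
      var name: String = _

      // Stores the name and returns this same RDD, enabling chained calls.
      def setName(newName: String): this.type = {
        name = newName
        this
      }
    }

Because the return type is this.type, appending .setName(path) after map(...) hands the caller back the same concrete RDD, now carrying a name.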
1 parent b8da77e · commit 1fbcb6e

File tree

2 files changed: +27 −2 lines changed


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 2 deletions

@@ -830,7 +830,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
     assertNotStopped()
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
-      minPartitions).map(pair => pair._2.toString)
+      minPartitions).map(pair => pair._2.toString).setName(path)
   }

   /**
@@ -879,7 +879,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       classOf[Text],
       classOf[Text],
       updateConf,
-      minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
+      minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
   }

   /**
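
Note that the second hunk does more than add a call: it moves an existing setName from before the final map to after it. The reordering matters because RDD names do not propagate through transformations; a name set on an intermediate RDD is not inherited by the RDD that map returns. A minimal sketch of the difference (hypothetical path, assuming a live SparkContext sc):

    // Naming the pre-map RDD names an object the caller never sees.
    val misnamed = sc
      .textFile("hdfs:///logs/app.log")   // hypothetical path
      .setName("my-input")                // names this RDD...
      .map(_.toUpperCase)                 // ...but map() returns a brand-new RDD
    println(misnamed.name)                // null: the name stayed behind

    // The patch therefore chains setName(path) last, on the RDD users receive:
    val named = sc.textFile("hdfs:///logs/app.log")
      .map(_.toUpperCase)
      .setName("my-input")
    println(named.name)                   // my-input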

core/src/test/scala/org/apache/spark/SparkContextSuite.scala

Lines changed: 25 additions & 0 deletions

@@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     }
   }

+  test("Default path for file based RDDs is properly set (SPARK-12517)") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+    // Test textFile, wholeTextFiles, binaryFiles, hadoopFile and
+    // newAPIHadoopFile for setting the default path as the RDD name
+    val mockPath = "default/path/for/"
+
+    var targetPath = mockPath + "textFile"
+    assert(sc.textFile(targetPath).name === targetPath)
+
+    targetPath = mockPath + "wholeTextFiles"
+    assert(sc.wholeTextFiles(targetPath).name === targetPath)
+
+    targetPath = mockPath + "binaryFiles"
+    assert(sc.binaryFiles(targetPath).name === targetPath)
+
+    targetPath = mockPath + "hadoopFile"
+    assert(sc.hadoopFile(targetPath).name === targetPath)
+
+    targetPath = mockPath + "newAPIHadoopFile"
+    assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
+
+    sc.stop()
+  }
+
   test("calling multiple sc.stop() must not throw any exception") {
     noException should be thrownBy {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
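
One detail worth noting about the test: none of the default/path/for/... paths exist, yet the assertions pass. That works because these SparkContext methods are lazy; the RDD, and with it the name, is created immediately, while the path is only resolved when an action runs. A small sketch of that distinction (assuming a local SparkContext sc):

    // Building the RDD records the name without any filesystem I/O.
    val rdd = sc.textFile("no/such/path")
    assert(rdd.name == "no/such/path")  // passes even though the path is missing

    // Only an action would actually touch the path, and then fail, e.g.:
    // rdd.count()  // would throw org.apache.hadoop.mapred.InvalidInputException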
