Skip to content

Commit 73b70f0

Browse files
wyaron authored and sarutak committed
[SPARK-12517] add default RDD name for one created via sc.textFile
The feature was first added at commit: 7b877b2 but was later removed (probably by mistake) at commit: fc8b581. This change sets the default path of RDDs created via sc.textFile(...) to the path argument. Here is the symptom: * Using spark-1.5.2-bin-hadoop2.6: scala> sc.textFile("/home/root/.bashrc").name res5: String = null scala> sc.binaryFiles("/home/root/.bashrc").name res6: String = /home/root/.bashrc * while using Spark 1.3.1: scala> sc.textFile("/home/root/.bashrc").name res0: String = /home/root/.bashrc scala> sc.binaryFiles("/home/root/.bashrc").name res1: String = /home/root/.bashrc Author: Yaron Weinsberg <[email protected]> Author: yaron <[email protected]> Closes #10456 from wyaron/master.
1 parent fd50df4 commit 73b70f0

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
836836
minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
837837
assertNotStopped()
838838
hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
839-
minPartitions).map(pair => pair._2.toString)
839+
minPartitions).map(pair => pair._2.toString).setName(path)
840840
}
841841

842842
/**
@@ -885,7 +885,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
885885
classOf[Text],
886886
classOf[Text],
887887
updateConf,
888-
minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
888+
minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
889889
}
890890

891891
/**

core/src/test/scala/org/apache/spark/SparkContextSuite.scala

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
274274
}
275275
}
276276

277+
test("Default path for file based RDDs is properly set (SPARK-12517)") {
278+
sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
279+
280+
// Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and
281+
// newAPIHadoopFile for setting the default path as the RDD name
282+
val mockPath = "default/path/for/"
283+
284+
var targetPath = mockPath + "textFile"
285+
assert(sc.textFile(targetPath).name === targetPath)
286+
287+
targetPath = mockPath + "wholeTextFiles"
288+
assert(sc.wholeTextFiles(targetPath).name === targetPath)
289+
290+
targetPath = mockPath + "binaryFiles"
291+
assert(sc.binaryFiles(targetPath).name === targetPath)
292+
293+
targetPath = mockPath + "hadoopFile"
294+
assert(sc.hadoopFile(targetPath).name === targetPath)
295+
296+
targetPath = mockPath + "newAPIHadoopFile"
297+
assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
298+
299+
sc.stop()
300+
}
301+
277302
test("calling multiple sc.stop() must not throw any exception") {
278303
noException should be thrownBy {
279304
sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))

0 commit comments

Comments (0)