@@ -55,6 +55,91 @@ class PythonDStream[T: ClassTag](
      case None => None
    }
  }

  val asJavaDStream = JavaDStream.fromDStream(this)

  /**
   * Print the first ten elements of each RDD generated in this PythonDStream. This is an output
   * operator, so this PythonDStream will be registered as an output stream and there materialized.
   * Since the serialized Python objects can only be deserialized by Python, pyprint writes the
   * binary data to a temporary file and runs a Python script that deserializes and prints the
   * first ten elements (see the sketch of the assumed on-disk framing after this class).
   */
  private[streaming] def ppyprint() {
    def foreachFunc = (rdd: RDD[Array[Byte]], time: Time) => {
      val iter = rdd.take(11).iterator
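      // take(11) pulls one more element than is printed, presumably so the Python script
      // can tell whether the RDD has more than ten elements (as DStream.print() does).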

      // make a temporary file
      val prefix = "spark"
      val suffix = ".tmp"
      val tempFile = File.createTempFile(prefix, suffix)
      val tempFileStream = new DataOutputStream(new FileOutputStream(tempFile.getAbsolutePath))
      // write out the serialized python objects
      PythonRDD.writeIteratorToStream(iter, tempFileStream)
      tempFileStream.close()

      // TODO: pythonExec should eventually be passed in from the Python side
      val pythonExec = new ProcessBuilder().environment().get("PYSPARK_PYTHON")
      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
      // Note: new ProcessBuilder(Seq(...)) does not compile because java.lang.ProcessBuilder
      // only accepts String varargs or a java.util.List[String], not a Scala Seq.
      // TODO: the absolute script path below only works while the code lives in the separate
      // pysparkstreaming package rather than in pyspark itself.
      val pb = new ProcessBuilder(pythonExec,
        sparkHome + "/python/pysparkstreaming/streaming/pyprint.py", tempFile.getAbsolutePath)
      val workerEnv = pb.environment()

      // TODO: envVars also needs to be passed in from the Python side
      // workerEnv.putAll(envVars)
      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
      workerEnv.put("PYTHONPATH", pythonPath)
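      // Prepending $SPARK_HOME/python lets the spawned script import the bundled pyspark
      // (and pysparkstreaming) packages it needs to deserialize the elements.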
      val worker = pb.start()
      val is = worker.getInputStream()
      val isr = new InputStreamReader(is)
      val br = new BufferedReader(isr)

      println("-------------------------------------------")
      println("Time: " + time)
      println("-------------------------------------------")

      // print values from the python script's stdout
      var line = ""
      breakable {
        while (true) {
          line = br.readLine()
          if (line == null) break()
          println(line)
        }
      }
      // delete the temporary file
      tempFile.delete()
      println()

    }
    new ForEachDStream(this, context.sparkContext.clean(foreachFunc)).register()
  }
}
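
// A minimal sketch (not part of this patch) of reading the temporary file back, assuming
// PythonRDD.writeIteratorToStream frames each pickled element as a 4-byte big-endian length
// followed by the raw bytes; pyprint.py is expected to undo the same framing before
// unpickling. The object and method names here are illustrative only.
private object FramedTempFileReader {
  import java.io.{DataInputStream, EOFException, FileInputStream}

  def readFramedElements(path: String): Iterator[Array[Byte]] = new Iterator[Array[Byte]] {
    private val in = new DataInputStream(new FileInputStream(path))
    private var nextElem: Option[Array[Byte]] = fetch()

    // Read one length-prefixed record, or close the stream and stop at end of file.
    private def fetch(): Option[Array[Byte]] =
      try {
        val length = in.readInt()
        val buf = new Array[Byte](length)
        in.readFully(buf)
        Some(buf)
      } catch {
        case _: EOFException => in.close(); None
      }

    override def hasNext: Boolean = nextElem.isDefined
    override def next(): Array[Byte] = {
      val elem = nextElem.getOrElse(throw new NoSuchElementException("end of file"))
      nextElem = fetch()
      elem
    }
  }
}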


private class PairwiseDStream(prev: DStream[Array[Byte]]) extends
    DStream[(Long, Array[Byte])](prev.ssc) {
  override def dependencies = List(prev)

  override def slideDuration: Duration = prev.slideDuration

  override def compute(validTime: Time): Option[RDD[(Long, Array[Byte])]] = {
    prev.getOrCompute(validTime) match {
      case Some(rdd) =>
        val pairwiseRDD = new PairwiseRDD(rdd)
        Some(pairwiseRDD.asJavaPairRDD.rdd)
      case None => None
    }
  }
  val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
}
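
// A hypothetical usage sketch (not part of this patch): once every pickled element is keyed
// by a Long, each batch can be shuffled with a plain HashPartitioner so that equal keys land
// in the same partition; a Python-side reduceByKey could then merge the pickled values
// partition-locally. The object name, method name, and numPartitions parameter are
// illustrative only.
private object PairwiseDStreamSketch {
  import org.apache.spark.HashPartitioner
  import org.apache.spark.SparkContext._
  import org.apache.spark.streaming.dstream.DStream

  def groupPickledByKey(
      keyed: DStream[(Long, Array[Byte])],
      numPartitions: Int): DStream[(Long, Array[Byte])] = {
    // transform gives per-batch access to the underlying RDD of (key, pickled bytes) pairs
    keyed.transform(rdd => rdd.partitionBy(new HashPartitioner(numPartitions)))
  }
}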