@@ -55,7 +55,6 @@ class PythonDStream[T: ClassTag](
       case None => None
     }
   }
-<<<<<<< HEAD

   val asJavaDStream = JavaDStream.fromDStream(this)

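The first hunk drops a stray `<<<<<<< HEAD` conflict marker and keeps the `asJavaDStream` converter. That converter is what the Python driver actually holds a reference to: Py4J can only call through the Java-friendly API, so the Scala stream is wrapped before crossing the bridge. A minimal sketch of that conversion in isolation (the helper name `asJava` is made up for illustration):

    import org.apache.spark.streaming.api.java.JavaDStream
    import org.apache.spark.streaming.dstream.DStream

    // Py4J talks to the JVM through the Java API, so a Scala DStream is
    // wrapped in a JavaDStream before being handed to the Python driver.
    def asJava(stream: DStream[Array[Byte]]): JavaDStream[Array[Byte]] =
      JavaDStream.fromDStream(stream)
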
@@ -134,87 +133,31 @@ DStream[(Long, Array[Byte])](prev.ssc){
   }
   val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
 }
-=======
-  val asJavaDStream = JavaDStream.fromDStream(this)
-
-  /**
-   * Print the first ten elements of each PythonRDD generated in this PythonDStream. This is an output
-   * operator, so this PythonDStream will be registered as an output stream and there materialized.
-   * Since serialized Python object is readable by Python, pyprint writes out binary data to
-   * temporary file and run python script to deserialized and print the first ten elements
-   */
-  private[streaming] def ppyprint() {
-    def foreachFunc = (rdd: RDD[Array[Byte]], time: Time) => {
-      val iter = rdd.take(11).iterator
-
-      // make a temporary file
-      val prefix = "spark"
-      val suffix = ".tmp"
-      val tempFile = File.createTempFile(prefix, suffix)
-      val tempFileStream = new DataOutputStream(new FileOutputStream(tempFile.getAbsolutePath))
-      // write out serialized python object
-      PythonRDD.writeIteratorToStream(iter, tempFileStream)
-      tempFileStream.close()
-
-      // This value has to be passed from python
-      // val pythonExec = new ProcessBuilder().environment().get("PYSPARK_PYTHON")
-      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
-      // val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/streaming/pyprint.py", tempFile.getAbsolutePath())) // why this fails to compile???
-      // absolute path to the python script is needed to change because we do not use pysparkstreaming
-      val pb = new ProcessBuilder(pythonExec, sparkHome + "/python/pysparkstreaming/streaming/pyprint.py", tempFile.getAbsolutePath)
-      val workerEnv = pb.environment()
-
-      // envVars also need to be pass
-      // workerEnv.putAll(envVars)
-      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
-      workerEnv.put("PYTHONPATH", pythonPath)
-      val worker = pb.start()
-      val is = worker.getInputStream()
-      val isr = new InputStreamReader(is)
-      val br = new BufferedReader(isr)

-      println("-------------------------------------------")
-      println("Time: " + time)
-      println("-------------------------------------------")
-
-      // print value from python std out
-      var line = ""
-      breakable {
-        while (true) {
-          line = br.readLine()
-          if (line == null) break()
-          println(line)
-        }
-      }
-      // delete temporary file
-      tempFile.delete()
-      println()

-    }
-    new ForEachDStream(this, context.sparkContext.clean(foreachFunc)).register()
-  }
-}
-
-
-private class PairwiseDStream(prev: DStream[Array[Byte]]) extends
-  DStream[(Long, Array[Byte])](prev.ssc) {
+private class PairwiseDStream(prev: DStream[Array[Byte]], partitioner: Partitioner) extends
+  DStream[Array[Byte]](prev.ssc) {
   override def dependencies = List(prev)

   override def slideDuration: Duration = prev.slideDuration

-  override def compute(validTime: Time): Option[RDD[(Long, Array[Byte])]] = {
+  override def compute(validTime: Time): Option[RDD[Array[Byte]]] = {
     prev.getOrCompute(validTime) match {
       case Some(rdd) =>
         val pairwiseRDD = new PairwiseRDD(rdd)
-        Some(pairwiseRDD.asJavaPairRDD.rdd)
+        /*
+         * This is equivalent to the following Python code:
+         *   with _JavaStackTrace(self.context) as st:
+         *     pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD()
+         *     partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
+         *                                                   id(partitionFunc))
+         *     jrdd = pairRDD.partitionBy(partitioner).values()
+         *     rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer))
+         */
+        Some(pairwiseRDD.asJavaPairRDD.partitionBy(partitioner).values().rdd)
       case None => None
     }
   }
-  val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
+  val asJavaDStream = JavaDStream.fromDStream(this)
+  // val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
 }
-
-
-
-
-
->>>>>>> added reducedByKey not working yet
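The deleted `ppyprint` helper is still instructive: the JVM cannot render pickled Python objects itself, so it wrote the serialized elements to a temporary file and spawned a Python subprocess to deserialize and print them. A self-contained sketch of that round trip, with the commented-out `PYSPARK_PYTHON` lookup restored and the script path treated as a parameter (both are assumptions, not the patch's exact values):

    import java.io.{BufferedReader, DataOutputStream, File, FileOutputStream, InputStreamReader}

    // Sketch: dump pre-serialized (pickled) elements to a temp file and let a
    // Python subprocess deserialize and print them.
    def printViaPython(elements: Iterator[Array[Byte]], script: String): Unit = {
      val tempFile = File.createTempFile("spark", ".tmp")
      val out = new DataOutputStream(new FileOutputStream(tempFile))
      try {
        // Length-prefixed framing, in the spirit of PythonRDD.writeIteratorToStream
        elements.foreach { bytes => out.writeInt(bytes.length); out.write(bytes) }
      } finally out.close()

      val pythonExec = sys.env.getOrElse("PYSPARK_PYTHON", "python")
      val worker = new ProcessBuilder(pythonExec, script, tempFile.getAbsolutePath).start()
      val reader = new BufferedReader(new InputStreamReader(worker.getInputStream))
      // Drain the subprocess's stdout, replacing the while/breakable loop above
      Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach(println)
      tempFile.delete()
    }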
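The second hunk rewires `PairwiseDStream` to take a `Partitioner` and perform the shuffle on the JVM side, mirroring the PySpark `partitionBy` path quoted in the comment. A hedged sketch of how it would be driven, assuming Spark's existing `PythonPartitioner` (the function and parameter names here are illustrative; in practice the partition count and partition-function id arrive from the Python driver over Py4J):

    import org.apache.spark.api.python.PythonPartitioner
    import org.apache.spark.streaming.api.java.JavaDStream
    import org.apache.spark.streaming.dstream.DStream

    // Sketch: shuffle a stream of pickled (key, value) records by key and
    // hand the result back to the Python side through the Java API.
    def partitionByKey(
        byteStream: DStream[Array[Byte]],
        numPartitions: Int,
        pyPartitionFunctionId: Long): JavaDStream[Array[Byte]] = {
      val partitioner = new PythonPartitioner(numPartitions, pyPartitionFunctionId)
      new PairwiseDStream(byteStream, partitioner).asJavaDStream
    }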