@@ -55,7 +55,6 @@ class PythonDStream[T: ClassTag](
       case None => None
     }
   }
-<<<<<<< HEAD
 
   val asJavaDStream = JavaDStream.fromDStream(this)
 
@@ -134,87 +133,31 @@ DStream[(Long, Array[Byte])](prev.ssc){
   }
   val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
 }
-=======
-  val asJavaDStream = JavaDStream.fromDStream(this)
-
-  /**
-   * Print the first ten elements of each PythonRDD generated in this PythonDStream. This is an output
-   * operator, so this PythonDStream will be registered as an output stream and therefore materialized.
-   * Since serialized Python objects are readable by Python, pyprint writes the binary data out to a
-   * temporary file and runs a Python script to deserialize and print the first ten elements.
-   */
-  private[streaming] def ppyprint() {
-    def foreachFunc = (rdd: RDD[Array[Byte]], time: Time) => {
-      val iter = rdd.take(11).iterator
-
-      // make a temporary file
-      val prefix = "spark"
-      val suffix = ".tmp"
-      val tempFile = File.createTempFile(prefix, suffix)
-      val tempFileStream = new DataOutputStream(new FileOutputStream(tempFile.getAbsolutePath))
-      // write out serialized python object
-      PythonRDD.writeIteratorToStream(iter, tempFileStream)
-      tempFileStream.close()
-
-      // This value has to be passed from python
-      // val pythonExec = new ProcessBuilder().environment().get("PYSPARK_PYTHON")
-      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
-      // val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/streaming/pyprint.py", tempFile.getAbsolutePath())) // why this fails to compile???
-      // the absolute path to the python script needs to change because we do not use pysparkstreaming
-      val pb = new ProcessBuilder(pythonExec, sparkHome + "/python/pysparkstreaming/streaming/pyprint.py", tempFile.getAbsolutePath())
-      val workerEnv = pb.environment()
-
-      // envVars also need to be passed
-      // workerEnv.putAll(envVars)
-      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
-      workerEnv.put("PYTHONPATH", pythonPath)
-      val worker = pb.start()
-      val is = worker.getInputStream()
-      val isr = new InputStreamReader(is)
-      val br = new BufferedReader(isr)
 
-      println("-------------------------------------------")
-      println("Time: " + time)
-      println("-------------------------------------------")
-
-      // print value from python std out
-      var line = ""
-      breakable {
-        while (true) {
-          line = br.readLine()
-          if (line == null) break()
-          println(line)
-        }
-      }
-      // delete temporary file
-      tempFile.delete()
-      println()
 
-    }
-    new ForEachDStream(this, context.sparkContext.clean(foreachFunc)).register()
-  }
-}
-
-
-private class PairwiseDStream(prev: DStream[Array[Byte]]) extends
-DStream[(Long, Array[Byte])](prev.ssc){
+private class PairwiseDStream(prev: DStream[Array[Byte]], partitioner: Partitioner) extends
+DStream[Array[Byte]](prev.ssc){
   override def dependencies = List(prev)
 
   override def slideDuration: Duration = prev.slideDuration
 
-  override def compute(validTime: Time): Option[RDD[(Long, Array[Byte])]] = {
+  override def compute(validTime: Time): Option[RDD[Array[Byte]]] = {
     prev.getOrCompute(validTime) match {
       case Some(rdd) =>
         val pairwiseRDD = new PairwiseRDD(rdd)
-        Some(pairwiseRDD.asJavaPairRDD.rdd)
+        /*
+         * This is equivalent to the following Python code:
+         * with _JavaStackTrace(self.context) as st:
+         *     pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD()
+         *     partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
+         *                                                   id(partitionFunc))
+         * jrdd = pairRDD.partitionBy(partitioner).values()
+         * rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer))
+         */
+        Some(pairwiseRDD.asJavaPairRDD.partitionBy(partitioner).values().rdd)
       case None => None
     }
   }
-  val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
+  val asJavaDStream = JavaDStream.fromDStream(this)
+  // val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
 }
-
-
-
-
-
->>>>>>> added reducedByKey not working yet
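
The new PairwiseDStream mirrors the PySpark partitionBy() path quoted in the inline comment: each batch RDD is wrapped in a PairwiseRDD, shuffled by the supplied Partitioner on the JVM side, and stripped back to its serialized values. Below is a minimal sketch of how the class might be driven, assuming code living in the same package (the class is private), a source stream named serialized, and a plain HashPartitioner standing in for the Python-side PythonPartitioner; these names are illustrative and not part of this commit.

import org.apache.spark.HashPartitioner
import org.apache.spark.streaming.dstream.DStream

// Source stream of pickled (key, value) records, e.g. handed over from the
// Python worker (assumed for this sketch).
val serialized: DStream[Array[Byte]] = ???

// Shuffle each batch on the JVM side. The real call site would substitute a
// PythonPartitioner built from the Python partition function's id, as the
// comment in compute() describes; HashPartitioner is used here only to keep
// the sketch self-contained.
val partitioned = new PairwiseDStream(serialized, new HashPartitioner(4))

// Expose the result to Py4J callers through the Java wrapper.
val javaStream = partitioned.asJavaDStream

Doing the shuffle on the JVM side means only opaque Array[Byte] payloads cross back to Python, which is why compute() drops the Long keys with values() before returning.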