@@ -142,49 +142,9 @@ def stop(self, stopSparkContext=True, stopGraceFully=False):
 
     def _testInputStream(self, test_inputs, numSlices=None):
         """
-<<<<<<< HEAD
         This function is only for test.
         This implementation is inspired by QueStream implementation.
         Give list of RDD to generate DStream which contains the RDD.
-=======
-        Generate multiple files to make "stream" in Scala side for test.
-        Scala chooses one of the files and generates RDD using PythonRDD.readRDDFromFile.
-
-        QueStream maybe good way to implement this function
-        """
-        numSlices = numSlices or self._sc.defaultParallelism
-        # Calling the Java parallelize() method with an ArrayList is too slow,
-        # because it sends O(n) Py4J commands. As an alternative, serialized
-        # objects are written to a file and loaded through textFile().
-
-        tempFiles = list()
-        for test_input in test_inputs:
-            tempFile = NamedTemporaryFile(delete=False, dir=self._sc._temp_dir)
-
-            # Make sure we distribute data evenly if it's smaller than self.batchSize
-            if "__len__" not in dir(test_input):
-                test_input = list(test_input)  # Make it a list so we can compute its length
-            batchSize = min(len(test_input) // numSlices, self._sc._batchSize)
-            if batchSize > 1:
-                serializer = BatchedSerializer(self._sc._unbatched_serializer,
-                                               batchSize)
-            else:
-                serializer = self._sc._unbatched_serializer
-            serializer.dump_stream(test_input, tempFile)
-            tempFile.close()
-            tempFiles.append(tempFile.name)
-
-        jtempFiles = ListConverter().convert(tempFiles, SparkContext._gateway._gateway_client)
-        jinput_stream = self._jvm.PythonTestInputStream(self._jssc,
-                                                        jtempFiles,
-                                                        numSlices).asJavaDStream()
-        return DStream(jinput_stream, self, BatchedSerializer(PickleSerializer()))
-
-    def _testInputStream2(self, test_inputs, numSlices=None):
-        """
-        This is inpired by QueStream implementation. Give list of RDD and generate DStream
-        which contain the RDD.
->>>>>>> broke something
         """
         test_rdds = list()
         test_rdd_deserializers = list()
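The branch removed by the first hunk avoided pushing test data through Py4J element by element: as its comment notes, calling parallelize() on an ArrayList costs O(n) Py4J commands, so the data was batch-serialized to temp files instead and read back on the Scala side. A minimal standalone sketch of that pattern, with plain pickle and the hypothetical helpers `dump_batches` / `write_inputs_to_temp_files` standing in for PySpark's BatchedSerializer machinery:

```python
import pickle
from tempfile import NamedTemporaryFile

def dump_batches(elements, f, batch_size):
    # Stand-in for PySpark's BatchedSerializer.dump_stream: pickle the
    # elements in fixed-size batches rather than one object per call.
    for i in range(0, len(elements), batch_size):
        pickle.dump(list(elements[i:i + batch_size]), f,
                    protocol=pickle.HIGHEST_PROTOCOL)

def write_inputs_to_temp_files(test_inputs, num_slices, default_batch_size=1024):
    """Serialize each test input to its own temp file; return the file names."""
    temp_files = []
    for test_input in test_inputs:
        if not hasattr(test_input, "__len__"):
            test_input = list(test_input)  # materialize so len() works
        # Shrink the batch when the input is small, so the data still spreads
        # evenly across num_slices partitions (as in the removed code above).
        batch_size = max(1, min(len(test_input) // num_slices, default_batch_size))
        tmp = NamedTemporaryFile(delete=False, suffix=".pickle")
        dump_batches(test_input, tmp, batch_size)
        tmp.close()
        temp_files.append(tmp.name)
    return temp_files

print(write_inputs_to_temp_files([range(10), iter([1, 2, 3])], num_slices=2))
```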
@@ -196,10 +156,4 @@ def _testInputStream2(self, test_inputs, numSlices=None):
         jtest_rdds = ListConverter().convert(test_rdds, SparkContext._gateway._gateway_client)
         jinput_stream = self._jvm.PythonTestInputStream(self._jssc, jtest_rdds).asJavaDStream()
 
-<<<<<<< HEAD
         return DStream(jinput_stream, self, test_rdd_deserializers[0])
-=======
-        dstream = DStream(jinput_stream, self, test_rdd_deserializers[0])
-        dstream._test_switch_dserializer(test_rdd_deserializers)
-        return dstream
->>>>>>> broke something