147 changes: 73 additions & 74 deletions python/pyspark/accumulators.py
@@ -15,77 +15,6 @@
# limitations under the License.
#

"""
>>> from pyspark.context import SparkContext
>>> sc = SparkContext('local', 'test')
>>> a = sc.accumulator(1)
>>> a.value
1
>>> a.value = 2
>>> a.value
2
>>> a += 5
>>> a.value
7

>>> sc.accumulator(1.0).value
1.0

>>> sc.accumulator(1j).value
1j

>>> rdd = sc.parallelize([1,2,3])
>>> def f(x):
... global a
... a += x
>>> rdd.foreach(f)
>>> a.value
13

>>> b = sc.accumulator(0)
>>> def g(x):
... b.add(x)
>>> rdd.foreach(g)
>>> b.value
6

>>> from pyspark.accumulators import AccumulatorParam
>>> class VectorAccumulatorParam(AccumulatorParam):
... def zero(self, value):
... return [0.0] * len(value)
... def addInPlace(self, val1, val2):
... for i in range(len(val1)):
... val1[i] += val2[i]
... return val1
>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
>>> va.value
[1.0, 2.0, 3.0]
>>> def g(x):
... global va
... va += [x] * 3
>>> rdd.foreach(g)
>>> va.value
[7.0, 8.0, 9.0]

>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
Py4JJavaError:...

>>> def h(x):
... global a
... a.value = 7
>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
Py4JJavaError:...

>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:...
"""

import sys
import select
import struct
@@ -117,6 +46,76 @@ def _deserialize_accumulator(aid, zero_value, accum_param):


class Accumulator(object):
"""
>>> from pyspark.context import SparkContext
>>> sc = SparkContext('local', 'test')
>>> a = sc.accumulator(1)
>>> a.value
1
>>> a.value = 2
>>> a.value
2
>>> a += 5
>>> a.value
7

>>> sc.accumulator(1.0).value
1.0

>>> sc.accumulator(1j).value
1j

>>> rdd = sc.parallelize([1,2,3])
>>> def f(x):
... global a
... a += x
>>> rdd.foreach(f)
>>> a.value
13

>>> b = sc.accumulator(0)
>>> def g(x):
... b.add(x)
>>> rdd.foreach(g)
>>> b.value
6

>>> from pyspark.accumulators import AccumulatorParam
>>> class VectorAccumulatorParam(AccumulatorParam):
... def zero(self, value):
... return [0.0] * len(value)
... def addInPlace(self, val1, val2):
... for i in range(len(val1)):
... val1[i] += val2[i]
... return val1
>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
>>> va.value
[1.0, 2.0, 3.0]
>>> def g(x):
... global va
... va += [x] * 3
>>> rdd.foreach(g)
>>> va.value
[7.0, 8.0, 9.0]

>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
Py4JJavaError:...

>>> def h(x):
... global a
... a.value = 7
>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
Py4JJavaError:...

>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:...
"""

"""
A shared variable that can be accumulated, i.e., has a commutative and associative "add"
@@ -263,7 +262,7 @@ def _start_update_server():
    return server

if __name__ == "__main__":
-    import doctest
-    (failure_count, test_count) = doctest.testmod()
-    if failure_count:
+    from pyspark.doctesthelper import run_doctests
+    result = run_doctests(__file__)
+    if not result.wasSuccessful():
        exit(-1)
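Aside: the doctest moved into the Accumulator docstring above exercises the custom-accumulator hook through VectorAccumulatorParam. As a minimal standalone sketch of the AccumulatorParam contract (illustrative only, not part of this diff): zero(value) supplies the identity element, and addInPlace(val1, val2) merges two partial results, so it must be commutative and associative for per-partition results to combine in any order.

# Illustrative sketch, not part of this diff: a custom accumulator
# that tracks a running maximum through the AccumulatorParam contract.
from pyspark.accumulators import AccumulatorParam

class MaxAccumulatorParam(AccumulatorParam):
    def zero(self, initial_value):
        # Identity element for max(): every value compares >= -inf.
        return float("-inf")

    def addInPlace(self, val1, val2):
        # Merge two partial results; commutative and associative.
        return max(val1, val2)

# Usage against an active SparkContext `sc`:
#   m = sc.accumulator(float("-inf"), MaxAccumulatorParam())
#   sc.parallelize([4, 1, 7]).foreach(lambda x: m.add(x))
#   m.value  # 7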
6 changes: 3 additions & 3 deletions python/pyspark/broadcast.py
@@ -114,7 +114,7 @@ def __reduce__(self):


if __name__ == "__main__":
-    import doctest
-    (failure_count, test_count) = doctest.testmod()
-    if failure_count:
+    from pyspark.doctesthelper import run_doctests
+    result = run_doctests(__file__)
+    if not result.wasSuccessful():
        exit(-1)
83 changes: 42 additions & 41 deletions python/pyspark/conf.py
@@ -15,45 +15,6 @@
# limitations under the License.
#

"""
>>> from pyspark.conf import SparkConf
>>> from pyspark.context import SparkContext
>>> conf = SparkConf()
>>> conf.setMaster("local").setAppName("My app")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.master")
u'local'
>>> conf.get("spark.app.name")
u'My app'
>>> sc = SparkContext(conf=conf)
>>> sc.master
u'local'
>>> sc.appName
u'My app'
>>> sc.sparkHome is None
True

>>> conf = SparkConf(loadDefaults=False)
>>> conf.setSparkHome("/path")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.home")
u'/path'
>>> conf.setExecutorEnv("VAR1", "value1")
<pyspark.conf.SparkConf object at ...>
>>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")])
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.executorEnv.VAR1")
u'value1'
>>> print(conf.toDebugString())
spark.executorEnv.VAR1=value1
spark.executorEnv.VAR3=value3
spark.executorEnv.VAR4=value4
spark.home=/path
>>> sorted(conf.getAll(), key=lambda p: p[0])
[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \
(u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
"""

__all__ = ['SparkConf']

import sys
@@ -86,6 +47,45 @@ class SparkConf(object):
    and can no longer be modified by the user.
    """

+    """
+    >>> from pyspark.conf import SparkConf
+    >>> from pyspark.context import SparkContext
+    >>> conf = SparkConf()
+    >>> conf.setMaster("local").setAppName("My app")
+    <pyspark.conf.SparkConf object at ...>
+    >>> conf.get("spark.master")
+    u'local'
+    >>> conf.get("spark.app.name")
+    u'My app'
+    >>> sc = SparkContext(conf=conf)
+    >>> sc.master
+    u'local'
+    >>> sc.appName
+    u'My app'
+    >>> sc.sparkHome is None
+    True
+
+    >>> conf = SparkConf(loadDefaults=False)
+    >>> conf.setSparkHome("/path")
+    <pyspark.conf.SparkConf object at ...>
+    >>> conf.get("spark.home")
+    u'/path'
+    >>> conf.setExecutorEnv("VAR1", "value1")
+    <pyspark.conf.SparkConf object at ...>
+    >>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")])
+    <pyspark.conf.SparkConf object at ...>
+    >>> conf.get("spark.executorEnv.VAR1")
+    u'value1'
+    >>> print(conf.toDebugString())
+    spark.executorEnv.VAR1=value1
+    spark.executorEnv.VAR3=value3
+    spark.executorEnv.VAR4=value4
+    spark.home=/path
+    >>> sorted(conf.getAll(), key=lambda p: p[0])
+    [(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \
+    (u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
+    """

def __init__(self, loadDefaults=True, _jvm=None, _jconf=None):
"""
Create a new Spark configuration.
@@ -182,8 +182,9 @@ def toDebugString(self):

def _test():
    import doctest
-    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
-    if failure_count:
+    from pyspark.doctesthelper import run_doctests
+    result = run_doctests(__file__, optionflags=doctest.ELLIPSIS)
+    if not result.wasSuccessful():
        exit(-1)


6 changes: 4 additions & 2 deletions python/pyspark/context.py
@@ -951,15 +951,17 @@ def dump_profiles(self, path):

def _test():
    import atexit
+    from pyspark.doctesthelper import run_doctests
    import doctest
    import tempfile
    globs = globals().copy()
    globs['sc'] = SparkContext('local[4]', 'PythonTest')
    globs['tempdir'] = tempfile.mkdtemp()
    atexit.register(lambda: shutil.rmtree(globs['tempdir']))
-    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    result = run_doctests(__file__, globs=globs,
+                          optionflags=doctest.ELLIPSIS)
    globs['sc'].stop()
-    if failure_count:
+    if not result.wasSuccessful():
        exit(-1)


41 changes: 41 additions & 0 deletions python/pyspark/doctesthelper.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+import doctest
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+
+def run_doctests(file_name, globs={}, optionflags=0):
+    t = doctest.DocFileSuite(file_name, module_relative=False,
+                             globs=globs, optionflags=optionflags)
+    if xmlrunner:
+        return xmlrunner.XMLTestRunner(output='target/test-reports',
+                                       verbosity=3).run(t)
+    else:
+        return unittest.TextTestRunner(verbosity=3).run(t)
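For reference, a hypothetical caller of the new helper, mirroring the __main__ blocks rewritten elsewhere in this diff: run_doctests wraps the given file in a doctest.DocFileSuite (module_relative=False, so file_name is treated as a path), runs it under xmlrunner when available so reports land in target/test-reports, and returns a unittest result object whose wasSuccessful() reflects any failures or errors.

# Hypothetical usage sketch for run_doctests; the module and flag
# names mirror the changes above, the empty globs value is made up.
import doctest
from pyspark.doctesthelper import run_doctests

if __name__ == "__main__":
    # Pass globs for any names the doctests expect (e.g. a SparkContext)
    # and optionflags such as doctest.ELLIPSIS.
    result = run_doctests(__file__, globs={},
                          optionflags=doctest.ELLIPSIS)
    if not result.wasSuccessful():
        exit(-1)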
9 changes: 6 additions & 3 deletions python/pyspark/ml/classification.py
@@ -16,6 +16,7 @@
#

import warnings
+import sys

from pyspark import since
from pyspark.ml.util import *
@@ -878,26 +879,28 @@ def weights(self):
if __name__ == "__main__":
    import doctest
    import pyspark.ml.classification
+    from pyspark.doctesthelper import run_doctests
    from pyspark.context import SparkContext
    from pyspark.sql import SQLContext
+    import tempfile
    globs = pyspark.ml.classification.__dict__.copy()
    # The small batch size here ensures that we see multiple batches,
    # even in these small test examples:
    sc = SparkContext("local[2]", "ml.classification tests")
    sqlContext = SQLContext(sc)
    globs['sc'] = sc
    globs['sqlContext'] = sqlContext
-    import tempfile
    temp_path = tempfile.mkdtemp()
    globs['temp_path'] = temp_path
    try:
-        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        result = run_doctests(__file__, globs=globs,
+                              optionflags=doctest.ELLIPSIS)
        sc.stop()
    finally:
        from shutil import rmtree
        try:
            rmtree(temp_path)
        except OSError:
            pass
-    if failure_count:
+    if not result.wasSuccessful():
        exit(-1)
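Taken together, the rewritten harnesses share one shape: build a globs dict holding the SparkContext and any temp paths, run the file through run_doctests, then tear resources down whether or not the suite passed. A condensed, hypothetical sketch of that pattern (it assumes the helper above and is not a copy of any single file in this diff):

# Hypothetical condensed harness combining the context.py and
# ml/classification.py patterns above.
import doctest
import shutil
import tempfile

from pyspark.context import SparkContext
from pyspark.doctesthelper import run_doctests

if __name__ == "__main__":
    sc = SparkContext("local[2]", "doctest harness")
    temp_path = tempfile.mkdtemp()
    globs = {"sc": sc, "temp_path": temp_path}
    try:
        result = run_doctests(__file__, globs=globs,
                              optionflags=doctest.ELLIPSIS)
    finally:
        # Clean up even if the doctest run itself raised.
        sc.stop()
        shutil.rmtree(temp_path, ignore_errors=True)
    if not result.wasSuccessful():
        exit(-1)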