Commit f945222

[SPARK-13019] mainly re-organize java import
1 parent d817d0b

17 files changed: 37 additions, 68 deletions
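
The reorganization follows the import grouping that Spark's style guide prescribes, as I read it: java/javax imports first, then scala, then third-party libraries, then org.apache.spark, with a blank line between groups. A minimal sketch of the target ordering, using classes that appear in this commit (the class name ImportOrderSketch and the body are illustrative only; the grouping is the point):

// Assumed grouping: java -> scala -> third-party -> org.apache.spark,
// separated by blank lines, per the Spark style guide.
import java.util.Arrays;

import scala.Tuple2;

import org.apache.spark.SparkConf;

public class ImportOrderSketch {
  public static void main(String[] args) {
    // The body only exercises the imports; the import block above is the pattern.
    System.out.println(new Tuple2<>(Arrays.asList(1, 2, 3), new SparkConf().getClass().getName()));
  }
}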

examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java

Lines changed: 4 additions & 4 deletions

@@ -17,6 +17,8 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
@@ -27,8 +29,6 @@
 import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.stat.Statistics;
 // $example off$
-import java.util.Arrays;
-
 
 public class JavaCorrelationsExample {
   public static void main(String[] args) {
@@ -44,8 +44,8 @@ public static void main(String[] args) {
     JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
       Arrays.asList(new Double[]{11.0, 22.0, 33.0, 33.0, 555.0}));
 
-    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
-    // method is not specified, Pearson's method will be used by default.
+    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
+    // If a method is not specified, Pearson's method will be used by default.
     Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
     System.out.println("correlation is: " + correlation);

examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java

Lines changed: 2 additions & 1 deletion

@@ -17,6 +17,8 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
@@ -29,7 +31,6 @@
 import org.apache.spark.mllib.stat.Statistics;
 import org.apache.spark.mllib.stat.test.ChiSqTestResult;
 // $example off$
-import java.util.Arrays;
 
 public class JavaHypothesisTestingExample {
   public static void main(String[] args) {

examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java

Lines changed: 3 additions & 3 deletions

@@ -17,14 +17,15 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaDoubleRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.stat.Statistics;
 import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
 // $example off$
-import java.util.Arrays;
 
 public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
   public static void main(String[] args) {
@@ -35,8 +36,7 @@ public static void main(String[] args) {
     // $example on$
     JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
     KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
-    // summary of the test including the p-value, test statistic,
-    // and null hypothesis
+    // summary of the test including the p-value, test statistic, and null hypothesis
     // if our p-value indicates significance, we can reject the null hypothesis
     System.out.println(testResult);
     // $example off$

examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java

Lines changed: 4 additions & 7 deletions

@@ -17,14 +17,14 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.stat.KernelDensity;
-import org.apache.spark.rdd.RDD;
 // $example off$
-import java.util.Arrays;
 
 public class JavaKernelDensityEstimationExample {
   public static void main(String[] args) {
@@ -36,11 +36,8 @@ public static void main(String[] args) {
     JavaRDD<Double> data = jsc.parallelize(
       Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0)); // an RDD of sample data
 
-    // Construct the density estimator with the sample data and a standard deviation for the Gaussian
-    // kernels
-    KernelDensity kd = new KernelDensity()
-      .setSample(data)
-      .setBandwidth(3.0);
+    // Construct the density estimator with the sample data and a standard deviation for the Gaussian kernels
+    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
 
     // Find density estimates for the given values
     double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});

examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java

Lines changed: 11 additions & 5 deletions

@@ -17,16 +17,22 @@
 
 package org.apache.spark.examples.mllib;
 
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.VoidFunction;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.HashMap;
+// $example on$
+import java.util.Map;
+// $example off$
+
 import scala.Tuple2;
+
+import org.apache.spark.api.java.function.VoidFunction;
 // $example on$
-import java.util.*;
-import org.apache.spark.api.java.JavaRDD;
+
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaSparkContext;
 // $example off$
+import org.apache.spark.SparkConf;
 
 public class JavaStratifiedSamplingExample {
   public static void main(String[] args) {
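
For context on why this file needs the HashMap, Map, and JavaPairRDD imports at all: the example demonstrates stratified sampling, where a per-key fraction map drives sampleByKey (one pass, approximate stratum sizes) and sampleByKeyExact (extra passes, exact stratum sizes). A self-contained sketch of that pattern follows, with an illustrative class name and data rather than the example file's actual body:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import scala.Tuple2;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class StratifiedSamplingSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("StratifiedSamplingSketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    List<Tuple2<Integer, Character>> list = new ArrayList<>();
    list.add(new Tuple2<>(1, 'a'));
    list.add(new Tuple2<>(1, 'b'));
    list.add(new Tuple2<>(2, 'c'));
    JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);

    // desired sampling fraction for each key (stratum)
    Map<Integer, Double> fractions = new HashMap<>();
    fractions.put(1, 0.5);
    fractions.put(2, 1.0);

    // approximate sample: a single pass, stratum sizes only roughly match the fractions
    JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
    // exact sample: additional passes over the data in exchange for exact stratum sizes
    JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);

    System.out.println(approxSample.collect());
    System.out.println(exactSample.collect());
    jsc.stop();
  }
}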

examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java

Lines changed: 4 additions & 6 deletions

@@ -17,25 +17,23 @@
 
 package org.apache.spark.examples.mllib;
 
-import org.apache.spark.SparkConf;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.mllib.linalg.Vectors;
+import java.util.Arrays;
+
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
 import org.apache.spark.mllib.stat.Statistics;
 // $example off$
-import java.util.Arrays;
-
+import org.apache.spark.SparkConf;
 
 public class JavaSummaryStatisticsExample {
   public static void main(String[] args) {
 
     SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
     JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext sqlContext = new SQLContext(jsc);
 
     // $example on$
     Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
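
The hunk ends at the first line of the example body; the imports above serve the column-summary pattern sketched below (illustrative class name and data, assuming the usual colStats usage rather than the exact example body):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
import org.apache.spark.mllib.stat.Statistics;

public class SummaryStatisticsSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("SummaryStatisticsSketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaRDD<Vector> mat = jsc.parallelize(Arrays.asList(
      Vectors.dense(1.0, 10.0, 100.0),
      Vectors.dense(2.0, 20.0, 200.0),
      Vectors.dense(3.0, 30.0, 300.0)));

    // column-wise summary statistics over an RDD of vectors
    MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
    System.out.println(summary.mean());        // vector of column means
    System.out.println(summary.variance());    // vector of column variances
    System.out.println(summary.numNonzeros()); // nonzero count per column

    jsc.stop();
  }
}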

examples/src/main/python/mllib/correlations_example.py

Lines changed: 3 additions & 6 deletions

@@ -18,23 +18,21 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
 import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import Statistics
 # $example off$
 
 if __name__ == "__main__":
-    # $example on$
     sc = SparkContext(appName="CorrelationsExample")  # SparkContext
 
+    # $example on$
     seriesX = sc.parallelize([1.0, 2.0, 3.0, 3.0, 5.0])  # a series
     # seriesY must have the same number of partitions and cardinality as seriesX
     seriesY = sc.parallelize([11.0, 22.0, 33.0, 33.0, 555.0])
 
-    # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
-    # method is not specified, Pearson's method will be used by default.
+    # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
+    # If a method is not specified, Pearson's method will be used by default.
     print(Statistics.corr(seriesX, seriesY, method="pearson"))
 
     v1 = np.array([1.0, 10.0, 100.0])
@@ -45,7 +43,6 @@
     # calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
     # If a method is not specified, Pearson's method will be used by default.
     print(Statistics.corr(data, method="pearson"))
-
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/hypothesis_testing_example.py

Lines changed: 0 additions & 4 deletions

@@ -18,8 +18,6 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
 from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark import SparkContext
@@ -30,7 +28,6 @@
 
 if __name__ == "__main__":
     sc = SparkContext(appName="HypothesisTestingExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)  # a vector composed of the frequencies of events
@@ -60,7 +57,6 @@
     for i, result in enumerate(featureTestResults):
         print("Column: " + str(i + 1))
         print(result)
-
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py

Lines changed: 2 additions & 7 deletions

@@ -18,28 +18,23 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import Statistics
 # $example off$
 
 if __name__ == "__main__":
     sc = SparkContext(appName="HypothesisTestingKolmogorovSmirnovTestExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     parallelData = sc.parallelize([0.1, 0.15, 0.2, 0.3, 0.25])
 
     # run a KS test for the sample versus a standard normal distribution
     testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
-    print(testResult)  # summary of the test including the p-value, test statistic,
-    # and null hypothesis
+    # summary of the test including the p-value, test statistic, and null hypothesis
     # if our p-value indicates significance, we can reject the null hypothesis
     # Note that the Scala functionality of calling Statistics.kolmogorovSmirnovTest with
     # a lambda to calculate the CDF is not made available in the Python API
-
+    print(testResult)
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/kernel_density_estimation_example.py

Lines changed: 0 additions & 4 deletions

@@ -18,16 +18,12 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import KernelDensity
 # $example off$
 
 if __name__ == "__main__":
     sc = SparkContext(appName="KernelDensityEstimationExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     # an RDD of sample data
