Commit f945222

[SPARK-13019] mainly re-organize java import
1 parent d817d0b

17 files changed: 37 additions, 68 deletions
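
The reorganization follows the import grouping that Spark's style guide prescribes, as I read it: java/javax imports first, then scala, then third-party libraries, then org.apache.spark, with a blank line between groups. A minimal sketch of the target ordering, using classes that appear in this commit (the class name ImportOrderSketch and the body are illustrative only; the grouping is the point):

// Assumed grouping: java -> scala -> third-party -> org.apache.spark,
// separated by blank lines, per the Spark style guide.
import java.util.Arrays;

import scala.Tuple2;

import org.apache.spark.SparkConf;

public class ImportOrderSketch {
  public static void main(String[] args) {
    // The body only exercises the imports; the import block above is the pattern.
    System.out.println(new Tuple2<>(Arrays.asList(1, 2, 3), new SparkConf().getClass().getName()));
  }
}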

examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java

Lines changed: 4 additions & 4 deletions

@@ -17,6 +17,8 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
@@ -27,8 +29,6 @@
 import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.stat.Statistics;
 // $example off$
-import java.util.Arrays;
-
 
 public class JavaCorrelationsExample {
   public static void main(String[] args) {
@@ -44,8 +44,8 @@ public static void main(String[] args) {
     JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
       Arrays.asList(new Double[]{11.0, 22.0, 33.0, 33.0, 555.0}));
 
-    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
-    // method is not specified, Pearson's method will be used by default.
+    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
+    // If a method is not specified, Pearson's method will be used by default.
     Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
     System.out.println("correlation is: " + correlation);

examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java

Lines changed: 2 additions & 1 deletion

@@ -17,6 +17,8 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
@@ -29,7 +31,6 @@
 import org.apache.spark.mllib.stat.Statistics;
 import org.apache.spark.mllib.stat.test.ChiSqTestResult;
 // $example off$
-import java.util.Arrays;
 
 public class JavaHypothesisTestingExample {
   public static void main(String[] args) {

examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java

Lines changed: 3 additions & 3 deletions

@@ -17,14 +17,15 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaDoubleRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.stat.Statistics;
 import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
 // $example off$
-import java.util.Arrays;
 
 public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
   public static void main(String[] args) {
@@ -35,8 +36,7 @@ public static void main(String[] args) {
     // $example on$
     JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
     KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
-    // summary of the test including the p-value, test statistic,
-    // and null hypothesis
+    // summary of the test including the p-value, test statistic, and null hypothesis
     // if our p-value indicates significance, we can reject the null hypothesis
     System.out.println(testResult);
     // $example off$

examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java

Lines changed: 4 additions & 7 deletions

@@ -17,14 +17,14 @@
 
 package org.apache.spark.examples.mllib;
 
+import java.util.Arrays;
+
 import org.apache.spark.SparkConf;
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.stat.KernelDensity;
-import org.apache.spark.rdd.RDD;
 // $example off$
-import java.util.Arrays;
 
 public class JavaKernelDensityEstimationExample {
   public static void main(String[] args) {
@@ -36,11 +36,8 @@ public static void main(String[] args) {
     JavaRDD<Double> data = jsc.parallelize(
       Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0)); // an RDD of sample data
 
-    // Construct the density estimator with the sample data and a standard deviation for the Gaussian
-    // kernels
-    KernelDensity kd = new KernelDensity()
-      .setSample(data)
-      .setBandwidth(3.0);
+    // Construct the density estimator with the sample data and a standard deviation for the Gaussian kernels
+    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
 
     // Find density estimates for the given values
     double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});

examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java

Lines changed: 11 additions & 5 deletions

@@ -17,16 +17,22 @@
 
 package org.apache.spark.examples.mllib;
 
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.VoidFunction;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.HashMap;
+// $example on$
+import java.util.Map;
+// $example off$
+
 import scala.Tuple2;
+
+import org.apache.spark.api.java.function.VoidFunction;
 // $example on$
-import java.util.*;
-import org.apache.spark.api.java.JavaRDD;
+
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaSparkContext;
 // $example off$
+import org.apache.spark.SparkConf;
 
 public class JavaStratifiedSamplingExample {
   public static void main(String[] args) {
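
For context on why this file needs the HashMap, Map, and JavaPairRDD imports at all: the example demonstrates stratified sampling, where a per-key fraction map drives sampleByKey (one pass, approximate stratum sizes) and sampleByKeyExact (extra passes, exact stratum sizes). A self-contained sketch of that pattern follows, with an illustrative class name and data rather than the example file's actual body:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import scala.Tuple2;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class StratifiedSamplingSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("StratifiedSamplingSketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    List<Tuple2<Integer, Character>> list = new ArrayList<>();
    list.add(new Tuple2<>(1, 'a'));
    list.add(new Tuple2<>(1, 'b'));
    list.add(new Tuple2<>(2, 'c'));
    JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);

    // desired sampling fraction for each key (stratum)
    Map<Integer, Double> fractions = new HashMap<>();
    fractions.put(1, 0.5);
    fractions.put(2, 1.0);

    // approximate sample: a single pass, stratum sizes only roughly match the fractions
    JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
    // exact sample: additional passes over the data in exchange for exact stratum sizes
    JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);

    System.out.println(approxSample.collect());
    System.out.println(exactSample.collect());
    jsc.stop();
  }
}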

examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java

Lines changed: 4 additions & 6 deletions

@@ -17,25 +17,23 @@
 
 package org.apache.spark.examples.mllib;
 
-import org.apache.spark.SparkConf;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.mllib.linalg.Vectors;
+import java.util.Arrays;
+
 // $example on$
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
 import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
 import org.apache.spark.mllib.stat.Statistics;
 // $example off$
-import java.util.Arrays;
-
+import org.apache.spark.SparkConf;
 
 public class JavaSummaryStatisticsExample {
   public static void main(String[] args) {
 
     SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
     JavaSparkContext jsc = new JavaSparkContext(conf);
-    SQLContext sqlContext = new SQLContext(jsc);
 
     // $example on$
     Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
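
The hunk ends at the first line of the example body; the imports above serve the column-summary pattern sketched below (illustrative class name and data, assuming the usual colStats usage rather than the exact example body):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
import org.apache.spark.mllib.stat.Statistics;

public class SummaryStatisticsSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("SummaryStatisticsSketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaRDD<Vector> mat = jsc.parallelize(Arrays.asList(
      Vectors.dense(1.0, 10.0, 100.0),
      Vectors.dense(2.0, 20.0, 200.0),
      Vectors.dense(3.0, 30.0, 300.0)));

    // column-wise summary statistics over an RDD of vectors
    MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
    System.out.println(summary.mean());        // vector of column means
    System.out.println(summary.variance());    // vector of column variances
    System.out.println(summary.numNonzeros()); // nonzero count per column

    jsc.stop();
  }
}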

examples/src/main/python/mllib/correlations_example.py

Lines changed: 3 additions & 6 deletions

@@ -18,23 +18,21 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
 import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import Statistics
 # $example off$
 
 if __name__ == "__main__":
-    # $example on$
     sc = SparkContext(appName="CorrelationsExample")  # SparkContext
 
+    # $example on$
     seriesX = sc.parallelize([1.0, 2.0, 3.0, 3.0, 5.0])  # a series
     # seriesY must have the same number of partitions and cardinality as seriesX
     seriesY = sc.parallelize([11.0, 22.0, 33.0, 33.0, 555.0])
 
-    # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
-    # method is not specified, Pearson's method will be used by default.
+    # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
+    # If a method is not specified, Pearson's method will be used by default.
     print(Statistics.corr(seriesX, seriesY, method="pearson"))
 
     v1 = np.array([1.0, 10.0, 100.0])
@@ -45,7 +43,6 @@
     # calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
     # If a method is not specified, Pearson's method will be used by default.
     print(Statistics.corr(data, method="pearson"))
-
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/hypothesis_testing_example.py

Lines changed: 0 additions & 4 deletions

@@ -18,8 +18,6 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
 from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark import SparkContext
@@ -30,7 +28,6 @@
 
 if __name__ == "__main__":
     sc = SparkContext(appName="HypothesisTestingExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)  # a vector composed of the frequencies of events
@@ -60,7 +57,6 @@
     for i, result in enumerate(featureTestResults):
         print("Column: " + str(i + 1))
         print(result)
-
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py

Lines changed: 2 additions & 7 deletions

@@ -18,28 +18,23 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import Statistics
 # $example off$
 
 if __name__ == "__main__":
     sc = SparkContext(appName="HypothesisTestingKolmogorovSmirnovTestExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     parallelData = sc.parallelize([0.1, 0.15, 0.2, 0.3, 0.25])
 
     # run a KS test for the sample versus a standard normal distribution
     testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
-    print(testResult)  # summary of the test including the p-value, test statistic,
-    # and null hypothesis
+    # summary of the test including the p-value, test statistic, and null hypothesis
     # if our p-value indicates significance, we can reject the null hypothesis
     # Note that the Scala functionality of calling Statistics.kolmogorovSmirnovTest with
     # a lambda to calculate the CDF is not made available in the Python API
-
+    print(testResult)
     # $example off$
 
     sc.stop()

examples/src/main/python/mllib/kernel_density_estimation_example.py

Lines changed: 0 additions & 4 deletions

@@ -18,16 +18,12 @@
 from __future__ import print_function
 
 from pyspark import SparkContext
-from pyspark.sql import SQLContext
-import numpy as np
-from pyspark.mllib.linalg import Vectors
 # $example on$
 from pyspark.mllib.stat import KernelDensity
 # $example off$
 
 if __name__ == "__main__":
     sc = SparkContext(appName="KernelDensityEstimationExample")  # SparkContext
-    sqlContext = SQLContext(sc)
 
     # $example on$
     # an RDD of sample data
