From ab38ce71a3121ca37380dfd1d6e4a33cb3c2e54e Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 9 May 2018 23:05:36 +0530 Subject: [PATCH 01/11] Example code for Power Iteration Clustering --- .../JavaPowerIterationClusteringExample.java | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java new file mode 100644 index 0000000000000..005a0ae5b0842 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +// $example on$ + +import org.apache.spark.ml.clustering.PowerIterationClustering; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import java.util.Arrays; +import java.util.List; + +// $example off$ + +/** + * An example demonstrating PowerIterationClusteringExample. + * Run with + *
+ * bin/run-example ml.JavaPowerIterationClusteringExample
+ * 
+ */ +public class JavaPowerIterationClusteringExample { + + public static void main(String[] args) { + // Create a SparkSession. + SparkSession spark = SparkSession + .builder() + .appName("JavaPowerIterationClustering") + .getOrCreate(); + + // $example on$ + // Creates data. + List data = Arrays.asList( + RowFactory.create(0L, Arrays.asList(1L), Arrays.asList(0.9)), + RowFactory.create(1L, Arrays.asList(2L), Arrays.asList(0.9)), + RowFactory.create(2L, Arrays.asList(3L), Arrays.asList(0.9)), + RowFactory.create(3L, Arrays.asList(4L), Arrays.asList(0.1)), + RowFactory.create(4L, Arrays.asList(5L), Arrays.asList(0.9)) + ); + StructType schema = new StructType(new StructField[]{ + new StructField("id", DataTypes.LongType, false, Metadata.empty()), + new StructField("neighbors", DataTypes.createArrayType(DataTypes.LongType, false), false, Metadata.empty()), + new StructField("similarities", DataTypes.createArrayType(DataTypes.DoubleType, false), false, Metadata.empty()) + }); + + Dataset df = spark.createDataFrame(data, schema); + + PowerIterationClustering pic = new PowerIterationClustering() + .setK(2) + .setMaxIter(10); + + Dataset result = pic.transform(df).select("id", "prediction"); + + // printing results + System.out.println("Clustering results [id , cluster]"); + for (Row row : result.collectAsList()) { + System.out.println("[" + row.get(0) + " , " + row.get(1) + "]"); + } + + // $example off$ + spark.stop(); + } +} \ No newline at end of file From 7329681350a00361dcefaa8c7fc91b2c5ff841a6 Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 9 May 2018 23:07:44 +0530 Subject: [PATCH 02/11] Example code for Power Iteration Clustering --- .../spark/examples/ml/JavaPowerIterationClusteringExample.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 005a0ae5b0842..fb4c99a3a6210 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -82,4 +82,4 @@ public static void main(String[] args) { // $example off$ spark.stop(); } -} \ No newline at end of file +} From 3d1d6bbac7e121de04b81be6e70326b82110b34e Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 9 May 2018 23:15:09 +0530 Subject: [PATCH 03/11] Example code for Power Iteration Clustering --- .../examples/ml/JavaPowerIterationClusteringExample.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index fb4c99a3a6210..40d0ddcb8f0cb 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -71,11 +71,11 @@ public static void main(String[] args) { .setK(2) .setMaxIter(10); - Dataset result = pic.transform(df).select("id", "prediction"); + Dataset result = pic.transform(df); // printing results System.out.println("Clustering results [id , cluster]"); - for (Row row : result.collectAsList()) { + for (Row row : result.select("id", "prediction").collectAsList()) { System.out.println("[" + row.get(0) + " , " + row.get(1) + "]"); } From 427e4c6225b3a93799294ac7cc9a3cc1ccf644d6 Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 9 May 2018 23:22:11 +0530 Subject: [PATCH 04/11] Example code for Power Iteration Clustering --- .../examples/ml/JavaPowerIterationClusteringExample.java | 4 ++-- .../scala/org/apache/spark/examples/ml/KMeansExample.scala | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 40d0ddcb8f0cb..b8f2ffc65ef12 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -71,11 +71,11 @@ public static void main(String[] args) { .setK(2) .setMaxIter(10); - Dataset result = pic.transform(df); + Dataset result = pic.transform(df).select("id", "prediction"); // printing results System.out.println("Clustering results [id , cluster]"); - for (Row row : result.select("id", "prediction").collectAsList()) { + for (Row row : result.collectAsList()) { System.out.println("[" + row.get(0) + " , " + row.get(1) + "]"); } diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala index 2bc8184e623ff..873df0bbf50a4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala @@ -38,6 +38,7 @@ object KMeansExample { val spark = SparkSession .builder .appName(s"${this.getClass.getSimpleName}") + .master("local[*]") .getOrCreate() // $example on$ From adb28aae1995bf2e00b273bc9112fd0a75b71eb5 Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 9 May 2018 23:24:13 +0530 Subject: [PATCH 05/11] Example code for Power Iteration Clustering --- .../examples/ml/JavaPowerIterationClusteringExample.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index b8f2ffc65ef12..493870eb60eb1 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -71,11 +71,11 @@ public static void main(String[] args) { .setK(2) .setMaxIter(10); - Dataset result = pic.transform(df).select("id", "prediction"); + Dataset result = pic.transform(df); // printing results System.out.println("Clustering results [id , cluster]"); - for (Row row : result.collectAsList()) { + for (Row row : result.select("id","prediction").collectAsList()) { System.out.println("[" + row.get(0) + " , " + row.get(1) + "]"); } From a7c4aa22ce638840da6be801e0d471ebf8ad39c1 Mon Sep 17 00:00:00 2001 From: Shahid Date: Tue, 22 May 2018 00:58:28 +0530 Subject: [PATCH 06/11] Example code for Power Iteration Clustering --- .../JavaPowerIterationClusteringExample.java | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 493870eb60eb1..162a8f208ef98 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -18,6 +18,8 @@ package org.apache.spark.examples.ml; // $example on$ +import java.util.Arrays; +import java.util.List; import org.apache.spark.ml.clustering.PowerIterationClustering; import org.apache.spark.sql.Dataset; @@ -28,10 +30,6 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; - -import java.util.Arrays; -import java.util.List; - // $example off$ /** @@ -51,18 +49,20 @@ public static void main(String[] args) { .getOrCreate(); // $example on$ - // Creates data. List data = Arrays.asList( - RowFactory.create(0L, Arrays.asList(1L), Arrays.asList(0.9)), - RowFactory.create(1L, Arrays.asList(2L), Arrays.asList(0.9)), - RowFactory.create(2L, Arrays.asList(3L), Arrays.asList(0.9)), - RowFactory.create(3L, Arrays.asList(4L), Arrays.asList(0.1)), - RowFactory.create(4L, Arrays.asList(5L), Arrays.asList(0.9)) + RowFactory.create(0L, Arrays.asList(1L, 2L, 4L), Arrays.asList(0.9, 0.9, 0.1)), + RowFactory.create(1L, Arrays.asList(0L, 2L), Arrays.asList(0.9, 0.9)), + RowFactory.create(2L, Arrays.asList(0L, 1L), Arrays.asList(0.9, 0.9)), + RowFactory.create(3L, Arrays.asList(4L), Arrays.asList(0.9)), + RowFactory.create(4L, Arrays.asList(3L, 0L), Arrays.asList(0.9, 0.1)) + ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.LongType, false, Metadata.empty()), - new StructField("neighbors", DataTypes.createArrayType(DataTypes.LongType, false), false, Metadata.empty()), - new StructField("similarities", DataTypes.createArrayType(DataTypes.DoubleType, false), false, Metadata.empty()) + new StructField("neighbors", DataTypes.createArrayType(DataTypes.LongType, false), + false, Metadata.empty()), + new StructField("similarities", DataTypes.createArrayType(DataTypes.DoubleType, false), + false, Metadata.empty()) }); Dataset df = spark.createDataFrame(data, schema); @@ -73,13 +73,13 @@ public static void main(String[] args) { Dataset result = pic.transform(df); - // printing results - System.out.println("Clustering results [id , cluster]"); - for (Row row : result.select("id","prediction").collectAsList()) { - System.out.println("[" + row.get(0) + " , " + row.get(1) + "]"); + // Printing results + System.out.println("Clustering results [id, cluster]"); + for (Row row : result.select("id", "prediction").collectAsList()) { + System.out.println("[" + row.get(0) + ", " + row.get(1) + "]"); } - // $example off$ + spark.stop(); } } From dd95abc65c509d9f492db49052bf7f65d6503ce3 Mon Sep 17 00:00:00 2001 From: Shahid Date: Thu, 7 Jun 2018 23:33:17 +0530 Subject: [PATCH 07/11] Example code for Power Iteration Clustering --- .../JavaPowerIterationClusteringExample.java | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 162a8f208ef98..0777a636dc864 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -46,36 +46,37 @@ public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaPowerIterationClustering") + .master("local") .getOrCreate(); // $example on$ List data = Arrays.asList( - RowFactory.create(0L, Arrays.asList(1L, 2L, 4L), Arrays.asList(0.9, 0.9, 0.1)), - RowFactory.create(1L, Arrays.asList(0L, 2L), Arrays.asList(0.9, 0.9)), - RowFactory.create(2L, Arrays.asList(0L, 1L), Arrays.asList(0.9, 0.9)), - RowFactory.create(3L, Arrays.asList(4L), Arrays.asList(0.9)), - RowFactory.create(4L, Arrays.asList(3L, 0L), Arrays.asList(0.9, 0.1)) - + RowFactory.create(0L, 1L, 1.0), + RowFactory.create(0L, 2L, 1.0), + RowFactory.create(1L, 2L, 1.0), + RowFactory.create(3L, 4L, 1.0), + RowFactory.create(4L, 0L, 0.1) ); + StructType schema = new StructType(new StructField[]{ - new StructField("id", DataTypes.LongType, false, Metadata.empty()), - new StructField("neighbors", DataTypes.createArrayType(DataTypes.LongType, false), - false, Metadata.empty()), - new StructField("similarities", DataTypes.createArrayType(DataTypes.DoubleType, false), - false, Metadata.empty()) + new StructField("src", DataTypes.LongType, false, Metadata.empty()), + new StructField("dst", DataTypes.LongType, false, Metadata.empty()), + new StructField("weight", DataTypes.DoubleType, false, Metadata.empty()) }); Dataset df = spark.createDataFrame(data, schema); - PowerIterationClustering pic = new PowerIterationClustering() + Dataset result = new PowerIterationClustering() .setK(2) - .setMaxIter(10); + .setMaxIter(10) + .setInitMode("degree") + .setWeightCol("weight") + .assignClusters(df); - Dataset result = pic.transform(df); // Printing results System.out.println("Clustering results [id, cluster]"); - for (Row row : result.select("id", "prediction").collectAsList()) { + for (Row row : result.select("id", "cluster").collectAsList()) { System.out.println("[" + row.get(0) + ", " + row.get(1) + "]"); } // $example off$ From 1c5feed81628f077a174def353fed38caa56d01c Mon Sep 17 00:00:00 2001 From: Shahid Date: Thu, 7 Jun 2018 23:36:32 +0530 Subject: [PATCH 08/11] Example code for Power Iteration Clustering --- .../spark/examples/ml/JavaPowerIterationClusteringExample.java | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 0777a636dc864..f5a4b7aff31da 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -46,7 +46,6 @@ public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaPowerIterationClustering") - .master("local") .getOrCreate(); // $example on$ From 12210d3b15de93471ddfc5e7a4538a59a60dcc8c Mon Sep 17 00:00:00 2001 From: Shahid Date: Thu, 7 Jun 2018 23:46:38 +0530 Subject: [PATCH 09/11] Example code for Power Iteration Clustering --- .../main/scala/org/apache/spark/examples/ml/KMeansExample.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala index 873df0bbf50a4..2bc8184e623ff 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala @@ -38,7 +38,6 @@ object KMeansExample { val spark = SparkSession .builder .appName(s"${this.getClass.getSimpleName}") - .master("local[*]") .getOrCreate() // $example on$ From acbb6fb998ef5ac1649a21283d706f779ff2c9a2 Mon Sep 17 00:00:00 2001 From: Shahid Date: Thu, 7 Jun 2018 23:47:59 +0530 Subject: [PATCH 10/11] Example code for Power Iteration Clustering --- .../spark/examples/ml/JavaPowerIterationClusteringExample.java | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index f5a4b7aff31da..23b6b9a55556f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -72,7 +72,6 @@ public static void main(String[] args) { .setWeightCol("weight") .assignClusters(df); - // Printing results System.out.println("Clustering results [id, cluster]"); for (Row row : result.select("id", "cluster").collectAsList()) { From 90450e0950b334af4373c247fe3b0ed0cc6fe6c0 Mon Sep 17 00:00:00 2001 From: Shahid Date: Fri, 8 Jun 2018 01:18:36 +0530 Subject: [PATCH 11/11] Example code for Power Iteration Clustering --- .../JavaPowerIterationClusteringExample.java | 73 ++++++++----------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java index 23b6b9a55556f..51865637df6f6 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java @@ -32,53 +32,40 @@ import org.apache.spark.sql.types.StructType; // $example off$ -/** - * An example demonstrating PowerIterationClusteringExample. - * Run with - *
- * bin/run-example ml.JavaPowerIterationClusteringExample
- * 
- */ public class JavaPowerIterationClusteringExample { + public static void main(String[] args) { + // Create a SparkSession. + SparkSession spark = SparkSession + .builder() + .appName("JavaPowerIterationClustering") + .getOrCreate(); - public static void main(String[] args) { - // Create a SparkSession. - SparkSession spark = SparkSession - .builder() - .appName("JavaPowerIterationClustering") - .getOrCreate(); - - // $example on$ - List data = Arrays.asList( - RowFactory.create(0L, 1L, 1.0), - RowFactory.create(0L, 2L, 1.0), - RowFactory.create(1L, 2L, 1.0), - RowFactory.create(3L, 4L, 1.0), - RowFactory.create(4L, 0L, 0.1) - ); - - StructType schema = new StructType(new StructField[]{ - new StructField("src", DataTypes.LongType, false, Metadata.empty()), - new StructField("dst", DataTypes.LongType, false, Metadata.empty()), - new StructField("weight", DataTypes.DoubleType, false, Metadata.empty()) - }); + // $example on$ + List data = Arrays.asList( + RowFactory.create(0L, 1L, 1.0), + RowFactory.create(0L, 2L, 1.0), + RowFactory.create(1L, 2L, 1.0), + RowFactory.create(3L, 4L, 1.0), + RowFactory.create(4L, 0L, 0.1) + ); - Dataset df = spark.createDataFrame(data, schema); + StructType schema = new StructType(new StructField[]{ + new StructField("src", DataTypes.LongType, false, Metadata.empty()), + new StructField("dst", DataTypes.LongType, false, Metadata.empty()), + new StructField("weight", DataTypes.DoubleType, false, Metadata.empty()) + }); - Dataset result = new PowerIterationClustering() - .setK(2) - .setMaxIter(10) - .setInitMode("degree") - .setWeightCol("weight") - .assignClusters(df); + Dataset df = spark.createDataFrame(data, schema); - // Printing results - System.out.println("Clustering results [id, cluster]"); - for (Row row : result.select("id", "cluster").collectAsList()) { - System.out.println("[" + row.get(0) + ", " + row.get(1) + "]"); - } - // $example off$ + PowerIterationClustering model = new PowerIterationClustering() + .setK(2) + .setMaxIter(10) + .setInitMode("degree") + .setWeightCol("weight"); - spark.stop(); - } + Dataset result = model.assignClusters(df); + result.show(false); + // $example off$ + spark.stop(); + } }