@@ -84,7 +84,8 @@ import org.apache.spark.graphx._
8484import org.apache.spark.rdd.RDD
8585{% endhighlight %}
8686
87- If you are not using the Spark shell you will also need a Spark context.
87+ If you are not using the Spark shell you will also need a `SparkContext`. To learn more about
88+ getting started with Spark refer to the [Spark Quick Start Guide](quick-start.html).
8889
8990# The Property Graph
9091<a name="property_graph"></a>
@@ -190,7 +191,7 @@ and `graph.edges` members respectively.
190191{% highlight scala %}
191192val graph: Graph[ (String, String), String] // Constructed from above
192193// Count all users which are postdocs
193- graph.vertices.filter { case (id, (name, pos)) => pos == "postdoc"}.count
194+ graph.vertices.filter { case (id, (name, pos)) => pos == "postdoc" }.count
194195// Count all the edges where src > dst
195196graph.edges.filter(e => e.srcId > e.dstId).count
196197{% endhighlight %}
@@ -258,8 +259,10 @@ val graph: Graph[(String, String), String]
258259val indDegrees: VertexRDD[ Int] = graph.inDegrees
259260{% endhighlight %}
260261
261- The reason for differentiating between core graph operations and GraphOps is to be able to support
262- various graph representations in the future.
262+ The reason for differentiating between core graph operations and [`GraphOps`][GraphOps] is to be
263+ able to support different graph representations in the future. Each graph representation must
264+ provide implementations of the core operations and reuse many of the useful operations defined in
265+ [`GraphOps`][GraphOps].
263266
264267## Property Operators
265268
@@ -334,14 +337,32 @@ interest or eliminate broken links. For example in the following code we remove
334337[ Graph.subgraph ] : api/graphx/index.html#org.apache.spark.graphx.Graph@subgraph((EdgeTriplet[VD,ED])⇒Boolean,(VertexID,VD)⇒Boolean):Graph[VD,ED]
335338
336339{% highlight scala %}
337- val users: RDD[ (VertexId, (String, String))]
338- val edges: RDD[ Edge[ String]]
340+ // Create an RDD for the vertices
341+ val users: RDD[ (VertexID, (String, String))] =
342+ sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
343+ (5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
344+ (4L, ("peter", "student"))))
345+ // Create an RDD for edges
346+ val relationships: RDD[ Edge[ String]] =
347+ sc.parallelize(Array(Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"),
348+ Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi"),
349+ Edge(4L, 0L, "student"), Edge(5L, 0L, "colleague")))
339350// Define a default user in case there are relationships with missing users
340351val defaultUser = ("John Doe", "Missing")
341352// Build the initial Graph
342353val graph = Graph(users, relationships, defaultUser)
354+ // Notice that there is a user 0 (for which we have no information) connecting users
355+ // 4 (peter) and 5 (franklin).
356+ graph.triplets.map(
357+ triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
358+ ).collect.foreach(println(_))
343359// Remove missing vertices as well as the edges connected to them
344360val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
361+ // The valid subgraph will disconnect users 4 and 5 by removing user 0
362+ validGraph.vertices.collect.foreach(println(_))
363+ validGraph.triplets.map(
364+ triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
365+ ).collect.foreach(println(_))
345366{% endhighlight %}
346367
347368> Note in the above example only the vertex predicate is provided. The ` subgraph ` operator defaults
0 commit comments