| 
 | 1 | +/*  | 
 | 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more  | 
 | 3 | + * contributor license agreements.  See the NOTICE file distributed with  | 
 | 4 | + * this work for additional information regarding copyright ownership.  | 
 | 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0  | 
 | 6 | + * (the "License"); you may not use this file except in compliance with  | 
 | 7 | + * the License.  You may obtain a copy of the License at  | 
 | 8 | + *  | 
 | 9 | + *    http://www.apache.org/licenses/LICENSE-2.0  | 
 | 10 | + *  | 
 | 11 | + * Unless required by applicable law or agreed to in writing, software  | 
 | 12 | + * distributed under the License is distributed on an "AS IS" BASIS,  | 
 | 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
 | 14 | + * See the License for the specific language governing permissions and  | 
 | 15 | + * limitations under the License.  | 
 | 16 | + */  | 
 | 17 | + | 
 | 18 | +package org.apache.spark.graphx.lib  | 
 | 19 | + | 
 | 20 | +import scala.reflect.ClassTag  | 
 | 21 | +import org.apache.spark.graphx._  | 
 | 22 | + | 
 | 23 | +/** LPA algorithm. */  | 
 | 24 | +object LPA {  | 
 | 25 | +  /**  | 
 | 26 | +   * Run LPA (label propogation algorithm) for detecting communities in networks using the pregel framework.  | 
 | 27 | +   *   | 
 | 28 | +   * Each node in the network is initially assigned to its own community.  At every super step   | 
 | 29 | +   * nodes send their community affiliation to all neighbors and update their state to the mode   | 
 | 30 | +   * community affiliation of incomming messages.    | 
 | 31 | +   *  | 
 | 32 | +   * LPA is a standard community detection algorithm for graphs.  It is very inexpensive   | 
 | 33 | +   * computationally, although (1) convergence is not guaranteed and (2) one can end up with  | 
 | 34 | +   * trivial solutions (all nodes are identified into a single community).  | 
 | 35 | +   *  | 
 | 36 | +   * @tparam VD the vertex attribute type (discarded in the computation)  | 
 | 37 | +   * @tparam ED the edge attribute type (not used in the computation)  | 
 | 38 | +   *  | 
 | 39 | +   * @param graph the graph for which to compute the community affiliation  | 
 | 40 | +   * @param maxSteps the number of supersteps of LPA to be performed  | 
 | 41 | +   *  | 
 | 42 | +   * @return a graph with vertex attributes containing the label of community affiliation  | 
 | 43 | +   */  | 
 | 44 | +  def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], maxSteps: Int): Graph[VertexId, Long]{  | 
 | 45 | +    val lpaGraph = graph.mapVertices { case (vid, _) => vid }  | 
 | 46 | +    def sendMessage(edge: EdgeTriplet[VertexId, ED]) = {  | 
 | 47 | +      Iterator((e.srcId, Map(e.dstAttr -> 1L)),(e.dstId, Map(e.srcAttr -> 1L)))  | 
 | 48 | +    }  | 
 | 49 | +    def mergeMessage(count1: Map[VertexId, Long], count2: Map[VertexId, Long]): Map[VertexId, Long] = {  | 
 | 50 | +      (count1.keySet ++ count2.keySet).map { i =>  | 
 | 51 | +        val count1Val = count1.getOrElse(i,0L)  | 
 | 52 | +        val count2Val = count2.getOrElse(i,0L)  | 
 | 53 | +	i -> (count1Val +count2Val)  | 
 | 54 | +	}.toMap  | 
 | 55 | +    }  | 
 | 56 | +    def vertexProgram(vid: VertexId, attr: Long, message: Map[VertexId, Long])={  | 
 | 57 | +      if (message.isEmpty) attr else message.maxBy{_._2}._1),  | 
 | 58 | +    }  | 
 | 59 | +    val initialMessage = Map[VertexId,Long]()  | 
 | 60 | +    Pregel(lpaGraph, initialMessage, maxIterations = maxSteps)(  | 
 | 61 | +      vprog = vertexProgram,  | 
 | 62 | +      sendMsg = sendMessage,  | 
 | 63 | +      mergeMsg = mergeMessage)  | 
 | 64 | +  }  | 
 | 65 | +}  | 
0 commit comments