Skip to content

Commit 0d7469b

Browse files
committed
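[SPARK-2199] Fix a bug in the perplexity computation
The collection length was computed incorrectly: it used `tokens.activeSize` for each document instead of the sum of the document's `tokens`.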
1 parent b6f852e commit 0d7469b

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

mllib/src/main/scala/org/apache/spark/mllib/clustering/topicmodeling/AbstractPLSA.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ private[topicmodeling] trait AbstractPLSA[DocumentParameterType <: DocumentParam
49 49     protected def getAlphabetSize(documents: RDD[Document]) = documents.first().alphabetSize
50 50
51 51     protected def getCollectionLength(documents: RDD[Document]) =
52    -     documents.map(_.tokens.activeSize).reduce(_ + _)
   52 +     documents.map(doc => sum(doc.tokens)).reduce(_ + _)
53 53
54 54     protected def singleDocumentLikelihood(parameter: DocumentParameters,
55 55         topicsBC: Broadcast[Array[Array[Float]]],

0 commit comments

Comments
 (0)