@@ -39,7 +39,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
3939
4040 // Check: describeTopics() with all terms
4141 val fullTopicSummary = model.describeTopics()
42- assert(fullTopicSummary.size === tinyK)
42+ assert(fullTopicSummary.length === tinyK)
4343 fullTopicSummary.zip(tinyTopicDescription).foreach {
4444 case ((algTerms, algTermWeights), (terms, termWeights)) =>
4545 assert(algTerms === terms)
@@ -101,7 +101,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
101101 // Check: per-doc topic distributions
102102 val topicDistributions = model.topicDistributions.collect()
103103 // Ensure all documents are covered.
104- assert(topicDistributions.size === tinyCorpus.size )
104+ assert(topicDistributions.length === tinyCorpus.length )
105105 assert(tinyCorpus.map(_._1).toSet === topicDistributions.map(_._1).toSet)
106106 // Ensure we have proper distributions
107107 topicDistributions.foreach { case (docId, topicDistribution) =>
@@ -139,8 +139,8 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
139139 val corpus = sc.parallelize(tinyCorpus, 2 )
140140 val op = new OnlineLDAOptimizer ().initialize(corpus, lda)
141141 op.setKappa(0.9876 ).setMiniBatchFraction(0.123 ).setTau_0(567 )
142- assert(op.alpha == 0.5 ) // default 1.0 / k
143- assert(op.eta == 0.5 ) // default 1.0 / k
142+ assert(op.getAlpha == 0.5 ) // default 1.0 / k
143+ assert(op.getEta == 0.5 ) // default 1.0 / k
144144 assert(op.getKappa == 0.9876 )
145145 assert(op.getMiniBatchFraction == 0.123 )
146146 assert(op.getTau_0 == 567 )
@@ -154,14 +154,14 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
154154
155155 def docs : Array [(Long , Vector )] = Array (
156156 Vectors .sparse(vocabSize, Array (0 , 1 , 2 ), Array (1 , 1 , 1 )), // apple, orange, banana
157- Vectors .sparse(vocabSize, Array (3 , 4 , 5 ), Array (1 , 1 , 1 ))) // tiger, cat, dog
158- .zipWithIndex.map { case (wordCounts, docId) => (docId.toLong, wordCounts) }
157+ Vectors .sparse(vocabSize, Array (3 , 4 , 5 ), Array (1 , 1 , 1 )) // tiger, cat, dog
158+ ) .zipWithIndex.map { case (wordCounts, docId) => (docId.toLong, wordCounts) }
159159 val corpus = sc.parallelize(docs, 2 )
160160
161- // setGammaShape large so to avoid the stochastic impact.
161+ // Set GammaShape large to avoid the stochastic impact.
162162 val op = new OnlineLDAOptimizer ().setTau_0(1024 ).setKappa(0.51 ).setGammaShape(1e40 )
163163 .setMiniBatchFraction(1 )
164- val lda = new LDA ().setK(k).setMaxIterations(1 ).setOptimizer(op)
164+ val lda = new LDA ().setK(k).setMaxIterations(1 ).setOptimizer(op).setSeed( 12345 )
165165
166166 val state = op.initialize(corpus, lda)
167167 // override lambda to simulate an intermediate state
@@ -175,8 +175,8 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
175175
176176 // Verify the result. Note that this generates the same result as
177177 // [[https://github.com/Blei-Lab/onlineldavb]]
178- val topic1 = op.lambda (0 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
179- val topic2 = op.lambda (1 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
178+ val topic1 = op.getLambda (0 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
179+ val topic2 = op.getLambda (1 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
180180 assert(" 1.1101, 1.2076, 1.3050, 0.8899, 0.7924, 0.6950" == topic1)
181181 assert(" 0.8899, 0.7924, 0.6950, 1.1101, 1.2076, 1.3050" == topic2)
182182 }
@@ -186,7 +186,6 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
186186 Vectors .sparse(6 , Array (0 , 1 ), Array (1 , 1 )),
187187 Vectors .sparse(6 , Array (1 , 2 ), Array (1 , 1 )),
188188 Vectors .sparse(6 , Array (0 , 2 ), Array (1 , 1 )),
189-
190189 Vectors .sparse(6 , Array (3 , 4 ), Array (1 , 1 )),
191190 Vectors .sparse(6 , Array (3 , 5 ), Array (1 , 1 )),
192191 Vectors .sparse(6 , Array (4 , 5 ), Array (1 , 1 ))
@@ -200,6 +199,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
200199 .setTopicConcentration(0.01 )
201200 .setMaxIterations(100 )
202201 .setOptimizer(op)
202+ .setSeed(12345 )
203203
204204 val ldaModel = lda.run(docs)
205205 val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10 )
@@ -208,10 +208,10 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
208208 }
209209
210210 // Check the distribution for each topic; a typical distribution is (0.3, 0.3, 0.3, 0.02, 0.02, 0.02)
211- topics.foreach( topic => {
212- val smalls = topic.filter(t => ( t._2 < 0.1 ) ).map(_._2)
213- assert(smalls.size == 3 && smalls.sum < 0.2 )
214- })
211+ topics.foreach { topic =>
212+ val smalls = topic.filter(t => t._2 < 0.1 ).map(_._2)
213+ assert(smalls.length == 3 && smalls.sum < 0.2 )
214+ }
215215 }
216216
217217}
0 commit comments