@@ -39,7 +39,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
3939
4040    //  Check: describeTopics() with all terms
4141    val  fullTopicSummary  =  model.describeTopics()
42-     assert(fullTopicSummary.size  ===  tinyK)
42+     assert(fullTopicSummary.length  ===  tinyK)
4343    fullTopicSummary.zip(tinyTopicDescription).foreach {
4444      case  ((algTerms, algTermWeights), (terms, termWeights)) => 
4545        assert(algTerms ===  terms)
@@ -101,7 +101,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
101101    //  Check: per-doc topic distributions
102102    val  topicDistributions  =  model.topicDistributions.collect()
103103    //   Ensure all documents are covered.
104-     assert(topicDistributions.size  ===  tinyCorpus.size )
104+     assert(topicDistributions.length  ===  tinyCorpus.length )
105105    assert(tinyCorpus.map(_._1).toSet ===  topicDistributions.map(_._1).toSet)
106106    //   Ensure we have proper distributions
107107    topicDistributions.foreach { case  (docId, topicDistribution) => 
@@ -139,8 +139,8 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
139139    val  corpus  =  sc.parallelize(tinyCorpus, 2 )
140140    val  op  =  new  OnlineLDAOptimizer ().initialize(corpus, lda)
141141    op.setKappa(0.9876 ).setMiniBatchFraction(0.123 ).setTau_0(567 )
142-     assert(op.alpha  ==  0.5 ) //  default 1.0 / k
143-     assert(op.eta  ==  0.5 )   //  default 1.0 / k
142+     assert(op.getAlpha  ==  0.5 ) //  default 1.0 / k
143+     assert(op.getEta  ==  0.5 )   //  default 1.0 / k
144144    assert(op.getKappa ==  0.9876 )
145145    assert(op.getMiniBatchFraction ==  0.123 )
146146    assert(op.getTau_0 ==  567 )
@@ -154,14 +154,14 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
154154
155155    def  docs :  Array [(Long , Vector )] =  Array (
156156      Vectors .sparse(vocabSize, Array (0 , 1 , 2 ), Array (1 , 1 , 1 )), //  apple, orange, banana
157-       Vectors .sparse(vocabSize, Array (3 , 4 , 5 ), Array (1 , 1 , 1 )))  //  tiger, cat, dog
158-        .zipWithIndex.map { case  (wordCounts, docId) =>  (docId.toLong, wordCounts) }
157+       Vectors .sparse(vocabSize, Array (3 , 4 , 5 ), Array (1 , 1 , 1 )) //  tiger, cat, dog
158+     ) .zipWithIndex.map { case  (wordCounts, docId) =>  (docId.toLong, wordCounts) }
159159    val  corpus  =  sc.parallelize(docs, 2 )
160160
161-     //  setGammaShape large so  to avoid the stochastic impact.
161+     //  Set GammaShape large  to avoid the stochastic impact.
162162    val  op  =  new  OnlineLDAOptimizer ().setTau_0(1024 ).setKappa(0.51 ).setGammaShape(1e40 )
163163      .setMiniBatchFraction(1 )
164-     val  lda  =  new  LDA ().setK(k).setMaxIterations(1 ).setOptimizer(op)
164+     val  lda  =  new  LDA ().setK(k).setMaxIterations(1 ).setOptimizer(op).setSeed( 12345 ) 
165165
166166    val  state  =  op.initialize(corpus, lda)
167167    //  override lambda to simulate an intermediate state
@@ -175,8 +175,8 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
175175
176176    //  verify the result, Note this generate the identical result as
177177    //  [[https://github.com/Blei-Lab/onlineldavb]]
178-     val  topic1  =  op.lambda (0 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
179-     val  topic2  =  op.lambda (1 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
178+     val  topic1  =  op.getLambda (0 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
179+     val  topic2  =  op.getLambda (1 , :: ).inner.toArray.map(" %.4f" .format(_)).mkString(" , " )
180180    assert(" 1.1101, 1.2076, 1.3050, 0.8899, 0.7924, 0.6950" ==  topic1)
181181    assert(" 0.8899, 0.7924, 0.6950, 1.1101, 1.2076, 1.3050" ==  topic2)
182182  }
@@ -186,7 +186,6 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
186186      Vectors .sparse(6 , Array (0 , 1 ), Array (1 , 1 )),
187187      Vectors .sparse(6 , Array (1 , 2 ), Array (1 , 1 )),
188188      Vectors .sparse(6 , Array (0 , 2 ), Array (1 , 1 )),
189- 
190189      Vectors .sparse(6 , Array (3 , 4 ), Array (1 , 1 )),
191190      Vectors .sparse(6 , Array (3 , 5 ), Array (1 , 1 )),
192191      Vectors .sparse(6 , Array (4 , 5 ), Array (1 , 1 ))
@@ -200,6 +199,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
200199      .setTopicConcentration(0.01 )
201200      .setMaxIterations(100 )
202201      .setOptimizer(op)
202+       .setSeed(12345 )
203203
204204    val  ldaModel  =  lda.run(docs)
205205    val  topicIndices  =  ldaModel.describeTopics(maxTermsPerTopic =  10 )
@@ -208,10 +208,10 @@ class LDASuite extends FunSuite with MLlibTestSparkContext {
208208    }
209209
210210    //  check distribution for each topic, typical distribution is (0.3, 0.3, 0.3, 0.02, 0.02, 0.02)
211-     topics.foreach( topic => { 
212-       val  smalls  =  topic.filter(t =>  ( t._2 <  0.1 ) ).map(_._2)
213-       assert(smalls.size  ==  3  &&  smalls.sum <  0.2 )
214-     }) 
211+     topics.foreach {  topic => 
212+       val  smalls  =  topic.filter(t =>  t._2 <  0.1 ).map(_._2)
213+       assert(smalls.length  ==  3  &&  smalls.sum <  0.2 )
214+     }
215215  }
216216
217217}
0 commit comments