@@ -89,10 +89,15 @@ case class Rating(val user: Int, val product: Int, val rating: Double)
8989 * indicated user
9090 * preferences rather than explicit ratings given to items.
9191 */
92- class ALS private (var numBlocks : Int , var rank : Int , var iterations : Int , var lambda : Double ,
93- var implicitPrefs : Boolean , var alpha : Double )
94- extends Serializable with Logging
95- {
92+ class ALS private (
93+ var numBlocks : Int ,
94+ var rank : Int ,
95+ var iterations : Int ,
96+ var lambda : Double ,
97+ var implicitPrefs : Boolean ,
98+ var alpha : Double ,
99+ var seed : Long = System .nanoTime()
100+ ) extends Serializable with Logging {
96101 def this () = this (- 1 , 10 , 10 , 0.01 , false , 1.0 )
97102
98103 /**
@@ -132,6 +137,11 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
132137 this
133138 }
134139
140+ def setSeed (seed : Long ): ALS = {
141+ this .seed = seed
142+ this
143+ }
144+
135145 /**
136146 * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples.
137147 * Returns a MatrixFactorizationModel with feature vectors for each user and product.
@@ -155,7 +165,7 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
155165
156166 // Initialize user and product factors randomly, but use a deterministic seed for each
157167 // partition so that fault recovery works
158- val seedGen = new Random ()
168+ val seedGen = new Random (seed )
159169 val seed1 = seedGen.nextInt()
160170 val seed2 = seedGen.nextInt()
161171 // Hash an integer to propagate random bits at all positions, similar to java.util.HashTable
@@ -468,6 +478,7 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
468478 * Top-level methods for calling Alternating Least Squares (ALS) matrix factorization.
469479 */
470480object ALS {
481+
471482 /**
472483 * Train a matrix factorization model given an RDD of ratings given by users to some products,
473484 * in the form of (userID, productID, rating) pairs. We approximate the ratings matrix as the
@@ -480,15 +491,39 @@ object ALS {
480491 * @param iterations number of iterations of ALS (recommended: 10-20)
481492 * @param lambda regularization factor (recommended: 0.01)
482493 * @param blocks level of parallelism to split computation into
494+ * @param seed random seed
483495 */
484496 def train (
485497 ratings : RDD [Rating ],
486498 rank : Int ,
487499 iterations : Int ,
488500 lambda : Double ,
489- blocks : Int )
490- : MatrixFactorizationModel =
491- {
501+ blocks : Int ,
502+ seed : Long
503+ ): MatrixFactorizationModel = {
504+ new ALS (blocks, rank, iterations, lambda, false , 1.0 , seed).run(ratings)
505+ }
506+
507+ /**
508+ * Train a matrix factorization model given an RDD of ratings given by users to some products,
509+ * in the form of (userID, productID, rating) pairs. We approximate the ratings matrix as the
510+ * product of two lower-rank matrices of a given rank (number of features). To solve for these
511+ * features, we run a given number of iterations of ALS. This is done using a level of
512+ * parallelism given by `blocks`.
513+ *
514+ * @param ratings RDD of (userID, productID, rating) pairs
515+ * @param rank number of features to use
516+ * @param iterations number of iterations of ALS (recommended: 10-20)
517+ * @param lambda regularization factor (recommended: 0.01)
518+ * @param blocks level of parallelism to split computation into
519+ */
520+ def train (
521+ ratings : RDD [Rating ],
522+ rank : Int ,
523+ iterations : Int ,
524+ lambda : Double ,
525+ blocks : Int
526+ ): MatrixFactorizationModel = {
492527 new ALS (blocks, rank, iterations, lambda, false , 1.0 ).run(ratings)
493528 }
494529
@@ -505,8 +540,7 @@ object ALS {
505540 * @param lambda regularization factor (recommended: 0.01)
506541 */
507542 def train (ratings : RDD [Rating ], rank : Int , iterations : Int , lambda : Double )
508- : MatrixFactorizationModel =
509- {
543+ : MatrixFactorizationModel = {
510544 train(ratings, rank, iterations, lambda, - 1 )
511545 }
512546
@@ -522,8 +556,7 @@ object ALS {
522556 * @param iterations number of iterations of ALS (recommended: 10-20)
523557 */
524558 def train (ratings : RDD [Rating ], rank : Int , iterations : Int )
525- : MatrixFactorizationModel =
526- {
559+ : MatrixFactorizationModel = {
527560 train(ratings, rank, iterations, 0.01 , - 1 )
528561 }
529562
@@ -540,16 +573,42 @@ object ALS {
540573 * @param lambda regularization factor (recommended: 0.01)
541574 * @param blocks level of parallelism to split computation into
542575 * @param alpha confidence parameter (only applies when immplicitPrefs = true)
576+ * @param seed random seed
543577 */
544578 def trainImplicit (
545579 ratings : RDD [Rating ],
546580 rank : Int ,
547581 iterations : Int ,
548582 lambda : Double ,
549583 blocks : Int ,
550- alpha : Double )
551- : MatrixFactorizationModel =
552- {
584+ alpha : Double ,
585+ seed : Long
586+ ): MatrixFactorizationModel = {
587+ new ALS (blocks, rank, iterations, lambda, true , alpha, seed).run(ratings)
588+ }
589+
590+ /**
591+ * Train a matrix factorization model given an RDD of 'implicit preferences' given by users
592+ * to some products, in the form of (userID, productID, preference) pairs. We approximate the
593+ * ratings matrix as the product of two lower-rank matrices of a given rank (number of features).
594+ * To solve for these features, we run a given number of iterations of ALS. This is done using
595+ * a level of parallelism given by `blocks`.
596+ *
597+ * @param ratings RDD of (userID, productID, rating) pairs
598+ * @param rank number of features to use
599+ * @param iterations number of iterations of ALS (recommended: 10-20)
600+ * @param lambda regularization factor (recommended: 0.01)
601+ * @param blocks level of parallelism to split computation into
602+ * @param alpha confidence parameter (only applies when immplicitPrefs = true)
603+ */
604+ def trainImplicit (
605+ ratings : RDD [Rating ],
606+ rank : Int ,
607+ iterations : Int ,
608+ lambda : Double ,
609+ blocks : Int ,
610+ alpha : Double
611+ ): MatrixFactorizationModel = {
553612 new ALS (blocks, rank, iterations, lambda, true , alpha).run(ratings)
554613 }
555614
@@ -565,8 +624,8 @@ object ALS {
565624 * @param iterations number of iterations of ALS (recommended: 10-20)
566625 * @param lambda regularization factor (recommended: 0.01)
567626 */
568- def trainImplicit (ratings : RDD [Rating ], rank : Int , iterations : Int , lambda : Double ,
569- alpha : Double ) : MatrixFactorizationModel = {
627+ def trainImplicit (ratings : RDD [Rating ], rank : Int , iterations : Int , lambda : Double , alpha : Double )
628+ : MatrixFactorizationModel = {
570629 trainImplicit(ratings, rank, iterations, lambda, - 1 , alpha)
571630 }
572631
@@ -583,8 +642,7 @@ object ALS {
583642 * @param iterations number of iterations of ALS (recommended: 10-20)
584643 */
585644 def trainImplicit (ratings : RDD [Rating ], rank : Int , iterations : Int )
586- : MatrixFactorizationModel =
587- {
645+ : MatrixFactorizationModel = {
588646 trainImplicit(ratings, rank, iterations, 0.01 , - 1 , 1.0 )
589647 }
590648
0 commit comments