From 956ca1b86aacb22fabd52740ce0c6fef5524bae8 Mon Sep 17 00:00:00 2001
From: Senior Stefano El Bour-que
Date: Fri, 28 Nov 2014 09:40:40 +0100
Subject: [PATCH 1/2] added functionality to find similar users and similar products

---
 .../MatrixFactorizationModel.scala | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index ed2f8b41bcae5..26bbc3b87e7d0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -94,6 +94,42 @@ class MatrixFactorizationModel(
     predict(usersProducts.rdd.asInstanceOf[RDD[(Int, Int)]]).toJavaRDD()
   }
 
+  /**
+   * Recommends users that are similar to the given user.
+   *
+   * @param user the user to find similar users for
+   * @param num how many users to return. The number returned may be less than this.
+   * @return [[Rating]] objects, each of which contains the given user ID, the ID of a similar
+   *  user in the product field, and a "score" in the rating field. They are sorted by score,
+   *  decreasing. The first returned is the one predicted to be most similar to the
+   *  specified user ID. The score is an opaque value that indicates how strongly
+   *  recommended the user is.
+   */
+  def recommendSimilariUsers(user: Int, num: Int): Array[Rating] =
+    // NOTE(review): the candidate set is all of userFeatures, so the given user itself is not
+    // filtered out and may be returned. Presumably lookup(user).head fails when the user has
+    // no feature vector - confirm.
+    recommend(userFeatures.lookup(user).head, userFeatures, num)
+      .map(t => Rating(user, t._1, t._2))
+
+  /**
+   * Recommends products that are similar to the given product.
+   *
+   * @param product the product to find similar products for
+   * @param num how many products to return. The number returned may be less than this.
+   * @return [[Rating]] objects, each of which contains the given product ID, the ID of a
+   *  similar product in the product field, and a "score" in the rating field. They are
+   *  sorted by score, decreasing. The first returned is the one predicted to be most similar
+   *  to the specified product ID. The score is an opaque value that indicates how strongly
+   *  recommended the product is.
+   */
+  def recommendSimilariProducts(product: Int, num: Int): Array[Rating] =
+    // NOTE(review): the candidate set is all of productFeatures, so the given product itself
+    // is not filtered out and may be returned. Presumably lookup(product).head fails when the
+    // product has no feature vector - confirm.
+    recommend(productFeatures.lookup(product).head, productFeatures, num)
+      .map(t => Rating(product, t._1, t._2))
+
   /**
    * Recommends products to a user.
    *

From 12e6b6b3a2cbfa1baa29449396e7e85bed1dec56 Mon Sep 17 00:00:00 2001
From: Steven Bourke
Date: Mon, 1 Dec 2014 00:22:46 +0100
Subject: [PATCH 2/2] added unit test to make sure id isnt teh same

---
 .../spark/mllib/recommendation/JavaALSSuite.java | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
index af688c504cf1e..756973dfd7956 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/recommendation/JavaALSSuite.java
@@ -190,6 +190,24 @@ public void runRecommend() {
       .run(data.rdd());
     validateRecommendations(model.recommendProducts(1, 10), 10);
     validateRecommendations(model.recommendUsers(1, 20), 20);
+    validateSimilarRecommendations(model.recommendSimilariProducts(1, 10), 10, 1);
+    validateSimilarRecommendations(model.recommendSimilariUsers(1, 10), 10, 1);
+  }
+
+  /**
+   * Checks a "similar items" result: it must contain exactly howMany entries sorted by
+   * non-increasing rating, no entry after the first may carry the queried id in its product
+   * field, and the rating of the first entry must exceed 0.7.
+   */
+  private static void validateSimilarRecommendations(
+      Rating[] recommendations, int howMany, int id) {
+    Assert.assertEquals(howMany, recommendations.length);
+    for (int i = 1; i < recommendations.length; i++) {
+      // Index 0 is skipped: only entries after the first must differ from the queried id.
+      Assert.assertFalse(recommendations[i].product() == id);
+      Assert.assertTrue(recommendations[i-1].rating() >= recommendations[i].rating());
+    }
+    Assert.assertTrue(recommendations[0].rating() > 0.7);
   }
 
   private static void validateRecommendations(Rating[] recommendations, int howMany) {