summaryrefslogtreecommitdiffstats
path: root/sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig
diff options
context:
space:
mode:
Diffstat (limited to 'sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig')
-rw-r--r--sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig21
1 files changed, 21 insertions, 0 deletions
diff --git a/sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig b/sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig
new file mode 100644
index 00000000000..ab4245eaa25
--- /dev/null
+++ b/sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig
@@ -0,0 +1,21 @@
+-- Jar that provides the VespaQuery UDF; its path is passed in as a Pig parameter.
+REGISTER $VESPA_HADOOP_JAR
+
+-- Bind the VespaQuery UDF to a search URL template and to the schema of the tuples it emits.
+-- NOTE(review): <user_id> is presumably filled from the matching input field - confirm in the UDF docs.
+DEFINE BlogPostRecommendations com.yahoo.vespa.hadoop.pig.VespaQuery(
+    'query=http://$ENDPOINT/search/?user_id=<user_id>&hits=$NUMBER_RECOMMENDATIONS&ranking=$RANKING_NAME',
+    'schema=rank:int,id:chararray,relevance:double,fields/post_id:chararray');
+
+-- Read (post_id, user_id) pairs and reduce them to the distinct, non-null user ids.
+test_pairs = LOAD '$TEST_INDICES' AS (post_id:chararray, user_id:chararray);
+with_user = FILTER test_pairs BY user_id IS NOT NULL;
+user_ids = FOREACH with_user GENERATE user_id;
+unique_users = DISTINCT user_ids;
+
+-- Invoke the UDF for every user, keep (user_id, rank, post_id), drop rows lacking rank or post_id.
+hits = FOREACH unique_users GENERATE user_id, FLATTEN(BlogPostRecommendations(*)) AS (rank, id, relevance, post_id);
+ranked = FOREACH hits GENERATE user_id, rank, post_id;
+complete = FILTER ranked BY (rank IS NOT NULL) AND (post_id IS NOT NULL);
+
+STORE complete INTO '$OUTPUT';