diff options
Diffstat (limited to 'sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig')
-rw-r--r-- | sample-apps/blog-tutorial-shared/src/main/pig/tutorial_get_recommendation_list.pig | 21 |
1 files changed, 21 insertions, 0 deletions
-- tutorial_get_recommendation_list.pig
--
-- For every distinct, non-null user_id read from the input, run a query
-- through the VespaQuery UDF and store (user_id, rank, post_id) rows.
--
-- Pig parameters referenced by this script:
--   VESPA_HADOOP_JAR        jar registered below (expected to provide VespaQuery)
--   ENDPOINT                host part of the search URL
--   NUMBER_RECOMMENDATIONS  value of the hits query parameter
--   RANKING_NAME            value of the ranking query parameter
--   TEST_INDICES            input location, read as (post_id, user_id)
--   OUTPUT                  location the final relation is stored to

REGISTER $VESPA_HADOOP_JAR

-- Query UDF bound to the search URL template and the result schema.
-- NOTE(review): the <user_id> placeholder is presumably filled in by VespaQuery
-- from each input tuple - confirm against the vespa-hadoop documentation.
DEFINE BlogPostRecommendations
  com.yahoo.vespa.hadoop.pig.VespaQuery(
    'query=http://$ENDPOINT/search/?user_id=<user_id>&hits=$NUMBER_RECOMMENDATIONS&ranking=$RANKING_NAME',
    'schema=rank:int,id:chararray,relevance:double,fields/post_id:chararray'
  );

-- Read (post_id, user_id) pairs from the TEST_INDICES location.
test_pairs = LOAD '$TEST_INDICES' AS (post_id:chararray, user_id:chararray);

-- Reduce the input to the set of distinct, non-null user ids.
pairs_with_user = FILTER test_pairs BY user_id IS NOT NULL;
user_ids        = FOREACH pairs_with_user GENERATE user_id;
unique_users    = DISTINCT user_ids;

-- Invoke the query UDF for each user tuple and flatten its result into
-- one row per (rank, id, relevance, post_id).
query_results = FOREACH unique_users GENERATE
    user_id,
    FLATTEN(BlogPostRecommendations(*)) AS (rank, id, relevance, post_id);

-- Keep only the columns that are written to the output.
ranked_posts = FOREACH query_results GENERATE user_id, rank, post_id;

-- Drop rows that lack a rank or a post id.
valid_recommendations = FILTER ranked_posts BY post_id IS NOT NULL AND rank IS NOT NULL;

STORE valid_recommendations INTO '$OUTPUT';