diff options
Diffstat (limited to 'sample-apps/blog-recommendation')
22 files changed, 25 insertions, 3 deletions
diff --git a/sample-apps/blog-recommendation/pom.xml b/sample-apps/blog-recommendation/pom.xml index d4afac565a7..2f1582d4fbe 100644 --- a/sample-apps/blog-recommendation/pom.xml +++ b/sample-apps/blog-recommendation/pom.xml @@ -1,4 +1,5 @@ <?xml version="1.0"?> +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.yahoo.example</groupId> diff --git a/sample-apps/blog-recommendation/src/main/application/deployment.xml b/sample-apps/blog-recommendation/src/main/application/deployment.xml index 0a2397bbca9..bb37f77412b 100644 --- a/sample-apps/blog-recommendation/src/main/application/deployment.xml +++ b/sample-apps/blog-recommendation/src/main/application/deployment.xml @@ -1,3 +1,4 @@ +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <deployment version='1.0'> <test /> <staging /> diff --git a/sample-apps/blog-recommendation/src/main/application/hosts.xml b/sample-apps/blog-recommendation/src/main/application/hosts.xml index 632e48db321..65c6373d70c 100644 --- a/sample-apps/blog-recommendation/src/main/application/hosts.xml +++ b/sample-apps/blog-recommendation/src/main/application/hosts.xml @@ -1,4 +1,5 @@ <?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <hosts> <host name="localhost"> <alias>node1</alias> diff --git a/sample-apps/blog-recommendation/src/main/application/search/query-profiles/types/root.xml b/sample-apps/blog-recommendation/src/main/application/search/query-profiles/types/root.xml index 6362bbe48ce..f034e2dc5e3 100644 --- a/sample-apps/blog-recommendation/src/main/application/search/query-profiles/types/root.xml +++ b/sample-apps/blog-recommendation/src/main/application/search/query-profiles/types/root.xml @@ -1,3 +1,4 @@ +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <query-profile-type id="root" inherits="native"> <field name="ranking.features.query(user_item_cf)" type="tensor(user_item_cf[10])" /> </query-profile-type> diff --git a/sample-apps/blog-recommendation/src/main/application/searchdefinitions/blog_post.sd b/sample-apps/blog-recommendation/src/main/application/searchdefinitions/blog_post.sd index b0b53032216..6aec8961475 100644 --- a/sample-apps/blog-recommendation/src/main/application/searchdefinitions/blog_post.sd +++ b/sample-apps/blog-recommendation/src/main/application/searchdefinitions/blog_post.sd @@ -1,3 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. search blog_post { document blog_post { field date_gmt type string { diff --git a/sample-apps/blog-recommendation/src/main/application/searchdefinitions/user.sd b/sample-apps/blog-recommendation/src/main/application/searchdefinitions/user.sd index 0c2a6e50aa6..3f545ef406c 100644 --- a/sample-apps/blog-recommendation/src/main/application/searchdefinitions/user.sd +++ b/sample-apps/blog-recommendation/src/main/application/searchdefinitions/user.sd @@ -1,3 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. search user { document user { diff --git a/sample-apps/blog-recommendation/src/main/application/services.xml b/sample-apps/blog-recommendation/src/main/application/services.xml index c81cbde6027..d58211bb8c9 100644 --- a/sample-apps/blog-recommendation/src/main/application/services.xml +++ b/sample-apps/blog-recommendation/src/main/application/services.xml @@ -1,4 +1,5 @@ <?xml version='1.0' encoding='UTF-8'?> +<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <services version='1.0' xmlns:deploy="vespa" xmlns:preprocess="properties"> <jdisc id='default' version='1.0'> diff --git a/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/BlogTensorSearcher.java b/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/BlogTensorSearcher.java index 3a2a3df455f..44fdcef583b 100644 --- a/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/BlogTensorSearcher.java +++ b/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/BlogTensorSearcher.java @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.example; import com.yahoo.data.access.Inspectable; diff --git a/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/UserProfileSearcher.java b/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/UserProfileSearcher.java index eead82d7299..aca2d2b9cce 100644 --- a/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/UserProfileSearcher.java +++ b/sample-apps/blog-recommendation/src/main/java/com/yahoo/example/UserProfileSearcher.java @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.example; import com.yahoo.data.access.Inspectable; diff --git a/sample-apps/blog-recommendation/src/pig/feed_content_and_tensor_vespa.pig b/sample-apps/blog-recommendation/src/pig/feed_content_and_tensor_vespa.pig index 9a536f38779..70d14dc1cc1 100644 --- a/sample-apps/blog-recommendation/src/pig/feed_content_and_tensor_vespa.pig +++ b/sample-apps/blog-recommendation/src/pig/feed_content_and_tensor_vespa.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER vespa-hadoop.jar -- Create valid Vespa put operations diff --git a/sample-apps/blog-recommendation/src/pig/feed_content_vespa.pig b/sample-apps/blog-recommendation/src/pig/feed_content_vespa.pig index 59b173e16f4..d6379649789 100644 --- a/sample-apps/blog-recommendation/src/pig/feed_content_vespa.pig +++ b/sample-apps/blog-recommendation/src/pig/feed_content_vespa.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER vespa-hadoop.jar -- UDF to create valid Vespa document operation in JSON format @@ -68,4 +69,4 @@ data_for_feed_json = FOREACH data_for_feed GENERATE VespaPutOperationDoc(*); -- STORE data_for_feed_json_sample INTO 'blog-sample'; -- Store into Vespa -STORE data_for_feed_json INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file +STORE data_for_feed_json INTO '$ENDPOINT' USING VespaStorage(); diff --git a/sample-apps/blog-recommendation/src/pig/feed_user_item_cf_vespa.pig b/sample-apps/blog-recommendation/src/pig/feed_user_item_cf_vespa.pig index fd06394c3af..66cb6c5d5c6 100644 --- a/sample-apps/blog-recommendation/src/pig/feed_user_item_cf_vespa.pig +++ b/sample-apps/blog-recommendation/src/pig/feed_user_item_cf_vespa.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER vespa-hadoop.jar -- Create valid Vespa put operations diff --git a/sample-apps/blog-recommendation/src/pig/generate_user_item_cf_dataset.pig b/sample-apps/blog-recommendation/src/pig/generate_user_item_cf_dataset.pig index 2e71dcbe9e3..feea22263d9 100644 --- a/sample-apps/blog-recommendation/src/pig/generate_user_item_cf_dataset.pig +++ b/sample-apps/blog-recommendation/src/pig/generate_user_item_cf_dataset.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -- Load data from any source - here we load using PigStorage data = LOAD 'blog-recommendation/trainPostsFinal' USING JsonLoader('date_gmt:chararray, language:chararray, author:chararray, url:chararray, title:chararray, blog:chararray, post_id:chararray, tags:{T:(tag_name:chararray)}, blogname:chararray, date:chararray, content:chararray, categories:{T:(category_name:chararray)}, likes:{T:(dt:chararray, uid:chararray)}'); diff --git a/sample-apps/blog-recommendation/src/pig/get_recommendations.pig b/sample-apps/blog-recommendation/src/pig/get_recommendations.pig index 00b03b0f49a..ad97e8361a2 100644 --- a/sample-apps/blog-recommendation/src/pig/get_recommendations.pig +++ b/sample-apps/blog-recommendation/src/pig/get_recommendations.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -- REGISTER $VESPA_HADOOP_JAR REGISTER vespa-hadoop.jar -- REGISTER parquet-pig-bundle-1.8.1.jar diff --git a/sample-apps/blog-recommendation/src/pig/tutorial_blog_popularity.pig b/sample-apps/blog-recommendation/src/pig/tutorial_blog_popularity.pig index 4dac36a717f..25c3b4ab711 100644 --- a/sample-apps/blog-recommendation/src/pig/tutorial_blog_popularity.pig +++ b/sample-apps/blog-recommendation/src/pig/tutorial_blog_popularity.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER '$VESPA_HADOOP_JAR' -- UDF to create valid Vespa document operation in JSON format diff --git a/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_and_tensor_vespa.pig b/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_and_tensor_vespa.pig index 77943fd842a..b000037f25a 100644 --- a/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_and_tensor_vespa.pig +++ b/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_and_tensor_vespa.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER '$VESPA_HADOOP_JAR' -- Create valid Vespa put operations diff --git a/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_vespa.pig b/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_vespa.pig index d20ccf505a9..6fff700881a 100644 --- a/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_vespa.pig +++ b/sample-apps/blog-recommendation/src/pig/tutorial_feed_content_vespa.pig @@ -1,3 +1,4 @@ +-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. REGISTER '$VESPA_HADOOP_JAR' -- REGISTER vespa-hadoop.jar diff --git a/sample-apps/blog-recommendation/src/spark/collaborative_filtering_example.scala b/sample-apps/blog-recommendation/src/spark/collaborative_filtering_example.scala index 1a2c8f92730..2118862f063 100644 --- a/sample-apps/blog-recommendation/src/spark/collaborative_filtering_example.scala +++ b/sample-apps/blog-recommendation/src/spark/collaborative_filtering_example.scala @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. import org.apache.spark.mllib.recommendation.ALS import org.apache.spark.mllib.recommendation.MatrixFactorizationModel import org.apache.spark.mllib.recommendation.Rating diff --git a/sample-apps/blog-recommendation/src/spark/data_exploration.scala b/sample-apps/blog-recommendation/src/spark/data_exploration.scala index 228834cfb4b..d9f3abc9cd8 100644 --- a/sample-apps/blog-recommendation/src/spark/data_exploration.scala +++ b/sample-apps/blog-recommendation/src/spark/data_exploration.scala @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. // sc is an existing SparkContext. val sqlContext = new org.apache.spark.sql.SQLContext(sc) diff --git a/sample-apps/blog-recommendation/src/spark/expected_percentile.scala b/sample-apps/blog-recommendation/src/spark/expected_percentile.scala index 986a3eb79f4..b1fd05f6f04 100644 --- a/sample-apps/blog-recommendation/src/spark/expected_percentile.scala +++ b/sample-apps/blog-recommendation/src/spark/expected_percentile.scala @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. val test_file_path = "data/cv/test_set_exploded" val blog_recom_file_path = "data/recommendations" val size_recommendation_list = 100 @@ -36,4 +37,4 @@ val expected_percentile = joined_data. count($"post_id").as("number_read")). withColumn("expected_percentile", $"sum_percentile" / $"number_read") -expected_percentile.show()
\ No newline at end of file +expected_percentile.show() diff --git a/sample-apps/blog-recommendation/src/spark/full_dataset_cf.scala b/sample-apps/blog-recommendation/src/spark/full_dataset_cf.scala index 0b76e8b8b1c..405105eb663 100644 --- a/sample-apps/blog-recommendation/src/spark/full_dataset_cf.scala +++ b/sample-apps/blog-recommendation/src/spark/full_dataset_cf.scala @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. import org.apache.spark.mllib.recommendation.ALS import org.apache.spark.mllib.recommendation.MatrixFactorizationModel import org.apache.spark.mllib.recommendation.Rating diff --git a/sample-apps/blog-recommendation/src/spark/train_test_set_division.scala b/sample-apps/blog-recommendation/src/spark/train_test_set_division.scala index 2fc67734386..224ea74cb4e 100644 --- a/sample-apps/blog-recommendation/src/spark/train_test_set_division.scala +++ b/sample-apps/blog-recommendation/src/spark/train_test_set_division.scala @@ -1,3 +1,4 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. import org.apache.spark.sql.functions.udf // Inputs @@ -42,4 +43,4 @@ test_set_exploded = test_set_exploded.unionAll(test_set_null.select("post_id").w // Write to disk training_set_exploded.rdd.map(x => x(0) + "\t" + x(1)).saveAsTextFile(training_file_path) -test_set_exploded.rdd.map(x => x(0) + "\t" + x(1)).saveAsTextFile(test_file_path)
\ No newline at end of file +test_set_exploded.rdd.map(x => x(0) + "\t" + x(1)).saveAsTextFile(test_file_path) |