diff options
author | Lester Solbakken <lesters@yahoo-inc.com> | 2017-03-17 12:29:41 +0100 |
---|---|---|
committer | Lester Solbakken <lesters@yahoo-inc.com> | 2017-03-17 12:29:41 +0100 |
commit | ef775d57f273a69f0de2cd52518cbd9260e55eac (patch) | |
tree | 5b89780e05f24c70b0158968fda5225822df9933 /vespa-hadoop/src/test/pig | |
parent | b09d7deb64e5a723c5a052dd2b1db225f632405f (diff) |
Rename hadoop -> vespa-hadoop
Diffstat (limited to 'vespa-hadoop/src/test/pig')
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_create_operations.pig | 23 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig | 18 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_multiline_operations.pig | 14 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_operations.pig | 10 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig | 13 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_operations_xml.pig | 10 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/feed_visit_data.pig | 11 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/query.pig | 18 | ||||
-rw-r--r-- | vespa-hadoop/src/test/pig/query_alt_root.pig | 19 |
9 files changed, 136 insertions, 0 deletions
diff --git a/vespa-hadoop/src/test/pig/feed_create_operations.pig b/vespa-hadoop/src/test/pig/feed_create_operations.pig new file mode 100644 index 00000000000..2186935b59a --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_create_operations.pig @@ -0,0 +1,23 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Create valid Vespa put operations +DEFINE VespaPutOperation + com.yahoo.vespa.hadoop.pig.VespaDocumentOperation( + 'operation=put', + 'docid=id:<application>:metrics::<name>-<date>' + ); + +-- By default, VespaStorage assumes it's feeding valid Vespa operations +DEFINE VespaStorage + com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load tabular data +metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray); + +-- Transform tabular data to a Vespa document operation JSON format +metrics = FOREACH metrics GENERATE VespaPutOperation(*); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage(); + + diff --git a/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig b/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig new file mode 100644 index 00000000000..348616f00ad --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig @@ -0,0 +1,18 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Transform tabular data to a Vespa document operation JSON format +-- as part of storing the data. 
+DEFINE VespaStorage + com.yahoo.vespa.hadoop.pig.VespaStorage( + 'create-document-operation=true', + 'operation=put', + 'docid=id:<application>:metrics::<name>-<date>' + ); + +-- Load tabular data +metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage(); + + diff --git a/vespa-hadoop/src/test/pig/feed_multiline_operations.pig b/vespa-hadoop/src/test/pig/feed_multiline_operations.pig new file mode 100644 index 00000000000..e9efb36858b --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_multiline_operations.pig @@ -0,0 +1,14 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define short name for VespaJsonLoader +DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader(); + +-- Define short name for VespaStorage +DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load data - one column for json data +metrics = LOAD 'src/test/resources/operations_multiline_data.json' USING VespaJsonLoader() AS (data:chararray); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage(); + diff --git a/vespa-hadoop/src/test/pig/feed_operations.pig b/vespa-hadoop/src/test/pig/feed_operations.pig new file mode 100644 index 00000000000..327181d4410 --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_operations.pig @@ -0,0 +1,10 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define short name for VespaStorage +DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load data - one column for json data +metrics = LOAD 'src/test/resources/operations_data.json' AS (data:chararray); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file diff --git a/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig b/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig new file mode 100644 index 00000000000..6d31201e4eb --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig @@ -0,0 +1,13 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define short name for VespaJsonLoader +DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader(); + +-- Define short name for VespaStorage +DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load data - one column for json data +metrics = LOAD 'src/test/resources/operations_data.json' USING VespaJsonLoader() AS (data:chararray); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file diff --git a/vespa-hadoop/src/test/pig/feed_operations_xml.pig b/vespa-hadoop/src/test/pig/feed_operations_xml.pig new file mode 100644 index 00000000000..d109d56ad1e --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_operations_xml.pig @@ -0,0 +1,10 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define short name for VespaStorage +DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load data - one column for xml data +data = LOAD 'src/test/resources/operations_data.xml' AS (data:chararray); + +-- Store into Vespa +STORE data INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file diff --git a/vespa-hadoop/src/test/pig/feed_visit_data.pig b/vespa-hadoop/src/test/pig/feed_visit_data.pig new file mode 100644 index 00000000000..14010c38336 --- /dev/null +++ b/vespa-hadoop/src/test/pig/feed_visit_data.pig @@ -0,0 +1,11 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define short name for VespaStorage +DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); + +-- Load data - one column for json data +metrics = LOAD 'src/test/resources/visit_data.json' AS (data:chararray); + +-- Store into Vespa +STORE metrics INTO '$ENDPOINT' USING VespaStorage(); + diff --git a/vespa-hadoop/src/test/pig/query.pig b/vespa-hadoop/src/test/pig/query.pig new file mode 100644 index 00000000000..70f53a992e2 --- /dev/null +++ b/vespa-hadoop/src/test/pig/query.pig @@ -0,0 +1,18 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define Vespa query for retrieving blog posts +DEFINE BlogPostRecommendations + com.yahoo.vespa.hadoop.pig.VespaQuery( + 'query=$ENDPOINT/search?query=<userid>&hits=100', + 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray' + ); + +-- Load data from a local file +users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray); +users = FILTER users BY userid IS NOT null; + +-- Run a set of queries against Vespa +recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*)); + +-- Output recommendations +DUMP recommendations; diff --git a/vespa-hadoop/src/test/pig/query_alt_root.pig b/vespa-hadoop/src/test/pig/query_alt_root.pig new file mode 100644 index 00000000000..8995990e398 --- /dev/null +++ b/vespa-hadoop/src/test/pig/query_alt_root.pig @@ -0,0 +1,19 @@ +-- REGISTER vespa-hadoop.jar -- Not needed in tests + +-- Define Vespa query for retrieving blog posts +DEFINE BlogPostRecommendations + com.yahoo.vespa.hadoop.pig.VespaQuery( + 'query=$ENDPOINT/search?query=<userid>&hits=100', + 'rootnode=root/children/children', 
+ 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray' + ); + +-- Load data from a local file +users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray); +users = FILTER users BY userid IS NOT null; + +-- Run a set of queries against Vespa +recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*)); + +-- Output recommendations +DUMP recommendations; |