summaryrefslogtreecommitdiffstats
path: root/vespa-hadoop/src/test/pig
diff options
context:
space:
mode:
authorLester Solbakken <lesters@yahoo-inc.com>2017-03-17 12:29:41 +0100
committerLester Solbakken <lesters@yahoo-inc.com>2017-03-17 12:29:41 +0100
commitef775d57f273a69f0de2cd52518cbd9260e55eac (patch)
tree5b89780e05f24c70b0158968fda5225822df9933 /vespa-hadoop/src/test/pig
parentb09d7deb64e5a723c5a052dd2b1db225f632405f (diff)
Rename hadoop -> vespa-hadoop
Diffstat (limited to 'vespa-hadoop/src/test/pig')
-rw-r--r--vespa-hadoop/src/test/pig/feed_create_operations.pig23
-rw-r--r--vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig18
-rw-r--r--vespa-hadoop/src/test/pig/feed_multiline_operations.pig14
-rw-r--r--vespa-hadoop/src/test/pig/feed_operations.pig10
-rw-r--r--vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig13
-rw-r--r--vespa-hadoop/src/test/pig/feed_operations_xml.pig10
-rw-r--r--vespa-hadoop/src/test/pig/feed_visit_data.pig11
-rw-r--r--vespa-hadoop/src/test/pig/query.pig18
-rw-r--r--vespa-hadoop/src/test/pig/query_alt_root.pig19
9 files changed, 136 insertions, 0 deletions
diff --git a/vespa-hadoop/src/test/pig/feed_create_operations.pig b/vespa-hadoop/src/test/pig/feed_create_operations.pig
new file mode 100644
index 00000000000..2186935b59a
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_create_operations.pig
@@ -0,0 +1,23 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Create valid Vespa put operations
+DEFINE VespaPutOperation
+ com.yahoo.vespa.hadoop.pig.VespaDocumentOperation(
+ 'operation=put',
+ 'docid=id:<application>:metrics::<name>-<date>'
+ );
+
+-- By default, VespaStorage assumes it's feeding valid Vespa operations
+DEFINE VespaStorage
+ com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load tabular data
+metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray);
+
+-- Transform tabular data to a Vespa document operation JSON format
+metrics = FOREACH metrics GENERATE VespaPutOperation(*);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
+
+
diff --git a/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig b/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig
new file mode 100644
index 00000000000..348616f00ad
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig
@@ -0,0 +1,18 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Transform tabular data to a Vespa document operation JSON format
+-- as part of storing the data.
+DEFINE VespaStorage
+ com.yahoo.vespa.hadoop.pig.VespaStorage(
+ 'create-document-operation=true',
+ 'operation=put',
+ 'docid=id:<application>:metrics::<name>-<date>'
+ );
+
+-- Load tabular data
+metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
+
+
diff --git a/vespa-hadoop/src/test/pig/feed_multiline_operations.pig b/vespa-hadoop/src/test/pig/feed_multiline_operations.pig
new file mode 100644
index 00000000000..e9efb36858b
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_multiline_operations.pig
@@ -0,0 +1,14 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define short name for VespaJsonLoader
+DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader();
+
+-- Define short name for VespaStorage
+DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load data - one column for json data
+metrics = LOAD 'src/test/resources/operations_multiline_data.json' USING VespaJsonLoader() AS (data:chararray);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
+
diff --git a/vespa-hadoop/src/test/pig/feed_operations.pig b/vespa-hadoop/src/test/pig/feed_operations.pig
new file mode 100644
index 00000000000..327181d4410
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_operations.pig
@@ -0,0 +1,10 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define short name for VespaStorage
+DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load data - one column for json data
+metrics = LOAD 'src/test/resources/operations_data.json' AS (data:chararray);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file
diff --git a/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig b/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig
new file mode 100644
index 00000000000..6d31201e4eb
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig
@@ -0,0 +1,13 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define short name for VespaJsonLoader
+DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader();
+
+-- Define short name for VespaStorage
+DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load data - one column for json data
+metrics = LOAD 'src/test/resources/operations_data.json' USING VespaJsonLoader() AS (data:chararray);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file
diff --git a/vespa-hadoop/src/test/pig/feed_operations_xml.pig b/vespa-hadoop/src/test/pig/feed_operations_xml.pig
new file mode 100644
index 00000000000..d109d56ad1e
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_operations_xml.pig
@@ -0,0 +1,10 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define short name for VespaStorage
+DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load data - one column for xml data
+data = LOAD 'src/test/resources/operations_data.xml' AS (data:chararray);
+
+-- Store into Vespa
+STORE data INTO '$ENDPOINT' USING VespaStorage();
\ No newline at end of file
diff --git a/vespa-hadoop/src/test/pig/feed_visit_data.pig b/vespa-hadoop/src/test/pig/feed_visit_data.pig
new file mode 100644
index 00000000000..14010c38336
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/feed_visit_data.pig
@@ -0,0 +1,11 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define short name for VespaStorage
+DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage();
+
+-- Load data - one column for json data
+metrics = LOAD 'src/test/resources/visit_data.json' AS (data:chararray);
+
+-- Store into Vespa
+STORE metrics INTO '$ENDPOINT' USING VespaStorage();
+
diff --git a/vespa-hadoop/src/test/pig/query.pig b/vespa-hadoop/src/test/pig/query.pig
new file mode 100644
index 00000000000..70f53a992e2
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/query.pig
@@ -0,0 +1,18 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define Vespa query for retrieving blog posts
+DEFINE BlogPostRecommendations
+ com.yahoo.vespa.hadoop.pig.VespaQuery(
+ 'query=$ENDPOINT/search?query=<userid>&hits=100',
+ 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray'
+ );
+
+-- Load data from a local file
+users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray);
+users = FILTER users BY userid IS NOT null;
+
+-- Run a set of queries against Vespa
+recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*));
+
+-- Output recommendations
+DUMP recommendations;
diff --git a/vespa-hadoop/src/test/pig/query_alt_root.pig b/vespa-hadoop/src/test/pig/query_alt_root.pig
new file mode 100644
index 00000000000..8995990e398
--- /dev/null
+++ b/vespa-hadoop/src/test/pig/query_alt_root.pig
@@ -0,0 +1,19 @@
+-- REGISTER vespa-hadoop.jar -- Not needed in tests
+
+-- Define Vespa query for retrieving blog posts
+DEFINE BlogPostRecommendations
+ com.yahoo.vespa.hadoop.pig.VespaQuery(
+ 'query=$ENDPOINT/search?query=<userid>&hits=100',
+ 'rootnode=root/children/children',
+ 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray'
+ );
+
+-- Load data from a local file
+users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray);
+users = FILTER users BY userid IS NOT null;
+
+-- Run a set of queries against Vespa
+recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*));
+
+-- Output recommendations
+DUMP recommendations;