summaryrefslogtreecommitdiffstats
path: root/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java
diff options
context:
space:
mode:
Diffstat (limited to 'vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java')
-rw-r--r--vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java62
1 files changed, 62 insertions, 0 deletions
diff --git a/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java b/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java
new file mode 100644
index 00000000000..66f04be657f
--- /dev/null
+++ b/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java
@@ -0,0 +1,62 @@
+package com.yahoo.vespa.hadoop.pig;
+
+import com.yahoo.vespa.hadoop.mapreduce.VespaSimpleJsonInputFormat;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.pig.LoadFunc;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+import java.io.IOException;
+
+/**
+ * Simple JSON loader which loads either one JSON object per line or a
+ * multiline JSON consisting of objects in an array.
+ *
+ * Returns only the textual representation of the JSON object.
+ *
+ * @author lesters
+ */
+@SuppressWarnings("rawtypes")
+public class VespaSimpleJsonLoader extends LoadFunc {
+
+ private TupleFactory tupleFactory = TupleFactory.getInstance();
+ private VespaSimpleJsonInputFormat.VespaJsonRecordReader recordReader;
+
+ @Override
+ public void setLocation(String location, Job job) throws IOException {
+ FileInputFormat.setInputPaths(job, location);
+ }
+
+ @Override
+ public InputFormat getInputFormat() throws IOException {
+ return new VespaSimpleJsonInputFormat();
+ }
+
+ @Override
+ public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
+ recordReader = (VespaSimpleJsonInputFormat.VespaJsonRecordReader) reader;
+ }
+
+ @Override
+ public Tuple getNext() throws IOException {
+ try {
+ boolean done = recordReader.nextKeyValue();
+ if (done) {
+ return null;
+ }
+ Text json = recordReader.getCurrentKey();
+ if (json == null) {
+ return null;
+ }
+ return tupleFactory.newTuple(json.toString());
+
+ } catch (InterruptedException ex) {
+ return null;
+ }
+ }
+}