aboutsummaryrefslogtreecommitdiffstats
path: root/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/pig/VespaSimpleJsonLoader.java
blob: 66f04be657fd82e7417d7461f9b2dc1b4d2462e7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package com.yahoo.vespa.hadoop.pig;

import com.yahoo.vespa.hadoop.mapreduce.VespaSimpleJsonInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

import java.io.IOException;

/**
 * Simple JSON loader for Pig which loads either one JSON object per line or a
 * multiline JSON consisting of objects in an array.
 *
 * Returns only the textual representation of each JSON object, wrapped in a
 * single-field tuple. Input is read through {@link VespaSimpleJsonInputFormat};
 * the reader handed to {@link #prepareToRead} must be a
 * {@code VespaSimpleJsonInputFormat.VespaJsonRecordReader}.
 *
 * @author lesters
 */
@SuppressWarnings("rawtypes") // LoadFunc's overridden signatures use raw InputFormat/RecordReader
public class VespaSimpleJsonLoader extends LoadFunc {

    private final TupleFactory tupleFactory = TupleFactory.getInstance();
    private VespaSimpleJsonInputFormat.VespaJsonRecordReader recordReader;

    /**
     * Registers the given location as the input path(s) of the job.
     *
     * @param location input location string as supplied by Pig
     * @param job      the job whose input paths are set
     * @throws IOException if the input paths cannot be set
     */
    @Override
    public void setLocation(String location, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, location);
    }

    /**
     * @return a new {@link VespaSimpleJsonInputFormat} instance
     */
    @Override
    public InputFormat getInputFormat() throws IOException {
        return new VespaSimpleJsonInputFormat();
    }

    /**
     * Stores the record reader that subsequent {@link #getNext()} calls read from.
     *
     * @param reader the record reader; cast to {@code VespaJsonRecordReader},
     *               so any other reader type fails with a ClassCastException
     * @param split  the split being read (unused here)
     */
    @Override
    public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
        recordReader = (VespaSimpleJsonInputFormat.VespaJsonRecordReader) reader;
    }

    /**
     * Reads the next JSON object from the record reader.
     *
     * @return a single-field tuple holding the JSON text, or {@code null} when
     *         the reader reports no more input or has no current key
     * @throws IOException if reading fails, or if the reading thread is
     *                     interrupted (the interrupt is the exception's cause)
     */
    @Override
    public Tuple getNext() throws IOException {
        try {
            // NOTE(review): a true result is treated here as "input exhausted", which is
            // the opposite of the usual Hadoop RecordReader contract. Presumably
            // VespaJsonRecordReader is implemented that way - confirm before changing.
            boolean done = recordReader.nextKeyValue();
            if (done) {
                return null;
            }
            Text json = recordReader.getCurrentKey();
            if (json == null) {
                return null;
            }
            return tupleFactory.newTuple(json.toString());

        } catch (InterruptedException ex) {
            // Returning null here would look like a normal end of input and silently
            // truncate the data set. Restore the interrupt flag and fail the read instead.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while reading JSON input", ex);
        }
    }
}