summaryrefslogtreecommitdiffstats
path: root/vespa-hadoop/src/main/java/com/yahoo/vespa/hadoop/mapreduce/util/VespaQuerySchema.java
blob: 0208b4165d308a9fae9d2a1dbf80ca8a1e563236 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package com.yahoo.vespa.hadoop.mapreduce.util;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Describes which fields of a query hit are extracted into a Pig tuple, and in which order.
 *
 * <p>The schema is a comma-separated list of {@code alias:type} entries, for example
 * {@code "id:chararray, rank:int, fields/title:chararray"}. The alias is a {@code /}-separated
 * path into the hit JSON; the alias {@code rank} is reserved and yields the rank passed to
 * {@link #buildTuple(int, JsonNode)} instead of a value read from the hit. The type is a Pig
 * type name as understood by {@link DataType#findTypeByName(String)}.
 */
public class VespaQuerySchema implements Iterable<VespaQuerySchema.AliasTypePair> {

    private static final Logger log = Logger.getLogger(VespaQuerySchema.class.getName());

    /** Alias reserved for the hit rank rather than a field of the hit. */
    private static final String RANK_ALIAS = "rank";

    private final List<AliasTypePair> tupleSchema = new ArrayList<>();

    /**
     * Parses a schema string into its ordered list of alias/type pairs.
     *
     * @param schema comma-separated {@code alias:type} entries; surrounding whitespace of both
     *               alias and type is trimmed
     * @throws IllegalArgumentException if an entry does not contain both an alias and a type
     * @throws NullPointerException if {@code schema} is {@code null}
     */
    public VespaQuerySchema(String schema) {
        for (String entry : schema.split(",")) {
            String[] pair = entry.split(":");
            // Without this check a missing ':' surfaces as a bare ArrayIndexOutOfBoundsException.
            if (pair.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed schema entry '" + entry.trim() + "' in schema '" + schema
                                + "': expected 'alias:type'");
            }
            tupleSchema.add(new AliasTypePair(pair[0].trim(), pair[1].trim()));
        }
    }

    /**
     * Builds a tuple holding one value per schema entry, in schema order.
     *
     * <p>Every schema entry contributes exactly one position to the tuple, so positions always
     * line up with the columns produced by {@link #getPigSchema(String)}. A field that is not
     * present in the hit, or whose declared type is not supported, yields {@code null} at its
     * position.
     *
     * @param rank value appended for the reserved {@code rank} alias
     * @param hit  JSON node the aliases are resolved against
     * @return a new tuple with as many fields as there are schema entries
     */
    public Tuple buildTuple(int rank, JsonNode hit) {
        Tuple tuple = TupleFactory.getInstance().newTuple();

        for (AliasTypePair tupleElement : tupleSchema) {
            if (RANK_ALIAS.equals(tupleElement.getAlias())) {
                tuple.append(rank);
                continue;
            }
            JsonNode field = resolve(hit, tupleElement.path);
            if (field == null) {
                // Keep the position so later values stay aligned with their declared columns.
                tuple.append(null);
                continue;
            }
            byte type = DataType.findTypeByName(tupleElement.getType());
            tuple.append(convert(field, type));
        }
        return tuple;
    }

    /** Walks {@code path} down from {@code root}; returns {@code null} if any step is missing. */
    private static JsonNode resolve(JsonNode root, String[] path) {
        JsonNode node = root;
        for (String step : path) {
            node = node.get(step);
            if (node == null) {
                return null;
            }
        }
        return node;
    }

    /** Converts a JSON node to a value for the given Pig type, or {@code null} if unsupported. */
    private static Object convert(JsonNode field, byte type) {
        switch (type) {
            case DataType.BOOLEAN:
                return field.asBoolean();
            case DataType.INTEGER:
                return field.asInt();
            case DataType.LONG:
                return field.asLong();
            case DataType.FLOAT:
                // Narrow to float so the value matches the declared column type.
                return (float) field.asDouble();
            case DataType.DOUBLE:
                return field.asDouble();
            case DataType.DATETIME:
                // NOTE(review): the value is passed on as text, not as a date-time object -
                // confirm that consumers of datetime columns accept a string here.
            case DataType.CHARARRAY:
                return field.asText();
            default:
                // the rest of the data types are currently not supported
                return null;
        }
    }

    /**
     * Converts a schema string into a Pig schema describing a bag of tuples.
     *
     * <p>Every {@code /} in the schema string is replaced by {@code _} before the string is
     * wrapped as {@code {(...)}} and handed to the Pig schema parser.
     *
     * @param schemaString comma-separated {@code alias:type} entries
     * @return the parsed schema, or {@code null} if the string could not be parsed
     */
    public static Schema getPigSchema(String schemaString) {
        String pigSchema = "{(" + schemaString.replace("/", "_") + ")}";
        try {
            return Utils.getSchemaFromString(pigSchema);
        } catch (ParserException e) {
            // Callers have always received null on failure; keep that, but log the cause properly.
            log.log(Level.WARNING, "Could not parse Pig schema '" + pigSchema + "'", e);
            return null;
        }
    }

    /** Returns an iterator over the schema entries in declaration order. */
    @Override
    public Iterator<AliasTypePair> iterator() {
        return tupleSchema.iterator();
    }


    /** A single schema entry: the alias (path into the hit) and its declared type name. */
    public static class AliasTypePair {
        private final String alias;
        private final String type;
        /** The alias split on {@code /}, computed once instead of once per hit. */
        private final String[] path;

        AliasTypePair(String alias, String type) {
            this.alias = alias;
            this.type = type;
            this.path = alias.split("/");
        }

        /** Returns the alias exactly as given in the schema, whitespace trimmed. */
        public String getAlias() {
            return alias;
        }

        /** Returns the type name exactly as given in the schema, whitespace trimmed. */
        public String getType() {
            return type;
        }

    }

}