blob: 0208b4165d308a9fae9d2a1dbf80ca8a1e563236 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
package com.yahoo.vespa.hadoop.mapreduce.util;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Describes the layout of the Pig tuples built from JSON query hits.
 *
 * <p>The schema is given as a comma-separated list of {@code alias:type} elements, e.g.
 * {@code "id:chararray, fields/title:chararray, rank:int"}. The type is looked up with
 * {@link DataType#findTypeByName(String)}. The alias is a {@code /}-separated path of field
 * names that is followed from the hit node down to the value. The alias {@code rank} is
 * reserved: it is filled from the {@code rank} argument of {@link #buildTuple(int, JsonNode)}
 * instead of being looked up in the hit.
 */
public class VespaQuerySchema implements Iterable<VespaQuerySchema.AliasTypePair> {

    private static final Logger log = Logger.getLogger(VespaQuerySchema.class.getName());

    /** Alias that is filled from the rank argument rather than from a field of the hit. */
    private static final String RANK_ALIAS = "rank";

    private final List<AliasTypePair> tupleSchema = new ArrayList<>();

    /**
     * Parses a schema string into its alias/type elements.
     *
     * <p>Alias and type are trimmed. Only the first two {@code :}-separated parts of an
     * element are used.
     *
     * @param schema comma-separated list of {@code alias:type} elements; must not be null
     * @throws IllegalArgumentException if an element does not have the form {@code alias:type}
     */
    public VespaQuerySchema(String schema) {
        for (String element : schema.split(",")) {
            String[] pair = element.split(":");
            if (pair.length < 2) {
                // Fail with the offending element instead of an ArrayIndexOutOfBoundsException.
                throw new IllegalArgumentException(
                        "Malformed schema element '" + element + "' in schema '" + schema
                                + "': expected <alias>:<type>");
            }
            tupleSchema.add(new AliasTypePair(pair[0].trim(), pair[1].trim()));
        }
    }

    /**
     * Builds a tuple with exactly one field per schema element, in schema order.
     *
     * <p>A field that cannot be found in the hit, or whose declared type is not supported,
     * is appended as {@code null} so that the remaining fields keep the positions declared
     * by the schema.
     *
     * @param rank value appended for the reserved {@code rank} alias
     * @param hit  JSON node the aliases are resolved against
     * @return the populated tuple
     */
    public Tuple buildTuple(int rank, JsonNode hit) {
        Tuple tuple = TupleFactory.getInstance().newTuple();
        for (AliasTypePair element : tupleSchema) {
            if (RANK_ALIAS.equals(element.alias)) {
                tuple.append(rank);
            } else {
                tuple.append(toPigValue(resolve(hit, element.path), element.typeId));
            }
        }
        return tuple;
    }

    /** Follows {@code path} from {@code hit}; returns null as soon as a path element is missing. */
    private static JsonNode resolve(JsonNode hit, String[] path) {
        JsonNode node = hit;
        for (String name : path) {
            node = node.get(name);
            if (node == null) {
                return null;
            }
        }
        return node;
    }

    /** Converts a JSON node to the Java value for the given Pig type; null if missing or unsupported. */
    private static Object toPigValue(JsonNode field, byte type) {
        if (field == null) {
            return null;
        }
        switch (type) {
            case DataType.BOOLEAN:
                return field.asBoolean();
            case DataType.INTEGER:
                return field.asInt();
            case DataType.LONG:
                return field.asLong();
            case DataType.FLOAT:
            case DataType.DOUBLE:
                // Both floating point types are read as double.
                return field.asDouble();
            case DataType.DATETIME:
            case DataType.CHARARRAY:
                // Datetime values are passed on as their text representation.
                return field.asText();
            default:
                // The rest of the data types are currently not supported.
                return null;
        }
    }

    /**
     * Converts a schema string into a Pig schema describing a bag of tuples.
     *
     * <p>Every {@code /} in the schema string is replaced by {@code _} and the result is
     * wrapped in <code>{(</code> ... <code>)}</code> before it is parsed.
     *
     * @param schemaString comma-separated list of {@code alias:type} elements; must not be null
     * @return the parsed schema, or {@code null} if the schema string could not be parsed
     */
    public static Schema getPigSchema(String schemaString) {
        String pigSchema = "{(" + schemaString.replace("/", "_") + ")}";
        try {
            return Utils.getSchemaFromString(pigSchema);
        } catch (ParserException e) {
            log.log(Level.WARNING, "Could not parse Pig schema '" + pigSchema + "'", e);
            return null;
        }
    }

    /** Iterates over the schema elements in the order they were declared. */
    @Override
    public Iterator<AliasTypePair> iterator() {
        return tupleSchema.iterator();
    }

    /** One schema element: the alias (field path) and the name of its Pig type. */
    public static class AliasTypePair {

        private final String alias;
        private final String type;

        // Derived once here so that buildTuple does not redo the work for every hit.
        private final byte typeId;
        private final String[] path;

        AliasTypePair(String alias, String type) {
            this.alias = alias;
            this.type = type;
            this.typeId = DataType.findTypeByName(type);
            this.path = alias.split("/");
        }

        /** Returns the alias exactly as given in the schema string (after trimming). */
        public String getAlias() {
            return alias;
        }

        /** Returns the type name exactly as given in the schema string (after trimming). */
        public String getType() {
            return type;
        }
    }
}
|