blob: 3b4c1b432bf1337b82b0567306f2340538f17daa (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.expressions;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.yahoo.document.ArrayDataType;
import com.yahoo.document.DataType;
import com.yahoo.document.DocumentType;
import com.yahoo.document.Field;
import com.yahoo.document.datatypes.IntegerFieldValue;
import com.yahoo.document.datatypes.LongFieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import java.nio.charset.StandardCharsets;
/**
* Hashes a string value to a long or int (by type inference on the target value).
*
* @author bratseth
*/
public class HashExpression extends Expression {
private final HashFunction hasher = Hashing.sipHash24();
/** The target *primitive* type we are hashing into. */
private DataType targetType;
public HashExpression() {
super(DataType.STRING);
}
@Override
public void setStatementOutput(DocumentType documentType, Field field) {
if ( ! canStoreHash(field.getDataType()))
throw new IllegalArgumentException("Cannot use the hash function on an indexing statement for " +
field.getName() +
": The hash function can only be used when the target field " +
"is int or long or an array of int or long, not " + field.getDataType());
targetType = primitiveTypeOf(field.getDataType());
}
@Override
protected void doExecute(ExecutionContext context) {
StringFieldValue input = (StringFieldValue) context.getValue();
if (targetType.equals(DataType.INT))
context.setValue(new IntegerFieldValue(hashToInt(input.getString())));
else if (targetType.equals(DataType.LONG))
context.setValue(new LongFieldValue(hashToLong(input.getString())));
else
throw new IllegalStateException(); // won't happen
}
private int hashToInt(String value) {
return hasher.hashString(value, StandardCharsets.UTF_8).asInt();
}
private long hashToLong(String value) {
return hasher.hashString(value, StandardCharsets.UTF_8).asLong();
}
@Override
protected void doVerify(VerificationContext context) {
String outputField = context.getOutputField();
if (outputField == null)
throw new VerificationException(this, "No output field in this statement: " +
"Don't know what value to hash to");
DataType outputFieldType = context.getInputType(this, outputField);
if ( ! canStoreHash(outputFieldType))
throw new VerificationException(this, "The type of the output field " + outputField +
" is not int or long but " + outputFieldType);
targetType = primitiveTypeOf(outputFieldType);
context.setValueType(createdOutputType());
}
private boolean canStoreHash(DataType type) {
if (type.equals(DataType.INT)) return true;
if (type.equals(DataType.LONG)) return true;
if (type instanceof ArrayDataType) return canStoreHash(((ArrayDataType)type).getNestedType());
return false;
}
private static DataType primitiveTypeOf(DataType type) {
if (type instanceof ArrayDataType) return ((ArrayDataType)type).getNestedType();
return type;
}
@Override
public DataType createdOutputType() { return targetType; }
@Override
public String toString() { return "hash"; }
@Override
public int hashCode() { return 987; }
@Override
public boolean equals(Object o) { return o instanceof HashExpression; }
}
|