summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- container-search/src/main/java/com/yahoo/search/Query.java 15
-rw-r--r-- container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java 90
-rw-r--r-- container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java 8
-rw-r--r-- document/src/main/java/com/yahoo/document/datatypes/Array.java 5
-rw-r--r-- indexinglanguage/pom.xml 1
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java 95
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java 1
-rw-r--r-- indexinglanguage/src/main/javacc/IndexingParser.jj 13
-rw-r--r-- indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java 46
-rw-r--r-- vespajlib/src/main/java/com/yahoo/collections/BobHash.java 51
10 files changed, 274 insertions, 51 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java
index 623c38fa9f0..77cd4f3b292 100644
--- a/container-search/src/main/java/com/yahoo/search/Query.java
+++ b/container-search/src/main/java/com/yahoo/search/Query.java
@@ -1048,6 +1048,7 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
return new SessionId(requestId, getRanking().getProfile());
}
+ @Deprecated // TODO: Remove on Vespa 8
public boolean hasEncodableProperties() {
if ( ! ranking.getProperties().isEmpty()) return true;
if ( ! ranking.getFeatures().isEmpty()) return true;
@@ -1064,39 +1065,29 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
* @param buffer the buffer to encode to
* @param encodeQueryData true to encode all properties, false to only include session information, not actual query data
* @return the encoded length
+ * @deprecated do not use; will be removed on Vespa 8
*/
+ @Deprecated // TODO: Remove on Vespa 8
public int encodeAsProperties(ByteBuffer buffer, boolean encodeQueryData) {
// Make sure we don't encode anything here if we have turned the property feature off
// Due to sendQuery we sometimes end up turning this feature on and then encoding a 0 int as the number of
// property maps - that's ok (probably we should simplify by just always turning the feature on)
if (! hasEncodableProperties()) return 0;
-
int start = buffer.position();
-
int mapCountPosition = buffer.position();
buffer.putInt(0); // map count will go here
-
int mapCount = 0;
-
- // TODO: Push down
mapCount += ranking.getProperties().encode(buffer, encodeQueryData);
if (encodeQueryData) {
mapCount += ranking.getFeatures().encode(buffer);
-
- // TODO: Push down
if (presentation.getHighlight() != null) {
mapCount += MapEncoder.encodeMultiMap(Highlight.HIGHLIGHTTERMS, presentation.getHighlight().getHighlightTerms(), buffer);
}
-
- // TODO: Push down
mapCount += MapEncoder.encodeMap("model", createModelMap(), buffer);
}
mapCount += MapEncoder.encodeSingleValue(DocumentDatabase.MATCH_PROPERTY, DocumentDatabase.SEARCH_DOC_TYPE_KEY, model.getDocumentDb(), buffer);
-
mapCount += MapEncoder.encodeMap("caches", createCacheSettingMap(), buffer);
-
buffer.putInt(mapCountPosition, mapCount);
-
return buffer.position() - start;
}
diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java
new file mode 100644
index 00000000000..e252a230d4f
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java
@@ -0,0 +1,90 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.streamingvisitors;
+
+import com.yahoo.fs4.MapEncoder;
+import com.yahoo.prelude.fastsearch.DocumentDatabase;
+import com.yahoo.prelude.query.Highlight;
+import com.yahoo.search.Query;
+import com.yahoo.search.dispatch.rpc.ProtobufSerialization;
+
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Encodes the query in binary form.
+ *
+ * @author bratseth
+ */
+final class QueryEncoder {
+
+    private QueryEncoder() { } // static utility: not instantiable
+
+    /**
+     * Encodes properties of the given query: an int map count followed by the encoded property maps.
+     *
+     * @param query the query whose properties to encode
+     * @param buffer the buffer to encode to
+     * @param encodeQueryData true to encode all properties, false to only include session information, not actual query data
+     * @return the encoded length, or 0 if the query has no encodable properties
+     */
+    static int encodeAsProperties(Query query, ByteBuffer buffer, boolean encodeQueryData) {
+        // Make sure we don't encode anything here if we have turned the property feature off
+        // Due to sendQuery we sometimes end up turning this feature on and then encoding a 0 int as the number of
+        // property maps - that's ok (probably we should simplify by just always turning the feature on)
+        if (! hasEncodableProperties(query)) return 0;
+
+        int start = buffer.position();
+        int mapCountPosition = buffer.position();
+        buffer.putInt(0); // map count will go here
+        int mapCount = 0;
+        mapCount += query.getRanking().getProperties().encode(buffer, encodeQueryData);
+        if (encodeQueryData) {
+            mapCount += query.getRanking().getFeatures().encode(buffer);
+            if (query.getPresentation().getHighlight() != null) {
+                mapCount += MapEncoder.encodeMultiMap(Highlight.HIGHLIGHTTERMS,
+                                                      query.getPresentation().getHighlight().getHighlightTerms(), buffer);
+            }
+            mapCount += MapEncoder.encodeMap("model", createModelMap(query), buffer);
+        }
+        mapCount += MapEncoder.encodeSingleValue(DocumentDatabase.MATCH_PROPERTY, DocumentDatabase.SEARCH_DOC_TYPE_KEY,
+                                                 query.getModel().getDocumentDb(), buffer);
+        mapCount += MapEncoder.encodeMap("caches", createCacheSettingMap(query), buffer);
+        buffer.putInt(mapCountPosition, mapCount); // backfill the placeholder written above
+        return buffer.position() - start;
+    }
+
+    /** Returns whether any ranking property or feature, freshness, search path, document db or highlight item is set. */
+    static boolean hasEncodableProperties(Query query) {
+        if ( ! query.getRanking().getProperties().isEmpty()) return true;
+        if ( ! query.getRanking().getFeatures().isEmpty()) return true;
+        if ( query.getRanking().getFreshness() != null) return true;
+        if ( query.getModel().getSearchPath() != null) return true;
+        if ( query.getModel().getDocumentDb() != null) return true;
+        if ( query.getPresentation().getHighlight() != null &&
+             ! query.getPresentation().getHighlight().getHighlightItems().isEmpty()) return true;
+        return false;
+    }
+
+    /** Returns a map holding a true entry for each of the "grouping" and "query" caches the query has enabled. */
+    private static Map<String, Boolean> createCacheSettingMap(Query query) {
+        boolean groupingCache = query.getGroupingSessionCache();
+        boolean queryCache = query.getRanking().getQueryCache();
+        if ( ! groupingCache && ! queryCache) return Collections.emptyMap();
+        Map<String, Boolean> cacheSettingMap = new HashMap<>();
+        if (groupingCache) cacheSettingMap.put("grouping", true);
+        if (queryCache) cacheSettingMap.put("query", true);
+        return cacheSettingMap;
+    }
+
+    /** Returns the "model" properties: the search path if one is set, and the backend trace level if positive. */
+    private static Map<String, String> createModelMap(Query query) {
+        Map<String, String> m = new HashMap<>();
+        if (query.getModel().getSearchPath() != null) m.put("searchpath", query.getModel().getSearchPath());
+        int traceLevel = ProtobufSerialization.getTraceLevelForBackend(query);
+        if (traceLevel > 0) m.put("tracelevel", String.valueOf(traceLevel));
+        return m;
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java
index e2233d51ae4..b2e4821f164 100644
--- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java
+++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java
@@ -3,13 +3,10 @@ package com.yahoo.vespa.streamingvisitors;
import com.yahoo.document.select.parser.ParseException;
import com.yahoo.documentapi.AckToken;
-import com.yahoo.documentapi.DocumentAccess;
import com.yahoo.documentapi.VisitorControlHandler;
import com.yahoo.documentapi.VisitorDataHandler;
import com.yahoo.documentapi.VisitorParameters;
import com.yahoo.documentapi.VisitorSession;
-import com.yahoo.documentapi.messagebus.MessageBusDocumentAccess;
-import com.yahoo.documentapi.messagebus.MessageBusParams;
import com.yahoo.documentapi.messagebus.loadtypes.LoadType;
import com.yahoo.documentapi.messagebus.loadtypes.LoadTypeSet;
import com.yahoo.documentapi.messagebus.protocol.DocumentProtocol;
@@ -41,7 +38,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Logger;
/**
@@ -187,7 +183,7 @@ class VdsVisitor extends VisitorDataHandler implements Visitor {
params.setLibraryParameter("location", af);
}
- if (query.hasEncodableProperties()) {
+ if (QueryEncoder.hasEncodableProperties(query)) {
encodeQueryData(query, 1, ed);
params.setLibraryParameter("rankproperties", ed.getEncodedData());
}
@@ -254,7 +250,7 @@ class VdsVisitor extends VisitorDataHandler implements Visitor {
ed.setReturned(query.getModel().getQueryTree().getRoot().encode(buf));
break;
case 1:
- ed.setReturned(query.encodeAsProperties(buf, true));
+ ed.setReturned(QueryEncoder.encodeAsProperties(query, buf, true));
break;
case 2:
throw new IllegalArgumentException("old aggregation no longer exists!");
diff --git a/document/src/main/java/com/yahoo/document/datatypes/Array.java b/document/src/main/java/com/yahoo/document/datatypes/Array.java
index 11a8eb7a350..672690bafad 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/Array.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/Array.java
@@ -21,7 +21,7 @@ import java.util.ListIterator;
import java.util.RandomAccess;
/**
- * FieldValue which encapsulates a Array value
+ * FieldValue which encapsulates an Array value
*
* @author Einar M R Rosenvinge
*/
@@ -42,8 +42,7 @@ public final class Array<T extends FieldValue> extends CollectionFieldValue<T> i
this(type);
for (T v : values) {
if (!((ArrayDataType)type).getNestedType().isValueCompatible(v)) {
- throw new IllegalArgumentException("FieldValue " + v +
- " is not compatible with " + type + ".");
+ throw new IllegalArgumentException("FieldValue " + v + " is not compatible with " + type + ".");
}
}
this.values.addAll(values);
diff --git a/indexinglanguage/pom.xml b/indexinglanguage/pom.xml
index efca7479faf..f9ee18a4602 100644
--- a/indexinglanguage/pom.xml
+++ b/indexinglanguage/pom.xml
@@ -47,7 +47,6 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
- <scope>test</scope>
</dependency>
</dependencies>
<build>
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java
new file mode 100644
index 00000000000..5b04720dad4
--- /dev/null
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java
@@ -0,0 +1,95 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.indexinglanguage.expressions;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import com.yahoo.document.ArrayDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.Field;
+import com.yahoo.document.datatypes.IntegerFieldValue;
+import com.yahoo.document.datatypes.LongFieldValue;
+import com.yahoo.document.datatypes.StringFieldValue;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Hashes a string value to a long or int (by type inference on the target value).
+ *
+ * @author bratseth
+ */
+public class HashExpression extends Expression {
+
+    private final HashFunction hasher = Hashing.sipHash24();
+
+    /** The target type we are hashing into. Null until setStatementOutput or doVerify has resolved it. */
+    private DataType targetType;
+
+    public HashExpression() {
+        super(DataType.STRING);
+    }
+
+    @Override
+    public void setStatementOutput(DocumentType documentType, Field field) {
+        if ( ! canStoreHash(field.getDataType()))
+            throw new IllegalArgumentException("Cannot use the hash function on an indexing statement for " +
+                                               field.getName() +
+                                               ": The hash function can only be used when the target field " +
+                                               "is int or long, not " + field.getDataType());
+        targetType = field.getDataType();
+    }
+
+    @Override
+    protected void doExecute(ExecutionContext context) {
+        StringFieldValue input = (StringFieldValue) context.getValue();
+        // Constant-first equals: an unresolved (null) target type reaches the explicit error below, not an NPE
+        if (DataType.INT.equals(targetType))
+            context.setValue(new IntegerFieldValue(hashToInt(input.getString())));
+        else if (DataType.LONG.equals(targetType))
+            context.setValue(new LongFieldValue(hashToLong(input.getString())));
+        else
+            throw new IllegalStateException("Cannot hash: The target type must be int or long, but is " + targetType);
+    }
+
+    private int hashToInt(String value) {
+        return hasher.hashString(value, StandardCharsets.UTF_8).asInt();
+    }
+
+    private long hashToLong(String value) {
+        return hasher.hashString(value, StandardCharsets.UTF_8).asLong();
+    }
+
+    @Override
+    protected void doVerify(VerificationContext context) {
+        String outputField = context.getOutputField();
+        if (outputField == null)
+            throw new VerificationException(this, "No output field in this statement: " +
+                                                  "Don't know what value to hash to.");
+        DataType outputFieldType = context.getInputType(this, outputField);
+        if ( ! canStoreHash(outputFieldType))
+            throw new VerificationException(this, "The type of the output field " + outputField +
+                                                  " is not int or long but " + outputFieldType);
+        targetType = outputFieldType;
+        context.setValueType(createdOutputType());
+    }
+
+    /** Returns whether a hash can be stored in a field of the given type: only int and long qualify. */
+    private boolean canStoreHash(DataType type) {
+        return type.equals(DataType.INT) || type.equals(DataType.LONG);
+    }
+
+    @Override
+    public DataType createdOutputType() {
+        return targetType;
+    }
+
+    @Override
+    public String toString() { return "hash"; }
+
+    @Override
+    public int hashCode() { return 987; } // constant: equals treats all instances as equal
+
+    @Override
+    public boolean equals(Object o) { return o instanceof HashExpression; }
+
+}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java
index 5e7288b8ecc..ca2be7c3400 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java
@@ -13,6 +13,7 @@ public final class HexEncodeExpression extends Expression {
public HexEncodeExpression() {
super(DataType.LONG);
}
+
@Override
protected void doExecute(ExecutionContext context) {
long input = ((LongFieldValue) context.getValue()).getLong();
diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj
index bdbecadecd3..e6b21f7c07b 100644
--- a/indexinglanguage/src/main/javacc/IndexingParser.jj
+++ b/indexinglanguage/src/main/javacc/IndexingParser.jj
@@ -164,6 +164,7 @@ TOKEN :
<GET_FIELD: "get_field"> |
<GET_VAR: "get_var"> |
<GUARD: "guard"> |
+ <HASH: "hash"> |
<HEX_DECODE: "hexdecode"> |
<HEX_ENCODE: "hexencode"> |
<HOST_NAME: "hostname"> |
@@ -283,13 +284,14 @@ Expression value() :
val = base64EncodeExp() |
val = clearStateExp() |
val = echoExp() |
- val = embedExp() |
+ val = embedExp() |
val = exactExp() |
val = flattenExp() |
val = forEachExp() |
val = getFieldExp() |
val = getVarExp() |
val = guardExp() |
+ val = hashExp() |
val = hexDecodeExp() |
val = hexEncodeExp() |
val = hostNameExp() |
@@ -419,6 +421,12 @@ Expression guardExp() :
{ return new GuardExpression(val); }
}
+Expression hashExp() : { } // Matches the argument-less "hash" keyword
+{
+ ( <HASH> )
+ { return new HashExpression(); }
+}
+
Expression hexDecodeExp() : { }
{
( <HEX_DECODE> )
@@ -744,12 +752,13 @@ String identifier() :
<ECHO> |
<EXACT> |
<ELSE> |
- <EMBED> |
+ <EMBED> |
<FLATTEN> |
<FOR_EACH> |
<GET_FIELD> |
<GET_VAR> |
<GUARD> |
+ <HASH> |
<HEX_DECODE> |
<HEX_ENCODE> |
<HOST_NAME> |
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
index f9a6f2225b3..778d95fcaef 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
@@ -9,7 +9,6 @@ import com.yahoo.document.TensorDataType;
import com.yahoo.document.datatypes.BoolFieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.TensorFieldValue;
-import com.yahoo.language.Language;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.tensor.Tensor;
@@ -100,6 +99,50 @@ public class ScriptTestCase {
}
@Test
+    public void testIntHash() throws ParseException {
+        // A string field feeds the hash function; the int attribute receives the result
+        var hashTarget = new Field("myInt", DataType.INT);
+        SimpleTestAdapter fields = new SimpleTestAdapter();
+        fields.createField(new Field("myText", DataType.STRING));
+        fields.createField(hashTarget);
+        fields.setValue("myText", new StringFieldValue("input text"));
+
+        var script = Expression.fromString("input myText | hash | attribute 'myInt'");
+        script.setStatementOutput(new DocumentType("myDocument"), hashTarget);
+
+        // Verification is necessary to resolve the output type
+        assertEquals(DataType.INT, script.verify(new VerificationContext(fields)));
+
+        ExecutionContext execution = new ExecutionContext(fields);
+        execution.setValue(new StringFieldValue("input text"));
+        script.execute(execution);
+        assertTrue(fields.values.containsKey("myInt"));
+        assertEquals(-1425622096, fields.values.get("myInt").getWrappedValue());
+    }
+
+    @Test
+    public void testLongHash() throws ParseException {
+        var expression = Expression.fromString("input myText | hash | attribute 'myLong'");
+        // Same script as the int case; only the type of the target field differs
+        SimpleTestAdapter adapter = new SimpleTestAdapter();
+        adapter.createField(new Field("myText", DataType.STRING));
+        var longField = new Field("myLong", DataType.LONG);
+        adapter.createField(longField);
+        adapter.setValue("myText", new StringFieldValue("input text"));
+        expression.setStatementOutput(new DocumentType("myDocument"), longField);
+
+        // Necessary to resolve output type
+        VerificationContext verificationContext = new VerificationContext(adapter);
+        assertEquals(DataType.LONG, expression.verify(verificationContext));
+        // Execute the script and check the long value stored in the target field
+        ExecutionContext context = new ExecutionContext(adapter);
+        context.setValue(new StringFieldValue("input text"));
+        expression.execute(context);
+        assertTrue(adapter.values.containsKey("myLong"));
+        assertEquals(7678158186624760752L, adapter.values.get("myLong").getWrappedValue());
+    }
+
+ @Test
public void testEmbed() throws ParseException {
TensorType tensorType = TensorType.fromSpec("tensor(d[4])");
var expression = Expression.fromString("input myText | embed | attribute 'myTensor'",
@@ -120,7 +163,6 @@ public class ScriptTestCase {
ExecutionContext context = new ExecutionContext(adapter);
context.setValue(new StringFieldValue("input text"));
expression.execute(context);
- assertNotNull(context);
assertTrue(adapter.values.containsKey("myTensor"));
assertEquals(Tensor.from(tensorType, "[7,3,0,0]"),
((TensorFieldValue)adapter.values.get("myTensor")).getTensor().get());
diff --git a/vespajlib/src/main/java/com/yahoo/collections/BobHash.java b/vespajlib/src/main/java/com/yahoo/collections/BobHash.java
index d133af2ea84..3d1e82743cc 100644
--- a/vespajlib/src/main/java/com/yahoo/collections/BobHash.java
+++ b/vespajlib/src/main/java/com/yahoo/collections/BobHash.java
@@ -153,44 +153,45 @@ public class BobHash {
// handle the last 11 bytes
c += k.length;
switch (len) {
- // all the case statements fall through
- case 11:
- c += (unsign(k[offset + 10]) << 24);
+ // all the case statements fall through
+ case 11:
+ c += (unsign(k[offset + 10]) << 24);
- case 10:
- c += (unsign(k[offset + 9]) << 16);
+ case 10:
+ c += (unsign(k[offset + 9]) << 16);
- case 9:
- c += (unsign(k[offset + 8]) << 8);
+ case 9:
+ c += (unsign(k[offset + 8]) << 8);
- /* the first byte of c is reserved for the length */
- case 8:
- b += (unsign(k[offset + 7]) << 24);
+ /* the first byte of c is reserved for the length */
+ case 8:
+ b += (unsign(k[offset + 7]) << 24);
- case 7:
- b += (unsign(k[offset + 6]) << 16);
+ case 7:
+ b += (unsign(k[offset + 6]) << 16);
- case 6:
- b += (unsign(k[offset + 5]) << 8);
+ case 6:
+ b += (unsign(k[offset + 5]) << 8);
- case 5:
- b += unsign(k[offset + 4]);
+ case 5:
+ b += unsign(k[offset + 4]);
- case 4:
- a += (unsign(k[offset + 3]) << 24);
+ case 4:
+ a += (unsign(k[offset + 3]) << 24);
- case 3:
- a += (unsign(k[offset + 2]) << 16);
+ case 3:
+ a += (unsign(k[offset + 2]) << 16);
- case 2:
- a += (unsign(k[offset + 1]) << 8);
+ case 2:
+ a += (unsign(k[offset + 1]) << 8);
- case 1:
- a += unsign(k[offset + 0]);
+ case 1:
+ a += unsign(k[offset + 0]);
- /* case 0: nothing left to add */
+ /* case 0: nothing left to add */
}
abcBuffer = mix(a, b, c);
return abcBuffer[2];
}
+
}