diff options
10 files changed, 274 insertions, 51 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java index 623c38fa9f0..77cd4f3b292 100644 --- a/container-search/src/main/java/com/yahoo/search/Query.java +++ b/container-search/src/main/java/com/yahoo/search/Query.java @@ -1048,6 +1048,7 @@ public class Query extends com.yahoo.processing.Request implements Cloneable { return new SessionId(requestId, getRanking().getProfile()); } + @Deprecated // TODO: Remove on Vespa 8 public boolean hasEncodableProperties() { if ( ! ranking.getProperties().isEmpty()) return true; if ( ! ranking.getFeatures().isEmpty()) return true; @@ -1064,39 +1065,29 @@ public class Query extends com.yahoo.processing.Request implements Cloneable { * @param buffer the buffer to encode to * @param encodeQueryData true to encode all properties, false to only include session information, not actual query data * @return the encoded length + * @deprecated do not use */ + @Deprecated // TODO: Remove on Vespa 8 public int encodeAsProperties(ByteBuffer buffer, boolean encodeQueryData) { // Make sure we don't encode anything here if we have turned the property feature off // Due to sendQuery we sometimes end up turning this feature on and then encoding a 0 int as the number of // property maps - that's ok (probably we should simplify by just always turning the feature on) if (! hasEncodableProperties()) return 0; - int start = buffer.position(); - int mapCountPosition = buffer.position(); buffer.putInt(0); // map count will go here - int mapCount = 0; - - // TODO: Push down mapCount += ranking.getProperties().encode(buffer, encodeQueryData); if (encodeQueryData) { mapCount += ranking.getFeatures().encode(buffer); - - // TODO: Push down if (presentation.getHighlight() != null) { mapCount += MapEncoder.encodeMultiMap(Highlight.HIGHLIGHTTERMS, presentation.getHighlight().getHighlightTerms(), buffer); } - - // TODO: Push down mapCount += MapEncoder.encodeMap("model", createModelMap(), buffer); } mapCount += MapEncoder.encodeSingleValue(DocumentDatabase.MATCH_PROPERTY, DocumentDatabase.SEARCH_DOC_TYPE_KEY, model.getDocumentDb(), buffer); - mapCount += MapEncoder.encodeMap("caches", createCacheSettingMap(), buffer); - buffer.putInt(mapCountPosition, mapCount); - return buffer.position() - start; } diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java new file mode 100644 index 00000000000..e252a230d4f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/QueryEncoder.java @@ -0,0 +1,90 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.streamingvisitors; + +import com.yahoo.fs4.MapEncoder; +import com.yahoo.prelude.fastsearch.DocumentDatabase; +import com.yahoo.prelude.query.Highlight; +import com.yahoo.search.Query; +import com.yahoo.search.dispatch.rpc.ProtobufSerialization; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Encodes the query in binary form. + * + * @author bratseth + */ +class QueryEncoder { + + /** + * Encodes properties of this query. + * + * @param buffer the buffer to encode to + * @param encodeQueryData true to encode all properties, false to only include session information, not actual query data + * @return the encoded length + */ + static int encodeAsProperties(Query query, ByteBuffer buffer, boolean encodeQueryData) { + // Make sure we don't encode anything here if we have turned the property feature off + // Due to sendQuery we sometimes end up turning this feature on and then encoding a 0 int as the number of + // property maps - that's ok (probably we should simplify by just always turning the feature on) + if (! hasEncodableProperties(query)) return 0; + + int start = buffer.position(); + int mapCountPosition = buffer.position(); + buffer.putInt(0); // map count will go here + int mapCount = 0; + mapCount += query.getRanking().getProperties().encode(buffer, encodeQueryData); + if (encodeQueryData) { + mapCount += query.getRanking().getFeatures().encode(buffer); + if (query.getPresentation().getHighlight() != null) { + mapCount += MapEncoder.encodeMultiMap(Highlight.HIGHLIGHTTERMS, + query.getPresentation().getHighlight().getHighlightTerms(), buffer); + } + mapCount += MapEncoder.encodeMap("model", createModelMap(query), buffer); + } + mapCount += MapEncoder.encodeSingleValue(DocumentDatabase.MATCH_PROPERTY, DocumentDatabase.SEARCH_DOC_TYPE_KEY, + query.getModel().getDocumentDb(), buffer); + mapCount += MapEncoder.encodeMap("caches", createCacheSettingMap(query), buffer); + buffer.putInt(mapCountPosition, mapCount); + return buffer.position() - start; + } + + static boolean hasEncodableProperties(Query query) { + if ( ! query.getRanking().getProperties().isEmpty()) return true; + if ( ! query.getRanking().getFeatures().isEmpty()) return true; + if ( query.getRanking().getFreshness() != null) return true; + if ( query.getModel().getSearchPath() != null) return true; + if ( query.getModel().getDocumentDb() != null) return true; + if ( query.getPresentation().getHighlight() != null && + ! query.getPresentation().getHighlight().getHighlightItems().isEmpty()) return true; + return false; + } + + private static Map<String, Boolean> createCacheSettingMap(Query query) { + if (query.getGroupingSessionCache() && query.getRanking().getQueryCache()) { + Map<String, Boolean> cacheSettingMap = new HashMap<>(); + cacheSettingMap.put("grouping", true); + cacheSettingMap.put("query", true); + return cacheSettingMap; + } + if (query.getGroupingSessionCache()) + return Collections.singletonMap("grouping", true); + if (query.getRanking().getQueryCache()) + return Collections.singletonMap("query", true); + return Collections.emptyMap(); + } + + private static Map<String, String> createModelMap(Query query) { + Map<String, String> m = new HashMap<>(); + if (query.getModel().getSearchPath() != null) m.put("searchpath", query.getModel().getSearchPath()); + + int traceLevel = ProtobufSerialization.getTraceLevelForBackend(query); + if (traceLevel > 0) m.put("tracelevel", String.valueOf(traceLevel)); + + return m; + } + +} diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java index e2233d51ae4..b2e4821f164 100644 --- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/VdsVisitor.java @@ -3,13 +3,10 @@ package com.yahoo.vespa.streamingvisitors; import com.yahoo.document.select.parser.ParseException; import com.yahoo.documentapi.AckToken; -import com.yahoo.documentapi.DocumentAccess; import com.yahoo.documentapi.VisitorControlHandler; import com.yahoo.documentapi.VisitorDataHandler; import com.yahoo.documentapi.VisitorParameters; import com.yahoo.documentapi.VisitorSession; -import com.yahoo.documentapi.messagebus.MessageBusDocumentAccess; -import com.yahoo.documentapi.messagebus.MessageBusParams; import com.yahoo.documentapi.messagebus.loadtypes.LoadType; import com.yahoo.documentapi.messagebus.loadtypes.LoadTypeSet; import com.yahoo.documentapi.messagebus.protocol.DocumentProtocol; @@ -41,7 +38,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Logger; /** @@ -187,7 +183,7 @@ class VdsVisitor extends VisitorDataHandler implements Visitor { params.setLibraryParameter("location", af); } - if (query.hasEncodableProperties()) { + if (QueryEncoder.hasEncodableProperties(query)) { encodeQueryData(query, 1, ed); params.setLibraryParameter("rankproperties", ed.getEncodedData()); } @@ -254,7 +250,7 @@ class VdsVisitor extends VisitorDataHandler implements Visitor { ed.setReturned(query.getModel().getQueryTree().getRoot().encode(buf)); break; case 1: - ed.setReturned(query.encodeAsProperties(buf, true)); + ed.setReturned(QueryEncoder.encodeAsProperties(query, buf, true)); break; case 2: throw new IllegalArgumentException("old aggregation no longer exists!"); diff --git a/document/src/main/java/com/yahoo/document/datatypes/Array.java b/document/src/main/java/com/yahoo/document/datatypes/Array.java index 11a8eb7a350..672690bafad 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/Array.java +++ b/document/src/main/java/com/yahoo/document/datatypes/Array.java @@ -21,7 +21,7 @@ import java.util.ListIterator; import java.util.RandomAccess; /** - * FieldValue which encapsulates a Array value + * FieldValue which encapsulates an Array value * * @author Einar M R Rosenvinge */ @@ -42,8 +42,7 @@ public final class Array<T extends FieldValue> extends CollectionFieldValue<T> i this(type); for (T v : values) { if (!((ArrayDataType)type).getNestedType().isValueCompatible(v)) { - throw new IllegalArgumentException("FieldValue " + v + - " is not compatible with " + type + "."); + throw new IllegalArgumentException("FieldValue " + v + " is not compatible with " + type + "."); } } this.values.addAll(values); diff --git a/indexinglanguage/pom.xml b/indexinglanguage/pom.xml index efca7479faf..f9ee18a4602 100644 --- a/indexinglanguage/pom.xml +++ b/indexinglanguage/pom.xml @@ -47,7 +47,6 @@ <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> - <scope>test</scope> </dependency> </dependencies> <build> diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java new file mode 100644 index 00000000000..5b04720dad4 --- /dev/null +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HashExpression.java @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.indexinglanguage.expressions; + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.DocumentType; +import com.yahoo.document.Field; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.StringFieldValue; + +import java.nio.charset.StandardCharsets; + +/** + * Hashes a string value to a long or int (by type inference on the target value). + * + * @author bratseth + */ +public class HashExpression extends Expression { + + private final HashFunction hasher = Hashing.sipHash24(); + + /** The target type we are hashing into. */ + private DataType targetType; + + public HashExpression() { + super(DataType.STRING); + } + + @Override + public void setStatementOutput(DocumentType documentType, Field field) { + if ( ! canStoreHash(field.getDataType())) + throw new IllegalArgumentException("Cannot use the hash function on an indexing statement for " + + field.getName() + + ": The hash function can only be used when the target field " + + "is int or long, not " + field.getDataType()); + targetType = field.getDataType(); + } + + @Override + protected void doExecute(ExecutionContext context) { + StringFieldValue input = (StringFieldValue) context.getValue(); + if (targetType.equals(DataType.INT)) + context.setValue(new IntegerFieldValue(hashToInt(input.getString()))); + else if (targetType.equals(DataType.LONG)) + context.setValue(new LongFieldValue(hashToLong(input.getString()))); + else + throw new IllegalStateException(); // won't happen + } + + private int hashToInt(String value) { + return hasher.hashString(value, StandardCharsets.UTF_8).asInt(); + } + + private long hashToLong(String value) { + return hasher.hashString(value, StandardCharsets.UTF_8).asLong(); + } + + @Override + protected void doVerify(VerificationContext context) { + String outputField = context.getOutputField(); + if (outputField == null) + throw new VerificationException(this, "No output field in this statement: " + + "Don't know what value to hash to."); + DataType outputFieldType = context.getInputType(this, outputField); + if ( ! canStoreHash(outputFieldType)) + throw new VerificationException(this, "The type of the output field " + outputField + + " is not int or long but " + outputFieldType); + targetType = outputFieldType; + context.setValueType(createdOutputType()); + } + + private boolean canStoreHash(DataType type) { + if (type.equals(DataType.INT)) return true; + if (type.equals(DataType.LONG)) return true; + return false; + } + + @Override + public DataType createdOutputType() { + return targetType; + } + + @Override + public String toString() { return "hash"; } + + @Override + public int hashCode() { return 987; } + + @Override + public boolean equals(Object o) { return o instanceof HashExpression; } + +} diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java index 5e7288b8ecc..ca2be7c3400 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/HexEncodeExpression.java @@ -13,6 +13,7 @@ public final class HexEncodeExpression extends Expression { public HexEncodeExpression() { super(DataType.LONG); } + @Override protected void doExecute(ExecutionContext context) { long input = ((LongFieldValue) context.getValue()).getLong(); diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index bdbecadecd3..e6b21f7c07b 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -164,6 +164,7 @@ TOKEN : <GET_FIELD: "get_field"> | <GET_VAR: "get_var"> | <GUARD: "guard"> | + <HASH: "hash"> | <HEX_DECODE: "hexdecode"> | <HEX_ENCODE: "hexencode"> | <HOST_NAME: "hostname"> | @@ -283,13 +284,14 @@ Expression value() : val = base64EncodeExp() | val = clearStateExp() | val = echoExp() | - val = embedExp() | + val = embedExp() | val = exactExp() | val = flattenExp() | val = forEachExp() | val = getFieldExp() | val = getVarExp() | val = guardExp() | + val = hashExp() | val = hexDecodeExp() | val = hexEncodeExp() | val = hostNameExp() | @@ -419,6 +421,12 @@ Expression guardExp() : { return new GuardExpression(val); } } +Expression hashExp() : { } +{ + ( <HASH> ) + { return new HashExpression(); } +} + Expression hexDecodeExp() : { } { ( <HEX_DECODE> ) @@ -744,12 +752,13 @@ String identifier() : <ECHO> | <EXACT> | <ELSE> | - <EMBED> | + <EMBED> | <FLATTEN> | <FOR_EACH> | <GET_FIELD> | <GET_VAR> | <GUARD> | + <HASH> | <HEX_DECODE> | <HEX_ENCODE> | <HOST_NAME> | diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java index f9a6f2225b3..778d95fcaef 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java @@ -9,7 +9,6 @@ import com.yahoo.document.TensorDataType; import com.yahoo.document.datatypes.BoolFieldValue; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.document.datatypes.TensorFieldValue; -import com.yahoo.language.Language; import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.tensor.Tensor; @@ -100,6 +99,50 @@ public class ScriptTestCase { } @Test + public void testIntHash() throws ParseException { + var expression = Expression.fromString("input myText | hash | attribute 'myInt'"); + + SimpleTestAdapter adapter = new SimpleTestAdapter(); + adapter.createField(new Field("myText", DataType.STRING)); + var intField = new Field("myInt", DataType.INT); + adapter.createField(intField); + adapter.setValue("myText", new StringFieldValue("input text")); + expression.setStatementOutput(new DocumentType("myDocument"), intField); + + // Necessary to resolve output type + VerificationContext verificationContext = new VerificationContext(adapter); + assertEquals(DataType.INT, expression.verify(verificationContext)); + + ExecutionContext context = new ExecutionContext(adapter); + context.setValue(new StringFieldValue("input text")); + expression.execute(context); + assertTrue(adapter.values.containsKey("myInt")); + assertEquals(-1425622096, adapter.values.get("myInt").getWrappedValue()); + } + + @Test + public void testLongHash() throws ParseException { + var expression = Expression.fromString("input myText | hash | attribute 'myLong'"); + + SimpleTestAdapter adapter = new SimpleTestAdapter(); + adapter.createField(new Field("myText", DataType.STRING)); + var intField = new Field("myLong", DataType.LONG); + adapter.createField(intField); + adapter.setValue("myText", new StringFieldValue("input text")); + expression.setStatementOutput(new DocumentType("myDocument"), intField); + + // Necessary to resolve output type + VerificationContext verificationContext = new VerificationContext(adapter); + assertEquals(DataType.LONG, expression.verify(verificationContext)); + + ExecutionContext context = new ExecutionContext(adapter); + context.setValue(new StringFieldValue("input text")); + expression.execute(context); + assertTrue(adapter.values.containsKey("myLong")); + assertEquals(7678158186624760752L, adapter.values.get("myLong").getWrappedValue()); + } + + @Test public void testEmbed() throws ParseException { TensorType tensorType = TensorType.fromSpec("tensor(d[4])"); var expression = Expression.fromString("input myText | embed | attribute 'myTensor'", @@ -120,7 +163,6 @@ public class ScriptTestCase { ExecutionContext context = new ExecutionContext(adapter); context.setValue(new StringFieldValue("input text")); expression.execute(context); - assertNotNull(context); assertTrue(adapter.values.containsKey("myTensor")); assertEquals(Tensor.from(tensorType, "[7,3,0,0]"), ((TensorFieldValue)adapter.values.get("myTensor")).getTensor().get()); diff --git a/vespajlib/src/main/java/com/yahoo/collections/BobHash.java b/vespajlib/src/main/java/com/yahoo/collections/BobHash.java index d133af2ea84..3d1e82743cc 100644 --- a/vespajlib/src/main/java/com/yahoo/collections/BobHash.java +++ b/vespajlib/src/main/java/com/yahoo/collections/BobHash.java @@ -153,44 +153,45 @@ public class BobHash { // handle the last 11 bytes c += k.length; switch (len) { - // all the case statements fall through - case 11: - c += (unsign(k[offset + 10]) << 24); + // all the case statements fall through + case 11: + c += (unsign(k[offset + 10]) << 24); - case 10: - c += (unsign(k[offset + 9]) << 16); + case 10: + c += (unsign(k[offset + 9]) << 16); - case 9: - c += (unsign(k[offset + 8]) << 8); + case 9: + c += (unsign(k[offset + 8]) << 8); - /* the first byte of c is reserved for the length */ - case 8: - b += (unsign(k[offset + 7]) << 24); + /* the first byte of c is reserved for the length */ + case 8: + b += (unsign(k[offset + 7]) << 24); - case 7: - b += (unsign(k[offset + 6]) << 16); + case 7: + b += (unsign(k[offset + 6]) << 16); - case 6: - b += (unsign(k[offset + 5]) << 8); + case 6: + b += (unsign(k[offset + 5]) << 8); - case 5: - b += unsign(k[offset + 4]); + case 5: + b += unsign(k[offset + 4]); - case 4: - a += (unsign(k[offset + 3]) << 24); + case 4: + a += (unsign(k[offset + 3]) << 24); - case 3: - a += (unsign(k[offset + 2]) << 16); + case 3: + a += (unsign(k[offset + 2]) << 16); - case 2: - a += (unsign(k[offset + 1]) << 8); + case 2: + a += (unsign(k[offset + 1]) << 8); - case 1: - a += unsign(k[offset + 0]); + case 1: + a += unsign(k[offset + 0]); - /* case 0: nothing left to add */ + /* case 0: nothing left to add */ } abcBuffer = mix(a, b, c); return abcBuffer[2]; } + } |