diff options
Diffstat (limited to 'container-search/src/main/java')
7 files changed, 391 insertions, 3 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/InItem.java b/container-search/src/main/java/com/yahoo/prelude/query/InItem.java new file mode 100644 index 00000000000..27213000e3a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/InItem.java @@ -0,0 +1,46 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.Objects; + +import static java.util.Objects.requireNonNullElse; + +/* + * Abstract class representing an IN operator. + * + * @author toregge + */ +public abstract class InItem extends Item { + private String indexName; + public InItem(String indexName) { + this.indexName = requireNonNullElse(indexName, ""); + } + + @Override + public void setIndexName(String index) { + this.indexName = requireNonNullElse(index, ""); + } + public String getIndexName() { + return indexName; + } + + @Override + public String getName() { + return getItemType().name(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! super.equals(o)) return false; + var other = (InItem)o; + if ( ! Objects.equals(this.indexName, other.indexName)) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), indexName); + } + +}; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index e38579be2df..f43b55424e6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -56,7 +56,9 @@ public abstract class Item implements Cloneable { GEO_LOCATION_TERM(27), TRUE(28), FALSE(29), - FUZZY(30); + FUZZY(30), + STRING_IN(31), + NUMERIC_IN(32); public final int code; @@ -241,8 +243,9 @@ public abstract class Item implements Cloneable { byte FEAT_UNIQUEID = 0b01000000; byte FEAT_FLAGS = -0b10000000; - byte type = (byte) (getCode() & CODE_MASK); - if (type != getCode()) + int code = getCode(); + byte type = code >= CODE_MASK ? CODE_MASK : (byte) code; + if (code >= 0x80 + CODE_MASK) throw new IllegalStateException("must increase number of bytes in serialization format for queries"); if (weight != DEFAULT_WEIGHT) { @@ -257,6 +260,10 @@ public abstract class Item implements Cloneable { } buffer.put(type); + if (code >= CODE_MASK) { + // This is an extension to the serialization to work around original 5 bits limit for code + buffer.put((byte) (code - CODE_MASK)); + } if ((type & FEAT_WEIGHT) != 0) { IntegerCompressor.putCompressedNumber(weight, buffer); } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NumericInItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NumericInItem.java new file mode 100644 index 00000000000..9333173d898 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NumericInItem.java @@ -0,0 +1,89 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +/* + * Class representing an IN operator with a set of 64-bit + * integer values. + * + * @author toregge + */ +public class NumericInItem extends InItem { + private Set<Long> tokens; + + public NumericInItem(String indexName) { + super(indexName); + tokens = new HashSet<>(1000); + } + + @Override + public Item.ItemType getItemType() { + return Item.ItemType.NUMERIC_IN; + } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(tokens.size(), buffer); + putString(getIndexName(), buffer); + for (var token : tokens) { + buffer.putLong(token); + } + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(getIndexName()); + buffer.append("{"); + for (var token : tokens) { + buffer.append(token.toString()); + if (token < Integer.MIN_VALUE || token > Integer.MAX_VALUE) { + buffer.append("L"); + } + buffer.append(","); + } + if (!tokens.isEmpty()) { + buffer.deleteCharAt(buffer.length() - 1); // remove extra "," + } + buffer.append("}"); + } + + public void addToken(long token) { + tokens.add(token); + } + + public Collection<Long> getTokens() { return Set.copyOf(tokens); } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! super.equals(o)) return false; + var other = (NumericInItem)o; + if ( ! Objects.equals(this.tokens, other.tokens)) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), tokens); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java new file mode 100644 index 00000000000..ebcf0de1a21 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java @@ -0,0 +1,87 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +/* + * Class representing an IN operator with a set of string values. + * + * @author toregge + */ +public class StringInItem extends InItem { + private Set<String> tokens; + + public StringInItem(String indexName) { + super(indexName); + tokens = new HashSet<>(1000); + } + + @Override + public ItemType getItemType() { + return ItemType.STRING_IN; + } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(tokens.size(), buffer); + putString(getIndexName(), buffer); + for (var entry : tokens) { + putString(entry, buffer); + } + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(getIndexName()); + buffer.append("{"); + for (var entry : tokens) { + buffer.append("\""); + buffer.append(entry); + buffer.append("\","); + } + if (!tokens.isEmpty()) { + buffer.deleteCharAt(buffer.length() - 1); // remove extra "," + } + buffer.append("}"); + } + + public void addToken(String token) { + Objects.requireNonNull(token, "Token string must not be null"); + tokens.add(token); + } + + public Collection<String> getTokens() { return Set.copyOf(tokens); } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! super.equals(o)) return false; + var other = (StringInItem)o; + if ( ! Objects.equals(this.tokens, other.tokens)) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), tokens); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/yql/ParameterListParser.java b/container-search/src/main/java/com/yahoo/search/yql/ParameterListParser.java index 1993871aa4c..397cb056ba2 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/ParameterListParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/ParameterListParser.java @@ -1,6 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.yql; +import com.yahoo.prelude.query.NumericInItem; +import com.yahoo.prelude.query.StringInItem; import com.yahoo.prelude.query.WeightedSetItem; import java.util.Arrays; @@ -61,6 +63,41 @@ class ParameterListParser { s.pass('}'); } + public static void addStringTokensFromString(String string, StringInItem out) { + if (string == null) { + return; + } + var s = new ParsableString(string); + while (!s.atEnd()) { + String token; + if (s.passOptional('\'')) { + token = s.stringTo(s.position('\'')); + s.pass('\''); + } + else if (s.passOptional('"')) { + token = s.stringTo(s.position('"')); + s.pass('"'); + } + else { + token = s.stringTo(s.positionOrEnd(',')).trim(); + } + out.addToken(token); + s.passOptional(','); + } + } + + public static void addNumericTokensFromString(String string, NumericInItem out) { + if (string == null) { + return; + } + var s = new ParsableString(string); + while (!s.atEnd()) { + long token = s.longTo(s.positionOrEnd(',')); + out.addToken(token); + s.passOptional(','); + } + } + private static class ParsableString { int position = 0; @@ -142,6 +179,17 @@ class ParameterListParser { throw new IllegalArgumentException("Expected one of " + Arrays.toString(characters) + " after " + position); } + int positionOrEnd(char ... characters) { + int localPosition = position; + while (localPosition < s.length()) { + char nextChar = s.charAt(localPosition); + for (char character : characters) + if (nextChar == character) return localPosition; + localPosition++; + } + return localPosition; + } + boolean atEnd() { return position >= s.length(); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index 490fc7aa07f..634163bf0c2 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -84,6 +84,7 @@ import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NearestNeighborItem; import com.yahoo.prelude.query.NotItem; import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.NumericInItem; import com.yahoo.prelude.query.ONearItem; import com.yahoo.prelude.query.OrItem; import com.yahoo.prelude.query.PhraseItem; @@ -95,6 +96,7 @@ import com.yahoo.prelude.query.RankItem; import com.yahoo.prelude.query.RegExpItem; import com.yahoo.prelude.query.SameElementItem; import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.StringInItem; import com.yahoo.prelude.query.Substring; import com.yahoo.prelude.query.SubstringItem; import com.yahoo.prelude.query.SuffixItem; @@ -1077,6 +1079,51 @@ public class VespaSerializer { } + private static class StringInSerializer extends Serializer<StringInItem> { + @Override + void onExit(StringBuilder destination, StringInItem item) { + + } + + @Override + boolean serialize(StringBuilder destination, StringInItem item) { + destination.append(item.getIndexName()).append(" in ("); + int initLen = destination.length(); + List<String> tokens = new ArrayList<>(item.getTokens()); + Collections.sort(tokens); + for (var token : tokens) { + comma(destination, initLen); + destination.append('"'); + escape(token, destination); + destination.append("\""); + } + destination.append(")"); + return false; + } + } + + private static class NumericInSerializer extends Serializer<NumericInItem> { + @Override + void onExit(StringBuilder destination, NumericInItem item) { + } + + @Override + boolean serialize(StringBuilder destination, NumericInItem item) { + destination.append(item.getIndexName()).append(" in ("); + int initLen = destination.length(); + List<Long> tokens = new ArrayList<>(item.getTokens()); + Collections.sort(tokens); + for (var token : tokens) { + comma(destination, initLen); + destination.append(token.toString()); + if (token < Integer.MIN_VALUE || token > Integer.MAX_VALUE) + destination.append("L"); + } + destination.append(")"); + return false; + } + } + private static class WordSerializer extends Serializer<WordItem> { @Override @@ -1284,6 +1331,8 @@ public class VespaSerializer { dispatchBuilder.put(RegExpItem.class, new RegExpSerializer()); dispatchBuilder.put(UriItem.class, new UriSerializer()); dispatchBuilder.put(FuzzyItem.class, new FuzzySerializer()); + dispatchBuilder.put(StringInItem.class, new StringInSerializer()); + dispatchBuilder.put(NumericInItem.class, new NumericInSerializer()); dispatch = ImmutableMap.copyOf(dispatchBuilder); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index eaabdf2d2d4..5e1dfb99479 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -44,6 +44,7 @@ import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NearestNeighborItem; import com.yahoo.prelude.query.NotItem; import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.NumericInItem; import com.yahoo.prelude.query.ONearItem; import com.yahoo.prelude.query.OrItem; import com.yahoo.prelude.query.PhraseItem; @@ -56,6 +57,7 @@ import com.yahoo.prelude.query.RegExpItem; import com.yahoo.prelude.query.SameElementItem; import com.yahoo.prelude.query.SegmentItem; import com.yahoo.prelude.query.SegmentingRule; +import com.yahoo.prelude.query.StringInItem; import com.yahoo.prelude.query.Substring; import com.yahoo.prelude.query.SubstringItem; import com.yahoo.prelude.query.SuffixItem; @@ -353,10 +355,12 @@ public class YqlParser implements Parser { case CALL -> buildFunctionCall(ast); case LITERAL -> buildLiteral(ast); case NOT -> buildNot(ast); + case IN -> buildIn(ast); default -> throw newUnexpectedArgumentException(ast.getOperator(), ExpressionOperator.AND, ExpressionOperator.CALL, ExpressionOperator.CONTAINS, ExpressionOperator.EQ, ExpressionOperator.GT, ExpressionOperator.GTEQ, + ExpressionOperator.IN, ExpressionOperator.LT, ExpressionOperator.LTEQ, ExpressionOperator.OR); }; @@ -409,6 +413,18 @@ public class YqlParser implements Parser { return fillWeightedSet(ast, args.get(1), new DotProductItem(getIndex(args.get(0)))); } + private Item buildIn(OperatorNode<ExpressionOperator> ast) { + String field = getIndex(ast.getArgument(0)); + boolean stringField = indexFactsSession.getIndex(field).isString(); + Item item = null; + if (stringField) { + item = fillStringIn(ast, ast.getArgument(1), new StringInItem(field)); + } else { + item = fillNumericIn(ast, ast.getArgument(1), new NumericInItem(field)); + } + return item; + } + private ParsedDegree degreesFromArg(OperatorNode<ExpressionOperator> ast, boolean first) { Object arg = null; switch (ast.getOperator()) { @@ -591,6 +607,52 @@ public class YqlParser implements Parser { return leafStyleSettings(ast, out); } + private StringInItem fillStringIn(OperatorNode<ExpressionOperator> ast, + OperatorNode<ExpressionOperator> arg, + StringInItem out) { + assertHasOperator(arg, ExpressionOperator.ARRAY); + List<OperatorNode<ExpressionOperator>> values = arg.getArgument(0); + for (var value : values) { + switch (value.getOperator()) { + case LITERAL -> { + String tokenValue = value.getArgument(0, String.class); + out.addToken(tokenValue); + } + case VARREF -> { + Preconditions.checkState(userQuery != null, "Query properties are not available"); + String varRef = value.getArgument(0, String.class); + ParameterListParser.addStringTokensFromString(userQuery.properties().getString(varRef), out); + } + default -> throw newUnexpectedArgumentException(value.getOperator(), + ExpressionOperator.LITERAL, ExpressionOperator.VARREF); + } + } + return out; + } + + private NumericInItem fillNumericIn(OperatorNode<ExpressionOperator> ast, + OperatorNode<ExpressionOperator> arg, + NumericInItem out) { + assertHasOperator(arg, ExpressionOperator.ARRAY); + List<OperatorNode<ExpressionOperator>> values = arg.getArgument(0); + for (var value : values) { + switch (value.getOperator()) { + case LITERAL -> { + Long tokenValue = value.getArgument(0, Number.class).longValue(); + out.addToken(tokenValue); + } + case VARREF -> { + Preconditions.checkState(userQuery != null, "Query properties are not available"); + String varRef = value.getArgument(0, String.class); + ParameterListParser.addNumericTokensFromString(userQuery.properties().getString(varRef), out); + } + default -> throw newUnexpectedArgumentException(value.getOperator(), + ExpressionOperator.LITERAL, ExpressionOperator.VARREF); + } + } + return out; + } + private static class PrefixExpander extends IndexNameExpander { private final String prefix; public PrefixExpander(String prefix) { |