diff options
Diffstat (limited to 'searchlib/src')
41 files changed, 409 insertions, 222 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java index ae7d0a67b2f..b0f98685578 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java @@ -80,7 +80,7 @@ public class Int16ResultNode extends NumericResultNode { @Override public void add(ResultNode rhs) { - value += rhs.getInteger(); + value += (short)rhs.getInteger(); } @Override @@ -90,7 +90,7 @@ public class Int16ResultNode extends NumericResultNode { @Override public void multiply(ResultNode rhs) { - value *= rhs.getInteger(); + value *= (short)rhs.getInteger(); } @Override @@ -101,7 +101,7 @@ public class Int16ResultNode extends NumericResultNode { @Override public void modulo(ResultNode rhs) { - value %= rhs.getInteger(); + value %= (short)rhs.getInteger(); } @Override diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java index da31cbc236a..711b8f1bd3f 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java @@ -80,7 +80,7 @@ public class Int32ResultNode extends NumericResultNode { @Override public void add(ResultNode rhs) { - value += rhs.getInteger(); + value += (int)rhs.getInteger(); } @Override @@ -90,7 +90,7 @@ public class Int32ResultNode extends NumericResultNode { @Override public void multiply(ResultNode rhs) { - value *= rhs.getInteger(); + value *= (int)rhs.getInteger(); } @Override @@ -101,7 +101,7 @@ public class Int32ResultNode extends NumericResultNode { @Override public void modulo(ResultNode rhs) { - value %= rhs.getInteger(); + value %= (int)rhs.getInteger(); } @Override @@ -122,7 +122,7 @@ public class Int32ResultNode extends NumericResultNode { @Override public Object getNumber() { - return Integer.valueOf(value); + return value; } @Override diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java index ae53cf45a6f..d6706ce1dfe 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java @@ -78,7 +78,7 @@ public class Int8ResultNode extends NumericResultNode { @Override public void add(ResultNode rhs) { - value += rhs.getInteger(); + value += (byte)rhs.getInteger(); } @Override @@ -88,7 +88,7 @@ public class Int8ResultNode extends NumericResultNode { @Override public void multiply(ResultNode rhs) { - value *= rhs.getInteger(); + value *= (byte)rhs.getInteger(); } @Override @@ -99,7 +99,7 @@ public class Int8ResultNode extends NumericResultNode { @Override public void modulo(ResultNode rhs) { - value %= rhs.getInteger(); + value %= (byte)rhs.getInteger(); } @Override diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java index 5a0e056f254..d1dc46fc4d0 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java @@ -18,8 +18,8 @@ public class RawResultNode extends SingleResultNode { // The global class identifier shared with C++. public static final int classId = registerClass(0x4000 + 54, RawResultNode.class); - private static RawResultNode negativeInfinity = new RawResultNode(); - private static PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode(); + private static final RawResultNode negativeInfinity = new RawResultNode(); + private static final PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode(); // The raw value of this node. private RawData value = null; @@ -147,7 +147,7 @@ public class RawResultNode extends SingleResultNode { @Override public Object getValue() { - return getString(); + return value; } @Override diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java index 2b5efdb1ffe..5b6a53a7019 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java @@ -16,7 +16,7 @@ import static java.lang.Math.*; public final class FieldMatchMetrics implements Cloneable { /** The calculator creating this - given on initialization */ - private FieldMatchMetricsComputer source; + private final FieldMatchMetricsComputer source; /** The trace accumulated during execution - empty if no tracing */ private final Trace trace = new Trace(); @@ -75,7 +75,7 @@ public final class FieldMatchMetrics implements Cloneable { currentSequence=0; segmentStarts.clear(); - queryLength=source.getQuery().getTerms().length; + queryLength = source.getQuery().getTerms().length; } /** Are these metrics representing a complete match */ @@ -93,7 +93,7 @@ public final class FieldMatchMetrics implements Cloneable { */ public float get(String name) { try { - Method getter=getClass().getMethod("get" + name.substring(0,1).toUpperCase() + name.substring(1)); + Method getter = getClass().getMethod("get" + name.substring(0, 1).toUpperCase() + name.substring(1)); return ((Number)getter.invoke(this)).floatValue(); } catch (NoSuchMethodException e) { @@ -140,7 +140,7 @@ public final class FieldMatchMetrics implements Cloneable { * segment or out of order */ public float getAbsoluteProximity() { - if (pairs <1) return 0.1f; + if (pairs < 1) return 0.1f; return proximity/pairs; } @@ -151,7 +151,7 @@ public final class FieldMatchMetrics implements Cloneable { * following each other in sequence, and close to 0 if they are far from each other or out of order */ public float getUnweightedProximity() { - if (pairs <1) return 1f; + if (pairs < 1) return 1f; return unweightedProximity/pairs; } @@ -271,33 +271,33 @@ public final class FieldMatchMetrics implements Cloneable { * <code>queryCompleteness * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance * fieldCompleteness</code> */ public float getCompleteness() { - float fieldCompletenessImportance=source.getParameters().getFieldCompletenessImportance(); + float fieldCompletenessImportance = source.getParameters().getFieldCompletenessImportance(); return getQueryCompleteness() * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance*getFieldCompleteness(); } /** Returns how well the order of the terms agreed in segments: <code>1-outOfOrder/pairs</code> */ public float getOrderness() { - if (pairs ==0) return 1f; + if (pairs == 0) return 1f; return 1-(float)outOfOrder/pairs; } /** Returns the degree to which different terms are related (occurring in the same segment): <code>1-segments/(matches-1)</code> */ public float getRelatedness() { - if (matches==0) return 0; - if (matches==1) return 1; - return 1-(float)(segments-1)/(matches-1); + if (matches == 0) return 0; + if (matches == 1) return 1; + return 1 - (float)(segments - 1) / (matches - 1); } /** Returns <code>longestSequence/matches</code> */ public float getLongestSequenceRatio() { - if (matches==0) return 0; - return (float)longestSequence/matches; + if (matches == 0) return 0; + return (float)longestSequence / matches; } /** Returns the closeness of the segments in the field: <code>1-segmentDistance/fieldLength</code> */ public float getSegmentProximity() { - if (matches==0) return 0; - return 1-segmentDistance/source.getField().terms().size(); + if (matches == 0) return 0; + return 1 - segmentDistance / source.getField().terms().size(); } /** @@ -306,14 +306,14 @@ public final class FieldMatchMetrics implements Cloneable { * This is absoluteProximity/average connectedness. */ public float getProximity() { - float totalConnectedness=0; - for (int i=1; i<queryLength; i++) { - totalConnectedness+=Math.max(0.1,source.getQuery().getTerms()[i].getConnectedness()); + float totalConnectedness = 0; + for (int i = 1; i < queryLength; i++) { + totalConnectedness += (float)Math.max(0.1, source.getQuery().getTerms()[i].getConnectedness()); } - float averageConnectedness=0.1f; - if (queryLength>1) - averageConnectedness=totalConnectedness/(queryLength-1); - return getAbsoluteProximity()/averageConnectedness; + float averageConnectedness = 0.1f; + if (queryLength > 1) + averageConnectedness = totalConnectedness / (queryLength - 1); + return getAbsoluteProximity() / averageConnectedness; } /** @@ -378,7 +378,7 @@ public final class FieldMatchMetrics implements Cloneable { * not only when the metrics are complete, because this metric is used to choose segments during calculation.</p> */ float getSegmentationScore() { - if (segments==0) return 0; + if (segments == 0) return 0; return getAbsoluteProximity() * getExactness() / (segments * segments); } @@ -389,7 +389,7 @@ public final class FieldMatchMetrics implements Cloneable { /** Called once for every match */ void onMatch(int i, int j) { - if (matches>=source.getField().terms().size()) return; + if (matches >= source.getField().terms().size()) return; matches++; weight += (float)source.getQuery().getTerms()[i].getWeight() / source.getQuery().getTotalTermWeight(); significance += source.getQuery().getTerms()[i].getSignificance() / source.getQuery().getTotalSignificance(); @@ -418,42 +418,42 @@ public final class FieldMatchMetrics implements Cloneable { } /** Called once when this value is calculated, before onComplete */ - void setOccurrence(float occurrence) { this.occurrence=occurrence; } + void setOccurrence(float occurrence) { this.occurrence = occurrence; } /** Called once when this value is calculated, before onComplete */ - void setWeightedOccurrence(float weightedOccurrence) { this.weightedOccurrence=weightedOccurrence; } + void setWeightedOccurrence(float weightedOccurrence) { this.weightedOccurrence = weightedOccurrence; } /** Called once when this value is calculated, before onComplete */ - void setAbsoluteOccurrence(float absoluteOccurrence) { this.absoluteOccurrence=absoluteOccurrence; } + void setAbsoluteOccurrence(float absoluteOccurrence) { this.absoluteOccurrence = absoluteOccurrence; } /** Called once when this value is calculated, before onComplete */ - void setWeightedAbsoluteOccurrence(float weightedAbsoluteOccurrence) { this.weightedAbsoluteOccurrence=weightedAbsoluteOccurrence; } + void setWeightedAbsoluteOccurrence(float weightedAbsoluteOccurrence) { this.weightedAbsoluteOccurrence = weightedAbsoluteOccurrence; } /** Called once when this value is calculated, before onComplete */ - void setSignificantOccurrence(float significantOccurrence) { this.significantOccurrence =significantOccurrence; } + void setSignificantOccurrence(float significantOccurrence) { this.significantOccurrence = significantOccurrence; } /** Called once when matching is complete */ void onComplete() { // segment distance - calculated from sorted segment starts - if (segmentStarts.size()<=1) { - segmentDistance=0; + if (segmentStarts.size() <= 1) { + segmentDistance = 0; } else { Collections.sort(segmentStarts); - for (int i=1; i<segmentStarts.size(); i++) { - segmentDistance+=segmentStarts.get(i)-segmentStarts.get(i-1)+1; + for (int i = 1; i < segmentStarts.size(); i++) { + segmentDistance += segmentStarts.get(i) - segmentStarts.get(i - 1) + 1; } } - if (head==-1) head=0; - if (tail==-1) tail=0; + if (head == -1) head = 0; + if (tail == -1) tail = 0; } // Events on pairs ---------- /** Called when <i>any</i> pair is encountered */ void onPair(int i, int j, int previousJ) { - int distance = j-previousJ-1; + int distance = j - previousJ - 1; if (distance < 0) distance++; // Discontinuity where the two terms are in the same position if (abs(distance) > source.getParameters().getProximityLimit()) return; // Contribution=0 @@ -463,7 +463,7 @@ public final class FieldMatchMetrics implements Cloneable { unweightedProximity += pairProximity; float connectedness = source.getQuery().getTerms()[i].getConnectedness(); - proximity += pow(pairProximity, connectedness/0.1) * max(0.1, connectedness); + proximity += (float)pow(pairProximity, connectedness / 0.1) * (float)max(0.1, connectedness); pairs++; } @@ -498,8 +498,8 @@ public final class FieldMatchMetrics implements Cloneable { @Override public FieldMatchMetrics clone() { try { - FieldMatchMetrics clone=(FieldMatchMetrics)super.clone(); - clone.segmentStarts=new ArrayList<>(segmentStarts); + FieldMatchMetrics clone = (FieldMatchMetrics)super.clone(); + clone.segmentStarts = new ArrayList<>(segmentStarts); return clone; } catch (CloneNotSupportedException e) { @@ -514,19 +514,19 @@ public final class FieldMatchMetrics implements Cloneable { public String toStringDump() { try { - StringBuilder b=new StringBuilder(); + StringBuilder b = new StringBuilder(); for (Method m : this.getClass().getDeclaredMethods()) { if ( ! m.getName().startsWith("get")) continue; - if (m.getReturnType()!=Integer.TYPE && m.getReturnType()!=Float.TYPE) continue; - if ( m.getParameterTypes().length!=0 ) continue; + if (m.getReturnType() != Integer.TYPE && m.getReturnType() != Float.TYPE) continue; + if ( m.getParameterTypes().length != 0 ) continue; - Object value=m.invoke(this,new Object[0]); - b.append(m.getName().substring(3,4).toLowerCase() + m.getName().substring(4) + ": " + value + "\n"); + Object value = m.invoke(this, new Object[0]); + b.append(m.getName().substring(3, 4).toLowerCase() + m.getName().substring(4) + ": " + value + "\n"); } return b.toString(); } catch (Exception e) { - throw new RuntimeException("Programming error",e); + throw new RuntimeException("Programming error", e); } } diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java index 949e1f026f7..df721a4309e 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java @@ -70,13 +70,13 @@ public final class GBDTNode extends ExpressionNode { int offset = (int)nextValue - MAX_LEAF_VALUE; boolean comparisonIsTrue = false; if (offset < MAX_VARIABLES) { - comparisonIsTrue = context.getDouble(offset)<values[pc++]; + comparisonIsTrue = context.getDouble(offset) < values[pc++]; } - else if (offset < MAX_VARIABLES*2) { - comparisonIsTrue = context.getDouble(offset-MAX_VARIABLES)==values[pc++]; + else if (offset < MAX_VARIABLES * 2) { + comparisonIsTrue = context.getDouble(offset - MAX_VARIABLES) == values[pc++]; } - else if (offset<MAX_VARIABLES*3) { - double testValue = context.getDouble(offset-MAX_VARIABLES*2); + else if (offset < MAX_VARIABLES * 3) { + double testValue = context.getDouble(offset - MAX_VARIABLES * 2); int setValuesLeft = (int)values[pc++]; while (setValuesLeft > 0) { // test each value in the set setValuesLeft--; @@ -88,13 +88,13 @@ public final class GBDTNode extends ExpressionNode { pc += setValuesLeft; // jump to after the set } else { // offset<MAX_VARIABLES*4 - comparisonIsTrue = ! (context.getDouble(offset-MAX_VARIABLES*3)>=values[pc++]); + comparisonIsTrue = ! (context.getDouble(offset - MAX_VARIABLES * 3) >= values[pc++]); } if (comparisonIsTrue) pc++; // true branch - skip the jump value else - pc += values[pc]; // false branch - jump + pc += (int)values[pc]; // false branch - jump } else { // a leaf return nextValue; diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java index d846d322720..8a33f320bb0 100644 --- a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java @@ -3,14 +3,12 @@ package com.yahoo.searchlib.gbdt; import com.yahoo.searchlib.rankingexpression.RankingExpression; import com.yahoo.searchlib.rankingexpression.parser.ParseException; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; -import java.security.Permission; +import java.nio.charset.StandardCharsets; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertEquals; @@ -21,36 +19,6 @@ import static org.junit.Assert.fail; */ public class GbdtConverterTestCase { - @Before - @SuppressWarnings("removal") - public void enableSecurityManager() { - System.setSecurityManager(new NoExitSecurityManager()); - } - - @After - @SuppressWarnings("removal") - public void disableSecurityManager() { - System.setSecurityManager(null); - } - - @Test - public void testOnlyOneArgumentIsAccepted() throws UnsupportedEncodingException { - assertError("Usage: GbdtConverter <filename>\n", new String[0]); - assertError("Usage: GbdtConverter <filename>\n", new String[] { "foo", "bar" }); - } - - @Test - public void testFileIsFound() throws UnsupportedEncodingException { - assertError("Could not find file 'not.found'.\n", new String[] { "not.found" }); - } - - @Test - public void testFileParsingExceptionIsCaught() throws UnsupportedEncodingException { - assertError("An error occurred while parsing the content of file 'src/test/files/gbdt_err.xml': " + - "Node 'Unknown' has no 'DecisionTree' children.\n", - new String[] { "src/test/files/gbdt_err.xml" }); - } - @Test public void testEmptyTreesAreIgnored() throws Exception { assertConvert("src/test/files/gbdt_empty_tree.xml", @@ -125,7 +93,7 @@ public class GbdtConverterTestCase { ByteArrayOutputStream out = new ByteArrayOutputStream(); System.setOut(new PrintStream(out)); GbdtConverter.main(new String[] { gbdtModelFile }); - String actualExpression = out.toString("UTF-8"); + String actualExpression = out.toString(StandardCharsets.UTF_8); assertEquals(expectedExpression, actualExpression); assertNotNull(new RankingExpression(actualExpression)); } @@ -138,26 +106,7 @@ public class GbdtConverterTestCase { fail(); } catch (ExitException e) { assertEquals(1, e.status); - assertEquals(expected, err.toString("UTF-8")); - } - } - - @SuppressWarnings("removal") - private static class NoExitSecurityManager extends SecurityManager { - - @Override - public void checkPermission(Permission perm) { - // allow anything - } - - @Override - public void checkPermission(Permission perm, Object context) { - // allow anything - } - - @Override - public void checkExit(int status) { - throw new ExitException(status); + assertEquals(expected, err.toString(StandardCharsets.UTF_8)); } } @@ -169,4 +118,5 @@ public class GbdtConverterTestCase { this.status = status; } } + } diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index d2d3ccaad23..3a1a5b457ef 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -913,7 +913,7 @@ AttributeTest::testSingle() AttributePtr ptr = createAttribute("sv-post-int32", cfg); ptr->updateStat(true); EXPECT_EQ(338972u, ptr->getStatus().getAllocated()); - EXPECT_EQ(101492u, ptr->getStatus().getUsed()); + EXPECT_EQ(101632u, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values); } @@ -935,7 +935,7 @@ AttributeTest::testSingle() AttributePtr ptr = createAttribute("sv-post-float", cfg); ptr->updateStat(true); EXPECT_EQ(338972u, ptr->getStatus().getAllocated()); - EXPECT_EQ(101492u, ptr->getStatus().getUsed()); + EXPECT_EQ(101632u, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<FloatingPointAttribute, double, float>(ptr, values); } @@ -948,7 +948,7 @@ AttributeTest::testSingle() AttributePtr ptr = createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE)); ptr->updateStat(true); EXPECT_EQ(116528u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); - EXPECT_EQ(52760u + sizeof_large_string_entry, ptr->getStatus().getUsed()); + EXPECT_EQ(52844u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<StringAttribute, string, string>(ptr, values); } @@ -958,7 +958,7 @@ AttributeTest::testSingle() AttributePtr ptr = createAttribute("sv-fs-string", cfg); ptr->updateStat(true); EXPECT_EQ(344848u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); - EXPECT_EQ(104408u + sizeof_large_string_entry, ptr->getStatus().getUsed()); + EXPECT_EQ(104556u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<StringAttribute, string, string>(ptr, values); } @@ -1110,7 +1110,7 @@ AttributeTest::testArray() AttributePtr ptr = createAttribute("a-fs-int32", cfg); ptr->updateStat(true); EXPECT_EQ(844116u, ptr->getStatus().getAllocated()); - EXPECT_EQ(581232u, ptr->getStatus().getUsed()); + EXPECT_EQ(581372u, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); } @@ -1129,7 +1129,7 @@ AttributeTest::testArray() AttributePtr ptr = createAttribute("a-fs-float", cfg); ptr->updateStat(true); EXPECT_EQ(844116u, ptr->getStatus().getAllocated()); - EXPECT_EQ(581232u, ptr->getStatus().getUsed()); + EXPECT_EQ(581372u, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<FloatingPointAttribute, double>(ptr, values); } @@ -1141,7 +1141,7 @@ AttributeTest::testArray() AttributePtr ptr = createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY)); ptr->updateStat(true); EXPECT_EQ(599784u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); - EXPECT_EQ(532480u + sizeof_large_string_entry, ptr->getStatus().getUsed()); + EXPECT_EQ(532564u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<StringAttribute, string>(ptr, values); } @@ -1151,7 +1151,7 @@ AttributeTest::testArray() AttributePtr ptr = createAttribute("afs-string", cfg); ptr->updateStat(true); EXPECT_EQ(849992u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); - EXPECT_EQ(584148u + sizeof_large_string_entry, ptr->getStatus().getUsed()); + EXPECT_EQ(584296u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<StringAttribute, string>(ptr, values); } @@ -1718,7 +1718,7 @@ AttributeTest::testStatus() ptr->commit(true); EXPECT_EQ(ptr->getStatus().getNumDocs(), 100u); EXPECT_EQ(ptr->getStatus().getNumValues(), 100u); - EXPECT_EQ(ptr->getStatus().getNumUniqueValues(), 1u); + EXPECT_EQ(ptr->getStatus().getNumUniqueValues(), 2u); size_t expUsed = 0; expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry) @@ -1741,7 +1741,7 @@ AttributeTest::testStatus() ptr->commit(true); EXPECT_EQ(ptr->getStatus().getNumDocs(), numDocs); EXPECT_EQ(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc); - EXPECT_EQ(ptr->getStatus().getNumUniqueValues(), numUniq); + EXPECT_EQ(ptr->getStatus().getNumUniqueValues(), numUniq + 1); size_t expUsed = 0; expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // Approximate enum store tree expUsed += 272; // TODO Approximate... enum store (16 unique values, 17 bytes per entry) @@ -2145,12 +2145,12 @@ AttributeTest::test_default_value_ref_count_is_updated_after_shrink_lid_space() const auto & iattr = dynamic_cast<const search::IntegerAttributeTemplate<int32_t> &>(*attr); attr->addReservedDoc(); attr->addDocs(10); - EXPECT_EQ(11u, get_default_value_ref_count(*attr, iattr.defaultValue())); + EXPECT_EQ(12u, get_default_value_ref_count(*attr, iattr.defaultValue())); attr->compactLidSpace(6); - EXPECT_EQ(11u, get_default_value_ref_count(*attr, iattr.defaultValue())); + EXPECT_EQ(12u, get_default_value_ref_count(*attr, iattr.defaultValue())); attr->shrinkLidSpace(); EXPECT_EQ(6u, attr->getNumDocs()); - EXPECT_EQ(6u, get_default_value_ref_count(*attr, iattr.defaultValue())); + EXPECT_EQ(7u, get_default_value_ref_count(*attr, iattr.defaultValue())); } template <typename AttributeType> @@ -2170,7 +2170,7 @@ AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool AddressSpaceUsage after = attrPtr->getAddressSpaceUsage(); if (attrPtr->hasEnum()) { LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str()); - EXPECT_EQ(before.enum_store_usage().used(), 1u); + EXPECT_EQ(before.enum_store_usage().used(), 2u); EXPECT_EQ(before.enum_store_usage().dead(), 1u); EXPECT_GT(after.enum_store_usage().used(), before.enum_store_usage().used()); EXPECT_GE(after.enum_store_usage().limit(), before.enum_store_usage().limit()); diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index 820f39089d1..5501c99652b 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -183,7 +183,7 @@ MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const return false; if (lhs.get() == NULL) return true; - if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) + if (!EXPECT_EQUAL(lhs->getDataLen(), rhs->getDataLen())) return false; if (!EXPECT_TRUE(vespalib::memcmp_safe(lhs->getData(), rhs->getData(), lhs->getDataLen()) == 0)) @@ -243,8 +243,9 @@ EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); - if (i == 9) + if (i == 9) { continue; + } if (i == 7) { if (v.hasMultiValue()) { v.append(i, -42, 27); @@ -270,7 +271,7 @@ EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, i + 1); } } else { - EXPECT_TRUE( v.update(i, lrand48() & mask) ); + EXPECT_TRUE( v.update(i, rnd.lrand48() & mask) ); } } v.commit(); @@ -288,8 +289,9 @@ EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed, int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); - if (i == 9) + if (i == 9) { continue; + } if (i == 7) { if (v.hasMultiValue()) { v.append(i, -42.0, 27); @@ -315,7 +317,7 @@ EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed, i + 1); } } else { - EXPECT_TRUE( v.update(i, lrand48()) ); + EXPECT_TRUE( v.update(i, rnd.lrand48()) ); } } v.commit(); @@ -332,8 +334,9 @@ EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed, int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); - if (i == 9) + if (i == 9) { continue; + } if (i == 7) { if (v.hasMultiValue()) { v.append(i, "foo", 27); @@ -712,9 +715,9 @@ EnumeratedSaveTest::test(BasicType bt, CollectionType ct, Config check_cfg(cfg); check_cfg.setFastSearch(true); - checkLoad<VectorType, BufferType>(check_cfg, pref + "0_ee", v0); - checkLoad<VectorType, BufferType>(check_cfg, pref + "1_ee", v1); - checkLoad<VectorType, BufferType>(check_cfg, pref + "2_ee", v2); + TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "0_ee", v0))); + TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "1_ee", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "2_ee", v2))); TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, mv0, mv1, mv2, diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index b3c7516777c..2b01c266e80 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -180,15 +180,35 @@ TYPED_TEST(FloatEnumStoreTest, numbers_can_be_inserted_and_retrieved) } } +TEST(EnumStoreTest, default_value_is_present) +{ + StringEnumStore ses(false, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; + EntryType undefined = attribute::getUndefined<EntryType>(); + EnumIndex idx; + EXPECT_TRUE(ses.find_index(undefined, idx)); + EXPECT_TRUE(idx.valid()); + EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx); + ses.clear_default_value_ref(); + EXPECT_FALSE(ses.find_index(undefined, idx)); + EXPECT_FALSE(ses.get_default_value_ref().load_relaxed().valid()); + ses.setup_default_value_ref(); + idx = EnumIndex(); + EXPECT_TRUE(ses.find_index(undefined, idx)); + EXPECT_TRUE(idx.valid()); + EXPECT_EQ(ses.get_default_value_ref().load_relaxed(), idx); +} + TEST(EnumStoreTest, test_find_folded_on_string_enum_store) { StringEnumStore ses(false, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; std::vector<EnumIndex> indices; std::vector<std::string> unique({"", "one", "two", "TWO", "Two", "three"}); for (std::string &str : unique) { EnumIndex idx = ses.insert(str.c_str()); indices.push_back(idx); - EXPECT_EQ(1u, ses.get_ref_count(idx)); + EXPECT_EQ((str == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx)); } ses.freeze_dictionary(); for (uint32_t i = 0; i < indices.size(); ++i) { @@ -233,13 +253,14 @@ void StringEnumStoreTest::testInsert(bool hasPostings) { StringEnumStore ses(hasPostings, DictionaryConfig::Type::BTREE); + using EntryType = StringEnumStore::EntryType; std::vector<EnumIndex> indices; std::vector<std::string> unique = {"", "add", "enumstore", "unique"}; for (const auto & i : unique) { EnumIndex idx = ses.insert(i.c_str()); - EXPECT_EQ(1u, ses.get_ref_count(idx)); + EXPECT_EQ((i == attribute::getUndefined<EntryType>()) ? 2u : 1u, ses.get_ref_count(idx)); indices.push_back(idx); EXPECT_TRUE(ses.find_index(i.c_str(), idx)); } @@ -253,7 +274,7 @@ StringEnumStoreTest::testInsert(bool hasPostings) EnumIndex idx; EXPECT_TRUE(ses.find_index(unique[i].c_str(), idx)); EXPECT_TRUE(idx == indices[i]); - EXPECT_EQ(1u, ses.get_ref_count(indices[i])); + EXPECT_EQ((i == 0) ? 2u : 1u, ses.get_ref_count(indices[i])); const char* value = nullptr; EXPECT_TRUE(ses.get_value(indices[i], value)); EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0); @@ -354,22 +375,22 @@ TEST(EnumStoreTest, address_space_usage_is_reported) NumericEnumStore store(false, DictionaryConfig::Type::BTREE); using vespalib::AddressSpace; - EXPECT_EQ(AddressSpace(1, 1, ADDRESS_LIMIT), store.get_values_address_space_usage()); - EnumIndex idx1 = store.insert(10); EXPECT_EQ(AddressSpace(2, 1, ADDRESS_LIMIT), store.get_values_address_space_usage()); - EnumIndex idx2 = store.insert(20); + EnumIndex idx1 = store.insert(10); // Address limit increases because buffer is re-sized. EXPECT_EQ(AddressSpace(3, 1, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); + EnumIndex idx2 = store.insert(20); + EXPECT_EQ(AddressSpace(4, 1, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); dec_ref_count(store, idx1); - EXPECT_EQ(AddressSpace(3, 2, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); + EXPECT_EQ(AddressSpace(4, 2, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); dec_ref_count(store, idx2); - EXPECT_EQ(AddressSpace(3, 3, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); + EXPECT_EQ(AddressSpace(4, 3, ADDRESS_LIMIT + 2), store.get_values_address_space_usage()); } TEST(EnumStoreTest, provided_memory_allocator_is_used) { AllocStats stats; - NumericEnumStore ses(false, DictionaryConfig::Type::BTREE, std::make_unique<MemoryAllocatorObserver>(stats)); + NumericEnumStore ses(false, DictionaryConfig::Type::BTREE, std::make_unique<MemoryAllocatorObserver>(stats), attribute::getUndefined<NumericEnumStore::EntryType>()); EXPECT_EQ(AllocStats(1, 0), stats); } @@ -539,6 +560,7 @@ TYPED_TEST_SUITE(LoaderTest, LoaderTestTypes); TYPED_TEST(LoaderTest, store_is_instantiated_with_enumerated_loader) { + this->store.clear_default_value_ref(); auto loader = this->store.make_enumerated_loader(); this->load_values(loader); loader.allocate_enums_histogram(); @@ -554,6 +576,7 @@ TYPED_TEST(LoaderTest, store_is_instantiated_with_enumerated_loader) TYPED_TEST(LoaderTest, store_is_instantiated_with_enumerated_postings_loader) { + this->store.clear_default_value_ref(); auto loader = this->store.make_enumerated_postings_loader(); this->load_values(loader); this->set_ref_count(0, 1, loader); @@ -568,6 +591,7 @@ TYPED_TEST(LoaderTest, store_is_instantiated_with_enumerated_postings_loader) TYPED_TEST(LoaderTest, store_is_instantiated_with_non_enumerated_loader) { + this->store.clear_default_value_ref(); auto loader = this->store.make_non_enumerated_loader(); using MyValues = LoaderTestValues<typename TypeParam::EnumStoreType>; loader.insert(MyValues::values[0], 100); @@ -610,6 +634,7 @@ public: void test_normalize_posting_lists(bool use_filter, bool one_filter); void test_foreach_posting_list(bool one_filter); static EntryRef fake_pidx() { return EntryRef(42); } + EnumIndex check_default_value_ref() const noexcept; }; template <typename EnumStoreTypeAndDictionaryType> @@ -775,6 +800,16 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_li clear_sample_values(large_population); } +template <typename EnumStoreTypeAndDictionaryType> +EnumIndex +EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::check_default_value_ref() const noexcept +{ + EnumIndex default_value_ref = store.get_default_value_ref().load_relaxed(); + EXPECT_TRUE(default_value_ref.valid()); + EXPECT_EQ(attribute::getUndefined<EntryType>(), store.get_value(default_value_ref)); + return default_value_ref; +} + using EnumStoreDictionaryTestTypes = ::testing::Types<BTreeNumericEnumStore, HybridNumericEnumStore, HashNumericEnumStore>; TYPED_TEST_SUITE(EnumStoreDictionaryTest, EnumStoreDictionaryTestTypes); @@ -875,6 +910,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) updater.commit(); generation_t gen = 3; inc_generation(gen, this->store); + // Compact dictionary auto& dict = this->store.get_dictionary(); if (dict.get_has_btree_dictionary()) { EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_btree_memory_usage().deadBytes()); @@ -902,8 +938,31 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) if (dict.get_has_hash_dictionary()) { EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, dict.get_hash_memory_usage().deadBytes()); } + auto old_default_value_ref = this->check_default_value_ref(); + // Compact values + EXPECT_LT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes()); + compaction_strategy = CompactionStrategy::make_compact_all_active_buffers_strategy(); + int compact_values_count = 0; + for (uint32_t i = 0; i < 2; ++i) { + this->store.update_stat(compaction_strategy); + auto remapper = this->store.consider_compact_values(compaction_strategy); + if (remapper) { + remapper->done(); + ++compact_values_count; + } else { + break; + } + EXPECT_FALSE(this->store.consider_compact_values(compaction_strategy)); + inc_generation(gen, this->store); + } + EXPECT_EQ(1, compact_values_count); + auto new_default_value_ref = this->check_default_value_ref(); + EXPECT_NE(old_default_value_ref, new_default_value_ref); + EXPECT_GT(CompactionStrategy::DEAD_BYTES_SLACK, this->store.get_values_memory_usage().deadBytes()); + std::vector<int32_t> exp_values; std::vector<int32_t> values; + exp_values.push_back(std::numeric_limits<int32_t>::min()); for (int32_t i = 0; i < 20; ++i) { exp_values.push_back(i); } diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index f354f635def..2c202d9131b 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/query/streaming/query.h> +#include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> #include <vespa/searchlib/query/tree/querybuilder.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> @@ -804,6 +805,42 @@ TEST("testSameElementEvaluate") { EXPECT_TRUE(sameElem->evaluate()); } +TEST("test_nearest_neighbor_query_node") +{ + QueryBuilder<SimpleQueryNodeTypes> builder; + constexpr double distance_threshold = 35.5; + constexpr int32_t id = 42; + constexpr int32_t weight = 1; + constexpr uint32_t target_num_hits = 100; + constexpr bool allow_approximate = false; + constexpr uint32_t explore_additional_hits = 800; + constexpr double raw_score = 0.5; + builder.add_nearest_neighbor_term("qtensor", "field", id, Weight(weight), target_num_hits, allow_approximate, explore_additional_hits, distance_threshold); + auto build_node = builder.build(); + auto stack_dump = StackDumpCreator::create(*build_node); + QueryNodeResultFactory empty; + Query q(empty, stack_dump); + auto* qterm = dynamic_cast<QueryTerm *>(&q.getRoot()); + EXPECT_TRUE(qterm != nullptr); + auto* node = dynamic_cast<NearestNeighborQueryNode *>(&q.getRoot()); + EXPECT_TRUE(node != nullptr); + EXPECT_EQUAL(node, qterm->as_nearest_neighbor_query_node()); + EXPECT_EQUAL("qtensor", node->get_query_tensor_name()); + EXPECT_EQUAL("field", node->getIndex()); + EXPECT_EQUAL(id, static_cast<int32_t>(node->uniqueId())); + EXPECT_EQUAL(weight, node->weight().percent()); + EXPECT_EQUAL(distance_threshold, node->get_distance_threshold()); + EXPECT_FALSE(node->get_raw_score().has_value()); + EXPECT_FALSE(node->evaluate()); + node->set_raw_score(raw_score); + EXPECT_TRUE(node->get_raw_score().has_value()); + EXPECT_EQUAL(raw_score, node->get_raw_score().value()); + EXPECT_TRUE(node->evaluate()); + node->reset(); + EXPECT_FALSE(node->get_raw_score().has_value()); + EXPECT_FALSE(node->evaluate()); +} + TEST("Control the size of query terms") { EXPECT_EQUAL(112u, sizeof(QueryTermSimple)); EXPECT_EQUAL(128u, sizeof(QueryTermUCS4)); diff --git a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h index bbe3198a8dc..a080648c054 100644 --- a/searchlib/src/vespa/searchcommon/common/undefinedvalues.h +++ b/searchlib/src/vespa/searchcommon/common/undefinedvalues.h @@ -24,6 +24,10 @@ inline constexpr double getUndefined<double>() { return -std::numeric_limits<double>::quiet_NaN(); } +template <> +inline constexpr const char* getUndefined<const char*>() { + return ""; +} // for all signed integers template <typename T> diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 1a2c8c43b94..f4ab447ed51 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -353,6 +353,8 @@ AttributeVector::load(vespalib::Executor * executor) { bool loaded = onLoad(executor); if (loaded) { commit(); + incGeneration(); + updateStat(true); } _loaded = loaded; return _loaded; @@ -451,19 +453,6 @@ AttributeVector::set_reserved_doc_values() return; } clearDoc(docId); - if (hasMultiValue()) { - if (isFloatingPointType()) { - auto * vec = dynamic_cast<FloatingPointAttribute *>(this); - bool appendedUndefined = vec->append(0, attribute::getUndefined<double>(), 1); - assert(appendedUndefined); - (void) appendedUndefined; - } else if (isStringType()) { - auto * vec = dynamic_cast<StringAttribute *>(this); - bool appendedUndefined = vec->append(0, StringAttribute::defaultValue(), 1); - assert(appendedUndefined); - (void) appendedUndefined; - } - } commit(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp index eeaa3e9539f..c1345b4f770 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp @@ -93,6 +93,7 @@ EnumeratedLoader::build_dictionary() { _store.get_dictionary().build(_indexes); release_enum_indexes(); + _store.setup_default_value_ref(); } EnumeratedPostingsLoader::EnumeratedPostingsLoader(IEnumStore& store) @@ -131,6 +132,13 @@ EnumeratedPostingsLoader::build_dictionary() _store.get_dictionary().build_with_payload(_indexes, _posting_indexes); release_enum_indexes(); EntryRefVector().swap(_posting_indexes); + _store.setup_default_value_ref(); +} + +void +EnumeratedPostingsLoader::build_empty_dictionary() +{ + _store.setup_default_value_ref(); } } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h index 2a72fcac628..937ceb91628 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h @@ -85,6 +85,7 @@ public: void set_ref_count(Index idx, uint32_t ref_count); vespalib::ArrayRef<EntryRef> initialize_empty_posting_indexes(); void build_dictionary(); + void build_empty_dictionary(); }; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index f0ff23a06b4..4753dbe65f9 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -50,13 +50,12 @@ protected: /* * Iterate through the change vector and find new unique values. - * Perform compaction if necessary and insert the new unique values into the EnumStore. + * Insert the new unique values into the EnumStore. */ void insertNewUniqueValues(EnumStoreBatchUpdater& updater); virtual void considerAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) = 0; vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override; void populate_address_space_usage(AddressSpaceUsage& usage) const override; - void cache_change_data_entry_ref(const Change& c) const; public: EnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); ~EnumAttribute(); diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index c5188b89129..66d555df3cb 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -15,7 +15,7 @@ EnumAttribute<B>:: EnumAttribute(const vespalib::string &baseFileName, const AttributeVector::Config &cfg) : B(baseFileName, cfg), - _enumStore(cfg.fastSearch(), cfg.get_dictionary_config(), this->get_memory_allocator()) + _enumStore(cfg.fastSearch(), cfg.get_dictionary_config(), this->get_memory_allocator(), this->_defaultValue._data.raw()) { this->setEnum(true); } @@ -50,6 +50,7 @@ void EnumAttribute<B>::load_enum_store(LoadedVector& loaded) loader.set_ref_count_for_last_value(prevRefCount); } loader.build_dictionary(); + _enumStore.setup_default_value_ref(); } } @@ -85,15 +86,4 @@ EnumAttribute<B>::populate_address_space_usage(AddressSpaceUsage& usage) const usage.set(AddressSpaceComponents::enum_store, _enumStore.get_values_address_space_usage()); } -template <typename B> -void -EnumAttribute<B>::cache_change_data_entry_ref(const Change& c) const -{ - EnumIndex new_idx; - _enumStore.find_index(c._data.raw(), new_idx); - c.set_entry_ref(new_idx.ref()); -} - } // namespace search - - diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 266437fafa1..f6467194d74 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -28,6 +28,9 @@ namespace search { * It uses an instance of vespalib::datastore::UniqueStore to store the actual values. * It also exposes the dictionary used for fast lookups into the set of unique values. * + * The default value is always present except for a short time window + * during attribute vector load. + * * @tparam EntryType The type of the entries/values stored. * It has special handling of type 'const char *' for strings. */ @@ -55,6 +58,8 @@ private: ComparatorType _comparator; ComparatorType _foldedComparator; enumstore::EnumStoreCompactionSpec _compaction_spec; + EntryType _default_value; + AtomicIndex _default_value_ref; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -75,7 +80,7 @@ private: std::unique_ptr<EntryComparator> allocate_optionally_folded_comparator(bool folded) const; ComparatorType make_optionally_folded_comparator(bool folded) const; public: - EnumStoreT(bool has_postings, const search::DictionaryConfig& dict_cfg, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator); + EnumStoreT(bool has_postings, const search::DictionaryConfig& dict_cfg, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator, EntryType default_value); EnumStoreT(bool has_postings, const search::DictionaryConfig & dict_cfg); ~EnumStoreT() override; @@ -201,6 +206,9 @@ public: bool find_index(EntryType value, Index& idx) const; void free_unused_values() override; void free_unused_values(IndexList to_remove); + void clear_default_value_ref() override; + void setup_default_value_ref() override; + const AtomicIndex& get_default_value_ref() const noexcept { return _default_value_ref; } vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index bc767a296eb..c0eebee8e94 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -17,6 +17,7 @@ #include <vespa/vespalib/datastore/unique_store.hpp> #include <vespa/vespalib/datastore/unique_store_string_allocator.hpp> #include <vespa/vespalib/util/array.hpp> +#include <vespa/searchcommon/common/undefinedvalues.h> #include <vespa/searchlib/util/bufferwriter.h> #include <vespa/vespalib/datastore/compaction_strategy.h> @@ -72,23 +73,26 @@ EnumStoreT<EntryT>::load_unique_value(const void* src, size_t available, Index& } template <typename EntryT> -EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig& dict_cfg, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator) +EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig& dict_cfg, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator, EntryType default_value) : _store(std::move(memory_allocator)), _dict(), _is_folded(dict_cfg.getMatch() == DictionaryConfig::Match::UNCASED), _comparator(_store.get_data_store()), _foldedComparator(make_optionally_folded_comparator(is_folded())), - _compaction_spec() + _compaction_spec(), + _default_value(default_value), + _default_value_ref() { _store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg, allocate_comparator(), allocate_optionally_folded_comparator(is_folded()))); _dict = static_cast<IEnumStoreDictionary*>(&_store.get_dictionary()); + setup_default_value_ref(); } template <typename EntryT> EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig& dict_cfg) - : EnumStoreT<EntryT>(has_postings, dict_cfg, {}) + : EnumStoreT<EntryT>(has_postings, dict_cfg, {}, attribute::getUndefined<EntryType>()) { } @@ -215,6 +219,33 @@ EnumStoreT<EntryT>::insert(EntryType value) return _store.add(value).ref(); } + +template <typename EntryT> +void +EnumStoreT<EntryT>::clear_default_value_ref() +{ + auto ref = _default_value_ref.load_relaxed(); + if (ref.valid()) { + auto updater = make_batch_updater(); + updater.dec_ref_count(ref); + _default_value_ref.store_relaxed(Index()); + updater.commit(); + } +} + +template <typename EntryT> +void +EnumStoreT<EntryT>::setup_default_value_ref() +{ + if (!_default_value_ref.load_relaxed().valid()) { + auto updater = make_batch_updater(); + auto ref = updater.insert(_default_value); + updater.inc_ref_count(ref); + _default_value_ref.store_relaxed(ref); + updater.commit(); + } +} + template <typename EntryT> vespalib::MemoryUsage EnumStoreT<EntryT>::update_stat(const CompactionStrategy& compaction_strategy) @@ -236,7 +267,14 @@ template <typename EntryT> std::unique_ptr<IEnumStore::EnumIndexRemapper> EnumStoreT<EntryT>::compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { - return _store.compact_worst(compaction_spec, compaction_strategy); + auto remapper = _store.compact_worst(compaction_spec, compaction_strategy); + if (remapper) { + auto ref = _default_value_ref.load_relaxed(); + if (ref.valid() && remapper->get_entry_ref_filter().has(ref)) { + _default_value_ref.store_release(remapper->remap(ref)); + } + } + return remapper; } template <typename EntryT> diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index 2157db3e5ed..aa9fd549b60 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -74,6 +74,8 @@ public: virtual std::unique_ptr<Enumerator> make_enumerator() = 0; virtual std::unique_ptr<vespalib::datastore::EntryComparator> allocate_comparator() const = 0; + virtual void clear_default_value_ref() = 0; + virtual void setup_default_value_ref() = 0; }; } diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index edfea23f48d..59c1216829d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -97,6 +97,10 @@ MultiValueNumericEnumAttribute<B, M>::onLoad(vespalib::Executor *) return false; } + this->_enumStore.clear_default_value_ref(); + this->commit(); + this->incGeneration(); + this->setCreateSerialNum(attrReader.getCreateSerialNum()); if (attrReader.getEnumerated()) { diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index a63862126fa..7b11fcd59f4 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -42,7 +42,6 @@ MultiValueStringAttributeT<B, M>::freezeEnumDictionary() this->getEnumStore().freeze_dictionary(); } - template <typename B, typename M> std::unique_ptr<attribute::SearchContext> MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm, diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp index 6ef3b575c3e..01e68949f92 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp @@ -49,6 +49,7 @@ PostingListAttributeBase<P>::handle_load_posting_lists_and_update_enum_store(enu PostingChange<P> postings; const auto& loaded_enums = loader.get_loaded_enums(); if (loaded_enums.empty()) { + loader.build_empty_dictionary(); return; } uint32_t preve = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h index aac9a7b5416..7f36238ec6a 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -67,14 +67,14 @@ protected: void considerAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) override; // implemented by single value numeric enum attribute. - virtual void considerUpdateAttributeChange(const Change & c) { (void) c; } + virtual void considerUpdateAttributeChange(DocId, const Change&) { } virtual void considerArithmeticAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) { (void) c; (void) inserter; } virtual void applyValueChanges(EnumStoreBatchUpdater& updater) ; virtual void applyArithmeticValueChange(const Change& c, EnumStoreBatchUpdater& updater) { (void) c; (void) updater; } - void updateEnumRefCounts(const Change& c, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBatchUpdater& updater); + void updateEnumRefCounts(DocId doc, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBatchUpdater& updater); virtual void freezeEnumDictionary() { this->getEnumStore().freeze_dictionary(); diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index f4f2b777abd..95976609940 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -146,7 +146,7 @@ SingleValueEnumAttribute<B>::considerUpdateAttributeChange(const Change & c, Enu } else { c.set_entry_ref(idx.ref()); } - considerUpdateAttributeChange(c); // for numeric + considerUpdateAttributeChange(c._doc, c); // for numeric } template <typename B> @@ -158,9 +158,7 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, EnumStore } else if (c._type >= ChangeBase::ADD && c._type <= ChangeBase::DIV) { considerArithmeticAttributeChange(c, inserter); // for numeric } else if (c._type == ChangeBase::CLEARDOC) { - Change clearDoc(this->_defaultValue); - clearDoc._doc = c._doc; - considerUpdateAttributeChange(clearDoc, inserter); + considerUpdateAttributeChange(c._doc, this->_defaultValue); } } @@ -175,7 +173,7 @@ SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change& c, EnumStoreBa } else { this->_enumStore.find_index(c._data.raw(), newIdx); } - updateEnumRefCounts(c, newIdx, oldIdx, updater); + updateEnumRefCounts(c._doc, newIdx, oldIdx, updater); } template <typename B> @@ -183,30 +181,26 @@ void SingleValueEnumAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& updater) { ValueModifier valueGuard(this->getValueModifier()); - // This avoids searching for the defaultValue in the enum store for each CLEARDOC in the change vector. - this->cache_change_data_entry_ref(this->_defaultValue); for (const auto& change : this->_changes.getInsertOrder()) { if (change._type == ChangeBase::UPDATE) { applyUpdateValueChange(change, updater); } else if (change._type >= ChangeBase::ADD && change._type <= ChangeBase::DIV) { applyArithmeticValueChange(change, updater); } else if (change._type == ChangeBase::CLEARDOC) { - Change clearDoc(this->_defaultValue); - clearDoc._doc = change._doc; - applyUpdateValueChange(clearDoc, updater); + EnumIndex oldIdx = _enumIndices[change._doc].load_relaxed(); + EnumIndex newIdx = this->_enumStore.get_default_value_ref().load_relaxed(); + updateEnumRefCounts(change._doc, newIdx, oldIdx, updater); } } - // We must clear the cached entry ref as the defaultValue might be located in another data buffer on later invocations. - this->_defaultValue.clear_entry_ref(); } template <typename B> void -SingleValueEnumAttribute<B>::updateEnumRefCounts(const Change& c, EnumIndex newIdx, EnumIndex oldIdx, +SingleValueEnumAttribute<B>::updateEnumRefCounts(DocId doc, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBatchUpdater& updater) { updater.inc_ref_count(newIdx); - _enumIndices[c._doc].store_release(newIdx); + _enumIndices[doc].store_release(newIdx); if (oldIdx.valid()) { updater.dec_ref_count(oldIdx); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp index a105d980986..c75ee0aacb5 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp @@ -134,8 +134,9 @@ SingleValueNumericAttribute<B>::onLoad(vespalib::Executor *) PrimitiveReader<T> attrReader(*this); bool ok(attrReader.getHasLoadData()); - if (!ok) + if (!ok) { return false; + } this->setCreateSerialNum(attrReader.getCreateSerialNum()); diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h index 5b0e1c6131e..4eeb6ceda57 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h @@ -43,7 +43,7 @@ private: protected: // from SingleValueEnumAttribute - void considerUpdateAttributeChange(const Change & c) override; + void considerUpdateAttributeChange(DocId doc, const Change & c) override; void considerArithmeticAttributeChange(const Change & c, EnumStoreBatchUpdater & inserter) override; void applyArithmeticValueChange(const Change& c, EnumStoreBatchUpdater& updater) override; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp index 52ea0a53533..b840a0516b2 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp @@ -15,9 +15,9 @@ namespace search { template <typename B> void -SingleValueNumericEnumAttribute<B>::considerUpdateAttributeChange(const Change & c) +SingleValueNumericEnumAttribute<B>::considerUpdateAttributeChange(DocId doc, const Change & c) { - _currDocValues[c._doc] = c._data.get(); + _currDocValues[doc] = c._data.get(); } template <typename B> @@ -53,7 +53,7 @@ SingleValueNumericEnumAttribute<B>::applyArithmeticValueChange(const Change& c, T newValue = this->template applyArithmetic<T, typename Change::DataType>(get(c._doc), c._data.getArithOperand(), c._type); this->_enumStore.find_index(newValue, newIdx); - this->updateEnumRefCounts(c, newIdx, oldIdx, updater); + this->updateEnumRefCounts(c._doc, newIdx, oldIdx, updater); } template <typename B> @@ -117,6 +117,10 @@ SingleValueNumericEnumAttribute<B>::onLoad(vespalib::Executor *) return false; } + this->_enumStore.clear_default_value_ref(); + this->commit(); + this->incGeneration(); + this->setCreateSerialNum(attrReader.getCreateSerialNum()); if (attrReader.getEnumerated()) { diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index de4a7157dae..e353d03a9e8 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -89,8 +89,6 @@ SingleValueNumericPostingAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& // used to make sure several arithmetic operations on the same document in a single commit works std::map<DocId, EnumIndex> currEnumIndices; - // This avoids searching for the defaultValue in the enum store for each CLEARDOC in the change vector. - this->cache_change_data_entry_ref(this->_defaultValue); for (const auto& change : this->_changes.getInsertOrder()) { auto enumIter = currEnumIndices.find(change._doc); EnumIndex oldIdx; @@ -111,13 +109,9 @@ SingleValueNumericPostingAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& currEnumIndices[change._doc] = newIdx; } } else if(change._type == ChangeBase::CLEARDOC) { - Change clearDoc(this->_defaultValue); - clearDoc._doc = change._doc; - applyUpdateValueChange(clearDoc, enumStore, currEnumIndices); + currEnumIndices[change._doc] = enumStore.get_default_value_ref().load_relaxed(); } } - // We must clear the cached entry ref as the defaultValue might be located in another data buffer on later invocations. - this->_defaultValue.clear_entry_ref(); makePostingChange(enumStore.get_comparator(), currEnumIndices, changePost); diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index 82a4393fc91..69fe6435a03 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -40,7 +40,6 @@ SingleValueStringAttributeT<B>::freezeEnumDictionary() this->getEnumStore().freeze_dictionary(); } - template <typename B> std::unique_ptr<attribute::SearchContext> SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index 1ec9b54a73b..5b5214f6d3e 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -98,8 +98,6 @@ SingleValueStringPostingAttributeT<B>::applyValueChanges(EnumStoreBatchUpdater& // used to make sure several arithmetic operations on the same document in a single commit works std::map<DocId, EnumIndex> currEnumIndices; - // This avoids searching for the defaultValue in the enum store for each CLEARDOC in the change vector. - this->cache_change_data_entry_ref(this->_defaultValue); for (const auto& change : this->_changes.getInsertOrder()) { auto enumIter = currEnumIndices.find(change._doc); EnumIndex oldIdx; @@ -111,12 +109,9 @@ SingleValueStringPostingAttributeT<B>::applyValueChanges(EnumStoreBatchUpdater& if (change._type == ChangeBase::UPDATE) { applyUpdateValueChange(change, enumStore, currEnumIndices); } else if (change._type == ChangeBase::CLEARDOC) { - this->_defaultValue._doc = change._doc; - applyUpdateValueChange(this->_defaultValue, enumStore, currEnumIndices); + currEnumIndices[change._doc] = enumStore.get_default_value_ref().load_relaxed(); } } - // We must clear the cached entry ref as the defaultValue might be located in another data buffer on later invocations. - this->_defaultValue.clear_entry_ref(); makePostingChange(enumStore.get_folded_comparator(), dictionary, currEnumIndices, changePost); diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 80967affaa7..b37318d470e 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -223,6 +223,10 @@ StringAttribute::onLoad(vespalib::Executor *) return false; } + getEnumStoreBase()->clear_default_value_ref(); + commit(); + incGeneration(); + setCreateSerialNum(attrReader.getCreateSerialNum()); assert(attrReader.getEnumerated()); diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 5c6bf3c6b6a..98a3316947b 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -62,7 +62,7 @@ protected: using ChangeVector = ChangeVectorT<Change>; using EnumEntryType = const char*; ChangeVector _changes; - Change _defaultValue; + const Change _defaultValue; bool onLoad(vespalib::Executor *executor) override; bool onLoadEnumerated(ReaderBase &attrReader); diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h index 74728ab1f2e..a79e33dba32 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.h +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h @@ -23,7 +23,8 @@ public: SUFFIXTERM = 4, REGEXP = 5, GEO_LOCATION = 6, - FUZZYTERM = 7 + FUZZYTERM = 7, + NEAREST_NEIGHBOR = 8 }; template <typename N> @@ -65,6 +66,7 @@ public: bool isRegex() const { return (_type == Type::REGEXP); } bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); } bool isFuzzy() const { return (_type == Type::FUZZYTERM); } + bool is_nearest_neighbor() const noexcept { return (_type == Type::NEAREST_NEIGHBOR); } bool empty() const { return _term.empty(); } virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; vespalib::string getClassName() const; diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt index 27f9870dc18..c71b838fb37 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(searchlib_query_streaming OBJECT SOURCES + nearest_neighbor_query_node.cpp query.cpp querynode.cpp querynoderesultbase.cpp diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp new file mode 100644 index 00000000000..d1c37cd6dcd --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "nearest_neighbor_query_node.h" + +namespace search::streaming { + +NearestNeighborQueryNode::NearestNeighborQueryNode(std::unique_ptr<QueryNodeResultBase> resultBase, const string& term, const string& index, int32_t id, search::query::Weight weight, double distance_threshold) + : QueryTerm(std::move(resultBase), term, index, Type::NEAREST_NEIGHBOR), + _distance_threshold(distance_threshold), + _raw_score() +{ + setUniqueId(id); + setWeight(weight); +} + +NearestNeighborQueryNode::~NearestNeighborQueryNode() = default; + +bool +NearestNeighborQueryNode::evaluate() const +{ + return _raw_score.has_value(); +} + +void +NearestNeighborQueryNode::reset() +{ + _raw_score.reset(); +} + +NearestNeighborQueryNode* +NearestNeighborQueryNode::as_nearest_neighbor_query_node() noexcept +{ + return this; +} + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h new file mode 100644 index 00000000000..0beb130c53d --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "queryterm.h" +#include <optional> + +namespace search::streaming { + +/* + * Nearest neighbor query node. + */ +class NearestNeighborQueryNode: public QueryTerm { +private: + double _distance_threshold; + // When this value is set it also indicates a match + std::optional<double> _raw_score; + +public: + NearestNeighborQueryNode(std::unique_ptr<QueryNodeResultBase> resultBase, const string& term, const string& index, int32_t id, search::query::Weight weight, double distance_threshold); + NearestNeighborQueryNode(const NearestNeighborQueryNode &) = delete; + NearestNeighborQueryNode & operator = (const NearestNeighborQueryNode &) = delete; + NearestNeighborQueryNode(NearestNeighborQueryNode &&) = delete; + NearestNeighborQueryNode & operator = (NearestNeighborQueryNode &&) = delete; + ~NearestNeighborQueryNode() override; + bool evaluate() const override; + void reset() override; + NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept override; + const vespalib::string& get_query_tensor_name() const { return getTermString(); } + double get_distance_threshold() const { return _distance_threshold; } + void set_raw_score(double value) { _raw_score = value; } + const std::optional<double>& get_raw_score() const noexcept { return _raw_score; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 6d59886a4f5..226cb92c894 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "query.h" +#include "nearest_neighbor_query_node.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <charconv> #include <vespa/log/log.h> @@ -77,6 +78,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor queryRep.getIndexName(), QueryTerm::Type::GEO_LOCATION); break; + case ParseItem::ITEM_NEAREST_NEIGHBOR: + qn = build_nearest_neighbor_query_node(factory, queryRep); + break; case ParseItem::ITEM_NUMTERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: @@ -191,4 +195,20 @@ const HitList & QueryNode::evaluateHits(HitList & hl) const return hl; } +std::unique_ptr<QueryNode> +QueryNode::build_nearest_neighbor_query_node(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& query_rep) +{ + vespalib::stringref query_tensor_name = query_rep.getTerm(); + vespalib::stringref field_name = query_rep.getIndexName(); + int32_t id = query_rep.getUniqueId(); + search::query::Weight weight = query_rep.GetWeight(); + double distance_threshold = query_rep.getDistanceThreshold(); + return std::make_unique<NearestNeighborQueryNode>(factory.create(), + query_tensor_name, + field_name, + id, + weight, + distance_threshold); +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h index 574a3c16ca3..c3fa2b63f69 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h @@ -28,6 +28,7 @@ using ConstQueryTermList = std::vector<const QueryTerm *>; */ class QueryNode { + static std::unique_ptr<QueryNode> build_nearest_neighbor_query_node(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); public: using UP = std::unique_ptr<QueryNode>; @@ -54,7 +55,7 @@ class QueryNode virtual size_t depth() const { return 1; } /// Return the width of this tree. virtual size_t width() const { return 1; } - static UP Build(const QueryNode * parent, const QueryNodeResultFactory & org, SimpleQueryStackDumpIterator & queryRep, bool allowRewrite); + static UP Build(const QueryNode * parent, const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator & queryRep, bool allowRewrite); }; /// A list conating the QuerNode objects. With copy/assignment. diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 83f4410a520..11557bf1dcc 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -92,4 +92,10 @@ void QueryTerm::add(unsigned pos, unsigned context, uint32_t elemId, int32_t wei _hitList.emplace_back(pos, context, elemId, weight_); } +NearestNeighborQueryNode* +QueryTerm::as_nearest_neighbor_query_node() noexcept +{ + return nullptr; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index dd9f56b11e1..51987225692 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -12,6 +12,8 @@ namespace search::streaming { +class NearestNeighborQueryNode; + /** This is a leaf in the Query tree. All terms are leafs. A QueryTerm has the index for where to find the term. The term is a string, @@ -57,7 +59,7 @@ public: QueryTerm & operator = (const QueryTerm &) = delete; QueryTerm(QueryTerm &&) = delete; QueryTerm & operator = (QueryTerm &&) = delete; - ~QueryTerm(); + ~QueryTerm() override; bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; void reset() override; @@ -87,6 +89,7 @@ public: const string & getIndex() const override { return _index; } void setFuzzyMaxEditDistance(uint32_t fuzzyMaxEditDistance) { _fuzzyMaxEditDistance = fuzzyMaxEditDistance; } void setFuzzyPrefixLength(uint32_t fuzzyPrefixLength) { _fuzzyPrefixLength = fuzzyPrefixLength; } + virtual NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept; protected: using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>; string _index; |