diff options
Diffstat (limited to 'searchlib')
129 files changed, 2987 insertions, 1940 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 219439a1224..5628db99171 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -77,8 +77,7 @@ vespa_define_module( src/tests/attribute/compaction src/tests/attribute/dfa_fuzzy_matcher src/tests/attribute/direct_multi_term_blueprint - src/tests/attribute/document_weight_iterator - src/tests/attribute/document_weight_or_filter_search + src/tests/attribute/direct_posting_store src/tests/attribute/enum_attribute_compaction src/tests/attribute/enum_comparator src/tests/attribute/enumeratedsave @@ -87,6 +86,7 @@ vespa_define_module( src/tests/attribute/guard src/tests/attribute/imported_attribute_vector src/tests/attribute/imported_search_context + src/tests/attribute/multi_term_or_filter_search src/tests/attribute/multi_value_mapping src/tests/attribute/multi_value_read_view src/tests/attribute/posting_list_merger @@ -196,6 +196,7 @@ vespa_define_module( src/tests/queryeval/equiv src/tests/queryeval/fake_searchable src/tests/queryeval/filter_search + src/tests/queryeval/flow src/tests/queryeval/getnodeweight src/tests/queryeval/global_filter src/tests/queryeval/matching_elements_search diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java index fce0485f41a..60617687f44 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java @@ -7,6 +7,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -15,21 +16,21 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import 
java.util.LinkedList; import java.util.List; import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; /** * @author Simon Thoresen Hult */ abstract class XmlHelper { - - private static final Charset UTF8 = Charset.forName("UTF-8"); - public static Element parseXml(String xml) throws ParserConfigurationException, IOException, SAXException { - return parseXmlStream(new ByteArrayInputStream(xml.getBytes(UTF8))); + return parseXmlStream(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))); } public static Element parseXmlFile(String fileName) @@ -41,22 +42,27 @@ abstract class XmlHelper { public static Element parseXmlStream(InputStream in) throws ParserConfigurationException, IOException, SAXException { - DocumentBuilderFactory factory = createDocumentBuilderFactory(); - DocumentBuilder builder = factory.newDocumentBuilder(); + DocumentBuilder builder = createDocumentBuilderFactory().newDocumentBuilder(); Document doc = builder.parse(in); return doc.getDocumentElement(); } - private static DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setXIncludeAware(false); + private static DocumentBuilderFactory createDocumentBuilderFactory() { + try { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - // XXE prevention - factory.setFeature("http://xml.org/sax/features/external-general-entities", false); - factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); - factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - return factory; + // XXE prevention + 
factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + return factory; + } catch (ParserConfigurationException e) { + throw new RuntimeException("Failed to initialize XML parser", e); + } } public static String getAttributeText(Node node, String name) { diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 181c0fdf110..f612bdda87f 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -7,7 +7,7 @@ #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/executeinfo.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/util/randomgenerator.h> @@ -432,7 +432,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre const auto* dww = v->as_docid_with_weight_posting_store(); if (dww != nullptr) { auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot()); - using DWSI = search::queryeval::DocumentWeightSearchIterator; + using DWSI = search::queryeval::DocidWithWeightSearchIterator; TermFieldMatchData md; auto dwsi = std::make_unique<DWSI>(md, *dww, lres); if (!filter) { diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp index f2341d0968e..899ddaa3cc0 100644 --- 
a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -3,7 +3,9 @@ #include <vespa/searchlib/attribute/direct_multi_term_blueprint.h> #include <vespa/searchlib/attribute/i_docid_posting_store.h> #include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> +#include <vespa/searchlib/attribute/in_term_search.h> #include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/searchlib/queryeval/searchiterator.h> @@ -19,13 +21,22 @@ using namespace search::queryeval; using namespace search; using testing::StartsWith; -struct IntegerKey : public IDirectPostingStore::LookupKey { +using LookupKey = IDirectPostingStore::LookupKey; + +struct IntegerKey : public LookupKey { int64_t _value; IntegerKey(int64_t value_in) : _value(value_in) {} vespalib::stringref asString() const override { abort(); } bool asInteger(int64_t& value) const override { value = _value; return true; } }; +struct StringKey : public LookupKey { + vespalib::string _value; + StringKey(int64_t value_in) : _value(std::to_string(value_in)) {} + vespalib::stringref asString() const override { return _value; } + bool asInteger(int64_t&) const override { abort(); } +}; + const vespalib::string field_name = "test"; constexpr uint32_t field_id = 3; uint32_t doc_id_limit = 500; @@ -50,112 +61,153 @@ concat(const Docids& a, const Docids& b) return res; } +template <typename AttributeType, typename DataType> +void +populate_attribute(AttributeType& attr, const std::vector<DataType>& values) +{ + // Values 0 and 1 have btree (short) posting lists. + attr.update(10, values[0]); + attr.update(30, values[1]); + attr.update(31, values[1]); + + // Values 2 and 3 have bitvector posting lists. 
+ // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) + for (auto docid : range(100, 128)) { + attr.update(docid, values[2]); + } + for (auto docid : range(300, 128)) { + attr.update(docid, values[3]); + } + attr.commit(true); +} + std::shared_ptr<AttributeVector> -make_attribute(bool field_is_filter, CollectionType col_type) +make_attribute(CollectionType col_type, BasicType type, bool field_is_filter) { - Config cfg(BasicType::INT64, col_type); + Config cfg(type, col_type); cfg.setFastSearch(true); if (field_is_filter) { cfg.setIsFilter(field_is_filter); } uint32_t num_docs = doc_id_limit - 1; auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get(); - IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr); - - // Values 1 and 3 have btree (short) posting lists with weights. - real.update(10, 1); - real.update(30, 3); - real.update(31, 3); - - // Values 100 and 300 have bitvector posting lists. - // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) - for (auto docid : range(100, 128)) { - real.update(docid, 100); - } - for (auto docid : range(300, 128)) { - real.update(docid, 300); + if (type == BasicType::STRING) { + populate_attribute<StringAttribute, vespalib::string>(dynamic_cast<StringAttribute&>(*attr), + {"1", "3", "100", "300"}); + } else { + populate_attribute<IntegerAttribute, int64_t>(dynamic_cast<IntegerAttribute&>(*attr), + {1, 3, 100, 300}); } - attr->commit(true); return attr; } void -expect_has_btree_iterator(const IDirectPostingStore& store, int64_t term_value) +expect_has_btree_iterator(const IDirectPostingStore& store, const LookupKey& key) { auto snapshot = store.get_dictionary_snapshot(); - auto res = store.lookup(IntegerKey(term_value), snapshot); + auto res = store.lookup(key, snapshot); EXPECT_TRUE(store.has_btree_iterator(res.posting_idx)); } void -expect_has_bitvector_iterator(const IDirectPostingStore& 
store, int64_t term_value) +expect_has_bitvector_iterator(const IDirectPostingStore& store, const LookupKey& key) { auto snapshot = store.get_dictionary_snapshot(); - auto res = store.lookup(IntegerKey(term_value), snapshot); + auto res = store.lookup(key, snapshot); EXPECT_TRUE(store.has_bitvector(res.posting_idx)); } +template <typename LookupKeyType> void validate_posting_lists(const IDirectPostingStore& store) { - expect_has_btree_iterator(store, 1); - expect_has_btree_iterator(store, 3); + expect_has_btree_iterator(store, LookupKeyType(1)); + expect_has_btree_iterator(store, LookupKeyType(3)); if (store.has_always_btree_iterator()) { - expect_has_btree_iterator(store, 100); - expect_has_btree_iterator(store, 300); + expect_has_btree_iterator(store, LookupKeyType(100)); + expect_has_btree_iterator(store, LookupKeyType(300)); } - expect_has_bitvector_iterator(store, 100); - expect_has_bitvector_iterator(store, 300); + expect_has_bitvector_iterator(store, LookupKeyType(100)); + expect_has_bitvector_iterator(store, LookupKeyType(300)); } +enum OperatorType { + In, + WSet +}; + struct TestParam { + OperatorType op_type; CollectionType col_type; - TestParam(CollectionType col_type_in) : col_type(col_type_in) {} + BasicType type; + TestParam(OperatorType op_type_in, CollectionType col_type_in, BasicType type_in) + : op_type(op_type_in), col_type(col_type_in), type(type_in) {} ~TestParam() = default; }; std::ostream& operator<<(std::ostream& os, const TestParam& param) { - os << param.col_type.asString(); + os << (param.op_type == OperatorType::In ? 
"in_" : "wset_") << param.col_type.asString() << "_" << param.type.asString(); return os; } +using SingleInBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>; +using MultiInBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>; +using SingleWSetBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>; +using MultiWSetBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>; + class DirectMultiTermBlueprintTest : public ::testing::TestWithParam<TestParam> { public: - using SingleValueBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>; - using MultiValueBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>; std::shared_ptr<AttributeVector> attr; - std::shared_ptr<SingleValueBlueprintType> single_blueprint; - std::shared_ptr<MultiValueBlueprintType> multi_blueprint; - queryeval::ComplexLeafBlueprint* blueprint; + bool in_operator; + bool single_type; + bool integer_type; + std::shared_ptr<ComplexLeafBlueprint> blueprint; Blueprint::HitEstimate estimate; fef::TermFieldMatchData tfmd; fef::TermFieldMatchDataArray tfmda; DirectMultiTermBlueprintTest() : attr(), - single_blueprint(), - multi_blueprint(), + in_operator(true), + single_type(true), + integer_type(true), blueprint(), tfmd(), tfmda() { tfmda.add(&tfmd); } + ~DirectMultiTermBlueprintTest() {} void setup(bool field_is_filter, bool need_term_field_match_data) { - attr = make_attribute(field_is_filter, GetParam().col_type); + attr = make_attribute(GetParam().col_type, GetParam().type, field_is_filter); + in_operator = GetParam().op_type == OperatorType::In; + single_type = GetParam().col_type == CollectionType::SINGLE; + integer_type = GetParam().type != BasicType::STRING; FieldSpec spec(field_name, field_id, fef::TermFieldHandle(), field_is_filter); - if (GetParam().col_type == CollectionType::SINGLE) { - const auto* store = 
attr->as_docid_posting_store(); - ASSERT_TRUE(store); - validate_posting_lists(*store); - single_blueprint = std::make_shared<SingleValueBlueprintType>(spec, *attr, *store, 2); - blueprint = single_blueprint.get(); + const IDirectPostingStore* store; + if (single_type) { + auto real_store = attr->as_docid_posting_store(); + ASSERT_TRUE(real_store); + if (in_operator) { + blueprint = std::make_shared<SingleInBlueprintType>(spec, *attr, *real_store, 2); + } else { + blueprint = std::make_shared<SingleWSetBlueprintType>(spec, *attr, *real_store, 2); + } + store = real_store; + } else { + auto real_store = attr->as_docid_with_weight_posting_store(); + ASSERT_TRUE(real_store); + if (in_operator) { + blueprint = std::make_shared<MultiInBlueprintType>(spec, *attr, *real_store, 2); + } else { + blueprint = std::make_shared<MultiWSetBlueprintType>(spec, *attr, *real_store, 2); + } + store = real_store; + } + if (integer_type) { + validate_posting_lists<IntegerKey>(*store); } else { - const auto* store = attr->as_docid_with_weight_posting_store(); - ASSERT_TRUE(store); - validate_posting_lists(*store); - multi_blueprint = std::make_shared<MultiValueBlueprintType>(spec, *attr, *store, 2); - blueprint = multi_blueprint.get(); + validate_posting_lists<StringKey>(*store); } blueprint->setDocIdLimit(doc_id_limit); if (need_term_field_match_data) { @@ -164,16 +216,35 @@ public: tfmd.tagAsNotNeeded(); } } + template <typename BlueprintType> + void add_term_helper(BlueprintType& b, int64_t term_value) { + if (integer_type) { + b.addTerm(IntegerKey(term_value), 1, estimate); + } else { + b.addTerm(StringKey(term_value), 1, estimate); + } + } void add_term(int64_t term_value) { - if (single_blueprint) { - single_blueprint->addTerm(IntegerKey(term_value), 1, estimate); + if (single_type) { + if (in_operator) { + add_term_helper(dynamic_cast<SingleInBlueprintType&>(*blueprint), term_value); + } else { + add_term_helper(dynamic_cast<SingleWSetBlueprintType&>(*blueprint), term_value); + } 
} else { - multi_blueprint->addTerm(IntegerKey(term_value), 1, estimate); + if (in_operator) { + add_term_helper(dynamic_cast<MultiInBlueprintType&>(*blueprint), term_value); + } else { + add_term_helper(dynamic_cast<MultiWSetBlueprintType&>(*blueprint), term_value); + } } } std::unique_ptr<SearchIterator> create_leaf_search() const { return blueprint->createLeafSearch(tfmda, true); } + vespalib::string multi_term_iterator() const { + return in_operator ? "search::attribute::MultiTermOrFilterSearchImpl" : "search::queryeval::WeightedSetTermSearchImpl"; + } }; void @@ -201,30 +272,54 @@ expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_c INSTANTIATE_TEST_SUITE_P(DefaultInstantiation, DirectMultiTermBlueprintTest, - testing::Values(CollectionType::SINGLE, CollectionType::WSET), + testing::Values(TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::INT64), + TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::STRING), + TestParam(OperatorType::In, CollectionType::WSET, BasicType::INT64), + TestParam(OperatorType::In, CollectionType::WSET, BasicType::STRING), + TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::INT64), + TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::STRING), + TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::INT64), + TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::STRING)), testing::PrintToStringParamName()); -TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field) -{ +TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_none_filter_field) { setup(false, true); add_term(1); add_term(3); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator())); expect_hits({10, 30, 31}, *itr); } -TEST_P(DirectMultiTermBlueprintTest, 
weight_iterators_used_instead_of_bitvectors_for_none_filter_field) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_for_none_filter_field) +{ + setup(false, true); + if (!in_operator) { + return; + } + add_term(1); + add_term(100); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, multi_term_iterator()); + expect_hits(concat({10}, range(100, 128)), *itr); +} + +TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_for_none_filter_field) { setup(false, true); + if (in_operator) { + return; + } add_term(1); add_term(100); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator())); expect_hits(concat({10}, range(100, 128)), *itr); } -TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field) { setup(true, true); add_term(1); @@ -235,7 +330,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_fi expect_or_iterator(*itr, 3); expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); - expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl"); + expect_or_child(*itr, 2, multi_term_iterator()); expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); } @@ -251,17 +346,17 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field) expect_hits(concat(range(100, 128), range(300, 128)), *itr); } -TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, or_filter_iterator_used_for_filter_field_when_ranking_not_needed) { setup(true, 
false); add_term(1); add_term(3); auto itr = create_leaf_search(); - EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl")); + EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermOrFilterSearchImpl")); expect_hits({10, 30, 31}, *itr); } -TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_or_filter_iterator_used_for_filter_field_when_ranking_not_needed) { setup(true, false); add_term(1); @@ -272,11 +367,11 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_fil expect_or_iterator(*itr, 3); expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); - expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl"); + expect_or_child(*itr, 2, "search::attribute::MultiTermOrFilterSearchImpl"); expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); } -TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed) +TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_ranking_not_needed) { setup(true, false); add_term(100); diff --git a/searchlib/src/tests/attribute/direct_posting_store/.gitignore b/searchlib/src/tests/attribute/direct_posting_store/.gitignore new file mode 100644 index 00000000000..5516bc721c7 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/.gitignore @@ -0,0 +1 @@ +searchlib_direct_posting_store_test_app diff --git a/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt new file mode 100644 index 00000000000..3c8e76bc9b2 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_direct_posting_store_test_app TEST + SOURCES + direct_posting_store_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_direct_posting_store_test_app COMMAND searchlib_direct_posting_store_test_app) diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp new file mode 100644 index 00000000000..c1e12580559 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp @@ -0,0 +1,297 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attribute_read_guard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/i_docid_posting_store.h> +#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> +#define ENABLE_GTEST_MIGRATION +#include <vespa/searchlib/test/searchiteratorverifier.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/test/insertion_operators.h> + +#include <vespa/log/log.h> +LOG_SETUP("direct_posting_store_test"); + +using namespace search; +using namespace search::attribute; + +AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { + Config cfg(type, collection); + cfg.setFastSearch(fast_search); + return 
AttributeFactory::createAttribute("my_attribute", cfg); +} + +void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + attr_ptr->addDoc(docid); + } + attr_ptr->commit(); + ASSERT_EQ((limit - 1), docid); +} + +template <typename ATTR, typename KEY> +void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { + attr->clearDoc(docid); + if (attr->getCollectionType() == CollectionType::SINGLE) { + attr->update(docid, key); + } else { + attr->append(docid, key, weight); + } + attr->commit(); +} + +void populate_long(AttributeVector::SP attr_ptr) { + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + set_doc(attr, 1, int64_t(111), 20); + set_doc(attr, 5, int64_t(111), 5); + set_doc(attr, 7, int64_t(111), 10); +} + +void populate_string(AttributeVector::SP attr_ptr) { + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + set_doc(attr, 1, "foo", 20); + set_doc(attr, 5, "foo", 5); + set_doc(attr, 7, "foo", 10); +} + +struct TestParam { + CollectionType col_type; + BasicType type; + const char* valid_term; + const char* invalid_term; + TestParam(CollectionType col_type_in, BasicType type_in, + const char* valid_term_in, const char* invalid_term_in) + : col_type(col_type_in), type(type_in), valid_term(valid_term_in), invalid_term(invalid_term_in) {} + ~TestParam() {} +}; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << param.col_type.asString() << "_" << param.type.asString(); + return os; +} + +struct DirectPostingStoreTest : public ::testing::TestWithParam<TestParam> { + AttributeVector::SP attr; + bool has_weight; + const IDirectPostingStore* api; + + const IDirectPostingStore* extract_api() { + if (has_weight) { + return attr->as_docid_with_weight_posting_store(); + } else { + return attr->as_docid_posting_store(); + } + } + + DirectPostingStoreTest() + : attr(make_attribute(GetParam().type, 
GetParam().col_type, true)), + has_weight(GetParam().col_type != CollectionType::SINGLE), + api(extract_api()) + { + assert(api != nullptr); + add_docs(attr); + if (GetParam().type == BasicType::STRING) { + populate_string(attr); + } else { + populate_long(attr); + } + } + ~DirectPostingStoreTest() {} +}; + +void expect_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() != nullptr); +} + +void expect_not_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() == nullptr); +} + +void expect_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() != nullptr); +} + +void expect_not_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) { + EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() == nullptr); +} + +TEST(DirectPostingStoreApiTest, attributes_support_IDocidPostingStore_interface) { + expect_docid_posting_store(BasicType::INT8, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT16, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT32, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, true); + expect_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, true); +} + +TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_interface) { + expect_not_docid_posting_store(BasicType::BOOL, CollectionType::SINGLE, true); + expect_not_docid_posting_store(BasicType::FLOAT, CollectionType::SINGLE, true); + expect_not_docid_posting_store(BasicType::DOUBLE, CollectionType::SINGLE, true); + 
expect_not_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, false); + expect_not_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, false); +} + +TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) { + expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true); + expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true); +} + +TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) { + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true); + expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true); + expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true); + expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true); + expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true); +} + +void verify_valid_lookup(IDirectPostingStore::LookupResult result, bool has_weight) { + EXPECT_TRUE(result.posting_idx.valid()); + EXPECT_EQ(3u, result.posting_size); + EXPECT_EQ(has_weight ? 5 : 1, result.min_weight); + EXPECT_EQ(has_weight ? 
20 : 1, result.max_weight); +} + +void verify_invalid_lookup(IDirectPostingStore::LookupResult result) { + EXPECT_FALSE(result.posting_idx.valid()); + EXPECT_EQ(0u, result.posting_size); + EXPECT_EQ(0, result.min_weight); + EXPECT_EQ(0, result.max_weight); +} + +INSTANTIATE_TEST_SUITE_P(DefaultInstantiation, + DirectPostingStoreTest, + testing::Values(TestParam(CollectionType::SINGLE, BasicType::INT64, "111", "222"), + TestParam(CollectionType::WSET, BasicType::INT64, "111", "222"), + TestParam(CollectionType::SINGLE, BasicType::STRING, "foo", "bar"), + TestParam(CollectionType::WSET, BasicType::STRING, "foo", "bar")), + testing::PrintToStringParamName()); + +TEST_P(DirectPostingStoreTest, lookup_works_correctly) { + verify_valid_lookup(api->lookup(GetParam().valid_term, api->get_dictionary_snapshot()), has_weight); + verify_invalid_lookup(api->lookup(GetParam().invalid_term, api->get_dictionary_snapshot())); +} + +template <typename DirectPostingStoreType, bool has_weight> +void verify_posting(const IDirectPostingStore& api, const vespalib::string& term) { + auto result = api.lookup(term, api.get_dictionary_snapshot()); + ASSERT_TRUE(result.posting_idx.valid()); + std::vector<typename DirectPostingStoreType::IteratorType> itr_store; + auto& real = dynamic_cast<const DirectPostingStoreType&>(api); + real.create(result.posting_idx, itr_store); + ASSERT_EQ(1u, itr_store.size()); + { + auto& itr = itr_store[0]; + if (itr.valid() && itr.getKey() < 1) { + itr.linearSeek(1); + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(1u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(20, itr.getData()); // weight + } + itr.linearSeek(2); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(5u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(5, itr.getData()); // weight + } + itr.linearSeek(6); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(7u, itr.getKey()); // docid + if constexpr (has_weight) { + EXPECT_EQ(10, itr.getData()); // weight + } + itr.linearSeek(8); + 
EXPECT_FALSE(itr.valid()); + } +} + +TEST_P(DirectPostingStoreTest, iterators_are_created_correctly) { + if (has_weight) { + verify_posting<IDocidWithWeightPostingStore, true>(*api, GetParam().valid_term); + } else { + verify_posting<IDocidPostingStore, false>(*api, GetParam().valid_term); + } +} + +TEST_P(DirectPostingStoreTest, collect_folded_works) +{ + if (GetParam().type == BasicType::STRING) { + auto* sa = static_cast<StringAttribute*>(attr.get()); + set_doc(sa, 2, "bar", 30); + attr->commit(); + set_doc(sa, 3, "FOO", 30); + attr->commit(); + auto snapshot = api->get_dictionary_snapshot(); + auto lookup = api->lookup(GetParam().valid_term, snapshot); + std::vector<vespalib::string> folded; + std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,sa](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(sa->getFromEnum(enum_idx.ref())); }; + api->collect_folded(lookup.enum_idx, snapshot, save_folded); + std::vector<vespalib::string> expected_folded{"FOO", "foo"}; + EXPECT_EQ(expected_folded, folded); + } else { + auto* ia = dynamic_cast<IntegerAttributeTemplate<int64_t>*>(attr.get()); + set_doc(ia, 2, int64_t(112), 30); + attr->commit(); + auto snapshot = api->get_dictionary_snapshot(); + auto lookup = api->lookup(GetParam().valid_term, snapshot); + std::vector<int64_t> folded; + std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded, ia]( + vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(ia->getFromEnum(enum_idx.ref())); }; + api->collect_folded(lookup.enum_idx, snapshot, save_folded); + std::vector<int64_t> expected_folded{int64_t(111)}; + EXPECT_EQ(expected_folded, folded); + } +} + +class Verifier : public search::test::SearchIteratorVerifier { +public: + Verifier(); + ~Verifier(); + SearchIterator::UP create(bool strict) const override { + (void) strict; + const auto* api = _attr->as_docid_with_weight_posting_store(); + assert(api != nullptr); + auto dict_entry = api->lookup("123", 
api->get_dictionary_snapshot()); + assert(dict_entry.posting_idx.valid()); + return std::make_unique<queryeval::DocidWithWeightSearchIterator>(_tfmd, *api, dict_entry); + } +private: + mutable fef::TermFieldMatchData _tfmd; + AttributeVector::SP _attr; +}; + +Verifier::Verifier() + : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)) +{ + add_docs(_attr, getDocIdLimit()); + auto docids = getExpectedDocIds(); + auto* int_attr = static_cast<IntegerAttribute*>(_attr.get()); + for (auto docid : docids) { + set_doc(int_attr, docid, int64_t(123), 1); + } +} +Verifier::~Verifier() {} + +TEST(VerifierTest, verify_document_weight_search_iterator) { + Verifier verifier; + verifier.verify(); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore deleted file mode 100644 index 08cae9a48df..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore +++ /dev/null @@ -1 +0,0 @@ -searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt deleted file mode 100644 index 4cb480068e3..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_executable(searchlib_document_weight_iterator_test_app TEST - SOURCES - document_weight_iterator_test.cpp - DEPENDS - searchlib - searchlib_test -) -vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp deleted file mode 100644 index 28416d09d6f..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/searchcommon/attribute/config.h> -#include <vespa/searchlib/attribute/attribute.h> -#include <vespa/searchlib/attribute/attribute_read_guard.h> -#include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/attribute/attributeguard.h> -#include <vespa/searchlib/attribute/attributememorysavetarget.h> -#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> -#include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> -#include <vespa/searchlib/test/searchiteratorverifier.h> -#include <vespa/searchlib/util/randomgenerator.h> -#include <vespa/vespalib/test/insertion_operators.h> -#include <vespa/vespalib/testkit/test_kit.h> - -#include <vespa/log/log.h> -LOG_SETUP("document_weight_iterator_test"); - -using namespace search; -using namespace search::attribute; - -AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { - Config cfg(type, collection); - cfg.setFastSearch(fast_search); - return AttributeFactory::createAttribute("my_attribute", cfg); -} - -void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { - AttributeVector::DocId docid; - for 
(size_t i = 0; i < limit; ++i) { - attr_ptr->addDoc(docid); - } - attr_ptr->commit(); - ASSERT_EQUAL((limit - 1), docid); -} - -template <typename ATTR, typename KEY> -void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { - attr->clearDoc(docid); - attr->append(docid, key, weight); - attr->commit(); -} - -void populate_long(AttributeVector::SP attr_ptr) { - IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); - set_doc(attr, 1, int64_t(111), 20); - set_doc(attr, 5, int64_t(111), 5); - set_doc(attr, 7, int64_t(111), 10); -} - -void populate_string(AttributeVector::SP attr_ptr) { - StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); - set_doc(attr, 1, "foo", 20); - set_doc(attr, 5, "foo", 5); - set_doc(attr, 7, "foo", 10); -} - -struct LongFixture { - AttributeVector::SP attr; - const IDocidWithWeightPostingStore *api; - LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)), - api(attr->as_docid_with_weight_posting_store()) - { - ASSERT_TRUE(api != nullptr); - add_docs(attr); - populate_long(attr); - } -}; - -struct StringFixture { - AttributeVector::SP attr; - const IDocidWithWeightPostingStore *api; - StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)), - api(attr->as_docid_with_weight_posting_store()) - { - ASSERT_TRUE(api != nullptr); - add_docs(attr); - populate_string(attr); - } -}; - -TEST("require that appropriate attributes support the document weight attribute interface") { - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); -} - -TEST("require that inappropriate attributes do not support the document weight attribute interface") { - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, 
false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr); - EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr); -} - -void verify_valid_lookup(IDirectPostingStore::LookupResult result) { - EXPECT_TRUE(result.posting_idx.valid()); - EXPECT_EQUAL(3u, result.posting_size); - EXPECT_EQUAL(5, result.min_weight); - EXPECT_EQUAL(20, result.max_weight); -} - -void verify_invalid_lookup(IDirectPostingStore::LookupResult result) { - EXPECT_FALSE(result.posting_idx.valid()); - EXPECT_EQUAL(0u, result.posting_size); - EXPECT_EQUAL(0, result.min_weight); - EXPECT_EQUAL(0, result.max_weight); -} - -TEST_F("require that integer lookup works correctly", 
LongFixture) { - verify_valid_lookup(f1.api->lookup("111", f1.api->get_dictionary_snapshot())); - verify_invalid_lookup(f1.api->lookup("222", f1.api->get_dictionary_snapshot())); -} - -TEST_F("require string lookup works correctly", StringFixture) { - verify_valid_lookup(f1.api->lookup("foo", f1.api->get_dictionary_snapshot())); - verify_invalid_lookup(f1.api->lookup("bar", f1.api->get_dictionary_snapshot())); -} - -void verify_posting(const IDocidWithWeightPostingStore &api, const char *term) { - auto result = api.lookup(term, api.get_dictionary_snapshot()); - ASSERT_TRUE(result.posting_idx.valid()); - std::vector<DocidWithWeightIterator> itr_store; - api.create(result.posting_idx, itr_store); - ASSERT_EQUAL(1u, itr_store.size()); - { - DocidWithWeightIterator &itr = itr_store[0]; - if (itr.valid() && itr.getKey() < 1) { - itr.linearSeek(1); - } - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(1u, itr.getKey()); // docid - EXPECT_EQUAL(20, itr.getData()); // weight - itr.linearSeek(2); - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(5u, itr.getKey()); // docid - EXPECT_EQUAL(5, itr.getData()); // weight - itr.linearSeek(6); - ASSERT_TRUE(itr.valid()); - EXPECT_EQUAL(7u, itr.getKey()); // docid - EXPECT_EQUAL(10, itr.getData()); // weight - itr.linearSeek(8); - EXPECT_FALSE(itr.valid()); - } -} - -TEST_F("require that integer iterators are created correctly", LongFixture) { - verify_posting(*f1.api, "111"); -} - -TEST_F("require that string iterators are created correctly", StringFixture) { - verify_posting(*f1.api, "foo"); -} - -TEST_F("require that collect_folded works for string", StringFixture) -{ - StringAttribute *attr = static_cast<StringAttribute *>(f1.attr.get()); - set_doc(attr, 2, "bar", 30); - attr->commit(); - set_doc(attr, 3, "FOO", 30); - attr->commit(); - auto dictionary_snapshot = f1.api->get_dictionary_snapshot(); - auto lookup1 = f1.api->lookup("foo", dictionary_snapshot); - std::vector<vespalib::string> folded; - 
std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); }; - f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded); - std::vector<vespalib::string> expected_folded{"FOO", "foo"}; - EXPECT_EQUAL(expected_folded, folded); -} - -TEST_F("require that collect_folded works for integers", LongFixture) -{ - IntegerAttributeTemplate<int64_t> *attr = dynamic_cast<IntegerAttributeTemplate<int64_t> *>(f1.attr.get()); - set_doc(attr, 2, int64_t(112), 30); - attr->commit(); - auto dictionary_snapshot = f1.api->get_dictionary_snapshot(); - auto lookup1 = f1.api->lookup("111", dictionary_snapshot); - std::vector<int64_t> folded; - std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); }; - f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded); - std::vector<int64_t> expected_folded{int64_t(111)}; - EXPECT_EQUAL(expected_folded, folded); -} - -class Verifier : public search::test::SearchIteratorVerifier { -public: - Verifier(); - ~Verifier(); - SearchIterator::UP create(bool strict) const override { - (void) strict; - const auto* api = _attr->as_docid_with_weight_posting_store(); - ASSERT_TRUE(api != nullptr); - auto dict_entry = api->lookup("123", api->get_dictionary_snapshot()); - ASSERT_TRUE(dict_entry.posting_idx.valid()); - return std::make_unique<queryeval::DocumentWeightSearchIterator>(_tfmd, *api, dict_entry); - } -private: - mutable fef::TermFieldMatchData _tfmd; - AttributeVector::SP _attr; -}; - -Verifier::Verifier() - : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)) -{ - add_docs(_attr, getDocIdLimit()); - auto docids = getExpectedDocIds(); - IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(_attr.get()); - for (auto docid: docids) { - set_doc(int_attr, 
docid, int64_t(123), 1); - } -} -Verifier::~Verifier() {} - -TEST("verify document weight search iterator") { - Verifier verifier; - verifier.verify(); -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt deleted file mode 100644 index b2f86a9ddec..00000000000 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_document_weight_or_filter_search_test_app TEST - SOURCES - document_weight_or_filter_search_test.cpp - DEPENDS - searchlib - searchlib_test - GTest::GTest -) -vespa_add_test(NAME searchlib_document_weight_or_filter_search_test_app COMMAND searchlib_document_weight_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt new file mode 100644 index 00000000000..4ec5d849ad3 --- /dev/null +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multi_term_or_filter_search_test_app TEST + SOURCES + multi_term_or_filter_search_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_multi_term_or_filter_search_test_app COMMAND searchlib_multi_term_or_filter_search_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp index ae4812b5437..552a128c518 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp @@ -1,30 +1,34 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/gtest/gtest.h> #include <vespa/searchlib/attribute/i_direct_posting_store.h> -#include <vespa/searchlib/attribute/document_weight_or_filter_search.h> -#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/attribute/multi_term_or_filter_search.h> #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/vespalib/gtest/gtest.h> #define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/searchiteratorverifier.h> using PostingList = search::attribute::PostingListTraits<int32_t>::PostingStoreBase; using Iterator = search::attribute::PostingListTraits<int32_t>::const_iterator; using KeyData = PostingList::KeyDataType; + using search::BitVector; -using search::attribute::DocumentWeightOrFilterSearch; +using search::attribute::MultiTermOrFilterSearch; +using search::fef::TermFieldMatchData; using search::queryeval::SearchIterator; using vespalib::datastore::EntryRef; -class DocumentWeightOrFilterSearchTest : public 
::testing::Test { +class MultiTermOrFilterSearchTest : public ::testing::Test { PostingList _postings; + mutable TermFieldMatchData _tfmd; vespalib::GenerationHandler _gens; std::vector<EntryRef> _trees; uint32_t _range_start; uint32_t _range_end; public: - DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest() override; + MultiTermOrFilterSearchTest(); + ~MultiTermOrFilterSearchTest() override; void inc_generation(); size_t num_trees() const { return _trees.size(); } Iterator get_tree(size_t idx) const { @@ -62,7 +66,7 @@ public: for (size_t i = 0; i < num_trees(); ++i) { iterators.emplace_back(get_tree(i)); } - auto result = DocumentWeightOrFilterSearch::create(std::move(iterators)); + auto result = MultiTermOrFilterSearch::create(std::move(iterators), _tfmd); result->initRange(_range_start, _range_end); return result; }; @@ -73,6 +77,8 @@ public: while (doc_id < _range_end) { if (iterator.seek(doc_id)) { result.emplace_back(doc_id); + iterator.unpack(doc_id); + EXPECT_EQ(doc_id, _tfmd.getDocId()); ++doc_id; } else { doc_id = std::max(doc_id + 1, iterator.getDocId()); @@ -121,7 +127,7 @@ public: } }; -DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest() +MultiTermOrFilterSearchTest::MultiTermOrFilterSearchTest() : _postings(true), _gens(), _range_start(1), @@ -129,7 +135,7 @@ DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest() { } -DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() +MultiTermOrFilterSearchTest::~MultiTermOrFilterSearchTest() { for (auto& tree : _trees) { _postings.clear(tree); @@ -140,7 +146,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() } void -DocumentWeightOrFilterSearchTest::inc_generation() +MultiTermOrFilterSearchTest::inc_generation() { _postings.freeze(); _postings.assign_generation(_gens.getCurrentGeneration()); @@ -148,19 +154,19 @@ DocumentWeightOrFilterSearchTest::inc_generation() 
_postings.reclaim_memory(_gens.get_oldest_used_generation()); } -TEST_F(DocumentWeightOrFilterSearchTest, daat_or) +TEST_F(MultiTermOrFilterSearchTest, daat_or) { make_sample_data(); expect_result(eval_daat(*make_iterator()), { 3, 10, 11, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits) +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits) { make_sample_data(); expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 3, 10, 11, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into) +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into) { make_sample_data(); auto bv = tobv({13, 14}); @@ -168,7 +174,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into) expect_result(frombv(*bv), { 3, 10, 11, 13, 14, 17, 20 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into) +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into) { make_sample_data(); auto bv = tobv({13, 14}); @@ -176,21 +182,21 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into) expect_result(frombv(*bv), { 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, daat_or_ranged) +TEST_F(MultiTermOrFilterSearchTest, daat_or_ranged) { make_sample_data(); set_range(4, 15); expect_result(eval_daat(*make_iterator()), {10, 11, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_get_hits_ranged) { make_sample_data(); set_range(4, 15); expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 10, 11, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into_ranged) { make_sample_data(); set_range(4, 15); @@ -199,7 +205,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged) expect_result(frombv(*bv), { 10, 11, 13, 14 }); } -TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged) +TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into_ranged) { 
make_sample_data(); set_range(4, 15); @@ -211,9 +217,9 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged) namespace { class Verifier : public search::test::SearchIteratorVerifier { - DocumentWeightOrFilterSearchTest &_test; + MultiTermOrFilterSearchTest &_test; public: - Verifier(DocumentWeightOrFilterSearchTest &test, int num_trees) + Verifier(MultiTermOrFilterSearchTest &test, int num_trees) : _test(test) { std::vector<std::vector<uint32_t>> trees(num_trees); @@ -239,7 +245,7 @@ public: }; -TEST_F(DocumentWeightOrFilterSearchTest, iterator_conformance) +TEST_F(MultiTermOrFilterSearchTest, iterator_conformance) { { Verifier verifier(*this, 1); diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 8831bd1ec75..ecc03ac54c5 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -488,11 +488,11 @@ TEST("require that direct attribute iterators work") { EXPECT_TRUE(result.has_minmax); EXPECT_EQUAL(100, result.min_weight); EXPECT_EQUAL(1000, result.max_weight); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") != vespalib::string::npos); } else { EXPECT_EQUAL(num_docs, result.est_hits); EXPECT_FALSE(result.has_minmax); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); } ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); @@ -513,7 +513,7 @@ TEST("require that single weighted set turns filter on filter fields") { SimpleStringTerm node("foo", "", 0, Weight(1)); Result result = do_search(attribute_manager, node, strict); 
EXPECT_EQUAL(3u, result.est_hits); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos); ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 6e334fffa75..741a86b0beb 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -468,7 +468,7 @@ template <typename V, typename T> ResultSetPtr SearchContextTest::performSearch(const V & vec, const T & term) { - return performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD); + return performSearch(queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD); } template <typename V, typename T> @@ -503,7 +503,7 @@ void SearchContextTest::performSearch(const V & vec, const vespalib::string & term, const DocSet & expected, TermType termType) { - performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, expected, termType); + performSearch(queryeval::ExecuteInfo::TRUE, vec, term, expected, termType); } void @@ -1113,7 +1113,7 @@ SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::st { for (size_t num_threads : {1,3}) { vespalib::SimpleThreadBundle thread_bundle(num_threads); - auto executeInfo = search::queryeval::ExecuteInfo::create(true, 1.0, nullptr, thread_bundle, true, true); + auto executeInfo = queryeval::ExecuteInfo::create(true, 1.0, vespalib::Doom::never(), thread_bundle); performSearch(executeInfo, vec, term, expected, TermType::WORD); } } diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp 
b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 1beb2b1e501..1bfb9fb41f9 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -1,5 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/enumstore.h> #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/attribute/singlestringpostattribute.h> @@ -8,7 +9,6 @@ #include <vespa/searchlib/attribute/enumstore.hpp> #include <vespa/searchlib/attribute/single_string_enum_search_context.h> -#include <vespa/searchlib/attribute/multistringpostattribute.hpp> #include <vespa/log/log.h> LOG_SETUP("stringattribute_test"); diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 81862b74eb2..b1b2235165f 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -7,7 +7,6 @@ #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/direct_tensor_attribute.h> #include <vespa/searchlib/tensor/doc_vector_access.h> -#include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/mips_distance_transform.h> #include <vespa/searchlib/tensor/nearest_neighbor_index.h> @@ -25,7 +24,6 @@ #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/util/mmap_file_allocator_factory.h> #include <vespa/searchlib/util/bufferwriter.h> -#include <vespa/vespalib/util/fake_doom.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/document/base/exceptions.h> 
#include <vespa/eval/eval/fast_value.h> @@ -132,7 +130,7 @@ private: int _index_value; public: - MockIndexSaver(int index_value) : _index_value(index_value) {} + explicit MockIndexSaver(int index_value) noexcept : _index_value(index_value) {} void save(search::BufferWriter& writer) const override { writer.write(&_index_value, sizeof(int)); writer.flush(); @@ -158,7 +156,7 @@ public: class MockPrepareResult : public PrepareResult { public: uint32_t docid; - MockPrepareResult(uint32_t docid_in) : docid(docid_in) {} + explicit MockPrepareResult(uint32_t docid_in) noexcept : docid(docid_in) {} }; class MockNearestNeighborIndex : public NearestNeighborIndex { @@ -177,7 +175,7 @@ private: int _index_value; public: - MockNearestNeighborIndex(const DocVectorAccess& vectors) + explicit MockNearestNeighborIndex(const DocVectorAccess& vectors) : _vectors(vectors), _adds(), _removes(), @@ -279,11 +277,11 @@ public: } vespalib::MemoryUsage update_stat(const CompactionStrategy&) override { ++_memory_usage_cnt; - return vespalib::MemoryUsage(); + return {}; } vespalib::MemoryUsage memory_usage() const override { ++_memory_usage_cnt; - return vespalib::MemoryUsage(); + return {}; } void populate_address_space_usage(AddressSpaceUsage&) const override {} void get_state(const vespalib::slime::Inserter&) const override {} @@ -293,7 +291,7 @@ public: if (_index_value != 0) { return std::make_unique<MockIndexSaver>(_index_value); } - return std::unique_ptr<NearestNeighborIndexSaver>(); + return {}; } std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override { (void) header; @@ -310,7 +308,7 @@ public: (void) explore_k; (void) doom; (void) distance_threshold; - return std::vector<Neighbor>(); + return {}; } std::vector<Neighbor> find_top_k_with_filter(uint32_t k, const search::tensor::BoundDistanceFunction &df, @@ -324,7 +322,7 @@ public: (void) filter; (void) doom; (void) distance_threshold; - return 
std::vector<Neighbor>(); + return {}; } search::tensor::DistanceFunctionFactory &distance_function_factory() const override { @@ -427,7 +425,7 @@ struct Fixture { FixtureTraits _traits; vespalib::string _mmap_allocator_base_dir; - Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits()); + explicit Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits()); ~Fixture(); @@ -589,7 +587,7 @@ struct Fixture { } TensorSpec expEmptyDenseTensor() const { - return TensorSpec(denseSpec); + return {denseSpec}; } vespalib::string expEmptyDenseTensorSpec() const { @@ -1296,12 +1294,10 @@ template <typename ParentT> class NearestNeighborBlueprintFixtureBase : public ParentT { private: std::unique_ptr<Value> _query_tensor; - vespalib::FakeDoom _no_doom; public: NearestNeighborBlueprintFixtureBase() - : _query_tensor(), - _no_doom() + : _query_tensor() { this->set_tensor(1, vec_2d(1, 1)); this->set_tensor(2, vec_2d(2, 2)); @@ -1329,7 +1325,7 @@ public: std::make_unique<DistanceCalculator>(this->as_dense_tensor(), create_query_tensor(vec_2d(17, 42))), 3, approximate, 5, 100100.25, - global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, _no_doom.get_doom()); + global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, vespalib::Doom::never()); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold()); return bp; diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp index 2ac9fb738f8..758f44cdba2 100644 --- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -646,45 +646,45 @@ TEST("requireThatGrowWorks") EXPECT_EQUAL(4u, v.writer().countTrueBits()); EXPECT_EQUAL(200u, v.reader().size()); - EXPECT_EQUAL(1023u, v.writer().capacity()); + EXPECT_EQUAL(2047u, v.writer().capacity()); 
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); - EXPECT_TRUE(v.reserve(1024)); + EXPECT_TRUE(v.reserve(2048u)); EXPECT_EQUAL(200u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); EXPECT_FALSE(v.extend(202)); EXPECT_EQUAL(202u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); EXPECT_FALSE(v.shrink(200)); EXPECT_EQUAL(200u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); - EXPECT_FALSE(v.reserve(2047)); + EXPECT_FALSE(v.reserve(4095u)); EXPECT_EQUAL(200u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); EXPECT_FALSE(v.shrink(202)); EXPECT_EQUAL(202u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader())); EXPECT_EQUAL(4u, v.writer().countTrueBits()); EXPECT_FALSE(v.shrink(100)); EXPECT_EQUAL(100u, v.reader().size()); - EXPECT_EQUAL(2047u, v.writer().capacity()); + EXPECT_EQUAL(4095u, v.writer().capacity()); EXPECT_TRUE(assertBV("[7,39,71]", v.reader())); EXPECT_EQUAL(3u, v.writer().countTrueBits()); v.writer().invalidateCachedCount(); - EXPECT_TRUE(v.reserve(3100)); + EXPECT_TRUE(v.reserve(5100u)); EXPECT_EQUAL(100u, v.reader().size()); - EXPECT_EQUAL(4095u, v.writer().capacity()); + EXPECT_EQUAL(6143u, v.writer().capacity()); EXPECT_EQUAL(3u, v.writer().countTrueBits()); 
g.assign_generation(1); @@ -701,9 +701,9 @@ TEST("require that growable bit vectors keeps memory allocator") EXPECT_EQUAL(AllocStats(1, 0), stats); v.writer().resize(1); // DO NOT TRY THIS AT HOME EXPECT_EQUAL(AllocStats(2, 1), stats); - v.reserve(2000); + v.reserve(2048); EXPECT_EQUAL(AllocStats(3, 1), stats); - v.extend(4000); + v.extend(5000); EXPECT_EQUAL(AllocStats(4, 1), stats); v.shrink(200); EXPECT_EQUAL(AllocStats(4, 1), stats); diff --git a/searchlib/src/tests/features/CMakeLists.txt b/searchlib/src/tests/features/CMakeLists.txt index 9d2ed02f5dd..ea2410734b5 100644 --- a/searchlib/src/tests/features/CMakeLists.txt +++ b/searchlib/src/tests/features/CMakeLists.txt @@ -17,5 +17,6 @@ vespa_add_executable(searchlib_featurebenchmark_app featurebenchmark.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_featurebenchmark_app COMMAND searchlib_featurebenchmark_app BENCHMARK) diff --git a/searchlib/src/tests/features/beta/CMakeLists.txt b/searchlib/src/tests/features/beta/CMakeLists.txt index 543982c549c..db45f02d898 100644 --- a/searchlib/src/tests/features/beta/CMakeLists.txt +++ b/searchlib/src/tests/features/beta/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_executable(searchlib_beta_features_test_app TEST beta_features_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test( NAME searchlib_beta_features_test_app diff --git a/searchlib/src/tests/features/beta/beta_features_test.cpp b/searchlib/src/tests/features/beta/beta_features_test.cpp index e0f57a6cad1..8413cfde47f 100644 --- a/searchlib/src/tests/features/beta/beta_features_test.cpp +++ b/searchlib/src/tests/features/beta/beta_features_test.cpp @@ -14,7 +14,7 @@ #include <vespa/searchlib/features/utils.h> #include <vespa/searchlib/fef/test/plugin/setup.h> #include <vespa/vespalib/util/rand48.h> -#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/vespalib/util/stringfmt.h> using namespace search::features; diff --git 
a/searchlib/src/tests/features/bm25/bm25_test.cpp b/searchlib/src/tests/features/bm25/bm25_test.cpp index 8abd3d104b9..233c0ec09f3 100644 --- a/searchlib/src/tests/features/bm25/bm25_test.cpp +++ b/searchlib/src/tests/features/bm25/bm25_test.cpp @@ -4,9 +4,9 @@ #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/fef/blueprintfactory.h> #include <vespa/searchlib/fef/test/dummy_dependency_handler.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/indexenvironment.h> #include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/vespalib/gtest/gtest.h> using namespace search::features; diff --git a/searchlib/src/tests/features/element_completeness/CMakeLists.txt b/searchlib/src/tests/features/element_completeness/CMakeLists.txt index 327bb691819..046b061b884 100644 --- a/searchlib/src/tests/features/element_completeness/CMakeLists.txt +++ b/searchlib/src/tests/features/element_completeness/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_element_completeness_test_app TEST element_completeness_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_element_completeness_test_app COMMAND searchlib_element_completeness_test_app) diff --git a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp index 3b2a5035d1a..ff210035502 100644 --- a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp +++ b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp @@ -5,8 +5,8 @@ #include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> #include <vespa/searchlib/fef/test/queryenvironment.h> #include <vespa/searchlib/features/element_completeness_feature.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include 
<vespa/searchlib/test/ft_test_app.h> #include <vespa/vespalib/util/stringfmt.h> using namespace search::fef; diff --git a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt index 921e4bab04e..748556b0fcd 100644 --- a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt +++ b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_element_similarity_feature_test_app TEST element_similarity_feature_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_element_similarity_feature_test_app COMMAND searchlib_element_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp index 3aedb3c51ed..1eda660d2ec 100644 --- a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp +++ b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp @@ -5,7 +5,7 @@ #include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> #include <vespa/searchlib/fef/test/queryenvironment.h> #include <vespa/searchlib/features/element_similarity_feature.h> -#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <initializer_list> #include <vespa/searchlib/fef/test/dummy_dependency_handler.h> #include <vespa/vespalib/util/stringfmt.h> diff --git a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt index 6af6a9095ac..df55b8f834c 100644 --- a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt +++ b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_euclidean_distance_test_app TEST 
euclidean_distance_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_euclidean_distance_test_app COMMAND searchlib_euclidean_distance_test_app) diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp index d327253731d..3bc61a77c55 100644 --- a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp +++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp @@ -1,16 +1,16 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributevector.h> #include <vespa/searchlib/attribute/integerbase.h> #include <vespa/searchlib/attribute/floatbase.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/fef/test/indexenvironment.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/features/euclidean_distance_feature.h> #include <vespa/searchlib/fef/fef.h> -#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchlib/test/ft_test_app.h> using search::feature_t; diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp index e151b47a0c9..9c3d4943d65 100644 --- a/searchlib/src/tests/features/featurebenchmark.cpp +++ b/searchlib/src/tests/features/featurebenchmark.cpp @@ -11,7 +11,7 @@ #include <vespa/searchlib/fef/functiontablefactory.h> #include <vespa/searchlib/fef/test/plugin/setup.h> #include <vespa/vespalib/util/stringfmt.h> -#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/searchcommon/attribute/config.h> #include <fstream> diff --git 
a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt index 217af473987..e7fc3126e2f 100644 --- a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt +++ b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_internal_max_reduce_prod_join_feature_test_app TE internal_max_reduce_prod_join_feature_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_internal_max_reduce_prod_join_feature_test_app COMMAND searchlib_internal_max_reduce_prod_join_feature_test_app) diff --git a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp index 852827244bc..7611296c641 100644 --- a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp +++ b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp @@ -2,11 +2,11 @@ #include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchlib/attribute/attribute.h> #include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/features/internal_max_reduce_prod_join_feature.h> -#include <vespa/searchlib/attribute/attribute.h> -#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchlib/test/ft_test_app.h> using search::feature_t; using namespace search::fef; diff --git a/searchlib/src/tests/features/prod_features_test.h b/searchlib/src/tests/features/prod_features_test.h index 94c4e496dd2..aeadf23be80 100644 --- a/searchlib/src/tests/features/prod_features_test.h +++ 
b/searchlib/src/tests/features/prod_features_test.h @@ -4,7 +4,7 @@ #include <vespa/searchlib/features/distancetopathfeature.h> #include <vespa/searchlib/features/termdistancefeature.h> -#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/test/ft_test_app.h> class Test : public FtTestApp { diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp index 96a53d98865..fe4464bad72 100644 --- a/searchlib/src/tests/features/tensor/tensor_test.cpp +++ b/searchlib/src/tests/features/tensor/tensor_test.cpp @@ -1,21 +1,21 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/eval/test/value_compare.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributevector.h> #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/fef/fef.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/indexenvironment.h> #include <vespa/searchlib/tensor/tensor_attribute.h> #include <vespa/searchlib/tensor/direct_tensor_attribute.h> -#include <vespa/searchcommon/attribute/config.h> -#include <vespa/eval/eval/function.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/value_codec.h> -#include <vespa/eval/eval/test/value_compare.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/vespalib/objects/nbostream.h> using search::feature_t; diff --git a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt 
b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt index 186ecf38c9e..3ecceffd422 100644 --- a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt +++ b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_tensor_from_labels_test_app TEST tensor_from_labels_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_tensor_from_labels_test_app COMMAND searchlib_tensor_from_labels_test_app) diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp index 20cfa4d84c8..f241398539a 100644 --- a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp +++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp @@ -8,8 +8,8 @@ #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/tensor_from_labels_feature.h> #include <vespa/searchlib/fef/fef.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/eval/eval/function.h> #include <vespa/eval/eval/simple_value.h> diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt index bf93e8923b5..b5322c1a64c 100644 --- a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt +++ b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchlib_tensor_from_weighted_set_test_app TEST tensor_from_weighted_set_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_tensor_from_weighted_set_test_app COMMAND searchlib_tensor_from_weighted_set_test_app) diff --git 
a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp index db734387288..9c8f231051e 100644 --- a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp +++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp @@ -1,6 +1,12 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/function.h> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/test/value_compare.h> +#include <vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributevector.h> #include <vespa/searchlib/attribute/integerbase.h> @@ -8,14 +14,8 @@ #include <vespa/searchlib/features/setup.h> #include <vespa/searchlib/features/tensor_from_weighted_set_feature.h> #include <vespa/searchlib/fef/fef.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/indexenvironment.h> -#include <vespa/searchcommon/attribute/config.h> -#include <vespa/eval/eval/function.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/test/value_compare.h> +#include <vespa/searchlib/test/ft_test_app.h> using search::feature_t; using namespace search::fef; diff --git a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt index 363619ce4fb..cfa715af516 100644 --- a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt +++ b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt @@ -4,5 +4,6 @@ 
vespa_add_executable(searchlib_text_similarity_feature_test_app TEST text_similarity_feature_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test(NAME searchlib_text_similarity_feature_test_app COMMAND searchlib_text_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp index 03734b15d64..cf0660282f2 100644 --- a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp +++ b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp @@ -1,14 +1,14 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> #include <vespa/searchlib/fef/test/indexenvironment.h> #include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> #include <vespa/searchlib/fef/test/queryenvironment.h> #include <vespa/searchlib/features/text_similarity_feature.h> -#include <vespa/searchlib/fef/test/ftlib.h> -#include <initializer_list> -#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/vespalib/util/stringfmt.h> +#include <initializer_list> using namespace search::fef; using namespace search::fef::test; diff --git a/searchlib/src/tests/nativerank/CMakeLists.txt b/searchlib/src/tests/nativerank/CMakeLists.txt index 20fdc0c1245..2a46dd54904 100644 --- a/searchlib/src/tests/nativerank/CMakeLists.txt +++ b/searchlib/src/tests/nativerank/CMakeLists.txt @@ -4,6 +4,7 @@ vespa_add_executable(searchlib_nativerank_test_app TEST nativerank_test.cpp DEPENDS searchlib + searchlib_test ) vespa_add_test( NAME searchlib_nativerank_test_app diff --git a/searchlib/src/tests/nativerank/nativerank_test.cpp 
b/searchlib/src/tests/nativerank/nativerank_test.cpp index bc9c579a597..69234071a34 100644 --- a/searchlib/src/tests/nativerank/nativerank_test.cpp +++ b/searchlib/src/tests/nativerank/nativerank_test.cpp @@ -10,8 +10,8 @@ #include <vespa/searchlib/fef/functiontablefactory.h> #include <vespa/searchlib/fef/test/plugin/setup.h> #include <vespa/vespalib/util/stringfmt.h> -#include <vespa/searchlib/fef/test/ftlib.h> #include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/test/ft_test_app.h> #include <vespa/log/log.h> LOG_SETUP("nativerank_test"); diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 306456518b7..7c4b7555158 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -6,6 +6,7 @@ #include <vespa/searchlib/query/streaming/in_term.h> #include <vespa/searchlib/query/streaming/query.h> #include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> +#include <vespa/searchlib/query/streaming/wand_term.h> #include <vespa/searchlib/query/tree/querybuilder.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> @@ -27,6 +28,7 @@ void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weig EXPECT_EQ(h.weight(), weight); } + TEST(StreamingQueryTest, test_query_language) { QueryNodeResultFactory factory; @@ -38,7 +40,7 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, 7); EXPECT_EQ(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, 7); EXPECT_EQ(db, 7); } @@ -48,15 +50,24 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -7); EXPECT_EQ(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7); EXPECT_EQ(db, 
-7); } + { + QueryTerm q(factory.create(), "+7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQ(ia, 7); + EXPECT_EQ(ib, 7); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, 7); + EXPECT_EQ(db, 7); + } { QueryTerm q(factory.create(), "7.5", "index", TermType::WORD); EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, 7.5); EXPECT_EQ(db, 7.5); } @@ -64,7 +75,7 @@ TEST(StreamingQueryTest, test_query_language) { QueryTerm q(factory.create(), "-7.5", "index", TermType::WORD); EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7.5); EXPECT_EQ(db, -7.5); } @@ -74,8 +85,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, -std::numeric_limits<double>::max()); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, -std::numeric_limits<double>::infinity()); EXPECT_LT(db, 7); EXPECT_GT(db, 6.99); } @@ -85,8 +96,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, -std::numeric_limits<double>::max()); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, -std::numeric_limits<double>::infinity()); EXPECT_EQ(db, 7); } @@ -95,10 +106,10 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, 8); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_GT(da, 7); EXPECT_LT(da, 7.01); - EXPECT_EQ(db, std::numeric_limits<double>::max()); + EXPECT_EQ(db, std::numeric_limits<double>::infinity()); } { @@ 
-106,9 +117,9 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, 7); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, 7); - EXPECT_EQ(db, std::numeric_limits<double>::max()); + EXPECT_EQ(db, std::numeric_limits<double>::infinity()); } { @@ -116,7 +127,7 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -7); EXPECT_EQ(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7); EXPECT_EQ(db, 7); } @@ -126,7 +137,7 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7.1); EXPECT_EQ(db, 7.1); } @@ -136,7 +147,7 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. 
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, 500.0); EXPECT_EQ(db, std::numeric_limits<double>::max()); } @@ -147,8 +158,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -6); EXPECT_EQ(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, std::nextafterf(minusSeven, seven)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, std::nextafter(minusSeven, seven)); EXPECT_EQ(db, seven); } @@ -157,9 +168,9 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -6); EXPECT_EQ(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, std::nextafterf(minusSeven, seven)); - EXPECT_EQ(db, std::nextafterf(seven, minusSeven)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, std::nextafter(minusSeven, seven)); + EXPECT_EQ(db, std::nextafter(seven, minusSeven)); } { @@ -174,9 +185,9 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -7); EXPECT_EQ(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, minusSeven); - EXPECT_EQ(db, std::nextafterf(seven, minusSeven)); + EXPECT_EQ(db, std::nextafter(seven, minusSeven)); } { @@ -184,8 +195,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, -8); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, -std::numeric_limits<double>::max()); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, -std::numeric_limits<double>::infinity()); EXPECT_LT(db, -7); EXPECT_GT(db, -7.01); } @@ -195,8 +206,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); 
EXPECT_EQ(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, -std::numeric_limits<double>::max()); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, -std::numeric_limits<double>::infinity()); EXPECT_EQ(db, -7); } @@ -205,8 +216,8 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, std::numeric_limits<int64_t>::min()); EXPECT_EQ(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQ(da, -std::numeric_limits<double>::max()); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); + EXPECT_EQ(da, -std::numeric_limits<double>::infinity()); EXPECT_EQ(db, -7); } @@ -215,10 +226,10 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -6); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_GT(da, -7); EXPECT_LT(da, -6.99); - EXPECT_EQ(db, std::numeric_limits<double>::max()); + EXPECT_EQ(db, std::numeric_limits<double>::infinity()); } { @@ -226,9 +237,9 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -7); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7); - EXPECT_EQ(db, std::numeric_limits<double>::max()); + EXPECT_EQ(db, std::numeric_limits<double>::infinity()); } { @@ -236,15 +247,15 @@ TEST(StreamingQueryTest, test_query_language) EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); EXPECT_EQ(ia, -7); EXPECT_EQ(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_TRUE(q.getAsFloatTerm(da, db)); EXPECT_EQ(da, -7); - EXPECT_EQ(db, std::numeric_limits<double>::max()); + EXPECT_EQ(db, std::numeric_limits<double>::infinity()); } { QueryTerm q(factory.create(), "a", "index", TermType::WORD); EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(!q.getAsDoubleTerm(da, db)); + 
EXPECT_TRUE(!q.getAsFloatTerm(da, db)); } { @@ -287,7 +298,10 @@ TEST(StreamingQueryTest, test_query_language) class AllowRewrite : public QueryNodeResultFactory { public: - virtual bool getRewriteFloatTerms() const override { return true; } + explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {} + bool allow_float_terms_rewrite(vespalib::stringref index) const noexcept override { return index == _allowedIndex; } +private: + vespalib::string _allowedIndex; }; const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID); @@ -297,12 +311,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed) const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(6u, stackDump.size()); - AllowRewrite allowRewrite; + AllowRewrite allowRewrite("c"); const Query q(allowRewrite, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -313,12 +327,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - QueryNodeResultFactory empty; + AllowRewrite empty("nix"); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -329,34 +343,34 @@ 
TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - AllowRewrite empty; + AllowRewrite empty("c"); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr); - const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root); + const auto & equiv = static_cast<const EquivQueryNode &>(root); EXPECT_EQ(2u, equiv.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]); + const auto & qt = static_cast<const QueryTerm &>(*equiv[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr); { - const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); + const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); EXPECT_EQ(2u, phrase.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[1]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); @@ -460,7 +474,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate) terms[1]->add(1, 5, 0, 1); terms[2]->add(0, 5, 0, 1); HitList hits; - PhraseQueryNode * p = 
static_cast<PhraseQueryNode *>(phrases[0]); + auto * p = static_cast<PhraseQueryNode *>(phrases[0]); p->evaluateHits(hits); ASSERT_EQ(3u, hits.size()); EXPECT_EQ(hits[0].wordpos(), 2u); @@ -522,6 +536,7 @@ void assertInt64Range(const std::string &term, bool expAdjusted, int64_t expLow, EXPECT_EQ(expHigh, (int64_t)res.high); } + TEST(StreamingQueryTest, require_that_int8_limits_are_enforced) { //std::numeric_limits<int8_t>::min() -> -128 @@ -607,6 +622,20 @@ TEST(StreamingQueryTest, require_that_we_can_take_floating_point_values_in_range assertInt64Range("[1.7976931348623157E308;-1.7976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()); } +void assertIllegalRangeQueries(const QueryTermSimple & qt) { + QueryTermSimple::RangeResult<int64_t> ires = qt.getRange<int64_t>(); + EXPECT_EQ(false, ires.valid); + QueryTermSimple::RangeResult<double> fres = qt.getRange<double>(); + EXPECT_EQ(false, fres.valid); +} + +TEST(StreamingQueryTest, require_safe_parsing_of_illegal_ranges) { + // The 2 below are created when naively splitting numeric terms by dot. 
+ // T=A.B => T EQUIV PHRASE(A, B) + assertIllegalRangeQueries(QueryTermSimple("[1", TermType::WORD)); + assertIllegalRangeQueries(QueryTermSimple(".1;2.1]", TermType::WORD)); +} + TEST(StreamingQueryTest, require_that_we_handle_empty_range_as_expected) { assertInt64Range("[1;1]", false, 1, 1); @@ -627,11 +656,11 @@ TEST(StreamingQueryTest, require_that_ascending_range_can_be_specified_with_limi QueryTerm ascending_query(eqnr.create(), "[;;500]", "index", TermType::WORD); EXPECT_TRUE(ascending_query.getAsIntegerTerm(low_integer, high_integer)); - EXPECT_TRUE(ascending_query.getAsDoubleTerm(low_double, high_double)); + EXPECT_TRUE(ascending_query.getAsFloatTerm(low_double, high_double)); EXPECT_EQ(std::numeric_limits<int64_t>::min(), low_integer); EXPECT_EQ(std::numeric_limits<int64_t>::max(), high_integer); - EXPECT_EQ(-std::numeric_limits<double>::max(), low_double); - EXPECT_EQ(std::numeric_limits<double>::max(), high_double); + EXPECT_EQ(-std::numeric_limits<double>::infinity(), low_double); + EXPECT_EQ(std::numeric_limits<double>::infinity(), high_double); EXPECT_EQ(500, ascending_query.getRangeLimit()); } @@ -646,11 +675,11 @@ TEST(StreamingQueryTest, require_that_descending_range_can_be_specified_with_lim QueryTerm descending_query(eqnr.create(), "[;;-500]", "index", TermType::WORD); EXPECT_TRUE(descending_query.getAsIntegerTerm(low_integer, high_integer)); - EXPECT_TRUE(descending_query.getAsDoubleTerm(low_double, high_double)); + EXPECT_TRUE(descending_query.getAsFloatTerm(low_double, high_double)); EXPECT_EQ(std::numeric_limits<int64_t>::min(), low_integer); EXPECT_EQ(std::numeric_limits<int64_t>::max(), high_integer); - EXPECT_EQ(-std::numeric_limits<double>::max(), low_double); - EXPECT_EQ(std::numeric_limits<double>::max(), high_double); + EXPECT_EQ(-std::numeric_limits<double>::infinity(), low_double); + EXPECT_EQ(std::numeric_limits<double>::infinity(), high_double); EXPECT_EQ(-500, descending_query.getRangeLimit()); } @@ -735,7 +764,7 @@ 
TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query) { - QueryTermSimple term("<form><iframe+	 +src=\\\"javascript:alert(1)\\\" 	;>", TermType::WORD); + QueryTermSimple term(R"(<form><iframe+	 +src=\"javascript:alert(1)\" 	;>)", TermType::WORD); EXPECT_FALSE(term.isValid()); } @@ -744,7 +773,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack) const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393"; vespalib::stringref stackDump(stack); EXPECT_EQ(85u, stackDump.size()); - AllowRewrite empty; + AllowRewrite empty(""); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); @@ -778,7 +807,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate) vespalib::string stackDump = StackDumpCreator::create(*node); QueryNodeResultFactory empty; Query q(empty, stackDump); - SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); + auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); EXPECT_TRUE(sameElem != nullptr); EXPECT_EQ("field", sameElem->getIndex()); EXPECT_EQ(3u, sameElem->size()); @@ -878,7 +907,7 @@ TEST(StreamingQueryTest, test_in_term) { auto term_vector = std::make_unique<StringTermVector>(1); term_vector->addTerm("7"); - search::streaming::InTerm term({}, "index", std::move(term_vector)); + search::streaming::InTerm term({}, "index", std::move(term_vector), Normalizing::NONE); SimpleTermData td; td.addField(10); td.addField(11); @@ -929,6 +958,68 @@ TEST(StreamingQueryTest, dot_product_term) EXPECT_EQ(-17 * 27 + 9 * 2, tmd1->getRawScore()); } +namespace { + +constexpr double exp_wand_score_field_12 = 13 * 27 + 4 * 2; +constexpr double exp_wand_score_field_11 = 17 * 27 + 9 * 2; + +void +check_wand_term(double limit, const vespalib::string& label) +{ + SCOPED_TRACE(label); + 
search::streaming::WandTerm term({}, "index", 2); + term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "7", "", QueryTermSimple::Type::WORD)); + term.get_terms().back()->setWeight(Weight(27)); + term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "9", "", QueryTermSimple::Type::WORD)); + term.get_terms().back()->setWeight(Weight(2)); + EXPECT_EQ(2, term.get_terms().size()); + term.set_score_threshold(limit); + SimpleTermData td; + /* + * Search in fields 10, 11 and 12 (cf. fieldset in schema). + * Fields 11 and 12 have content for doc containing the keys. + * Fields 10 and 12 have valid handles and can be used for ranking. + * Field 11 does not have a valid handle, thus no associated match data. + */ + td.addField(10); + td.addField(11); + td.addField(12); + td.lookupField(10)->setHandle(0); + td.lookupField(12)->setHandle(1); + EXPECT_FALSE(term.evaluate()); + auto& q0 = *term.get_terms()[0]; + q0.add(0, 11, 0, 17); + q0.add(0, 12, 0, 13); + auto& q1 = *term.get_terms()[1]; + q1.add(0, 11, 0, 9); + q1.add(0, 12, 0, 4); + EXPECT_EQ(limit < exp_wand_score_field_11, term.evaluate()); + MatchData md(MatchData::params().numTermFields(2)); + term.unpack_match_data(23, td, md); + auto tmd0 = md.resolveTermField(0); + EXPECT_NE(23, tmd0->getDocId()); + auto tmd1 = md.resolveTermField(1); + if (limit < exp_wand_score_field_12) { + EXPECT_EQ(23, tmd1->getDocId()); + EXPECT_EQ(exp_wand_score_field_12, tmd1->getRawScore()); + } else { + EXPECT_NE(23, tmd1->getDocId()); + } +} + +} + +TEST(StreamingQueryTest, wand_term) +{ + check_wand_term(0.0, "no limit"); + check_wand_term(exp_wand_score_field_12 - 1, "score above limit"); + check_wand_term(exp_wand_score_field_12, "score at limit"); + check_wand_term(exp_wand_score_field_12 + 1, "score below limit"); + check_wand_term(exp_wand_score_field_11 - 1, "hidden score above limit"); + check_wand_term(exp_wand_score_field_11, "hidden score at limit"); + 
check_wand_term(exp_wand_score_field_11 + 1, "hidden score below limit"); +} + TEST(StreamingQueryTest, control_the_size_of_query_terms) { EXPECT_EQ(112u, sizeof(QueryTermSimple)); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index 20cf2008e4b..f800e124bdc 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "mysearch.h" #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/flow.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/vespalib/objects/objectdumper.h> @@ -22,8 +23,12 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_cost() const final { return 1.0; } - double calculate_relative_estimate() const final { return 0.5; } + double calculate_cost() const final { + return cost_of(get_children(), OrFlow()); + } + double calculate_relative_estimate() const final { + return estimate_of(get_children(), OrFlow()); + } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); } @@ -32,7 +37,7 @@ public: return mixChildrenFields(); } - void sort(Children &children) const override { + void sort(Children &children, bool) const override { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -440,7 +445,8 @@ TEST_F("testChildAndNotCollapsing", Fixture) ) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -479,7 +485,8 @@ TEST_F("testChildAndCollapsing", Fixture) ); 
TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -517,7 +524,8 @@ TEST_F("testChildOrCollapsing", Fixture) .add(MyLeafSpec(1).addField(2, 42).create()) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -560,7 +568,8 @@ TEST_F("testChildSorting", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -646,6 +655,7 @@ getExpectedBlueprint() " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " children: std::vector {\n" @@ -666,6 +676,7 @@ getExpectedBlueprint() " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" @@ -696,6 +707,7 @@ getExpectedSlimeBlueprint() { " tree_size: 2," " allow_termwise_eval: false" " }," + " cost: 1.0," " sourceId: 4294967295," " docid_limit: 0," " children: {" @@ -721,6 +733,7 @@ getExpectedSlimeBlueprint() { " tree_size: 1," " allow_termwise_eval: true" " }," + " cost: 1.0," " sourceId: 4294967295," " docid_limit: 0" " }" diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index e24e91c2f1d..ab1c004c721 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -14,6 +14,11 @@ #include 
<vespa/searchlib/test/diskindex/testdiskindex.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/approx.h> +#include <vespa/vespalib/data/simple_buffer.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/data/slime/inserter.h> #include <filesystem> #include <vespa/log/log.h> @@ -24,6 +29,11 @@ using namespace search::fef; using namespace search::query; using search::BitVector; using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>; +using vespalib::Slime; +using vespalib::slime::Inspector; +using vespalib::slime::SlimeInserter; +using vespalib::make_string_short::fmt; +using Path = std::vector<std::variant<size_t,vespalib::stringref>>; struct InvalidSelector : ISourceSelector { InvalidSelector() : ISourceSelector(Source()) {} @@ -66,7 +76,8 @@ void check_sort_order(IntermediateBlueprint &self, BlueprintVector children, std for (const auto & child: children) { unordered.push_back(child.get()); } - self.sort(children); + // TODO: sort by cost (requires both setDocIdLimit and optimize to be called) + self.sort(children, false); for (size_t i = 0; i < children.size(); ++i) { EXPECT_EQUAL(children[i].get(), unordered[order[i]]); } @@ -120,7 +131,7 @@ TEST("test AndNot Blueprint") { template <typename BP> void optimize(std::unique_ptr<BP> &ref) { - auto optimized = Blueprint::optimize(std::move(ref)); + auto optimized = Blueprint::optimize(std::move(ref), true); ref.reset(dynamic_cast<BP*>(optimized.get())); ASSERT_TRUE(ref); optimized.release(); @@ -132,8 +143,8 @@ TEST("test And propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); - optimize(bp); bp->setDocIdLimit(5000); + optimize(bp); 
bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(3u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -152,8 +163,8 @@ TEST("test Or propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); - optimize(bp); bp->setDocIdLimit(5000); + optimize(bp); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(4u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -480,13 +491,71 @@ struct SourceBlenderTestFixture { void addChildrenForSimpleSBTest(IntermediateBlueprint & parent); }; +vespalib::string path_to_str(const Path &path) { + size_t cnt = 0; + vespalib::string str("["); + for (const auto &item: path) { + if (cnt++ > 0) { + str.append(","); + } + std::visit(vespalib::overload{ + [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); }, + [&str](vespalib::stringref value)noexcept{ str.append(value); }}, item); + } + str.append("]"); + return str; +} + +vespalib::string to_str(const Inspector &value) { + if (!value.valid()) { + return "<missing>"; + } + vespalib::SimpleBuffer buf; + vespalib::slime::JsonFormat::encode(value, buf, true); + return buf.get().make_string(); +} + +void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { + auto cmp_hook = [expect_eq](const auto &path, const auto &a, const auto &b) { + if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { + vespalib::stringref field = std::get<vespalib::stringref>(path.back()); + if (field == "cost") { + return true; + } + if (field == "relative_estimate") { + double a_val = a.asDouble(); + double b_val = b.asDouble(); + if (a_val != 0.0 && b_val != 0.0 && vespalib::approx_equal(a_val, b_val)) { + return true; + } + } + } + if (expect_eq) { + fprintf(stderr, " mismatch at %s: %s vs %s\n", 
path_to_str(path).c_str(), + to_str(a).c_str(), to_str(b).c_str()); + } + return false; + }; + Slime a; + Slime b; + bp1.asSlime(SlimeInserter(a)); + bp2.asSlime(SlimeInserter(b)); + if (expect_eq) { + EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); + } else { + EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); + } +} + void -optimize_and_compare(Blueprint::UP top, Blueprint::UP expect) { - EXPECT_NOT_EQUAL(expect->asString(), top->asString()); - top = Blueprint::optimize(std::move(top)); - EXPECT_EQUAL(expect->asString(), top->asString()); - expect = Blueprint::optimize(std::move(expect)); - EXPECT_EQUAL(expect->asString(), top->asString()); +optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool sort_by_cost = true) { + top->setDocIdLimit(1000); + expect->setDocIdLimit(1000); + TEST_DO(compare(*top, *expect, false)); + top = Blueprint::optimize(std::move(top), sort_by_cost); + TEST_DO(compare(*top, *expect, true)); + expect = Blueprint::optimize(std::move(expect), sort_by_cost); + TEST_DO(compare(*expect, *top, true)); } void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & parent) { @@ -612,11 +681,11 @@ TEST("test empty root node optimization and safeness") { //------------------------------------------------------------------------- auto expect_up = std::make_unique<EmptyBlueprint>(); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5))->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), 
Blueprint::optimize(std::move(top2), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5), true)->asString()); } TEST("and with one empty child is optimized away") { @@ -624,7 +693,7 @@ TEST("and with one empty child is optimized away") { Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20}))); - top = Blueprint::optimize(std::move(top)); + top = Blueprint::optimize(std::move(top), true); Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(std::make_unique<EmptyBlueprint>()))); @@ -716,6 +785,22 @@ TEST("AND_NOT AND AND_NOT collapsing") { optimize_and_compare(std::move(top), std::move(expect)); } +TEST("AND_NOT AND AND_NOT AND nested collapsing") { + Blueprint::UP top = make::ANDNOT() + .add(make::AND() + .add(make::ANDNOT() + .add(make::AND().leafs({1,2})) + .leafs({5,6})) + .add(make::ANDNOT() + .add(make::AND().leafs({3,4})) + .leafs({8,9}))) + .leaf(7); + Blueprint::UP expect = make::ANDNOT() + .add(make::AND().leafs({1,2,3,4})) + .leafs({9,8,7,6,5}); + optimize_and_compare(std::move(top), std::move(expect)); +} + TEST("AND_NOT AND AND_NOT collapsing into full source blender optimization") { InvalidSelector sel; Blueprint::UP top = @@ -783,8 +868,8 @@ TEST("require that replaced blueprints retain source id") { addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); //------------------------------------------------------------------------- - top1_up = Blueprint::optimize(std::move(top1_up)); - top2_up = Blueprint::optimize(std::move(top2_up)); + top1_up = 
Blueprint::optimize(std::move(top1_up), true); + top2_up = Blueprint::optimize(std::move(top2_up), true); EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); EXPECT_EQUAL(13u, top1_up->getSourceId()); @@ -1103,8 +1188,8 @@ TEST("require that children of near are not optimized") { auto expect_up = ap((new NearBlueprint(10))-> addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up)); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up), true); + TEST_DO(compare(*top_up, *expect_up, true)); } TEST("require that children of onear are not optimized") { @@ -1114,27 +1199,27 @@ TEST("require that children of onear are not optimized") { auto expect_up = ap((new ONearBlueprint(10))-> addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). 
addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up)); - EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up), true); + TEST_DO(compare(*top_up, *expect_up, true)); } TEST("require that ANDNOT without children is optimized to empty search") { Blueprint::UP top_up = std::make_unique<AndNotBlueprint>(); auto expect_up = std::make_unique<EmptyBlueprint>(); - top_up = Blueprint::optimize(std::move(top_up)); + top_up = Blueprint::optimize(std::move(top_up), true); EXPECT_EQUAL(expect_up->asString(), top_up->asString()); } TEST("require that highest cost tier sorts last for OR") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {30, 3}, {20, 2}, {10, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {10, 1}, {20, 2}, {30, 3}}); - optimize_and_compare(std::move(top), std::move(expect)); + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("require that highest cost tier sorts last for AND") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {20, 3}, {30, 2}, {50, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {50, 1}, {30, 2}, {20, 3}}); - optimize_and_compare(std::move(top), std::move(expect)); + optimize_and_compare(std::move(top), std::move(expect), false); } template<typename BP> @@ -1251,7 +1336,7 @@ void verify_cost(make &&mk, double expect) { .cost(1.2).leaf(300) .cost(1.3).leaf(500); bp->setDocIdLimit(1000); - bp = Blueprint::optimize(std::move(bp)); + bp = Blueprint::optimize(std::move(bp), true); EXPECT_EQUAL(bp->cost(), expect); } diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index f910ff5be1b..1180206279d 100644 --- 
a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -48,7 +48,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { double calculate_relative_estimate() const override { abort(); } - void optimize(Blueprint* &, OptimizePass) override { abort(); } + void optimize(Blueprint* &, OptimizePass, bool) override { abort(); } const State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/tests/queryeval/flow/CMakeLists.txt b/searchlib/src/tests/queryeval/flow/CMakeLists.txt new file mode 100644 index 00000000000..70658d36f21 --- /dev/null +++ b/searchlib/src/tests/queryeval/flow/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_queryeval_flow_test_app TEST + SOURCES + queryeval_flow_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_queryeval_flow_test_app COMMAND searchlib_queryeval_flow_test_app) diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp new file mode 100644 index 00000000000..ceda30f169a --- /dev/null +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -0,0 +1,117 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/searchlib/queryeval/flow.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vector> +#include <random> + +using search::queryeval::AndFlow; +using search::queryeval::OrFlow; + +struct Item { + double rel_est; + double cost; + Item(double rel_est_in, double cost_in) noexcept + : rel_est(rel_est_in), cost(cost_in) {} + static void sort_for_and(std::vector<Item> &data) { + std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { + return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; + }); + } + static void sort_for_or(std::vector<Item> &data) { + std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { + return a.rel_est / a.cost > b.rel_est / b.cost; + }); + } + static double cost_of(const std::vector<Item> &data, auto flow) { + double cost = 0.0; + for (const Item &item: data) { + cost += flow.flow() * item.cost; + flow.add(item.rel_est); + } + return cost; + } + static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); } + static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); } +}; + +std::vector<Item> gen_data(size_t size) { + static std::mt19937 gen; + static std::uniform_real_distribution<double> rel_est(0.1, 0.9); + static std::uniform_real_distribution<double> cost(1.0, 10.0); + std::vector<Item> result; + result.reserve(size); + for (size_t i = 0; i < size; ++i) { + result.emplace_back(rel_est(gen), cost(gen)); + } + return result; +} + +template <typename T, typename F> +void each_perm(std::vector<T> &data, size_t k, F fun) { + if (k <= 1) { + fun(const_cast<const std::vector<T> &>(data)); + } else { + each_perm(data, k-1, fun); + for (size_t i = 0; i < k-1; ++i) { + if (k & 1) { + std::swap(data[0], data[k-1]); + } else { + std::swap(data[i], data[k-1]); + } + each_perm(data, k-1, fun); + } + } +} + +template <typename T, typename F> +void each_perm(std::vector<T> &data, F fun) { + each_perm(data, data.size(), 
fun); +} + +TEST(FlowTest, perm_test) { + std::set<std::vector<int>> seen; + std::vector<int> data = {1,2,3,4,5}; + auto hook = [&](const std::vector<int> &perm) { + EXPECT_EQ(perm.size(), 5); + seen.insert(perm); + }; + each_perm(data, hook); + EXPECT_EQ(seen.size(), 120); +} + +TEST(FlowTest, optimal_and_flow) { + for (size_t i = 0; i < 256; ++i) { + auto data = gen_data(7); + Item::sort_for_and(data); + double min_cost = Item::cost_of_and(data); + double max_cost = 0.0; + auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::cost_of_and(my_data); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n", + i, min_cost, max_cost, max_cost / min_cost); + } +} + +TEST(FlowTest, optimal_or_flow) { + for (size_t i = 0; i < 256; ++i) { + auto data = gen_data(7); + Item::sort_for_or(data); + double min_cost = Item::cost_of_or(data); + double max_cost = 0.0; + auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { + double my_cost = Item::cost_of_or(my_data); + EXPECT_LE(min_cost, my_cost); + max_cost = std::max(max_cost, my_cost); + }; + each_perm(data, check); + fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n", + i, min_cost, max_cost, max_cost / min_cost); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index 2a59a578ec9..aa6d922f23f 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/searchlib/query/tree/simplequery.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/fake_requestcontext.h> #include <vespa/searchlib/queryeval/fake_searchable.h> #include <vespa/searchlib/queryeval/simpleresult.h> @@ -10,8 +10,9 @@ #include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h> #include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h> #include <vespa/searchlib/test/document_weight_attribute_helper.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/weightedchildrenverifiers.h> -#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/gtest/gtest.h> using namespace search::query; using namespace search::queryeval; @@ -284,89 +285,101 @@ struct AlgoExhaustPastFixture : public FixtureBase }; -TEST_F("require that algorithm prunes bad hits after enough good ones are obtained", AlgoSimpleFixture) +TEST(ParallelWeakAndTest, require_that_algorithm_prunes_bad_hits_after_enough_good_ones_are_obtained) { + AlgoSimpleFixture f; FakeResult expect = FakeResult() .doc(1).score(1 * 1 + 4 * 1) .doc(2).score(1 * 2) .doc(3).score(1 * 3 + 4 * 3) .doc(5).score(1 * 5 + 4 * 5); - EXPECT_EQUAL(expect, f.result); + EXPECT_EQ(expect, f.result); } -TEST_F("require that algorithm uses subsearches as expected", AlgoSimpleFixture) { - EXPECT_EQUAL(SearchHistory() - .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1) - .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1) - .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2) - .unpack("PWAND", 2) - .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3) - .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3) - .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5) - .unpack("PWAND", 
5).seek("A", 5).step("A", 5).unpack("A", 5) - .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId), - f.spec.getHistory()); +TEST(ParallelWeakAndTest, require_that_algorithm_uses_subsearches_as_expected) +{ + AlgoSimpleFixture f; + EXPECT_EQ(SearchHistory() + .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1) + .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1) + .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2) + .unpack("PWAND", 2) + .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3) + .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3) + .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5) + .unpack("PWAND", 5).seek("A", 5).step("A", 5).unpack("A", 5) + .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); } -TEST_F("require that algorithm considers documents in the right order", AlgoAdvancedFixture) +TEST(ParallelWeakAndTest, require_that_algorithm_considers_documents_in_the_right_order) { - EXPECT_EQUAL(SimpleResult() - .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) - .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) - .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result)); + AlgoAdvancedFixture f; + EXPECT_EQ(SimpleResult() + .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) + .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) + .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result)); } -TEST_F("require that algorithm take initial docid for subsearches into account", AlgoSubsearchFixture) +TEST(ParallelWeakAndTest, require_that_algorithm_take_initial_docid_for_subsearches_into_account) { - EXPECT_EQUAL(FakeResult().doc(10).score(20), f.result); - EXPECT_EQUAL(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 
10).unpack("PWAND", 10) - .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId), - f.spec.getHistory()); + AlgoSubsearchFixture f; + EXPECT_EQ(FakeResult().doc(10).score(20), f.result); + EXPECT_EQ(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10) + .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); } -TEST_F("require that algorithm uses first match when two matches have same score", AlgoSameScoreFixture) +TEST(ParallelWeakAndTest, require_that_algorithm_uses_first_match_when_two_matches_have_same_score) { - EXPECT_EQUAL(FakeResult().doc(1).score(100), f.result); + AlgoSameScoreFixture f; + EXPECT_EQ(FakeResult().doc(1).score(100), f.result); } -TEST_F("require that algorithm uses initial score threshold (all hits greater)", AlgoScoreThresholdFixture(29)) +TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_all_hits_greater) { - EXPECT_EQUAL(FakeResult() - .doc(1).score(1 * 10 + 2 * 20) - .doc(2).score(1 * 30) - .doc(3).score(2 * 40), f.result); + AlgoScoreThresholdFixture f(29); + EXPECT_EQ(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40), f.result); } -TEST_F("require that algorithm uses initial score threshold (2 hits greater)", AlgoScoreThresholdFixture(30)) +TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_2_hits_greater) { - EXPECT_EQUAL(FakeResult() - .doc(1).score(1 * 10 + 2 * 20) - .doc(3).score(2 * 40), f.result); + AlgoScoreThresholdFixture f(30); + EXPECT_EQ(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(3).score(2 * 40), f.result); } -TEST_F("require that algorithm uses initial score threshold (1 hit greater)", AlgoScoreThresholdFixture(50)) +TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_1_hit_greater) { - EXPECT_EQUAL(FakeResult() - .doc(3).score(2 * 40), 
f.result); + AlgoScoreThresholdFixture f(50); + EXPECT_EQ(FakeResult() + .doc(3).score(2 * 40), f.result); } -TEST_F("require that algorithm uses initial score threshold (0 hits greater)", AlgoScoreThresholdFixture(80)) +TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_0_hits_greater) { - EXPECT_EQUAL(FakeResult(), f.result); + AlgoScoreThresholdFixture f(80); + EXPECT_EQ(FakeResult(), f.result); } -TEST_F("require that algorithm handle large scores", AlgoLargeScoresFixture(60000L * 70000L)) +TEST(ParallelWeakAndTest, require_that_algorithm_handles_large_scores) { - EXPECT_EQUAL(FakeResult() - .doc(1).score(60000L * 60000L + 70000L * 80000L) - .doc(3).score(70000L * 90000L), f.result); + AlgoLargeScoresFixture f(60000L * 70000L); + EXPECT_EQ(FakeResult() + .doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.result); } -TEST_F("require that algorithm steps all present terms when past is empty", AlgoExhaustPastFixture(25)) +TEST(ParallelWeakAndTest, require_that_algorithm_steps_all_present_terms_when_past_is_empty) { - EXPECT_EQUAL(FakeResult() - .doc(3).score(40) - .doc(5).score(30), f.result); + AlgoExhaustPastFixture f(25); + EXPECT_EQ(FakeResult() + .doc(3).score(40) + .doc(5).score(30), f.result); } struct HeapFixture @@ -380,14 +393,15 @@ struct HeapFixture } }; -TEST_F("require that scores are collected in batches before adjusting heap", HeapFixture) +TEST(ParallelWeakAndTest, require_that_scores_are_collected_in_batches_before_adjusting_heap) { - EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6), - f.result); - EXPECT_EQUAL(ScoresHistory().add(Scores().add(1).add(2)) - .add(Scores().add(3).add(4)) - .add(Scores().add(5).add(6)), - f.spec.heap.history); + HeapFixture f; + EXPECT_EQ(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6), + f.result); + EXPECT_EQ(ScoresHistory().add(Scores().add(1).add(2)) + .add(Scores().add(3).add(4)) + 
.add(Scores().add(5).add(6)), + f.spec.heap.history); } @@ -400,13 +414,14 @@ struct SearchFixture : public FixtureBase } }; -TEST_F("require that dot product score is calculated", SearchFixture) +TEST(ParallelWeakAndTest, require_that_dot_product_score_is_calculated) { + SearchFixture f; FakeResult expect = FakeResult() .doc(1).score(1 * 10 + 2 * 20) .doc(2).score(1 * 30) .doc(3).score(2 * 40); - EXPECT_EQUAL(expect, f.result); + EXPECT_EQ(expect, f.result); } @@ -452,8 +467,9 @@ struct BlueprintHitsFixture : public BlueprintFixtureBase bool maxScoreFirst() { SearchIterator::UP itr = iterator(); const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get()); - ASSERT_EQUAL(2u, wand->get_num_terms()); - return (wand->get_term_weight(0) == 20); + bool failed = false; + EXPECT_EQ(2u, wand->get_num_terms()) << (failed = true, ""); + return failed ? false : (wand->get_term_weight(0) == 20); } }; @@ -468,8 +484,11 @@ struct ThresholdBoostFixture : public FixtureBase SearchIterator::UP si(spec.create()); result = doSearch(*si, spec.rootMatchData); } + ~ThresholdBoostFixture(); }; +ThresholdBoostFixture::~ThresholdBoostFixture() = default; + struct BlueprintFixture : public BlueprintFixtureBase { BlueprintFixture() : BlueprintFixtureBase() { @@ -497,89 +516,99 @@ struct BlueprintAsStringFixture : public BlueprintFixtureBase }; -TEST_F("require that hit estimate is calculated", BlueprintFixture) +TEST(ParallelWeakAndTest, require_that_hit_estimate_is_calculated) { + BlueprintFixture f; Node::UP term = f.spec.createNode(); Blueprint::UP bp = f.blueprint(*term); - EXPECT_EQUAL(4u, bp->getState().estimate().estHits); + EXPECT_EQ(4u, bp->getState().estimate().estHits); } -TEST_F("require that blueprint picks up docid limit", BlueprintFixture) +TEST(ParallelWeakAndTest, require_that_blueprint_picks_up_docid_limit) { + BlueprintFixture f; Node::UP term = f.spec.createNode(57, 67, 77.7); Blueprint::UP bp = f.blueprint(*term); const ParallelWeakAndBlueprint * 
pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); - EXPECT_EQUAL(0u, pbp->get_docid_limit()); + EXPECT_EQ(0u, pbp->get_docid_limit()); bp->setDocIdLimit(1000); - EXPECT_EQUAL(1000u, pbp->get_docid_limit()); + EXPECT_EQ(1000u, pbp->get_docid_limit()); } -TEST_F("require that scores to track, score threshold and threshold boost factor is passed down from query node to blueprint", BlueprintFixture) +TEST(ParallelWeakAndTest, require_that_scores_to_track_score_threshold_and_threshold_boost_factor_is_passed_down_from_query_node_to_blueprint) { + BlueprintFixture f; Node::UP term = f.spec.createNode(57, 67, 77.7); Blueprint::UP bp = f.blueprint(*term); const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); - EXPECT_EQUAL(57u, pbp->getScores().getScoresToTrack()); - EXPECT_EQUAL(67u, pbp->getScoreThreshold()); - EXPECT_EQUAL(77.7, pbp->getThresholdBoostFactor()); + EXPECT_EQ(57u, pbp->getScores().getScoresToTrack()); + EXPECT_EQ(67u, pbp->getScoreThreshold()); + EXPECT_EQ(77.7, pbp->getThresholdBoostFactor()); } -TEST_F("require that search iterator is correctly setup and executed", BlueprintFixture) +TEST(ParallelWeakAndTest, require_that_search_iterator_is_correctly_setup_and_executed) { + BlueprintFixture f; FakeResult expect = FakeResult() .doc(1).score(1 * 10 + 2 * 20) .doc(2).score(1 * 30) .doc(3).score(2 * 40); - EXPECT_EQUAL(expect, f.search()); + EXPECT_EQ(expect, f.search()); } -TEST_F("require that initial score threshold can be specified (1 hit greater)", BlueprintFixture) +TEST(ParallelWeakAndTest, require_that_initial_score_threshold_can_be_specified_case_1_hit_greater) { + BlueprintFixture f; Node::UP term = f.spec.createNode(3, 50); - EXPECT_EQUAL(FakeResult() - .doc(3).score(2 * 40), f.search(*term)); + EXPECT_EQ(FakeResult() + .doc(3).score(2 * 40), f.search(*term)); } -TEST_F("require that large scores are handled", BlueprintLargeScoresFixture) +TEST(ParallelWeakAndTest, 
require_that_large_scores_are_handled) { + BlueprintLargeScoresFixture f; Node::UP term = f.spec.createNode(3, 60000L * 70000L); - EXPECT_EQUAL(FakeResult() - .doc(1).score(60000L * 60000L + 70000L * 80000L) - .doc(3).score(70000L * 90000L), f.search(*term)); + EXPECT_EQ(FakeResult() + .doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.search(*term)); } -TEST_F("require that docid limit is propagated to search iterator", BlueprintFixture()) +TEST(ParallelWeakAndTest, require_that_docid_limit_is_propagated_to_search_iterator) { + BlueprintFixture f1; f1.spec.docIdLimit = 4050; SearchIterator::UP itr = f1.iterator(); const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get()); - EXPECT_EQUAL(4050u, wand->getMatchParams().docIdLimit); + EXPECT_EQ(4050u, wand->getMatchParams().docIdLimit); } -TEST_FFF("require that terms are sorted for maximum skipping", - BlueprintHitsFixture(50, 50, 100), - BlueprintHitsFixture(60, 50, 100), - BlueprintHitsFixture(80, 50, 100)) +TEST(ParallelWeakAndTest, require_that_terms_are_sorted_for_maximum_skipping) { + BlueprintHitsFixture f1(50, 50, 100); + BlueprintHitsFixture f2(60, 50, 100); + BlueprintHitsFixture f3(80, 50, 100); EXPECT_TRUE(f1.maxScoreFirst()); EXPECT_TRUE(f2.maxScoreFirst()); EXPECT_FALSE(f3.maxScoreFirst()); } -TEST_FF("require that threshold boosting works as expected", ThresholdBoostFixture(1.0), ThresholdBoostFixture(2.0)) -{ - EXPECT_EQUAL(FakeResult() - .doc(1).score(1000) - .doc(2).score(2000) - .doc(3).score(3000) - .doc(4).score(4200), f1.result); - EXPECT_EQUAL(FakeResult() - .doc(2).score(2000) - .doc(4).score(4200), f2.result); +TEST(ParallelWeakAndTest, require_that_threshold_boosting_works_as_expected) +{ + ThresholdBoostFixture f1(1.0); + ThresholdBoostFixture f2(2.0); + EXPECT_EQ(FakeResult() + .doc(1).score(1000) + .doc(2).score(2000) + .doc(3).score(3000) + .doc(4).score(4200), f1.result); + EXPECT_EQ(FakeResult() + .doc(2).score(2000) + 
.doc(4).score(4200), f2.result); } -TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) +TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works) { + BlueprintAsStringFixture f; Node::UP term = f.spec.createNode(57, 67); Blueprint::UP bp = f.blueprint(*term); vespalib::string expStr = "search::queryeval::ParallelWeakAndBlueprint {\n" @@ -599,6 +628,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " tree_size: 2\n" " allow_termwise_eval: false\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " _weights: std::vector {\n" @@ -622,12 +652,13 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) " tree_size: 1\n" " allow_termwise_eval: true\n" " }\n" + " cost: 1\n" " sourceId: 4294967295\n" " docid_limit: 0\n" " }\n" " }\n" "}\n"; - EXPECT_EQUAL(expStr, bp->asString()); + EXPECT_EQ(expStr, bp->asString()); } using MatchParams = ParallelWeakAndSearch::MatchParams; @@ -659,7 +690,7 @@ SearchIterator::UP create_wand(bool use_dww, assert(childrenMatchData->getNumTermFields() == dict_entries.size()); wand::Terms terms; for (size_t i = 0; i < dict_entries.size(); ++i) { - terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), weights[i], dict_entries[i].posting_size, childrenMatchData->resolveTermField(handles[i]))); @@ -684,11 +715,12 @@ private: mutable DummyHeap _dummy_heap; }; -TEST("verify search iterator conformance") { +TEST(ParallelWeakAndTest, verify_search_iterator_conformance) +{ for (bool use_dww: {false, true}) { Verifier verifier(use_dww); verifier.verify(); } } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/queryeval_test.cpp 
b/searchlib/src/tests/queryeval/queryeval_test.cpp index a403f7a7c23..3fabb45a7ff 100644 --- a/searchlib/src/tests/queryeval/queryeval_test.cpp +++ b/searchlib/src/tests/queryeval/queryeval_test.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/regex/regex.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/initrange.h> #include <vespa/searchlib/queryeval/andnotsearch.h> #include <vespa/searchlib/queryeval/andsearch.h> @@ -19,9 +19,9 @@ #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/attribute/singleboolattribute.h> #include <vespa/searchcommon/common/growstrategy.h> -#include <vespa/vespalib/test/insertion_operators.h> #include <vespa/searchlib/fef/fef.h> #include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/gtest/gtest.h> #include <vespa/log/log.h> LOG_SETUP("query_eval_test"); @@ -87,15 +87,16 @@ std::unique_ptr<sourceselector::Iterator> selector() { void testMultiSearch(SearchIterator & search) { auto & ms = dynamic_cast<MultiSearch &>(search); ms.initRange(3, 309); - EXPECT_EQUAL(2u, ms.getDocId()); - EXPECT_EQUAL(309u, ms.getEndId()); + EXPECT_EQ(2u, ms.getDocId()); + EXPECT_EQ(309u, ms.getEndId()); for (const auto & child : ms.getChildren()) { - EXPECT_EQUAL(2u, child->getDocId()); - EXPECT_EQUAL(309u, child->getEndId()); + EXPECT_EQ(2u, child->getDocId()); + EXPECT_EQ(309u, child->getEndId()); } } -TEST("test that OR.andWith is a NOOP") { +TEST(QueryEvalTest, test_that_or_andwith_is_a_noop) +{ TermFieldMatchData tfmd; MultiSearch::Children ch; ch.emplace_back(new TrueSearch(tfmd)); @@ -106,7 +107,8 @@ TEST("test that OR.andWith is a NOOP") { EXPECT_TRUE(search->andWith(std::move(filter), 1)); } -TEST("test that non-strict AND.andWith is a NOOP") { +TEST(QueryEvalTest, test_that_non_strict_and_andwidth_is_a_noop) +{ TermFieldMatchData tfmd; 
MultiSearch::Children ch; ch.emplace_back(new TrueSearch(tfmd)); @@ -117,7 +119,8 @@ TEST("test that non-strict AND.andWith is a NOOP") { EXPECT_TRUE(filter); } -TEST("test that strict AND.andWith steals filter and places it correctly based on estimate") { +TEST(QueryEvalTest, test_that_strict_and_andwidth_steals_filter_and_places_it_correctly_based_on_estimate) +{ TermFieldMatchData tfmd; std::vector<SearchIterator *> ch; ch.emplace_back(new TrueSearch(tfmd)); @@ -129,19 +132,19 @@ TEST("test that strict AND.andWith steals filter and places it correctly based o EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); const auto & andChildren = dynamic_cast<MultiSearch &>(*search).getChildren(); - EXPECT_EQUAL(3u, andChildren.size()); - EXPECT_EQUAL(ch[0], andChildren[0].get()); - EXPECT_EQUAL(filterP, andChildren[1].get()); - EXPECT_EQUAL(ch[1], andChildren[2].get()); + EXPECT_EQ(3u, andChildren.size()); + EXPECT_EQ(ch[0], andChildren[0].get()); + EXPECT_EQ(filterP, andChildren[1].get()); + EXPECT_EQ(ch[1], andChildren[2].get()); auto filter2 = std::make_unique<TrueSearch>(tfmd); SearchIterator * filter2P = filter2.get(); EXPECT_TRUE(nullptr == search->andWith(std::move(filter2), 6).get()); - EXPECT_EQUAL(4u, andChildren.size()); - EXPECT_EQUAL(filter2P, andChildren[0].get()); - EXPECT_EQUAL(ch[0], andChildren[1].get()); - EXPECT_EQUAL(filterP, andChildren[2].get()); - EXPECT_EQUAL(ch[1], andChildren[3].get()); + EXPECT_EQ(4u, andChildren.size()); + EXPECT_EQ(filter2P, andChildren[0].get()); + EXPECT_EQ(ch[0], andChildren[1].get()); + EXPECT_EQ(filterP, andChildren[2].get()); + EXPECT_EQ(ch[1], andChildren[3].get()); } class NonStrictTrueSearch : public TrueSearch @@ -151,7 +154,8 @@ public: [[nodiscard]] Trinary is_strict() const override { return Trinary::False; } }; -TEST("test that strict AND.andWith does not place non-strict iterator first") { +TEST(QueryEvalTest, test_that_strict_and_andwidth_does_not_place_non_strict_iterator_first) +{ 
TermFieldMatchData tfmd; std::vector<SearchIterator *> ch; ch.emplace_back(new TrueSearch(tfmd)); @@ -162,34 +166,38 @@ TEST("test that strict AND.andWith does not place non-strict iterator first") { SearchIterator * filterP = filter.get(); EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 6).get()); const auto & andChildren = dynamic_cast<MultiSearch &>(*search).getChildren(); - EXPECT_EQUAL(3u, andChildren.size()); - EXPECT_EQUAL(ch[0], andChildren[0].get()); - EXPECT_EQUAL(filterP, andChildren[1].get()); - EXPECT_EQUAL(ch[1], andChildren[2].get()); + EXPECT_EQ(3u, andChildren.size()); + EXPECT_EQ(ch[0], andChildren[0].get()); + EXPECT_EQ(filterP, andChildren[1].get()); + EXPECT_EQ(ch[1], andChildren[2].get()); } -TEST("test that strict rank search forwards to its greedy first child") { +TEST(QueryEvalTest, test_that_strict_rank_search_forwards_to_its_greedy_first_child) +{ TermFieldMatchData tfmd; SearchIterator::UP search = RankSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, true); auto filter = std::make_unique<TrueSearch>(tfmd); EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); } -TEST("test that non-strict rank search does NOT forward to its greedy first child") { +TEST(QueryEvalTest, test_that_non_strict_rank_search_does_not_forward_to_its_greedy_first_child) +{ TermFieldMatchData tfmd; SearchIterator::UP search = RankSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, false); auto filter = std::make_unique<TrueSearch>(tfmd); EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get()); } -TEST("test that strict andnot search forwards to its greedy first child") { +TEST(QueryEvalTest, test_that_strict_andnot_search_forwards_to_its_greedy_first_child) +{ TermFieldMatchData tfmd; SearchIterator::UP search = AndNotSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, true); auto filter = std::make_unique<TrueSearch>(tfmd); 
EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); } -TEST("test that non-strict andnot search does NOT forward to its greedy first child") { +TEST(QueryEvalTest, test_that_non_strict_andnot_search_does_not_forward_to_its_greedy_first_child) +{ TermFieldMatchData tfmd; SearchIterator::UP search = AndNotSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, false); auto filter = std::make_unique<TrueSearch>(tfmd); @@ -199,13 +207,10 @@ TEST("test that non-strict andnot search does NOT forward to its greedy first ch void expect_match(std::string input, std::string regexp) { using vespalib::Regex; Regex pattern = Regex::from_pattern(regexp, Regex::Options::DotMatchesNewline); - if (! EXPECT_TRUE(pattern.partial_match(input))) { - fprintf(stderr, "no match for pattern: >>>%s<<< in input:\n>>>\n%s\n<<<\n", - regexp.c_str(), input.c_str()); - } + EXPECT_TRUE(pattern.partial_match(input)) << "no match for pattern: >>>" << regexp << "<<< in input: >>>\n" << input << "<<<"; } -TEST("testAnd") { +TEST(QueryEvalTest, test_and) { SimpleResult a; SimpleResult b; a.addHit(5).addHit(10).addHit(16).addHit(30); @@ -219,17 +224,17 @@ TEST("testAnd") { SearchIterator::UP and_ab = and_b->createSearch(*md, true); EXPECT_TRUE(dynamic_cast<const AndSearch *>(and_ab.get()) != nullptr); - EXPECT_EQUAL(4u, dynamic_cast<AndSearch &>(*and_ab).estimate()); + EXPECT_EQ(4u, dynamic_cast<AndSearch &>(*and_ab).estimate()); SimpleResult res; res.search(*and_ab); SimpleResult expect; expect.addHit(5).addHit(30); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); SearchIterator::UP filter_ab = and_b->createFilterSearch(true, upper_bound); SimpleResult filter_res; filter_res.search(*filter_ab); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); std::string dump = filter_ab->asString(); expect_match(dump, "upper"); expect_match(dump, "AndSearchStrict.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper"); @@ -239,10 +244,8 @@ TEST("testAnd") { 
expect_match(dump, "AndSearchNoStrict.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower"); } -TEST("mutisearch and initRange") { -} - -TEST("testOr") { +TEST(QueryEvalTest, test_or) +{ { SimpleResult a; SimpleResult b; @@ -260,12 +263,12 @@ TEST("testOr") { res.search(*or_ab); SimpleResult expect; expect.addHit(5).addHit(10).addHit(17).addHit(30); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); SearchIterator::UP filter_ab = or_b->createFilterSearch(true, upper_bound); SimpleResult filter_res; filter_res.search(*filter_ab); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); std::string dump = filter_ab->asString(); expect_match(dump, "upper"); expect_match(dump, "OrLikeSearch.true.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper"); @@ -305,34 +308,35 @@ struct MultiSearchRemoveTest { static SearchIterator::UP remove(MultiSearch &ms, size_t idx) { return ms.remove(idx); } }; -TEST("testMultiSearch") { +TEST(QueryEvalTest, test_multi_search) +{ std::vector<SearchIterator *> orig; orig.emplace_back(new EmptySearch()); orig.emplace_back(new EmptySearch()); orig.emplace_back(new EmptySearch()); TestInsertRemoveSearch ms({orig[0], orig[1], orig[2]}); - EXPECT_EQUAL(3u, ms.getChildren().size()); - EXPECT_EQUAL(orig[0], ms.getChildren()[0].get()); - EXPECT_EQUAL(orig[1], ms.getChildren()[1].get()); - EXPECT_EQUAL(orig[2], ms.getChildren()[2].get()); - EXPECT_EQUAL(0u, ms._accumInsert); - EXPECT_EQUAL(0u, ms._accumRemove); - - EXPECT_EQUAL(orig[1], MultiSearchRemoveTest::remove(ms, 1).get()); - EXPECT_EQUAL(2u, ms.getChildren().size()); - EXPECT_EQUAL(orig[0], ms.getChildren()[0].get()); - EXPECT_EQUAL(orig[2], ms.getChildren()[1].get()); - EXPECT_EQUAL(0u, ms._accumInsert); - EXPECT_EQUAL(1u, ms._accumRemove); + EXPECT_EQ(3u, ms.getChildren().size()); + EXPECT_EQ(orig[0], ms.getChildren()[0].get()); + EXPECT_EQ(orig[1], ms.getChildren()[1].get()); + EXPECT_EQ(orig[2], ms.getChildren()[2].get()); + EXPECT_EQ(0u, ms._accumInsert); + EXPECT_EQ(0u, 
ms._accumRemove); + + EXPECT_EQ(orig[1], MultiSearchRemoveTest::remove(ms, 1).get()); + EXPECT_EQ(2u, ms.getChildren().size()); + EXPECT_EQ(orig[0], ms.getChildren()[0].get()); + EXPECT_EQ(orig[2], ms.getChildren()[1].get()); + EXPECT_EQ(0u, ms._accumInsert); + EXPECT_EQ(1u, ms._accumRemove); orig.emplace_back(new EmptySearch()); ms.insert(1, SearchIterator::UP(orig.back())); - EXPECT_EQUAL(3u, ms.getChildren().size()); - EXPECT_EQUAL(orig[0], ms.getChildren()[0].get()); - EXPECT_EQUAL(orig[3], ms.getChildren()[1].get()); - EXPECT_EQUAL(orig[2], ms.getChildren()[2].get()); - EXPECT_EQUAL(1u, ms._accumInsert); - EXPECT_EQUAL(1u, ms._accumRemove); + EXPECT_EQ(3u, ms.getChildren().size()); + EXPECT_EQ(orig[0], ms.getChildren()[0].get()); + EXPECT_EQ(orig[3], ms.getChildren()[1].get()); + EXPECT_EQ(orig[2], ms.getChildren()[2].get()); + EXPECT_EQ(1u, ms._accumInsert); + EXPECT_EQ(1u, ms._accumRemove); } class DummySingleValueBitNumericAttributeBlueprint : public SimpleLeafBlueprint @@ -370,7 +374,8 @@ private: }; -TEST("testAndNot") { +TEST(QueryEvalTest, test_andnot) +{ { SimpleResult a; SimpleResult b; @@ -388,12 +393,12 @@ TEST("testAndNot") { res.search(*andnot_ab); SimpleResult expect; expect.addHit(10); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); SearchIterator::UP filter_ab = andnot_b->createFilterSearch(true, upper_bound); SimpleResult filter_res; filter_res.search(*filter_ab); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); std::string dump = filter_ab->asString(); expect_match(dump, "upper"); expect_match(dump, "AndNotSearch.*SimpleSearch.*<strict,upper>.*SimpleSearch.*<nostrict,lower>"); @@ -420,7 +425,7 @@ TEST("testAndNot") { SimpleResult expect; expect.addHit(1).addHit(10); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); } { SimpleResult a; @@ -446,13 +451,14 @@ TEST("testAndNot") { SimpleResult expect; expect.addHit(1).addHit(10); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); } { } } -TEST("testRank") { 
+TEST(QueryEvalTest, test_rank) +{ { SimpleResult a; SimpleResult b; @@ -471,7 +477,7 @@ TEST("testRank") { SimpleResult expect; expect.addHit(5).addHit(10).addHit(16).addHit(30); - EXPECT_EQUAL(res, expect); + EXPECT_EQ(res, expect); } } @@ -600,7 +606,8 @@ getExpectedSlime() { "}"; } -TEST("testDump") { +TEST(QueryEvalTest, test_dump) +{ using SBChild = SourceBlenderSearch::Child; SearchIterator::UP search = AndSearch::create( { @@ -622,13 +629,13 @@ TEST("testDump") { auto s = slime.toString(); vespalib::Slime expectedSlime; vespalib::slime::JsonFormat::decode(getExpectedSlime(), expectedSlime); - EXPECT_EQUAL(expectedSlime, slime); + EXPECT_EQ(expectedSlime, slime); // fprintf(stderr, "%s", search->asString().c_str()); } -TEST("testFieldSpec") { - EXPECT_EQUAL(8u, sizeof(FieldSpecBase)); - EXPECT_EQUAL(72u, sizeof(FieldSpec)); +TEST(QueryEvalTest, test_field_spec) { + EXPECT_EQ(8u, sizeof(FieldSpecBase)); + EXPECT_EQ(72u, sizeof(FieldSpec)); } @@ -652,9 +659,9 @@ std::vector<size_t> fill_vector(size_t begin, size_t end) { void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect) { std::vector<size_t> actual = vectorize(unpack); - EXPECT_EQUAL(unpack.empty(), expect.empty()); - EXPECT_EQUAL(unpack.unpackAll(), (expect.size() == unpack_child_cnt)); - EXPECT_EQUAL(expect, actual); + EXPECT_EQ(unpack.empty(), expect.empty()); + EXPECT_EQ(unpack.unpackAll(), (expect.size() == unpack_child_cnt)); + EXPECT_EQ(expect, actual); size_t child_idx = 0; for (size_t next_unpack: expect) { while (child_idx < next_unpack) { @@ -664,19 +671,23 @@ void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect) } } -TEST("require that unpack info has expected memory footprint") { - EXPECT_EQUAL(32u, sizeof(UnpackInfo)); +TEST(QueryEvalTest, require_that_unpack_info_has_expected_memory_footprint) +{ + EXPECT_EQ(32u, sizeof(UnpackInfo)); } -TEST("require that unpack info starts out empty") { +TEST(QueryEvalTest, 
require_that_unpack_info_starts_out_empty) +{ verify_unpack(UnpackInfo(), {}); } -TEST("require that unpack info force all unpacks all children") { +TEST(QueryEvalTest, require_that_unpack_info_force_all_unpacks_all_children) +{ verify_unpack(UnpackInfo().forceAll(), fill_vector(0, unpack_child_cnt)); } -TEST("require that adding a large index to unpack info forces unpack all") { +TEST(QueryEvalTest, require_that_adding_a_large_index_to_unpack_info_forces_unpack_all) +{ UnpackInfo unpack; unpack.add(0); unpack.add(max_unpack_index); @@ -685,7 +696,8 @@ TEST("require that adding a large index to unpack info forces unpack all") { verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); } -TEST("require that adding too many children to unpack info forces unpack all") { +TEST(QueryEvalTest, require_that_adding_too_many_children_to_unpack_info_forces_unpack_all) +{ UnpackInfo unpack; std::vector<size_t> expect; for (size_t i = 0; i < max_unpack_size; ++i) { @@ -697,19 +709,22 @@ TEST("require that adding too many children to unpack info forces unpack all") { verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); } -TEST("require that adding normal unpack info indexes works") { +TEST(QueryEvalTest, require_that_adding_normal_unpack_info_indexes_works) +{ UnpackInfo unpack; unpack.add(3).add(5).add(7).add(14).add(50); verify_unpack(unpack, {3,5,7,14,50}); } -TEST("require that adding unpack info indexes out of order works") { +TEST(QueryEvalTest, require_that_adding_unpack_info_indexes_out_of_order_works) +{ UnpackInfo unpack; unpack.add(5).add(3).add(7).add(50).add(14); verify_unpack(unpack, {3,5,7,14,50}); } -TEST("require that basic insert remove of unpack info works") { +TEST(QueryEvalTest, require_that_basic_insert_remove_of_unpack_info_works) +{ UnpackInfo unpack; unpack.insert(1).insert(3); verify_unpack(unpack, {1, 3}); @@ -729,7 +744,8 @@ TEST("require that basic insert remove of unpack info works") { verify_unpack(unpack, {}); } -TEST("require that inserting 
too many indexs into unpack info forces unpack all") { +TEST(QueryEvalTest, require_that_inserting_too_many_indexes_into_unpack_info_forces_unpack_all) +{ for (bool unpack_inserted: {true, false}) { UnpackInfo unpack; for (size_t i = 0; i < max_unpack_size; ++i) { @@ -745,7 +761,8 @@ TEST("require that inserting too many indexs into unpack info forces unpack all" } } -TEST("require that implicitly overflowing indexes during insert in unpack info forces unpack all") { +TEST(QueryEvalTest, require_that_implicitly_overflowing_indexes_during_insert_in_unpack_info_forces_unpack_all) +{ for (bool unpack_inserted: {true, false}) { UnpackInfo unpack; unpack.insert(max_unpack_index); @@ -755,7 +772,8 @@ TEST("require that implicitly overflowing indexes during insert in unpack info f } } -TEST("require that inserting a too high index into unpack info forces unpack all") { +TEST(QueryEvalTest, require_that_inserting_a_too_high_index_into_unpack_info_forces_unpack_all) +{ for (bool unpack_inserted: {true, false}) { UnpackInfo unpack; for (size_t i = 0; i < 10; ++i) { @@ -771,7 +789,7 @@ TEST("require that inserting a too high index into unpack info forces unpack all } } -TEST("require that we can insert indexes into unpack info that we do not unpack") { +TEST(QueryEvalTest, require_that_we_can_insert_indexes_into_unpack_info_that_we_do_not_unpack) { UnpackInfo unpack; unpack.add(10).add(20).add(30); verify_unpack(unpack, {10, 20, 30}); @@ -779,65 +797,85 @@ TEST("require that we can insert indexes into unpack info that we do not unpack" verify_unpack(unpack, {11, 22, 33}); } -TEST("testTrueSearch") { - EXPECT_EQUAL(16u, sizeof(EmptySearch)); - EXPECT_EQUAL(24u, sizeof(TrueSearch)); +TEST(QueryEvalTest, test_true_search) +{ + EXPECT_EQ(16u, sizeof(EmptySearch)); + EXPECT_EQ(24u, sizeof(TrueSearch)); TermFieldMatchData tfmd; TrueSearch t(tfmd); - EXPECT_EQUAL(0u, t.getDocId()); - EXPECT_EQUAL(0u, t.getEndId()); + EXPECT_EQ(0u, t.getDocId()); + EXPECT_EQ(0u, t.getEndId()); 
t.initRange(7, 10); - EXPECT_EQUAL(6u, t.getDocId()); - EXPECT_EQUAL(10u, t.getEndId()); + EXPECT_EQ(6u, t.getDocId()); + EXPECT_EQ(10u, t.getEndId()); EXPECT_TRUE(t.seek(9)); - EXPECT_EQUAL(9u, t.getDocId()); + EXPECT_EQ(9u, t.getDocId()); EXPECT_FALSE(t.isAtEnd()); EXPECT_TRUE(t.seek(10)); - EXPECT_EQUAL(10u, t.getDocId()); + EXPECT_EQ(10u, t.getDocId()); EXPECT_TRUE(t.isAtEnd()); t.initRange(4, 14); - EXPECT_EQUAL(3u, t.getDocId()); - EXPECT_EQUAL(14u, t.getEndId()); + EXPECT_EQ(3u, t.getDocId()); + EXPECT_EQ(14u, t.getEndId()); EXPECT_FALSE(t.isAtEnd()); } -TEST("test InitRangeVerifier") { +TEST(QueryEvalTest, test_init_range_verifier) +{ InitRangeVerifier ir; - EXPECT_EQUAL(207u, ir.getDocIdLimit()); - EXPECT_EQUAL(41u, ir.getExpectedDocIds().size()); + EXPECT_EQ(207u, ir.getDocIdLimit()); + EXPECT_EQ(41u, ir.getExpectedDocIds().size()); auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), 300); size_t numInverted = 300 - 41 - 1; - EXPECT_EQUAL(numInverted, inverted.size()); - EXPECT_EQUAL(2u, inverted[0]); - EXPECT_EQUAL(299u, inverted[numInverted - 1]); + EXPECT_EQ(numInverted, inverted.size()); + EXPECT_EQ(2u, inverted[0]); + EXPECT_EQ(299u, inverted[numInverted - 1]); ir.verify(*ir.createIterator(ir.getExpectedDocIds(), false)); ir.verify(*ir.createIterator(ir.getExpectedDocIds(), true)); } -TEST("Test multisearch and andsearchstrict iterators adheres to initRange") { +TEST(QueryEvalTest, test_multisearch_and_andsearchstrict_iterators_adheres_to_init_range) +{ InitRangeVerifier ir; - ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false), - ir.createFullIterator() }, false)); - - ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true), - ir.createFullIterator() }, true)); + { + SCOPED_TRACE("non-strict"); + ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false), + ir.createFullIterator() }, false)); + } + { + SCOPED_TRACE("strict"); + ir.verify( AndSearch::create({ 
ir.createIterator(ir.getExpectedDocIds(), true), + ir.createFullIterator() }, true)); + } } -TEST("Test andnotsearchstrict iterators adheres to initRange") { +TEST(QueryEvalTest, test_andnotsearchstrict_iterators_adheres_to_init_range) { InitRangeVerifier ir; - - TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false), - ir.createEmptyIterator() }, false))); - TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true), - ir.createEmptyIterator() }, true))); + + { + SCOPED_TRACE("non-strict"); + ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false), + ir.createEmptyIterator() }, false)); + } + { + SCOPED_TRACE("strict"); + ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true), + ir.createEmptyIterator() }, true)); + } auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), ir.getDocIdLimit()); - TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator(), - ir.createIterator(inverted, false) }, false))); - TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator(), - ir.createIterator(inverted, false) }, true))); + { + SCOPED_TRACE("non-strict full"); + ir.verify( AndNotSearch::create({ir.createFullIterator(), + ir.createIterator(inverted, false) }, false)); + } + { + SCOPED_TRACE("strict full"); + ir.verify( AndNotSearch::create({ir.createFullIterator(), + ir.createIterator(inverted, false) }, true)); + } } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index d05e6c8e4f4..7c535e5d3d5 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -46,7 +46,7 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul } Blueprint::UP finalize(Blueprint::UP bp, 
bool strict) { - Blueprint::UP result = Blueprint::optimize(std::move(bp)); + Blueprint::UP result = Blueprint::optimize(std::move(bp), true); result->fetchPostings(ExecuteInfo::createForTest(strict)); result->freeze(); return result; diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp index 3ca35221c50..3a10ed6df53 100644 --- a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp +++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp @@ -1,5 +1,4 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/queryeval/andnotsearch.h> @@ -8,11 +7,12 @@ #include <vespa/searchlib/queryeval/termwise_search.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/searchlib/queryeval/termwise_blueprint_helper.h> -#include <vespa/vespalib/test/insertion_operators.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/vespalib/objects/visit.hpp> +#include <vespa/vespalib/gtest/gtest.h> using namespace vespalib; using namespace search; @@ -192,7 +192,10 @@ std::vector<uint32_t> make_expect(uint32_t begin, uint32_t end) { return expect; } -void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end) { +void +verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end, const vespalib::string& label) +{ + SCOPED_TRACE(label); std::vector<uint32_t> actual; search.initRange(begin, end); for (uint32_t docid = begin; docid < end; ++docid) { @@ -200,7 +203,7 @@ void verify(const 
std::vector<uint32_t> &expect, SearchIterator &search, uint32_ actual.push_back(docid); } } - EXPECT_EQUAL(expect, actual); + EXPECT_EQ(expect, actual); } //----------------------------------------------------------------------------- @@ -213,95 +216,107 @@ MatchData::UP make_match_data() { //----------------------------------------------------------------------------- -TEST("require that pseudo term produces correct results") { - TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6)); - TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6)); - TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6)); - TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6)); - TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4)); - TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4)); +TEST(TermwiseEvalTest, require_that_pseudo_term_produces_correct_results) +{ + verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6, "strict full"); + verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6, "non-strict full"); + verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6, "strict last"); + verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6, "non-strict last"); + verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4, "strict first"); + verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4, "non-strict first"); } -TEST("require that normal search gives expected results") { +TEST(TermwiseEvalTest, require_that_normal_search_gives_expected_results) +{ auto search = make_search(true); - TEST_DO(verify(make_expect(1, 10), *search, 1, 10)); + verify(make_expect(1, 10), *search, 1, 10, "strict normal"); } -TEST("require that filter search gives expected results") { +TEST(TermwiseEvalTest, require_that_filter_search_gives_expected_results) +{ auto search = make_filter_search(true); - TEST_DO(verify(make_expect(1, 10), *search, 1, 10)); + verify(make_expect(1, 10), *search, 1, 10, "strict filter"); } -TEST("require that termwise AND/OR search produces 
appropriate results") { +TEST(TermwiseEvalTest, require_that_termwise_and_or_or_search_produces_appropriate_results) +{ for (uint32_t begin: {1, 2, 5}) { for (uint32_t end: {6, 7, 10}) { for (bool strict_search: {true, false}) { for (bool strict_wrapper: {true, false}) { - TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", - begin, end, strict_search ? "true" : "false", - strict_wrapper ? "true" : "false").c_str()); + auto label = make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", + begin, end, strict_search ? "true" : "false", + strict_wrapper ? "true" : "false"); auto search = make_termwise(make_search(strict_search), strict_wrapper); - TEST_DO(verify(make_expect(begin, end), *search, begin, end)); + verify(make_expect(begin, end), *search, begin, end, label); } } } } } -TEST("require that termwise filter search produces appropriate results") { +TEST(TermwiseEvalTest, require_that_termwise_filter_search_produces_appropriate_results) +{ for (uint32_t begin: {1, 2, 5}) { for (uint32_t end: {6, 7, 10}) { for (bool strict_search: {true, false}) { for (bool strict_wrapper: {true, false}) { - TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", - begin, end, strict_search ? "true" : "false", - strict_wrapper ? "true" : "false").c_str()); + auto label = make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", + begin, end, strict_search ? "true" : "false", + strict_wrapper ? 
"true" : "false"); auto search = make_termwise(make_filter_search(strict_search), strict_wrapper); - TEST_DO(verify(make_expect(begin, end), *search, begin, end)); + verify(make_expect(begin, end), *search, begin, end, label); } } } } } -TEST("require that termwise ANDNOT with single term works") { - TEST_DO(verify({2,3,4}, *make_termwise(ANDNOT({ TERM({1,2,3,4,5}, true) }, true), true), 2, 5)); +TEST(TermwiseEvalTest, require_that_termwise_andnot_with_single_term_works) +{ + verify({2,3,4}, *make_termwise(ANDNOT({ TERM({1,2,3,4,5}, true) }, true), true), 2, 5, "termwise andnot"); } -TEST("require that pseudo term is rewindable") { +TEST(TermwiseEvalTest, require_that_pseudo_term_is_rewindable) +{ auto search = UP(TERM({1,2,3,4,5}, true)); - TEST_DO(verify({3,4,5}, *search, 3, 6)); - TEST_DO(verify({1,2,3,4}, *search, 1, 5)); + verify({3,4,5}, *search, 3, 6, "pseudo term end"); + verify({1,2,3,4}, *search, 1, 5, "pseudo term rewound to start"); } -TEST("require that termwise wrapper is rewindable") { +TEST(TermwiseEvalTest, require_that_termwise_wrapper_is_rewindable) +{ auto search = make_termwise(make_search(true), true); - TEST_DO(verify(make_expect(3, 7), *search, 3, 7)); - TEST_DO(verify(make_expect(1, 5), *search, 1, 5)); + verify(make_expect(3, 7), *search, 3, 7, "termwise wrapper end"); + verify(make_expect(1, 5), *search, 1, 5, "termwise wrapper rewound to start"); } //----------------------------------------------------------------------------- -TEST("require that leaf blueprints allow termwise evaluation by default") { +TEST(TermwiseEvalTest, require_that_leaf_blueprints_allow_termwise_evaluation_by_default) +{ MyBlueprint bp({}); EXPECT_TRUE(bp.getState().allow_termwise_eval()); } -TEST("require that leaf blueprints can enable/disable termwise evaluation") { +TEST(TermwiseEvalTest, require_that_leaf_blueprints_can_enable_and_disable_termwise_evaluation) +{ MyBlueprint enable({}, true); MyBlueprint disable({}, false); 
EXPECT_TRUE(enable.getState().allow_termwise_eval()); EXPECT_FALSE(disable.getState().allow_termwise_eval()); } -TEST("require that intermediate blueprints disallow termwise evaluation by default") { +TEST(TermwiseEvalTest, require_that_intermediate_blueprints_disallow_termwise_evaluation_by_default) +{ MyOr bp(false); bp.addChild(UP(new MyBlueprint({}, true))); bp.addChild(UP(new MyBlueprint({}, true))); EXPECT_FALSE(bp.getState().allow_termwise_eval()); } -TEST("require that intermediate blueprints can enable/disable termwise evaluation") { +TEST(TermwiseEvalTest, require_that_intermediate_blueprints_can_enable_and_disable_termwise_evaluation) +{ MyOr enable(true, true); enable.addChild(UP(new MyBlueprint({}, true))); enable.addChild(UP(new MyBlueprint({}, true))); @@ -312,7 +327,8 @@ TEST("require that intermediate blueprints can enable/disable termwise evaluatio EXPECT_FALSE(disable.getState().allow_termwise_eval()); } -TEST("require that intermediate blueprints cannot be termwise unless all its children are termwise") { +TEST(TermwiseEvalTest, require_that_intermediate_blueprints_cannot_be_termwise_unless_all_its_children_are_termwise) +{ MyOr bp(true, true); bp.addChild(UP(new MyBlueprint({}, true))); bp.addChild(UP(new MyBlueprint({}, false))); @@ -321,27 +337,30 @@ TEST("require that intermediate blueprints cannot be termwise unless all its chi //----------------------------------------------------------------------------- -TEST("require that leafs have tree size 1") { +TEST(TermwiseEvalTest, require_that_leafs_have_tree_size_1) +{ MyBlueprint bp({}); - EXPECT_EQUAL(1u, bp.getState().tree_size()); + EXPECT_EQ(1u, bp.getState().tree_size()); } -TEST("require that tree size is accumulated correctly by intermediate nodes") { +TEST(TermwiseEvalTest, require_that_tree_size_is_accumulated_correctly_by_intermediate_nodes) +{ MyOr bp(false); - EXPECT_EQUAL(1u, bp.getState().tree_size()); + EXPECT_EQ(1u, bp.getState().tree_size()); bp.addChild(UP(new 
MyBlueprint({}))); bp.addChild(UP(new MyBlueprint({}))); - EXPECT_EQUAL(3u, bp.getState().tree_size()); + EXPECT_EQ(3u, bp.getState().tree_size()); auto child = UP(new MyOr(false)); child->addChild(UP(new MyBlueprint({}))); child->addChild(UP(new MyBlueprint({}))); bp.addChild(std::move(child)); - EXPECT_EQUAL(6u, bp.getState().tree_size()); + EXPECT_EQ(6u, bp.getState().tree_size()); } //----------------------------------------------------------------------------- -TEST("require that any blueprint node can obtain the root") { +TEST(TermwiseEvalTest, require_that_any_blueprint_node_can_obtain_the_root) +{ MyOr bp(false); bp.addChild(UP(new MyBlueprint({1,2,3}))); bp.addChild(UP(new MyBlueprint({1,2,3,4,5,6}))); @@ -354,35 +373,38 @@ TEST("require that any blueprint node can obtain the root") { //----------------------------------------------------------------------------- -TEST("require that match data keeps track of the termwise limit") { +TEST(TermwiseEvalTest, require_that_match_data_keeps_track_of_the_termwise_limit) +{ auto md = make_match_data(); - EXPECT_EQUAL(1.0, md->get_termwise_limit()); + EXPECT_EQ(1.0, md->get_termwise_limit()); md->set_termwise_limit(0.03); - EXPECT_EQUAL(0.03, md->get_termwise_limit()); + EXPECT_EQ(0.03, md->get_termwise_limit()); } //----------------------------------------------------------------------------- -TEST("require that terwise test search string dump is detailed enough") { - EXPECT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), - make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString()); +TEST(TermwiseEvalTest, require_that_terwise_test_search_string_dump_is_detailed_enough) +{ + EXPECT_EQ(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), + make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString()); - 
EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), - make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true) }, true), true)->asString()); + EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), + make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true) }, true), true)->asString()); - EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), - make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, false), true)->asString()); + EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), + make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, false), true)->asString()); - EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), - make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), false)->asString()); + EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), + make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), false)->asString()); - EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), - make_termwise(OR({ TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true) }, true), true)->asString()); + EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(), + make_termwise(OR({ TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true) }, true), true)->asString()); } //----------------------------------------------------------------------------- -TEST("require that basic termwise evaluation works") { +TEST(TermwiseEvalTest, 
require_that_basic_termwise_evaluation_works) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -391,12 +413,13 @@ TEST("require that basic termwise evaluation works") { my_or.addChild(UP(new MyBlueprint({1}, true, 1))); my_or.addChild(UP(new MyBlueprint({2}, true, 2))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString()); + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString()); } } -TEST("require that the hit rate must be high enough for termwise evaluation to be activated") { +TEST(TermwiseEvalTest, require_that_the_hit_rate_must_be_high_enough_for_termwise_evaluation_to_be_activated) +{ auto md = make_match_data(); md->set_termwise_limit(1.0); // <- md->resolveTermField(1)->tagAsNotNeeded(); @@ -409,7 +432,8 @@ TEST("require that the hit rate must be high enough for termwise evaluation to b } } -TEST("require that enough unranked termwise terms are present for termwise evaluation to be activated") { +TEST(TermwiseEvalTest, require_that_enough_unranked_termwise_terms_are_present_for_termwise_evaluation_to_be_activated) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -423,29 +447,31 @@ TEST("require that enough unranked termwise terms are present for termwise evalu } } -TEST("require that termwise evaluation can be multi-level, but not duplicated") { +TEST(TermwiseEvalTest, require_that_termwise_evaluation_can_be_multi_level_but_not_duplicated) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); md->resolveTermField(2)->tagAsNotNeeded(); md->resolveTermField(3)->tagAsNotNeeded(); OrBlueprint my_or; - my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new 
MyBlueprint({1}, true, 1))); auto child = UP(new OrBlueprint()); child->addChild(UP(new MyBlueprint({2}, true, 2))); child->addChild(UP(new MyBlueprint({3}, true, 3))); my_or.addChild(std::move(child)); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - make_termwise(OR({ TERM({1}, strict), - ORz({ TERM({2}, strict), TERM({3}, strict) }, strict) }, - strict), strict)->asString()); + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + make_termwise(OR({ TERM({1}, strict), + ORz({ TERM({2}, strict), TERM({3}, strict) }, strict) }, + strict), strict)->asString()); } } //----------------------------------------------------------------------------- -TEST("require that OR can be completely termwise") { +TEST(TermwiseEvalTest, require_that_or_can_be_completely_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -454,12 +480,13 @@ TEST("require that OR can be completely termwise") { my_or.addChild(UP(new MyBlueprint({1}, true, 1))); my_or.addChild(UP(new MyBlueprint({2}, true, 2))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString()); + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString()); } } -TEST("require that OR can be partially termwise") { +TEST(TermwiseEvalTest, require_that_or_can_be_partially_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -469,13 +496,14 @@ TEST("require that OR can be partially termwise") { my_or.addChild(UP(new MyBlueprint({2}, true, 2))); my_or.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - ORs({ make_termwise(OR({ TERM({1}, strict), TERM({3}, 
strict) }, strict), strict), + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + ORs({ make_termwise(OR({ TERM({1}, strict), TERM({3}, strict) }, strict), strict), TERM({2}, strict) }, strict)->asString()); } } -TEST("require that OR puts termwise subquery at the right place") { +TEST(TermwiseEvalTest, require_that_or_puts_termwise_subquery_at_the_right_place) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(2)->tagAsNotNeeded(); @@ -485,14 +513,15 @@ TEST("require that OR puts termwise subquery at the right place") { my_or.addChild(UP(new MyBlueprint({2}, true, 2))); my_or.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - ORs({ TERM({1}, strict), - make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict), - strict) }, strict)->asString()); + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + ORs({ TERM({1}, strict), + make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict), + strict) }, strict)->asString()); } } -TEST("require that OR can use termwise eval also when having non-termwise children") { +TEST(TermwiseEvalTest, require_that_or_can_use_termwise_eval_also_when_having_non_termwise_children) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -503,17 +532,18 @@ TEST("require that OR can use termwise eval also when having non-termwise childr my_or.addChild(UP(new MyBlueprint({2}, true, 2))); my_or.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), - ORz({ TERM({1}, strict), - make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict), - strict)}, - strict)->asString()); + EXPECT_EQ(my_or.createSearch(*md, strict)->asString(), + ORz({ TERM({1}, strict), + make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict), + strict)}, + 
strict)->asString()); } } //----------------------------------------------------------------------------- -TEST("require that AND can be completely termwise") { +TEST(TermwiseEvalTest, require_that_and_can_be_completely_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -522,12 +552,13 @@ TEST("require that AND can be completely termwise") { my_and.addChild(UP(new MyBlueprint({1}, true, 1))); my_and.addChild(UP(new MyBlueprint({2}, true, 2))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), - make_termwise(AND({ TERM({1}, strict), TERM({2}, false) }, strict), strict)->asString()); + EXPECT_EQ(my_and.createSearch(*md, strict)->asString(), + make_termwise(AND({ TERM({1}, strict), TERM({2}, false) }, strict), strict)->asString()); } } -TEST("require that AND can be partially termwise") { +TEST(TermwiseEvalTest, require_that_and_can_be_partially_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -537,7 +568,7 @@ TEST("require that AND can be partially termwise") { my_and.addChild(UP(new MyBlueprint({2}, true, 2))); my_and.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_and.createSearch(*md, strict)->asString(), ANDs({ make_termwise(AND({ TERM({1}, strict), TERM({3}, false) }, strict), strict), @@ -545,7 +576,8 @@ TEST("require that AND can be partially termwise") { } } -TEST("require that AND puts termwise subquery at the right place") { +TEST(TermwiseEvalTest, require_that_and_puts_termwise_subquery_at_the_right_place) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(2)->tagAsNotNeeded(); @@ -555,14 +587,15 @@ TEST("require that AND puts termwise subquery at the right place") { my_and.addChild(UP(new MyBlueprint({2}, true, 2))); my_and.addChild(UP(new 
MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_and.createSearch(*md, strict)->asString(), ANDs({ TERM({1}, strict), make_termwise(AND({ TERM({2}, false), TERM({3}, false) }, false), false) }, strict)->asString()); } } -TEST("require that AND can use termwise eval also when having non-termwise children") { +TEST(TermwiseEvalTest, require_that_and_can_use_termwise_eval_also_when_having_non_termwise_children) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -573,7 +606,7 @@ TEST("require that AND can use termwise eval also when having non-termwise child my_and.addChild(UP(new MyBlueprint({2}, true, 2))); my_and.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_and.createSearch(*md, strict)->asString(), ANDz({ TERM({1}, strict), make_termwise(AND({ TERM({2}, false), TERM({3}, false) }, false), false) }, strict)->asString()); @@ -582,7 +615,8 @@ TEST("require that AND can use termwise eval also when having non-termwise child //----------------------------------------------------------------------------- -TEST("require that ANDNOT can be completely termwise") { +TEST(TermwiseEvalTest, require_that_andnot_can_be_completely_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -590,13 +624,14 @@ TEST("require that ANDNOT can be completely termwise") { my_andnot.addChild(UP(new MyBlueprint({1}, true, 1))); my_andnot.addChild(UP(new MyBlueprint({2}, true, 2))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(), make_termwise(ANDNOT({ TERM({1}, strict), TERM({2}, false) }, strict), strict)->asString()); } } -TEST("require that ANDNOT can be partially 
termwise") { +TEST(TermwiseEvalTest, require_that_andnot_can_be_partially_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); AndNotBlueprint my_andnot; @@ -604,14 +639,15 @@ TEST("require that ANDNOT can be partially termwise") { my_andnot.addChild(UP(new MyBlueprint({2}, true, 2))); my_andnot.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(), ANDNOT({ TERM({1}, strict), make_termwise(OR({ TERM({2}, false), TERM({3}, false) }, false), false) }, strict)->asString()); } } -TEST("require that ANDNOT can be partially termwise with first child being termwise") { +TEST(TermwiseEvalTest, require_that_andnot_can_be_partially_termwise_with_first_child_being_termwise) +{ auto md = make_match_data(); md->set_termwise_limit(0.0); md->resolveTermField(1)->tagAsNotNeeded(); @@ -620,7 +656,7 @@ TEST("require that ANDNOT can be partially termwise with first child being termw my_andnot.addChild(UP(new MyBlueprint({2}, false, 2))); my_andnot.addChild(UP(new MyBlueprint({3}, true, 3))); for (bool strict: {true, false}) { - EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(), ANDNOT({ make_termwise(ANDNOT({ TERM({1}, strict), TERM({3}, false) }, strict), strict), TERM({2}, false) }, strict)->asString()); @@ -629,7 +665,8 @@ TEST("require that ANDNOT can be partially termwise with first child being termw //----------------------------------------------------------------------------- -TEST("require that termwise blueprint helper calculates unpack info correctly") { +TEST(TermwiseEvalTest, require_that_termwise_blueprint_helper_calculates_unpack_info_correctly) +{ OrBlueprint my_or; my_or.addChild(UP(new MyBlueprint({1}, false, 1))); // termwise not allowed my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // termwise not allowed and ranked @@ 
-641,9 +678,9 @@ TEST("require that termwise blueprint helper calculates unpack info correctly") unpack.add(1); unpack.add(3); TermwiseBlueprintHelper helper(my_or, std::move(dummy_searches), unpack); - EXPECT_EQUAL(helper.get_result().size(), 3u); - EXPECT_EQUAL(helper.get_termwise_children().size(), 2u); - EXPECT_EQUAL(helper.first_termwise, 2u); + EXPECT_EQ(helper.get_result().size(), 3u); + EXPECT_EQ(helper.get_termwise_children().size(), 2u); + EXPECT_EQ(helper.first_termwise, 2u); EXPECT_TRUE(!helper.termwise_unpack.needUnpack(0)); EXPECT_TRUE(helper.termwise_unpack.needUnpack(1)); EXPECT_TRUE(!helper.termwise_unpack.needUnpack(2)); @@ -658,11 +695,12 @@ public: return make_termwise(createIterator(getExpectedDocIds(), strict), strict); } }; -TEST("test terwise adheres to search iterator requirements.") { +TEST(TermwiseEvalTest, test_termwise_adheres_to_search_iterator_requirements) +{ Verifier verifier; verifier.verify(); } //----------------------------------------------------------------------------- -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp index 1054980e4ec..9409b2b26c4 100644 --- a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp @@ -1,5 +1,4 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchlib/queryeval/fake_search.h> #include <vespa/searchlib/queryeval/wand/weak_and_search.h> #include <vespa/searchlib/queryeval/simpleresult.h> @@ -7,7 +6,9 @@ #include <vespa/searchlib/queryeval/test/eagerchild.h> #include <vespa/searchlib/queryeval/test/leafspec.h> #include <vespa/searchlib/queryeval/test/wandspec.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/weightedchildrenverifiers.h> +#include <vespa/vespalib/gtest/gtest.h> using namespace search::fef; using namespace search::queryeval; @@ -60,32 +61,39 @@ struct WeightOrder { } // namespace <unnamed> -TEST_F("require that wand prunes bad hits after enough good ones are obtained", SimpleWandFixture) { - EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits); +TEST(WeakAndTest, require_that_wand_prunes_bad_hits_after_enough_good_ones_are_obtained) +{ + SimpleWandFixture f; + EXPECT_EQ(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits); } -TEST_F("require that wand uses subsearches as expected", SimpleWandFixture) { - EXPECT_EQUAL(History() - .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1) - .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1) - .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2) - .unpack("WAND", 2).unpack("foo", 2) - .seek("WAND", 3).step("WAND", 3) - .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3) - .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5) - .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5) - .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId), - f.spec.getHistory()); +TEST(WeakAndTest, require_that_wand_uses_subsearches_as_expected) +{ + SimpleWandFixture f; + EXPECT_EQ(History() + .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1) + 
.unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1) + .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2) + .unpack("WAND", 2).unpack("foo", 2) + .seek("WAND", 3).step("WAND", 3) + .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3) + .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5) + .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5) + .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId), + f.spec.getHistory()); } -TEST_F("require that documents are considered in the right order", AdvancedWandFixture) { - EXPECT_EQUAL(SimpleResult() - .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) - .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) - .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits); +TEST(WeakAndTest, require_that_documents_are_considered_in_the_right_order) +{ + AdvancedWandFixture f; + EXPECT_EQ(SimpleResult() + .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) + .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) + .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits); } -TEST("require that initial docid for subsearches are taken into account") { +TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_account) +{ History history; wand::Terms terms; terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1)); @@ -93,10 +101,10 @@ TEST("require that initial docid for subsearches are taken into account") { SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true))); SimpleResult hits; hits.search(*search); - EXPECT_EQUAL(SimpleResult().addHit(10), hits); - EXPECT_EQUAL(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10) - .seek("WAND", 11).seek("bar", 11).step("bar", 
search::endDocId).step("WAND", search::endDocId), - history); + EXPECT_EQ(SimpleResult().addHit(10), hits); + EXPECT_EQ(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10) + .seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId), + history); } class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { @@ -111,9 +119,10 @@ private: } }; -TEST("verify search iterator conformance") { +TEST(WeakAndTest, verify_search_iterator_conformance) +{ IteratorChildrenVerifier verifier; verifier.verify(); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 5faead1175e..24d62f66714 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -1,6 +1,5 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> #include <vespa/searchlib/query/tree/simplequery.h> @@ -11,7 +10,9 @@ #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/fake_searchable.h> #include <vespa/searchlib/queryeval/fake_requestcontext.h> +#define ENABLE_GTEST_MIGRATION #include <vespa/searchlib/test/weightedchildrenverifiers.h> +#include <vespa/vespalib/gtest/gtest.h> using namespace search; using namespace search::query; @@ -112,8 +113,9 @@ WS::WS() term_is_not_needed(false) { MatchData::UP tmp = layout.createMatchData(); - ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + assert(tmp->resolveTermField(handle)->getFieldId() == fieldId); } + WS::~WS() = default; struct MockSearch : public SearchIterator { @@ -143,8 +145,11 @@ struct MockFixture { weights.push_back(1); search = WeightedSetTermSearch::create(children, tfmd, false, weights, {}); } + ~MockFixture(); }; +MockFixture::~MockFixture() = default; + } // namespace <unnamed> void run_simple(bool field_is_filter, bool term_is_not_needed, bool singleTerm) @@ -179,42 +184,50 @@ void run_simple(bool field_is_filter, bool term_is_not_needed, bool singleTerm) EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); - EXPECT_EQUAL(expect, ws.search(index, "field", true)); - EXPECT_EQUAL(expect, ws.search(index, "field", false)); - EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); - EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); + EXPECT_EQ(expect, ws.search(index, "field", true)); + EXPECT_EQ(expect, ws.search(index, "field", false)); + EXPECT_EQ(expect, ws.search(index, "multi-field", true)); + EXPECT_EQ(expect, ws.search(index, "multi-field", false)); } -TEST("testSimple") { - TEST_DO(run_simple(false, false, false)); +TEST(WeightedSetTermTest, test_simple) +{ + run_simple(false, false, false); } 
-TEST("testSimple filter field") { - TEST_DO(run_simple(true, false, false)); +TEST(WeightedSetTermTest, test_simple_filter_field) +{ + run_simple(true, false, false); } -TEST("testSimple unranked") { - TEST_DO(run_simple(false, true, false)); +TEST(WeightedSetTermTest, test_simple_unranked) +{ + run_simple(false, true, false); } -TEST("testSimple unranked filter filed") { - TEST_DO(run_simple(true, true, false)); +TEST(WeightedSetTermTest, test_simple_unranked_filter_field) +{ + run_simple(true, true, false); } -TEST("testSimple single") { - TEST_DO(run_simple(false, false, true)); +TEST(WeightedSetTermTest, test_simple_single) +{ + run_simple(false, false, true); } -TEST("testSimple single filter field") { - TEST_DO(run_simple(true, false, true)); +TEST(WeightedSetTermTest, test_simple_single_filter_field) +{ + run_simple(true, false, true); } -TEST("testSimple single unranked") { - TEST_DO(run_simple(false, true, true)); +TEST(WeightedSetTermTest, test_simple_single_unranked) +{ + run_simple(false, true, true); } -TEST("testSimple single unranked filter field") { - TEST_DO(run_simple(true, true, true)); +TEST(WeightedSetTermTest, test_simple_single_unranked_filter_field) +{ + run_simple(true, true, true); } void run_multi(bool field_is_filter, bool term_is_not_needed) @@ -240,46 +253,53 @@ void run_multi(bool field_is_filter, bool term_is_not_needed) EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); - EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); - EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); + EXPECT_EQ(expect, ws.search(index, "multi-field", true)); + EXPECT_EQ(expect, ws.search(index, "multi-field", false)); } -TEST("testMulti") { - TEST_DO(run_multi(false, false)); +TEST(WeightedSetTermTest, test_multi) +{ + run_multi(false, false); } -TEST("testMulti filter field") { - TEST_DO(run_multi(true, false)); +TEST(WeightedSetTermTest, test_multi_filter_field) 
+{ + run_multi(true, false); } -TEST("testMulti unranked") { - TEST_DO(run_multi(false, true)); +TEST(WeightedSetTermTest, test_multi_unranked) +{ + run_multi(false, true); } -TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { +TEST(WeightedSetTermTest, test_eager_empty_child) +{ + MockFixture f1(search::endDocId); MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; search.initFullRange(); - EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_EQ(search.beginId(), search.getDocId()); EXPECT_TRUE(!search.seek(1)); EXPECT_TRUE(search.isAtEnd()); - EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_EQ(0, mock->seekCnt); } -TEST_F("test Eager Matching Child", MockFixture(5)) { +TEST(WeightedSetTermTest, test_eager_matching_child) +{ + MockFixture f1(5); MockSearch *mock = f1.mock; SearchIterator &search = *f1.search; search.initFullRange(); - EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_EQ(search.beginId(), search.getDocId()); EXPECT_TRUE(!search.seek(3)); - EXPECT_EQUAL(5u, search.getDocId()); - EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_EQ(5u, search.getDocId()); + EXPECT_EQ(0, mock->seekCnt); EXPECT_TRUE(search.seek(5)); - EXPECT_EQUAL(5u, search.getDocId()); - EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_EQ(5u, search.getDocId()); + EXPECT_EQ(0, mock->seekCnt); EXPECT_TRUE(!search.seek(7)); EXPECT_TRUE(search.isAtEnd()); - EXPECT_EQUAL(1, mock->seekCnt); + EXPECT_EQ(1, mock->seekCnt); } class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { @@ -296,12 +316,14 @@ private: } }; -TEST("verify search iterator conformance with search iterator children") { +TEST(WeightedSetTermTest, verify_search_iterator_conformance_with_search_iterator_children) +{ IteratorChildrenVerifier verifier; verifier.verify(); } -TEST("verify search iterator conformance with document weight iterator children") { +TEST(WeightedSetTermTest, verify_search_iterator_conformance_with_document_weight_iterator_children) +{ 
WeightIteratorChildrenVerifier verifier; verifier.verify(); } @@ -312,12 +334,12 @@ struct VerifyMatchData { MyBlueprint(VerifyMatchData &vmd_in, FieldSpecBase spec_in) : SimpleLeafBlueprint(spec_in), vmd(vmd_in) {} [[nodiscard]] SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const override { - EXPECT_EQUAL(tfmda.size(), 1u); + EXPECT_EQ(tfmda.size(), 1u); EXPECT_TRUE(tfmda[0] != nullptr); if (vmd.child_tfmd == nullptr) { vmd.child_tfmd = tfmda[0]; } else { - EXPECT_EQUAL(vmd.child_tfmd, tfmda[0]); + EXPECT_EQ(vmd.child_tfmd, tfmda[0]); } ++vmd.child_cnt; return std::make_unique<EmptySearch>(); @@ -333,7 +355,8 @@ struct VerifyMatchData { } }; -TEST("require that children get a common (yet separate) term field match data") { +TEST(WeightedSetTermTest, require_that_children_get_a_common_yet_separate_term_field_match_data) +{ VerifyMatchData vmd; MatchDataLayout layout; auto top_handle = layout.allocTermField(42); @@ -347,9 +370,9 @@ TEST("require that children get a common (yet separate) term field match data") auto match_data = layout.createMatchData(); auto search = blueprint.createSearch(*match_data, true); auto top_tfmd = match_data->resolveTermField(top_handle); - EXPECT_EQUAL(vmd.child_cnt, 5u); + EXPECT_EQ(vmd.child_cnt, 5u); EXPECT_TRUE(vmd.child_tfmd != nullptr); - EXPECT_NOT_EQUAL(top_tfmd, vmd.child_tfmd); + EXPECT_NE(top_tfmd, vmd.child_tfmd); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/sortspec/multilevelsort_test.cpp b/searchlib/src/tests/sortspec/multilevelsort_test.cpp index 2d0456e13fc..f3bf363645e 100644 --- a/searchlib/src/tests/sortspec/multilevelsort_test.cpp +++ b/searchlib/src/tests/sortspec/multilevelsort_test.cpp @@ -8,7 +8,6 @@ #include <vespa/searchlib/attribute/attributemanager.h> #include <vespa/searchlib/uca/ucaconverter.h> #include <vespa/searchcommon/attribute/config.h> -#include <vespa/vespalib/util/testclock.h> #include 
<vespa/vespalib/testkit/testapp.h> #include <type_traits> #include <cinttypes> @@ -242,10 +241,8 @@ MultilevelSortTest::sortAndCheck(const std::vector<Spec> &specs, uint32_t num, hits.emplace_back(i, getRandomValue<uint32_t>()); } - vespalib::TestClock clock; - vespalib::Doom doom(clock.clock(), vespalib::steady_time::max()); search::uca::UcaConverterFactory ucaFactory; - FastS_SortSpec sorter("no-metastore", 7, doom, ucaFactory); + FastS_SortSpec sorter("no-metastore", 7, vespalib::Doom::never(), ucaFactory); // init sorter with sort data for (const auto & spec : specs) { AttributeGuard ag; @@ -384,10 +381,8 @@ TEST("require that all sort methods behave the same") } TEST("test that [docid] translates to [lid][paritionid]") { - vespalib::TestClock clock; - vespalib::Doom doom(clock.clock(), vespalib::steady_time::max()); search::uca::UcaConverterFactory ucaFactory; - FastS_SortSpec asc("no-metastore", 7, doom, ucaFactory); + FastS_SortSpec asc("no-metastore", 7, vespalib::Doom::never(), ucaFactory); RankedHit hits[2] = {RankedHit(91, 0.0), RankedHit(3, 2.0)}; search::AttributeManager mgr; search::AttributeContext ac(mgr); @@ -404,7 +399,7 @@ TEST("test that [docid] translates to [lid][paritionid]") { EXPECT_EQUAL(6u, sr2.second); EXPECT_EQUAL(0, memcmp(SECOND_ASC, sr2.first, 6)); - FastS_SortSpec desc("no-metastore", 7, doom, ucaFactory); + FastS_SortSpec desc("no-metastore", 7, vespalib::Doom::never(), ucaFactory); desc.Init("-[docid]", ac); desc.initWithoutSorting(hits, 2); sr1 = desc.getSortRef(0); @@ -416,10 +411,8 @@ TEST("test that [docid] translates to [lid][paritionid]") { } TEST("test that [docid] uses attribute when one exists") { - vespalib::TestClock clock; - vespalib::Doom doom(clock.clock(), vespalib::steady_time::max()); search::uca::UcaConverterFactory ucaFactory; - FastS_SortSpec asc("metastore", 7, doom, ucaFactory); + FastS_SortSpec asc("metastore", 7, vespalib::Doom::never(), ucaFactory); RankedHit hits[2] = {RankedHit(91, 0.0), RankedHit(3, 
2.0)}; Config cfg(BasicType::INT64, CollectionType::SINGLE); auto metastore = AttributeFactory::createAttribute("metastore", cfg); @@ -445,7 +438,7 @@ TEST("test that [docid] uses attribute when one exists") { EXPECT_EQUAL(8u, sr2.second); EXPECT_EQUAL(0, memcmp(SECOND_ASC, sr2.first, 8)); - FastS_SortSpec desc("metastore", 7, doom, ucaFactory); + FastS_SortSpec desc("metastore", 7, vespalib::Doom::never(), ucaFactory); desc.Init("-[docid]", ac); desc.initWithoutSorting(hits, 2); sr1 = desc.getSortRef(0); diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 74d4600a079..da58dd749ba 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -106,7 +106,7 @@ public: .set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5}); } - ~HnswIndexTest() {} + ~HnswIndexTest() override {} auto dff() { return search::tensor::make_distance_function_factory( @@ -135,7 +135,7 @@ public: gen_handler.incGeneration(); index->reclaim_memory(gen_handler.get_oldest_used_generation()); } - void set_filter(std::vector<uint32_t> docids) { + void set_filter(const std::vector<uint32_t>& docids) { uint32_t sz = 10; global_filter = GlobalFilter::create(docids, sz); } @@ -168,7 +168,7 @@ public: ASSERT_EQ(exp_levels.size(), act_node.size()); EXPECT_EQ(exp_levels, act_node.levels()); } - void expect_top_3_by_docid(const vespalib::string& label, std::vector<float> qv, std::vector<uint32_t> exp) { + void expect_top_3_by_docid(const vespalib::string& label, std::vector<float> qv, const std::vector<uint32_t>& exp) { SCOPED_TRACE(label); uint32_t k = 3; uint32_t explore_k = 100; @@ -794,7 +794,7 @@ class MyGlobalFilter : public GlobalFilter { std::shared_ptr<GlobalFilter> _filter; mutable uint32_t _max_docid; public: - MyGlobalFilter(std::shared_ptr<GlobalFilter> filter) noexcept + explicit MyGlobalFilter(std::shared_ptr<GlobalFilter> filter) noexcept : 
_filter(std::move(filter)), _max_docid(0) { @@ -845,7 +845,7 @@ TEST_F(HnswMultiIndexTest, duplicate_docid_is_removed) global_filter = filter; this->expect_top_3_by_docid("{2,2}", {2, 2}, {1, 2}); EXPECT_EQ(2, filter->max_docid()); -}; +} TEST_F(HnswMultiIndexTest, docid_with_empty_tensor_can_be_removed) { @@ -904,10 +904,10 @@ TEST(LevelGeneratorTest, gives_various_levels) } hist[l]++; } - for (uint32_t l = 0; l < hist.size(); ++l) { + for (unsigned int l : hist) { double expected = left * 0.75; - EXPECT_TRUE(hist[l] < expected*1.01 + 100); - EXPECT_TRUE(hist[l] > expected*0.99 - 100); + EXPECT_TRUE(l < expected*1.01 + 100); + EXPECT_TRUE(l > expected*0.99 - 100); left *= 0.25; } EXPECT_TRUE(hist.size() < 14); diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index f80e8dbe7be..96bd07bc5a3 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -43,7 +43,6 @@ vespa_add_library(searchlib_attribute OBJECT direct_multi_term_blueprint.cpp distance_metric_utils.cpp diversity.cpp - document_weight_or_filter_search.cpp empty_search_context.cpp enum_store_compaction_spec.cpp enum_store_dictionary.cpp @@ -84,6 +83,7 @@ vespa_add_library(searchlib_attribute OBJECT multi_numeric_search_context.cpp multi_string_enum_hint_search_context.cpp multi_string_enum_search_context.cpp + multi_term_or_filter_search.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp multienumattribute.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 3a5f79ef665..5d689f5bd81 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -5,9 +5,10 @@ #include "attribute_object_visitor.h" #include 
"attribute_weighted_set_blueprint.h" #include "direct_multi_term_blueprint.h" -#include "document_weight_or_filter_search.h" -#include "i_direct_posting_store.h" -#include "posting_iterator_pack.h" +#include "i_docid_posting_store.h" +#include "i_docid_with_weight_posting_store.h" +#include "in_term_search.h" +#include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> #include <vespa/searchlib/common/location.h> @@ -18,7 +19,7 @@ #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/searchlib/queryeval/andsearchstrict.h> #include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/dot_product_blueprint.h> #include <vespa/searchlib/queryeval/dot_product_search.h> #include <vespa/searchlib/queryeval/emptysearch.h> @@ -468,7 +469,7 @@ DirectWandBlueprint::createFilterSearch(bool, FilterConstraint constraint) const for (const IDirectPostingStore::LookupResult &r : _terms) { _attr.create(r.posting_idx, iterators); } - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + return attribute::MultiTermOrFilterSearch::create(std::move(iterators)); } else { return std::make_unique<queryeval::EmptySearch>(); } @@ -528,7 +529,7 @@ public: } } if (_attr.has_btree_iterator(_dict_entry.posting_idx)) { - return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); + return std::make_unique<queryeval::DocidWithWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); } else { return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict); } @@ -574,12 +575,13 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper private: const FieldSpec &_field; const IAttributeVector &_attr; - const IDocidWithWeightPostingStore 
*_dww; + const IDocidPostingStore *_dps; + const IDocidWithWeightPostingStore *_dwwps; vespalib::string _scratchPad; bool use_docid_with_weight_posting_store() const { // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. - return (_dww != nullptr) && (_dww->has_always_btree_iterator()); + return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator()); } public: @@ -588,7 +590,8 @@ public: : CreateBlueprintVisitorHelper(searchable, field, requestContext), _field(field), _attr(attr), - _dww(attr.as_docid_with_weight_posting_store()), + _dps(attr.as_docid_posting_store()), + _dwwps(attr.as_docid_with_weight_posting_store()), _scratchPad() { } @@ -598,7 +601,7 @@ public: void visitSimpleTerm(TermNode &n) { if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { NodeAsKey key(n, _scratchPad); - setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key)); + setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dwwps, key)); } else { visitTerm(n); } @@ -663,8 +666,8 @@ public: void visit(PredicateQuery &n) override { visitPredicate(n); } void visit(RegExpTerm & n) override { visitTerm(n); } - template <typename WS> - void createDirectWeightedSet(WS *bp, MultiTerm &n); + template <typename BlueprintType> + void createDirectMultiTerm(BlueprintType *bp, MultiTerm &n); template <typename WS> void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger); @@ -677,8 +680,7 @@ public: return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } - template <typename Node> - void create_weighted_set_or_in(Node &n) { + void visit(query::WeightedSetTerm &n) override { bool isSingleValue = !_attr.hasMultiValue(); bool isString = (_attr.isStringType() && _attr.hasEnum()); bool isInteger = _attr.isIntegerType(); @@ -693,8 +695,8 @@ public: } else { if 
(use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch> - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -702,15 +704,11 @@ public: } } - void visit(query::WeightedSetTerm &n) override { - create_weighted_set_or_in(n); - } - void visit(query::DotProduct &n) override { if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch> - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new DotProductBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -719,10 +717,10 @@ public: void visit(query::WandTerm &n) override { if (use_docid_with_weight_posting_store()) { - auto *bp = new DirectWandBlueprint(_field, *_dww, + auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); - createDirectWeightedSet(bp, n); + createDirectMultiTerm(bp, n); } else { auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), @@ -733,7 +731,18 @@ public: } void visit(query::InTerm &n) override { - create_weighted_set_or_in(n); + if (_dps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch> + (_field, _attr, *_dps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else if (_dwwps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch> + (_field, _attr, *_dwwps, n.getNumTerms()); + 
createDirectMultiTerm(bp, n); + } else { + auto* bp = new WeightedSetTermBlueprint(_field); + createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); + } } void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) { @@ -768,9 +777,9 @@ public: void visit(query::FuzzyTerm &n) override { visitTerm(n); } }; -template <typename WS> +template <typename BlueprintType> void -CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) { +CreateBlueprintVisitor::createDirectMultiTerm(BlueprintType *bp, MultiTerm &n) { Blueprint::UP result(bp); Blueprint::HitEstimate estimate; for (uint32_t i(0); i < n.getNumTerms(); i++) { diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp index 12ae226895e..d7f9cd84d8d 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp @@ -4,12 +4,15 @@ #include "direct_multi_term_blueprint.hpp" #include "i_docid_posting_store.h" #include "i_docid_with_weight_posting_store.h" +#include "in_term_search.h" #include <vespa/searchlib/queryeval/dot_product_search.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> namespace search::attribute { +template class DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>; template class DirectMultiTermBlueprint<IDocidPostingStore, queryeval::WeightedSetTermSearch>; +template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>; template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>; template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>; diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index 
668034ecd3d..066b70481dc 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -18,7 +18,7 @@ namespace search::attribute { /** * Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct - * over a multi-value attribute which supports the IDocidWithWeightPostingStore interface. + * over an attribute which supports the IDocidPostingStore or IDocidWithWeightPostingStore interface. * * This uses access to low-level posting lists, which speeds up query execution. */ @@ -44,7 +44,9 @@ private: std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors, bool strict) const; - std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const; + template <bool filter_search, bool need_match_data> + std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, + bool strict) const; public: DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const PostingStoreType &attr, size_t size_hint); diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index 5ca943a356d..f195e97fee0 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -3,7 +3,7 @@ #pragma once #include "direct_multi_term_blueprint.h" -#include "document_weight_or_filter_search.h" +#include "multi_term_or_filter_search.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/filter_wrapper.h> @@ -88,8 +88,10 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::combine_iterators(std::u } template <typename 
PostingStoreType, typename SearchType> +template <bool filter_search, bool need_match_data> std::unique_ptr<queryeval::SearchIterator> -DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const +DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, + bool strict) const { if (_terms.empty()) { return std::make_unique<queryeval::EmptySearch>(); @@ -98,24 +100,30 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con std::vector<queryeval::SearchIterator::UP> bitvectors; const size_t num_children = _terms.size(); btree_iterators.reserve(num_children); - bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator(); - auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict); - if (is_filter_search) { - auto filter = !btree_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr<SearchIterator>(); + auto& tfmd = *tfmda[0]; + bool use_bit_vector_when_available = filter_search || !_attr.has_always_btree_iterator(); + auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, tfmd, strict); + if constexpr (filter_search || (!need_match_data && !SearchType::require_btree_iterators)) { + auto filter = !btree_iterators.empty() ? + (need_match_data ? 
+ attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators), tfmd) : + attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators))) : + std::unique_ptr<SearchIterator>(); return combine_iterators(std::move(filter), std::move(bitvectors), strict); } bool field_is_filter = getState().fields()[0].isFilter(); - if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) { + if constexpr (!filter_search && !SearchType::require_btree_iterators) { auto multi_term = !btree_iterators.empty() ? - SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(btree_iterators)) + SearchType::create(tfmd, field_is_filter, std::move(weights), std::move(btree_iterators)) : std::unique_ptr<SearchIterator>(); return combine_iterators(std::move(multi_term), std::move(bitvectors), strict); - } else { - // In this case we should only have weight iterators. + } else if constexpr (SearchType::require_btree_iterators) { + // In this case we should only have btree iterators. 
assert(btree_iterators.size() == _terms.size()); assert(weights.index() == 0); - return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators)); + return SearchType::create(tfmd, field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators)); } + return std::make_unique<queryeval::EmptySearch>(); } template <typename PostingStoreType, typename SearchType> @@ -124,9 +132,12 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createLeafSearch(const f { assert(tfmda.size() == 1); assert(getState().numFields() == 1); - bool field_is_filter = getState().fields()[0].isFilter(); - bool is_filter_search = field_is_filter && tfmda[0]->isNotNeeded(); - return create_search_helper(tfmda, strict, is_filter_search); + bool need_match_data = !tfmda[0]->isNotNeeded(); + if (need_match_data) { + return create_search_helper<SearchType::filter_search, true>(tfmda, strict); + } else { + return create_search_helper<SearchType::filter_search, false>(tfmda, strict); + } } template <typename PostingStoreType, typename SearchType> @@ -135,7 +146,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createFilterSearch(bool { assert(getState().numFields() == 1); auto wrapper = std::make_unique<FilterWrapper>(getState().numFields()); - wrapper->wrap(create_search_helper(wrapper->tfmda(), strict, true)); + wrapper->wrap(create_search_helper<true, false>(wrapper->tfmda(), strict)); return wrapper; } diff --git a/searchlib/src/vespa/searchlib/attribute/in_term_search.h b/searchlib/src/vespa/searchlib/attribute/in_term_search.h new file mode 100644 index 00000000000..36776499e51 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/in_term_search.h @@ -0,0 +1,15 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +namespace search::attribute { + +/** + * Class used as template argument in DirectMultiTermBlueprint to configure it for the InTerm query operator. + */ +struct InTermSearch { + static constexpr bool filter_search = true; + static constexpr bool require_btree_iterators = false; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp index b910e64b665..a4cc7405bbf 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "document_weight_or_filter_search.h" +#include "multi_term_or_filter_search.h" #include "posting_iterator_pack.h" #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/queryeval/iterator_pack.h> @@ -12,21 +12,32 @@ using search::queryeval::SearchIteratorPack; namespace search::attribute { template<typename IteratorPack> -class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch +class MultiTermOrFilterSearchImpl : public MultiTermOrFilterSearch { + // Vector that caches the docids of the current positions of the iterators in the pack. + // This reduces cache misses in doSeek() and seek_all(). 
+ std::vector<uint32_t> _docids; IteratorPack _children; + fef::TermFieldMatchData* _tfmd; void seek_all(uint32_t docId); public: - explicit DocumentWeightOrFilterSearchImpl(IteratorPack&& children); - ~DocumentWeightOrFilterSearchImpl() override; + explicit MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd); + ~MultiTermOrFilterSearchImpl() override; void doSeek(uint32_t docId) override; - void doUnpack(uint32_t) override { } + void doUnpack(uint32_t docid) override { + if (_tfmd != nullptr) { + _tfmd->resetOnlyDocId(docid); + } + } void initRange(uint32_t begin, uint32_t end) override { SearchIterator::initRange(begin, end); _children.initRange(begin, end); + for (uint16_t i = 0; i < _children.size(); ++i) { + _docids[i] = _children.get_docid(i); + } } void or_hits_into(BitVector &result, uint32_t begin_id) override { @@ -46,35 +57,39 @@ public: }; template<typename IteratorPack> -DocumentWeightOrFilterSearchImpl<IteratorPack>::DocumentWeightOrFilterSearchImpl(IteratorPack&& children) - : DocumentWeightOrFilterSearch(), - _children(std::move(children)) +MultiTermOrFilterSearchImpl<IteratorPack>::MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd) + : MultiTermOrFilterSearch(), + _docids(children.size(), 0), + _children(std::move(children)), + _tfmd(tfmd) { } template<typename IteratorPack> -DocumentWeightOrFilterSearchImpl<IteratorPack>::~DocumentWeightOrFilterSearchImpl() = default; +MultiTermOrFilterSearchImpl<IteratorPack>::~MultiTermOrFilterSearchImpl() = default; template<typename IteratorPack> void -DocumentWeightOrFilterSearchImpl<IteratorPack>::seek_all(uint32_t docId) { +MultiTermOrFilterSearchImpl<IteratorPack>::seek_all(uint32_t docId) { for (uint16_t i = 0; i < _children.size(); ++i) { - uint32_t next = _children.get_docid(i); + uint32_t next = _docids[i]; if (next < docId) { - _children.seek(i, docId); + next = _children.seek(i, docId); + _docids[i] = next; } } } template<typename 
IteratorPack> void -DocumentWeightOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId) +MultiTermOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId) { uint32_t min_doc_id = endDocId; for (uint16_t i = 0; i < _children.size(); ++i) { - uint32_t next = _children.get_docid(i); + uint32_t next = _docids[i]; if (next < docId) { next = _children.seek(i, docId); + _docids[i] = next; } if (next == docId) { setDocId(next); @@ -89,41 +104,53 @@ namespace { template <typename IteratorType, typename IteratorPackType> std::unique_ptr<queryeval::SearchIterator> -create_helper(std::vector<IteratorType>&& children) +create_helper(std::vector<IteratorType>&& children, fef::TermFieldMatchData* tfmd) { if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); } else { std::sort(children.begin(), children.end(), [](const auto & a, const auto & b) { return a.size() > b.size(); }); - using OrFilter = DocumentWeightOrFilterSearchImpl<IteratorPackType>; - return std::make_unique<OrFilter>(IteratorPackType(std::move(children))); + using OrFilter = MultiTermOrFilterSearchImpl<IteratorPackType>; + return std::make_unique<OrFilter>(IteratorPackType(std::move(children)), tfmd); } } } std::unique_ptr<queryeval::SearchIterator> -DocumentWeightOrFilterSearch::create(std::vector<DocidIterator>&& children) +MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children) +{ + return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), nullptr); +} + +std::unique_ptr<queryeval::SearchIterator> +MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd) +{ + return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), &tfmd); +} + +std::unique_ptr<queryeval::SearchIterator> +MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children) { - return create_helper<DocidIterator, DocidIteratorPack>(std::move(children)); + return create_helper<DocidWithWeightIterator, 
DocidWithWeightIteratorPack>(std::move(children), nullptr); } std::unique_ptr<queryeval::SearchIterator> -DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children) +MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd) { - return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children)); + return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children), &tfmd); } std::unique_ptr<queryeval::SearchIterator> -DocumentWeightOrFilterSearch::create(const std::vector<SearchIterator *>& children, +MultiTermOrFilterSearch::create(const std::vector<SearchIterator *>& children, std::unique_ptr<fef::MatchData> md) { if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); } else { - using OrFilter = DocumentWeightOrFilterSearchImpl<SearchIteratorPack>; - return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md))); + using OrFilter = MultiTermOrFilterSearchImpl<SearchIteratorPack>; + return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md)), nullptr); } } diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h index 5ed0dd16d83..1e8227c3007 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h @@ -7,16 +7,20 @@ namespace search::fef { class MatchData; } namespace search::attribute { /** - * Filter iterator on top of document weight iterators with OR semantics used during - * calculation of global filter for weighted set terms, wand terms and dot product terms. + * Filter iterator on top of low-level posting list iterators or regular search iterators with OR semantics. 
+ * + * Used during calculation of global filter for InTerm, WeightedSetTerm, DotProduct and WandTerm, + * or when ranking is not needed for InTerm and WeightedSetTerm. */ -class DocumentWeightOrFilterSearch : public queryeval::SearchIterator +class MultiTermOrFilterSearch : public queryeval::SearchIterator { protected: - DocumentWeightOrFilterSearch() = default; + MultiTermOrFilterSearch() = default; public: static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children); + static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd); static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children); + static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd); static std::unique_ptr<SearchIterator> create(const std::vector<SearchIterator *>& children, std::unique_ptr<fef::MatchData> md); }; diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h b/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h index 93612fde893..7f1c3e31367 100644 --- a/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h @@ -24,7 +24,8 @@ private: int _limit; size_t _max_per_group; public: - NumericRangeMatcher(const QueryTermSimple& queryTerm, bool avoidUndefinedInRange=false); + NumericRangeMatcher(const QueryTermSimple& queryTerm) : NumericRangeMatcher(queryTerm, false) {} + NumericRangeMatcher(const QueryTermSimple& queryTerm, bool avoidUndefinedInRange); protected: Int64Range getRange() const { return {static_cast<int64_t>(_low), static_cast<int64_t>(_high)}; @@ -41,8 +42,8 @@ protected: search::Range<BaseType> cappedRange(bool isFloat) { - BaseType low = static_cast<BaseType>(_low); - BaseType high = static_cast<BaseType>(_high); + auto low = static_cast<BaseType>(_low); + auto high = 
static_cast<BaseType>(_high); BaseType numMin = std::numeric_limits<BaseType>::min(); BaseType numMax = std::numeric_limits<BaseType>::max(); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 0b75e8c2c0d..3e0794835ae 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -57,6 +57,13 @@ protected: ~PostingListSearchContext() override; + double avg_values_per_document() const noexcept { + return static_cast<double>(_numValues) / static_cast<double>(_docIdLimit); + } + double avg_postinglist_size() const noexcept { + return static_cast<double>(_numValues) / _dictSize; + } + void lookupTerm(const vespalib::datastore::EntryComparator &comp); void lookupRange(const vespalib::datastore::EntryComparator &low, const vespalib::datastore::EntryComparator &high); void lookupSingle(); @@ -72,8 +79,6 @@ protected: * by looking at the posting lists in the range [lower, upper>. 
*/ virtual size_t calc_estimated_hits_in_range() const = 0; - virtual void fillArray() = 0; - virtual void fillBitVector(vespalib::ThreadBundle & thread_bundle) = 0; }; @@ -88,6 +93,7 @@ protected: using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; using EntryRef = vespalib::datastore::EntryRef; using FrozenView = typename PostingStore::BTreeType::FrozenView; + using ExecuteInfo = queryeval::ExecuteInfo; const PostingStore& _posting_store; /* @@ -101,10 +107,10 @@ protected: ~PostingListSearchContextT() override; void lookupSingle(); - void fillArray() override; - void fillBitVector(vespalib::ThreadBundle & thread_bundle) override; + virtual void fillArray(); + virtual void fillBitVector(const ExecuteInfo &); - void fetchPostings(const queryeval::ExecuteInfo & strict) override; + void fetchPostings(const ExecuteInfo & strict) override; // this will be called instead of the fetchPostings function in some cases void diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr, size_t max_per_group, size_t cutoff_groups, bool cutoff_strict); @@ -131,6 +137,7 @@ protected: using DictionaryConstIterator = Dictionary::ConstIterator; using EntryRef = vespalib::datastore::EntryRef; using PostingStore = typename Parent::PostingStore; + using ExecuteInfo = queryeval::ExecuteInfo; using Parent::_docIdLimit; using Parent::_lowerDictItr; using Parent::_merger; @@ -154,7 +161,7 @@ protected: template <bool fill_array> void fill_array_or_bitvector(); void fillArray() override; - void fillBitVector(vespalib::ThreadBundle & thread_bundle) override; + void fillBitVector(const ExecuteInfo &) override; }; @@ -177,6 +184,7 @@ class StringPostingSearchContext : public PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT> { private: + using ExecuteInfo = queryeval::ExecuteInfo; using Parent = PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>; using RegexpUtil = vespalib::RegexpUtil; using Parent::_enumStore; 
@@ -186,7 +194,7 @@ private: bool use_single_dictionary_entry(PostingListSearchContext::DictionaryConstIterator it) const { return use_dictionary_entry(it); } - bool use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const override; + bool use_posting_lists_when_non_strict(const ExecuteInfo& info) const override; public: StringPostingSearchContext(BaseSC&& base_sc, bool useBitVector, const AttrT &toBeSearched); }; @@ -196,6 +204,7 @@ class NumericPostingSearchContext : public PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT> { private: + using ExecuteInfo = queryeval::ExecuteInfo; using Parent = PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT>; using BaseType = typename AttrT::T; using Params = attribute::SearchContextParams; @@ -215,7 +224,7 @@ private: ? limit : estimate; } - void fetchPostings(const queryeval::ExecuteInfo & execInfo) override { + void fetchPostings(const ExecuteInfo & execInfo) override { if (params().diversityAttribute() != nullptr) { bool forward = (this->getRangeLimit() > 0); size_t wanted_hits = std::abs(this->getRangeLimit()); @@ -227,7 +236,7 @@ private: } } - bool use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const override; + bool use_posting_lists_when_non_strict(const ExecuteInfo& info) const override; size_t calc_estimated_hits_in_range() const override; public: @@ -343,14 +352,11 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_st // Based on this we see that LMC = 5 * PLMC. // The same relationship is found with the test case range_hits_ratio=[200]. - if ( ! 
info.create_postinglist_when_non_strict()) return false; - - constexpr float lookup_match_constant = 5.0; - constexpr float posting_list_merge_constant = 1.0; + constexpr double lookup_match_constant = 5.0; + constexpr double posting_list_merge_constant = 1.0; uint32_t exp_doc_hits = this->_docIdLimit * info.hit_rate(); - float avg_values_per_document = static_cast<float>(this->_numValues) / static_cast<float>(this->_docIdLimit); - float lookup_match_cost = exp_doc_hits * avg_values_per_document * lookup_match_constant; + float lookup_match_cost = exp_doc_hits * this->avg_values_per_document() * lookup_match_constant; float posting_list_cost = this->estimated_hits_in_range() * posting_list_merge_constant; return posting_list_cost < lookup_match_cost; } @@ -362,14 +368,25 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::calc_estimated_hits_in_range( size_t exact_sum = 0; size_t estimated_sum = 0; - auto it = this->_lowerDictItr; - for (uint32_t count = 0; (it != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it, ++count) { - exact_sum += this->_posting_store.frozenSize(it.getData().load_acquire()); + // Sample lower range + auto it_forward = this->_lowerDictItr; + for (uint32_t count = 0; (it_forward != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it_forward, ++count) { + exact_sum += this->_posting_store.frozenSize(it_forward.getData().load_acquire()); } - if (it != this->_upperDictItr) { - uint32_t remaining_posting_lists = this->_upperDictItr - it; - float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(this->max_posting_lists_to_count); - estimated_sum = remaining_posting_lists * hits_per_posting_list; + if (it_forward != this->_upperDictItr) { + //Sample upper range + auto it_backward = this->_upperDictItr; + for (uint32_t count = 0; (it_backward != it_forward) && (count < this->max_posting_lists_to_count);++count) { + --it_backward; + exact_sum += 
this->_posting_store.frozenSize(it_backward.getData().load_acquire()); + } + if (it_forward != it_backward) { + // Estimate the rest + uint32_t remaining_posting_lists = it_backward - it_forward; + double measured_hits_per_posting_list = static_cast<double>(exact_sum) / (this->max_posting_lists_to_count * 2); + // Let measure and global rate count equally, to reduce the effect of outlayers. + estimated_sum = remaining_posting_lists * (measured_hits_per_posting_list + this->avg_postinglist_size())/2; + } } return exact_sum + estimated_sum; } diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 65ed15a866f..f937d567588 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -68,11 +68,14 @@ PostingListSearchContextT<DataT>::fillArray() template <typename DataT> struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { - FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, uint32_t limit) - : FillPart(posting_store, from, count, nullptr, limit) + FillPart(const vespalib::Doom & doom, const PostingStore& posting_store, const DictionaryConstIterator & from, + size_t count, uint32_t limit) + : FillPart(doom, posting_store, from, count, nullptr, limit) { } - FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, BitVector * bv, uint32_t limit) - : _posting_store(posting_store), + FillPart(const vespalib::Doom & doom, const PostingStore& posting_store, const DictionaryConstIterator & from, + size_t count, BitVector * bv, uint32_t limit) + : _doom(doom), + _posting_store(posting_store), _bv(bv), _docIdLimit(limit), _from(from), @@ -86,7 +89,8 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { _owned_bv = 
BitVector::create(_docIdLimit); _bv = _owned_bv.get(); } - for (;_from != _to;++_from) { + //TODO Add && !_doom.soft_doom() to loop + for ( ;_from != _to; ++_from) { addToBitVector(PostingListTraverser<PostingStore>(_posting_store, _from.getData().load_acquire())); } } @@ -95,6 +99,7 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { if (__builtin_expect(key < _docIdLimit, true)) { _bv->setBit(key); } }); } + const vespalib::Doom _doom; const PostingStore &_posting_store; BitVector *_bv; uint32_t _docIdLimit; @@ -105,8 +110,9 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable { template <typename DataT> void -PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle) +PostingListSearchContextT<DataT>::fillBitVector(const ExecuteInfo & exec_info) { + vespalib::ThreadBundle & thread_bundle = exec_info.thread_bundle(); size_t num_iter = _upperDictItr - _lowerDictItr; size_t num_threads = std::min(thread_bundle.size(), num_iter); @@ -115,10 +121,10 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_ std::vector<FillPart> parts; parts.reserve(num_threads); BitVector * master = _merger.getBitVector(); - parts.emplace_back(_posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit()); + parts.emplace_back(exec_info.doom(), _posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit()); for (size_t i(1); i < num_threads; i++) { size_t num_this_thread = per_thread + (i < rest_docs); - parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit()); + parts.emplace_back(exec_info.doom(), _posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit()); } thread_bundle.run(parts); std::vector<BitVector *> vectors; @@ -131,7 +137,7 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_ template <typename DataT> void 
-PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo) +PostingListSearchContextT<DataT>::fetchPostings(const ExecuteInfo & exec_info) { // The following constant is derived after running parts of // the range search performance test with 10M documents on an Apple M1 Pro with 32 GB memory. @@ -162,7 +168,7 @@ PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & e // The threshold for when to use array merging is therefore 0.0025 (0.08 / 32). constexpr float threshold_for_using_array = 0.0025; if (!_merger.merge_done() && _uniqueValues >= 2u && this->_dictionary.get_has_btree_dictionary()) { - if (execInfo.is_strict() || use_posting_lists_when_non_strict(execInfo)) { + if (exec_info.is_strict() || use_posting_lists_when_non_strict(exec_info)) { size_t sum = estimated_hits_in_range(); //TODO Honour soft_doom and forward it to merge code if (sum < (_docIdLimit * threshold_for_using_array)) { @@ -170,7 +176,7 @@ PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & e fillArray(); } else { _merger.allocBitVector(); - fillBitVector(execInfo.thread_bundle()); + fillBitVector(exec_info); } _merger.merge(); } @@ -223,7 +229,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) } const BitVector *bv(_merger.getBitVector()); assert(bv != nullptr); - return search::BitVectorIterator::create(bv, bv->size(), *matchData, strict); + return BitVectorIterator::create(bv, bv->size(), *matchData, strict); } if (_uniqueValues == 1) { if (_bv != nullptr && (!_pidx.valid() || _useBitVector || matchData->isNotNeeded())) { @@ -421,9 +427,9 @@ PostingListFoldedSearchContextT<DataT>::fillArray() template <typename DataT> void -PostingListFoldedSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle) +PostingListFoldedSearchContextT<DataT>::fillBitVector(const ExecuteInfo & exec_info) { - (void) thread_bundle; + (void) exec_info; fill_array_or_bitvector<false>(); } 
@@ -487,7 +493,7 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::use_dictionary_entry(PostingLi template <typename BaseSC, typename AttrT, typename DataT> bool -StringPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const +StringPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_strict(const ExecuteInfo& info) const { if (this->isFuzzy()) { uint32_t exp_doc_hits = this->_docIdLimit * info.hit_rate(); diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h index 482dc90f6cd..749fa48565b 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h @@ -52,16 +52,16 @@ private: using ValueModifier = typename B::BaseClass::ValueModifier; using generation_t = typename SingleValueNumericEnumAttribute<B>::generation_t; - using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore, - PostingStore, EnumStore>; - DirectPostingStoreAdapterType _posting_store_adapter; - using PostingParent::_posting_store; using PostingParent::clearAllPostings; using PostingParent::handle_load_posting_lists; using PostingParent::handle_load_posting_lists_and_update_enum_store; using PostingParent::forwardedOnAddDoc; + using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore, + PostingStore, EnumStore>; + DirectPostingStoreAdapterType _posting_store_adapter; + void freezeEnumDictionary() override; void mergeMemoryStats(vespalib::MemoryUsage & total) override; void applyUpdateValueChange(const Change & c, EnumStore & enumStore, diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h index 543cfdd90ec..5a5b599244f 100644 --- 
a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h @@ -2,8 +2,10 @@ #pragma once -#include "singlestringattribute.h" +#include "i_docid_posting_store.h" #include "postinglistattribute.h" +#include "singlestringattribute.h" +#include "string_direct_posting_store_adapter.h" namespace search { @@ -48,12 +50,17 @@ private: using PostingParent::handle_load_posting_lists; using PostingParent::handle_load_posting_lists_and_update_enum_store; using PostingParent::forwardedOnAddDoc; + public: using PostingStore = typename PostingParent::PostingStore; using Dictionary = EnumPostingTree; using PostingParent::get_posting_store; private: + using DirectPostingStoreAdapterType = attribute::StringDirectPostingStoreAdapter<IDocidPostingStore, + PostingStore, EnumStore>; + DirectPostingStoreAdapterType _posting_store_adapter; + void freezeEnumDictionary() override; void mergeMemoryStats(vespalib::MemoryUsage & total) override; void applyUpdateValueChange(const Change & c, @@ -77,6 +84,8 @@ public: std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; + const IDocidPostingStore* as_docid_posting_store() const override { return &_posting_store_adapter; } + bool onAddDoc(DocId doc) override { return forwardedOnAddDoc(doc, this->_enumIndices.size(), this->_enumIndices.capacity()); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index 85b0c095d76..5822ab2c786 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -4,6 +4,7 @@ #include "singlestringpostattribute.h" #include "single_string_enum_search_context.h" +#include "string_direct_posting_store_adapter.hpp" #include 
<vespa/searchcommon/attribute/config.h> #include <vespa/searchlib/query/query_term_ucs4.h> @@ -13,7 +14,8 @@ template <typename B> SingleValueStringPostingAttributeT<B>::SingleValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c) : SingleValueStringAttributeT<B>(name, c), - PostingParent(*this, this->getEnumStore()) + PostingParent(*this, this->getEnumStore()), + _posting_store_adapter(this->get_posting_store(), this->_enumStore, this->getIsFilter()) { } @@ -152,5 +154,5 @@ SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, *this); } -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt index 970937e18ec..3270c170327 100644 --- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt @@ -10,6 +10,7 @@ vespa_add_library(searchlib_common OBJECT documentlocations.cpp documentsummary.cpp fileheadercontext.cpp + fileheadertags.cpp flush_token.cpp geo_gcd.cpp geo_location.cpp diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 898a21d860e..cee5801beb9 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -96,7 +96,7 @@ BitVector::allocatePaddedAndAligned(Index start, Index end, Index capacity, cons { assert(capacity >= end); uint32_t words = numActiveWords(start, capacity); - words += (-words & 15); // Pad to 64 byte alignment + words += (-words & (getAlignment()/sizeof(Word) - 1)); // Pad to required alignment const size_t sz(words * sizeof(Word)); Alloc alloc = (init_alloc != nullptr) ? 
init_alloc->create(sz) : Alloc::alloc(sz, MMAP_LIMIT); assert(alloc.size()/sizeof(Word) >= words); @@ -445,7 +445,6 @@ void MMappedBitVector::read(Index numberOfElements, FastOS_FileInterface &file, int64_t offset, Index doccount) { - assert((offset & (getAlignment() - 1)) == 0); void *mapptr = file.MemoryMapPtr(offset); assert(mapptr != nullptr); if (mapptr != nullptr) { @@ -478,10 +477,21 @@ operator>>(nbostream &in, AllocatedBitVector &bv) in >> size >> cachedHits >> fileBytes; assert(size <= std::numeric_limits<BitVector::Index>::max()); assert(cachedHits <= size || ! bv.isValidCount(cachedHits)); - if (bv.size() != size) + if (bv.size() != size) { bv.resize(size); - assert(bv.getFileBytes() == fileBytes); - in.read(bv.getStart(), bv.getFileBytes()); + } + size_t expected_file_bytes = bv.getFileBytes(); + size_t read_size = fileBytes; + size_t skip_size = 0; + if (expected_file_bytes < fileBytes) { + read_size = expected_file_bytes; + skip_size = fileBytes - expected_file_bytes; + } + in.read(bv.getStart(), read_size); + if (skip_size != 0) { + std::vector<char> dummy(skip_size); + in.read(dummy.data(), skip_size); + } assert(bv.testBit(size)); bv.setTrueBits(cachedHits); return in; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 0312e37a33c..b1a02f5cee2 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -288,6 +288,9 @@ public: * TODO: Extend to handle both AND/OR */ static void parallellOr(vespalib::ThreadBundle & thread_bundle, vespalib::ConstArrayRef<BitVector *> vectors); + + static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); } + static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); } protected: using Alloc = vespalib::alloc::Alloc; VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end) noexcept; @@ -299,10 +302,8 @@ protected: VESPA_DLL_LOCAL void 
clearIntervalNoInvalidation(Range range); bool isValidCount() const noexcept { return isValidCount(_numTrueBits.load(std::memory_order_relaxed)); } static bool isValidCount(Index v) noexcept { return v != invalidCount(); } - static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); } - static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); } size_t numWords() const noexcept { return numWords(size()); } - static constexpr size_t getAlignment() noexcept { return 0x40u; } + static constexpr size_t getAlignment() noexcept { return 0x100u; } static size_t numActiveBytes(Index start, Index end) noexcept { return numActiveWords(start, end) * sizeof(Word); } static Alloc allocatePaddedAndAligned(Index sz) { return allocatePaddedAndAligned(0, sz); diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp index 77246e2b202..067d7aeaae9 100644 --- a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp +++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "fileheadercontext.h" +#include "fileheadertags.h" #include <vespa/vespalib/data/fileheader.h> #include <chrono> @@ -9,6 +10,7 @@ using namespace std::chrono; namespace search::common { using vespalib::GenericHeader; +using namespace tags; FileHeaderContext::FileHeaderContext() = default; @@ -18,17 +20,17 @@ void FileHeaderContext::addCreateAndFreezeTime(GenericHeader &header) { using Tag = GenericHeader::Tag; - header.putTag(Tag("createTime", duration_cast<microseconds>(system_clock::now().time_since_epoch()).count())); - header.putTag(Tag("freezeTime", 0)); + header.putTag(Tag(CREATE_TIME, duration_cast<microseconds>(system_clock::now().time_since_epoch()).count())); + header.putTag(Tag(FREEZE_TIME, 0)); } void FileHeaderContext::setFreezeTime(GenericHeader &header) { using Tag = GenericHeader::Tag; - if (header.hasTag("freezeTime") && - header.getTag("freezeTime").getType() == Tag::TYPE_INTEGER) { - header.putTag(Tag("freezeTime", duration_cast<microseconds>(system_clock::now().time_since_epoch()).count())); + if (header.hasTag(FREEZE_TIME) && + header.getTag(FREEZE_TIME).getType() == Tag::TYPE_INTEGER) { + header.putTag(Tag(FREEZE_TIME, duration_cast<microseconds>(system_clock::now().time_since_epoch()).count())); } } diff --git a/searchlib/src/vespa/searchlib/common/fileheadertags.cpp b/searchlib/src/vespa/searchlib/common/fileheadertags.cpp new file mode 100644 index 00000000000..c6c15f08918 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/fileheadertags.cpp @@ -0,0 +1,16 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "fileheadertags.h" + +namespace search::tags { +// Do not change these constants, they are persisted in many file headers. 
+vespalib::string FREEZE_TIME("freezeTime"); +vespalib::string CREATE_TIME("createTime"); +vespalib::string FROZEN("frozen"); +vespalib::string DOCID_LIMIT("docIdLimit"); +vespalib::string FILE_BIT_SIZE("fileBitSize"); +vespalib::string DESC("desc"); +vespalib::string ENTRY_SIZE("entrySize"); +vespalib::string NUM_KEYS("numKeys"); + +} diff --git a/searchlib/src/vespa/searchlib/common/fileheadertags.h b/searchlib/src/vespa/searchlib/common/fileheadertags.h new file mode 100644 index 00000000000..c7e7385160e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/fileheadertags.h @@ -0,0 +1,17 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace search::tags { + +extern vespalib::string FREEZE_TIME; +extern vespalib::string CREATE_TIME; +extern vespalib::string FROZEN; +extern vespalib::string DOCID_LIMIT; +extern vespalib::string FILE_BIT_SIZE; +extern vespalib::string DESC; +extern vespalib::string ENTRY_SIZE; +extern vespalib::string NUM_KEYS; + +} diff --git a/searchlib/src/vespa/searchlib/common/sortspec.cpp b/searchlib/src/vespa/searchlib/common/sortspec.cpp index 04bc87f1000..40e2616367f 100644 --- a/searchlib/src/vespa/searchlib/common/sortspec.cpp +++ b/searchlib/src/vespa/searchlib/common/sortspec.cpp @@ -30,7 +30,7 @@ LowercaseConverter::onConvert(const ConstBufferRef & src) const vespalib::Utf8Writer w(_buffer); while (r.hasMore()) { ucs4_t c = r.getChar(0xFFFD); - c = Fast_NormalizeWordFolder::ToFold(c); + c = Fast_NormalizeWordFolder::lowercase_and_fold(c); w.putChar(c); } return {_buffer.begin(), _buffer.size()}; diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp index f2a7ec4d88b..5f001b20dda 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp +++ 
b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "bitvectordictionary.h" +#include <vespa/searchlib/common/fileheadertags.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/fastos/file.h> #include <cassert> @@ -10,44 +11,48 @@ LOG_SETUP(".diskindex.bitvectordictionary"); namespace search::diskindex { +using namespace tags; + BitVectorDictionary::BitVectorDictionary() - : _docIdLimit(), + : _docIdLimit(0u), _entries(), - _vectorSize(), + _vectorSize(0u), _datFile(), _datHeaderLen(0u) { } - BitVectorDictionary::~BitVectorDictionary() = default; - bool BitVectorDictionary::open(const vespalib::string &pathPrefix, const TuneFileRandRead &tuneFileRead, BitVectorKeyScope scope) { - vespalib::string booloccIdxName = pathPrefix + "boolocc" + - getBitVectorKeyScopeSuffix(scope); - vespalib::string booloccDatName = pathPrefix + "boolocc.bdat"; { + vespalib::string booloccIdxName = pathPrefix + "boolocc" + getBitVectorKeyScopeSuffix(scope); FastOS_File idxFile; idxFile.OpenReadOnly(booloccIdxName.c_str()); if (!idxFile.IsOpened()) { - LOG(warning, "Could not open bitvector idx file '%s'", - booloccIdxName.c_str()); + LOG(warning, "Could not open bitvector idx file '%s'", booloccIdxName.c_str()); return false; } vespalib::FileHeader idxHeader; uint32_t idxHeaderLen = idxHeader.readFile(idxFile); idxFile.SetPosition(idxHeaderLen); - assert(idxHeader.hasTag("frozen")); - assert(idxHeader.hasTag("docIdLimit")); - assert(idxHeader.hasTag("numKeys")); - assert(idxHeader.getTag("frozen").asInteger() != 0); - _docIdLimit = idxHeader.getTag("docIdLimit").asInteger(); - uint32_t numEntries = idxHeader.getTag("numKeys").asInteger(); + assert(idxHeader.hasTag(FROZEN)); + assert(idxHeader.hasTag(DOCID_LIMIT)); + assert(idxHeader.hasTag(NUM_KEYS)); + assert(idxHeader.getTag(FROZEN).asInteger() != 0); + _docIdLimit = 
idxHeader.getTag(DOCID_LIMIT).asInteger(); + uint32_t numEntries = idxHeader.getTag(NUM_KEYS).asInteger(); + if (idxHeader.hasTag(ENTRY_SIZE)) { + _vectorSize = idxHeader.getTag(ENTRY_SIZE).asInteger(); + } else { + constexpr size_t LEGACY_ALIGNMENT = 0x40; + BitVector::Index bytes = BitVector::numBytes(_docIdLimit); + _vectorSize = bytes + (-bytes & (LEGACY_ALIGNMENT - 1)); + } _entries.resize(numEntries); size_t bufSize = sizeof(WordSingleKey) * numEntries; @@ -58,7 +63,7 @@ BitVectorDictionary::open(const vespalib::string &pathPrefix, } } - _vectorSize = BitVector::getFileBytes(_docIdLimit); + vespalib::string booloccDatName = pathPrefix + "boolocc.bdat"; _datFile = std::make_unique<FastOS_File>(); _datFile->setFAdviseOptions(tuneFileRead.getAdvise()); @@ -69,14 +74,12 @@ BitVectorDictionary::open(const vespalib::string &pathPrefix, } _datFile->OpenReadOnly(booloccDatName.c_str()); if (!_datFile->IsOpened()) { - LOG(warning, "Could not open bitvector dat file '%s'", - booloccDatName.c_str()); + LOG(warning, "Could not open bitvector dat file '%s'", booloccDatName.c_str()); return false; } vespalib::FileHeader datHeader(64); _datHeaderLen = datHeader.readFile(*_datFile); - assert(_datFile->getSize() >= - static_cast<int64_t>(_vectorSize * _entries.size() + _datHeaderLen)); + assert(_datFile->getSize() >= static_cast<int64_t>(_vectorSize * _entries.size() + _datHeaderLen)); return true; } @@ -88,12 +91,11 @@ BitVectorDictionary::lookup(uint64_t wordNum) key._wordNum = wordNum; auto itr = std::lower_bound(_entries.begin(), _entries.end(), key); if (itr == _entries.end() || key < *itr) { - return BitVector::UP(); + return {}; } int64_t pos = &*itr - &_entries[0]; - return BitVector::create(_docIdLimit, *_datFile, - ((int64_t) _vectorSize) * pos + _datHeaderLen, - itr->_numDocs); + int64_t offset = ((int64_t) _vectorSize) * pos + _datHeaderLen; + return BitVector::create(_docIdLimit, *_datFile, offset, itr->_numDocs); } } diff --git 
a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp index e78b740c837..fc7fd9c2fb7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -3,6 +3,7 @@ #include "bitvectorfile.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/common/fileheadertags.h> #include <vespa/searchlib/index/bitvectorkeys.h> #include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> @@ -14,12 +15,12 @@ namespace search::diskindex { using search::index::BitVectorWordSingleKey; using search::common::FileHeaderContext; +using namespace tags; namespace { void -readHeader(vespalib::FileHeader &h, - const vespalib::string &name) +readHeader(vespalib::FileHeader &h, const vespalib::string &name) { Fast_BufferedFile file(32_Ki); file.ReadOpenExisting(name.c_str()); @@ -35,13 +36,10 @@ BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope) { } - BitVectorFileWrite::~BitVectorFileWrite() = default; - void -BitVectorFileWrite::open(const vespalib::string &name, - uint32_t docIdLimit, +BitVectorFileWrite::open(const vespalib::string &name, uint32_t docIdLimit, const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext) { @@ -65,18 +63,10 @@ BitVectorFileWrite::open(const vespalib::string &name, makeDatHeader(fileHeaderContext); } - int64_t pos; - size_t bitmapbytes; - - bitmapbytes = BitVector::getFileBytes(_docIdLimit); - - pos = static_cast<int64_t>(_numKeys) * - static_cast<int64_t>(bitmapbytes) + _datHeaderLen; - - int64_t olddatsize = _datFile->getSize(); - assert(olddatsize >= pos); - (void) olddatsize; + size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit); + int64_t pos = static_cast<int64_t>(_numKeys) * static_cast<int64_t>(bitmapbytes) + _datHeaderLen; + assert(_datFile->getSize() >= pos); 
_datFile->SetSize(pos); assert(pos == _datFile->getPosition()); @@ -89,11 +79,12 @@ BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext) vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); using Tag = vespalib::GenericHeader::Tag; fileHeaderContext.addTags(h, _datFile->GetFileName()); - h.putTag(Tag("docIdLimit", _docIdLimit)); - h.putTag(Tag("numKeys", _numKeys)); - h.putTag(Tag("frozen", 0)); - h.putTag(Tag("fileBitSize", 0)); - h.putTag(Tag("desc", "Bitvector data file")); + h.putTag(Tag(ENTRY_SIZE, (int64_t) BitVector::getFileBytes(_docIdLimit))); + h.putTag(Tag(DOCID_LIMIT, _docIdLimit)); + h.putTag(Tag(NUM_KEYS, _numKeys)); + h.putTag(Tag(FROZEN, 0)); + h.putTag(Tag(FILE_BIT_SIZE, 0)); + h.putTag(Tag(DESC, "Bitvector data file")); _datFile->SetPosition(0); _datHeaderLen = h.writeFile(*_datFile); _datFile->Flush(); @@ -107,9 +98,9 @@ BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize) using Tag = vespalib::GenericHeader::Tag; readHeader(h, _datFile->GetFileName()); FileHeaderContext::setFreezeTime(h); - h.putTag(Tag("numKeys", _numKeys)); - h.putTag(Tag("frozen", 1)); - h.putTag(Tag("fileBitSize", fileBitSize)); + h.putTag(Tag(NUM_KEYS, _numKeys)); + h.putTag(Tag(FROZEN, 1)); + h.putTag(Tag(FILE_BIT_SIZE, fileBitSize)); bool sync_ok = _datFile->Sync(); assert(sync_ok); assert(h.getSize() == _datHeaderLen); @@ -121,14 +112,12 @@ BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize) void -BitVectorFileWrite::addWordSingle(uint64_t wordNum, - const BitVector &bitVector) +BitVectorFileWrite::addWordSingle(uint64_t wordNum, const BitVector &bitVector) { assert(bitVector.size() == _docIdLimit); bitVector.invalidateCachedCount(); Parent::addWordSingle(wordNum, bitVector.countTrueBits()); - _datFile->WriteBuf(bitVector.getStart(), - bitVector.getFileBytes()); + _datFile->WriteBuf(bitVector.getStart(), bitVector.getFileBytes()); } @@ -153,21 +142,17 @@ BitVectorFileWrite::sync() void BitVectorFileWrite::close() { - size_t 
bitmapbytes = BitVector::getFileBytes(_docIdLimit); - - if (_datFile != nullptr) { - if (_datFile->IsOpened()) { - uint64_t pos = _datFile->getPosition(); - assert(pos == static_cast<uint64_t>(_numKeys) * - static_cast<uint64_t>(bitmapbytes) + _datHeaderLen); - (void) bitmapbytes; - _datFile->alignEndForDirectIO(); - updateDatHeader(pos * 8); - bool close_ok = _datFile->Close(); - assert(close_ok); - } - _datFile.reset(); + if (_datFile && _datFile->IsOpened()) { + size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit); + uint64_t pos = _datFile->getPosition(); + assert(pos == static_cast<uint64_t>(_numKeys) * static_cast<uint64_t>(bitmapbytes) + _datHeaderLen); + (void) bitmapbytes; + _datFile->alignEndForDirectIO(); + updateDatHeader(pos * 8); + bool close_ok = _datFile->Close(); + assert(close_ok); } + _datFile.reset(); Parent::close(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp index ec436205578..7e3f0f5f258 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -2,6 +2,7 @@ #include "bitvectoridxfile.h" #include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/common/fileheadertags.h> #include <vespa/searchlib/index/bitvectorkeys.h> #include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> @@ -13,6 +14,7 @@ namespace search::diskindex { using search::index::BitVectorWordSingleKey; using search::common::FileHeaderContext; +using namespace tags; namespace { @@ -45,10 +47,9 @@ BitVectorIdxFileWrite::idxSize() const } void -BitVectorIdxFileWrite::open(const vespalib::string &name, - uint32_t docIdLimit, - const TuneFileSeqWrite &tuneFileWrite, - const FileHeaderContext &fileHeaderContext) +BitVectorIdxFileWrite::open(const vespalib::string &name, uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const 
FileHeaderContext &fileHeaderContext) { if (_numKeys != 0) { assert(docIdLimit == _docIdLimit); @@ -90,13 +91,14 @@ BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext) vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); using Tag = vespalib::GenericHeader::Tag; fileHeaderContext.addTags(h, _idxFile->GetFileName()); - h.putTag(Tag("docIdLimit", _docIdLimit)); - h.putTag(Tag("numKeys", _numKeys)); - h.putTag(Tag("frozen", 0)); + h.putTag(Tag(ENTRY_SIZE, (int64_t) BitVector::getFileBytes(_docIdLimit))); + h.putTag(Tag(DOCID_LIMIT, _docIdLimit)); + h.putTag(Tag(NUM_KEYS, _numKeys)); + h.putTag(Tag(FROZEN, 0)); if (_scope != BitVectorKeyScope::SHARED_WORDS) { - h.putTag(Tag("fileBitSize", 0)); + h.putTag(Tag(FILE_BIT_SIZE, 0)); } - h.putTag(Tag("desc", "Bitvector dictionary file, single words")); + h.putTag(Tag(DESC, "Bitvector dictionary file, single words")); _idxFile->SetPosition(0); _idxHeaderLen = h.writeFile(*_idxFile); _idxFile->Flush(); @@ -109,10 +111,10 @@ BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize) using Tag = vespalib::GenericHeader::Tag; readHeader(h, _idxFile->GetFileName()); FileHeaderContext::setFreezeTime(h); - h.putTag(Tag("numKeys", _numKeys)); - h.putTag(Tag("frozen", 1)); + h.putTag(Tag(NUM_KEYS, _numKeys)); + h.putTag(Tag(FROZEN, 1)); if (_scope != BitVectorKeyScope::SHARED_WORDS) { - h.putTag(Tag("fileBitSize", fileBitSize)); + h.putTag(Tag(FILE_BIT_SIZE, fileBitSize)); } bool sync_ok = _idxFile->Sync(); assert(sync_ok); @@ -160,17 +162,15 @@ BitVectorIdxFileWrite::sync() void BitVectorIdxFileWrite::close() { - if (_idxFile) { - if (_idxFile->IsOpened()) { - uint64_t pos = _idxFile->getPosition(); - assert(pos == idxSize()); - _idxFile->alignEndForDirectIO(); - updateIdxHeader(pos * 8); - bool close_ok = _idxFile->Close(); - assert(close_ok); - } - _idxFile.reset(); + if (_idxFile && _idxFile->IsOpened()) { + uint64_t pos = _idxFile->getPosition(); + assert(pos == idxSize()); + 
_idxFile->alignEndForDirectIO(); + updateIdxHeader(pos * 8); + bool close_ok = _idxFile->Close(); + assert(close_ok); } + _idxFile.reset(); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h index 533f5620ea2..9f1e5ce0f80 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h @@ -2,10 +2,10 @@ #pragma once +#include "bitvectorkeyscope.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/vespalib/stllike/string.h> -#include "bitvectorkeyscope.h" class Fast_BufferedFile; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index cd9dbff99cb..4637ad5a4e8 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -455,27 +455,15 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm return vespalib::fuzzy_matching_algorithm_from_string(value, default_value); } -const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive"); -const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false); -bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) { - return lookupBool(props, NAME, fallback); -} - -const vespalib::string CreatePostingListWhenNonStrict::NAME("vespa.matching.create_postinglist_when_non_strict"); -const bool CreatePostingListWhenNonStrict::DEFAULT_VALUE(true); -bool CreatePostingListWhenNonStrict::check(const Properties &props, bool fallback) { +const vespalib::string SortBlueprintsByCost::NAME("vespa.matching.sort_blueprints_by_cost"); +const bool SortBlueprintsByCost::DEFAULT_VALUE(false); +bool SortBlueprintsByCost::check(const Properties &props, bool fallback) { return lookupBool(props, NAME, fallback); } 
-const vespalib::string UseEstimateForFetchPostings::NAME("vespa.matching.use_estimate_for_fetch_postings"); -const bool UseEstimateForFetchPostings::DEFAULT_VALUE(false); -bool UseEstimateForFetchPostings::check(const Properties &props, bool fallback) { - return lookupBool(props, NAME, fallback); -} - -const vespalib::string UseThreadBundleForFetchPostings::NAME("vespa.matching.use_thread_bundle_for_fetch_postings"); -const bool UseThreadBundleForFetchPostings::DEFAULT_VALUE(false); -bool UseThreadBundleForFetchPostings::check(const Properties &props, bool fallback) { +const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive"); +const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false); +bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) { return lookupBool(props, NAME, fallback); } diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 0183fdf1a13..db8de8209a9 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -336,12 +336,10 @@ namespace matching { static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props); static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value); }; - /** - * When enabled, the unpacking part of the phrase iterator will be tagged as expensive - * under all intermediate iterators, not only AND. + * Sort blueprints based on relative cost estimate rather than est_hits **/ - struct AlwaysMarkPhraseExpensive { + struct SortBlueprintsByCost { static const vespalib::string NAME; static const bool DEFAULT_VALUE; static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } @@ -349,26 +347,10 @@ namespace matching { }; /** - * When enabled posting lists can be created on the fly even if iterator is not strict. 
- **/ - struct CreatePostingListWhenNonStrict { - static const vespalib::string NAME; - static const bool DEFAULT_VALUE; - static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } - static bool check(const Properties &props, bool fallback); - }; - - /** - * When enabled posting lists can be created on the fly even if iterator is not strict. + * When enabled, the unpacking part of the phrase iterator will be tagged as expensive + * under all intermediate iterators, not only AND. **/ - struct UseEstimateForFetchPostings { - static const vespalib::string NAME; - static const bool DEFAULT_VALUE; - static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } - static bool check(const Properties &props, bool fallback); - }; - - struct UseThreadBundleForFetchPostings { + struct AlwaysMarkPhraseExpensive { static const vespalib::string NAME; static const bool DEFAULT_VALUE; static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 5c28f1814d5..aadc5300ede 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -56,14 +56,12 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _dumpFeatures(), _warnings(), _feature_rename_map(), + _sort_blueprints_by_cost(false), _ignoreDefaultRankFeatures(false), _compiled(false), _compileError(false), _degradationAscendingOrder(false), _always_mark_phrase_expensive(false), - _create_postinglist_when_non_strict(true), - _use_estimate_for_fetch_postings(false), - _use_thread_bundle_for_fetch_postings(false), _diversityAttribute(), _diversityMinGroups(1), _diversityCutoffFactor(10.0), @@ -137,10 +135,8 @@ RankSetup::configure() _mutateOnSummary._attribute = mutate::on_summary::Attribute::lookup(_indexEnv.getProperties()); _mutateOnSummary._operation = 
mutate::on_summary::Operation::lookup(_indexEnv.getProperties()); _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); + _sort_blueprints_by_cost = matching::SortBlueprintsByCost::check(_indexEnv.getProperties()); _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); - _create_postinglist_when_non_strict = matching::CreatePostingListWhenNonStrict::check(_indexEnv.getProperties()); - _use_estimate_for_fetch_postings = matching::UseEstimateForFetchPostings::check(_indexEnv.getProperties()); - _use_thread_bundle_for_fetch_postings = matching::UseThreadBundleForFetchPostings::check(_indexEnv.getProperties()); } void diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 04659955490..d8b977a0331 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -65,14 +65,12 @@ private: std::vector<vespalib::string> _dumpFeatures; Warnings _warnings; StringStringMap _feature_rename_map; + bool _sort_blueprints_by_cost; bool _ignoreDefaultRankFeatures; bool _compiled; bool _compileError; bool _degradationAscendingOrder; bool _always_mark_phrase_expensive; - bool _create_postinglist_when_non_strict; - bool _use_estimate_for_fetch_postings; - bool _use_thread_bundle_for_fetch_postings; vespalib::string _diversityAttribute; uint32_t _diversityMinGroups; double _diversityCutoffFactor; @@ -225,9 +223,6 @@ public: return _degradationAscendingOrder; } bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; } - bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } - bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; } - bool use_thread_bundle_for_fetch_postings() const noexcept { return _use_thread_bundle_for_fetch_postings; } /** get number of hits to collect 
during graceful degradation in match phase */ uint32_t getDegradationMaxHits() const { return _degradationMaxHits; @@ -465,6 +460,7 @@ public: const MutateOperation & getMutateOnSummary() const { return _mutateOnSummary; } bool allowMutateQueryOverride() const { return _mutateAllowQueryOverride; } + bool sort_blueprints_by_cost() const noexcept { return _sort_blueprints_by_cost; } }; } diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp index 774e17d015a..b3b42a179d8 100644 --- a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp @@ -1,14 +1,9 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "ftlib.h" -#include "dummy_dependency_handler.h" #include <vespa/searchlib/features/utils.h> -#include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/text/stringtokenizer.h> -#include <vespa/log/log.h> -LOG_SETUP(".ftlib"); - using namespace search::features; using namespace search::fef; using namespace search::fef::test; @@ -110,280 +105,3 @@ FtUtil::toRankResult(const vespalib::string & baseName, const vespalib::string & } FtIndex::~FtIndex() = default; - -//--------------------------------------------------------------------------------------------------------------------- -// FtTestApp -//--------------------------------------------------------------------------------------------------------------------- -void -FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList ¶ms) -{ - search::fef::test::IndexEnvironment ie; - FT_SETUP_FAIL(prototype, ie, params); -} - -void -FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, - const StringList ¶ms) -{ - FT_LOG(prototype, env, params); - search::fef::Blueprint::UP bp = prototype.createInstance(); - DummyDependencyHandler deps(*bp); - 
EXPECT_TRUE(!bp->setup(env, params)); -} - -void -FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList ¶ms, - const StringList &expectedIn, const StringList &expectedOut) -{ - search::fef::test::IndexEnvironment ie; - FT_SETUP_OK(prototype, ie, params, expectedIn, expectedOut); -} - -void -FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, - const StringList ¶ms, const StringList &expectedIn, const StringList &expectedOut) -{ - FT_LOG(prototype, env, params); - search::fef::Blueprint::UP bp = prototype.createInstance(); - DummyDependencyHandler deps(*bp); - ASSERT_TRUE(bp->setup(env, params)); - FT_EQUAL(expectedIn, deps.input, "In, "); - FT_EQUAL(expectedOut, deps.output, "Out,"); -} - -void -FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName) -{ - StringList empty; - FT_DUMP(factory, baseName, empty); -} - -void -FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - search::fef::test::IndexEnvironment &env) -{ - StringList empty; - FT_DUMP(factory, baseName, env, empty); -} - -void -FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - const StringList &expected) -{ - search::fef::test::IndexEnvironment ie; - FT_DUMP(factory, baseName, ie, expected); -} - -void -FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - search::fef::test::IndexEnvironment &env, - const StringList &expected) -{ - FtDumpFeatureVisitor dfv; - search::fef::Blueprint::SP bp = factory.createBlueprint(baseName); - if ( ! 
bp) { - LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str()); - ASSERT_TRUE(bp); - } - bp->visitDumpFeatures(env, dfv); - FT_EQUAL(expected, dfv.features(), "Dump"); -} - -void -FtTestApp::FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual, - const vespalib::string &prefix) -{ - FT_LOG(prefix + " expected", expected); - FT_LOG(prefix + " actual ", actual); - EXPECT_EQUAL(expected.size(), actual.size()); - ASSERT_TRUE(expected.size() == actual.size()); - for (uint32_t i = 0; i < expected.size(); ++i) { - EXPECT_EQUAL(expected[i], actual[i]); - ASSERT_TRUE(expected[i] == actual[i]); - } -} - -void -FtTestApp::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, - const StringList ¶ms) -{ - LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str()); - std::vector<vespalib::string> arr; - for (const auto & it : env.getFields()) { - arr.push_back(it.name()); - } - FT_LOG("Environment ", arr); - FT_LOG("Parameters ", params); -} - -void -FtTestApp::FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr) -{ - vespalib::string str = prefix + " = [ "; - for (uint32_t i = 0; i < arr.size(); ++i) { - str.append("'").append(arr[i]).append("'"); - if (i < arr.size() - 1) { - str.append(", "); - } - } - str.append(" ]"); - LOG(info, "%s", str.c_str()); -} - -void -FtTestApp::FT_SETUP(FtFeatureTest &test, const vespalib::string &query, const StringMap &index, - uint32_t docId) -{ - LOG(info, "Setup test for query '%s'.", query.c_str()); - - // Add all query terms. - FtQueryEnvironment &queryEnv = test.getQueryEnv(); - for (uint32_t i = 0; i < query.size(); ++i) { - queryEnv.getBuilder().addAllFields(); - } - ASSERT_TRUE(test.setup()); - - // Add all occurences. 
- search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); - for (auto it = index.begin();it != index.end(); ++it) { - ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size())); - for (uint32_t i = 0; i < it->second.size(); ++i) { - size_t pos = query.find_first_of(it->second[i]); - if (pos != vespalib::string::npos) { - LOG(debug, "Occurence of '%c' added to field '%s' at position %d.", query[pos], it->first.c_str(), i); - ASSERT_TRUE(mdb->addOccurence(it->first, pos, i)); - } - } - } - ASSERT_TRUE(mdb->apply(docId)); -} - -void -FtTestApp::FT_SETUP(FtFeatureTest & test, const std::vector<FtQueryTerm> & query, const StringVectorMap & index, - uint32_t docId) -{ - setupQueryEnv(test.getQueryEnv(), query); - ASSERT_TRUE(test.setup()); - - search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); - - // Add all occurences. - for (auto itr = index.begin(); itr != index.end(); ++itr) { - ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size())); - for (uint32_t i = 0; i < itr->second.size(); ++i) { - auto fitr = query.begin(); - for (;;) { - fitr = std::find(fitr, query.end(), FtQueryTerm(itr->second[i])); - if (fitr != query.end()) { - uint32_t termId = fitr - query.begin(); - LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i); - ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i)); - ++fitr; - } else { - break; - } - } - } - } - ASSERT_TRUE(mdb->apply(docId)); -} - -void -FtTestApp::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId) -{ - setupQueryEnv(test.getQueryEnv(), query); - ASSERT_TRUE(test.setup()); - search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); - - // Add all occurences. 
- for (auto itr = index.index.begin(); itr != index.index.end(); ++itr) { - const FtIndex::Field &field = itr->second; - for (size_t e = 0; e < field.size(); ++e) { - const FtIndex::Element &element = field[e]; - ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size())); - for (size_t t = 0; t < element.tokens.size(); ++t) { - const vespalib::string &token = element.tokens[t]; - for (size_t q = 0; q < query.size(); ++q) { - if (query[q].term == token) { - ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e)); - } - } - } - } - } - ASSERT_TRUE(mdb->apply(docId)); -} - -void -FtTestApp::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query) -{ - // Add all query terms. - for (uint32_t i = 0; i < query.size(); ++i) { - queryEnv.getBuilder().addAllFields(); - queryEnv.getTerms()[i].setPhraseLength(1); - queryEnv.getTerms()[i].setUniqueId(i); - queryEnv.getTerms()[i].setWeight(query[i].termWeight); - if (i > 0) { - vespalib::string from = vespalib::make_string("vespa.term.%u.connexity", i); - vespalib::string to = vespalib::make_string("%u", i - 1); - vespalib::string connexity = vespalib::make_string("%f", query[i].connexity); - queryEnv.getProperties().add(from, to); - queryEnv.getProperties().add(from, connexity); - } - vespalib::string term = vespalib::make_string("vespa.term.%u.significance", i); - vespalib::string significance = vespalib::make_string("%f", query[i].significance); - queryEnv.getProperties().add(term, significance); - LOG(debug, "Add term node: '%s'", query[i].term.c_str()); - } -} - -void -FtTestApp::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName, - const vespalib::string & query, const vespalib::string & field, - const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance, - uint32_t docId) -{ - ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, FieldInfo::CollectionType::SINGLE, indexName); - - if (params != nullptr) { - Properties & p = 
ft.getIndexEnv().getProperties(); - p.add("fieldMatch(" + indexName + ").proximityLimit", vespalib::make_string("%u", params->getProximityLimit())); - p.add("fieldMatch(" + indexName + ").maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations())); - p.add("fieldMatch(" + indexName + ").maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences())); - p.add("fieldMatch(" + indexName + ").proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance())); - p.add("fieldMatch(" + indexName + ").relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance())); - p.add("fieldMatch(" + indexName + ").earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance())); - p.add("fieldMatch(" + indexName + ").segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance())); - p.add("fieldMatch(" + indexName + ").occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance())); - p.add("fieldMatch(" + indexName + ").fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance())); - for (double it : params->getProximityTable()) { - p.add("fieldMatch(" + indexName + ").proximityTable", vespalib::make_string("%f", it)); - } - } - - if (totalTermWeight > 0) { - ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermWeight", - vespalib::make_string("%u", totalTermWeight)); - } - - if (totalSignificance > 0.0f) { - ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermSignificance", - vespalib::make_string("%f", totalSignificance)); - } - - std::map<vespalib::string, std::vector<vespalib::string> > index; - index[indexName] = FtUtil::tokenize(field); - FT_SETUP(ft, FtUtil::toQuery(query), index, docId); -} - - -RankResult -FtTestApp::toRankResult(const vespalib::string & baseName, - const vespalib::string & 
result, - const vespalib::string & separator) -{ - return FtUtil::toRankResult(baseName, result, separator); -} - - - diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.h b/searchlib/src/vespa/searchlib/fef/test/ftlib.h index be52b407369..2281151f9cf 100644 --- a/searchlib/src/vespa/searchlib/fef/test/ftlib.h +++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.h @@ -8,10 +8,8 @@ #include "queryenvironment.h" #include "queryenvironmentbuilder.h" #include "rankresult.h" -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/common/feature.h> #include <vespa/searchlib/query/weight.h> -#include <vespa/searchlib/features/fieldmatch/params.h> #include <vespa/searchlib/fef/fef.h> using search::feature_t; @@ -183,58 +181,3 @@ struct FtIndex { return *this; } }; - -//--------------------------------------------------------------------------------------------------------------------- -// FtTestApp -//--------------------------------------------------------------------------------------------------------------------- -struct FtTestApp : public vespalib::TestApp { - using string = vespalib::string; - static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList ¶ms); - static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, - const StringList ¶ms); - static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList ¶ms, - const StringList &expectedIn, const StringList &expectedOut); - static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, - const StringList ¶ms, const StringList &expectedIn, const StringList &expectedOut); - - static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName); - static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - search::fef::test::IndexEnvironment &env); - static void 
FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - const StringList &expected); - static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, - search::fef::test::IndexEnvironment &env, - const StringList &expected); - - static void FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual, - const vespalib::string & prefix = ""); - - static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList ¶ms); - static void FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr); - - - static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId); - static void FT_SETUP(FtFeatureTest & test, const FtQuery & query, const StringVectorMap & index, uint32_t docId); - - static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId); - - static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query); - static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName, - const vespalib::string & query, const vespalib::string & field, - const search::features::fieldmatch::Params * params, - uint32_t totalTermWeight, feature_t totalSignificance, - uint32_t docId); - - static search::fef::test::RankResult toRankResult(const vespalib::string & baseName, - const vespalib::string & result, - const vespalib::string & separator = " "); - - template <typename T> - static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) { - search::fef::Blueprint::UP bp = prototype.createInstance(); - if (!EXPECT_TRUE(dynamic_cast<T*>(bp.get()) != NULL)) return false; - if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false; - return true; - } -}; - diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp 
b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp index b7a1719fb5f..ab3bd512d1d 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp @@ -2,20 +2,83 @@ #include "query_term_simple.h" #include "base.h" +#include <vespa/vespalib/locale/c.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/util/classname.h> -#include <vespa/vespalib/locale/c.h> #include <cmath> #include <limits> +#include <charconv> namespace { template <typename N> -bool isValidInteger(int64_t value) +constexpr bool isValidInteger(int64_t value) noexcept { - return value >= std::numeric_limits<N>::min() && value <= std::numeric_limits<N>::max(); + return (value >= std::numeric_limits<N>::min()) && + (value <= std::numeric_limits<N>::max()); +} + +constexpr bool isRepresentableByInt64(double d) noexcept { + return (d > double(std::numeric_limits<int64_t>::min())) && + (d < double(std::numeric_limits<int64_t>::max())); } +bool isFullRange(vespalib::stringref s) noexcept { + const size_t sz(s.size()); + return (sz >= 3u) && + (s[0] == '<' || s[0] == '[') && + (s[sz-1] == '>' || s[sz-1] == ']'); +} + +struct IntDecoder { + static int64_t fromstr(const char * q, const char * qend, const char ** end) noexcept { + int64_t v(0); + for (;q < qend && (isspace(*q) || (*q == '+')); q++); + std::from_chars_result err = std::from_chars(q, qend, v, 10); + if (err.ec == std::errc::result_out_of_range) { + v = (*q == '-') ? std::numeric_limits<int64_t>::min() : std::numeric_limits<int64_t>::max(); + } + *end = err.ptr; + return v; + } + static int64_t nearestDownwd(int64_t n, int64_t min) noexcept { return (n > min ? n - 1 : n); } + static int64_t nearestUpward(int64_t n, int64_t max) noexcept { return (n < max ? 
n + 1 : n); } +}; + +template <typename T> +struct FloatDecoder { + static T fromstr(const char * q, const char * qend, const char ** end) noexcept { + T v(0); +#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 180000 + vespalib::string tmp(q, qend - q); + char* tmp_end = nullptr; + const char *tmp_cstring = tmp.c_str(); + if constexpr (std::is_same_v<T, float>) { + v = vespalib::locale::c::strtof_au(tmp_cstring, &tmp_end); + } else { + v = vespalib::locale::c::strtod_au(tmp_cstring, &tmp_end); + } + if (end != nullptr) { + *end = (tmp_end != nullptr) ? (q + (tmp_end - tmp_cstring)) : nullptr; + } +#else + for (;q < qend && (isspace(*q) || (*q == '+')); q++); + std::from_chars_result err = std::from_chars(q, qend, v); + if (err.ec == std::errc::result_out_of_range) { + v = (*q == '-') ? -std::numeric_limits<T>::infinity() : std::numeric_limits<T>::infinity(); + } + *end = err.ptr; +#endif + return v; + } + static T nearestDownwd(T n, T min) noexcept { + return std::nextafter(n, min); + } + static T nearestUpward(T n, T max) noexcept { + return std::nextafter(n, max); + } +}; + } namespace search { @@ -29,15 +92,15 @@ QueryTermSimple::visitMembers(vespalib::ObjectVisitor & visitor) const template <typename N> QueryTermSimple::RangeResult<N> -QueryTermSimple::getFloatRange() const +QueryTermSimple::getFloatRange() const noexcept { - double lowRaw, highRaw; - bool valid = getAsDoubleTerm(lowRaw, highRaw); + N lowRaw, highRaw; + bool valid = getAsFloatTerm(lowRaw, highRaw); RangeResult<N> res; res.valid = valid; if (!valid) { - res.low = std::numeric_limits<N>::max(); - res.high = - std::numeric_limits<N>::max(); + res.low = std::numeric_limits<N>::infinity(); + res.high = -std::numeric_limits<N>::infinity(); res.adjusted = true; } else { res.low = lowRaw; @@ -46,25 +109,16 @@ QueryTermSimple::getFloatRange() const return res; } -namespace { - -bool isRepresentableByInt64(double d) { - return (d > double(std::numeric_limits<int64_t>::min())) - && (d < 
double(std::numeric_limits<int64_t>::max())); -} - -} - bool -QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const +QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const noexcept { bool valid = getAsIntegerTerm(low, high); if ( ! valid ) { double l(0), h(0); - valid = getAsDoubleTerm(l, h); + valid = getAsFloatTerm(l, h); if (valid) { if ((l == h) && isRepresentableByInt64(l)) { - low = high = std::round(l); + low = high = static_cast<int64_t>(std::round(l)); } else { if (l > double(std::numeric_limits<int64_t>::min())) { if (l < double(std::numeric_limits<int64_t>::max())) { @@ -88,7 +142,7 @@ QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const template <typename N> QueryTermSimple::RangeResult<N> -QueryTermSimple::getIntegerRange() const +QueryTermSimple::getIntegerRange() const noexcept { int64_t lowRaw, highRaw; bool valid = getRangeInternal(lowRaw, highRaw); @@ -121,83 +175,72 @@ QueryTermSimple::getIntegerRange() const template <> QueryTermSimple::RangeResult<float> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getFloatRange<float>(); } template <> QueryTermSimple::RangeResult<double> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getFloatRange<double>(); } template <> QueryTermSimple::RangeResult<int8_t> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getIntegerRange<int8_t>(); } template <> QueryTermSimple::RangeResult<int16_t> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getIntegerRange<int16_t>(); } template <> QueryTermSimple::RangeResult<int32_t> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getIntegerRange<int32_t>(); } template <> QueryTermSimple::RangeResult<int64_t> -QueryTermSimple::getRange() const +QueryTermSimple::getRange() const noexcept { return getIntegerRange<int64_t>(); } -template 
<int B> -struct IntDecoder { - static int64_t fromstr(const char * v, char ** end) { return strtoll(v, end, B); } - static int64_t nearestDownwd(int64_t n, int64_t min) { return (n > min ? n - 1 : n); } - static int64_t nearestUpward(int64_t n, int64_t max) { return (n < max ? n + 1 : n); } -}; - -struct DoubleDecoder { - static double fromstr(const char * v, char ** end) { return vespalib::locale::c::strtod(v, end); } - static double nearestDownwd(double n, double min) { return std::nextafterf(n, min); } - static double nearestUpward(double n, double max) { return std::nextafterf(n, max); } -}; - -bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const +bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const noexcept { lower = std::numeric_limits<int64_t>::min(); upper = std::numeric_limits<int64_t>::max(); - return getAsNumericTerm(lower, upper, IntDecoder<10>()); + return getAsNumericTerm(lower, upper, IntDecoder()); +} + +bool QueryTermSimple::getAsFloatTerm(double & lower, double & upper) const noexcept +{ + lower = -std::numeric_limits<double>::infinity(); + upper = std::numeric_limits<double>::infinity(); + return getAsNumericTerm(lower, upper, FloatDecoder<double>()); } -bool QueryTermSimple::getAsDoubleTerm(double & lower, double & upper) const +bool QueryTermSimple::getAsFloatTerm(float & lower, float & upper) const noexcept { - lower = - std::numeric_limits<double>::max(); - upper = std::numeric_limits<double>::max(); - return getAsNumericTerm(lower, upper, DoubleDecoder()); + lower = -std::numeric_limits<float>::infinity(); + upper = std::numeric_limits<float>::infinity(); + return getAsNumericTerm(lower, upper, FloatDecoder<float>()); } QueryTermSimple::~QueryTermSimple() = default; namespace { -bool isFullRange(vespalib::stringref s) { - const size_t sz(s.size()); - return (sz >= 3u) && - (s[0] == '<' || s[0] == '[') && - (s[sz-1] == '>' || s[sz-1] == ']'); -} + } @@ -232,7 +275,7 @@ 
QueryTermSimple::QueryTermSimple(const string & term_, Type type) } _valid = (numParts >= 2) && (numParts < NELEMS(parts)); if (_valid && numParts > 2) { - _rangeLimit = strtol(parts[2].data(), nullptr, 0); + _rangeLimit = static_cast<int32_t>(strtol(parts[2].data(), nullptr, 0)); if (numParts > 3) { _valid = (numParts >= 5); if (_valid) { @@ -257,48 +300,56 @@ QueryTermSimple::QueryTermSimple(const string & term_, Type type) template <typename T, typename D> bool -QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const +QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const noexcept { - bool valid(empty()); + if (empty()) return false; + size_t sz(_term.size()); - if (sz) { - char *err(nullptr); - T low(lower); - T high(upper); - const char * q = _term.c_str(); - const char first(q[0]); - const char last(q[sz-1]); - q += ((first == '<') || (first == '>') || (first == '[')) ? 1 : 0; - T ll = d.fromstr(q, &err); - valid = isValid() && ((*err == 0) || (*err == ';')); - if (valid) { - if (first == '<' && (*err == 0)) { - high = d.nearestDownwd(ll, lower); - } else if (first == '>' && (*err == 0)) { - low = d.nearestUpward(ll, upper); - } else if ((first == '[') || (first == '<')) { - if (q != err) { - low = (first == '[') ? ll : d.nearestUpward(ll, upper); - } - q = err + 1; - T hh = d.fromstr(q, &err); - bool hasUpperLimit(q != err); - if (*err == ';') { - err = const_cast<char *>(_term.end() - 1); - } - valid = (*err == last) && ((last == ']') || (last == '>')); - if (hasUpperLimit) { - high = (last == ']') ? hh : d.nearestDownwd(hh, lower); - } - } else { - low = high = ll; - } + const char *err(nullptr); + T low(lower); + T high(upper); + const char * q = _term.c_str(); + const char * qend = q + sz; + const char first(q[0]); + const char last(q[sz-1]); + bool isRange = (first == '<') || (first == '>') || (first == '['); + q += isRange ? 
1 : 0; + T ll = d.fromstr(q, qend, &err); + bool valid = isValid() && ((*err == 0) || (*err == ';')); + if (!valid) return false; + + if (*err == 0) { + if (first == '<') { + high = d.nearestDownwd(ll, lower); + } else if (first == '>') { + low = d.nearestUpward(ll, upper); + } else { + low = high = ll; + valid = ! isRange; } - if (valid) { - lower = low; - upper = high; + } else { + if ((first == '[') || (first == '<')) { + if (q != err) { + low = (first == '[') ? ll : d.nearestUpward(ll, upper); + } + q = err + 1; + T hh = d.fromstr(q, qend, &err); + bool hasUpperLimit(q != err); + if (*err == ';') { + err = const_cast<char *>(_term.end() - 1); + } + valid = (*err == last) && ((last == ']') || (last == '>')); + if (hasUpperLimit) { + high = (last == ']') ? hh : d.nearestDownwd(hh, lower); + } + } else { + valid = false; } } + if (valid) { + lower = low; + upper = high; + } return valid; } diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h index 2b64e3812ab..87bf7c26b80 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.h +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h @@ -33,8 +33,8 @@ public: N high; bool valid; // Whether parsing of the range was successful bool adjusted; // Whether the low and high was adjusted according to min and max limits of the given type. - RangeResult() : low(), high(), valid(true), adjusted(false) {} - bool isEqual() const { return low == high; } + RangeResult() noexcept : low(), high(), valid(true), adjusted(false) {} + bool isEqual() const noexcept { return low == high; } }; QueryTermSimple(const QueryTermSimple &) = delete; @@ -47,39 +47,40 @@ public: * Extracts the content of this query term as a range with low and high values. 
*/ template <typename N> - RangeResult<N> getRange() const; - int getRangeLimit() const { return _rangeLimit; } - size_t getMaxPerGroup() const { return _maxPerGroup; } - size_t getDiversityCutoffGroups() const { return _diversityCutoffGroups; } - bool getDiversityCutoffStrict() const { return _diversityCutoffStrict; } - vespalib::stringref getDiversityAttribute() const { return _diversityAttribute; } - size_t getFuzzyMaxEditDistance() const { return _fuzzyMaxEditDistance; } - size_t getFuzzyPrefixLength() const { return _fuzzyPrefixLength; } - bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const; - bool getAsDoubleTerm(double & lower, double & upper) const; - const char * getTerm() const { return _term.c_str(); } - bool isPrefix() const { return (_type == Type::PREFIXTERM); } - bool isSubstring() const { return (_type == Type::SUBSTRINGTERM); } - bool isExactstring() const { return (_type == Type::EXACTSTRINGTERM); } - bool isSuffix() const { return (_type == Type::SUFFIXTERM); } - bool isWord() const { return (_type == Type::WORD); } - bool isRegex() const { return (_type == Type::REGEXP); } - bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); } - bool isFuzzy() const { return (_type == Type::FUZZYTERM); } + RangeResult<N> getRange() const noexcept; + int getRangeLimit() const noexcept { return _rangeLimit; } + size_t getMaxPerGroup() const noexcept { return _maxPerGroup; } + size_t getDiversityCutoffGroups() const noexcept { return _diversityCutoffGroups; } + bool getDiversityCutoffStrict() const noexcept { return _diversityCutoffStrict; } + vespalib::stringref getDiversityAttribute() const noexcept { return _diversityAttribute; } + size_t getFuzzyMaxEditDistance() const noexcept { return _fuzzyMaxEditDistance; } + size_t getFuzzyPrefixLength() const noexcept { return _fuzzyPrefixLength; } + bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const noexcept; + bool getAsFloatTerm(double & lower, double & upper) const noexcept; + bool 
getAsFloatTerm(float & lower, float & upper) const noexcept; + const char * getTerm() const noexcept { return _term.c_str(); } + bool isPrefix() const noexcept { return (_type == Type::PREFIXTERM); } + bool isSubstring() const noexcept { return (_type == Type::SUBSTRINGTERM); } + bool isExactstring() const noexcept { return (_type == Type::EXACTSTRINGTERM); } + bool isSuffix() const noexcept { return (_type == Type::SUFFIXTERM); } + bool isWord() const noexcept { return (_type == Type::WORD); } + bool isRegex() const noexcept { return (_type == Type::REGEXP); } + bool isGeoLoc() const noexcept { return (_type == Type::GEO_LOCATION); } + bool isFuzzy() const noexcept { return (_type == Type::FUZZYTERM); } bool is_nearest_neighbor() const noexcept { return (_type == Type::NEAREST_NEIGHBOR); } - bool empty() const { return _term.empty(); } + bool empty() const noexcept { return _term.empty(); } virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; vespalib::string getClassName() const; - bool isValid() const { return _valid; } - const string & getTermString() const { return _term; } + bool isValid() const noexcept { return _valid; } + const string & getTermString() const noexcept { return _term; } private: - bool getRangeInternal(int64_t & low, int64_t & high) const; + bool getRangeInternal(int64_t & low, int64_t & high) const noexcept; template <typename N> - RangeResult<N> getIntegerRange() const; + RangeResult<N> getIntegerRange() const noexcept; template <typename N> - RangeResult<N> getFloatRange() const; - int _rangeLimit; + RangeResult<N> getFloatRange() const noexcept; + int32_t _rangeLimit; uint32_t _maxPerGroup; uint32_t _diversityCutoffGroups; Type _type; @@ -88,7 +89,7 @@ private: string _term; stringref _diversityAttribute; template <typename T, typename D> - bool getAsNumericTerm(T & lower, T & upper, D d) const; + bool getAsNumericTerm(T & lower, T & upper, D d) const noexcept; protected: uint32_t _fuzzyMaxEditDistance; // set in QueryTerm 
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt index 0813292a9da..9b53407aff5 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt @@ -9,5 +9,6 @@ vespa_add_library(searchlib_query_streaming OBJECT querynode.cpp querynoderesultbase.cpp queryterm.cpp + wand_term.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp index d2c1ba872f5..1871bda564d 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp @@ -11,36 +11,51 @@ using search::fef::MatchData; namespace search::streaming { DotProductTerm::DotProductTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms) - : MultiTerm(std::move(result_base), index, Type::WORD, num_terms) + : MultiTerm(std::move(result_base), index, num_terms) { } DotProductTerm::~DotProductTerm() = default; void -DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +DotProductTerm::build_scores(Scores& scores) const { - vespalib::hash_map<uint32_t,double> scores; HitList hl_store; for (const auto& term : _terms) { auto& hl = term->evaluateHits(hl_store); for (auto& hit : hl) { - scores[hit.context()] += term->weight().percent() * hit.weight(); + scores[hit.context()] += ((int64_t)term->weight().percent()) * hit.weight(); } } +} + +void +DotProductTerm::unpack_scores(Scores& scores, std::optional<double> score_threshold, uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +{ auto num_fields = td.numFields(); for (uint32_t field_idx = 0; field_idx < num_fields; ++field_idx) { auto& tfd = td.field(field_idx); auto field_id = tfd.getFieldId(); if (scores.contains(field_id)) 
{ - auto handle = tfd.getHandle(); - if (handle != fef::IllegalHandle) { - auto tmd = match_data.resolveTermField(tfd.getHandle()); - tmd->setFieldId(field_id); - tmd->setRawScore(docid, scores[field_id]); + auto score = scores[field_id]; + if (!score_threshold.has_value() || score_threshold.value() < score) { + auto handle = tfd.getHandle(); + if (handle != fef::IllegalHandle) { + auto tmd = match_data.resolveTermField(tfd.getHandle()); + tmd->setFieldId(field_id); + tmd->setRawScore(docid, score); + } } } } } +void +DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +{ + Scores scores; + build_scores(scores); + unpack_scores(scores, std::nullopt, docid, td, match_data); +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h index 77cac693781..3702bd4721c 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h @@ -3,6 +3,8 @@ #pragma once #include "multi_term.h" +#include <vespa/vespalib/stllike/hash_map.h> +#include <optional> namespace search::streaming { @@ -10,6 +12,10 @@ namespace search::streaming { * A dot product query term for streaming search. 
*/ class DotProductTerm : public MultiTerm { +protected: + using Scores = vespalib::hash_map<uint32_t,double>; + void build_scores(Scores& scores) const; + void unpack_scores(Scores& scores, std::optional<double> score_threshold, uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data); public: DotProductTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms); ~DotProductTerm() override; diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp index 36303d4e991..3e75f4a5114 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp @@ -12,8 +12,9 @@ using search::query::TermVector; namespace search::streaming { -InTerm::InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, std::unique_ptr<TermVector> terms) - : MultiTerm(std::move(result_base), index, Type::WORD, std::move(terms)) +InTerm::InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, + std::unique_ptr<TermVector> terms, Normalizing normalize_mode) + : MultiTerm(std::move(result_base), index, std::move(terms), normalize_mode) { } diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.h b/searchlib/src/vespa/searchlib/query/streaming/in_term.h index 7d03ed989c7..7b388b3f6e6 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/in_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.h @@ -11,7 +11,8 @@ namespace search::streaming { */ class InTerm : public MultiTerm { public: - InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, std::unique_ptr<query::TermVector> terms); + InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, + std::unique_ptr<query::TermVector> terms, Normalizing normalize_mode); ~InTerm() override; void unpack_match_data(uint32_t docid, const fef::ITermData& td, 
fef::MatchData& match_data) override; }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp index ad5857b8c41..dd34b9b7e73 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp @@ -9,19 +9,20 @@ using search::query::TermVector; namespace search::streaming { -MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, uint32_t num_terms) - : QueryTerm(std::move(result_base), "", index, type), +MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms) + : QueryTerm(std::move(result_base), "", index, Type::WORD, Normalizing::NONE), _terms() { _terms.reserve(num_terms); } -MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, std::unique_ptr<TermVector> terms) - : MultiTerm(std::move(result_base), index, type, terms->size()) +MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, + std::unique_ptr<TermVector> terms, Normalizing normalizing) + : MultiTerm(std::move(result_base), index, terms->size()) { auto num_terms = terms->size(); for (uint32_t i = 0; i < num_terms; ++i) { - add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), terms->getAsString(i).first, "", QueryTermSimple::Type::WORD)); + add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), terms->getAsString(i).first, "", Type::WORD, normalizing)); } } @@ -33,12 +34,6 @@ MultiTerm::add_term(std::unique_ptr<QueryTerm> term) _terms.emplace_back(std::move(term)); } -MultiTerm* -MultiTerm::as_multi_term() noexcept -{ - return this; -} - void MultiTerm::reset() { diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h index 
4c3f1ea5b5a..3bb69e29693 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h +++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h @@ -24,11 +24,12 @@ class MultiTerm : public QueryTerm { protected: std::vector<std::unique_ptr<QueryTerm>> _terms; public: - MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, uint32_t num_terms); - MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, std::unique_ptr<query::TermVector> terms); + MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms); + MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, + std::unique_ptr<query::TermVector> terms, Normalizing normalizing); ~MultiTerm() override; void add_term(std::unique_ptr<QueryTerm> term); - MultiTerm* as_multi_term() noexcept override; + MultiTerm* as_multi_term() noexcept override { return this; } void reset() override; bool evaluate() const override; const HitList& evaluateHits(HitList& hl) const override; diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp index f710218297d..1317d1c0651 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp @@ -9,7 +9,7 @@ NearestNeighborQueryNode::NearestNeighborQueryNode(std::unique_ptr<QueryNodeResu const string& query_tensor_name, const string& field_name, uint32_t target_hits, double distance_threshold, int32_t unique_id, search::query::Weight weight) - : QueryTerm(std::move(resultBase), query_tensor_name, field_name, Type::NEAREST_NEIGHBOR), + : QueryTerm(std::move(resultBase), query_tensor_name, field_name, Type::NEAREST_NEIGHBOR, Normalizing::NONE), _target_hits(target_hits), _distance_threshold(distance_threshold), 
_distance(), diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp index d2eee5d345f..3079ec31e8f 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp @@ -12,7 +12,7 @@ QueryConnector::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "Operator", _opName); } -QueryConnector::QueryConnector(const char * opName) +QueryConnector::QueryConnector(const char * opName) noexcept : QueryNode(), _opName(opName), _index(), @@ -31,7 +31,7 @@ const HitList & QueryConnector::evaluateHits(HitList & hl) const { if (evaluate()) { - hl.push_back(Hit(1, 0, 0, 1)); + hl.emplace_back(1, 0, 0, 1); } return hl; } @@ -105,10 +105,10 @@ QueryConnector::create(ParseItem::ItemType type) { switch (type) { case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>(); - case search::ParseItem::ITEM_OR: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>(); + case search::ParseItem::ITEM_WEIGHTED_SET: case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>(); - case search::ParseItem::ITEM_WEIGHTED_SET: return std::make_unique<EquivQueryNode>(); case search::ParseItem::ITEM_WAND: return std::make_unique<OrQueryNode>(); case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>(); case search::ParseItem::ITEM_PHRASE: return std::make_unique<PhraseQueryNode>(); @@ -340,7 +340,7 @@ Query::Query(const QueryNodeResultFactory & factory, vespalib::stringref queryRe bool Query::evaluate() const { - return valid() ? 
_root->evaluate() : false; + return valid() && _root->evaluate(); } bool diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h index 42c3b94002c..8befa2fe7fa 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.h +++ b/searchlib/src/vespa/searchlib/query/streaming/query.h @@ -13,8 +13,8 @@ namespace search::streaming { class QueryConnector : public QueryNode { public: - QueryConnector(const char * opName); - ~QueryConnector(); + explicit QueryConnector(const char * opName) noexcept; + ~QueryConnector() override; const HitList & evaluateHits(HitList & hl) const override; void reset() override; void getLeaves(QueryTermList & tl) override; @@ -44,7 +44,7 @@ private: class TrueNode : public QueryConnector { public: - TrueNode() : QueryConnector("AND") { } + TrueNode() noexcept : QueryConnector("AND") { } bool evaluate() const override; }; @@ -52,7 +52,7 @@ public: class FalseNode : public QueryConnector { public: - FalseNode() : QueryConnector("AND") { } + FalseNode() noexcept : QueryConnector("AND") { } bool evaluate() const override; }; @@ -62,8 +62,8 @@ public: class AndQueryNode : public QueryConnector { public: - AndQueryNode() : QueryConnector("AND") { } - AndQueryNode(const char * opName) : QueryConnector(opName) { } + AndQueryNode() noexcept : QueryConnector("AND") { } + explicit AndQueryNode(const char * opName) noexcept : QueryConnector(opName) { } bool evaluate() const override; bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_AND; } }; @@ -74,7 +74,7 @@ public: class AndNotQueryNode : public QueryConnector { public: - AndNotQueryNode() : QueryConnector("ANDNOT") { } + AndNotQueryNode() noexcept : QueryConnector("ANDNOT") { } bool evaluate() const override; bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; } }; @@ -85,13 +85,11 @@ public: class OrQueryNode : public QueryConnector { 
public: - OrQueryNode() : QueryConnector("OR") { } - OrQueryNode(const char * opName) : QueryConnector(opName) { } + OrQueryNode() noexcept : QueryConnector("OR") { } + explicit OrQueryNode(const char * opName) noexcept : QueryConnector(opName) { } bool evaluate() const override; bool isFlattenable(ParseItem::ItemType type) const override { return (type == ParseItem::ITEM_OR) || - (type == ParseItem::ITEM_DOT_PRODUCT) || - (type == ParseItem::ITEM_WAND) || (type == ParseItem::ITEM_WEAK_AND); } }; @@ -102,7 +100,7 @@ public: class EquivQueryNode : public OrQueryNode { public: - EquivQueryNode() : OrQueryNode("EQUIV") { } + EquivQueryNode() noexcept : OrQueryNode("EQUIV") { } bool evaluate() const override; bool isFlattenable(ParseItem::ItemType type) const override { return (type == ParseItem::ITEM_EQUIV) || @@ -117,7 +115,7 @@ public: class PhraseQueryNode : public AndQueryNode { public: - PhraseQueryNode() : AndQueryNode("PHRASE"), _fieldInfo(32) { } + PhraseQueryNode() noexcept : AndQueryNode("PHRASE"), _fieldInfo(32) { } bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; void getPhrases(QueryNodeRefList & tl) override; @@ -138,7 +136,7 @@ private: class SameElementQueryNode : public AndQueryNode { public: - SameElementQueryNode() : AndQueryNode("SAME_ELEMENT") { } + SameElementQueryNode() noexcept : AndQueryNode("SAME_ELEMENT") { } bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; } @@ -151,8 +149,8 @@ public: class NearQueryNode : public AndQueryNode { public: - NearQueryNode() : AndQueryNode("NEAR"), _distance(0) { } - NearQueryNode(const char * opName) : AndQueryNode(opName), _distance(0) { } + NearQueryNode() noexcept : AndQueryNode("NEAR"), _distance(0) { } + explicit NearQueryNode(const char * opName) noexcept : AndQueryNode(opName), _distance(0) { } bool evaluate() const 
override; void distance(size_t dist) { _distance = dist; } size_t distance() const { return _distance; } @@ -169,8 +167,7 @@ private: class ONearQueryNode : public NearQueryNode { public: - ONearQueryNode() : NearQueryNode("ONEAR") { } - ~ONearQueryNode() { } + ONearQueryNode() noexcept : NearQueryNode("ONEAR") { } bool evaluate() const override; }; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index db0fbd5b98e..c24f41d16cf 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -5,6 +5,7 @@ #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> #include <vespa/searchlib/query/streaming/in_term.h> +#include <vespa/searchlib/query/streaming/wand_term.h> #include <vespa/searchlib/query/tree/term_vector.h> #include <charconv> #include <vespa/log/log.h> @@ -13,12 +14,18 @@ LOG_SETUP(".vsm.querynode"); namespace search::streaming { namespace { - vespalib::stringref DEFAULT("default"); - bool disableRewrite(const QueryNode * qn) { - return dynamic_cast<const NearQueryNode *> (qn) || - dynamic_cast<const PhraseQueryNode *> (qn) || - dynamic_cast<const SameElementQueryNode *>(qn); - } + +vespalib::stringref DEFAULT("default"); +bool disableRewrite(const QueryNode * qn) { + return dynamic_cast<const NearQueryNode *> (qn) || + dynamic_cast<const PhraseQueryNode *> (qn) || + dynamic_cast<const SameElementQueryNode *>(qn); +} + +bool possibleFloat(const QueryTerm & qt, const QueryTerm::string & term) { + return !qt.encoding().isBase10Integer() && qt.encoding().isFloat() && (term.find('.') != QueryTerm::string::npos); +} + } QueryNode::UP @@ -34,7 +41,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_WEAK_AND: case ParseItem::ITEM_EQUIV: case ParseItem::ITEM_WEIGHTED_SET: - case 
ParseItem::ITEM_WAND: case ParseItem::ITEM_NOT: case ParseItem::ITEM_PHRASE: case ParseItem::ITEM_SAME_ELEMENT: @@ -43,16 +49,14 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor { qn = QueryConnector::create(type); if (qn) { - QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get()); - NearQueryNode * nqn = dynamic_cast<NearQueryNode *> (qc); + auto * qc = dynamic_cast<QueryConnector *> (qn.get()); + auto * nqn = dynamic_cast<NearQueryNode *> (qc); if (nqn) { nqn->distance(queryRep.getNearDistance()); } if ((type == ParseItem::ITEM_WEAK_AND) || (type == ParseItem::ITEM_WEIGHTED_SET) || - (type == ParseItem::ITEM_DOT_PRODUCT) || - (type == ParseItem::ITEM_SAME_ELEMENT) || - (type == ParseItem::ITEM_WAND)) + (type == ParseItem::ITEM_SAME_ELEMENT)) { qn->setIndex(queryRep.getIndexName()); } @@ -75,10 +79,8 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor break; case ParseItem::ITEM_GEO_LOCATION_TERM: // just keep the string representation here; parsed in vsm::GeoPosFieldSearcher - qn = std::make_unique<QueryTerm>(factory.create(), - queryRep.getTerm(), - queryRep.getIndexName(), - QueryTerm::Type::GEO_LOCATION); + qn = std::make_unique<QueryTerm>(factory.create(), queryRep.getTerm(), queryRep.getIndexName(), + QueryTerm::Type::GEO_LOCATION, Normalizing::NONE); break; case ParseItem::ITEM_NEAREST_NEIGHBOR: qn = build_nearest_neighbor_query_node(factory, queryRep); @@ -143,28 +145,25 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor // But it will do for now as only correct sddocname queries are sent down. 
qn = std::make_unique<TrueNode>(); } else { - auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm); + Normalizing normalize_mode = factory.normalizing_mode(ssIndex); + auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode); qt->setWeight(queryRep.GetWeight()); qt->setUniqueId(queryRep.getUniqueId()); if (qt->isFuzzy()) { qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance()); qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength()); } - if (qt->encoding().isBase10Integer() || - ! qt->encoding().isFloat() || - ! factory.getRewriteFloatTerms() || - ! allowRewrite || - (ssTerm.find('.') == vespalib::string::npos)) - { - qn = std::move(qt); - } else { + if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) { auto phrase = std::make_unique<PhraseQueryNode>(); - phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, ssTerm.find('.')), ssIndex, TermType::WORD)); - phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(ssTerm.find('.') + 1), ssIndex, TermType::WORD)); + auto dotPos = ssTerm.find('.'); + phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode)); + phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode)); auto orqn = std::make_unique<EquivQueryNode>(); orqn->addChild(std::move(qt)); orqn->addChild(std::move(phrase)); qn = std::move(orqn); + } else { + qn = std::move(qt); } } } @@ -181,12 +180,18 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor } break; case ParseItem::ITEM_STRING_IN: + qn = std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms(), + factory.normalizing_mode(queryRep.getIndexName())); + break; case ParseItem::ITEM_NUMERIC_IN: - qn = 
std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms()); + qn = std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms(), Normalizing::NONE); break; case ParseItem::ITEM_DOT_PRODUCT: qn = build_dot_product_term(factory, queryRep); break; + case ParseItem::ITEM_WAND: + qn = build_wand_term(factory, queryRep); + break; default: skip_unknown(queryRep); break; @@ -208,17 +213,12 @@ QueryNode::build_nearest_neighbor_query_node(const QueryNodeResultFactory& facto auto weight = query_rep.GetWeight(); uint32_t target_hits = query_rep.getTargetHits(); double distance_threshold = query_rep.getDistanceThreshold(); - return std::make_unique<NearestNeighborQueryNode>(factory.create(), - query_tensor_name, - field_name, - target_hits, - distance_threshold, - unique_id, - weight); + return std::make_unique<NearestNeighborQueryNode>(factory.create(), query_tensor_name, field_name, + target_hits, distance_threshold, unique_id, weight); } void -QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep) +QueryNode::populate_multi_term(Normalizing string_normalize_mode, MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep) { char buf[24]; vespalib::string subterm; @@ -227,13 +227,15 @@ QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& quer std::unique_ptr<QueryTerm> term; switch (queryRep.getType()) { case ParseItem::ITEM_PURE_WEIGHTED_STRING: - term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), queryRep.getTerm(), "", QueryTermSimple::Type::WORD); + term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), queryRep.getTerm(), "", + QueryTermSimple::Type::WORD, string_normalize_mode); break; case ParseItem::ITEM_PURE_WEIGHTED_LONG: { auto res = std::to_chars(buf, buf + sizeof(buf), queryRep.getIntergerTerm(), 10); subterm.assign(buf, res.ptr - buf); - term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), 
subterm, "", QueryTermSimple::Type::WORD); + term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), subterm, "", + QueryTermSimple::Type::WORD, Normalizing::NONE); } break; default: @@ -250,13 +252,24 @@ QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& quer std::unique_ptr<QueryNode> QueryNode::build_dot_product_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep) { - auto dp =std::make_unique<DotProductTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity()); + auto dp = std::make_unique<DotProductTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity()); dp->setWeight(queryRep.GetWeight()); dp->setUniqueId(queryRep.getUniqueId()); - populate_multi_term(*dp, queryRep); + populate_multi_term(factory.normalizing_mode(dp->index()), *dp, queryRep); return dp; } +std::unique_ptr<QueryNode> +QueryNode::build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep) +{ + auto wand = std::make_unique<WandTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity()); + wand->setWeight(queryRep.GetWeight()); + wand->setUniqueId(queryRep.getUniqueId()); + wand->set_score_threshold(queryRep.getScoreThreshold()); + populate_multi_term(factory.normalizing_mode(wand->index()), *wand, queryRep); + return wand; +} + void QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h index 09c44d951d3..a0561b2e52e 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h @@ -2,8 +2,7 @@ #pragma once #include "hit.h" -#include <vespa/vespalib/stllike/string.h> -#include <memory> +#include "querynoderesultbase.h" namespace search { class SimpleQueryStackDumpIterator; } @@ -30,13 +29,14 @@ using ConstQueryTermList = std::vector<const 
QueryTerm *>; class QueryNode { static std::unique_ptr<QueryNode> build_nearest_neighbor_query_node(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); - static void populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep); + static void populate_multi_term(Normalizing string_normalize_mode, MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_dot_product_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); + static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static void skip_unknown(SimpleQueryStackDumpIterator& queryRep); public: using UP = std::unique_ptr<QueryNode>; - virtual ~QueryNode() { } + virtual ~QueryNode() = default; /// This evalutes if the subtree starting here evaluates to true. virtual bool evaluate() const = 0; /// This return the hitList for this subtree. Does only give meaning in a diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h index 62fc32a4575..74f872ad187 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once +#include <vespa/vespalib/stllike/string.h> #include <memory> namespace search::streaming { @@ -17,11 +18,24 @@ public: virtual QueryNodeResultBase * clone() const = 0; }; +enum class Normalizing { + NONE, + LOWERCASE, + LOWERCASE_AND_FOLD +}; + class QueryNodeResultFactory { public: virtual ~QueryNodeResultFactory() = default; - virtual bool getRewriteFloatTerms() const { return false; } - virtual std::unique_ptr<QueryNodeResultBase> create() const { return std::unique_ptr<QueryNodeResultBase>(); } + virtual bool allow_float_terms_rewrite(vespalib::stringref index) const noexcept { + (void) index; + return false; + } + virtual Normalizing normalizing_mode(vespalib::stringref index) const noexcept { + (void) index; + return Normalizing::NONE; + } + virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; } }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 9c45427d07d..3950a179d67 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "queryterm.h" +#include <vespa/fastlib/text/normwordfolder.h> #include <vespa/vespalib/objects/visit.h> #include <cmath> @@ -9,12 +10,13 @@ namespace { class CharInfo { public: CharInfo(); - uint8_t get(uint8_t c) const { return _charInfo[c]; } + uint8_t get(uint8_t c) const noexcept { return _charInfo[c]; } private: uint8_t _charInfo[256]; }; CharInfo::CharInfo() + : _charInfo() { // XXX: Should refactor to reduce number of magic constants. 
memset(_charInfo, 0x01, 128); // All 7 bits are ascii7bit @@ -33,7 +35,7 @@ CharInfo::CharInfo() _charInfo[uint8_t('E')] = 0x05; } -static CharInfo _G_charTable; +CharInfo G_charTable; } @@ -53,29 +55,102 @@ QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const visit(visitor, "uniqueid", _uniqueId); } -QueryTerm::QueryTerm(std::unique_ptr<QueryNodeResultBase> org, const string & termS, const string & indexS, Type type) : - QueryTermUCS4(termS, type), - _index(indexS), - _encoding(0x01), - _result(org.release()), - _hitList(), - _weight(100), - _uniqueId(0), - _fieldInfo() +namespace { + +using Type = QueryTermSimple::Type; + +Normalizing +requireFold(Type type, Normalizing normalizing) { + if (normalizing == Normalizing::NONE) return Normalizing::NONE; + if (normalizing == Normalizing::LOWERCASE) return Normalizing::LOWERCASE; + if (type == Type::EXACTSTRINGTERM) return Normalizing::LOWERCASE; + return ((type == Type::WORD) || (type == Type::SUBSTRINGTERM) || + (type == Type::PREFIXTERM) || (type == Type::SUFFIXTERM)) + ? 
Normalizing::LOWERCASE_AND_FOLD + : Normalizing::NONE; +} + +vespalib::string +fold(vespalib::stringref s) { + const auto * curr = reinterpret_cast<const unsigned char *>(s.data()); + const unsigned char * end = curr + s.size(); + vespalib::string folded; + for (; curr < end;) { + uint32_t c_ucs4 = *curr; + if (c_ucs4 < 0x80) { + folded.append(Fast_NormalizeWordFolder::lowercase_and_fold_ascii(*curr++)); + } else { + c_ucs4 = Fast_UnicodeUtil::GetUTF8CharNonAscii(curr); + const char *repl = Fast_NormalizeWordFolder::ReplacementString(c_ucs4); + if (repl != nullptr) { + size_t repllen = strlen(repl); + folded.append(repl, repllen); + } else { + c_ucs4 = Fast_NormalizeWordFolder::lowercase_and_fold(c_ucs4); + char tmp[6]; + const char * tmp_end = Fast_UnicodeUtil::utf8cput(tmp, c_ucs4); + folded.append(tmp, tmp_end - tmp); + } + } + } + return folded; +} + +vespalib::string +lowercase(vespalib::stringref s) { + const auto * curr = reinterpret_cast<const unsigned char *>(s.data()); + const unsigned char * end = curr + s.size(); + vespalib::string folded; + for (; curr < end;) { + uint32_t c_ucs4 = *curr; + if (c_ucs4 < 0x80) { + folded.append(static_cast<char>(Fast_NormalizeWordFolder::lowercase_ascii(*curr++))); + } else { + c_ucs4 = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(curr)); + char tmp[6]; + const char * tmp_end = Fast_UnicodeUtil::utf8cput(tmp, c_ucs4); + folded.append(tmp, tmp_end - tmp); + } + } + return folded; +} + +vespalib::string +optional_fold(vespalib::stringref s, Type type, Normalizing normalizing) { + switch ( requireFold(type, normalizing)) { + case Normalizing::NONE: return s; + case Normalizing::LOWERCASE: return lowercase(s); + case Normalizing::LOWERCASE_AND_FOLD: return fold(s); + } + return s; +} + +} + +QueryTerm::QueryTerm(std::unique_ptr<QueryNodeResultBase> org, stringref termS, const string & indexS, + Type type, Normalizing normalizing) + : QueryTermUCS4(optional_fold(termS, type, normalizing), type), 
+ _index(indexS), + _encoding(0x01), + _result(org.release()), + _hitList(), + _weight(100), + _uniqueId(0), + _fieldInfo() { - if (!termS.empty()) { + if (!empty()) { uint8_t enc(0xff); - for (size_t i(0), m(termS.size()); i < m; i++) { - enc &= _G_charTable.get(termS[i]); + for (char c : getTermString()) { + enc &= G_charTable.get(c); } - _encoding = enc; + _encoding = EncodingBitMap(enc); } } void QueryTerm::getPhrases(QueryNodeRefList & tl) { (void) tl; } void QueryTerm::getPhrases(ConstQueryNodeRefList & tl) const { (void) tl; } -void QueryTerm::getLeaves(QueryTermList & tl) { tl.push_back(this); } -void QueryTerm::getLeaves(ConstQueryTermList & tl) const { tl.push_back(this); } +void QueryTerm::getLeaves(QueryTermList & tl) { tl.push_back(this); } +void QueryTerm::getLeaves(ConstQueryTermList & tl) const { tl.push_back(this); } bool QueryTerm::evaluate() const { return !_hitList.empty(); } void QueryTerm::reset() { _hitList.clear(); } const HitList & QueryTerm::evaluateHits(HitList &) const { return _hitList; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 6e91437b1f9..743998a630e 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -27,13 +27,10 @@ public: class EncodingBitMap { public: - EncodingBitMap(uint8_t bm=0) : _enc(bm) { } + explicit EncodingBitMap(uint8_t bm) : _enc(bm) { } bool isFloat() const { return _enc & Float; } bool isBase10Integer() const { return _enc & Base10Integer; } bool isAscii7Bit() const { return _enc & Ascii7Bit; } - void setBase10Integer(bool v) { if (v) _enc |= Base10Integer; else _enc &= ~Base10Integer; } - void setAscii7Bit(bool v) { if (v) _enc |= Ascii7Bit; else _enc &= ~Ascii7Bit; } - void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; } private: enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 }; uint8_t _enc; @@ -54,7 +51,12 @@ 
public: uint32_t _hitCount; uint32_t _fieldLength; }; - QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, const string & term, const string & index, Type type); + QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, stringref term, const string & index, Type type) + : QueryTerm(std::move(resultBase), term, index, type, (type == Type::EXACTSTRINGTERM) + ? Normalizing::LOWERCASE + : Normalizing::LOWERCASE_AND_FOLD) + {} + QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, stringref term, const string & index, Type type, Normalizing normalizing); QueryTerm(const QueryTerm &) = delete; QueryTerm & operator = (const QueryTerm &) = delete; QueryTerm(QueryTerm &&) = delete; diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp new file mode 100644 index 00000000000..a561adf5b42 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "wand_term.h" +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> + +using search::fef::ITermData; +using search::fef::MatchData; + +namespace search::streaming { + +WandTerm::WandTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms) + : DotProductTerm(std::move(result_base), index, num_terms), + _score_threshold(0.0) +{ +} + +WandTerm::~WandTerm() = default; + +bool +WandTerm::evaluate() const +{ + if (_score_threshold <= 0.0) { + return DotProductTerm::evaluate(); + } + Scores scores; + build_scores(scores); + for (auto &field_and_score : scores) { + if (field_and_score.second > _score_threshold) { + return true; + } + } + return false; +} + +void +WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data) +{ + Scores scores; + build_scores(scores); + unpack_scores(scores, _score_threshold, docid, td, match_data); +} + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.h b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h new file mode 100644 index 00000000000..1b342834216 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h @@ -0,0 +1,22 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dot_product_term.h" + +namespace search::streaming { + +/* + * A wand query term for streaming search. 
+ */ +class WandTerm : public DotProductTerm { + double _score_threshold; +public: + WandTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms); + ~WandTerm() override; + void set_score_threshold(double value) { _score_threshold = value; } + bool evaluate() const override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index 5e6d31d3761..51fe2d12637 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -7,7 +7,7 @@ vespa_add_library(searchlib_queryeval OBJECT booleanmatchiteratorwrapper.cpp children_iterators.cpp create_blueprint_visitor_helper.cpp - document_weight_search_iterator.cpp + docid_with_weight_search_iterator.cpp dot_product_blueprint.cpp dot_product_search.cpp elementiterator.cpp diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index b71a579e097..6ca072d6dc7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -130,15 +130,15 @@ Blueprint::Blueprint() noexcept Blueprint::~Blueprint() = default; Blueprint::UP -Blueprint::optimize(Blueprint::UP bp) { +Blueprint::optimize(Blueprint::UP bp, bool sort_by_cost) { Blueprint *root = bp.release(); - root->optimize(root, OptimizePass::FIRST); - root->optimize(root, OptimizePass::LAST); + root->optimize(root, OptimizePass::FIRST, sort_by_cost); + root->optimize(root, OptimizePass::LAST, sort_by_cost); return Blueprint::UP(root); } void -Blueprint::optimize_self(OptimizePass) +Blueprint::optimize_self(OptimizePass, bool) { } @@ -358,6 +358,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visitor.visitInt("tree_size", state.tree_size()); 
visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval()); visitor.closeStruct(); + visitor.visitFloat("cost", _cost); visitor.visitInt("sourceId", _sourceId); visitor.visitInt("docid_limit", _docid_limit); } @@ -526,10 +527,9 @@ IntermediateBlueprint::calculateState() const } double -IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const +IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const { (void) child; - (void) use_estimate; return hit_rate; } @@ -548,19 +548,19 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double } void -IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) +IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) { assert(self == this); if (should_optimize_children()) { for (auto &child : _children) { auto *child_ptr = child.release(); - child_ptr->optimize(child_ptr, pass); + child_ptr->optimize(child_ptr, pass, sort_by_cost); child.reset(child_ptr); } } - optimize_self(pass); + optimize_self(pass, sort_by_cost); if (pass == OptimizePass::LAST) { - sort(_children); + sort(_children, sort_by_cost); set_cost(calculate_cost()); } maybe_eliminate_self(self, get_replacement()); @@ -634,7 +634,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo) for (size_t i = 0; i < _children.size(); ++i) { Blueprint & child = *_children[i]; child.fetchPostings(ExecuteInfo::create(execInfo.is_strict() && inheritStrict(i), nextHitRate, execInfo)); - nextHitRate = computeNextHitRate(child, nextHitRate, execInfo.use_estimate_for_fetch_postings()); + nextHitRate = computeNextHitRate(child, nextHitRate); } } @@ -758,10 +758,10 @@ LeafBlueprint::getRange(vespalib::string &, vespalib::string &) const { } void -LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) +LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) { assert(self == this); - 
optimize_self(pass); + optimize_self(pass, sort_by_cost); maybe_eliminate_self(self, get_replacement()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 66d55015f62..a78dd092f5a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -172,6 +172,20 @@ public: // lower limit for docid_limit: max child estimate static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit); + // sort children to minimize total cost of OR flow + struct MinimalOrCost { + bool operator () (const auto &a, const auto &b) const noexcept { + return a->estimate() / a->cost() > b->estimate() / b->cost(); + } + }; + + // sort children to minimize total cost of AND flow + struct MinimalAndCost { + bool operator () (const auto &a, const auto &b) const noexcept { + return (1.0 - a->estimate()) / a->cost() > (1.0 - b->estimate()) / b->cost(); + } + }; + // utility to get the greater estimate to sort first, higher tiers last struct TieredGreaterEstimate { bool operator () (const auto &a, const auto &b) const noexcept { @@ -246,9 +260,9 @@ public: virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; } uint32_t get_docid_limit() const noexcept { return _docid_limit; } - static Blueprint::UP optimize(Blueprint::UP bp); - virtual void optimize(Blueprint* &self, OptimizePass pass) = 0; - virtual void optimize_self(OptimizePass pass); + static Blueprint::UP optimize(Blueprint::UP bp, bool sort_by_cost); + virtual void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) = 0; + virtual void optimize_self(OptimizePass pass, bool sort_by_cost); virtual Blueprint::UP get_replacement(); virtual bool should_optimize_children() const { return true; } @@ -354,7 +368,7 @@ private: bool infer_want_global_filter() const; size_t count_termwise_nodes(const UnpackInfo &unpack) const; - virtual double 
computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const; + virtual double computeNextHitRate(const Blueprint & child, double hit_rate) const; protected: // returns an empty collection if children have empty or @@ -376,7 +390,7 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; - void optimize(Blueprint* &self, OptimizePass pass) final; + void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; IndexList find(const IPredicate & check) const; @@ -393,7 +407,7 @@ public: virtual double calculate_cost() const = 0; virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0; virtual FieldSpecBaseList exposeFields() const = 0; - virtual void sort(Children &children) const = 0; + virtual void sort(Children &children, bool sort_by_cost) const = 0; virtual bool inheritStrict(size_t i) const = 0; virtual SearchIteratorUP createIntermediateSearch(MultiSearch::Children subSearches, @@ -413,7 +427,7 @@ class LeafBlueprint : public Blueprint private: State _state; protected: - void optimize(Blueprint* &self, OptimizePass pass) final; + void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; void setEstimate(HitEstimate est) { _state.estimate(est); _state.relative_estimate(calculate_relative_estimate()); diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp index 6b0bd3ec7fc..85bd751df27 100644 --- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp @@ -1,3 +1,3 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "document_weight_search_iterator.h" +#include "docid_with_weight_search_iterator.h" diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h index 448f1c8f2b4..8201c6a78b8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h @@ -8,7 +8,12 @@ namespace search::queryeval { -class DocumentWeightSearchIterator : public SearchIterator +/** + * SearchIterator implementation over a low-level posting list with {docid, weight} tuples. + * + * This is used by the parallel weak AND search iterator. + */ +class DocidWithWeightSearchIterator : public SearchIterator { private: fef::TermFieldMatchData &_tfmd; @@ -17,9 +22,9 @@ private: queryeval::MinMaxPostingInfo _postingInfo; public: - DocumentWeightSearchIterator(fef::TermFieldMatchData &tfmd, - const IDocidWithWeightPostingStore &attr, - IDirectPostingStore::LookupResult dict_entry) + DocidWithWeightSearchIterator(fef::TermFieldMatchData &tfmd, + const IDocidWithWeightPostingStore &attr, + IDirectPostingStore::LookupResult dict_entry) : _tfmd(tfmd), _matchPosition(_tfmd.populate_fixed()), _iterator(attr.create(dict_entry.posting_idx)), diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h index 2f1a4386a95..e49fcbcb5bc 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h @@ -25,6 +25,9 @@ protected: DotProductSearch() {} public: + static constexpr bool filter_search = false; + static constexpr bool require_btree_iterators = true; + // TODO: use MultiSearch::Children to pass ownership static SearchIterator::UP create(const std::vector<SearchIterator*> &children, search::fef::TermFieldMatchData &tmd, diff --git 
a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp index 858cb92331a..c9ec6edb225 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp @@ -2,9 +2,19 @@ #include "executeinfo.h" +using vespalib::Doom; namespace search::queryeval { -const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true); -const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true); +const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, Doom::never(), vespalib::ThreadBundle::trivial()); +const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, Doom::never(), vespalib::ThreadBundle::trivial()); + +ExecuteInfo::ExecuteInfo() noexcept + : ExecuteInfo(false, 1.0, Doom::never(), vespalib::ThreadBundle::trivial()) +{ } + +ExecuteInfo +ExecuteInfo::createForTest(bool strict, double hitRate) noexcept { + return createForTest(strict, hitRate, Doom::never()); +} } diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h index 3300a2aea4d..fa2c69e0400 100644 --- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h +++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h @@ -13,12 +13,10 @@ namespace search::queryeval { */ class ExecuteInfo { public: - ExecuteInfo() noexcept : ExecuteInfo(false, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true) { } + ExecuteInfo() noexcept; bool is_strict() const noexcept { return _strict; } - bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; } - bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; } double hit_rate() const noexcept { return _hitRate; } - bool soft_doom() const noexcept { return _doom && _doom->soft_doom(); } + const vespalib::Doom & doom() const noexcept { return 
_doom; } vespalib::ThreadBundle & thread_bundle() const noexcept { return _thread_bundle; } static const ExecuteInfo TRUE; @@ -27,39 +25,33 @@ public: return create(strict, org._hitRate, org); } static ExecuteInfo create(bool strict, double hitRate, const ExecuteInfo & org) noexcept { - return {strict, hitRate, org._doom, org.thread_bundle(), org.create_postinglist_when_non_strict(), org.use_estimate_for_fetch_postings()}; + return {strict, hitRate, org._doom, org.thread_bundle()}; } - static ExecuteInfo create(bool strict, double hitRate, const vespalib::Doom * doom, vespalib::ThreadBundle & thread_bundle_in, - bool postinglist_when_non_strict, bool use_estimate_for_fetch_postings) noexcept + static ExecuteInfo create(bool strict, double hitRate, const vespalib::Doom & doom, + vespalib::ThreadBundle & thread_bundle_in) noexcept { - return {strict, hitRate, doom, thread_bundle_in, postinglist_when_non_strict, use_estimate_for_fetch_postings}; + return {strict, hitRate, doom, thread_bundle_in}; } static ExecuteInfo createForTest(bool strict) noexcept { return createForTest(strict, 1.0); } - static ExecuteInfo createForTest(bool strict, double hitRate) noexcept { - return createForTest(strict, hitRate, nullptr); - } - static ExecuteInfo createForTest(bool strict, double hitRate, const vespalib::Doom * doom) noexcept { - return create(strict, hitRate, doom, vespalib::ThreadBundle::trivial(), true, true); + static ExecuteInfo createForTest(bool strict, double hitRate) noexcept; + static ExecuteInfo createForTest(bool strict, double hitRate, const vespalib::Doom & doom) noexcept { + return create(strict, hitRate, doom, vespalib::ThreadBundle::trivial()); } private: - ExecuteInfo(bool strict, double hitRate_in, const vespalib::Doom * doom, vespalib::ThreadBundle & thread_bundle_in, - bool postinglist_when_non_strict, bool use_estimate_for_fetch_postings) noexcept + ExecuteInfo(bool strict, double hitRate_in, const vespalib::Doom & doom, + vespalib::ThreadBundle & 
thread_bundle_in) noexcept : _doom(doom), _thread_bundle(thread_bundle_in), _hitRate(hitRate_in), - _strict(strict), - _create_postinglist_when_non_strict(postinglist_when_non_strict), - _use_estimate_for_fetch_postings(use_estimate_for_fetch_postings) + _strict(strict) { } - const vespalib::Doom * _doom; + const vespalib::Doom _doom; vespalib::ThreadBundle & _thread_bundle; double _hitRate; bool _strict; - bool _create_postinglist_when_non_strict; - bool _use_estimate_for_fetch_postings; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp index ae5a7583c8c..f15f3d0e84c 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp @@ -3,7 +3,6 @@ #include "fake_requestcontext.h" #include <vespa/vespalib/util/testclock.h> - namespace search::queryeval { FakeRequestContext::FakeRequestContext() @@ -13,7 +12,7 @@ FakeRequestContext::FakeRequestContext() FakeRequestContext::FakeRequestContext(attribute::IAttributeContext * context, vespalib::steady_time softDoom, vespalib::steady_time hardDoom) : _clock(std::make_unique<vespalib::TestClock>()), - _doom(_clock->clock(), softDoom, hardDoom, false), + _doom(_clock->nowRef(), softDoom, hardDoom, false), _attributeContext(context), _query_tensor_name(), _query_tensor(), diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index c43335a6fdf..bebc1f433f7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -33,7 +33,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, } template <typename CombineType> -void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { +void 
optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx, bool sort_by_cost) { std::vector<size_t> source_blenders; const SourceBlenderBlueprint * reference = nullptr; for (size_t i = begin_idx; i < self.childCnt(); ++i) { @@ -63,7 +63,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { top->addChild(std::move(sources.back())); sources.pop_back(); } - blender_up = Blueprint::optimize(std::move(blender_up)); + blender_up = Blueprint::optimize(std::move(blender_up), sort_by_cost); self.addChild(std::move(blender_up)); } } @@ -114,7 +114,7 @@ AndNotBlueprint::exposeFields() const } void -AndNotBlueprint::optimize_self(OptimizePass pass) +AndNotBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (childCnt() == 0) { return; @@ -133,7 +133,14 @@ AndNotBlueprint::optimize_self(OptimizePass pass) while (grand_child->childCnt() > 1) { addChild(grand_child->removeLastChild()); } - child->addChild(grand_child->removeChild(0)); + auto orphan = grand_child->removeChild(0); + if (auto *orphan_and = orphan->asAnd()) { + while (orphan_and->childCnt() > 0) { + child->addChild(orphan_and->removeLastChild()); + } + } else { + child->addChild(std::move(orphan)); + } child->removeChild(i--); } } @@ -145,7 +152,7 @@ AndNotBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1); + optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); } } @@ -159,10 +166,14 @@ AndNotBlueprint::get_replacement() } void -AndNotBlueprint::sort(Children &children) const +AndNotBlueprint::sort(Children &children, bool sort_by_cost) const { if (children.size() > 2) { - std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); + if (sort_by_cost) { + std::sort(children.begin() + 1, children.end(), MinimalOrCost()); + } else { + std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); + } } } @@ -224,7 +235,7 @@ 
AndBlueprint::exposeFields() const } void -AndBlueprint::optimize_self(OptimizePass pass) +AndBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; i < childCnt(); ++i) { @@ -237,7 +248,7 @@ AndBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<AndBlueprint>(*this, 0); + optimize_source_blenders<AndBlueprint>(*this, 0, sort_by_cost); } } @@ -251,9 +262,13 @@ AndBlueprint::get_replacement() } void -AndBlueprint::sort(Children &children) const +AndBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredLessEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalAndCost()); + } else { + std::sort(children.begin(), children.end(), TieredLessEstimate()); + } } bool @@ -293,21 +308,13 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const } double -AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { - double estimate = use_estimate ? child.estimate() : child.hit_ratio(); - return hit_rate * estimate; +AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const { + return hit_rate * child.estimate(); } double -OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const { - // Avoid dropping hitRate to zero when meeting a conservatively high hitrate in a child. - // Happens at least when using non fast-search attributes, and with AND nodes. - constexpr double MIN_INVERSE_HIT_RATIO = 0.10; - double estimate = use_estimate ? child.estimate() : child.hit_ratio(); - double inverse_child_estimate = 1.0 - estimate; - return (use_estimate || (inverse_child_estimate > MIN_INVERSE_HIT_RATIO)) - ? 
hit_rate * inverse_child_estimate - : hit_rate; +OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const { + return hit_rate * (1.0 - child.estimate()); } //----------------------------------------------------------------------------- @@ -337,7 +344,7 @@ OrBlueprint::exposeFields() const } void -OrBlueprint::optimize_self(OptimizePass pass) +OrBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { @@ -352,7 +359,7 @@ OrBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 0); + optimize_source_blenders<OrBlueprint>(*this, 0, sort_by_cost); } } @@ -366,9 +373,13 @@ OrBlueprint::get_replacement() } void -OrBlueprint::sort(Children &children) const +OrBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredGreaterEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalOrCost()); + } else { + std::sort(children.begin(), children.end(), TieredGreaterEstimate()); + } } bool @@ -445,7 +456,7 @@ WeakAndBlueprint::exposeFields() const } void -WeakAndBlueprint::sort(Children &) const +WeakAndBlueprint::sort(Children &, bool) const { // order needs to stay the same as _weights } @@ -509,9 +520,13 @@ NearBlueprint::exposeFields() const } void -NearBlueprint::sort(Children &children) const +NearBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredLessEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalAndCost()); + } else { + std::sort(children.begin(), children.end(), TieredLessEstimate()); + } } bool @@ -572,10 +587,9 @@ ONearBlueprint::exposeFields() const } void -ONearBlueprint::sort(Children &children) const +ONearBlueprint::sort(Children &, bool) const { // ordered near cannot sort 
children here - (void)children; } bool @@ -641,7 +655,7 @@ RankBlueprint::exposeFields() const } void -RankBlueprint::optimize_self(OptimizePass pass) +RankBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 1; i < childCnt(); ++i) { @@ -651,7 +665,7 @@ RankBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1); + optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); } } @@ -665,9 +679,8 @@ RankBlueprint::get_replacement() } void -RankBlueprint::sort(Children &children) const +RankBlueprint::sort(Children &, bool) const { - (void)children; } bool @@ -744,7 +757,7 @@ SourceBlenderBlueprint::exposeFields() const } void -SourceBlenderBlueprint::sort(Children &) const +SourceBlenderBlueprint::sort(Children &, bool) const { } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index 14672c2a5cd..620280e979b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -19,10 +19,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; AndNotBlueprint * asAndNot() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -47,10 +47,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const 
std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; AndBlueprint * asAnd() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -58,7 +58,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate) const override; }; //----------------------------------------------------------------------------- @@ -73,10 +73,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; OrBlueprint * asOr() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -84,7 +84,7 @@ public: SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override; private: - double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override; + double computeNextHitRate(const Blueprint & child, double hit_rate) const override; uint8_t calculate_cost_tier() const override; }; @@ -101,7 +101,7 
@@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; bool always_needs_unpack() const override; WeakAndBlueprint * asWeakAnd() noexcept final { return this; } @@ -133,7 +133,7 @@ public: HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; bool should_optimize_children() const override { return false; } - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -157,7 +157,7 @@ public: HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; bool should_optimize_children() const override { return false; } - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -177,9 +177,9 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; bool isRank() const noexcept final { return true; } SearchIterator::UP @@ 
-206,7 +206,7 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp index fdf4ec950dd..e90156868fb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp @@ -4,7 +4,6 @@ #include "andsearch.h" #include "andnotsearch.h" #include "sourceblendersearch.h" -#include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace search::queryeval { @@ -18,17 +17,17 @@ namespace { struct And { using Word = BitWord::Word; void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { - accel.and64(offset, src, dest); + accel.and128(offset, src, dest); } - static bool isAnd() noexcept { return true; } + static constexpr bool isAnd() noexcept { return true; } }; struct Or { using Word = BitWord::Word; void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { - accel.or64(offset, src, dest); + accel.or128(offset, src, dest); } - static bool isAnd() noexcept { return false; } + static constexpr bool isAnd() noexcept { return false; } }; } @@ -56,43 +55,47 @@ MultiBitVector<Update>::MultiBitVector(size_t reserved) _accel(IAccelrated::getAccelerator()), _lastWords() { - static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size"); - static_assert(NumWordsInBatch == 8, "Batch size should be 8 words."); + 
static_assert(sizeof(_lastWords) == 128, "Lastwords should have 128 byte size"); + static_assert(NumWordsInBatch == 16, "Batch size should be 16 words."); memset(_lastWords, 0, sizeof(_lastWords)); } template<typename Update> bool -MultiBitVector<Update>::updateLastValue(uint32_t docId) noexcept +MultiBitVector<Update>::updateLastValueCold(uint32_t docId) noexcept { - if (docId >= _lastMaxDocIdLimit) { - if (__builtin_expect(isAtEnd(docId), false)) { - return true; - } - const uint32_t index(BitWord::wordNum(docId)); - if (docId >= _lastMaxDocIdLimitRequireFetch) { - uint32_t baseIndex = index & ~(NumWordsInBatch - 1); - _update(_accel, baseIndex*sizeof(Word), _bvs, _lastWords); - _lastMaxDocIdLimitRequireFetch = (baseIndex + NumWordsInBatch) * BitWord::WordLen; - } - _lastValue = _lastWords[index % NumWordsInBatch]; - _lastMaxDocIdLimit = (index + 1) * BitWord::WordLen; + if (__builtin_expect(isAtEnd(docId), false)) { + return true; + } + const uint32_t index(BitWord::wordNum(docId)); + if (docId >= _lastMaxDocIdLimitRequireFetch) { + fetchChunk(index); } + _lastValue = _lastWords[index % NumWordsInBatch]; + _lastMaxDocIdLimit = (index + 1) * BitWord::WordLen; return false; } template<typename Update> +void +MultiBitVector<Update>::fetchChunk(uint32_t index) noexcept +{ + uint32_t baseIndex = index & ~(NumWordsInBatch - 1); + _update(_accel, baseIndex*sizeof(Word), _bvs, _lastWords); + _lastMaxDocIdLimitRequireFetch = (baseIndex + NumWordsInBatch) * BitWord::WordLen; +} + +template<typename Update> uint32_t MultiBitVector<Update>::strictSeek(uint32_t docId) noexcept { bool atEnd; for (atEnd = updateLastValue(docId), _lastValue = _lastValue & BitWord::checkTab(docId); - (_lastValue == 0) && __builtin_expect(! atEnd, true); + __builtin_expect(_lastValue == 0, Update::isAnd()) && __builtin_expect(! atEnd, true); // And is likely to have few bits, while Or has many. 
atEnd = updateLastValue(_lastMaxDocIdLimit)); - if (__builtin_expect(!atEnd, true)) { - return _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue); - } - return _numDocs; + return (__builtin_expect(!atEnd, true)) + ? _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue) + : _numDocs; } template<typename Update> @@ -100,12 +103,8 @@ bool MultiBitVector<Update>::seek(uint32_t docId) noexcept { bool atEnd = updateLastValue(docId); - if (__builtin_expect( ! atEnd, true)) { - if (_lastValue & BitWord::mask(docId)) { - return true; - } - } - return false; + return __builtin_expect( ! atEnd, true) && + __builtin_expect(_lastValue & BitWord::mask(docId), false); } namespace { @@ -160,7 +159,7 @@ template<typename Update> void MultiBitVectorIterator<Update>::doSeek(uint32_t docId) { - if (_mbv.seek(docId)) { + if (_mbv.seek(docId)) [[unlikely]] { setDocId(docId); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h index 2b4f90544ac..0ecf9d85b92 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h @@ -37,12 +37,20 @@ public: bool seek(uint32_t docId) noexcept; bool acceptExtraFilter() const noexcept { return Update::isAnd(); } private: - bool updateLastValue(uint32_t docId) noexcept; + bool updateLastValue(uint32_t docId) noexcept { + if (docId >= _lastMaxDocIdLimit) { + return updateLastValueCold(docId); + } + return false; + } + VESPA_DLL_LOCAL bool updateLastValueCold(uint32_t docId) noexcept __attribute__((noinline)); + VESPA_DLL_LOCAL void fetchChunk(uint32_t docId) noexcept __attribute__((noinline)); + using IAccelrated = vespalib::hwaccelrated::IAccelrated; Update _update; const IAccelrated & _accel; - alignas(64) Word _lastWords[8]; + alignas(64) Word _lastWords[16]; static constexpr size_t NumWordsInBatch = 
sizeof(_lastWords) / sizeof(Word); }; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index f0c75173671..500e9fe4dbb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -45,7 +45,7 @@ SameElementBlueprint::addTerm(Blueprint::UP term) } void -SameElementBlueprint::optimize_self(OptimizePass pass) +SameElementBlueprint::optimize_self(OptimizePass pass, bool) { if (pass == OptimizePass::LAST) { std::sort(_terms.begin(), _terms.end(), @@ -60,13 +60,11 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo) { if (_terms.empty()) return; _terms[0]->fetchPostings(execInfo); - double estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->hit_ratio() : _terms[0]->estimate(); - double hit_rate = execInfo.hit_rate() * estimate; + double hit_rate = execInfo.hit_rate() * _terms[0]->estimate(); for (size_t i = 1; i < _terms.size(); ++i) { Blueprint & term = *_terms[i]; term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo)); - estimate = execInfo.use_estimate_for_fetch_postings() ? 
_terms[0]->hit_ratio() : _terms[0]->estimate(); - hit_rate = hit_rate * estimate; + hit_rate = hit_rate * _terms[i]->estimate(); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h index 6a988e67149..06c20339e81 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h @@ -34,7 +34,7 @@ public: // used by create visitor void addTerm(Blueprint::UP term); - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; void fetchPostings(const ExecuteInfo &execInfo) override; std::unique_ptr<SameElementSearch> create_same_element_search(search::fef::TermFieldMatchData& tfmd, bool strict) const; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp index 828ca4be08d..f3028f5159a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "parallel_weak_and_search.h" -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/monitoring_dump_iterator.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/vespalib/objects/visit.h> @@ -243,7 +243,7 @@ ParallelWeakAndSearch::create(search::fef::TermFieldMatchData &tfmd, assert(childrenMatchData->getNumTermFields() == dict_entries.size()); wand::Terms terms; for (size_t i = 0; i < dict_entries.size(); ++i) { - terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), weights[i], dict_entries[i].posting_size, childrenMatchData->resolveTermField(handles[i]))); diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 1cecbca7660..0929f80a8f0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -2,7 +2,7 @@ #include "weighted_set_term_search.h" #include <vespa/searchlib/common/bitvector.h> -#include <vespa/searchlib/attribute/document_weight_or_filter_search.h> +#include <vespa/searchlib/attribute/multi_term_or_filter_search.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/searchcommon/attribute/i_search_context.h> @@ -175,7 +175,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, SearchIteratorPack>; if (tmd.isNotNeeded()) { - return attribute::DocumentWeightOrFilterSearch::create(children, std::move(match_data)); + return attribute::MultiTermOrFilterSearch::create(children, 
std::move(match_data)); } if (children.size() < 128) { diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index a497a647ac6..a9ab86e2c5f 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -27,6 +27,9 @@ protected: WeightedSetTermSearch() = default; public: + static constexpr bool filter_search = false; + static constexpr bool require_btree_iterators = false; + // TODO: pass ownership with unique_ptr static SearchIterator::UP create(const std::vector<SearchIterator *> &children, search::fef::TermFieldMatchData &tmd, diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt index e7401d74c71..a4db57a44cd 100644 --- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt @@ -4,6 +4,8 @@ vespa_add_library(searchlib_test attribute_builder.cpp document_weight_attribute_helper.cpp doc_builder.cpp + ft_test_app.cpp + ft_test_app_base.cpp imported_attribute_fixture.cpp initrange.cpp make_attribute_map_lookup_node.cpp diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h index 059456c383b..10d64b0aa6d 100644 --- a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h +++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h @@ -6,7 +6,7 @@ #include <vespa/searchlib/attribute/multinumericattribute.h> #include <vespa/searchlib/attribute/multinumericpostattribute.hpp> #include <vespa/searchlib/attribute/attributefactory.h> -#include <vespa/vespalib/testkit/test_kit.h> +#include <cassert> namespace search::test { @@ -25,8 +25,8 @@ public: _int_attr(dynamic_cast<IntegerAttribute *>(_attr.get())), 
_dww(_attr->as_docid_with_weight_posting_store()) { - ASSERT_TRUE(_int_attr != nullptr); - ASSERT_TRUE(_dww != nullptr); + assert(_int_attr != nullptr); + assert(_dww != nullptr); } ~DocumentWeightAttributeHelper(); @@ -36,7 +36,7 @@ public: _attr->addDoc(docid); } _attr->commit(); - ASSERT_EQUAL((limit - 1), docid); + assert((limit - 1) == docid); } void set_doc(uint32_t docid, int64_t key, int32_t weight) { diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app.cpp b/searchlib/src/vespa/searchlib/test/ft_test_app.cpp new file mode 100644 index 00000000000..1d9d7c05d76 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/ft_test_app.cpp @@ -0,0 +1,5 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "ft_test_app.h" + +FtTestApp::~FtTestApp() = default; diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app.h b/searchlib/src/vespa/searchlib/test/ft_test_app.h new file mode 100644 index 00000000000..432d9d80e4d --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/ft_test_app.h @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "ft_test_app_base.h" +#include <vespa/vespalib/testkit/testapp.h> + +/* + * Test application used by feature unit tests. + */ +struct FtTestApp : public vespalib::TestApp, public FtTestAppBase { + ~FtTestApp() override; +}; diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp b/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp new file mode 100644 index 00000000000..eee5631dcc5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp @@ -0,0 +1,286 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "ft_test_app.h" +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/vespalib/util/stringfmt.h> + +#include <vespa/log/log.h> +LOG_SETUP(".ft_test_app_base"); + +namespace fieldmatch = search::features::fieldmatch; +using search::fef::test::DummyDependencyHandler; +using search::fef::FieldInfo; +using search::fef::FieldType; +using search::fef::Properties; +using search::fef::test::RankResult; + +void +FtTestAppBase::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params) +{ + search::fef::test::IndexEnvironment ie; + FT_SETUP_FAIL(prototype, ie, params); +} + +void +FtTestAppBase::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList &params) +{ + FT_LOG(prototype, env, params); + search::fef::Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + EXPECT_TRUE(!bp->setup(env, params)); +} + +void +FtTestAppBase::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params, + const StringList &expectedIn, const StringList &expectedOut) +{ + search::fef::test::IndexEnvironment ie; + FT_SETUP_OK(prototype, ie, params, expectedIn, expectedOut); +} + +void +FtTestAppBase::FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList &params, const StringList &expectedIn, const StringList &expectedOut) +{ + FT_LOG(prototype, env, params); + search::fef::Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + ASSERT_TRUE(bp->setup(env, params)); + FT_EQUAL(expectedIn, deps.input, "In, "); + FT_EQUAL(expectedOut, deps.output, "Out,"); +} + +void +FtTestAppBase::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName) +{ + StringList empty; + FT_DUMP(factory, baseName, empty); +} + +void +FtTestAppBase::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + 
search::fef::test::IndexEnvironment &env) +{ + StringList empty; + FT_DUMP(factory, baseName, env, empty); +} + +void +FtTestAppBase::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + const StringList &expected) +{ + search::fef::test::IndexEnvironment ie; + FT_DUMP(factory, baseName, ie, expected); +} + +void +FtTestAppBase::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env, + const StringList &expected) +{ + FtDumpFeatureVisitor dfv; + search::fef::Blueprint::SP bp = factory.createBlueprint(baseName); + if ( ! bp) { + LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str()); + ASSERT_TRUE(bp); + } + bp->visitDumpFeatures(env, dfv); + FT_EQUAL(expected, dfv.features(), "Dump"); +} + +void +FtTestAppBase::FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual, + const vespalib::string &prefix) +{ + FT_LOG(prefix + " expected", expected); + FT_LOG(prefix + " actual ", actual); + EXPECT_EQUAL(expected.size(), actual.size()); + ASSERT_TRUE(expected.size() == actual.size()); + for (uint32_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i], actual[i]); + ASSERT_TRUE(expected[i] == actual[i]); + } +} + +void +FtTestAppBase::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList &params) +{ + LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str()); + std::vector<vespalib::string> arr; + for (const auto & it : env.getFields()) { + arr.push_back(it.name()); + } + FT_LOG("Environment ", arr); + FT_LOG("Parameters ", params); +} + +void +FtTestAppBase::FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr) +{ + vespalib::string str = prefix + " = [ "; + for (uint32_t i = 0; i < arr.size(); ++i) { + str.append("'").append(arr[i]).append("'"); + if (i < arr.size() - 1) { + str.append(", "); + } + 
} + str.append(" ]"); + LOG(info, "%s", str.c_str()); +} + +void +FtTestAppBase::FT_SETUP(FtFeatureTest &test, const vespalib::string &query, const StringMap &index, + uint32_t docId) +{ + LOG(info, "Setup test for query '%s'.", query.c_str()); + + // Add all query terms. + FtQueryEnvironment &queryEnv = test.getQueryEnv(); + for (uint32_t i = 0; i < query.size(); ++i) { + queryEnv.getBuilder().addAllFields(); + } + ASSERT_TRUE(test.setup()); + + // Add all occurences. + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + for (auto it = index.begin();it != index.end(); ++it) { + ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size())); + for (uint32_t i = 0; i < it->second.size(); ++i) { + size_t pos = query.find_first_of(it->second[i]); + if (pos != vespalib::string::npos) { + LOG(debug, "Occurence of '%c' added to field '%s' at position %d.", query[pos], it->first.c_str(), i); + ASSERT_TRUE(mdb->addOccurence(it->first, pos, i)); + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestAppBase::FT_SETUP(FtFeatureTest & test, const std::vector<FtQueryTerm> & query, const StringVectorMap & index, + uint32_t docId) +{ + setupQueryEnv(test.getQueryEnv(), query); + ASSERT_TRUE(test.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + + // Add all occurences. 
+ for (auto itr = index.begin(); itr != index.end(); ++itr) { + ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size())); + for (uint32_t i = 0; i < itr->second.size(); ++i) { + auto fitr = query.begin(); + for (;;) { + fitr = std::find(fitr, query.end(), FtQueryTerm(itr->second[i])); + if (fitr != query.end()) { + uint32_t termId = fitr - query.begin(); + LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i); + ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i)); + ++fitr; + } else { + break; + } + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestAppBase::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId) +{ + setupQueryEnv(test.getQueryEnv(), query); + ASSERT_TRUE(test.setup()); + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + + // Add all occurences. + for (auto itr = index.index.begin(); itr != index.index.end(); ++itr) { + const FtIndex::Field &field = itr->second; + for (size_t e = 0; e < field.size(); ++e) { + const FtIndex::Element &element = field[e]; + ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size())); + for (size_t t = 0; t < element.tokens.size(); ++t) { + const vespalib::string &token = element.tokens[t]; + for (size_t q = 0; q < query.size(); ++q) { + if (query[q].term == token) { + ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e)); + } + } + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestAppBase::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query) +{ + // Add all query terms. 
+ for (uint32_t i = 0; i < query.size(); ++i) { + queryEnv.getBuilder().addAllFields(); + queryEnv.getTerms()[i].setPhraseLength(1); + queryEnv.getTerms()[i].setUniqueId(i); + queryEnv.getTerms()[i].setWeight(query[i].termWeight); + if (i > 0) { + vespalib::string from = vespalib::make_string("vespa.term.%u.connexity", i); + vespalib::string to = vespalib::make_string("%u", i - 1); + vespalib::string connexity = vespalib::make_string("%f", query[i].connexity); + queryEnv.getProperties().add(from, to); + queryEnv.getProperties().add(from, connexity); + } + vespalib::string term = vespalib::make_string("vespa.term.%u.significance", i); + vespalib::string significance = vespalib::make_string("%f", query[i].significance); + queryEnv.getProperties().add(term, significance); + LOG(debug, "Add term node: '%s'", query[i].term.c_str()); + } +} + +void +FtTestAppBase::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName, + const vespalib::string & query, const vespalib::string & field, + const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance, + uint32_t docId) +{ + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, FieldInfo::CollectionType::SINGLE, indexName); + + if (params != nullptr) { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("fieldMatch(" + indexName + ").proximityLimit", vespalib::make_string("%u", params->getProximityLimit())); + p.add("fieldMatch(" + indexName + ").maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations())); + p.add("fieldMatch(" + indexName + ").maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences())); + p.add("fieldMatch(" + indexName + ").proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance())); + p.add("fieldMatch(" + indexName + ").relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance())); + p.add("fieldMatch(" + indexName + 
").earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance())); + p.add("fieldMatch(" + indexName + ").segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance())); + p.add("fieldMatch(" + indexName + ").occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance())); + p.add("fieldMatch(" + indexName + ").fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance())); + for (double it : params->getProximityTable()) { + p.add("fieldMatch(" + indexName + ").proximityTable", vespalib::make_string("%f", it)); + } + } + + if (totalTermWeight > 0) { + ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermWeight", + vespalib::make_string("%u", totalTermWeight)); + } + + if (totalSignificance > 0.0f) { + ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermSignificance", + vespalib::make_string("%f", totalSignificance)); + } + + std::map<vespalib::string, std::vector<vespalib::string> > index; + index[indexName] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, docId); +} + + +RankResult +FtTestAppBase::toRankResult(const vespalib::string & baseName, + const vespalib::string & result, + const vespalib::string & separator) +{ + return FtUtil::toRankResult(baseName, result, separator); +} diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app_base.h b/searchlib/src/vespa/searchlib/test/ft_test_app_base.h new file mode 100644 index 00000000000..329d93e4c47 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/ft_test_app_base.h @@ -0,0 +1,61 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/searchlib/features/fieldmatch/params.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/vespalib/testkit/test_macros.h> + +/* + * Base class for test application used by feature unit tests. + */ +struct FtTestAppBase { + using string = vespalib::string; + static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList ¶ms); + static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms); + static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList ¶ms, + const StringList &expectedIn, const StringList &expectedOut); + static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms, const StringList &expectedIn, const StringList &expectedOut); + + static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName); + static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env); + static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + const StringList &expected); + static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env, + const StringList &expected); + + static void FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual, + const vespalib::string & prefix = ""); + + static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList ¶ms); + static void FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr); + + + static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId); + static void FT_SETUP(FtFeatureTest & test, const FtQuery & 
query, const StringVectorMap & index, uint32_t docId); + + static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId); + + static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query); + static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName, + const vespalib::string & query, const vespalib::string & field, + const search::features::fieldmatch::Params * params, + uint32_t totalTermWeight, feature_t totalSignificance, + uint32_t docId); + + static search::fef::test::RankResult toRankResult(const vespalib::string & baseName, + const vespalib::string & result, + const vespalib::string & separator = " "); + + template <typename T> + static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) { + search::fef::Blueprint::UP bp = prototype.createInstance(); + if (!EXPECT_TRUE(dynamic_cast<T*>(bp.get()) != NULL)) return false; + if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false; + return true; + } +}; |