diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-20 22:27:30 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-20 22:27:30 +0100 |
commit | fc4d21db12894fa8056f03c091b7cbc756da3d6f (patch) | |
tree | e64b166dc7bdec39157d23d5c049b622c63ab92d /searchlib | |
parent | 65be04820cda0d06b8d2c7f7212f814a5ef434f3 (diff) | |
parent | 57595d4ee49e13ad5183361b0c6e4c8a250c6d3f (diff) |
Merge branch 'master' into balder/gc-unused-feature-flags
Diffstat (limited to 'searchlib')
35 files changed, 267 insertions, 164 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index c9e19b76cef..5628db99171 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -77,7 +77,7 @@ vespa_define_module( src/tests/attribute/compaction src/tests/attribute/dfa_fuzzy_matcher src/tests/attribute/direct_multi_term_blueprint - src/tests/attribute/document_weight_iterator + src/tests/attribute/direct_posting_store src/tests/attribute/enum_attribute_compaction src/tests/attribute/enum_comparator src/tests/attribute/enumeratedsave diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java index fce0485f41a..60617687f44 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java @@ -7,6 +7,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -15,21 +16,21 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; /** * @author Simon Thoresen Hult */ abstract class XmlHelper { - - private static final Charset UTF8 = Charset.forName("UTF-8"); - public static Element parseXml(String xml) throws ParserConfigurationException, IOException, SAXException { - return parseXmlStream(new ByteArrayInputStream(xml.getBytes(UTF8))); + return parseXmlStream(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))); } public static Element parseXmlFile(String fileName) @@ -41,22 +42,27 @@ abstract class XmlHelper { public static Element parseXmlStream(InputStream in) throws ParserConfigurationException, IOException, SAXException { - DocumentBuilderFactory factory = createDocumentBuilderFactory(); - DocumentBuilder builder = factory.newDocumentBuilder(); + DocumentBuilder builder = createDocumentBuilderFactory().newDocumentBuilder(); Document doc = builder.parse(in); return doc.getDocumentElement(); } - private static DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setXIncludeAware(false); + private static DocumentBuilderFactory createDocumentBuilderFactory() { + try { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - // XXE prevention - factory.setFeature("http://xml.org/sax/features/external-general-entities", false); - factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); - factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - return factory; + // XXE prevention + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + return factory; + } catch (ParserConfigurationException e) { + throw new RuntimeException("Failed to initialize XML parser", e); + } } public static String getAttributeText(Node node, String name) { diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 181c0fdf110..f612bdda87f 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -7,7 +7,7 @@ #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/parsequery/parse.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/executeinfo.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/util/randomgenerator.h> @@ -432,7 +432,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre const auto* dww = v->as_docid_with_weight_posting_store(); if (dww != nullptr) { auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot()); - using DWSI = search::queryeval::DocumentWeightSearchIterator; + using DWSI = search::queryeval::DocidWithWeightSearchIterator; TermFieldMatchData md; auto dwsi = std::make_unique<DWSI>(md, *dww, lres); if (!filter) { diff --git a/searchlib/src/tests/attribute/direct_posting_store/.gitignore b/searchlib/src/tests/attribute/direct_posting_store/.gitignore new file mode 100644 index 00000000000..5516bc721c7 --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/.gitignore @@ -0,0 +1 @@ +searchlib_direct_posting_store_test_app diff --git a/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt new file mode 100644 index 00000000000..0cb45e6b18f --- /dev/null +++ b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_direct_posting_store_test_app TEST + SOURCES + direct_posting_store_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_direct_posting_store_test_app COMMAND searchlib_direct_posting_store_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp index 28416d09d6f..d6b0ff2a4b6 100644 --- a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp +++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp @@ -8,14 +8,14 @@ #include <vespa/searchlib/attribute/attributememorysavetarget.h> #include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/searchlib/util/randomgenerator.h> #include <vespa/vespalib/test/insertion_operators.h> #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/log/log.h> -LOG_SETUP("document_weight_iterator_test"); +LOG_SETUP("direct_posting_store_test"); using namespace search; using namespace search::attribute; @@ -80,12 +80,12 @@ struct StringFixture { } }; -TEST("require that appropriate attributes support the document weight attribute interface") { +TEST("require that appropriate attributes support the IDocidWithWeightPostingStore interface") { EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr); } -TEST("require that inappropriate attributes do not support the document weight attribute interface") { +TEST("require that inappropriate attributes do not support the IDocidWithWeightPostingStore interface") { EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr); EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr); EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr); @@ -199,7 +199,7 @@ public: ASSERT_TRUE(api != nullptr); auto dict_entry = api->lookup("123", api->get_dictionary_snapshot()); ASSERT_TRUE(dict_entry.posting_idx.valid()); - return std::make_unique<queryeval::DocumentWeightSearchIterator>(_tfmd, *api, dict_entry); + return std::make_unique<queryeval::DocidWithWeightSearchIterator>(_tfmd, *api, dict_entry); } private: mutable fef::TermFieldMatchData _tfmd; diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore deleted file mode 100644 index 08cae9a48df..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore +++ /dev/null @@ -1 +0,0 @@ -searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt deleted file mode 100644 index 4cb480068e3..00000000000 --- a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_document_weight_iterator_test_app TEST - SOURCES - document_weight_iterator_test.cpp - DEPENDS - searchlib - searchlib_test -) -vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp index dea2702ef0d..552a128c518 100644 --- a/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp @@ -3,6 +3,7 @@ #include <vespa/searchlib/attribute/i_direct_posting_store.h> #include <vespa/searchlib/attribute/multi_term_or_filter_search.h> #include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/vespalib/gtest/gtest.h> #define ENABLE_GTEST_MIGRATION @@ -11,13 +12,16 @@ using PostingList = search::attribute::PostingListTraits<int32_t>::PostingStoreBase; using Iterator = search::attribute::PostingListTraits<int32_t>::const_iterator; using KeyData = PostingList::KeyDataType; + using search::BitVector; using search::attribute::MultiTermOrFilterSearch; +using search::fef::TermFieldMatchData; using search::queryeval::SearchIterator; using vespalib::datastore::EntryRef; class MultiTermOrFilterSearchTest : public ::testing::Test { PostingList _postings; + mutable TermFieldMatchData _tfmd; vespalib::GenerationHandler _gens; std::vector<EntryRef> _trees; uint32_t _range_start; @@ -62,7 +66,7 @@ public: for (size_t i = 0; i < num_trees(); ++i) { iterators.emplace_back(get_tree(i)); } - auto result = MultiTermOrFilterSearch::create(std::move(iterators)); + auto result = MultiTermOrFilterSearch::create(std::move(iterators), _tfmd); result->initRange(_range_start, _range_end); return result; }; @@ -73,6 +77,8 @@ public: while (doc_id < _range_end) { if (iterator.seek(doc_id)) { result.emplace_back(doc_id); + iterator.unpack(doc_id); + EXPECT_EQ(doc_id, _tfmd.getDocId()); ++doc_id; } else { doc_id = std::max(doc_id + 1, iterator.getDocId()); diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 8831bd1ec75..ecc03ac54c5 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -488,11 +488,11 @@ TEST("require that direct attribute iterators work") { EXPECT_TRUE(result.has_minmax); EXPECT_EQUAL(100, result.min_weight); EXPECT_EQUAL(1000, result.max_weight); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") != vespalib::string::npos); } else { EXPECT_EQUAL(num_docs, result.est_hits); EXPECT_FALSE(result.has_minmax); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); } ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); @@ -513,7 +513,7 @@ TEST("require that single weighted set turns filter on filter fields") { SimpleStringTerm node("foo", "", 0, Weight(1)); Result result = do_search(attribute_manager, node, strict); EXPECT_EQUAL(3u, result.est_hits); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos); EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos); ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index eefca5c82e9..f800e124bdc 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "mysearch.h" #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/flow.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/vespalib/objects/objectdumper.h> @@ -22,8 +23,12 @@ class MyOr : public IntermediateBlueprint { private: public: - double calculate_cost() const final { return 1.0; } - double calculate_relative_estimate() const final { return 0.5; } + double calculate_cost() const final { + return cost_of(get_children(), OrFlow()); + } + double calculate_relative_estimate() const final { + return estimate_of(get_children(), OrFlow()); + } HitEstimate combine(const std::vector<HitEstimate> &data) const override { return max(data); } @@ -32,7 +37,7 @@ public: return mixChildrenFields(); } - void sort(Children &children) const override { + void sort(Children &children, bool) const override { std::sort(children.begin(), children.end(), TieredGreaterEstimate()); } @@ -440,7 +445,8 @@ TEST_F("testChildAndNotCollapsing", Fixture) ) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -479,7 +485,8 @@ TEST_F("testChildAndCollapsing", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -517,7 +524,8 @@ TEST_F("testChildOrCollapsing", Fixture) .add(MyLeafSpec(1).addField(2, 42).create()) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } @@ -560,7 +568,8 @@ TEST_F("testChildSorting", Fixture) ); TEST_DO(f.check_not_equal(*sorted, *unsorted)); - unsorted = Blueprint::optimize(std::move(unsorted)); + unsorted->setDocIdLimit(1000); + unsorted = Blueprint::optimize(std::move(unsorted), true); TEST_DO(f.check_equal(*sorted, *unsorted)); } diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 234ff5a9d19..ab1c004c721 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -15,6 +15,7 @@ #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/vespalib/util/overload.h> +#include <vespa/vespalib/util/approx.h> #include <vespa/vespalib/data/simple_buffer.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/data/slime/inserter.h> @@ -75,7 +76,8 @@ void check_sort_order(IntermediateBlueprint &self, BlueprintVector children, std for (const auto & child: children) { unordered.push_back(child.get()); } - self.sort(children); + // TODO: sort by cost (requires both setDocIdLimit and optimize to be called) + self.sort(children, false); for (size_t i = 0; i < children.size(); ++i) { EXPECT_EQUAL(children[i].get(), unordered[order[i]]); } @@ -129,7 +131,7 @@ TEST("test AndNot Blueprint") { template <typename BP> void optimize(std::unique_ptr<BP> &ref) { - auto optimized = Blueprint::optimize(std::move(ref)); + auto optimized = Blueprint::optimize(std::move(ref), true); ref.reset(dynamic_cast<BP*>(optimized.get())); ASSERT_TRUE(ref); optimized.release(); @@ -141,8 +143,8 @@ TEST("test And propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); - optimize(bp); bp->setDocIdLimit(5000); + optimize(bp); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(3u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -161,8 +163,8 @@ TEST("test Or propagates updated histestimate") { bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2))); bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2))); - optimize(bp); bp->setDocIdLimit(5000); + optimize(bp); bp->fetchPostings(ExecuteInfo::TRUE); EXPECT_EQUAL(4u, bp->childCnt()); for (uint32_t i = 0; i < bp->childCnt(); i++) { @@ -514,36 +516,45 @@ vespalib::string to_str(const Inspector &value) { } void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { - auto ignore_cost = [expect_eq](const auto &path, const auto &a, const auto &b) { - if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { - vespalib::stringref field = std::get<vespalib::stringref>(path.back()); - if (field == "cost") { - return true; - } - } - if (expect_eq) { - fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(), - to_str(a).c_str(), to_str(b).c_str()); - } - return false; - }; + auto cmp_hook = [expect_eq](const auto &path, const auto &a, const auto &b) { + if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { + vespalib::stringref field = std::get<vespalib::stringref>(path.back()); + if (field == "cost") { + return true; + } + if (field == "relative_estimate") { + double a_val = a.asDouble(); + double b_val = b.asDouble(); + if (a_val != 0.0 && b_val != 0.0 && vespalib::approx_equal(a_val, b_val)) { + return true; + } + } + } + if (expect_eq) { + fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(), + to_str(a).c_str(), to_str(b).c_str()); + } + return false; + }; Slime a; Slime b; bp1.asSlime(SlimeInserter(a)); bp2.asSlime(SlimeInserter(b)); if (expect_eq) { - EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), ignore_cost)); + EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); } else { - EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), ignore_cost)); + EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook)); } } void -optimize_and_compare(Blueprint::UP top, Blueprint::UP expect) { +optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool sort_by_cost = true) { + top->setDocIdLimit(1000); + expect->setDocIdLimit(1000); TEST_DO(compare(*top, *expect, false)); - top = Blueprint::optimize(std::move(top)); + top = Blueprint::optimize(std::move(top), sort_by_cost); TEST_DO(compare(*top, *expect, true)); - expect = Blueprint::optimize(std::move(expect)); + expect = Blueprint::optimize(std::move(expect), sort_by_cost); TEST_DO(compare(*expect, *top, true)); } @@ -670,11 +681,11 @@ TEST("test empty root node optimization and safeness") { //------------------------------------------------------------------------- auto expect_up = std::make_unique<EmptyBlueprint>(); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4))->asString()); - EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5))->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4), true)->asString()); + EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5), true)->asString()); } TEST("and with one empty child is optimized away") { @@ -682,7 +693,7 @@ TEST("and with one empty child is optimized away") { Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20}))); - top = Blueprint::optimize(std::move(top)); + top = Blueprint::optimize(std::move(top), true); Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> addChild(ap(MyLeafSpec(10).create())). addChild(std::make_unique<EmptyBlueprint>()))); @@ -857,8 +868,8 @@ TEST("require that replaced blueprints retain source id") { addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); //------------------------------------------------------------------------- - top1_up = Blueprint::optimize(std::move(top1_up)); - top2_up = Blueprint::optimize(std::move(top2_up)); + top1_up = Blueprint::optimize(std::move(top1_up), true); + top2_up = Blueprint::optimize(std::move(top2_up), true); EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); EXPECT_EQUAL(13u, top1_up->getSourceId()); @@ -1177,7 +1188,7 @@ TEST("require that children of near are not optimized") { auto expect_up = ap((new NearBlueprint(10))-> addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up)); + top_up = Blueprint::optimize(std::move(top_up), true); TEST_DO(compare(*top_up, *expect_up, true)); } @@ -1188,27 +1199,27 @@ TEST("require that children of onear are not optimized") { auto expect_up = ap((new ONearBlueprint(10))-> addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})). addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30}))); - top_up = Blueprint::optimize(std::move(top_up)); + top_up = Blueprint::optimize(std::move(top_up), true); TEST_DO(compare(*top_up, *expect_up, true)); } TEST("require that ANDNOT without children is optimized to empty search") { Blueprint::UP top_up = std::make_unique<AndNotBlueprint>(); auto expect_up = std::make_unique<EmptyBlueprint>(); - top_up = Blueprint::optimize(std::move(top_up)); + top_up = Blueprint::optimize(std::move(top_up), true); EXPECT_EQUAL(expect_up->asString(), top_up->asString()); } TEST("require that highest cost tier sorts last for OR") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {30, 3}, {20, 2}, {10, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {10, 1}, {20, 2}, {30, 3}}); - optimize_and_compare(std::move(top), std::move(expect)); + optimize_and_compare(std::move(top), std::move(expect), false); } TEST("require that highest cost tier sorts last for AND") { Blueprint::UP top = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {20, 3}, {30, 2}, {50, 1}}); Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {50, 1}, {30, 2}, {20, 3}}); - optimize_and_compare(std::move(top), std::move(expect)); + optimize_and_compare(std::move(top), std::move(expect), false); } template<typename BP> @@ -1325,7 +1336,7 @@ void verify_cost(make &&mk, double expect) { .cost(1.2).leaf(300) .cost(1.3).leaf(500); bp->setDocIdLimit(1000); - bp = Blueprint::optimize(std::move(bp)); + bp = Blueprint::optimize(std::move(bp), true); EXPECT_EQUAL(bp->cost(), expect); } diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index f910ff5be1b..1180206279d 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -48,7 +48,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) { // inherit Blueprint to capture the default filter factory struct DefaultBlueprint : Blueprint { double calculate_relative_estimate() const override { abort(); } - void optimize(Blueprint* &, OptimizePass) override { abort(); } + void optimize(Blueprint* &, OptimizePass, bool) override { abort(); } const State &getState() const override { abort(); } void fetchPostings(const ExecuteInfo &) override { abort(); } void freeze() override { abort(); } diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp index fa12b453d8c..0e27c77feae 100644 --- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchlib/query/tree/simplequery.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/fake_requestcontext.h> #include <vespa/searchlib/queryeval/fake_searchable.h> #include <vespa/searchlib/queryeval/simpleresult.h> @@ -661,7 +661,7 @@ SearchIterator::UP create_wand(bool use_dww, assert(childrenMatchData->getNumTermFields() == dict_entries.size()); wand::Terms terms; for (size_t i = 0; i < dict_entries.size(); ++i) { - terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), weights[i], dict_entries[i].posting_size, childrenMatchData->resolveTermField(handles[i]))); diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index d05e6c8e4f4..7c535e5d3d5 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -46,7 +46,7 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul } Blueprint::UP finalize(Blueprint::UP bp, bool strict) { - Blueprint::UP result = Blueprint::optimize(std::move(bp)); + Blueprint::UP result = Blueprint::optimize(std::move(bp), true); result->fetchPostings(ExecuteInfo::createForTest(strict)); result->freeze(); return result; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index b9adcf3b093..037285fedf0 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -17,7 +17,7 @@ #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/searchlib/queryeval/andsearchstrict.h> #include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h> -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/dot_product_blueprint.h> #include <vespa/searchlib/queryeval/dot_product_search.h> #include <vespa/searchlib/queryeval/emptysearch.h> @@ -527,7 +527,7 @@ public: } } if (_attr.has_btree_iterator(_dict_entry.posting_idx)) { - return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); + return std::make_unique<queryeval::DocidWithWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); } else { return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict); } diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp index 19668522e17..214851e9681 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp @@ -15,14 +15,19 @@ template<typename IteratorPack> class MultiTermOrFilterSearchImpl : public MultiTermOrFilterSearch { IteratorPack _children; + fef::TermFieldMatchData* _tfmd; void seek_all(uint32_t docId); public: - explicit MultiTermOrFilterSearchImpl(IteratorPack&& children); + explicit MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd); ~MultiTermOrFilterSearchImpl() override; void doSeek(uint32_t docId) override; - void doUnpack(uint32_t) override { } + void doUnpack(uint32_t docid) override { + if (_tfmd != nullptr) { + _tfmd->resetOnlyDocId(docid); + } + } void initRange(uint32_t begin, uint32_t end) override { SearchIterator::initRange(begin, end); @@ -46,9 +51,10 @@ public: }; template<typename IteratorPack> -MultiTermOrFilterSearchImpl<IteratorPack>::MultiTermOrFilterSearchImpl(IteratorPack&& children) +MultiTermOrFilterSearchImpl<IteratorPack>::MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd) : MultiTermOrFilterSearch(), - _children(std::move(children)) + _children(std::move(children)), + _tfmd(tfmd) { } @@ -89,7 +95,7 @@ namespace { template <typename IteratorType, typename IteratorPackType> std::unique_ptr<queryeval::SearchIterator> -create_helper(std::vector<IteratorType>&& children) +create_helper(std::vector<IteratorType>&& children, fef::TermFieldMatchData* tfmd) { if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); @@ -97,7 +103,7 @@ create_helper(std::vector<IteratorType>&& children) std::sort(children.begin(), children.end(), [](const auto & a, const auto & b) { return a.size() > b.size(); }); using OrFilter = MultiTermOrFilterSearchImpl<IteratorPackType>; - return std::make_unique<OrFilter>(IteratorPackType(std::move(children))); + return std::make_unique<OrFilter>(IteratorPackType(std::move(children)), tfmd); } } @@ -106,13 +112,25 @@ create_helper(std::vector<IteratorType>&& children) std::unique_ptr<queryeval::SearchIterator> MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children) { - return create_helper<DocidIterator, DocidIteratorPack>(std::move(children)); + return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), nullptr); +} + +std::unique_ptr<queryeval::SearchIterator> +MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd) +{ + return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), &tfmd); } std::unique_ptr<queryeval::SearchIterator> MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children) { - return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children)); + return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children), nullptr); +} + +std::unique_ptr<queryeval::SearchIterator> +MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd) +{ + return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children), &tfmd); } std::unique_ptr<queryeval::SearchIterator> @@ -123,7 +141,7 @@ MultiTermOrFilterSearch::create(const std::vector<SearchIterator *>& children, return std::make_unique<queryeval::EmptySearch>(); } else { using OrFilter = MultiTermOrFilterSearchImpl<SearchIteratorPack>; - return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md))); + return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md)), nullptr); } } diff --git a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h index 42eb33d2eed..1e8227c3007 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h @@ -18,7 +18,9 @@ protected: MultiTermOrFilterSearch() = default; public: static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children); + static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd); static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children); + static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd); static std::unique_ptr<SearchIterator> create(const std::vector<SearchIterator *>& children, std::unique_ptr<fef::MatchData> md); }; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index b79703a8e5c..a75066a67a9 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -39,7 +39,7 @@ BitVector::allocatePaddedAndAligned(Index start, Index end, Index capacity, cons { assert(capacity >= end); uint32_t words = numActiveWords(start, capacity); - words += (-words & 15); // Pad to 64 byte alignment + words += (-words & 15); // Pad to 128 byte alignment const size_t sz(words * sizeof(Word)); Alloc alloc = (init_alloc != nullptr) ? init_alloc->create(sz) : Alloc::alloc(sz, MMAP_LIMIT); assert(alloc.size()/sizeof(Word) >= words); diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 9c986d0bc63..4637ad5a4e8 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -455,6 +455,12 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm return vespalib::fuzzy_matching_algorithm_from_string(value, default_value); } +const vespalib::string SortBlueprintsByCost::NAME("vespa.matching.sort_blueprints_by_cost"); +const bool SortBlueprintsByCost::DEFAULT_VALUE(false); +bool SortBlueprintsByCost::check(const Properties &props, bool fallback) { + return lookupBool(props, NAME, fallback); +} + const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive"); const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false); bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 1921f52276f..db8de8209a9 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -336,6 +336,15 @@ namespace matching { static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props); static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value); }; + /** + * Sort blueprints based on relative cost estimate rather than est_hits + **/ + struct SortBlueprintsByCost { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); } + static bool check(const Properties &props, bool fallback); + }; /** * When enabled, the unpacking part of the phrase iterator will be tagged as expensive diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index d6b0b900516..aadc5300ede 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -56,6 +56,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _dumpFeatures(), _warnings(), _feature_rename_map(), + _sort_blueprints_by_cost(false), _ignoreDefaultRankFeatures(false), _compiled(false), _compileError(false), @@ -134,6 +135,7 @@ RankSetup::configure() _mutateOnSummary._attribute = mutate::on_summary::Attribute::lookup(_indexEnv.getProperties()); _mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties()); _mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties()); + _sort_blueprints_by_cost = matching::SortBlueprintsByCost::check(_indexEnv.getProperties()); _always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties()); } diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index d744b38cc6e..d8b977a0331 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -65,6 +65,7 @@ private: std::vector<vespalib::string> _dumpFeatures; Warnings _warnings; StringStringMap _feature_rename_map; + bool _sort_blueprints_by_cost; bool _ignoreDefaultRankFeatures; bool _compiled; bool _compileError; @@ -459,6 +460,7 @@ public: const MutateOperation & getMutateOnSummary() const { return _mutateOnSummary; } bool allowMutateQueryOverride() const { return _mutateAllowQueryOverride; } + bool sort_blueprints_by_cost() const noexcept { return _sort_blueprints_by_cost; } }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index 5e6d31d3761..51fe2d12637 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -7,7 +7,7 @@ vespa_add_library(searchlib_queryeval OBJECT booleanmatchiteratorwrapper.cpp children_iterators.cpp create_blueprint_visitor_helper.cpp - document_weight_search_iterator.cpp + docid_with_weight_search_iterator.cpp dot_product_blueprint.cpp dot_product_search.cpp elementiterator.cpp diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index d2b4661d9c3..6ca072d6dc7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -130,15 +130,15 @@ Blueprint::Blueprint() noexcept Blueprint::~Blueprint() = default; Blueprint::UP -Blueprint::optimize(Blueprint::UP bp) { +Blueprint::optimize(Blueprint::UP bp, bool sort_by_cost) { Blueprint *root = bp.release(); - root->optimize(root, OptimizePass::FIRST); - root->optimize(root, OptimizePass::LAST); + root->optimize(root, OptimizePass::FIRST, sort_by_cost); + root->optimize(root, OptimizePass::LAST, sort_by_cost); return Blueprint::UP(root); } void -Blueprint::optimize_self(OptimizePass) +Blueprint::optimize_self(OptimizePass, bool) { } @@ -548,19 +548,19 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double } void -IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass) +IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) { assert(self == this); if (should_optimize_children()) { for (auto &child : _children) { auto *child_ptr = child.release(); - child_ptr->optimize(child_ptr, pass); + child_ptr->optimize(child_ptr, pass, sort_by_cost); child.reset(child_ptr); } } - optimize_self(pass); + optimize_self(pass, sort_by_cost); if (pass == OptimizePass::LAST) { - sort(_children); + sort(_children, sort_by_cost); set_cost(calculate_cost()); } maybe_eliminate_self(self, get_replacement()); @@ -758,10 +758,10 @@ LeafBlueprint::getRange(vespalib::string &, vespalib::string &) const { } void -LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass) +LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) { assert(self == this); - optimize_self(pass); + optimize_self(pass, sort_by_cost); maybe_eliminate_self(self, get_replacement()); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 0dbd7b618a7..a78dd092f5a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -172,6 +172,20 @@ public: // lower limit for docid_limit: max child estimate static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit); + // sort children to minimize total cost of OR flow + struct MinimalOrCost { + bool operator () (const auto &a, const auto &b) const noexcept { + return a->estimate() / a->cost() > b->estimate() / b->cost(); + } + }; + + // sort children to minimize total cost of AND flow + struct MinimalAndCost { + bool operator () (const auto &a, const auto &b) const noexcept { + return (1.0 - a->estimate()) / a->cost() > (1.0 - b->estimate()) / b->cost(); + } + }; + // utility to get the greater estimate to sort first, higher tiers last struct TieredGreaterEstimate { bool operator () (const auto &a, const auto &b) const noexcept { @@ -246,9 +260,9 @@ public: virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; } uint32_t get_docid_limit() const noexcept { return _docid_limit; } - static Blueprint::UP optimize(Blueprint::UP bp); - virtual void optimize(Blueprint* &self, OptimizePass pass) = 0; - virtual void optimize_self(OptimizePass pass); + static Blueprint::UP optimize(Blueprint::UP bp, bool sort_by_cost); + virtual void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) = 0; + virtual void optimize_self(OptimizePass pass, bool sort_by_cost); virtual Blueprint::UP get_replacement(); virtual bool should_optimize_children() const { return true; } @@ -376,7 +390,7 @@ public: void setDocIdLimit(uint32_t limit) noexcept final; - void optimize(Blueprint* &self, OptimizePass pass) final; + void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; IndexList find(const IPredicate & check) const; @@ -393,7 +407,7 @@ public: virtual double calculate_cost() const = 0; virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0; virtual FieldSpecBaseList exposeFields() const = 0; - virtual void sort(Children &children) const = 0; + virtual void sort(Children &children, bool sort_by_cost) const = 0; virtual bool inheritStrict(size_t i) const = 0; virtual SearchIteratorUP createIntermediateSearch(MultiSearch::Children subSearches, @@ -413,7 +427,7 @@ class LeafBlueprint : public Blueprint private: State _state; protected: - void optimize(Blueprint* &self, OptimizePass pass) final; + void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final; void setEstimate(HitEstimate est) { _state.estimate(est); _state.relative_estimate(calculate_relative_estimate()); diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp index 6b0bd3ec7fc..85bd751df27 100644 --- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp @@ -1,3 +1,3 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "document_weight_search_iterator.h" +#include "docid_with_weight_search_iterator.h" diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h index 448f1c8f2b4..8201c6a78b8 100644 --- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h @@ -8,7 +8,12 @@ namespace search::queryeval { -class DocumentWeightSearchIterator : public SearchIterator +/** + * SearchIterator implementation over a low-level posting list with {docid, weight} tuples. + * + * This is used by the parallel weak AND search iterator. + */ +class DocidWithWeightSearchIterator : public SearchIterator { private: fef::TermFieldMatchData &_tfmd; @@ -17,9 +22,9 @@ private: queryeval::MinMaxPostingInfo _postingInfo; public: - DocumentWeightSearchIterator(fef::TermFieldMatchData &tfmd, - const IDocidWithWeightPostingStore &attr, - IDirectPostingStore::LookupResult dict_entry) + DocidWithWeightSearchIterator(fef::TermFieldMatchData &tfmd, + const IDocidWithWeightPostingStore &attr, + IDirectPostingStore::LookupResult dict_entry) : _tfmd(tfmd), _matchPosition(_tfmd.populate_fixed()), _iterator(attr.create(dict_entry.posting_idx)), diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index d50b9846f17..bebc1f433f7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -33,7 +33,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources, } template <typename CombineType> -void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { +void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx, bool sort_by_cost) { std::vector<size_t> source_blenders; const SourceBlenderBlueprint * reference = nullptr; for (size_t i = begin_idx; i < self.childCnt(); ++i) { @@ -63,7 +63,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { top->addChild(std::move(sources.back())); sources.pop_back(); } - blender_up = Blueprint::optimize(std::move(blender_up)); + blender_up = Blueprint::optimize(std::move(blender_up), sort_by_cost); self.addChild(std::move(blender_up)); } } @@ -114,7 +114,7 @@ AndNotBlueprint::exposeFields() const } void -AndNotBlueprint::optimize_self(OptimizePass pass) +AndNotBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (childCnt() == 0) { return; @@ -152,7 +152,7 @@ AndNotBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1); + optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); } } @@ -166,10 +166,14 @@ AndNotBlueprint::get_replacement() } void -AndNotBlueprint::sort(Children &children) const +AndNotBlueprint::sort(Children &children, bool sort_by_cost) const { if (children.size() > 2) { - std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); + if (sort_by_cost) { + std::sort(children.begin() + 1, children.end(), MinimalOrCost()); + } else { + std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate()); + } } } @@ -231,7 +235,7 @@ AndBlueprint::exposeFields() const } void -AndBlueprint::optimize_self(OptimizePass pass) +AndBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; i < childCnt(); ++i) { @@ -244,7 +248,7 @@ AndBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<AndBlueprint>(*this, 0); + optimize_source_blenders<AndBlueprint>(*this, 0, sort_by_cost); } } @@ -258,9 +262,13 @@ AndBlueprint::get_replacement() } void -AndBlueprint::sort(Children &children) const +AndBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredLessEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalAndCost()); + } else { + std::sort(children.begin(), children.end(), TieredLessEstimate()); + } } bool @@ -336,7 +344,7 @@ OrBlueprint::exposeFields() const } void -OrBlueprint::optimize_self(OptimizePass pass) +OrBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { @@ -351,7 +359,7 @@ OrBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 0); + optimize_source_blenders<OrBlueprint>(*this, 0, sort_by_cost); } } @@ -365,9 +373,13 @@ OrBlueprint::get_replacement() } void -OrBlueprint::sort(Children &children) const +OrBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredGreaterEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalOrCost()); + } else { + std::sort(children.begin(), children.end(), TieredGreaterEstimate()); + } } bool @@ -444,7 +456,7 @@ WeakAndBlueprint::exposeFields() const } void -WeakAndBlueprint::sort(Children &) const +WeakAndBlueprint::sort(Children &, bool) const { // order needs to stay the same as _weights } @@ -508,9 +520,13 @@ NearBlueprint::exposeFields() const } void -NearBlueprint::sort(Children &children) const +NearBlueprint::sort(Children &children, bool sort_by_cost) const { - std::sort(children.begin(), children.end(), TieredLessEstimate()); + if (sort_by_cost) { + std::sort(children.begin(), children.end(), MinimalAndCost()); + } else { + std::sort(children.begin(), children.end(), TieredLessEstimate()); + } } bool @@ -571,10 +587,9 @@ ONearBlueprint::exposeFields() const } void -ONearBlueprint::sort(Children &children) const +ONearBlueprint::sort(Children &, bool) const { // ordered near cannot sort children here - (void)children; } bool @@ -640,7 +655,7 @@ RankBlueprint::exposeFields() const } void -RankBlueprint::optimize_self(OptimizePass pass) +RankBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost) { if (pass == OptimizePass::FIRST) { for (size_t i = 1; i < childCnt(); ++i) { @@ -650,7 +665,7 @@ RankBlueprint::optimize_self(OptimizePass pass) } } if (pass == OptimizePass::LAST) { - optimize_source_blenders<OrBlueprint>(*this, 1); + optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost); } } @@ -664,9 +679,8 @@ RankBlueprint::get_replacement() } void -RankBlueprint::sort(Children &children) const +RankBlueprint::sort(Children &, bool) const { - (void)children; } bool @@ -743,7 +757,7 @@ SourceBlenderBlueprint::exposeFields() const } void -SourceBlenderBlueprint::sort(Children &) const +SourceBlenderBlueprint::sort(Children &, bool) const { } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index cc3da6ce983..620280e979b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -19,10 +19,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; AndNotBlueprint * asAndNot() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -47,10 +47,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; AndBlueprint * asAnd() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -73,10 +73,10 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; OrBlueprint * asOr() noexcept final { return this; } Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, @@ -101,7 +101,7 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; bool always_needs_unpack() const override; WeakAndBlueprint * asWeakAnd() noexcept final { return this; } @@ -133,7 +133,7 @@ public: HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; bool should_optimize_children() const override { return false; } - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -157,7 +157,7 @@ public: HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; bool should_optimize_children() const override { return false; } - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; SearchIterator::UP @@ -177,9 +177,9 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; Blueprint::UP get_replacement() override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; bool isRank() const noexcept final { return true; } SearchIterator::UP @@ -206,7 +206,7 @@ public: double calculate_relative_estimate() const final; HitEstimate combine(const std::vector<HitEstimate> &data) const override; FieldSpecBaseList exposeFields() const override; - void sort(Children &children) const override; + void sort(Children &children, bool sort_by_cost) const override; bool inheritStrict(size_t i) const override; SearchIterator::UP createIntermediateSearch(MultiSearch::Children subSearches, diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp index 66f505581c7..e90156868fb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp @@ -4,7 +4,6 @@ #include "andsearch.h" #include "andnotsearch.h" #include "sourceblendersearch.h" -#include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace search::queryeval { @@ -18,7 +17,7 @@ namespace { struct And { using Word = BitWord::Word; void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { - accel.and64(offset, src, dest); + accel.and128(offset, src, dest); } static constexpr bool isAnd() noexcept { return true; } }; @@ -26,7 +25,7 @@ struct And { struct Or { using Word = BitWord::Word; void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept { - accel.or64(offset, src, dest); + accel.or128(offset, src, dest); } static constexpr bool isAnd() noexcept { return false; } }; @@ -56,8 +55,8 @@ MultiBitVector<Update>::MultiBitVector(size_t reserved) _accel(IAccelrated::getAccelerator()), _lastWords() { - static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size"); - static_assert(NumWordsInBatch == 8, "Batch size should be 8 words."); + static_assert(sizeof(_lastWords) == 128, "Lastwords should have 128 byte size"); + static_assert(NumWordsInBatch == 16, "Batch size should be 16 words."); memset(_lastWords, 0, sizeof(_lastWords)); } diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h index 0d9e2c4f25f..0ecf9d85b92 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h @@ -50,7 +50,7 @@ private: Update _update; const IAccelrated & _accel; - alignas(64) Word _lastWords[8]; + alignas(64) Word _lastWords[16]; static constexpr size_t NumWordsInBatch = sizeof(_lastWords) / sizeof(Word); }; diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index 96181377282..500e9fe4dbb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -45,7 +45,7 @@ SameElementBlueprint::addTerm(Blueprint::UP term) } void -SameElementBlueprint::optimize_self(OptimizePass pass) +SameElementBlueprint::optimize_self(OptimizePass pass, bool) { if (pass == OptimizePass::LAST) { std::sort(_terms.begin(), _terms.end(), diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h index 6a988e67149..06c20339e81 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h @@ -34,7 +34,7 @@ public: // used by create visitor void addTerm(Blueprint::UP term); - void optimize_self(OptimizePass pass) override; + void optimize_self(OptimizePass pass, bool sort_by_cost) override; void fetchPostings(const ExecuteInfo &execInfo) override; std::unique_ptr<SameElementSearch> create_same_element_search(search::fef::TermFieldMatchData& tfmd, bool strict) const; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp index 828ca4be08d..f3028f5159a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "parallel_weak_and_search.h" -#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h> #include <vespa/searchlib/queryeval/monitoring_dump_iterator.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/vespalib/objects/visit.h> @@ -243,7 +243,7 @@ ParallelWeakAndSearch::create(search::fef::TermFieldMatchData &tfmd, assert(childrenMatchData->getNumTermFields() == dict_entries.size()); wand::Terms terms; for (size_t i = 0; i < dict_entries.size(); ++i) { - terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), weights[i], dict_entries[i].posting_size, childrenMatchData->resolveTermField(handles[i]))); |