diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-18 18:31:44 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-18 22:54:51 +0000 |
commit | 723f2ae4228a82692831af2682942e701be7aa16 (patch) | |
tree | 8b43562b28f0da99d59c48f093ac013eb1a4a20a | |
parent | 38e14d3a40775f2e010046ae36cf0505f7b6e5a6 (diff) |
- Handle more than 64k hits in the element vector.
- Avoid computing all vectors in full separately and instead do an incremental inline merge with.
- Also avoid requiring the searchiterator aspect on the wrappers.
21 files changed, 424 insertions, 274 deletions
diff --git a/searchcore/src/tests/proton/matching/querynodes_test.cpp b/searchcore/src/tests/proton/matching/querynodes_test.cpp index 5d01753dfb6..896b2f7e07f 100644 --- a/searchcore/src/tests/proton/matching/querynodes_test.cpp +++ b/searchcore/src/tests/proton/matching/querynodes_test.cpp @@ -48,6 +48,8 @@ using search::query::QueryBuilder; using search::queryeval::AndNotSearch; using search::queryeval::AndSearch; using search::queryeval::Blueprint; +using search::queryeval::ElementIteratorWrapper; +using search::queryeval::ElementIterator; using search::queryeval::EmptySearch; using search::queryeval::FakeRequestContext; using search::queryeval::FakeResult; @@ -290,16 +292,14 @@ SearchIterator *getParent<ONear>(SearchIterator *a, SearchIterator *b) { template <> SearchIterator *getParent<SameElement>(SearchIterator *a, SearchIterator *b) { - std::vector<SearchIterator::UP> children; - children.emplace_back(a); - children.emplace_back(b); - TermFieldMatchDataArray data; static TermFieldMatchData tmd; + std::vector<ElementIterator::UP> children; + children.emplace_back(std::make_unique<ElementIteratorWrapper>(SearchIterator::UP(a), tmd)); + children.emplace_back(std::make_unique<ElementIteratorWrapper>(SearchIterator::UP(b), tmd)); // we only check how many term/field combinations // are below the SameElement parent: // two terms searching in one index field - data.add(&tmd).add(&tmd); - return new SameElementSearch(nullptr, std::move(children), data, true); + return new SameElementSearch(nullptr, std::move(children), true); } template <> diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 19cad2f3905..47b382af3f1 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -94,6 +94,7 @@ vespa_define_module( src/tests/attribute/save_target src/tests/attribute/searchable src/tests/attribute/searchcontext + src/tests/attribute/searchcontextelementiterator src/tests/attribute/sourceselector src/tests/attribute/stringattribute src/tests/attribute/tensorattribute diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 416ddb5fbc0..ffae7824668 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -1,12 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/searchlib/attribute/attribute.h> #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributeiterators.h> -#include <vespa/searchlib/attribute/attributevector.hpp> -#include <vespa/searchlib/attribute/elementiterator.h> +#include <vespa/searchlib/attribute/searchcontextelementiterator.h> #include <vespa/searchlib/attribute/flagattribute.h> -#include <vespa/searchlib/attribute/multistringattribute.h> #include <vespa/searchlib/attribute/singleboolattribute.h> #include <vespa/searchlib/attribute/singlenumericattribute.h> #include <vespa/searchlib/attribute/singlestringattribute.h> @@ -22,6 +19,7 @@ #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/attribute/attributevector.hpp> #include <vespa/log/log.h> LOG_SETUP("searchcontext_test"); @@ -628,29 +626,24 @@ void SearchContextTest::testSearch(const ConfigMap & cfgs) { template<typename T, typename A> class Verifier : public search::test::SearchIteratorVerifier { public: - Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, const vespalib::string & name, - const Config & cfg, bool withElementId); + Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, + const vespalib::string & name, const Config & cfg); ~Verifier() override; SearchIterator::UP create(bool strict) const override { _sc->fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0)); - auto search = _sc->createIterator(&_dummy, strict); - if (_withElementId) { - search = std::make_unique<attribute::ElementIterator>(std::move(search), *_sc, _dummy); - } - return search; + return _sc->createIterator(&_dummy, strict); } private: mutable TermFieldMatchData _dummy; - const bool _withElementId; AttributePtr _attribute; SearchContextPtr _sc; }; template<typename T, typename A> -Verifier<T, A>::Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, const vespalib::string & name, - const Config & cfg, bool withElementId) - : _withElementId(withElementId), +Verifier<T, A>::Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, + const vespalib::string & name, const Config & cfg) + : _dummy(), _attribute(AttributeFactory::createAttribute(name + "-initrange", cfg)), _sc() { @@ -670,22 +663,18 @@ Verifier<T, A>::~Verifier() = default; template<typename T, typename A> void SearchContextTest::testSearchIterator(const std::vector<T> & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs) { - - for (bool withElementId : {false, true} ) { - for (const auto & cfg : cfgs) { - { - Verifier<T, A> verifier(keys, keyAsString, cfg.first, cfg.second, withElementId); - verifier.verify(); - } - { - Config withFilter(cfg.second); - withFilter.setIsFilter(true); - Verifier<T, A> verifier(keys, keyAsString, cfg.first + "-filter", withFilter, withElementId); - verifier.verify(); - } + for (const auto & cfg : cfgs) { + { + Verifier<T, A> verifier(keys, keyAsString, cfg.first, cfg.second); + verifier.verify(); + } + { + Config withFilter(cfg.second); + withFilter.setIsFilter(true); + Verifier<T, A> verifier(keys, keyAsString, cfg.first + "-filter", withFilter); + verifier.verify(); } } - } void SearchContextTest::testSearchIteratorConformance() { @@ -976,11 +965,13 @@ SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, Search pos.setElementWeight(100); md.appendPosition(pos); - SearchBasePtr sb = sc.createIterator(&md, strict); + SearchBasePtr sbp = sc.createIterator(&md, strict); + SearchIterator & search = *sbp; + queryeval::ElementIterator::UP elemIt; if (withElementId) { - sb = std::make_unique<attribute::ElementIterator>(std::move(sb), sc, md); + elemIt = std::make_unique<attribute::SearchContextElementIterator>(std::move(sbp), sc); } - sb->initFullRange(); + search.initFullRange(); std::vector<int32_t> weights(3); if (attr->getCollectionType() == CollectionType::SINGLE || @@ -1000,41 +991,40 @@ SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, Search } // unpack and check weights - sb->unpack(1); - EXPECT_EQUAL(sb->getDocId(), 1u); + search.unpack(1); + EXPECT_EQUAL(search.getDocId(), 1u); EXPECT_EQUAL(md.getDocId(), 1u); EXPECT_EQUAL(md.getWeight(), weights[0]); - sb->unpack(2); - EXPECT_EQUAL(sb->getDocId(), 2u); + search.unpack(2); + EXPECT_EQUAL(search.getDocId(), 2u); EXPECT_EQUAL(md.getDocId(), 2u); if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { - EXPECT_EQUAL(2, md.end()- md.begin()); - EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); - EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); - EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); + std::vector<uint32_t> elems; + elemIt->getElementIds(2, elems); + ASSERT_EQUAL(2u, elems.size()); + EXPECT_EQUAL(0u,elems[0]); + EXPECT_EQUAL(1u,elems[1]); } else { EXPECT_EQUAL(md.getWeight(), weights[1]); } - sb->unpack(3); - EXPECT_EQUAL(sb->getDocId(), 3u); + search.unpack(3); + EXPECT_EQUAL(search.getDocId(), 3u); EXPECT_EQUAL(md.getDocId(), 3u); if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { - EXPECT_EQUAL(3, md.end()- md.begin()); - EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); - EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); - EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[2].getElementId(), 2u); - EXPECT_EQUAL(md.begin()[2].getElementWeight(), 1); + std::vector<uint32_t> elems; + elemIt->getElementIds(3, elems); + ASSERT_EQUAL(3u, elems.size()); + EXPECT_EQUAL(0u,elems[0]); + EXPECT_EQUAL(1u,elems[1]); + EXPECT_EQUAL(2u,elems[2]); } else { EXPECT_EQUAL(md.getWeight(), weights[2]); } if (extra) { - sb->unpack(4); - EXPECT_EQUAL(sb->getDocId(), 4u); + search.unpack(4); + EXPECT_EQUAL(search.getDocId(), 4u); EXPECT_EQUAL(md.getDocId(), 4u); EXPECT_EQUAL(md.getWeight(), 1); } diff --git a/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt new file mode 100644 index 00000000000..1a27b446d30 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attribute_searchcontextelementiterator_test_app TEST + SOURCES + searchcontextelementiterator_test.cpp + DEPENDS + searchlib + gtest +) +vespa_add_test(NAME searchlib_attribute_searchcontextelementiterator_test_app COMMAND searchlib_attribute_searchcontextelementiterator_test_app) diff --git a/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp b/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp new file mode 100644 index 00000000000..400af6860ec --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp @@ -0,0 +1,128 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/attribute/searchcontextelementiterator.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/query/query_term_simple.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +# + +#include <vespa/vespalib/gtest/gtest.h> + +#include <vespa/log/log.h> +LOG_SETUP("searchcontextelementiterator_test"); + +using namespace search::attribute; +using namespace search; + +namespace { + +AttributeVector::SP +createAndFillAttribute() { + AttributeFactory factory; + AttributeVector::SP attribute = factory.createAttribute("mva", Config(BasicType::INT32, CollectionType::ARRAY)); + attribute->addDocs(6); + IntegerAttribute & ia = dynamic_cast<IntegerAttribute &>(*attribute); + ia.append(1, 3, 1); + for (int v : {1,2,3,1,2,3}) { + ia.append(2, v, 1); + } + for (int v : {1,2,3,4,5,1,2,3,4,5,6}) { + ia.append(4, v, 1); + } + ia.append(5, 5, 1); + attribute->commit(); + return attribute; +} + +queryeval::FakeResult +createResult() { + queryeval::FakeResult result; + result.doc(2).elem(0).pos(7).pos(9) + .elem(3).pos(1); + result.doc(4).elem(0).pos(2) + .elem(5).pos(1).pos(2).pos(3); + return result; +} + +void +verifySeek(queryeval::ElementIterator & elemIt) { + elemIt.initFullRange(); + EXPECT_FALSE(elemIt.seek(1)); + EXPECT_TRUE(elemIt.seek(2)); + EXPECT_FALSE(elemIt.seek(3)); + EXPECT_TRUE(elemIt.seek(4)); + EXPECT_FALSE(elemIt.seek(5)); +} + +void +verifyGetElementIds(queryeval::ElementIterator & elemIt, const std::vector<std::vector<uint32_t>> & expectedALL) { + elemIt.initFullRange(); + std::vector<uint32_t> elems; + for (uint32_t docId : {1,2,3,4,5}) { + const auto & expected = expectedALL[docId]; + elems.clear(); + EXPECT_EQ(expected.empty(), !elemIt.seek(docId)); + assert(expected.empty() != elemIt.seek(docId)); + if (elemIt.seek(docId)) { + elemIt.getElementIds(docId, elems); + EXPECT_EQ(expected.size(), elems.size()); + EXPECT_EQ(expected, elems); + } + } +} + +void +verifyMergeElementIds(queryeval::ElementIterator & elemIt, std::vector<uint32_t> initial, const std::vector<std::vector<uint32_t>> & expectedALL) { + elemIt.initFullRange(); + std::vector<uint32_t> elems; + for (uint32_t docId : {1,2,3,4,5}) { + const auto & expected = expectedALL[docId]; + elems = initial; + EXPECT_EQ(expected.empty(), !elemIt.seek(docId)); + assert(expected.empty() != elemIt.seek(docId)); + if (elemIt.seek(docId)) { + elemIt.mergeElementIds(docId, elems); + EXPECT_EQ(expected.size(), elems.size()); + EXPECT_EQ(expected, elems); + } + } +} + +void +verifyElementIterator(queryeval::ElementIterator & elemIt) { + verifySeek(elemIt); + std::vector<std::vector<uint32_t>> expectedALL = {{}, {}, {0, 3}, {}, {0, 5}, {}}; + std::vector<std::vector<uint32_t>> expectedNONE = {{}, {}, {}, {}, {}, {}}; + verifyGetElementIds(elemIt, expectedALL); + verifyMergeElementIds(elemIt, {0,1,2,3,4,5}, expectedALL); + //verifyMergeElementIds(elemIt, {}, expectedNONE); +} + +} + +TEST(ElementIteratorTest, require_that_searchcontext) +{ + AttributeVector::SP attribute = createAndFillAttribute(); + fef::TermFieldMatchData tfmd; + + SearchContextParams params; + ISearchContext::UP sc = attribute->createSearchContext(std::make_unique<QueryTermSimple>("1", QueryTermSimple::SearchTerm::WORD), params); + SearchContextElementIterator elemIt(sc->createIterator(&tfmd, false), *sc); + verifyElementIterator(elemIt); +} + +TEST(ElementIteratorTest, require_that_non_searchcontext) +{ + fef::TermFieldMatchData tfmd; + fef::TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + queryeval::FakeResult result = createResult(); + auto search = std::make_unique<queryeval::FakeSearch>("","","", result, tfmda); + queryeval::ElementIteratorWrapper wrapper(std::move(search), tfmd); + verifyElementIterator(wrapper); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp index 378e16480b8..db177b98b84 100644 --- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp +++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp @@ -8,12 +8,12 @@ #include <vespa/searchlib/queryeval/same_element_search.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchcommon/attribute/i_search_context.h> -#include <vespa/searchlib/attribute/elementiterator.h> +#include <vespa/searchlib/attribute/searchcontextelementiterator.h> #include <vespa/vespalib/test/insertion_operators.h> using namespace search::fef; using namespace search::queryeval; -using search::attribute::ElementIterator; +using search::attribute::SearchContextElementIterator; void verify_elements(SameElementSearch &se, uint32_t docid, const std::initializer_list<uint32_t> list) { std::vector<uint32_t> expect(list); @@ -99,11 +99,11 @@ TEST("require that strict iterator seeks to next hit") { auto search = bp->createSearch(*md, true); search->initRange(1, 1000); EXPECT_LESS(search->getDocId(), 1u); - EXPECT_TRUE(!search->seek(1)); + EXPECT_FALSE(search->seek(1)); EXPECT_EQUAL(search->getDocId(), 7u); EXPECT_TRUE(search->seek(9)); EXPECT_EQUAL(search->getDocId(), 9u); - EXPECT_TRUE(!search->seek(10)); + EXPECT_FALSE(search->seek(10)); EXPECT_TRUE(search->isAtEnd()); } @@ -134,8 +134,8 @@ TEST("require that attribute iterators are wrapped for element unpacking") { SameElementSearch *se = dynamic_cast<SameElementSearch*>(search.get()); ASSERT_TRUE(se != nullptr); ASSERT_EQUAL(se->children().size(), 2u); - EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[0].get()) != nullptr); - EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[1].get()) != nullptr); + EXPECT_TRUE(dynamic_cast<SearchContextElementIterator*>(se->children()[0].get()) != nullptr); + EXPECT_TRUE(dynamic_cast<SearchContextElementIterator*>(se->children()[1].get()) != nullptr); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index b9e3af8c729..0fffe19727c 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -34,7 +34,7 @@ vespa_add_library(searchlib_attribute OBJECT defines.cpp diversity.cpp dociditerator.cpp - elementiterator.cpp + searchcontextelementiterator.cpp enumattribute.cpp enumattributesaver.cpp enumcomparator.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp b/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp deleted file mode 100644 index 3be4c478376..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "elementiterator.h" -#include <vespa/searchcommon/attribute/i_search_context.h> -#include <vespa/searchlib/fef/termfieldmatchdata.h> - -using search::fef::TermFieldMatchDataPosition; - -namespace search::attribute { - -void -ElementIterator::doSeek(uint32_t docid) { - _search->doSeek(docid); - setDocId(_search->getDocId()); -} - -void -ElementIterator::doUnpack(uint32_t docid) { - _tfmd.reset(docid); - int32_t weight(0); - for (int32_t id = _searchContext.find(docid, 0, weight); id >= 0; id = _searchContext.find(docid, id+1, weight)) { - _tfmd.appendPosition(TermFieldMatchDataPosition(id, 0, weight, 1)); - } -} - -vespalib::Trinary -ElementIterator::is_strict() const { - return _search->is_strict(); -} - -void -ElementIterator::initRange(uint32_t beginid, uint32_t endid) { - SearchIterator::initRange(beginid, endid); - _search->initRange(beginid, endid); - setDocId(_search->getDocId()); -} - -ElementIterator::ElementIterator(SearchIterator::UP search, const ISearchContext & sc, fef::TermFieldMatchData & tfmd) - : _search(std::move(search)), - _searchContext(sc), - _tfmd(tfmd) -{ -} - -ElementIterator::~ElementIterator() = default; - -} diff --git a/searchlib/src/vespa/searchlib/attribute/elementiterator.h b/searchlib/src/vespa/searchlib/attribute/elementiterator.h deleted file mode 100644 index 232139751b6..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/elementiterator.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchlib/queryeval/searchiterator.h> - -namespace search::fef { class TermFieldMatchData; } -namespace search::attribute { - -class ISearchContext; - -class ElementIterator : public queryeval::SearchIterator -{ -private: - SearchIterator::UP _search; - const ISearchContext & _searchContext; - fef::TermFieldMatchData & _tfmd; - - void doSeek(uint32_t docid) override; - void doUnpack(uint32_t docid) override; - Trinary is_strict() const override; - void initRange(uint32_t beginid, uint32_t endid) override; -public: - ElementIterator(SearchIterator::UP search, const ISearchContext & sc, fef::TermFieldMatchData & tfmd); - ~ElementIterator(); -}; - -} diff --git a/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp new file mode 100644 index 00000000000..8bfb07df938 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp @@ -0,0 +1,41 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "searchcontextelementiterator.h" +#include <vespa/searchcommon/attribute/i_search_context.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +using search::fef::TermFieldMatchDataPosition; + +namespace search::attribute { + +void +SearchContextElementIterator::getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) { + int32_t weight(0); + for (int32_t id = _searchContext.find(docId, 0, weight); id >= 0; id = _searchContext.find(docId, id+1, weight)) { + elementIds.push_back(id); + } +} +void +SearchContextElementIterator::mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) { + size_t toKeep(0); + int32_t id(-1); + int32_t weight(0); + for (int32_t candidate : elementIds) { + if (candidate >= id) { + id = _searchContext.find(docId, candidate, weight); + if (id == candidate) { + elementIds[toKeep++] = candidate; + } + } + } + elementIds.resize(toKeep); +} + +SearchContextElementIterator::SearchContextElementIterator(queryeval::SearchIterator::UP search, const ISearchContext & sc) + : ElementIterator(std::move(search)), + _searchContext(sc) +{} + +SearchContextElementIterator::~SearchContextElementIterator() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h new file mode 100644 index 00000000000..c6c5aac1cd2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h @@ -0,0 +1,23 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/queryeval/elementiterator.h> + +namespace search::fef { class TermFieldMatchData; } +namespace search::attribute { + +class ISearchContext; + +class SearchContextElementIterator : public queryeval::ElementIterator +{ +private: + const ISearchContext & _searchContext; +public: + void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override; + void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override; + SearchContextElementIterator(queryeval::SearchIterator::UP search, const ISearchContext & sc); + ~SearchContextElementIterator() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index dc36e0a1f7e..723af890f0e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -9,6 +9,7 @@ vespa_add_library(searchlib_queryeval OBJECT document_weight_search_iterator.cpp dot_product_blueprint.cpp dot_product_search.cpp + elementiterator.cpp emptysearch.cpp equiv_blueprint.cpp equivsearch.cpp diff --git a/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp new file mode 100644 index 00000000000..750408fb242 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp @@ -0,0 +1,71 @@ +#include "elementiterator.h" +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/vespalib/objects/objectvisitor.h> + +namespace search::queryeval { + +void +ElementIterator::visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "iterator", _search.get()); +} + +ElementIteratorWrapper::ElementIteratorWrapper(SearchIterator::UP search, fef::TermFieldMatchData & tfmd) + : ElementIterator(std::move(search)), + _tfmd(tfmd) +{} + +ElementIteratorWrapper::~ElementIteratorWrapper() = default; + +void +ElementIteratorWrapper::getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) { + _search->unpack(docId); + int prevId(-1); + for (auto element : _tfmd) { + uint32_t id(element.getElementId()); + if (prevId != int(id)) { + elementIds.push_back(id); + prevId = id; + } + } +} + +void +ElementIteratorWrapper::mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) { + _search->unpack(docId); + size_t toKeep(0); + int32_t id(-1); + auto it = _tfmd.begin(); + for (int32_t candidate : elementIds) { + if (candidate >= id) { + while ((it < _tfmd.end()) && (candidate > int(it->getElementId()))) { + it++; + } + if (it == _tfmd.end()) break; + id = it->getElementId(); + if (id == candidate) { + elementIds[toKeep++] = candidate; + } + } + } + elementIds.resize(toKeep); +} + +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::ElementIterator *obj) +{ + if (obj != 0) { + self.openStruct(name, "ElementIterator"); + obj->visitMembers(self); + self.closeStruct(); + } else { + self.visitNull(name); + } +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::ElementIterator &obj) +{ + visit(self, name, &obj); +}
\ No newline at end of file diff --git a/searchlib/src/vespa/searchlib/queryeval/elementiterator.h b/searchlib/src/vespa/searchlib/queryeval/elementiterator.h new file mode 100644 index 00000000000..a433c9be8f4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/elementiterator.h @@ -0,0 +1,48 @@ +#pragma once + +#include <vespa/searchlib/queryeval/searchiterator.h> + +namespace search::fef { class TermFieldMatchData; } + +namespace search::queryeval { + +class ElementIterator { +public: + using UP = std::unique_ptr<ElementIterator>; + ElementIterator(SearchIterator::UP search) : _search(std::move(search)) { } + virtual ~ElementIterator() = default; + bool seek(uint32_t docId) { + return _search->seek(docId); + } + void initFullRange() { + _search->initFullRange(); + } + void initRange(uint32_t beginid, uint32_t endid) { + _search->initRange(beginid, endid); + } + uint32_t getDocId() const { + return _search->getDocId(); + } + virtual void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) = 0; + virtual void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) = 0; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; +protected: + SearchIterator::UP _search; +}; + +class ElementIteratorWrapper : public ElementIterator { +public: + ElementIteratorWrapper(SearchIterator::UP search, fef::TermFieldMatchData & tfmd); + ~ElementIteratorWrapper() override; + void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override; + void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override; +private: + fef::TermFieldMatchData & _tfmd; +}; + +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::ElementIterator &obj); +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::ElementIterator *obj); diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp index 6f2d3cb6b2a..8e2ec3eabed 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp @@ -6,8 +6,7 @@ #include <vespa/vespalib/objects/visit.h> #include <vespa/searchcommon/attribute/i_search_context.h> -namespace search { -namespace queryeval { +namespace search::queryeval { void FakeSearch::doSeek(uint32_t docid) @@ -61,5 +60,4 @@ FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "term", _term); } -} // namespace search::queryeval -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h b/searchlib/src/vespa/searchlib/queryeval/fake_search.h index f5b95a94e99..4cbba5e5c24 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h @@ -7,8 +7,7 @@ #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchcommon/attribute/i_search_context.h> -namespace search { -namespace queryeval { +namespace search::queryeval { class FakeSearch : public SearchIterator { @@ -41,9 +40,12 @@ public: bool is_attr() const { return (_ctx != nullptr); } void doSeek(uint32_t docid) override; void doUnpack(uint32_t docid) override; + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + _offset = 0; + } const PostingInfo *getPostingInfo() const override { return _result.postingInfo(); } void visitMembers(vespalib::ObjectVisitor &visitor) const override; }; -} // namespace queryeval -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp index 22a392ca208..d1e0a2528e7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp @@ -4,7 +4,7 @@ #include "same_element_search.h" #include "field_spec.hpp" #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/attribute/elementiterator.h> +#include <vespa/searchlib/attribute/searchcontextelementiterator.h> #include <vespa/vespalib/objects/visit.hpp> #include <algorithm> #include <map> @@ -48,8 +48,7 @@ void SameElementBlueprint::optimize_self() { std::sort(_terms.begin(), _terms.end(), - [](const auto &a, const auto &b) - { + [](const auto &a, const auto &b) { return (a->getState().estimate() < b->getState().estimate()); }); } @@ -66,34 +65,23 @@ std::unique_ptr<SameElementSearch> SameElementBlueprint::create_same_element_search(bool strict) const { fef::MatchDataLayout my_layout = _layout; - std::vector<fef::TermFieldHandle> extra_handles; - for (size_t i = 0; i < _terms.size(); ++i) { - const State &childState = _terms[i]->getState(); - assert(childState.numFields() == 1); - extra_handles.push_back(my_layout.allocTermField(childState.field(0).getFieldId())); - } fef::MatchData::UP md = my_layout.createMatchData(); - search::fef::TermFieldMatchDataArray childMatch; - std::vector<SearchIterator::UP> children(_terms.size()); + std::vector<ElementIterator::UP> children(_terms.size()); for (size_t i = 0; i < _terms.size(); ++i) { const State &childState = _terms[i]->getState(); SearchIterator::UP child = _terms[i]->createSearch(*md, (strict && (i == 0))); const attribute::ISearchContext *context = _terms[i]->get_attribute_search_context(); if (context == nullptr) { - children[i] = std::move(child); - childMatch.add(childState.field(0).resolve(*md)); + children[i] = std::make_unique<ElementIteratorWrapper>(std::move(child), *childState.field(0).resolve(*md)); } else { - fef::TermFieldMatchData *child_tfmd = md->resolveTermField(extra_handles[i]); - children[i] = std::make_unique<attribute::ElementIterator>(std::move(child), *context, *child_tfmd); - childMatch.add(child_tfmd); + children[i] = std::make_unique<attribute::SearchContextElementIterator>(std::move(child), *context); } } - return std::make_unique<SameElementSearch>(std::move(md), std::move(children), childMatch, strict); + return std::make_unique<SameElementSearch>(std::move(md), std::move(children), strict); } SearchIterator::UP -SameElementBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, - bool strict) const +SameElementBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const { (void) tfmda; assert(!tfmda.valid()); diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp index 37f8eccadd9..18f42597121 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp @@ -11,26 +11,6 @@ using TFMD = search::fef::TermFieldMatchData; namespace search::queryeval { -namespace { - -template <typename It> -int32_t try_match(const fef::TermFieldMatchDataArray &match, std::vector<It> &iterators, uint32_t cand) { - for (size_t i = 0; i < iterators.size(); ++i) { - while ((iterators[i] != match[i]->end()) && (iterators[i]->getElementId() < cand)) { - ++iterators[i]; - } - if (iterators[i] == match[i]->end()) { - return -1; - } - if (iterators[i]->getElementId() != cand) { - return iterators[i]->getElementId(); - } - } - return cand; -} - -} - bool SameElementSearch::check_docid_match(uint32_t docid) { @@ -43,41 +23,30 @@ SameElementSearch::check_docid_match(uint32_t docid) } void -SameElementSearch::unpack_children(uint32_t docid) +SameElementSearch::fetch_matching_elements(uint32_t docid, std::vector<uint32_t> & elems) { - for (const auto &child: _children) { - child->doUnpack(docid); - } - for (size_t i = 0; i < _childMatch.size(); ++i) { - _iterators[i] = _childMatch[i]->begin(); + _children.front()->getElementIds(docid, elems); + for (auto it(_children.begin() + 1); it != _children.end(); it++) { + (*it)->mergeElementIds(docid, elems); } } bool SameElementSearch::check_element_match(uint32_t docid) { - unpack_children(docid); - int32_t cand = 0; - int32_t next = try_match(_childMatch, _iterators, cand); - while (next > cand) { - cand = next; - next = try_match(_childMatch, _iterators, cand); - } - return (cand == next); + _matchingElements.clear(); + fetch_matching_elements(docid, _matchingElements); + return !_matchingElements.empty(); } SameElementSearch::SameElementSearch(fef::MatchData::UP md, - std::vector<SearchIterator::UP> children, - const fef::TermFieldMatchDataArray &childMatch, + std::vector<ElementIterator::UP> children, bool strict) : _md(std::move(md)), _children(std::move(children)), - _childMatch(childMatch), - _iterators(childMatch.size()), _strict(strict) { assert(!_children.empty()); - assert(_childMatch.valid()); } void @@ -118,17 +87,7 @@ void SameElementSearch::find_matching_elements(uint32_t docid, std::vector<uint32_t> &dst) { if (check_docid_match(docid)) { - unpack_children(docid); - int32_t cand = 0; - while (cand >= 0) { - int32_t next = try_match(_childMatch, _iterators, cand); - if (next == cand) { - dst.push_back(cand); - ++cand; - } else { - cand = next; - } - } + fetch_matching_elements(docid, dst); } } diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h index ae7ecab781d..e5f185e06e6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h @@ -2,7 +2,7 @@ #pragma once -#include "searchiterator.h" +#include "elementiterator.h" #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> @@ -21,26 +21,24 @@ class SameElementSearch : public SearchIterator private: using It = fef::TermFieldMatchData::PositionsIterator; - fef::MatchData::UP _md; - std::vector<SearchIterator::UP> _children; - fef::TermFieldMatchDataArray _childMatch; - std::vector<It> _iterators; - bool _strict; + fef::MatchData::UP _md; + std::vector<ElementIterator::UP> _children; + std::vector<uint32_t> _matchingElements; + bool _strict; - void unpack_children(uint32_t docid); + void fetch_matching_elements(uint32_t docid, std::vector<uint32_t> &dst); bool check_docid_match(uint32_t docid); bool check_element_match(uint32_t docid); public: SameElementSearch(fef::MatchData::UP md, - std::vector<SearchIterator::UP> children, - const fef::TermFieldMatchDataArray &childMatch, + std::vector<ElementIterator::UP> children, bool strict); void initRange(uint32_t begin_id, uint32_t end_id) override; void doSeek(uint32_t docid) override; void doUnpack(uint32_t) override {} void visitMembers(vespalib::ObjectVisitor &visitor) const override; - const std::vector<SearchIterator::UP> &children() const { return _children; } + const std::vector<ElementIterator::UP> &children() const { return _children; } // used during docsum fetching to identify matching elements // initRange must be called before use. diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h index 3fcc2427880..ac315c805d5 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h @@ -13,11 +13,7 @@ using search::fef::TermFieldMatchDataPosition; #include <vector> #include <string> -namespace search -{ - -namespace fakedata -{ +namespace search::fakedata { /* * Base class for faked posting list formats. @@ -41,56 +37,36 @@ public: /* * Size of posting list, in bits. */ - virtual size_t - bitSize() const = 0; - - virtual size_t - skipBitSize() const; - - virtual size_t - l1SkipBitSize() const; - - virtual size_t - l2SkipBitSize() const; - - virtual size_t - l3SkipBitSize() const; - - virtual size_t - l4SkipBitSize() const; - - virtual bool - hasWordPositions() const = 0; - + virtual size_t bitSize() const = 0; + virtual size_t skipBitSize() const; + virtual size_t l1SkipBitSize() const; + virtual size_t l2SkipBitSize() const; + virtual size_t l3SkipBitSize() const; + virtual size_t l4SkipBitSize() const; + virtual bool hasWordPositions() const = 0; virtual bool has_interleaved_features() const; - virtual bool enable_unpack_normal_features() const; - virtual bool enable_unpack_interleaved_features() const; /* * Single posting list performance, without feature unpack. */ - virtual int - lowLevelSinglePostingScan() const = 0; + virtual int lowLevelSinglePostingScan() const = 0; /* * Single posting list performance, with feature unpack. */ - virtual int - lowLevelSinglePostingScanUnpack() const = 0; + virtual int lowLevelSinglePostingScanUnpack() const = 0; /* * Two posting lists performance (same format) without feature unpack. */ - virtual int - lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0; + virtual int lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0; /* * Two posting lists performance (same format) with feature unpack. */ - virtual int - lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0; + virtual int lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0; /* @@ -105,7 +81,4 @@ public: } }; -} // namespace fakedata - -} // namespace search - +} diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h index ffe9e2e54d0..97b0d3a44d3 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h @@ -12,9 +12,7 @@ #include <vespa/searchlib/diskindex/fieldreader.h> #include <vespa/searchlib/diskindex/fieldwriter.h> -namespace search { - -namespace fakedata { +namespace search::fakedata { /** @@ -275,7 +273,4 @@ public: void addDocIdBias(uint32_t docIdBias); }; -} // namespace fakedata - -} // namespace search - +} |