diff options
| | | |
|---|---|---|
| author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-09-25 21:01:12 +0200 |
| committer | GitHub <noreply@github.com> | 2017-09-25 21:01:12 +0200 |
| commit | 76907e0ce40bc501e8171f6537ab509171b8bd48 (patch) | |
| tree | 46fc16132c8ed0d925dc94a106c796988df0300e | |
| parent | 3a528e4bb4fd1b6b203cb6ceea3f3257fd05126a (diff) | |
| parent | 050f6822e82adcdd7c6e8d970f8ed976c7e5962b (diff) | |
Merge pull request #3527 from vespa-engine/havardpe/use-internal-match-data-for-weighted-set-term
use internal match data for common dummy term field match data
9 files changed, 77 insertions, 32 deletions
diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp index 2a9885c9648..38d6483f21a 100644 --- a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -234,7 +234,7 @@ struct WeightedSetFactory : SparseVectorFactory { terms.push_back(childFactory.createChild(i, limit)); weights.push_back(default_weight); } - return WeightedSetTermSearch::create(terms, tfmd, weights); + return WeightedSetTermSearch::create(terms, tfmd, weights, MatchData::UP(nullptr)); } }; diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 003c9935716..78195f19427 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -7,7 +7,9 @@ #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/queryeval/field_spec.h> #include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/weighted_set_term_blueprint.h> #include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/fake_searchable.h> #include <vespa/searchlib/queryeval/fake_requestcontext.h> #include <vespa/searchlib/test/weightedchildrenverifiers.h> @@ -121,7 +123,7 @@ struct MockFixture { mock = new MockSearch(initial); children.push_back(mock); weights.push_back(1); - search.reset(WeightedSetTermSearch::create(children, tfmd, weights)); + search.reset(WeightedSetTermSearch::create(children, tfmd, weights, MatchData::UP(nullptr))); } }; @@ -192,7 +194,7 @@ TEST_F("test Eager Matching Child", MockFixture(5)) { class 
IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier { private: SearchIterator::UP create(const std::vector<SearchIterator*> &children) const override { - return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, _weights)); + return SearchIterator::UP(WeightedSetTermSearch::create(children, _tfmd, _weights, MatchData::UP(nullptr))); } }; @@ -213,4 +215,45 @@ TEST("verify search iterator conformance with document weight iterator children" verifier.verify(); } +struct VerifyMatchData { + struct MyBlueprint : search::queryeval::SimpleLeafBlueprint { + VerifyMatchData &vmd; + MyBlueprint(VerifyMatchData &vmd_in, FieldSpec spec_in) + : SimpleLeafBlueprint(spec_in), vmd(vmd_in) {} + SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const override { + EXPECT_EQUAL(tfmda.size(), 1u); + EXPECT_TRUE(tfmda[0] != nullptr); + if (vmd.child_tfmd == nullptr) { + vmd.child_tfmd = tfmda[0]; + } else { + EXPECT_EQUAL(vmd.child_tfmd, tfmda[0]); + } + ++vmd.child_cnt; + return std::make_unique<EmptySearch>(); + } + }; + size_t child_cnt = 0; + TermFieldMatchData *child_tfmd = nullptr; + search::queryeval::Blueprint::UP create(const FieldSpec &spec) { + return std::make_unique<MyBlueprint>(*this, spec); + } +}; + +TEST("require that children get a common (yet separate) term field match data") { + VerifyMatchData vmd; + MatchDataLayout layout; + auto top_handle = layout.allocTermField(42); + FieldSpec top_spec("foo", 42, top_handle); + WeightedSetTermBlueprint blueprint(top_spec); + for (size_t i = 0; i < 5; ++i) { + blueprint.addTerm(vmd.create(blueprint.getNextChildField(top_spec)), 1); + } + auto match_data = layout.createMatchData(); + auto search = blueprint.createSearch(*match_data, true); + auto top_tfmd = match_data->resolveTermField(top_handle); + EXPECT_EQUAL(vmd.child_cnt, 5u); + EXPECT_TRUE(vmd.child_tfmd != nullptr); + EXPECT_NOT_EQUAL(top_tfmd, vmd.child_tfmd); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff 
--git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index e6c9e9c0590..3ff7db5a184 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -6,6 +6,7 @@ #include <vespa/searchlib/query/queryterm.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/searchlib/fef/matchdatalayout.h> namespace search { @@ -157,11 +158,15 @@ AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArr assert(tfmda.size() == 1); fef::TermFieldMatchData &tfmd = *tfmda[0]; if (strict) { // use generic weighted set search + fef::MatchDataLayout layout; + auto handle = layout.allocTermField(tfmd.getFieldId()); + auto match_data = layout.createMatchData(); + auto child_tfmd = match_data->resolveTermField(handle); std::vector<queryeval::SearchIterator*> children(_contexts.size()); for (size_t i = 0; i < _contexts.size(); ++i) { - children[i] = _contexts[i]->createIterator(&tfmd, true).release(); + children[i] = _contexts[i]->createIterator(child_tfmd, true).release(); } - return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, _weights)); + return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, _weights, std::move(match_data))); } else { // use attribute filter optimization bool isSingleValue = !_attr.hasMultiValue(); bool isString = (_attr.isStringType() && _attr.hasEnum()); diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp index e50fe57ac41..1f5858c1100 100644 --- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp @@ -37,12 +37,11 @@ 
SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &child for (auto child: children) { _children.emplace_back(child); } - assert((_children.size() == _childMatch.size()) || - (_childMatch.empty() && (_md.get() == nullptr))); + assert((_children.size() == _childMatch.size()) || _childMatch.empty()); } -SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &children) - : SearchIteratorPack(children, std::vector<fef::TermFieldMatchData*>(), MatchDataUP()) +SearchIteratorPack::SearchIteratorPack(const std::vector<SearchIterator*> &children, MatchDataUP md) + : SearchIteratorPack(children, std::vector<fef::TermFieldMatchData*>(), MatchDataUP(std::move(md))) { } std::unique_ptr<BitVector> diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h index 20a3a57f34a..58c774e0903 100644 --- a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h +++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h @@ -29,7 +29,7 @@ public: const std::vector<fef::TermFieldMatchData*> &childMatch, MatchDataUP md); - explicit SearchIteratorPack(const std::vector<SearchIterator*> &children); + SearchIteratorPack(const std::vector<SearchIterator*> &children, MatchDataUP md); uint32_t get_docid(uint32_t ref) const { return _children[ref]->getDocId(); diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp index 0c9af1d3e25..fc68c48a247 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -9,6 +9,8 @@ namespace search::queryeval { WeightedSetTermBlueprint::WeightedSetTermBlueprint(const FieldSpec &field) : ComplexLeafBlueprint(field), _estimate(), + _layout(), + _children_field(field.getName(), field.getFieldId(), _layout.allocTermField(field.getFieldId()), false), 
_weights(), _terms() { @@ -40,18 +42,18 @@ WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight) term.release(); } + SearchIterator::UP -WeightedSetTermBlueprint::createSearch(search::fef::MatchData &md, bool) const +WeightedSetTermBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool) const { - const State &state = getState(); - assert(state.numFields() == 1); - search::fef::TermFieldMatchData &tfmd = *state.field(0).resolve(md); - + assert(tfmda.size() == 1); + fef::MatchData::UP md = _layout.createMatchData(); std::vector<SearchIterator*> children(_terms.size()); for (size_t i = 0; i < _terms.size(); ++i) { - children[i] = _terms[i]->createSearch(md, true).release(); + children[i] = _terms[i]->createSearch(*md, true).release(); } - return SearchIterator::UP(WeightedSetTermSearch::create(children, tfmd, _weights)); + return SearchIterator::UP(WeightedSetTermSearch::create(children, *tfmda[0], _weights, std::move(md))); } void @@ -71,10 +73,4 @@ WeightedSetTermBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "_terms", _terms); } -SearchIterator::UP -WeightedSetTermBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const -{ - abort(); -} - } diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h index 1afa0f9f2b2..b81d6c6f9e9 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h @@ -15,6 +15,8 @@ namespace queryeval { class WeightedSetTermBlueprint : public ComplexLeafBlueprint { HitEstimate _estimate; + fef::MatchDataLayout _layout; + FieldSpec _children_field; std::vector<int32_t> _weights; std::vector<Blueprint*> _terms; @@ -27,18 +29,16 @@ public: // used by create visitor // matches signature in dot product blueprint for common blueprint - // building 
code. Hands out its own field spec to children. NOTE: - // this is only ok since children will never be unpacked. - FieldSpec getNextChildField(const FieldSpec &outer) { return outer; } + // building code. Hands out the same field spec to all children. + FieldSpec getNextChildField(const FieldSpec &) { return _children_field; } // used by create visitor void addTerm(Blueprint::UP term, int32_t weight); - SearchIteratorUP createSearch(search::fef::MatchData &md, bool strict) const override; + SearchIteratorUP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override; void visitMembers(vespalib::ObjectVisitor &visitor) const override; private: - SearchIteratorUP createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const override; void fetchPostings(bool strict) override; }; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index aae7c60bd80..2801f1c5e0c 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -134,15 +134,16 @@ public: SearchIterator * WeightedSetTermSearch::create(const std::vector<SearchIterator*> &children, TermFieldMatchData &tmd, - const std::vector<int32_t> &weights) + const std::vector<int32_t> &weights, + fef::MatchData::UP match_data) { typedef WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, SearchIteratorPack> ArrayHeapImpl; typedef WeightedSetTermSearchImpl<vespalib::LeftHeap, SearchIteratorPack> HeapImpl; if (children.size() < 128) { - return new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children)); + return new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children, std::move(match_data))); } - return new HeapImpl(tmd, weights, SearchIteratorPack(children)); + return new HeapImpl(tmd, weights, SearchIteratorPack(children, std::move(match_data))); } 
//----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index de7131bd0a9..397ac0caf2e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -29,7 +29,8 @@ protected: public: static SearchIterator* create(const std::vector<SearchIterator*> &children, search::fef::TermFieldMatchData &tmd, - const std::vector<int32_t> &weights); + const std::vector<int32_t> &weights, + fef::MatchData::UP match_data); static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, const std::vector<int32_t> &weights, |