diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2020-05-14 23:47:36 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2020-05-15 11:57:52 +0200 |
commit | a5b49bb9f29cd9724cc538b128901e3b698533f8 (patch) | |
tree | 40a410eae3fa17968e0418b73ec56fd49078a4ea | |
parent | d05fbb6d8eae73144cf6b5f4c5eb794f3b157389 (diff) |
Unpack interleaved features if they are needed in equiv search.
4 files changed, 162 insertions, 40 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index bec1691df23..eb6e49747a1 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1291,7 +1291,7 @@ TEST("require that children does not optimize when parents refuse them to") { } } -TEST("require_that_unpack_optimization_is_overruled_by_equiv") { +TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") { FieldSpecBaseList fields; fields.add(FieldSpecBase(1, 1)); fields.add(FieldSpecBase(2, 2)); @@ -1322,7 +1322,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::SelectiveUnpack>", e.getChildren()[0]->getClassName()); } @@ -1332,7 +1332,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::NoUnpack>", e.getChildren()[0]->getClassName()); } } diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp index 219735105de..412130ecaab 100644 --- a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp +++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp @@ -19,7 +19,7 @@ protected: EquivTest(); ~EquivTest(); - void test_equiv(bool strict); + void test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features); }; EquivTest::EquivTest() = default; @@ -27,15 +27,15 @@ EquivTest::EquivTest() = default; EquivTest::~EquivTest() = default; void -EquivTest::test_equiv(bool strict) +EquivTest::test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features) { FakeResult a; FakeResult b; FakeResult c; - a.doc(5).pos(1); - b.doc(5).pos(2); - c.doc(5).pos(3).doc(10).pos(4); + a.doc(5).pos(1).len(30).field_length(30).num_occs(1); + b.doc(5).pos(2).len(30).field_length(30).num_occs(1); + c.doc(5).pos(3).len(30).field_length(30).num_occs(1).doc(10).pos(4).len(35).field_length(35).num_occs(1); MatchDataLayout subLayout; TermFieldHandle fbh11 = subLayout.allocTermField(1); @@ -52,6 +52,11 @@ EquivTest::test_equiv(bool strict) bp->addTerm(std::make_unique<FakeBlueprint>(FieldSpec("bar", 2, fbh22), c), 1.0); MatchData::UP md = MatchData::makeTestInstance(100, 10); + for (uint32_t field_id = 1; field_id <= 2; ++field_id) { + TermFieldMatchData &data = *md->resolveTermField(field_id); + data.setNeedNormalFeatures(unpack_normal_features); + data.setNeedInterleavedFeatures(unpack_interleaved_features); + } bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP search = bp->createSearch(*md, strict); search->initFullRange(); @@ -69,25 +74,43 @@ EquivTest::test_equiv(bool strict) EXPECT_EQ(1u, data.getFieldId()); EXPECT_EQ(5u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(1u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(1u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(1u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(1u, data.getNumOccs()); + EXPECT_EQ(30u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } { TermFieldMatchData &data = *md->resolveTermField(2); EXPECT_EQ(2u, data.getFieldId()); EXPECT_EQ(5u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(2u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(2u, itr.getPosition()); - itr.next(); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(3u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(2u, itr.getPosition()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(3u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(2u, data.getNumOccs()); + EXPECT_EQ(30u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } } EXPECT_TRUE(!search->seek(7)); @@ -104,11 +127,20 @@ EquivTest::test_equiv(bool strict) EXPECT_EQ(2u, data.getFieldId()); EXPECT_EQ(10u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(1u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(4u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(4u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(1u, data.getNumOccs()); + EXPECT_EQ(35u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } } EXPECT_TRUE(!search->seek(13)); @@ -122,12 +154,42 @@ EquivTest::test_equiv(bool strict) TEST_F(EquivTest, nonstrict) { - test_equiv(false); + test_equiv(false, true, false); } TEST_F(EquivTest, strict) { - test_equiv(true); + test_equiv(true, true, false); +} + +TEST_F(EquivTest, nonstrict_no_normal_no_interleaved) +{ + test_equiv(false, false, false); +} + +TEST_F(EquivTest, strict_no_normal_no_interleaved) +{ + test_equiv(true, false, false); +} + +TEST_F(EquivTest, nonstrict_no_normal_interleaved) +{ + test_equiv(false, false, true); +} + +TEST_F(EquivTest, strict_no_normal_interleaved) +{ + test_equiv(true, false, true); +} + +TEST_F(EquivTest, nonstrict_normal_interleaved) +{ + test_equiv(false, true, true); +} + +TEST_F(EquivTest, strict_normal_interleaved) +{ + test_equiv(true, true, true); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp index 97cb829e30c..973e11fc0d2 100644 --- a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp +++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp @@ -41,31 +41,52 @@ TermMatchDataMerger::merge(uint32_t docid, { _scratch.clear(); bool wasMatch = false; + bool needs_normal_features = out.needs_normal_features(); + bool needs_interleaved_features = out.needs_interleaved_features(); + uint32_t num_occs = 0u; + uint16_t field_length = 0u; for (size_t i = 0; i < in.size(); ++i) { const TermFieldMatchData *md = in[i].matchData; if (md->getDocId() == docid) { - for (const TermFieldMatchDataPosition &iter : *md) { - double exactness = in[i].exactness * iter.getMatchExactness(); - _scratch.push_back(iter); - _scratch.back().setMatchExactness(exactness); + if (needs_normal_features) { + for (const TermFieldMatchDataPosition &iter : *md) { + double exactness = in[i].exactness * iter.getMatchExactness(); + _scratch.push_back(iter); + _scratch.back().setMatchExactness(exactness); + } + } + if (needs_interleaved_features) { + num_occs += md->getNumOccs(); + field_length = std::max(field_length, md->getFieldLength()); } wasMatch = true; } } if (wasMatch) { out.reset(docid); - if (_scratch.size() > 0) { - std::sort(_scratch.begin(), _scratch.end(), - TermFieldMatchDataPosition::compareWithExactness); - TermFieldMatchDataPosition prev = _scratch[0]; - for (size_t i = 1; i < _scratch.size(); ++i) { - const TermFieldMatchDataPosition &curr = _scratch[i]; - if (prev.key() < curr.key()) { - out.appendPosition(prev); - prev = curr; + if (needs_normal_features) { + num_occs = 0; + if (_scratch.size() > 0) { + std::sort(_scratch.begin(), _scratch.end(), + TermFieldMatchDataPosition::compareWithExactness); + TermFieldMatchDataPosition prev = _scratch[0]; + for (size_t i = 1; i < _scratch.size(); ++i) { + const TermFieldMatchDataPosition &curr = _scratch[i]; + if (prev.key() < curr.key()) { + out.appendPosition(prev); + prev = curr; + ++num_occs; + } } + out.appendPosition(prev); + ++num_occs; } - out.appendPosition(prev); + } + if (needs_interleaved_features) { + constexpr uint32_t max_num_occs = std::numeric_limits<uint16_t>::max(); + uint16_t capped_num_occs = std::min(num_occs, max_num_occs); + out.setNumOccs(std::min(capped_num_occs, field_length)); + out.setFieldLength(field_length); } } } diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp index 08a05b25772..cf378c95487 100644 --- a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp @@ -4,9 +4,42 @@ #include "equivsearch.h" #include "field_spec.hpp" #include <vespa/vespalib/objects/visit.hpp> +#include <vespa/vespalib/stllike/hash_map.hpp> namespace search::queryeval { +namespace { + +class UnpackNeed +{ + bool _needs_normal_features; + bool _needs_interleaved_features; +public: + UnpackNeed() + : _needs_normal_features(false), + _needs_interleaved_features(false) + { + } + + void observe(const fef::TermFieldMatchData &output) + { + if (output.needs_normal_features()) { + _needs_normal_features = true; + } + if (output.needs_interleaved_features()) { + _needs_interleaved_features = true; + } + } + + void notify(fef::TermFieldMatchData &input) const + { + input.setNeedNormalFeatures(_needs_normal_features); + input.setNeedInterleavedFeatures(_needs_interleaved_features); + } +}; + +}; + EquivBlueprint::EquivBlueprint(const FieldSpecBaseList &fields, fef::MatchDataLayout subtree_mdl) : ComplexLeafBlueprint(fields), @@ -26,10 +59,16 @@ EquivBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &outputs, bo fef::MatchData::UP md = _layout.createMatchData(); MultiSearch::Children children(_terms.size()); fef::TermMatchDataMerger::Inputs childMatch; + vespalib::hash_map<uint16_t, UnpackNeed> unpack_needs(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + unpack_needs[outputs[i]->getFieldId()].observe(*outputs[i]); + } for (size_t i = 0; i < _terms.size(); ++i) { const State &childState = _terms[i]->getState(); for (size_t j = 0; j < childState.numFields(); ++j) { - childMatch.emplace_back(childState.field(j).resolve(*md), _exactness[i]); + auto *child_term_field_match_data = childState.field(j).resolve(*md); + unpack_needs[child_term_field_match_data->getFieldId()].notify(*child_term_field_match_data); + childMatch.emplace_back(child_term_field_match_data, _exactness[i]); } children[i] = _terms[i]->createSearch(*md, strict).release(); } |