diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2020-05-16 13:34:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-16 13:34:38 +0200 |
commit | aa28fc13c558ac2f81aa3b0ed0256db675d681db (patch) | |
tree | dbd8e8b0332186d9a8ff49b76c2055c9ab3122f6 /searchlib | |
parent | 4ea5b4397171ffce0a30b6905b508800190f9b7d (diff) | |
parent | ce1b5bdb2538d5bfe2d354eb40c139af6f95cfc9 (diff) |
Merge pull request #13264 from vespa-engine/toregge/unpack-interleaved-features-for-equiv-search
Unpack interleaved features if they are needed in equiv search.
Diffstat (limited to 'searchlib')
6 files changed, 310 insertions, 133 deletions
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt index 4eb00cacf38..d01b3f84436 100644 --- a/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt +++ b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt @@ -1,8 +1,10 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +find_package(GTest REQUIRED) vespa_add_executable(searchlib_termmatchdatamerger_test_app TEST SOURCES termmatchdatamerger_test.cpp DEPENDS searchlib + GTest::GTest ) vespa_add_test(NAME searchlib_termmatchdatamerger_test_app COMMAND searchlib_termmatchdatamerger_test_app) diff --git a/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp index a8670f43a6b..eb04c34b595 100644 --- a/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp +++ b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp @@ -1,11 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/log/log.h> -LOG_SETUP("termmatchdatamerger_test"); -#include <vespa/vespalib/testkit/testapp.h> - #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/fef/termmatchdatamerger.h> +#include <vespa/vespalib/gtest/gtest.h> using namespace search::fef; @@ -21,19 +18,7 @@ TermFieldMatchDataPosition make_pos(uint32_t pos) } // namespace <unnamed> -class Test : public vespalib::TestApp -{ -public: - void testMergeEmptyInput(); - void testMergeSimple(); - void testMergeMultifield(); - void testMergeDuplicates(); - void testMergeFieldLength(); - int Main() override; -}; - -void -Test::testMergeEmptyInput() +TEST(TermMatchDataMergerTest, merge_empty_input) { TermFieldMatchData out; TermFieldMatchDataArray output; @@ -48,12 +33,11 @@ Test::testMergeEmptyInput() uint32_t docid = 5; in.reset(docid); merger.merge(docid); - EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQ(docid, out.getDocId()); EXPECT_TRUE(out.begin() == out.end()); } -void -Test::testMergeSimple() +TEST(TermMatchDataMergerTest, merge_simple) { TermFieldMatchData a; TermFieldMatchData b; @@ -86,26 +70,26 @@ Test::testMergeSimple() merger.merge(docid); - EXPECT_EQUAL(docid, out.getDocId()); - EXPECT_EQUAL(8u, out.end() - out.begin()); - - EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); - EXPECT_EQUAL( 7u, out.begin()[1].getPosition()); - EXPECT_EQUAL(10u, out.begin()[2].getPosition()); - EXPECT_EQUAL(15u, out.begin()[3].getPosition()); - EXPECT_EQUAL(20u, out.begin()[4].getPosition()); - EXPECT_EQUAL(22u, out.begin()[5].getPosition()); - EXPECT_EQUAL(27u, out.begin()[6].getPosition()); - EXPECT_EQUAL(28u, out.begin()[7].getPosition()); - - EXPECT_EQUAL(0.25, out.begin()[0].getMatchExactness()); - EXPECT_EQUAL( 0.5, out.begin()[1].getMatchExactness()); - EXPECT_EQUAL( 1.5, out.begin()[2].getMatchExactness()); - EXPECT_EQUAL( 1.0, out.begin()[3].getMatchExactness()); - EXPECT_EQUAL( 4.0, out.begin()[4].getMatchExactness()); - EXPECT_EQUAL(0.75, out.begin()[5].getMatchExactness()); - EXPECT_EQUAL( 3.0, out.begin()[6].getMatchExactness()); - EXPECT_EQUAL( 7.5, out.begin()[7].getMatchExactness()); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(8u, out.end() - out.begin()); + + EXPECT_EQ( 5u, out.begin()[0].getPosition()); + EXPECT_EQ( 7u, out.begin()[1].getPosition()); + EXPECT_EQ(10u, out.begin()[2].getPosition()); + EXPECT_EQ(15u, out.begin()[3].getPosition()); + EXPECT_EQ(20u, out.begin()[4].getPosition()); + EXPECT_EQ(22u, out.begin()[5].getPosition()); + EXPECT_EQ(27u, out.begin()[6].getPosition()); + EXPECT_EQ(28u, out.begin()[7].getPosition()); + + EXPECT_EQ(0.25, out.begin()[0].getMatchExactness()); + EXPECT_EQ( 0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQ( 1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQ( 1.0, out.begin()[3].getMatchExactness()); + EXPECT_EQ( 4.0, out.begin()[4].getMatchExactness()); + EXPECT_EQ(0.75, out.begin()[5].getMatchExactness()); + EXPECT_EQ( 3.0, out.begin()[6].getMatchExactness()); + EXPECT_EQ( 7.5, out.begin()[7].getMatchExactness()); // one stale input @@ -117,24 +101,22 @@ Test::testMergeSimple() merger.merge(docid); - EXPECT_EQUAL(docid, out.getDocId()); - EXPECT_EQUAL(3u, out.end() - out.begin()); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(3u, out.end() - out.begin()); - EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); - EXPECT_EQUAL(10u, out.begin()[1].getPosition()); - EXPECT_EQUAL(15u, out.begin()[2].getPosition()); + EXPECT_EQ( 5u, out.begin()[0].getPosition()); + EXPECT_EQ(10u, out.begin()[1].getPosition()); + EXPECT_EQ(15u, out.begin()[2].getPosition()); // both inputs are stale docid = 15; merger.merge(docid); - EXPECT_NOT_EQUAL(docid, out.getDocId()); + EXPECT_NE(docid, out.getDocId()); } - -void -Test::testMergeMultifield() +TEST(TermMatchDataMergerTest, merge_multiple_fields) { TermFieldMatchData a; TermFieldMatchData b; @@ -174,30 +156,29 @@ Test::testMergeMultifield() merger.merge(docid); - EXPECT_EQUAL(docid, out1.getDocId()); - EXPECT_EQUAL(docid, out2.getDocId()); - EXPECT_NOT_EQUAL(docid, out3.getDocId()); + EXPECT_EQ(docid, out1.getDocId()); + EXPECT_EQ(docid, out2.getDocId()); + EXPECT_NE(docid, out3.getDocId()); - EXPECT_EQUAL(2u, out1.end() - out1.begin()); - EXPECT_EQUAL(3u, out2.end() - out2.begin()); + EXPECT_EQ(2u, out1.end() - out1.begin()); + EXPECT_EQ(3u, out2.end() - out2.begin()); - EXPECT_EQUAL( 5u, out1.begin()[0].getPosition()); - EXPECT_EQUAL(15u, out1.begin()[1].getPosition()); + EXPECT_EQ( 5u, out1.begin()[0].getPosition()); + EXPECT_EQ(15u, out1.begin()[1].getPosition()); - EXPECT_EQUAL( 5u, out2.begin()[0].getPosition()); - EXPECT_EQUAL( 7u, out2.begin()[1].getPosition()); - EXPECT_EQUAL(20u, out2.begin()[2].getPosition()); + EXPECT_EQ( 5u, out2.begin()[0].getPosition()); + EXPECT_EQ( 7u, out2.begin()[1].getPosition()); + EXPECT_EQ(20u, out2.begin()[2].getPosition()); - EXPECT_EQUAL(1.0, out1.begin()[0].getMatchExactness()); - EXPECT_EQUAL(1.0, out1.begin()[1].getMatchExactness()); + EXPECT_EQ(1.0, out1.begin()[0].getMatchExactness()); + EXPECT_EQ(1.0, out1.begin()[1].getMatchExactness()); - EXPECT_EQUAL(1.5, out2.begin()[0].getMatchExactness()); - EXPECT_EQUAL(0.5, out2.begin()[1].getMatchExactness()); - EXPECT_EQUAL(1.5, out2.begin()[2].getMatchExactness()); + EXPECT_EQ(1.5, out2.begin()[0].getMatchExactness()); + EXPECT_EQ(0.5, out2.begin()[1].getMatchExactness()); + EXPECT_EQ(1.5, out2.begin()[2].getMatchExactness()); } -void -Test::testMergeDuplicates() +TEST(TermMatchDataMergerTest, merge_duplicates) { TermFieldMatchData a; TermFieldMatchData b; @@ -225,23 +206,22 @@ Test::testMergeDuplicates() merger.merge(docid); - EXPECT_EQUAL(docid, out.getDocId()); - EXPECT_EQUAL(5u, out.end() - out.begin()); - - EXPECT_EQUAL( 3u, out.begin()[0].getPosition()); - EXPECT_EQUAL(1.5, out.begin()[0].getMatchExactness()); - EXPECT_EQUAL( 5u, out.begin()[1].getPosition()); - EXPECT_EQUAL(0.5, out.begin()[1].getMatchExactness()); - EXPECT_EQUAL(10u, out.begin()[2].getPosition()); - EXPECT_EQUAL(1.5, out.begin()[2].getMatchExactness()); - EXPECT_EQUAL(15u, out.begin()[3].getPosition()); - EXPECT_EQUAL(1.5, out.begin()[3].getMatchExactness()); - EXPECT_EQUAL(17u, out.begin()[4].getPosition()); - EXPECT_EQUAL(1.5, out.begin()[4].getMatchExactness()); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(5u, out.end() - out.begin()); + + EXPECT_EQ( 3u, out.begin()[0].getPosition()); + EXPECT_EQ(1.5, out.begin()[0].getMatchExactness()); + EXPECT_EQ( 5u, out.begin()[1].getPosition()); + EXPECT_EQ(0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQ(10u, out.begin()[2].getPosition()); + EXPECT_EQ(1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQ(15u, out.begin()[3].getPosition()); + EXPECT_EQ(1.5, out.begin()[3].getMatchExactness()); + EXPECT_EQ(17u, out.begin()[4].getPosition()); + EXPECT_EQ(1.5, out.begin()[4].getMatchExactness()); } -void -Test::testMergeFieldLength() +TEST(TermMatchDataMergerTest, merge_max_element_length) { TermFieldMatchData a; TermFieldMatchData b; @@ -261,20 +241,93 @@ Test::testMergeFieldLength() b.appendPosition(make_pos(2)); merger.merge(docid); - EXPECT_EQUAL(docid, out.getDocId()); - EXPECT_EQUAL(1000u, out.getIterator().getFieldLength()); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(1000u, out.getIterator().getFieldLength()); +} + +class TermMatchDataMergerTest2 : public ::testing::Test +{ +protected: + TermFieldMatchData a; + TermFieldMatchData b; + MDMIs input; + TermFieldMatchData out; + TermFieldMatchDataArray output; + TermMatchDataMerger merger; + + TermMatchDataMergerTest2() + : a(), + b(), + input({{&a, 0.5},{&b, 1.5}}), + out(), + output(), + merger(input, output.add(&out)) + { + } +}; + +TEST_F(TermMatchDataMergerTest2, merge_no_normal_features) +{ + out.setNeedNormalFeatures(false); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5)); + + b.reset(docid); + b.appendPosition(make_pos(3)); + + merger.merge(docid); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(0u, out.size()); +} + +TEST_F(TermMatchDataMergerTest2, merge_interleaved_features) +{ + out.setNeedNormalFeatures(false); + out.setNeedInterleavedFeatures(true); + + uint32_t docid = 5; + + a.reset(docid); + a.setNumOccs(1); + a.setFieldLength(30); + + b.reset(docid); + b.setNumOccs(1); + b.setFieldLength(35); + + merger.merge(docid); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(2u, out.getNumOccs()); + EXPECT_EQ(35u, out.getFieldLength()); } -int -Test::Main() +TEST_F(TermMatchDataMergerTest2, merge_interleaved_features_with_detected_duplicate) { - TEST_INIT("termmatchdatamerger_test"); - testMergeEmptyInput(); - testMergeSimple(); - testMergeMultifield(); - testMergeDuplicates(); - testMergeFieldLength(); - TEST_DONE(); + out.setNeedNormalFeatures(true); + out.setNeedInterleavedFeatures(true); + + uint32_t docid = 5; + + a.reset(docid); + a.setNumOccs(1); + a.setFieldLength(30); + a.appendPosition(make_pos(5)); + + b.reset(docid); + b.setNumOccs(1); + b.setFieldLength(30); + b.appendPosition(make_pos(5)); + + merger.merge(docid); + EXPECT_EQ(docid, out.getDocId()); + EXPECT_EQ(1u, out.end() - out.begin()); + EXPECT_EQ( 5u, out.begin()[0].getPosition()); + EXPECT_EQ(1.5, out.begin()[0].getMatchExactness()); + EXPECT_EQ(1u, out.getNumOccs()); + EXPECT_EQ(30u, out.getFieldLength()); } -TEST_APPHOOK(Test); +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index bec1691df23..eb6e49747a1 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1291,7 +1291,7 @@ TEST("require that children does not optimize when parents refuse them to") { } } -TEST("require_that_unpack_optimization_is_overruled_by_equiv") { +TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") { FieldSpecBaseList fields; fields.add(FieldSpecBase(1, 1)); fields.add(FieldSpecBase(2, 2)); @@ -1322,7 +1322,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::SelectiveUnpack>", e.getChildren()[0]->getClassName()); } @@ -1332,7 +1332,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") { EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); - EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::NoUnpack>", e.getChildren()[0]->getClassName()); } } diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp index 219735105de..412130ecaab 100644 --- a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp +++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp @@ -19,7 +19,7 @@ protected: EquivTest(); ~EquivTest(); - void test_equiv(bool strict); + void test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features); }; EquivTest::EquivTest() = default; @@ -27,15 +27,15 @@ EquivTest::EquivTest() = default; EquivTest::~EquivTest() = default; void -EquivTest::test_equiv(bool strict) +EquivTest::test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features) { FakeResult a; FakeResult b; FakeResult c; - a.doc(5).pos(1); - b.doc(5).pos(2); - c.doc(5).pos(3).doc(10).pos(4); + a.doc(5).pos(1).len(30).field_length(30).num_occs(1); + b.doc(5).pos(2).len(30).field_length(30).num_occs(1); + c.doc(5).pos(3).len(30).field_length(30).num_occs(1).doc(10).pos(4).len(35).field_length(35).num_occs(1); MatchDataLayout subLayout; TermFieldHandle fbh11 = subLayout.allocTermField(1); @@ -52,6 +52,11 @@ EquivTest::test_equiv(bool strict) bp->addTerm(std::make_unique<FakeBlueprint>(FieldSpec("bar", 2, fbh22), c), 1.0); MatchData::UP md = MatchData::makeTestInstance(100, 10); + for (uint32_t field_id = 1; field_id <= 2; ++field_id) { + TermFieldMatchData &data = *md->resolveTermField(field_id); + data.setNeedNormalFeatures(unpack_normal_features); + data.setNeedInterleavedFeatures(unpack_interleaved_features); + } bp->fetchPostings(ExecuteInfo::create(strict)); SearchIterator::UP search = bp->createSearch(*md, strict); search->initFullRange(); @@ -69,25 +74,43 @@ EquivTest::test_equiv(bool strict) EXPECT_EQ(1u, data.getFieldId()); EXPECT_EQ(5u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(1u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(1u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(1u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(1u, data.getNumOccs()); + EXPECT_EQ(30u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } { TermFieldMatchData &data = *md->resolveTermField(2); EXPECT_EQ(2u, data.getFieldId()); EXPECT_EQ(5u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(2u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(2u, itr.getPosition()); - itr.next(); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(3u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(2u, itr.getPosition()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(3u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(2u, data.getNumOccs()); + EXPECT_EQ(30u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } } EXPECT_TRUE(!search->seek(7)); @@ -104,11 +127,20 @@ EquivTest::test_equiv(bool strict) EXPECT_EQ(2u, data.getFieldId()); EXPECT_EQ(10u, data.getDocId()); FieldPositionsIterator itr = data.getIterator(); - EXPECT_EQ(1u, itr.size()); - ASSERT_TRUE(itr.valid()); - EXPECT_EQ(4u, itr.getPosition()); - itr.next(); + if (unpack_normal_features) { + EXPECT_EQ(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQ(4u, itr.getPosition()); + itr.next(); + } EXPECT_TRUE(!itr.valid()); + if (unpack_interleaved_features) { + EXPECT_EQ(1u, data.getNumOccs()); + EXPECT_EQ(35u, data.getFieldLength()); + } else { + EXPECT_EQ(0u, data.getNumOccs()); + EXPECT_EQ(0u, data.getFieldLength()); + } } } EXPECT_TRUE(!search->seek(13)); @@ -122,12 +154,42 @@ EquivTest::test_equiv(bool strict) TEST_F(EquivTest, nonstrict) { - test_equiv(false); + test_equiv(false, true, false); } TEST_F(EquivTest, strict) { - test_equiv(true); + test_equiv(true, true, false); +} + +TEST_F(EquivTest, nonstrict_no_normal_no_interleaved) +{ + test_equiv(false, false, false); +} + +TEST_F(EquivTest, strict_no_normal_no_interleaved) +{ + test_equiv(true, false, false); +} + +TEST_F(EquivTest, nonstrict_no_normal_interleaved) +{ + test_equiv(false, false, true); +} + +TEST_F(EquivTest, strict_no_normal_interleaved) +{ + test_equiv(true, false, true); +} + +TEST_F(EquivTest, nonstrict_normal_interleaved) +{ + test_equiv(false, true, true); +} + +TEST_F(EquivTest, strict_normal_interleaved) +{ + test_equiv(true, true, true); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp index 97cb829e30c..973e11fc0d2 100644 --- a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp +++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp @@ -41,31 +41,52 @@ TermMatchDataMerger::merge(uint32_t docid, { _scratch.clear(); bool wasMatch = false; + bool needs_normal_features = out.needs_normal_features(); + bool needs_interleaved_features = out.needs_interleaved_features(); + uint32_t num_occs = 0u; + uint16_t field_length = 0u; for (size_t i = 0; i < in.size(); ++i) { const TermFieldMatchData *md = in[i].matchData; if (md->getDocId() == docid) { - for (const TermFieldMatchDataPosition &iter : *md) { - double exactness = in[i].exactness * iter.getMatchExactness(); - _scratch.push_back(iter); - _scratch.back().setMatchExactness(exactness); + if (needs_normal_features) { + for (const TermFieldMatchDataPosition &iter : *md) { + double exactness = in[i].exactness * iter.getMatchExactness(); + _scratch.push_back(iter); + _scratch.back().setMatchExactness(exactness); + } + } + if (needs_interleaved_features) { + num_occs += md->getNumOccs(); + field_length = std::max(field_length, md->getFieldLength()); } wasMatch = true; } } if (wasMatch) { out.reset(docid); - if (_scratch.size() > 0) { - std::sort(_scratch.begin(), _scratch.end(), - TermFieldMatchDataPosition::compareWithExactness); - TermFieldMatchDataPosition prev = _scratch[0]; - for (size_t i = 1; i < _scratch.size(); ++i) { - const TermFieldMatchDataPosition &curr = _scratch[i]; - if (prev.key() < curr.key()) { - out.appendPosition(prev); - prev = curr; + if (needs_normal_features) { + num_occs = 0; + if (_scratch.size() > 0) { + std::sort(_scratch.begin(), _scratch.end(), + TermFieldMatchDataPosition::compareWithExactness); + TermFieldMatchDataPosition prev = _scratch[0]; + for (size_t i = 1; i < _scratch.size(); ++i) { + const TermFieldMatchDataPosition &curr = _scratch[i]; + if (prev.key() < curr.key()) { + out.appendPosition(prev); + prev = curr; + ++num_occs; + } } + out.appendPosition(prev); + ++num_occs; } - out.appendPosition(prev); + } + if (needs_interleaved_features) { + constexpr uint32_t max_num_occs = std::numeric_limits<uint16_t>::max(); + uint16_t capped_num_occs = std::min(num_occs, max_num_occs); + out.setNumOccs(std::min(capped_num_occs, field_length)); + out.setFieldLength(field_length); } } } diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp index 08a05b25772..cf378c95487 100644 --- a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp @@ -4,9 +4,42 @@ #include "equivsearch.h" #include "field_spec.hpp" #include <vespa/vespalib/objects/visit.hpp> +#include <vespa/vespalib/stllike/hash_map.hpp> namespace search::queryeval { +namespace { + +class UnpackNeed +{ + bool _needs_normal_features; + bool _needs_interleaved_features; +public: + UnpackNeed() + : _needs_normal_features(false), + _needs_interleaved_features(false) + { + } + + void observe(const fef::TermFieldMatchData &output) + { + if (output.needs_normal_features()) { + _needs_normal_features = true; + } + if (output.needs_interleaved_features()) { + _needs_interleaved_features = true; + } + } + + void notify(fef::TermFieldMatchData &input) const + { + input.setNeedNormalFeatures(_needs_normal_features); + input.setNeedInterleavedFeatures(_needs_interleaved_features); + } +}; + +}; + EquivBlueprint::EquivBlueprint(const FieldSpecBaseList &fields, fef::MatchDataLayout subtree_mdl) : ComplexLeafBlueprint(fields), @@ -26,10 +59,16 @@ EquivBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &outputs, bo fef::MatchData::UP md = _layout.createMatchData(); MultiSearch::Children children(_terms.size()); fef::TermMatchDataMerger::Inputs childMatch; + vespalib::hash_map<uint16_t, UnpackNeed> unpack_needs(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + unpack_needs[outputs[i]->getFieldId()].observe(*outputs[i]); + } for (size_t i = 0; i < _terms.size(); ++i) { const State &childState = _terms[i]->getState(); for (size_t j = 0; j < childState.numFields(); ++j) { - childMatch.emplace_back(childState.field(j).resolve(*md), _exactness[i]); + auto *child_term_field_match_data = childState.field(j).resolve(*md); + unpack_needs[child_term_field_match_data->getFieldId()].notify(*child_term_field_match_data); + childMatch.emplace_back(child_term_field_match_data, _exactness[i]); } children[i] = _terms[i]->createSearch(*md, strict).release(); } |