diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-06-21 11:28:35 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-06-21 11:30:50 +0000 |
commit | bad7460816a4f3e229be227b04c4cbaa27049ed0 (patch) | |
tree | 8049ac203e682c7a73d7fbb71e1b60929db2b267 /searchlib/src | |
parent | b729f40270612be3074ecd1ae584a0b997a88b23 (diff) |
Support unpacking of interleaved features in memory posting list iterators.
Diffstat (limited to 'searchlib/src')
8 files changed, 269 insertions, 167 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 90e33714cac..ca02573bea2 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -40,9 +40,7 @@ using vespalib::GenerationHandler; namespace memoryindex { using test::WrapInserter; -using FieldIndexType = FieldIndex<false>; -using PostingList = FieldIndexType::PostingList; -using PostingConstItr = PostingList::ConstIterator; +using NormalFieldIndex = FieldIndex<false>; class MyBuilder : public IndexBuilder { private: @@ -131,11 +129,21 @@ public: } }; +struct SimpleMatchData { + TermFieldMatchData term; + TermFieldMatchDataArray array; + SimpleMatchData() : term(), array() { + array.add(&term); + } + ~SimpleMatchData() {} +}; + std::string -toString(FieldPositionsIterator posItr, +toString(const SimpleMatchData& match_data, bool hasElements = false, bool hasWeights = false) { + auto posItr = match_data.term.getIterator(); std::stringstream ss; ss << "{"; ss << posItr.getFieldLength() << ":"; @@ -156,16 +164,15 @@ toString(FieldPositionsIterator posItr, return ss.str(); } +template <typename PostingIteratorType> bool assertPostingList(const std::string &exp, - PostingConstItr itr, + PostingIteratorType itr, const FeatureStore *store = nullptr) { std::stringstream ss; FeatureStore::DecodeContextCooked decoder(nullptr); - TermFieldMatchData tfmd; - TermFieldMatchDataArray matchData; - matchData.add(&tfmd); + SimpleMatchData match_data; ss << "["; for (size_t i = 0; itr.valid(); ++itr, ++i) { if (i > 0) ss << ","; @@ -175,8 +182,8 @@ assertPostingList(const std::string &exp, EntryRef ref(itr.getData().get_features()); store->setupForField(0, decoder); store->setupForUnpackFeatures(ref, decoder); - decoder.unpackFeatures(matchData, docId); - ss << toString(tfmd.getIterator()); + decoder.unpackFeatures(match_data.array, docId); + ss << toString(match_data); } } ss << "]"; @@ -185,8 +192,9 @@ assertPostingList(const std::string &exp, return result; } +template <typename PostingIteratorType> bool -assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr) +assertPostingList(std::vector<uint32_t> &exp, PostingIteratorType itr) { std::stringstream ss; ss << "["; @@ -198,21 +206,25 @@ assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr) return assertPostingList(ss.str(), itr); } -FieldIndexType::PostingList::Iterator +template <bool interleaved_features> +typename FieldIndex<interleaved_features>::PostingList::Iterator find_in_field_index(const vespalib::stringref word, uint32_t field_id, const FieldIndexCollection& fic) { + using FieldIndexType = FieldIndex<interleaved_features>; auto* field_index = dynamic_cast<FieldIndexType*>(fic.getFieldIndex(field_id)); assert(field_index != nullptr); return field_index->find(word); } -FieldIndexType::PostingList::ConstIterator +template <bool interleaved_features> +typename FieldIndex<interleaved_features>::PostingList::ConstIterator find_frozen_in_field_index(const vespalib::stringref word, uint32_t field_id, const FieldIndexCollection& fic) { + using FieldIndexType = FieldIndex<interleaved_features>; auto* field_index = dynamic_cast<FieldIndexType*>(fic.getFieldIndex(field_id)); assert(field_index != nullptr); return field_index->findFrozen(word); @@ -352,7 +364,7 @@ public: bool assertPosting(const vespalib::string &word, uint32_t fieldId) { std::vector<uint32_t> exp = _mock.find(word, fieldId); - PostingConstItr itr = find_in_field_index(word, fieldId, _fieldIndexes); + auto itr = find_in_field_index<false>(word, fieldId, _fieldIndexes); bool result = assertPostingList(exp, itr); EXPECT_TRUE(result); return result; @@ -410,7 +422,7 @@ public: { } - MyDrainRemoves(FieldIndexType& field_index) + MyDrainRemoves(IFieldIndex& field_index) : _remover(field_index.getDocumentRemover()) { } @@ -486,6 +498,7 @@ make_single_field_schema() return result; } +template <typename FieldIndexType> struct FieldIndexTest : public ::testing::Test { Schema schema; FieldIndexType idx; @@ -494,12 +507,166 @@ struct FieldIndexTest : public ::testing::Test { idx(schema, 0) { } + ~FieldIndexTest() {} SearchIterator::UP search(const vespalib::stringref word, - const TermFieldMatchDataArray& match_data) { - return make_search_iterator<false>(idx.find(word), idx.getFeatureStore(), 0, match_data); + const SimpleMatchData& match_data) { + return make_search_iterator<FieldIndexType::has_interleaved_features>(idx.find(word), idx.getFeatureStore(), 0, match_data.array); } }; +using FieldIndexTestTypes = ::testing::Types<FieldIndex<false>, FieldIndex<true>>; +TYPED_TEST_CASE(FieldIndexTest, FieldIndexTestTypes); + +// Disable warnings emitted by gtest generated files when using typed tests +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wsuggest-override" +#endif + +TYPED_TEST(FieldIndexTest, require_that_fresh_insert_works) +{ + EXPECT_TRUE(assertPostingList("[]", this->idx.find("a"))); + EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a"))); + EXPECT_EQ(0u, this->idx.getNumUniqueWords()); + WrapInserter(this->idx).word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", this->idx.find("a"))); + EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a"))); + this->idx.commit(); + EXPECT_TRUE(assertPostingList("[10]", this->idx.findFrozen("a"))); + EXPECT_EQ(1u, this->idx.getNumUniqueWords()); +} + +TYPED_TEST(FieldIndexTest, require_that_append_insert_works) +{ + WrapInserter(this->idx).word("a").add(10).flush().rewind(). + word("a").add(5).flush(); + EXPECT_TRUE(assertPostingList("[5,10]", this->idx.find("a"))); + EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a"))); + WrapInserter(this->idx).rewind().word("a").add(20).flush(); + EXPECT_TRUE(assertPostingList("[5,10,20]", this->idx.find("a"))); + EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a"))); + this->idx.commit(); + EXPECT_TRUE(assertPostingList("[5,10,20]", this->idx.findFrozen("a"))); +} + +TYPED_TEST(FieldIndexTest, require_that_remove_works) +{ + WrapInserter(this->idx).word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[]", this->idx.find("a"))); + WrapInserter(this->idx).add(10).add(20).add(30).flush(); + EXPECT_TRUE(assertPostingList("[10,20,30]", this->idx.find("a"))); + WrapInserter(this->idx).rewind().word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[20,30]", this->idx.find("a"))); + WrapInserter(this->idx).remove(20).flush(); + EXPECT_TRUE(assertPostingList("[30]", this->idx.find("a"))); + WrapInserter(this->idx).remove(30).flush(); + EXPECT_TRUE(assertPostingList("[]", this->idx.find("a"))); + EXPECT_EQ(1u, this->idx.getNumUniqueWords()); + MyDrainRemoves(this->idx).drain(10); + WrapInserter(this->idx).rewind().word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", this->idx.find("a"))); +} + +void +addElement(DocIdAndFeatures &f, + uint32_t elemLen, + uint32_t numOccs, + int32_t weight = 1) +{ + f.elements().emplace_back(f.elements().size(), weight, elemLen); + f.elements().back().setNumOccs(numOccs); + for (uint32_t i = 0; i < numOccs; ++i) { + f.word_positions().emplace_back(i); + } +} + +DocIdAndFeatures +getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) +{ + DocIdAndFeatures f; + addElement(f, elemLen, numOccs, weight); + f.set_num_occs(numOccs); + f.set_field_length(elemLen); + return f; +} + +TYPED_TEST(FieldIndexTest, require_that_posting_iterator_is_working) +{ + WrapInserter(this->idx).word("a").add(10, getFeatures(4, 1)). + add(20, getFeatures(5, 2)). + add(30, getFeatures(6, 1)). + add(40, getFeatures(7, 2)).flush(); + SimpleMatchData match_data; + { + auto itr = this->search("not", match_data); + itr->initFullRange(); + EXPECT_TRUE(itr->isAtEnd()); + } + { + auto itr = this->search("a", match_data); + itr->initFullRange(); + EXPECT_EQ(10u, itr->getDocId()); + itr->unpack(10); + EXPECT_EQ("{4:0}", toString(match_data)); + EXPECT_TRUE(!itr->seek(25)); + EXPECT_EQ(30u, itr->getDocId()); + itr->unpack(30); + EXPECT_EQ("{6:0}", toString(match_data)); + EXPECT_TRUE(itr->seek(40)); + EXPECT_EQ(40u, itr->getDocId()); + itr->unpack(40); + EXPECT_EQ("{7:0,1}", toString(match_data)); + EXPECT_TRUE(!itr->seek(41)); + EXPECT_TRUE(itr->isAtEnd()); + } +} + +#pragma GCC diagnostic pop + +struct FieldIndexInterleavedFeaturesTest : public FieldIndexTest<FieldIndex<true>> { + SimpleMatchData match_data; + FieldIndexInterleavedFeaturesTest() + : FieldIndexTest<FieldIndex<true>>() + { + WrapInserter(idx).word("a").add(10, getFeatures(5, 2)).flush(); + } + void + expect_features_unpacked(const std::string& exp_field_positions, + uint32_t exp_num_occs, + uint32_t exp_field_length) { + auto itr = search("a", match_data); + itr->initFullRange(); + EXPECT_EQ(10u, itr->getDocId()); + itr->unpack(10); + EXPECT_EQ(exp_field_positions, toString(match_data)); + EXPECT_EQ(exp_num_occs, match_data.term.getNumOccs()); + EXPECT_EQ(exp_field_length, match_data.term.getFieldLength()); + EXPECT_TRUE(!itr->seek(11)); + EXPECT_TRUE(itr->isAtEnd()); + } +}; + +TEST_F(FieldIndexInterleavedFeaturesTest, only_normal_features_are_unpacked) +{ + match_data.term.setNeedNormalFeatures(true); + match_data.term.setNeedInterleavedFeatures(false); + expect_features_unpacked("{5:0,1}", 0, 0); +} + +TEST_F(FieldIndexInterleavedFeaturesTest, only_interleaved_features_are_unpacked) +{ + match_data.term.setNeedNormalFeatures(false); + match_data.term.setNeedInterleavedFeatures(true); + expect_features_unpacked("{1000000:}", 2, 5); +} + +TEST_F(FieldIndexInterleavedFeaturesTest, both_normal_and_interleaved_features_are_unpacked) +{ + match_data.term.setNeedNormalFeatures(true); + match_data.term.setNeedInterleavedFeatures(true); + expect_features_unpacked("{5:0,1}", 2, 5); +} + Schema make_multi_field_schema() { @@ -511,8 +678,6 @@ make_multi_field_schema() return result; } - - struct FieldIndexCollectionTest : public ::testing::Test { Schema schema; FieldIndexCollection fic; @@ -523,38 +688,12 @@ struct FieldIndexCollectionTest : public ::testing::Test { } ~FieldIndexCollectionTest() {} - FieldIndexType::PostingList::Iterator find(const vespalib::stringref word, - uint32_t fieldId) const { - return find_in_field_index(word, fieldId, fic); + NormalFieldIndex::PostingList::Iterator find(const vespalib::stringref word, + uint32_t field_id) const { + return find_in_field_index<false>(word, field_id, fic); } }; -TEST_F(FieldIndexTest, require_that_fresh_insert_works) -{ - EXPECT_TRUE(assertPostingList("[]", idx.find("a"))); - EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a"))); - EXPECT_EQ(0u, idx.getNumUniqueWords()); - WrapInserter(idx).word("a").add(10).flush(); - EXPECT_TRUE(assertPostingList("[10]", idx.find("a"))); - EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a"))); - idx.commit(); - EXPECT_TRUE(assertPostingList("[10]", idx.findFrozen("a"))); - EXPECT_EQ(1u, idx.getNumUniqueWords()); -} - -TEST_F(FieldIndexTest, require_that_append_insert_works) -{ - WrapInserter(idx).word("a").add(10).flush().rewind(). - word("a").add(5).flush(); - EXPECT_TRUE(assertPostingList("[5,10]", idx.find("a"))); - EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a"))); - WrapInserter(idx).rewind().word("a").add(20).flush(); - EXPECT_TRUE(assertPostingList("[5,10,20]", idx.find("a"))); - EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a"))); - idx.commit(); - EXPECT_TRUE(assertPostingList("[5,10,20]", idx.findFrozen("a"))); -} - TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_multiple_fields_can_exist) { WrapInserter(fic, 0).word("a").add(10).word("b").add(11).add(15).flush(); @@ -568,24 +707,6 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_mult EXPECT_TRUE(assertPostingList("[]", find("c", 0))); } -TEST_F(FieldIndexTest, require_that_remove_works) -{ - WrapInserter(idx).word("a").remove(10).flush(); - EXPECT_TRUE(assertPostingList("[]", idx.find("a"))); - WrapInserter(idx).add(10).add(20).add(30).flush(); - EXPECT_TRUE(assertPostingList("[10,20,30]", idx.find("a"))); - WrapInserter(idx).rewind().word("a").remove(10).flush(); - EXPECT_TRUE(assertPostingList("[20,30]", idx.find("a"))); - WrapInserter(idx).remove(20).flush(); - EXPECT_TRUE(assertPostingList("[30]", idx.find("a"))); - WrapInserter(idx).remove(30).flush(); - EXPECT_TRUE(assertPostingList("[]", idx.find("a"))); - EXPECT_EQ(1u, idx.getNumUniqueWords()); - MyDrainRemoves(idx).drain(10); - WrapInserter(idx).rewind().word("a").add(10).flush(); - EXPECT_TRUE(assertPostingList("[10]", idx.find("a"))); -} - TEST_F(FieldIndexCollectionTest, require_that_multiple_insert_and_remove_works) { MyInserter inserter(schema); @@ -628,27 +749,6 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_insert_and_remove_works) EXPECT_TRUE(inserter.assertPostings()); } -void -addElement(DocIdAndFeatures &f, - uint32_t elemLen, - uint32_t numOccs, - int32_t weight = 1) -{ - f.elements().emplace_back(f.elements().size(), weight, elemLen); - f.elements().back().setNumOccs(numOccs); - for (uint32_t i = 0; i < numOccs; ++i) { - f.word_positions().emplace_back(i); - } -} - -DocIdAndFeatures -getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) -{ - DocIdAndFeatures f; - addElement(f, elemLen, numOccs, weight); - return f; -} - TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists) { WrapInserter(fic, 0).word("a").add(1, getFeatures(4, 2)).flush(); @@ -666,39 +766,6 @@ TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists) featureStorePtr(fic, 1))); } -TEST_F(FieldIndexTest, require_that_posting_iterator_is_working) -{ - WrapInserter(idx).word("a").add(10, getFeatures(4, 1)). - add(20, getFeatures(5, 2)). - add(30, getFeatures(6, 1)). - add(40, getFeatures(7, 2)).flush(); - TermFieldMatchData tfmd; - TermFieldMatchDataArray matchData; - matchData.add(&tfmd); - { - auto itr = search("not", matchData); - itr->initFullRange(); - EXPECT_TRUE(itr->isAtEnd()); - } - { - auto itr = search("a", matchData); - itr->initFullRange(); - EXPECT_EQ(10u, itr->getDocId()); - itr->unpack(10); - EXPECT_EQ("{4:0}", toString(tfmd.getIterator())); - EXPECT_TRUE(!itr->seek(25)); - EXPECT_EQ(30u, itr->getDocId()); - itr->unpack(30); - EXPECT_EQ("{6:0}", toString(tfmd.getIterator())); - EXPECT_TRUE(itr->seek(40)); - EXPECT_EQ(40u, itr->getDocId()); - itr->unpack(40); - EXPECT_EQ("{7:0,1}", toString(tfmd.getIterator())); - EXPECT_TRUE(!itr->seek(41)); - EXPECT_TRUE(itr->isAtEnd()); - } -} - TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_working) { MyBuilder b(schema); @@ -791,16 +858,16 @@ public: _inv(_schema, _invertThreads, _pushThreads, _fic) { } - PostingList::Iterator find(const vespalib::stringref word, uint32_t field_id) const { - return find_in_field_index(word, field_id, _fic); + NormalFieldIndex::PostingList::Iterator find(const vespalib::stringref word, uint32_t field_id) const { + return find_in_field_index<false>(word, field_id, _fic); } - PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t field_id) const { - return find_frozen_in_field_index(word, field_id, _fic); + NormalFieldIndex::PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t field_id) const { + return find_frozen_in_field_index<false>(word, field_id, _fic); } SearchIterator::UP search(const vespalib::stringref word, uint32_t field_id, - const TermFieldMatchDataArray& match_data) { + const SimpleMatchData& match_data) { return make_search_iterator<false>(findFrozen(word, field_id), featureStoreRef(_fic, field_id), - field_id, match_data); + field_id, match_data.array); } }; @@ -956,58 +1023,56 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) afterStats._activeBuffers, afterStats._holdBuffers); - TermFieldMatchData tfmd; - TermFieldMatchDataArray matchData; - matchData.add(&tfmd); + SimpleMatchData match_data; { - auto itr = search("not", 0, matchData); + auto itr = search("not", 0, match_data); itr->initFullRange(); EXPECT_TRUE(itr->isAtEnd()); } { - auto itr = search("a", 0, matchData); + auto itr = search("a", 0, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); - EXPECT_EQ("{4:0}", toString(tfmd.getIterator())); + EXPECT_EQ("{4:0}", toString(match_data)); EXPECT_TRUE(!itr->seek(25)); EXPECT_EQ(30u, itr->getDocId()); itr->unpack(30); - EXPECT_EQ("{6:0}", toString(tfmd.getIterator())); + EXPECT_EQ("{6:0}", toString(match_data)); EXPECT_TRUE(itr->seek(40)); EXPECT_EQ(40u, itr->getDocId()); itr->unpack(40); - EXPECT_EQ("{7:0,1,4}", toString(tfmd.getIterator())); + EXPECT_EQ("{7:0,1,4}", toString(match_data)); EXPECT_TRUE(!itr->seek(41)); EXPECT_TRUE(itr->isAtEnd()); } { - auto itr = search("x", 0, matchData); + auto itr = search("x", 0, match_data); itr->initFullRange(); EXPECT_TRUE(itr->isAtEnd()); } { - auto itr = search("x", 1, matchData); + auto itr = search("x", 1, match_data); itr->initFullRange(); EXPECT_EQ(30u, itr->getDocId()); itr->unpack(30); - EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(tfmd.getIterator(), true, true)); + EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(match_data, true, true)); } { - auto itr = search("x", 2, matchData); + auto itr = search("x", 2, match_data); itr->initFullRange(); EXPECT_EQ(30u, itr->getDocId()); itr->unpack(30); // weight is hardcoded to 1 for new style il doc array field - EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(tfmd.getIterator(), true, true)); + EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(match_data, true, true)); } { - auto itr = search("x", 3, matchData); + auto itr = search("x", 3, match_data); itr->initFullRange(); EXPECT_EQ(30u, itr->getDocId()); itr->unpack(30); EXPECT_EQ("{2:1[e=0,w=6,l=2]}", - toString(tfmd.getIterator(), true, true)); + toString(match_data, true, true)); } } @@ -1194,44 +1259,42 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) _pushThreads.sync(); - TermFieldMatchData tfmd; - TermFieldMatchDataArray matchData; - matchData.add(&tfmd); + SimpleMatchData match_data; { uint32_t fieldId = _schema.getIndexFieldId("iu"); - auto itr = search("not", fieldId, matchData); + auto itr = search("not", fieldId, match_data); itr->initFullRange(); EXPECT_TRUE(itr->isAtEnd()); } { uint32_t fieldId = _schema.getIndexFieldId("iu"); - auto itr = search("example", fieldId, matchData); + auto itr = search("example", fieldId, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); - EXPECT_EQ("{9:2}", toString(tfmd.getIterator())); + EXPECT_EQ("{9:2}", toString(match_data)); EXPECT_TRUE(!itr->seek(25)); EXPECT_TRUE(itr->isAtEnd()); } { uint32_t fieldId = _schema.getIndexFieldId("iau"); - auto itr = search("example", fieldId, matchData); + auto itr = search("example", fieldId, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); EXPECT_EQ("{9:2[e=0,l=9]}", - toString(tfmd.getIterator(), true, false)); + toString(match_data, true, false)); EXPECT_TRUE(!itr->seek(25)); EXPECT_TRUE(itr->isAtEnd()); } { uint32_t fieldId = _schema.getIndexFieldId("iwu"); - auto itr = search("example", fieldId, matchData); + auto itr = search("example", fieldId, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); EXPECT_EQ("{9:2[e=0,w=4,l=9]}", - toString(tfmd.getIterator(), true, true)); + toString(match_data, true, true)); EXPECT_TRUE(!itr->seek(25)); EXPECT_TRUE(itr->isAtEnd()); } @@ -1272,34 +1335,32 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) _pushThreads.sync(); - TermFieldMatchData tfmd; - TermFieldMatchDataArray matchData; - matchData.add(&tfmd); + SimpleMatchData match_data; uint32_t fieldId = _schema.getIndexFieldId("f0"); { - auto itr = search("not", fieldId, matchData); + auto itr = search("not", fieldId, match_data); itr->initFullRange(); EXPECT_TRUE(itr->isAtEnd()); } { auto itr = search("我就" "是那个", - fieldId, matchData); + fieldId, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); - EXPECT_EQ("{2:0}", toString(tfmd.getIterator())); + EXPECT_EQ("{2:0}", toString(match_data)); EXPECT_TRUE(!itr->seek(25)); EXPECT_TRUE(itr->isAtEnd()); } { auto itr = search("大灰" "狼", - fieldId, matchData); + fieldId, match_data); itr->initFullRange(); EXPECT_EQ(10u, itr->getDocId()); itr->unpack(10); - EXPECT_EQ("{2:1}", toString(tfmd.getIterator())); + EXPECT_EQ("{2:1}", toString(match_data)); EXPECT_TRUE(!itr->seek(25)); EXPECT_TRUE(itr->isAtEnd()); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 37a558b17a3..4d18e0a2690 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -293,8 +293,8 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te (std::move(guard), posting_itr, getFeatureStore(), field, field_id, use_bit_vector); } -template -class FieldIndex<false>; +template class FieldIndex<false>; +template class FieldIndex<true>; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 05665945800..324de79d9e8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -32,6 +32,8 @@ class IOrderedFieldIndexInserter; template <bool interleaved_features> class FieldIndex : public FieldIndexBase { public: + static constexpr bool has_interleaved_features = interleaved_features; + // Mapping from docid -> feature ref using PostingListEntryType = PostingListEntry<interleaved_features>; using PostingList = btree::BTreeRoot<uint32_t, PostingListEntryType, search::btree::NoAggregated>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp index ee1fee3d935..7bf20151b11 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp @@ -32,5 +32,7 @@ FieldIndexBase::FieldIndexBase(const index::Schema& schema, uint32_t fieldId, { } +FieldIndexBase::~FieldIndexBase() = default; + } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h index 7efec1f2ae8..9c6bf823023 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h @@ -99,6 +99,7 @@ public: FieldIndexBase(const index::Schema& schema, uint32_t fieldId); FieldIndexBase(const index::Schema& schema, uint32_t fieldId, const index::FieldLengthInfo& info); + ~FieldIndexBase(); uint64_t getNumUniqueWords() const override { return _numUniqueWords; } const FeatureStore& getFeatureStore() const override { return _featureStore; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp index 0d2bb1f5371..c75087e8577 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp @@ -155,7 +155,7 @@ OrderedFieldIndexInserter<interleaved_features>::getWordRef() const return _dItr.getKey()._wordRef; } -template -class OrderedFieldIndexInserter<false>; +template class OrderedFieldIndexInserter<false>; +template class OrderedFieldIndexInserter<true>; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp index 7c6ceb26f31..ccb410a4e10 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp @@ -126,8 +126,10 @@ PostingIterator<interleaved_features, unpack_normal_features, unpack_interleaved _feature_decoder.unpackFeatures(_matchData, docId); setUnpacked(); } - if (unpack_interleaved_features) { - // TODO: implement + if (interleaved_features && unpack_interleaved_features) { + auto* tfmd = _matchData[0]; + tfmd->setNumOccs(_itr.getData().get_num_occs()); + tfmd->setFieldLength(_itr.getData().get_field_length()); } } @@ -139,8 +141,24 @@ make_search_iterator(typename FieldIndex<interleaved_features>::PostingList::Con const fef::TermFieldMatchDataArray& match_data) { assert(match_data.size() == 1); - return std::make_unique<PostingIterator<interleaved_features, true, false>> - (itr, feature_store, field_id, match_data); + auto* tfmd = match_data[0]; + if (tfmd->needs_normal_features()) { + if (tfmd->needs_interleaved_features()) { + return std::make_unique<PostingIterator<interleaved_features, true, true>> + (itr, feature_store, field_id, match_data); + } else { + return std::make_unique<PostingIterator<interleaved_features, true, false>> + (itr, feature_store, field_id, match_data); + } + } else { + if (tfmd->needs_interleaved_features()) { + return std::make_unique<PostingIterator<interleaved_features, false, true>> + (itr, feature_store, field_id, match_data); + } else { + return std::make_unique<PostingIterator<interleaved_features, false, false>> + (itr, feature_store, field_id, match_data); + } + } } template @@ -150,9 +168,24 @@ make_search_iterator<false>(typename FieldIndex<false>::PostingList::ConstIterat uint32_t, const fef::TermFieldMatchDataArray&); +template +queryeval::SearchIterator::UP +make_search_iterator<true>(typename FieldIndex<true>::PostingList::ConstIterator, + const FeatureStore&, + uint32_t, + const fef::TermFieldMatchDataArray&); + template class PostingIteratorBase<false>; +template class PostingIteratorBase<true>; +template class PostingIterator<false, false, false>; +template class PostingIterator<false, false, true>; template class PostingIterator<false, true, false>; +template class PostingIterator<false, true, true>; +template class PostingIterator<true, false, false>; +template class PostingIterator<true, false, true>; +template class PostingIterator<true, true, false>; +template class PostingIterator<true, true, true>; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h index 33cfda32843..f58a62ddef5 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h +++ b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h @@ -74,4 +74,7 @@ public: void update_features(datastore::EntryRef features) const { _features = features; } }; +template class PostingListEntry<false>; +template class PostingListEntry<true>; + } |