From d995dda4234ed90e4c9c46688c54c6be409e46bb Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Wed, 19 Jun 2019 09:30:13 +0000 Subject: Create interface for a memory field index. --- .../memoryindex/field_index/field_index_test.cpp | 112 +++++++++++++-------- .../vespa/searchlib/memoryindex/field_index.cpp | 80 +++++++++++++-- .../src/vespa/searchlib/memoryindex/field_index.h | 38 +++---- .../searchlib/memoryindex/field_index_collection.h | 26 ++--- .../vespa/searchlib/memoryindex/i_field_index.h | 47 +++++++++ .../memoryindex/i_ordered_field_index_inserter.h | 6 ++ .../vespa/searchlib/memoryindex/memory_index.cpp | 67 ++---------- .../memoryindex/ordered_field_index_inserter.h | 3 +- .../memoryindex/ordered_field_index_inserter.h | 33 ++---- .../searchlib/test/memoryindex/wrap_inserter.h | 2 +- 10 files changed, 250 insertions(+), 164 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/memoryindex/i_field_index.h diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 0f1c966ad5d..15d97d314a1 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -197,6 +197,25 @@ assertPostingList(std::vector &exp, PostingConstItr itr) return assertPostingList(ss.str(), itr); } +FieldIndex::PostingList::Iterator +find_in_field_index(const vespalib::stringref word, + uint32_t fieldId, + const FieldIndexCollection& fic) +{ + auto* field_index = dynamic_cast(fic.getFieldIndex(fieldId)); + assert(field_index != nullptr); + return field_index->find(word); +} + +FieldIndex::PostingList::ConstIterator +find_frozen_in_field_index(const vespalib::stringref word, + uint32_t fieldId, + const FieldIndexCollection& fic) +{ + auto* field_index = dynamic_cast(fic.getFieldIndex(fieldId)); + assert(field_index != nullptr); + return field_index->findFrozen(word); +} namespace { @@ -332,7 +351,7 @@ 
public: bool assertPosting(const vespalib::string &word, uint32_t fieldId) { std::vector exp = _mock.find(word, fieldId); - PostingConstItr itr = _fieldIndexes.find(word, fieldId); + PostingConstItr itr = find_in_field_index(word, fieldId, _fieldIndexes); bool result = assertPostingList(exp, itr); EXPECT_TRUE(result); return result; @@ -487,6 +506,8 @@ make_multi_field_schema() return result; } + + struct FieldIndexCollectionTest : public ::testing::Test { Schema schema; FieldIndexCollection fic; @@ -496,6 +517,11 @@ struct FieldIndexCollectionTest : public ::testing::Test { { } ~FieldIndexCollectionTest() {} + + FieldIndex::PostingList::Iterator find(const vespalib::stringref word, + uint32_t fieldId) const { + return find_in_field_index(word, fieldId, fic); + } }; TEST_F(FieldIndexTest, require_that_fresh_insert_works) @@ -529,12 +555,12 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_mult WrapInserter(fic, 0).word("a").add(10).word("b").add(11).add(15).flush(); WrapInserter(fic, 1).word("a").add(5).word("b").add(12).flush(); EXPECT_EQ(4u, fic.getNumUniqueWords()); - EXPECT_TRUE(assertPostingList("[10]", fic.find("a", 0))); - EXPECT_TRUE(assertPostingList("[5]", fic.find("a", 1))); - EXPECT_TRUE(assertPostingList("[11,15]", fic.find("b", 0))); - EXPECT_TRUE(assertPostingList("[12]", fic.find("b", 1))); - EXPECT_TRUE(assertPostingList("[]", fic.find("a", 2))); - EXPECT_TRUE(assertPostingList("[]", fic.find("c", 0))); + EXPECT_TRUE(assertPostingList("[10]", find("a", 0))); + EXPECT_TRUE(assertPostingList("[5]", find("a", 1))); + EXPECT_TRUE(assertPostingList("[11,15]", find("b", 0))); + EXPECT_TRUE(assertPostingList("[12]", find("b", 1))); + EXPECT_TRUE(assertPostingList("[]", find("a", 2))); + EXPECT_TRUE(assertPostingList("[]", find("c", 0))); } TEST_F(FieldIndexTest, require_that_remove_works) @@ -622,16 +648,16 @@ TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists) { WrapInserter(fic, 0).word("a").add(1, 
getFeatures(4, 2)).flush(); EXPECT_TRUE(assertPostingList("[1{4:0,1}]", - fic.find("a", 0), + find("a", 0), featureStorePtr(fic, 0))); WrapInserter(fic, 0).word("b").add(2, getFeatures(5, 1)). add(3, getFeatures(6, 2)).flush(); EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]", - fic.find("b", 0), + find("b", 0), featureStorePtr(fic, 0))); WrapInserter(fic, 1).word("c").add(4, getFeatures(7, 2)).flush(); EXPECT_TRUE(assertPostingList("[4{7:0,1}]", - fic.find("c", 1), + find("c", 1), featureStorePtr(fic, 1))); } @@ -764,6 +790,12 @@ public: _inv(_schema, _invertThreads, _pushThreads, _fic) { } + PostingList::Iterator find(const vespalib::stringref word, uint32_t fieldId) const { + return find_in_field_index(word, fieldId, _fic); + } + PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t fieldId) const { + return find_frozen_in_field_index(word, fieldId, _fic); + } }; class BasicInverterTest : public InverterTest { @@ -922,12 +954,12 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) TermFieldMatchDataArray matchData; matchData.add(&tfmd); { - PostingIterator itr(_fic.findFrozen("not", 0), featureStoreRef(_fic, 0), 0, matchData); + PostingIterator itr(findFrozen("not", 0), featureStoreRef(_fic, 0), 0, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(_fic.findFrozen("a", 0), featureStoreRef(_fic, 0), 0, matchData); + PostingIterator itr(findFrozen("a", 0), featureStoreRef(_fic, 0), 0, matchData); itr.initFullRange(); EXPECT_EQ(10u, itr.getDocId()); itr.unpack(10); @@ -944,19 +976,19 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(_fic.findFrozen("x", 0), featureStoreRef(_fic, 0), 0, matchData); + PostingIterator itr(findFrozen("x", 0), featureStoreRef(_fic, 0), 0, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(_fic.findFrozen("x", 1), featureStoreRef(_fic, 1), 1, matchData); + 
PostingIterator itr(findFrozen("x", 1), featureStoreRef(_fic, 1), 1, matchData); itr.initFullRange(); EXPECT_EQ(30u, itr.getDocId()); itr.unpack(30); EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(tfmd.getIterator(), true, true)); } { - PostingIterator itr(_fic.findFrozen("x", 2), featureStoreRef(_fic, 2), 2, matchData); + PostingIterator itr(findFrozen("x", 2), featureStoreRef(_fic, 2), 2, matchData); itr.initFullRange(); EXPECT_EQ(30u, itr.getDocId()); itr.unpack(30); @@ -964,7 +996,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(tfmd.getIterator(), true, true)); } { - PostingIterator itr(_fic.findFrozen("x", 3), featureStoreRef(_fic, 3), 3, matchData); + PostingIterator itr(findFrozen("x", 3), featureStoreRef(_fic, 3), 3, matchData); itr.initFullRange(); EXPECT_EQ(30u, itr.getDocId()); itr.unpack(30); @@ -994,20 +1026,20 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo myPushDocument(_inv); _pushThreads.sync(); - EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 0))); - EXPECT_TRUE(assertPostingList("[1,2]", _fic.find("b", 0))); - EXPECT_TRUE(assertPostingList("[2]", _fic.find("c", 0))); - EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 1))); - EXPECT_TRUE(assertPostingList("[1]", _fic.find("c", 1))); + EXPECT_TRUE(assertPostingList("[1]", find("a", 0))); + EXPECT_TRUE(assertPostingList("[1,2]", find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", find("c", 0))); + EXPECT_TRUE(assertPostingList("[1]", find("a", 1))); + EXPECT_TRUE(assertPostingList("[1]", find("c", 1))); myremove(1, _inv, _invertThreads); _pushThreads.sync(); - EXPECT_TRUE(assertPostingList("[]", _fic.find("a", 0))); - EXPECT_TRUE(assertPostingList("[2]", _fic.find("b", 0))); - EXPECT_TRUE(assertPostingList("[2]", _fic.find("c", 0))); - EXPECT_TRUE(assertPostingList("[]", _fic.find("a", 1))); - EXPECT_TRUE(assertPostingList("[]", _fic.find("c", 1))); + EXPECT_TRUE(assertPostingList("[]", find("a", 
0))); + EXPECT_TRUE(assertPostingList("[2]", find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", find("c", 0))); + EXPECT_TRUE(assertPostingList("[]", find("a", 1))); + EXPECT_TRUE(assertPostingList("[]", find("c", 1))); } Schema @@ -1161,7 +1193,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) matchData.add(&tfmd); { uint32_t fieldId = _schema.getIndexFieldId("iu"); - PostingIterator itr(_fic.findFrozen("not", fieldId), + PostingIterator itr(findFrozen("not", fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1169,7 +1201,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) } { uint32_t fieldId = _schema.getIndexFieldId("iu"); - PostingIterator itr(_fic.findFrozen("example", fieldId), + PostingIterator itr(findFrozen("example", fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1181,7 +1213,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) } { uint32_t fieldId = _schema.getIndexFieldId("iau"); - PostingIterator itr(_fic.findFrozen("example", fieldId), + PostingIterator itr(findFrozen("example", fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1194,7 +1226,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) } { uint32_t fieldId = _schema.getIndexFieldId("iwu"); - PostingIterator itr(_fic.findFrozen("example", fieldId), + PostingIterator itr(findFrozen("example", fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1247,16 +1279,16 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) matchData.add(&tfmd); uint32_t fieldId = _schema.getIndexFieldId("f0"); { - PostingIterator itr(_fic.findFrozen("not", fieldId), + PostingIterator itr(findFrozen("not", fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(_fic.findFrozen("我就" - "是那个", - fieldId), + PostingIterator 
itr(findFrozen("我就" + "是那个", + fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1267,9 +1299,9 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(_fic.findFrozen("大灰" - "狼", - fieldId), + PostingIterator itr(findFrozen("大灰" + "狼", + fieldId), featureStoreRef(_fic, fieldId), fieldId, matchData); itr.initFullRange(); @@ -1315,9 +1347,9 @@ struct RemoverTest : public FieldIndexCollectionTest { void assertPostingLists(const vespalib::string &e1, const vespalib::string &e2, const vespalib::string &e3) { - EXPECT_TRUE(assertPostingList(e1, fic.find("a", 1))); - EXPECT_TRUE(assertPostingList(e2, fic.find("a", 2))); - EXPECT_TRUE(assertPostingList(e3, fic.find("b", 1))); + EXPECT_TRUE(assertPostingList(e1, find("a", 1))); + EXPECT_TRUE(assertPostingList(e2, find("a", 2))); + EXPECT_TRUE(assertPostingList(e3, find("b", 1))); } void remove(uint32_t docId) { DocumentInverter inv(schema, _invertThreads, _pushThreads, fic); diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 7e40a4a2aa1..e492e3e7eee 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -2,21 +2,33 @@ #include "field_index.h" #include "ordered_field_index_inserter.h" -#include -#include +#include "posting_iterator.h" #include +#include +#include +#include +#include #include #include #include -#include -#include #include -#include +#include #include +#include +#include +#include +LOG_SETUP(".searchlib.memoryindex.field_index"); + +using search::fef::TermFieldMatchDataArray; using search::index::DocIdAndFeatures; -using search::index::WordDocElementFeatures; using search::index::Schema; +using search::index::WordDocElementFeatures; +using search::queryeval::BooleanMatchIteratorWrapper; +using search::queryeval::FieldSpecBase; +using 
search::queryeval::SearchIterator; +using search::queryeval::SimpleLeafBlueprint; +using vespalib::GenerationHandler; namespace search::memoryindex { @@ -225,6 +237,62 @@ FieldIndex::getMemoryUsage() const return usage; } +namespace { + +class MemoryTermBlueprint : public SimpleLeafBlueprint { +private: + GenerationHandler::Guard _guard; + FieldIndex::PostingList::ConstIterator _posting_itr; + const FeatureStore& _feature_store; + const uint32_t _field_id; + const bool _use_bit_vector; + +public: + MemoryTermBlueprint(GenerationHandler::Guard&& guard, + FieldIndex::PostingList::ConstIterator posting_itr, + const FeatureStore& feature_store, + const FieldSpecBase& field, + uint32_t field_id, + bool use_bit_vector) + : SimpleLeafBlueprint(field), + _guard(), + _posting_itr(posting_itr), + _feature_store(feature_store), + _field_id(field_id), + _use_bit_vector(use_bit_vector) + { + _guard = std::move(guard); + HitEstimate estimate(_posting_itr.size(), !_posting_itr.valid()); + setEstimate(estimate); + } + + SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda, bool) const override { + auto result = std::make_unique<PostingIterator>(_posting_itr, _feature_store, _field_id, tfmda); + if (_use_bit_vector) { + LOG(debug, "Return BooleanMatchIteratorWrapper: field_id(%u), doc_count(%zu)", + _field_id, _posting_itr.size()); + return std::make_unique<BooleanMatchIteratorWrapper>(std::move(result), tfmda); + } + LOG(debug, "Return PostingIterator: field_id(%u), doc_count(%zu)", + _field_id, _posting_itr.size()); + return result; + } +}; + +} + +std::unique_ptr<queryeval::SimpleLeafBlueprint> +FieldIndex::make_term_blueprint(const vespalib::string& term, + const queryeval::FieldSpecBase& field, + uint32_t field_id) +{ + auto guard = takeGenerationGuard(); + auto posting_itr = findFrozen(term); + bool use_bit_vector = field.isFilter(); + return std::make_unique<MemoryTermBlueprint>(std::move(guard), posting_itr, getFeatureStore(), + field, field_id, use_bit_vector); +} + } namespace search::btree { diff --git 
a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 27ba6b26a37..9c97ebf3e85 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -4,8 +4,9 @@ #include "feature_store.h" #include "field_index_remover.h" -#include "word_store.h" +#include "i_field_index.h" #include "posting_list_entry.h" +#include "word_store.h" #include #include #include @@ -18,10 +19,10 @@ namespace search::memoryindex { -class OrderedFieldIndexInserter; +class IOrderedFieldIndexInserter; /** - * Memory index for a single field using lock-free B-Trees in underlying components. + * Implementation of memory index for a single field using lock-free B-Trees in underlying components. * * It consists of the following components: * - WordStore containing all unique words in this field (across all documents). @@ -33,7 +34,7 @@ class OrderedFieldIndexInserter; * * Elements in the three stores are accessed using 32-bit references / handles. 
*/ -class FieldIndex { +class FieldIndex : public IFieldIndex { public: // Mapping from docid -> feature ref using PostingListEntryType = PostingListEntry; @@ -93,7 +94,7 @@ private: FeatureStore _featureStore; uint32_t _fieldId; FieldIndexRemover _remover; - std::unique_ptr _inserter; + std::unique_ptr _inserter; index::FieldLengthCalculator _calculator; public: @@ -114,11 +115,11 @@ public: PostingList::ConstIterator findFrozen(const vespalib::stringref word) const; - uint64_t getNumUniqueWords() const { return _numUniqueWords; } - const FeatureStore & getFeatureStore() const { return _featureStore; } - const WordStore &getWordStore() const { return _wordStore; } - OrderedFieldIndexInserter &getInserter() const { return *_inserter; } - index::FieldLengthCalculator &get_calculator() { return _calculator; } + uint64_t getNumUniqueWords() const override { return _numUniqueWords; } + const FeatureStore & getFeatureStore() const override { return _featureStore; } + const WordStore &getWordStore() const override { return _wordStore; } + IOrderedFieldIndexInserter &getInserter() override { return *_inserter; } + index::FieldLengthCalculator &get_calculator() override { return _calculator; } private: void freeze() { @@ -147,27 +148,30 @@ private: } public: - GenerationHandler::Guard takeGenerationGuard() { + GenerationHandler::Guard takeGenerationGuard() override { return _generationHandler.takeGuard(); } - void - compactFeatures(); + void compactFeatures() override; - void dump(search::index::IndexBuilder & indexBuilder); + void dump(search::index::IndexBuilder & indexBuilder) override; - vespalib::MemoryUsage getMemoryUsage() const; + vespalib::MemoryUsage getMemoryUsage() const override; DictionaryTree &getDictionaryTree() { return _dict; } PostingListStore &getPostingListStore() { return _postingListStore; } - FieldIndexRemover &getDocumentRemover() { return _remover; } + FieldIndexRemover &getDocumentRemover() override { return _remover; } - void commit() { + void 
commit() override { _remover.flush(); freeze(); transferHoldLists(); incGeneration(); trimHoldLists(); } + + std::unique_ptr make_term_blueprint(const vespalib::string& term, + const queryeval::FieldSpecBase& field, + uint32_t field_id) override; }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index 53f42658d0a..a737175d346 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -3,9 +3,14 @@ #pragma once #include "i_field_index_collection.h" -#include "field_index.h" +#include "i_field_index.h" +#include +#include -namespace search::index { class IFieldLengthInspector; } +namespace search::index { + class IFieldLengthInspector; + class Schema; +} namespace search::memoryindex { @@ -19,26 +24,15 @@ class FieldInverter; * for a given word in a given field. */ class FieldIndexCollection : public IFieldIndexCollection { -public: - using PostingList = FieldIndex::PostingList; - private: using GenerationHandler = vespalib::GenerationHandler; - std::vector> _fieldIndexes; + std::vector> _fieldIndexes; uint32_t _numFields; public: FieldIndexCollection(const index::Schema& schema, const index::IFieldLengthInspector& inspector); ~FieldIndexCollection(); - PostingList::Iterator find(const vespalib::stringref word, - uint32_t fieldId) const { - return _fieldIndexes[fieldId]->find(word); - } - - PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t fieldId) const { - return _fieldIndexes[fieldId]->findFrozen(word); - } uint64_t getNumUniqueWords() const { uint64_t numUniqueWords = 0; @@ -52,11 +46,11 @@ public: vespalib::MemoryUsage getMemoryUsage() const; - FieldIndex *getFieldIndex(uint32_t fieldId) const { + IFieldIndex *getFieldIndex(uint32_t fieldId) const { return _fieldIndexes[fieldId].get(); } - const std::vector> &getFieldIndexes() const { 
return _fieldIndexes; } + const std::vector> &getFieldIndexes() const { return _fieldIndexes; } uint32_t getNumFields() const { return _numFields; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h new file mode 100644 index 00000000000..86082c08d36 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h @@ -0,0 +1,47 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search::index { +class FieldLengthCalculator; +class IndexBuilder; +} + +namespace search::memoryindex { + +class FeatureStore; +class FieldIndexRemover; +class IOrderedFieldIndexInserter; +class WordStore; + +/** + * Interface for a memory index for a single field as seen from the FieldIndexCollection. + */ +class IFieldIndex { +public: + virtual ~IFieldIndex() {} + + virtual uint64_t getNumUniqueWords() const = 0; + virtual vespalib::MemoryUsage getMemoryUsage() const = 0; + virtual const FeatureStore& getFeatureStore() const = 0; + virtual const WordStore& getWordStore() const = 0; + virtual IOrderedFieldIndexInserter& getInserter() = 0; + virtual FieldIndexRemover& getDocumentRemover() = 0; + virtual index::FieldLengthCalculator& get_calculator() = 0; + virtual void compactFeatures() = 0; + virtual void dump(search::index::IndexBuilder& indexBuilder) = 0; + + virtual std::unique_ptr make_term_blueprint(const vespalib::string& term, + const queryeval::FieldSpecBase& field, + uint32_t field_id) = 0; + + // Should only be directly used by unit tests + virtual vespalib::GenerationHandler::Guard takeGenerationGuard() = 0; + virtual void commit() = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h index cf10db3c4d8..4da0844da58 100644 --- 
a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include namespace search::index { class DocIdAndFeatures; } @@ -29,6 +30,11 @@ public: */ virtual void add(uint32_t docId, const index::DocIdAndFeatures &features) = 0; + /** + * Returns the reference to the current word (only used by unit tests). + */ + virtual datastore::EntryRef getWordRef() const = 0; + /** * Remove (word, docId) tuple. */ diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index 6686745f8c2..d3d3004100c 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -3,16 +3,15 @@ #include "document_inverter.h" #include "field_index_collection.h" #include "memory_index.h" -#include "posting_iterator.h" #include #include -#include #include +#include #include -#include #include #include #include +#include #include LOG_SETUP(".searchlib.memoryindex.memory_index"); @@ -20,19 +19,17 @@ LOG_SETUP(".searchlib.memoryindex.memory_index"); using document::ArrayFieldValue; using document::WeightedSetFieldValue; using vespalib::LockGuard; -using vespalib::GenerationHandler; namespace search { -using fef::TermFieldMatchDataArray; using index::FieldLengthInfo; using index::IFieldLengthInspector; using index::IndexBuilder; using index::Schema; using index::SchemaUtil; -using query::NumberTerm; using query::LocationTerm; using query::Node; +using query::NumberTerm; using query::PredicateQuery; using query::PrefixTerm; using query::RangeTerm; @@ -40,16 +37,12 @@ using query::RegExpTerm; using query::StringTerm; using query::SubstringTerm; using query::SuffixTerm; -using queryeval::SearchIterator; -using queryeval::Searchable; -using queryeval::CreateBlueprintVisitorHelper; using queryeval::Blueprint; 
-using queryeval::BooleanMatchIteratorWrapper; +using queryeval::CreateBlueprintVisitorHelper; using queryeval::EmptyBlueprint; -using queryeval::FieldSpecBase; -using queryeval::FieldSpecBaseList; using queryeval::FieldSpec; using queryeval::IRequestContext; +using queryeval::Searchable; } @@ -141,47 +134,6 @@ MemoryIndex::dump(IndexBuilder &indexBuilder) namespace { -class MemTermBlueprint : public queryeval::SimpleLeafBlueprint { -private: - GenerationHandler::Guard _genGuard; - FieldIndex::PostingList::ConstIterator _pitr; - const FeatureStore &_featureStore; - const uint32_t _fieldId; - const bool _useBitVector; - -public: - MemTermBlueprint(GenerationHandler::Guard &&genGuard, - FieldIndex::PostingList::ConstIterator pitr, - const FeatureStore &featureStore, - const FieldSpecBase &field, - uint32_t fieldId, - bool useBitVector) - : SimpleLeafBlueprint(field), - _genGuard(), - _pitr(pitr), - _featureStore(featureStore), - _fieldId(fieldId), - _useBitVector(useBitVector) - { - _genGuard = std::move(genGuard); - HitEstimate estimate(_pitr.size(), !_pitr.valid()); - setEstimate(estimate); - } - - SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override { - auto search = std::make_unique<PostingIterator>(_pitr, _featureStore, _fieldId, tfmda); - if (_useBitVector) { - LOG(debug, "Return BooleanMatchIteratorWrapper: fieldId(%u), docCount(%zu)", - _fieldId, _pitr.size()); - return std::make_unique<BooleanMatchIteratorWrapper>(std::move(search), tfmda); - } - LOG(debug, "Return PostingIterator: fieldId(%u), docCount(%zu)", - _fieldId, _pitr.size()); - return search; - } - -}; - /** * Determines the correct Blueprint to use. 
**/ @@ -207,13 +159,8 @@ public: const vespalib::string termStr = queryeval::termAsString(n); LOG(debug, "searching for '%s' in '%s'", termStr.c_str(), _field.getName().c_str()); - FieldIndex *fieldIndex = _fieldIndexes.getFieldIndex(_fieldId); - GenerationHandler::Guard genGuard = fieldIndex->takeGenerationGuard(); - FieldIndex::PostingList::ConstIterator pitr = fieldIndex->findFrozen(termStr); - bool useBitVector = _field.isFilter(); - setResult(std::make_unique<MemTermBlueprint>(std::move(genGuard), pitr, - fieldIndex->getFeatureStore(), - _field, _fieldId, useBitVector)); + IFieldIndex* fieldIndex = _fieldIndexes.getFieldIndex(_fieldId); + setResult(fieldIndex->make_term_blueprint(termStr, _field, _fieldId)); } void visit(LocationTerm &n) override { visitTerm(n); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h index 9d6be2bcd94..529b1d6d6a7 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h @@ -72,8 +72,7 @@ public: */ void rewind() override; - // Used by unit test - datastore::EntryRef getWordRef() const; + datastore::EntryRef getWordRef() const override; }; } diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index 9680da7af11..c0ea7be0ce1 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -14,15 +14,14 @@ class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter { bool _show_interleaved_features; uint32_t _fieldId; - void - addComma() - { + void addComma() { if (!_first) { _ss << ","; } else { _first = false; } } + public: OrderedFieldIndexInserter() : _ss(), @@ -33,23 +32,17 @@ public: { } - 
virtual void - setNextWord(const vespalib::stringref word) override - { + virtual void setNextWord(const vespalib::stringref word) override { addComma(); _ss << "w=" << word; } - void - setFieldId(uint32_t fieldId) - { + void setFieldId(uint32_t fieldId) { _fieldId = fieldId; } - virtual void - add(uint32_t docId, - const index::DocIdAndFeatures &features) override - { + virtual void add(uint32_t docId, + const index::DocIdAndFeatures &features) override { (void) features; addComma(); _ss << "a=" << docId; @@ -85,9 +78,9 @@ public: } } - virtual void - remove(uint32_t docId) override - { + virtual datastore::EntryRef getWordRef() const override { return datastore::EntryRef(); } + + virtual void remove(uint32_t docId) override { addComma(); _ss << "r=" << docId; } @@ -99,15 +92,11 @@ public: _ss << "f=" << _fieldId; } - std::string - toStr() const - { + std::string toStr() const { return _ss.str(); } - void - reset() - { + void reset() { _ss.str(""); _first = true; _verbose = false; diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h index eeb09898aa2..647f624daea 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h @@ -12,7 +12,7 @@ namespace search::memoryindex::test { */ class WrapInserter { private: - OrderedFieldIndexInserter& _inserter; + IOrderedFieldIndexInserter& _inserter; public: WrapInserter(FieldIndexCollection& field_indexes, uint32_t field_id) -- cgit v1.2.3