diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-06-04 15:01:29 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-06-04 15:01:29 +0200 |
commit | ca75983acaa894d86b727c8c91026110d32d6dae (patch) | |
tree | fe180e8b8d7f0a5a84f7919c3f05d5a483cf5595 | |
parent | 05a2b4d3fbf3be03c69a42252051c141ec11fc14 (diff) |
Wire field inverter at construction time.
21 files changed, 228 insertions, 84 deletions
diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp index 9abd3e6cb39..9ceb85981c0 100644 --- a/searchcore/src/tests/proton/index/indexmanager_test.cpp +++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp @@ -364,7 +364,7 @@ TEST_F("requireThatFlushStatsAreCalculated", Fixture) { SequencedTaskExecutor invertThreads(2); SequencedTaskExecutor pushThreads(2); search::memoryindex::DocumentInverter inverter(schema, invertThreads, - pushThreads); + pushThreads, fic); uint64_t fixed_index_size = fic.getMemoryUsage().allocatedBytes(); uint64_t index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size; @@ -378,8 +378,7 @@ TEST_F("requireThatFlushStatsAreCalculated", Fixture) { Document::UP doc = f.addDocument(docid); inverter.invertDocument(docid, *doc); invertThreads.sync(); - inverter.pushDocuments(fic, - std::shared_ptr<search::IDestructorCallback>()); + inverter.pushDocuments(std::shared_ptr<search::IDestructorCallback>()); pushThreads.sync(); index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size; @@ -398,8 +397,7 @@ TEST_F("requireThatFlushStatsAreCalculated", Fixture) { doc = f.addDocument(docid + 100); inverter.invertDocument(docid + 100, *doc); invertThreads.sync(); - inverter.pushDocuments(fic, - std::shared_ptr<search::IDestructorCallback>()); + inverter.pushDocuments(std::shared_ptr<search::IDestructorCallback>()); pushThreads.sync(); index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size; /// Must account for both docid 0 being reserved and the extra after. 
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index e8e780b4376..339e196c9bf 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -54,9 +54,9 @@ public: namespace { void -myPushDocument(DocumentInverter &inv, FieldIndexCollection &fieldIndexes) +myPushDocument(DocumentInverter &inv) { - inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(std::shared_ptr<IDestructorCallback>()); } } @@ -272,7 +272,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, bool directio, DocBuilder b(schema); SequencedTaskExecutor invertThreads(2); SequencedTaskExecutor pushThreads(2); - DocumentInverter inv(schema, invertThreads, pushThreads); + DocumentInverter inv(schema, invertThreads, pushThreads, fic); Document::UP doc; b.startDocument("doc::10"); @@ -295,7 +295,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, bool directio, doc = b.endDocument(); inv.invertDocument(10, *doc); invertThreads.sync(); - myPushDocument(inv, fic); + myPushDocument(inv); pushThreads.sync(); b.startDocument("doc::11"). @@ -305,7 +305,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, bool directio, doc = b.endDocument(); inv.invertDocument(11, *doc); invertThreads.sync(); - myPushDocument(inv, fic); + myPushDocument(inv); pushThreads.sync(); b.startDocument("doc::12"). 
@@ -315,7 +315,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, bool directio, doc = b.endDocument(); inv.invertDocument(12, *doc); invertThreads.sync(); - myPushDocument(inv, fic); + myPushDocument(inv); pushThreads.sync(); IndexBuilder ib(schema); diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp index 91c1ccba706..08645f38712 100644 --- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp @@ -3,8 +3,12 @@ #include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/memoryindex/document_inverter.h> +#include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> +#include <vespa/searchlib/memoryindex/i_field_index_collection.h> +#include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/common/sequencedtaskexecutor.h> #include <vespa/vespalib/testkit/testapp.h> @@ -90,14 +94,46 @@ makeDoc15(DocBuilder &b) } +class MockFieldIndexCollection : public IFieldIndexCollection +{ + FieldIndexRemover &_remover; + test::OrderedFieldIndexInserter &_inserter; + FieldLengthCalculator &_calculator; + +public: + MockFieldIndexCollection(FieldIndexRemover &remover, + test::OrderedFieldIndexInserter &inserter, + FieldLengthCalculator &calculator) + : _remover(remover), + _inserter(inserter), + _calculator(calculator) + { + } + + FieldIndexRemover &get_remover(uint32_t) override { + return _remover; + } + IOrderedFieldIndexInserter &get_inserter(uint32_t) override { + return _inserter; + } + index::FieldLengthCalculator &get_calculator(uint32_t) override { + return _calculator; + } +}; + + struct Fixture { Schema _schema; 
DocBuilder _b; SequencedTaskExecutor _invertThreads; SequencedTaskExecutor _pushThreads; - DocumentInverter _inv; + WordStore _word_store; + FieldIndexRemover _remover; test::OrderedFieldIndexInserter _inserter; + FieldLengthCalculator _calculator; + MockFieldIndexCollection _fic; + DocumentInverter _inv; static Schema makeSchema() @@ -115,8 +151,12 @@ struct Fixture _b(_schema), _invertThreads(2), _pushThreads(2), - _inv(_schema, _invertThreads, _pushThreads), - _inserter() + _word_store(), + _remover(_word_store), + _inserter(), + _calculator(), + _fic(_remover, _inserter, _calculator), + _inv(_schema, _invertThreads, _pushThreads, _fic) { } @@ -127,7 +167,7 @@ struct Fixture uint32_t fieldId = 0; for (auto &inverter : _inv.getInverters()) { _inserter.setFieldId(fieldId); - inverter->pushDocuments(_inserter); + inverter->pushDocuments(); ++fieldId; } _pushThreads.sync(); diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 4075a06f882..95861643f84 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -370,12 +370,12 @@ public: MyInserter::~MyInserter() = default; void -myremove(uint32_t docId, DocumentInverter &inv, FieldIndexCollection &fieldIndexes, +myremove(uint32_t docId, DocumentInverter &inv, ISequencedTaskExecutor &invertThreads) { inv.removeDocument(docId); invertThreads.sync(); - inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(std::shared_ptr<IDestructorCallback>()); } class MyDrainRemoves : IFieldIndexRemoveListener { @@ -399,9 +399,9 @@ public: }; void -myPushDocument(DocumentInverter &inv, FieldIndexCollection &fieldIndexes) +myPushDocument(DocumentInverter &inv) { - inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(std::shared_ptr<IDestructorCallback>()); } const 
FeatureStore * @@ -759,7 +759,7 @@ public: _b(_schema), _invertThreads(2), _pushThreads(2), - _inv(_schema, _invertThreads, _pushThreads) + _inv(_schema, _invertThreads, _pushThreads, _fic) { } }; @@ -780,7 +780,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) doc = _b.endDocument(); _inv.invertDocument(10, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); _b.startDocument("doc::20"); @@ -790,7 +790,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) doc = _b.endDocument(); _inv.invertDocument(20, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); _b.startDocument("doc::30"); @@ -821,7 +821,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) doc = _b.endDocument(); _inv.invertDocument(30, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); _b.startDocument("doc::40"); @@ -832,7 +832,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) doc = _b.endDocument(); _inv.invertDocument(40, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); _b.startDocument("doc::999"); @@ -862,7 +862,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) for (uint32_t docId = 10000; docId < 20000; ++docId) { _inv.invertDocument(docId, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); } @@ -981,7 +981,7 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo Document::UP doc1 = _b.endDocument(); _inv.invertDocument(1, *doc1.get()); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); _b.startDocument("doc::2"); @@ -989,7 +989,7 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo Document::UP doc2 = _b.endDocument(); _inv.invertDocument(2, *doc2.get()); 
_invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 0))); @@ -998,7 +998,7 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 1))); EXPECT_TRUE(assertPostingList("[1]", _fic.find("c", 1))); - myremove(1, _inv, _fic, _invertThreads); + myremove(1, _inv, _invertThreads); _pushThreads.sync(); EXPECT_TRUE(assertPostingList("[]", _fic.find("a", 0))); @@ -1150,7 +1150,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) doc = _b.endDocument(); _inv.invertDocument(10, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); @@ -1234,7 +1234,7 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) doc = _b.endDocument(); _inv.invertDocument(10, *doc); _invertThreads.sync(); - myPushDocument(_inv, _fic); + myPushDocument(_inv); _pushThreads.sync(); @@ -1316,8 +1316,8 @@ struct RemoverTest : public FieldIndexCollectionTest { EXPECT_TRUE(assertPostingList(e3, fic.find("b", 1))); } void remove(uint32_t docId) { - DocumentInverter inv(schema, _invertThreads, _pushThreads); - myremove(docId, inv, fic, _invertThreads); + DocumentInverter inv(schema, _invertThreads, _pushThreads, fic); + myremove(docId, inv, _invertThreads); _pushThreads.sync(); EXPECT_FALSE(fic.getFieldIndex(0u)->getDocumentRemover(). 
getStore().get(docId).valid()); diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp index a818bb75bf2..c3b5b39b8d0 100644 --- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp @@ -2,7 +2,10 @@ #include <vespa/document/repo/fixedtyperepo.h> #include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> +#include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/vespalib/testkit/testapp.h> @@ -114,8 +117,11 @@ struct Fixture { Schema _schema; DocBuilder _b; - std::vector<std::unique_ptr<FieldInverter> > _inverters; + WordStore _word_store; + FieldIndexRemover _remover; test::OrderedFieldIndexInserter _inserter; + FieldLengthCalculator _calculator; + std::vector<std::unique_ptr<FieldInverter> > _inverters; static Schema makeSchema() @@ -131,13 +137,19 @@ struct Fixture Fixture() : _schema(makeSchema()), _b(_schema), - _inverters(), - _inserter() + _word_store(), + _remover(_word_store), + _inserter(), + _calculator(), + _inverters() { for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); ++fieldId) { _inverters.push_back(std::make_unique<FieldInverter>(_schema, - fieldId)); + fieldId, + _remover, + _inserter, + _calculator)); } } @@ -159,7 +171,7 @@ struct Fixture uint32_t fieldId = 0; for (auto &inverter : _inverters) { _inserter.setFieldId(fieldId); - inverter->pushDocuments(_inserter); + inverter->pushDocuments(); ++fieldId; } } diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp index 
76fbf662b77..2ea13a20063 100644 --- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp @@ -3,7 +3,10 @@ #include <vespa/document/repo/fixedtyperepo.h> #include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> +#include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/memoryindex/url_field_inverter.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/vespalib/testkit/testapp.h> @@ -180,9 +183,12 @@ struct Fixture { Schema _schema; DocBuilder _b; + WordStore _word_store; + FieldIndexRemover _remover; + test::OrderedFieldIndexInserter _inserter; + FieldLengthCalculator _calculator; std::vector<std::unique_ptr<FieldInverter> > _inverters; std::unique_ptr<UrlFieldInverter> _urlInverter; - test::OrderedFieldIndexInserter _inserter; index::SchemaIndexFields _schemaIndexFields; static Schema @@ -196,16 +202,22 @@ struct Fixture Fixture(Schema::CollectionType collectionType) : _schema(makeSchema(collectionType)), _b(_schema), + _word_store(), + _remover(_word_store), + _inserter(), + _calculator(), _inverters(), _urlInverter(), - _inserter(), _schemaIndexFields() { _schemaIndexFields.setup(_schema); for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); ++fieldId) { _inverters.push_back(std::make_unique<FieldInverter>(_schema, - fieldId)); + fieldId, + _remover, + _inserter, + _calculator)); } index::UriField &urlField = _schemaIndexFields._uriFields.front(); @@ -233,7 +245,7 @@ struct Fixture uint32_t fieldId = 0; for (auto &inverter : _inverters) { _inserter.setFieldId(fieldId); - inverter->pushDocuments(_inserter); + inverter->pushDocuments(); ++fieldId; } } diff --git 
a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index a468428e21f..d032f06fc58 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter.h" -#include "field_index_collection.h" +#include "i_field_index_collection.h" #include "field_inverter.h" #include "ordered_field_index_inserter.h" #include "url_field_inverter.h" @@ -38,10 +38,12 @@ using document::SpanNode; using index::DocIdAndPosOccFeatures; using index::Schema; using search::util::URL; +using search::index::FieldLengthCalculator; DocumentInverter::DocumentInverter(const Schema &schema, ISequencedTaskExecutor &invertThreads, - ISequencedTaskExecutor &pushThreads) + ISequencedTaskExecutor &pushThreads, + IFieldIndexCollection &fieldIndexes) : _schema(schema), _indexedFieldPaths(), _dataType(nullptr), @@ -55,7 +57,10 @@ DocumentInverter::DocumentInverter(const Schema &schema, for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); ++fieldId) { - _inverters.push_back(std::make_unique<FieldInverter>(_schema, fieldId)); + auto &remover(fieldIndexes.get_remover(fieldId)); + auto &inserter(fieldIndexes.get_inserter(fieldId)); + auto &calculator(fieldIndexes.get_calculator(fieldId)); + _inverters.push_back(std::make_unique<FieldInverter>(_schema, fieldId, remover, inserter, calculator)); } for (auto &urlField : _schemaIndexFields._uriFields) { Schema::CollectionType collectionType = @@ -171,22 +176,15 @@ DocumentInverter::removeDocument(uint32_t docId) } void -DocumentInverter::pushDocuments(FieldIndexCollection &fieldIndexes, - const std::shared_ptr<IDestructorCallback> &onWriteDone) +DocumentInverter::pushDocuments(const std::shared_ptr<IDestructorCallback> &onWriteDone) { - 
auto indexFieldIterator = fieldIndexes.getFieldIndexes().begin(); uint32_t fieldId = 0; for (auto &inverter : _inverters) { - FieldIndex &fieldIndex(**indexFieldIterator); - FieldIndexRemover &remover(fieldIndex.getDocumentRemover()); - OrderedFieldIndexInserter &inserter(fieldIndex.getInserter()); _pushThreads.execute(fieldId, - [inverter(inverter.get()), &remover, &inserter, - &fieldIndex, onWriteDone]() - { inverter->applyRemoves(remover); - inverter->pushDocuments(inserter); - fieldIndex.commit(); }); - ++indexFieldIterator; + [inverter(inverter.get()), + onWriteDone]() + { inverter->applyRemoves(); + inverter->pushDocuments(); }); ++fieldId; } } diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h index 158302ddea5..ffa9dd0fab8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h @@ -22,7 +22,7 @@ namespace search::memoryindex { class FieldInverter; class UrlFieldInverter; -class FieldIndexCollection; +class IFieldIndexCollection; /** * Class used to invert the fields for a set of documents, preparing for pushing changes info field indexes. @@ -66,7 +66,8 @@ public: */ DocumentInverter(const index::Schema &schema, ISequencedTaskExecutor &invertThreads, - ISequencedTaskExecutor &pushThreads); + ISequencedTaskExecutor &pushThreads, + IFieldIndexCollection &fieldIndexes); ~DocumentInverter(); @@ -82,7 +83,7 @@ public: * NOTE: The caller of this function should sync the 'invert threads' executor first, * to ensure that inverting is completed before pushing starts. */ - void pushDocuments(FieldIndexCollection &fieldIndexes, const std::shared_ptr<IDestructorCallback> &onWriteDone); + void pushDocuments(const std::shared_ptr<IDestructorCallback> &onWriteDone); /** * Invert (add) the given document. 
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 66644d1fd2b..2243df41b0b 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -52,7 +52,8 @@ FieldIndex::FieldIndex(const Schema & schema, uint32_t fieldId) _featureStore(schema), _fieldId(fieldId), _remover(_wordStore), - _inserter(std::make_unique<OrderedFieldIndexInserter>(*this)) + _inserter(std::make_unique<OrderedFieldIndexInserter>(*this)), + _calculator() { } FieldIndex::~FieldIndex() diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 3f601fd5b47..85685a5e1d1 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -6,6 +6,7 @@ #include "field_index_remover.h" #include "word_store.h" #include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/index/indexbuilder.h> #include <vespa/vespalib/btree/btree.h> #include <vespa/vespalib/btree/btreenodeallocator.h> @@ -91,6 +92,7 @@ private: uint32_t _fieldId; FieldIndexRemover _remover; std::unique_ptr<OrderedFieldIndexInserter> _inserter; + index::FieldLengthCalculator _calculator; public: datastore::EntryRef addWord(const vespalib::stringref word) { @@ -113,6 +115,7 @@ public: const FeatureStore & getFeatureStore() const { return _featureStore; } const WordStore &getWordStore() const { return _wordStore; } OrderedFieldIndexInserter &getInserter() const { return *_inserter; } + index::FieldLengthCalculator &get_calculator() { return _calculator; } private: void freeze() { diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index a8d564971c3..b75ea7f0a70 100644 --- 
a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -2,6 +2,7 @@ #include "field_index_collection.h" #include "field_inverter.h" +#include "ordered_field_index_inserter.h" #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/vespalib/btree/btreenode.hpp> @@ -60,5 +61,23 @@ FieldIndexCollection::getMemoryUsage() const return usage; } +FieldIndexRemover & +FieldIndexCollection::get_remover(uint32_t field_id) +{ + return _fieldIndexes[field_id]->getDocumentRemover(); +} + +IOrderedFieldIndexInserter & +FieldIndexCollection::get_inserter(uint32_t field_id) +{ + return _fieldIndexes[field_id]->getInserter(); +} + +index::FieldLengthCalculator & +FieldIndexCollection::get_calculator(uint32_t field_id) +{ + return _fieldIndexes[field_id]->get_calculator(); +} + } } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index d259a071f97..d5212c41819 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -2,6 +2,7 @@ #pragma once +#include "i_field_index_collection.h" #include "field_index.h" namespace search::memoryindex { @@ -15,7 +16,7 @@ class FieldInverter; * Provides functions to create a posting list iterator (used for searching) * for a given word in a given field. 
*/ -class FieldIndexCollection { +class FieldIndexCollection : public IFieldIndexCollection { public: using PostingList = FieldIndex::PostingList; @@ -56,6 +57,10 @@ public: const std::vector<std::unique_ptr<FieldIndex>> &getFieldIndexes() const { return _fieldIndexes; } uint32_t getNumFields() const { return _numFields; } + + FieldIndexRemover &get_remover(uint32_t field_id) override; + IOrderedFieldIndexInserter &get_inserter(uint32_t field_id) override; + index::FieldLengthCalculator &get_calculator(uint32_t field_id) override; }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp index bfa0143d395..abd6fa61645 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.cpp @@ -294,6 +294,7 @@ FieldInverter::endDoc() ++itr; } } + _calculator.add_field_length(field_length); uint32_t newPosSize = static_cast<uint32_t>(_positions.size()); _pendingDocs.insert({ _docId, { _oldPosSize, newPosSize - _oldPosSize } }); @@ -340,7 +341,10 @@ FieldInverter::processNormalDocWeightedSetTextField(const WeightedSetFieldValue } } -FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId) +FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId, + FieldIndexRemover &remover, + IOrderedFieldIndexInserter &inserter, + index::FieldLengthCalculator &calculator) : _fieldId(fieldId), _elem(0u), _wpos(0u), @@ -356,7 +360,10 @@ FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId) _terms(), _abortedDocs(), _pendingDocs(), - _removeDocs() + _removeDocs(), + _remover(remover), + _inserter(inserter), + _calculator(calculator) { } @@ -482,16 +489,16 @@ struct FullRadix { } void -FieldInverter::applyRemoves(FieldIndexRemover &remover) +FieldInverter::applyRemoves() { for (auto docId : _removeDocs) { - remover.remove(docId, *this); + _remover.remove(docId, *this); } _removeDocs.clear(); } void 
-FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) +FieldInverter::pushDocuments() { trimAbortedDocs(); @@ -516,7 +523,7 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) vespalib::stringref word; bool emptyFeatures = true; - inserter.rewind(); + _inserter.rewind(); for (auto &i : _positions) { assert(i._wordNum <= numWordIds); @@ -524,17 +531,17 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) if (lastWordNum != i._wordNum || lastDocId != i._docId) { if (!emptyFeatures) { _features.set_num_occs(_features.word_positions().size()); - inserter.add(lastDocId, _features); + _inserter.add(lastDocId, _features); emptyFeatures = true; } if (lastWordNum != i._wordNum) { lastWordNum = i._wordNum; word = getWordFromNum(lastWordNum); - inserter.setNextWord(word); + _inserter.setNextWord(word); } lastDocId = i._docId; if (i.removed()) { - inserter.remove(lastDocId); + _inserter.remove(lastDocId); continue; } } @@ -566,9 +573,10 @@ FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) if (!emptyFeatures) { _features.set_num_occs(_features.word_positions().size()); - inserter.add(lastDocId, _features); + _inserter.add(lastDocId, _features); } - inserter.flush(); + _inserter.flush(); + _inserter.commit(); reset(); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index e547dbe98c6..85229c484ab 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -13,6 +13,8 @@ #include <map> #include <set> +namespace search::index { class FieldLengthCalculator; } + namespace search::memoryindex { class IOrderedFieldIndexInserter; @@ -184,6 +186,10 @@ private: std::map<uint32_t, PositionRange> _pendingDocs; std::vector<uint32_t> _removeDocs; + FieldIndexRemover &_remover; + IOrderedFieldIndexInserter &_inserter; + index::FieldLengthCalculator &_calculator; + 
void invertNormalDocTextField(const document::FieldValue &val); @@ -281,7 +287,10 @@ public: /** * Create a new field inverter for the given fieldId, using the given schema. */ - FieldInverter(const index::Schema &schema, uint32_t fieldId); + FieldInverter(const index::Schema &schema, uint32_t fieldId, + FieldIndexRemover &remover, + IOrderedFieldIndexInserter &inserter, + index::FieldLengthCalculator &calculator); /** * Apply pending removes using the given remover. @@ -289,12 +298,12 @@ public: * The remover is tracking all {word, docId} tuples that should removed, * and forwards this to the remove() function in this class (via IFieldIndexRemoveListener interface). */ - void applyRemoves(FieldIndexRemover &remover); + void applyRemoves(); /** * Push the current batch of inverted documents to the FieldIndex using the given inserter. */ - void pushDocuments(IOrderedFieldIndexInserter &inserter); + void pushDocuments(); /** * Invert a normal text field, based on annotations. diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h new file mode 100644 index 00000000000..c20ea4cb8e4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index_collection.h @@ -0,0 +1,26 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace search::index { class FieldLengthCalculator; } + +namespace search::memoryindex { + +class FieldIndexRemover; +class IOrderedFieldIndexInserter; + +/** + * Interface class for a field index collection which can be used to
+ */ +class IFieldIndexCollection { +public: + virtual FieldIndexRemover &get_remover(uint32_t field_id) = 0; + virtual IOrderedFieldIndexInserter &get_inserter(uint32_t field_id) = 0; + virtual index::FieldLengthCalculator &get_calculator(uint32_t field_id) = 0; + virtual ~IFieldIndexCollection() = default; +}; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h index a1eee2e10ee..cf10db3c4d8 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h @@ -39,6 +39,11 @@ public: */ virtual void flush() = 0; + /* + * Make current state visible to readers. + */ + virtual void commit() = 0; + /** * Rewind to prepare for another set of (word, docId) tuples. */ diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index a73d483ec2f..e2cac316580 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -59,10 +59,10 @@ MemoryIndex::MemoryIndex(const Schema &schema, : _schema(schema), _invertThreads(invertThreads), _pushThreads(pushThreads), - _inverter0(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads)), - _inverter1(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads)), - _inverter(_inverter0.get()), _fieldIndexes(std::make_unique<FieldIndexCollection>(_schema)), + _inverter0(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)), + _inverter1(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)), + _inverter(_inverter0.get()), _frozen(false), _maxDocId(0), // docId 0 is reserved _numDocs(0), @@ -114,7 +114,7 @@ MemoryIndex::commit(const std::shared_ptr<IDestructorCallback> 
&onWriteDone) { _invertThreads.sync(); // drain inverting into this inverter _pushThreads.sync(); // drain use of other inverter - _inverter->pushDocuments(*_fieldIndexes, onWriteDone); + _inverter->pushDocuments(onWriteDone); flipInverter(); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 7d94025d9b1..c350da31c54 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -39,10 +39,10 @@ private: index::Schema _schema; ISequencedTaskExecutor &_invertThreads; ISequencedTaskExecutor &_pushThreads; + std::unique_ptr<FieldIndexCollection> _fieldIndexes; std::unique_ptr<DocumentInverter> _inverter0; std::unique_ptr<DocumentInverter> _inverter1; DocumentInverter *_inverter; - std::unique_ptr<FieldIndexCollection> _fieldIndexes; bool _frozen; uint32_t _maxDocId; uint32_t _numDocs; diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp index 0308c6d276b..637a13d67be 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp @@ -39,10 +39,7 @@ OrderedFieldIndexInserter::OrderedFieldIndexInserter(FieldIndex &fieldIndex) { } -OrderedFieldIndexInserter::~OrderedFieldIndexInserter() -{ - flush(); -} +OrderedFieldIndexInserter::~OrderedFieldIndexInserter() = default; void OrderedFieldIndexInserter::flushWord() @@ -75,6 +72,12 @@ OrderedFieldIndexInserter::flush() } void +OrderedFieldIndexInserter::commit() +{ + _fieldIndex.commit(); +} + +void OrderedFieldIndexInserter::setNextWord(const vespalib::stringref word) { // TODO: Adjust here if zero length words should be legal. 
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h index 03cf3723f01..18765f9bae3 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h @@ -63,6 +63,9 @@ public: */ void flush() override; + + void commit() override; + /** * Rewind iterator, to start new pass. */ diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index f984bd8fcbd..c14c454bad6 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -93,6 +93,7 @@ public: } virtual void flush() override { } + virtual void commit() override { } virtual void rewind() override { addComma(); _ss << "f=" << _fieldId; |