summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2021-11-02 16:35:58 +0100
committer GitHub <noreply@github.com> 2021-11-02 16:35:58 +0100
commit bad33e4b235994e32cf17f543029cdc54822bfbb (patch)
tree 0f9921848fb443c95f071725edda565f696e9b4a
parent 811f84106854696ee89e6d40e78d62bd312b6002 (diff)
parent 7c6170bc7c5586f410a034487d6835edc10fda6c (diff)
Merge pull request #19838 from vespa-engine/toregge/move-portions-of-document-inverter-out-to-document-inverter-context
Move portions of DocumentInverter to DocumentInverterContext.
-rw-r--r-- searchcore/src/tests/proton/index/indexmanager_test.cpp 4
-rw-r--r-- searchlib/src/tests/diskindex/fusion/fusion_test.cpp 8
-rw-r--r-- searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp 5
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp 139
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.h 32
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp 86
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h 53
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.h 2
11 files changed, 205 insertions, 139 deletions
diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp
index aa118600272..84cf0e3655f 100644
--- a/searchcore/src/tests/proton/index/indexmanager_test.cpp
+++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp
@@ -14,6 +14,7 @@
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/memoryindex/compact_words_store.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/queryeval/isourceselector.h>
@@ -392,7 +393,8 @@ TEST_F(IndexManagerTest, require_that_flush_stats_are_calculated)
FieldIndexCollection fic(schema, MockFieldLengthInspector());
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
- search::memoryindex::DocumentInverter inverter(schema, *invertThreads, *pushThreads, fic);
+ search::memoryindex::DocumentInverterContext inverter_context(schema, *invertThreads, *pushThreads, fic);
+ search::memoryindex::DocumentInverter inverter(inverter_context);
uint64_t fixed_index_size = fic.getMemoryUsage().allocatedBytes();
uint64_t index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size;
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index ed661b7499f..fba488e78aa 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -10,6 +10,7 @@
#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/posting_iterator.h>
#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
@@ -32,6 +33,7 @@ using fef::FieldPositionsIterator;
using fef::TermFieldMatchData;
using fef::TermFieldMatchDataArray;
using memoryindex::DocumentInverter;
+using memoryindex::DocumentInverterContext;
using memoryindex::FieldIndexCollection;
using queryeval::SearchIterator;
using search::common::FileHeaderContext;
@@ -321,7 +323,8 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
DocBuilder b(schema);
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
- DocumentInverter inv(schema, *invertThreads, *pushThreads, fic);
+ DocumentInverterContext inv_context(schema, *invertThreads, *pushThreads, fic);
+ DocumentInverter inv(inv_context);
Document::UP doc;
doc = make_doc10(b);
@@ -462,7 +465,8 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng
DocBuilder b(_schema);
auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2);
auto pushThreads = SequencedTaskExecutor::create(push_executor, 2);
- DocumentInverter inv(_schema, *invertThreads, *pushThreads, fic);
+ DocumentInverterContext inv_context(_schema, *invertThreads, *pushThreads, fic);
+ DocumentInverter inv(inv_context);
inv.invertDocument(10, *make_doc10(b));
invertThreads->sync_all();
diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
index 3bcf75680cc..d81df4c63fe 100644
--- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
+++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_remover.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/i_field_index_collection.h>
@@ -129,6 +130,7 @@ struct DocumentInverterTest : public ::testing::Test {
test::OrderedFieldIndexInserter _inserter;
FieldLengthCalculator _calculator;
MockFieldIndexCollection _fic;
+ DocumentInverterContext _inv_context;
DocumentInverter _inv;
static Schema makeSchema() {
@@ -150,7 +152,8 @@ struct DocumentInverterTest : public ::testing::Test {
_inserter(),
_calculator(),
_fic(_remover, _inserter, _calculator),
- _inv(_schema, *_invertThreads, *_pushThreads, _fic)
+ _inv_context(_schema, *_invertThreads, *_pushThreads, _fic),
+ _inv(_inv_context)
{
}
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 7cf40a5be63..a94e9cf5320 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
+#include <vespa/searchlib/memoryindex/document_inverter_context.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/ordered_field_index_inserter.h>
@@ -911,6 +912,7 @@ public:
DocBuilder _b;
std::unique_ptr<ISequencedTaskExecutor> _invertThreads;
std::unique_ptr<ISequencedTaskExecutor> _pushThreads;
+ DocumentInverterContext _inv_context;
DocumentInverter _inv;
InverterTest(const Schema& schema)
@@ -919,7 +921,8 @@ public:
_b(_schema),
_invertThreads(SequencedTaskExecutor::create(invert_executor, 2)),
_pushThreads(SequencedTaskExecutor::create(push_executor, 2)),
- _inv(_schema, *_invertThreads, *_pushThreads, _fic)
+ _inv_context(_schema, *_invertThreads, *_pushThreads, _fic),
+ _inv(_inv_context)
{
}
NormalFieldIndex::PostingList::Iterator find(const vespalib::stringref word, uint32_t field_id) const {
@@ -1470,7 +1473,8 @@ struct RemoverTest : public FieldIndexCollectionTest {
EXPECT_TRUE(assertPostingList(e3, find("b", 1)));
}
void remove(uint32_t docId) {
- DocumentInverter inv(schema, *_invertThreads, *_pushThreads, fic);
+ DocumentInverterContext inv_context(schema, *_invertThreads, *_pushThreads, fic);
+ DocumentInverter inv(inv_context);
myremove(docId, inv, *_invertThreads);
_pushThreads->sync_all();
EXPECT_FALSE(fic.getFieldIndex(0u)->getDocumentRemover().
diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt
index 5c15a029c0a..021e5f9cab8 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(searchlib_memoryindex OBJECT
SOURCES
compact_words_store.cpp
document_inverter.cpp
+ document_inverter_context.cpp
feature_store.cpp
field_index.cpp
field_index_base.cpp
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index 127ff1d52c3..f42cfa25877 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -1,67 +1,36 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "document_inverter.h"
+#include "document_inverter_context.h"
#include "i_field_index_collection.h"
#include "field_inverter.h"
-#include "ordered_field_index_inserter.h"
#include "url_field_inverter.h"
-#include <vespa/document/annotation/alternatespanlist.h>
-#include <vespa/document/datatype/urldatatype.h>
-#include <vespa/document/repo/fixedtyperepo.h>
#include <vespa/vespalib/util/isequencedtaskexecutor.h>
-#include <vespa/searchlib/util/url.h>
-#include <stdexcept>
-
-#include <vespa/log/log.h>
-LOG_SETUP(".memoryindex.document_inverter");
namespace search::memoryindex {
-using document::Field;
-using document::FieldValue;
using document::Document;
-using document::ArrayFieldValue;
-using document::WeightedSetFieldValue;
-using document::StringFieldValue;
-using document::IntFieldValue;
-using document::StructFieldValue;
-using document::DataType;
-using document::DocumentType;
-using document::AlternateSpanList;
-using document::Span;
-using document::SpanList;
-using document::SimpleSpanList;
-using document::SpanNode;
-using index::DocIdAndPosOccFeatures;
using index::Schema;
-using search::util::URL;
using search::index::FieldLengthCalculator;
-DocumentInverter::DocumentInverter(const Schema &schema,
- ISequencedTaskExecutor &invertThreads,
- ISequencedTaskExecutor &pushThreads,
- IFieldIndexCollection &fieldIndexes)
- : _schema(schema),
- _indexedFieldPaths(),
- _dataType(nullptr),
- _schemaIndexFields(),
+DocumentInverter::DocumentInverter(DocumentInverterContext& context)
+ : _context(context),
_inverters(),
- _urlInverters(),
- _invertThreads(invertThreads),
- _pushThreads(pushThreads)
+ _urlInverters()
{
- _schemaIndexFields.setup(schema);
-
- for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
+ auto& schema = context.get_schema();
+ auto& field_indexes = context.get_field_indexes();
+ for (uint32_t fieldId = 0; fieldId < schema.getNumIndexFields();
++fieldId) {
- auto &remover(fieldIndexes.get_remover(fieldId));
- auto &inserter(fieldIndexes.get_inserter(fieldId));
- auto &calculator(fieldIndexes.get_calculator(fieldId));
- _inverters.push_back(std::make_unique<FieldInverter>(_schema, fieldId, remover, inserter, calculator));
+ auto &remover(field_indexes.get_remover(fieldId));
+ auto &inserter(field_indexes.get_inserter(fieldId));
+ auto &calculator(field_indexes.get_calculator(fieldId));
+ _inverters.push_back(std::make_unique<FieldInverter>(schema, fieldId, remover, inserter, calculator));
}
- for (auto &urlField : _schemaIndexFields._uriFields) {
+ auto& schema_index_fields = context.get_schema_index_fields();
+ for (auto &urlField : schema_index_fields._uriFields) {
Schema::CollectionType collectionType =
- _schema.getIndexField(urlField._all).getCollectionType();
+ schema.getIndexField(urlField._all).getCollectionType();
_urlInverters.push_back(std::make_unique<UrlFieldInverter>
(collectionType,
_inverters[urlField._all].get(),
@@ -77,75 +46,30 @@ DocumentInverter::DocumentInverter(const Schema &schema,
DocumentInverter::~DocumentInverter()
{
- _invertThreads.sync_all();
- _pushThreads.sync_all();
-}
-
-void
-DocumentInverter::addFieldPath(const document::DocumentType &docType, uint32_t fieldId)
-{
- assert(fieldId < _indexedFieldPaths.size());
- std::unique_ptr<FieldPath> fp;
- if ( ! docType.hasField(_schema.getIndexField(fieldId).getName())) {
- LOG(error,
- "Mismatch between documentdefinition and schema. "
- "No field named '%s' from schema in document type '%s'",
- _schema.getIndexField(fieldId).getName().c_str(),
- docType.getName().c_str());
- } else {
- fp = std::make_unique<Field>(docType.getField(_schema.getIndexField(fieldId).getName()));
- }
- _indexedFieldPaths[fieldId] = std::move(fp);
-}
-
-void
-DocumentInverter::buildFieldPath(const document::DocumentType &docType,
- const document::DataType *dataType)
-{
- _indexedFieldPaths.clear();
- _indexedFieldPaths.resize(_schema.getNumIndexFields());
- for (const auto & fi : _schemaIndexFields._textFields) {
- addFieldPath(docType, fi);
- }
- for (const auto & fi : _schemaIndexFields._uriFields) {
- addFieldPath(docType, fi._all);
- }
- _dataType = dataType;
+ _context.get_invert_threads().sync_all();
+ _context.get_push_threads().sync_all();
}
void
DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
{
// Might want to batch inverters as we do for attributes
- const document::DataType *dataType(doc.getDataType());
- if (_indexedFieldPaths.empty() || _dataType != dataType) {
- buildFieldPath(doc.getType(), dataType);
- }
- for (uint32_t fieldId : _schemaIndexFields._textFields) {
- const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
- FieldValue::UP fv;
- if (fieldPath != nullptr) {
- // TODO: better handling of input data (and better input data)
- // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
- fv = doc.getValue(*fieldPath);
- }
+ _context.set_data_type(doc);
+ auto& schema_index_fields = _context.get_schema_index_fields();
+ auto& invert_threads = _context.get_invert_threads();
+ for (uint32_t fieldId : schema_index_fields._textFields) {
+ auto fv = _context.get_field_value(doc, fieldId);
FieldInverter *inverter = _inverters[fieldId].get();
- _invertThreads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
+ invert_threads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
inverter->invertField(docId, fv);
});
}
uint32_t urlId = 0;
- for (const auto & fi : _schemaIndexFields._uriFields) {
+ for (const auto & fi : schema_index_fields._uriFields) {
uint32_t fieldId = fi._all;
- const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
- FieldValue::UP fv;
- if (fieldPath != nullptr) {
- // TODO: better handling of input data (and better input data)
- // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
- fv = doc.getValue(*fieldPath);
- }
+ auto fv = _context.get_field_value(doc, fieldId);
UrlFieldInverter *inverter = _urlInverters[urlId].get();
- _invertThreads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
+ invert_threads.execute(fieldId,[inverter, docId, fv(std::move(fv))]() {
inverter->invertField(docId, fv);
});
++urlId;
@@ -162,19 +86,21 @@ void
DocumentInverter::removeDocuments(LidVector lids)
{
// Might want to batch inverters as we do for attributes
- for (uint32_t fieldId : _schemaIndexFields._textFields) {
+ auto& schema_index_fields = _context.get_schema_index_fields();
+ auto& invert_threads = _context.get_invert_threads();
+ for (uint32_t fieldId : schema_index_fields._textFields) {
FieldInverter *inverter = _inverters[fieldId].get();
- _invertThreads.execute(fieldId, [inverter, lids]() {
+ invert_threads.execute(fieldId, [inverter, lids]() {
for (uint32_t lid : lids) {
inverter->removeDocument(lid);
}
});
}
uint32_t urlId = 0;
- for (const auto & fi : _schemaIndexFields._uriFields) {
+ for (const auto & fi : schema_index_fields._uriFields) {
uint32_t fieldId = fi._all;
UrlFieldInverter *inverter = _urlInverters[urlId].get();
- _invertThreads.execute(fieldId, [inverter, lids]() {
+ invert_threads.execute(fieldId, [inverter, lids]() {
for (uint32_t lid : lids) {
inverter->removeDocument(lid);
}
@@ -187,8 +113,9 @@ void
DocumentInverter::pushDocuments(const std::shared_ptr<vespalib::IDestructorCallback> &onWriteDone)
{
uint32_t fieldId = 0;
+ auto& push_threads = _context.get_push_threads();
for (auto &inverter : _inverters) {
- _pushThreads.execute(fieldId,[inverter(inverter.get()), onWriteDone]() {
+ push_threads.execute(fieldId,[inverter(inverter.get()), onWriteDone]() {
inverter->applyRemoves();
inverter->pushDocuments();
});
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index ed06a0b39cc..cce6eda615d 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -2,8 +2,9 @@
#pragma once
-#include "i_field_index_remove_listener.h"
-#include <vespa/searchlib/index/schema_index_fields.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace document {
class DataType;
@@ -15,11 +16,11 @@ namespace document {
namespace vespalib {
class IDestructorCallback;
- class ISequencedTaskExecutor;
}
namespace search::memoryindex {
+class DocumentInverterContext;
class FieldInverter;
class UrlFieldInverter;
class IFieldIndexCollection;
@@ -31,42 +32,23 @@ class IFieldIndexCollection;
*/
class DocumentInverter {
private:
- using ISequencedTaskExecutor = vespalib::ISequencedTaskExecutor;
DocumentInverter(const DocumentInverter &) = delete;
DocumentInverter &operator=(const DocumentInverter &) = delete;
- const index::Schema &_schema;
-
- void addFieldPath(const document::DocumentType &docType, uint32_t fieldId);
- void buildFieldPath(const document::DocumentType & docType, const document::DataType *dataType);
+ DocumentInverterContext& _context;
using LidVector = std::vector<uint32_t>;
- using FieldPath = document::Field;
- using IndexedFieldPaths = std::vector<std::unique_ptr<FieldPath>>;
- IndexedFieldPaths _indexedFieldPaths;
- const document::DataType * _dataType;
- index::SchemaIndexFields _schemaIndexFields;
std::vector<std::unique_ptr<FieldInverter>> _inverters;
std::vector<std::unique_ptr<UrlFieldInverter>> _urlInverters;
- ISequencedTaskExecutor &_invertThreads;
- ISequencedTaskExecutor &_pushThreads;
-
- const index::Schema &getSchema() const { return _schema; }
public:
/**
* Create a new document inverter based on the given schema.
*
- * @param schema the schema with which text and uri fields to consider.
- * @param invertThreads the executor with threads for doing document inverting.
- * @param pushThreads the executor with threads for doing pushing of inverted documents
- * to corresponding field indexes.
+ * @param context A document inverter context shared between related document inverters.
*/
- DocumentInverter(const index::Schema &schema,
- ISequencedTaskExecutor &invertThreads,
- ISequencedTaskExecutor &pushThreads,
- IFieldIndexCollection &fieldIndexes);
+ DocumentInverter(DocumentInverterContext& context);
~DocumentInverter();
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
new file mode 100644
index 00000000000..8fea82229c8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
@@ -0,0 +1,86 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "document_inverter_context.h"
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <cassert>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.document_inverter_context");
+
+namespace search::memoryindex {
+
+using document::DataType;
+using document::Document;
+using document::DocumentType;
+using document::Field;
+using vespalib::ISequencedTaskExecutor;
+
+void
+DocumentInverterContext::add_field(const DocumentType& doc_type, uint32_t fieldId)
+{
+ assert(fieldId < _indexed_fields.size());
+ std::unique_ptr<Field> fp;
+ if ( ! doc_type.hasField(_schema.getIndexField(fieldId).getName())) {
+ LOG(error,
+ "Mismatch between documentdefinition and schema. "
+ "No field named '%s' from schema in document type '%s'",
+ _schema.getIndexField(fieldId).getName().c_str(),
+ doc_type.getName().c_str());
+ } else {
+ fp = std::make_unique<Field>(doc_type.getField(_schema.getIndexField(fieldId).getName()));
+ }
+ _indexed_fields[fieldId] = std::move(fp);
+}
+
+void
+DocumentInverterContext::build_fields(const DocumentType& doc_type, const DataType *data_type)
+{
+ _indexed_fields.clear();
+ _indexed_fields.resize(_schema.getNumIndexFields());
+ for (const auto & fi : _schema_index_fields._textFields) {
+ add_field(doc_type, fi);
+ }
+ for (const auto & fi : _schema_index_fields._uriFields) {
+ add_field(doc_type, fi._all);
+ }
+ _data_type = data_type;
+}
+
+DocumentInverterContext::DocumentInverterContext(const index::Schema& schema,
+ ISequencedTaskExecutor &invert_threads,
+ ISequencedTaskExecutor &push_threads,
+ IFieldIndexCollection& field_indexes)
+ : _schema(schema),
+ _indexed_fields(),
+ _data_type(nullptr),
+ _schema_index_fields(),
+ _invert_threads(invert_threads),
+ _push_threads(push_threads),
+ _field_indexes(field_indexes)
+{
+ _schema_index_fields.setup(schema);
+}
+
+DocumentInverterContext::~DocumentInverterContext() = default;
+
+void
+DocumentInverterContext::set_data_type(const Document& doc)
+{
+ const DataType *data_type(doc.getDataType());
+ if (_indexed_fields.empty() || _data_type != data_type) {
+ build_fields(doc.getType(), data_type);
+ }
+}
+
+std::unique_ptr<document::FieldValue>
+DocumentInverterContext::get_field_value(const Document& doc, uint32_t field_id) const
+{
+ const Field *const field(_indexed_fields[field_id].get());
+ if (field != nullptr) {
+ return doc.getValue(*field);
+ }
+ return {};
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h
new file mode 100644
index 00000000000..7330f4376ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h
@@ -0,0 +1,53 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/schema_index_fields.h>
+#include <memory>
+#include <vector>
+
+namespace document {
+class DataType;
+class Document;
+class DocumentType;
+class Field;
+class FieldValue;
+}
+
+namespace vespalib { class ISequencedTaskExecutor; }
+
+namespace search::memoryindex {
+
+class IFieldIndexCollection;
+
+/*
+ * Class containing shared context for document inverters that changes
+ * rarely (type dependent data, wiring).
+ */
+class DocumentInverterContext {
+ using IndexedFields = std::vector<std::unique_ptr<document::Field>>;
+ const index::Schema& _schema;
+ IndexedFields _indexed_fields;
+ const document::DataType* _data_type;
+ index::SchemaIndexFields _schema_index_fields;
+ vespalib::ISequencedTaskExecutor& _invert_threads;
+ vespalib::ISequencedTaskExecutor& _push_threads;
+ IFieldIndexCollection& _field_indexes;
+ void add_field(const document::DocumentType& doc_type, uint32_t fieldId);
+ void build_fields(const document::DocumentType& doc_type, const document::DataType* data_type);
+public:
+ DocumentInverterContext(const index::Schema &schema,
+ vespalib::ISequencedTaskExecutor &invert_threads,
+ vespalib::ISequencedTaskExecutor &push_threads,
+ IFieldIndexCollection& field_indexes);
+ ~DocumentInverterContext();
+ void set_data_type(const document::Document& doc);
+ const index::Schema& get_schema() const noexcept { return _schema; }
+ const index::SchemaIndexFields& get_schema_index_fields() const noexcept { return _schema_index_fields; }
+ vespalib::ISequencedTaskExecutor& get_invert_threads() noexcept { return _invert_threads; }
+ vespalib::ISequencedTaskExecutor& get_push_threads() noexcept { return _push_threads; }
+ IFieldIndexCollection& get_field_indexes() noexcept { return _field_indexes; }
+ std::unique_ptr<document::FieldValue> get_field_value(const document::Document& doc, uint32_t field_id) const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index cb198f4d33a..1e59d7ff83b 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "document_inverter.h"
+#include "document_inverter_context.h"
#include "field_index_collection.h"
#include "memory_index.h"
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
@@ -57,8 +58,9 @@ MemoryIndex::MemoryIndex(const Schema& schema,
_invertThreads(invertThreads),
_pushThreads(pushThreads),
_fieldIndexes(std::make_unique<FieldIndexCollection>(_schema, inspector)),
- _inverter0(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)),
- _inverter1(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)),
+ _inverter_context(std::make_unique<DocumentInverterContext>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)),
+ _inverter0(std::make_unique<DocumentInverter>(*_inverter_context)),
+ _inverter1(std::make_unique<DocumentInverter>(*_inverter_context)),
_inverter(_inverter0.get()),
_frozen(false),
_maxDocId(0), // docId 0 is reserved
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index c02e66f790a..1ea9f34b48c 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -21,6 +21,7 @@ namespace document { class Document; }
namespace search::memoryindex {
class DocumentInverter;
+class DocumentInverterContext;
class FieldIndexCollection;
/**
@@ -46,6 +47,7 @@ private:
ISequencedTaskExecutor &_invertThreads;
ISequencedTaskExecutor &_pushThreads;
std::unique_ptr<FieldIndexCollection> _fieldIndexes;
+ std::unique_ptr<DocumentInverterContext> _inverter_context;
std::unique_ptr<DocumentInverter> _inverter0;
std::unique_ptr<DocumentInverter> _inverter1;
DocumentInverter *_inverter;