diff options
author | Tor Egge <Tor.Egge@online.no> | 2021-11-10 12:00:23 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2021-11-10 12:00:23 +0100 |
commit | ac343a8719d77984e89d84e61e7972e0cd45ac83 (patch) | |
tree | d08ca94db7457235a292aead15f4dee35bee9484 /searchlib/src | |
parent | 8efde630ff73357e0f31a34f131a120d327bf224 (diff) |
Prepare for extracting field value in index inverter thread.
Diffstat (limited to 'searchlib/src')
6 files changed, 104 insertions, 84 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index 16141bcd268..a7f0325d363 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -61,7 +61,6 @@ DocumentInverter::~DocumentInverter() void DocumentInverter::invertDocument(uint32_t docId, const Document &doc) { - _context.set_data_type(doc); auto& invert_threads = _context.get_invert_threads(); auto& invert_contexts = _context.get_invert_contexts(); for (auto& invert_context : invert_contexts) { diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp index c82f71906fb..48f5a053856 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp @@ -1,19 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter_context.h" -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/fieldvalue/document.h> #include <cassert> -#include <vespa/log/log.h> -LOG_SETUP(".memoryindex.document_inverter_context"); - namespace search::memoryindex { -using document::DataType; -using document::Document; -using document::DocumentType; -using document::Field; using vespalib::ISequencedTaskExecutor; using index::SchemaIndexFields; @@ -127,44 +118,11 @@ void connect_contexts(std::vector<InvertContext>& invert_contexts, } -void -DocumentInverterContext::add_field(const DocumentType& doc_type, uint32_t fieldId) -{ - assert(fieldId < _indexed_fields.size()); - std::unique_ptr<Field> fp; - if ( ! doc_type.hasField(_schema.getIndexField(fieldId).getName())) { - LOG(error, - "Mismatch between documentdefinition and schema. " - "No field named '%s' from schema in document type '%s'", - _schema.getIndexField(fieldId).getName().c_str(), - doc_type.getName().c_str()); - } else { - fp = std::make_unique<Field>(doc_type.getField(_schema.getIndexField(fieldId).getName())); - } - _indexed_fields[fieldId] = std::move(fp); -} - -void -DocumentInverterContext::build_fields(const DocumentType& doc_type, const DataType *data_type) -{ - _indexed_fields.clear(); - _indexed_fields.resize(_schema.getNumIndexFields()); - for (const auto & fi : _schema_index_fields._textFields) { - add_field(doc_type, fi); - } - for (const auto & fi : _schema_index_fields._uriFields) { - add_field(doc_type, fi._all); - } - _data_type = data_type; -} - DocumentInverterContext::DocumentInverterContext(const index::Schema& schema, ISequencedTaskExecutor &invert_threads, ISequencedTaskExecutor &push_threads, IFieldIndexCollection& field_indexes) : _schema(schema), - _indexed_fields(), - _data_type(nullptr), _schema_index_fields(), _invert_threads(invert_threads), _push_threads(push_threads), @@ -179,25 +137,6 @@ DocumentInverterContext::DocumentInverterContext(const index::Schema& schema, DocumentInverterContext::~DocumentInverterContext() = default; void -DocumentInverterContext::set_data_type(const Document& doc) -{ - const DataType *data_type(doc.getDataType()); - if (_indexed_fields.empty() || _data_type != data_type) { - build_fields(doc.getType(), data_type); - } -} - -std::unique_ptr<document::FieldValue> -DocumentInverterContext::get_field_value(const Document& doc, uint32_t field_id) const -{ - const Field *const field(_indexed_fields[field_id].get()); - if (field != nullptr) { - return doc.getValue(*field); - } - return {}; -} - -void DocumentInverterContext::setup_contexts() { make_contexts(_schema_index_fields, _invert_threads, _invert_contexts); diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h index 54a1fff90a4..552def934c2 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h @@ -8,14 +8,6 @@ #include <memory> #include <vector> -namespace document { -class DataType; -class Document; -class DocumentType; -class Field; -class FieldValue; -} - namespace search::memoryindex { class IFieldIndexCollection; @@ -25,18 +17,13 @@ class IFieldIndexCollection; * rarely (type dependent data, wiring). */ class DocumentInverterContext { - using IndexedFields = std::vector<std::unique_ptr<document::Field>>; const index::Schema& _schema; - IndexedFields _indexed_fields; - const document::DataType* _data_type; index::SchemaIndexFields _schema_index_fields; vespalib::ISequencedTaskExecutor& _invert_threads; vespalib::ISequencedTaskExecutor& _push_threads; IFieldIndexCollection& _field_indexes; std::vector<InvertContext> _invert_contexts; std::vector<PushContext> _push_contexts; - void add_field(const document::DocumentType& doc_type, uint32_t fieldId); - void build_fields(const document::DocumentType& doc_type, const document::DataType* data_type); void setup_contexts(); public: DocumentInverterContext(const index::Schema &schema, @@ -44,13 +31,11 @@ public: vespalib::ISequencedTaskExecutor &push_threads, IFieldIndexCollection& field_indexes); ~DocumentInverterContext(); - void set_data_type(const document::Document& doc); const index::Schema& get_schema() const noexcept { return _schema; } const index::SchemaIndexFields& get_schema_index_fields() const noexcept { return _schema_index_fields; } vespalib::ISequencedTaskExecutor& get_invert_threads() noexcept { return _invert_threads; } vespalib::ISequencedTaskExecutor& get_push_threads() noexcept { return _push_threads; } IFieldIndexCollection& get_field_indexes() noexcept { return _field_indexes; } - std::unique_ptr<document::FieldValue> get_field_value(const document::Document& doc, uint32_t field_id) const; const std::vector<InvertContext>& get_invert_contexts() const noexcept { return _invert_contexts; } const std::vector<PushContext>& get_push_contexts() const noexcept { return _push_contexts; } }; diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp index fd58cb6595e..1e6506bc8d5 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp @@ -1,21 +1,81 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "invert_context.h" +#include "document_inverter_context.h" +#include <vespa/document/datatype/documenttype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/searchlib/index/schema_index_fields.h> + +#include <vespa/log/log.h> +LOG_SETUP(".memoryindex.invert_context"); namespace search::memoryindex { +using document::Document; +using document::DocumentType; +using document::Field; + +namespace { + +std::unique_ptr<document::Field> +get_field(const DocumentType& doc_type, const vespalib::string& name) +{ + std::unique_ptr<Field> fp; + if ( ! doc_type.hasField(name)) { + LOG(error, + "Mismatch between documentdefinition and schema. " + "No field named '%s' from schema in document type '%s'", + name.c_str(), + doc_type.getName().c_str()); + } else { + fp = std::make_unique<Field>(doc_type.getField(name)); + } + return fp; +} + +} + + InvertContext::InvertContext(vespalib::ISequencedTaskExecutor::ExecutorId id) : BundledFieldsContext(id), - _pushers() + _pushers(), + _document_fields(), + _document_uri_fields(), + _data_type(nullptr) { } InvertContext::~InvertContext() = default; +InvertContext::InvertContext(InvertContext&&) = default; + void InvertContext::add_pusher(uint32_t pusher_id) { _pushers.emplace_back(pusher_id); } +void +InvertContext::set_data_type(const DocumentInverterContext &doc_inv_context, const Document& doc) const +{ + auto data_type(doc.getDataType()); + if (_data_type == data_type) { + return; + } + auto& doc_type(doc.getType()); + _document_fields.clear(); + auto& schema = doc_inv_context.get_schema(); + for (auto field_id : get_fields()) { + auto& name = schema.getIndexField(field_id).getName(); + _document_fields.emplace_back(get_field(doc_type, name)); + } + _document_uri_fields.clear(); + auto& schema_index_fields = doc_inv_context.get_schema_index_fields(); + for (auto uri_field_id : get_uri_fields()) { + auto& name = schema.getIndexField(schema_index_fields._uriFields[uri_field_id]._all).getName(); + _document_uri_fields.emplace_back(get_field(doc_type, name)); + } + _data_type = data_type; +} + } diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h index 4d2ebddd647..059fdb25d06 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h @@ -4,8 +4,21 @@ #include "bundled_fields_context.h" +namespace document { +class DataType; +class Document; +class Field; +} + +namespace search::index { +class Schema; +class SchemaIndexFields; +} + namespace search::memoryindex { +class DocumentInverterContext; + /* * Context used by an InvertTask to invert a set of document fields * into corresponding field inverters or by a RemoveTask to remove @@ -17,12 +30,21 @@ namespace search::memoryindex { */ class InvertContext : public BundledFieldsContext { + using IndexedFields = std::vector<std::unique_ptr<const document::Field>>; std::vector<uint32_t> _pushers; + vespalib::string _document_field_names; + mutable IndexedFields _document_fields; + mutable IndexedFields _document_uri_fields; + mutable const document::DataType* _data_type; public: void add_pusher(uint32_t pusher_id); InvertContext(vespalib::ISequencedTaskExecutor::ExecutorId id); ~InvertContext(); + InvertContext(InvertContext&&); const std::vector<uint32_t>& get_pushers() const noexcept { return _pushers; } + void set_data_type(const DocumentInverterContext& doc_inv_context, const document::Document& doc) const; + const IndexedFields& get_document_fields() const noexcept { return _document_fields; } + const IndexedFields& get_document_uri_fields() const noexcept { return _document_uri_fields; } }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp index 2fb1ccf2444..223017ed149 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp @@ -8,6 +8,22 @@ namespace search::memoryindex { +using document::Document; +using document::Field; + +namespace { + +std::unique_ptr<document::FieldValue> +get_field_value(const Document& doc, const std::unique_ptr<const Field>& field) +{ + if (field) { + return doc.getValue(*field); + } + return {}; +} + +} + InvertTask::InvertTask(const DocumentInverterContext& inv_context, const InvertContext& context, const std::vector<std::unique_ptr<FieldInverter>>& inverters, const std::vector<std::unique_ptr<UrlFieldInverter>>& uri_inverters, uint32_t lid, const document::Document& doc) : _inv_context(inv_context), _context(context), @@ -17,15 +33,14 @@ InvertTask::InvertTask(const DocumentInverterContext& inv_context, const InvertC _uri_field_values(), _lid(lid) { + _context.set_data_type(_inv_context, doc); _field_values.reserve(_context.get_fields().size()); _uri_field_values.reserve(_context.get_uri_fields().size()); - for (uint32_t field_id : _context.get_fields()) { - _field_values.emplace_back(_inv_context.get_field_value(doc, field_id)); + for (auto& document_field : _context.get_document_fields()) { + _field_values.emplace_back(get_field_value(doc, document_field)); } - const auto& schema_index_fields = _inv_context.get_schema_index_fields(); - for (uint32_t uri_field_id : _context.get_uri_fields()) { - uint32_t field_id = schema_index_fields._uriFields[uri_field_id]._all; - _uri_field_values.emplace_back(_inv_context.get_field_value(doc, field_id)); + for (auto& document_uri_field : _context.get_document_uri_fields()) { + _uri_field_values.emplace_back(get_field_value(doc, document_uri_field)); } } |