summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2021-11-10 12:00:23 +0100
committer: Tor Egge <Tor.Egge@online.no> 2021-11-10 12:00:23 +0100
commit: ac343a8719d77984e89d84e61e7972e0cd45ac83 (patch)
tree: d08ca94db7457235a292aead15f4dee35bee9484 /searchlib/src
parent: 8efde630ff73357e0f31a34f131a120d327bf224 (diff)
Prepare for extracting field value in index inverter thread.
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp 61
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h 15
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp 62
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/invert_context.h 22
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp 27
6 files changed, 104 insertions, 84 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
index 16141bcd268..a7f0325d363 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp
@@ -61,7 +61,6 @@ DocumentInverter::~DocumentInverter()
void
DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
{
- _context.set_data_type(doc);
auto& invert_threads = _context.get_invert_threads();
auto& invert_contexts = _context.get_invert_contexts();
for (auto& invert_context : invert_contexts) {
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
index c82f71906fb..48f5a053856 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
@@ -1,19 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "document_inverter_context.h"
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/fieldvalue/document.h>
#include <cassert>
-#include <vespa/log/log.h>
-LOG_SETUP(".memoryindex.document_inverter_context");
-
namespace search::memoryindex {
-using document::DataType;
-using document::Document;
-using document::DocumentType;
-using document::Field;
using vespalib::ISequencedTaskExecutor;
using index::SchemaIndexFields;
@@ -127,44 +118,11 @@ void connect_contexts(std::vector<InvertContext>& invert_contexts,
}
-void
-DocumentInverterContext::add_field(const DocumentType& doc_type, uint32_t fieldId)
-{
- assert(fieldId < _indexed_fields.size());
- std::unique_ptr<Field> fp;
- if ( ! doc_type.hasField(_schema.getIndexField(fieldId).getName())) {
- LOG(error,
- "Mismatch between documentdefinition and schema. "
- "No field named '%s' from schema in document type '%s'",
- _schema.getIndexField(fieldId).getName().c_str(),
- doc_type.getName().c_str());
- } else {
- fp = std::make_unique<Field>(doc_type.getField(_schema.getIndexField(fieldId).getName()));
- }
- _indexed_fields[fieldId] = std::move(fp);
-}
-
-void
-DocumentInverterContext::build_fields(const DocumentType& doc_type, const DataType *data_type)
-{
- _indexed_fields.clear();
- _indexed_fields.resize(_schema.getNumIndexFields());
- for (const auto & fi : _schema_index_fields._textFields) {
- add_field(doc_type, fi);
- }
- for (const auto & fi : _schema_index_fields._uriFields) {
- add_field(doc_type, fi._all);
- }
- _data_type = data_type;
-}
-
DocumentInverterContext::DocumentInverterContext(const index::Schema& schema,
ISequencedTaskExecutor &invert_threads,
ISequencedTaskExecutor &push_threads,
IFieldIndexCollection& field_indexes)
: _schema(schema),
- _indexed_fields(),
- _data_type(nullptr),
_schema_index_fields(),
_invert_threads(invert_threads),
_push_threads(push_threads),
@@ -179,25 +137,6 @@ DocumentInverterContext::DocumentInverterContext(const index::Schema& schema,
DocumentInverterContext::~DocumentInverterContext() = default;
void
-DocumentInverterContext::set_data_type(const Document& doc)
-{
- const DataType *data_type(doc.getDataType());
- if (_indexed_fields.empty() || _data_type != data_type) {
- build_fields(doc.getType(), data_type);
- }
-}
-
-std::unique_ptr<document::FieldValue>
-DocumentInverterContext::get_field_value(const Document& doc, uint32_t field_id) const
-{
- const Field *const field(_indexed_fields[field_id].get());
- if (field != nullptr) {
- return doc.getValue(*field);
- }
- return {};
-}
-
-void
DocumentInverterContext::setup_contexts()
{
make_contexts(_schema_index_fields, _invert_threads, _invert_contexts);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h
index 54a1fff90a4..552def934c2 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.h
@@ -8,14 +8,6 @@
#include <memory>
#include <vector>
-namespace document {
-class DataType;
-class Document;
-class DocumentType;
-class Field;
-class FieldValue;
-}
-
namespace search::memoryindex {
class IFieldIndexCollection;
@@ -25,18 +17,13 @@ class IFieldIndexCollection;
* rarely (type dependent data, wiring).
*/
class DocumentInverterContext {
- using IndexedFields = std::vector<std::unique_ptr<document::Field>>;
const index::Schema& _schema;
- IndexedFields _indexed_fields;
- const document::DataType* _data_type;
index::SchemaIndexFields _schema_index_fields;
vespalib::ISequencedTaskExecutor& _invert_threads;
vespalib::ISequencedTaskExecutor& _push_threads;
IFieldIndexCollection& _field_indexes;
std::vector<InvertContext> _invert_contexts;
std::vector<PushContext> _push_contexts;
- void add_field(const document::DocumentType& doc_type, uint32_t fieldId);
- void build_fields(const document::DocumentType& doc_type, const document::DataType* data_type);
void setup_contexts();
public:
DocumentInverterContext(const index::Schema &schema,
@@ -44,13 +31,11 @@ public:
vespalib::ISequencedTaskExecutor &push_threads,
IFieldIndexCollection& field_indexes);
~DocumentInverterContext();
- void set_data_type(const document::Document& doc);
const index::Schema& get_schema() const noexcept { return _schema; }
const index::SchemaIndexFields& get_schema_index_fields() const noexcept { return _schema_index_fields; }
vespalib::ISequencedTaskExecutor& get_invert_threads() noexcept { return _invert_threads; }
vespalib::ISequencedTaskExecutor& get_push_threads() noexcept { return _push_threads; }
IFieldIndexCollection& get_field_indexes() noexcept { return _field_indexes; }
- std::unique_ptr<document::FieldValue> get_field_value(const document::Document& doc, uint32_t field_id) const;
const std::vector<InvertContext>& get_invert_contexts() const noexcept { return _invert_contexts; }
const std::vector<PushContext>& get_push_contexts() const noexcept { return _push_contexts; }
};
diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp
index fd58cb6595e..1e6506bc8d5 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.cpp
@@ -1,21 +1,81 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "invert_context.h"
+#include "document_inverter_context.h"
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/searchlib/index/schema_index_fields.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".memoryindex.invert_context");
namespace search::memoryindex {
+using document::Document;
+using document::DocumentType;
+using document::Field;
+
+namespace {
+
+std::unique_ptr<document::Field>
+get_field(const DocumentType& doc_type, const vespalib::string& name)
+{
+ std::unique_ptr<Field> fp;
+ if ( ! doc_type.hasField(name)) {
+ LOG(error,
+ "Mismatch between documentdefinition and schema. "
+ "No field named '%s' from schema in document type '%s'",
+ name.c_str(),
+ doc_type.getName().c_str());
+ } else {
+ fp = std::make_unique<Field>(doc_type.getField(name));
+ }
+ return fp;
+}
+
+}
+
+
InvertContext::InvertContext(vespalib::ISequencedTaskExecutor::ExecutorId id)
: BundledFieldsContext(id),
- _pushers()
+ _pushers(),
+ _document_fields(),
+ _document_uri_fields(),
+ _data_type(nullptr)
{
}
InvertContext::~InvertContext() = default;
+InvertContext::InvertContext(InvertContext&&) = default;
+
void
InvertContext::add_pusher(uint32_t pusher_id)
{
_pushers.emplace_back(pusher_id);
}
+void
+InvertContext::set_data_type(const DocumentInverterContext &doc_inv_context, const Document& doc) const
+{
+ auto data_type(doc.getDataType());
+ if (_data_type == data_type) {
+ return;
+ }
+ auto& doc_type(doc.getType());
+ _document_fields.clear();
+ auto& schema = doc_inv_context.get_schema();
+ for (auto field_id : get_fields()) {
+ auto& name = schema.getIndexField(field_id).getName();
+ _document_fields.emplace_back(get_field(doc_type, name));
+ }
+ _document_uri_fields.clear();
+ auto& schema_index_fields = doc_inv_context.get_schema_index_fields();
+ for (auto uri_field_id : get_uri_fields()) {
+ auto& name = schema.getIndexField(schema_index_fields._uriFields[uri_field_id]._all).getName();
+ _document_uri_fields.emplace_back(get_field(doc_type, name));
+ }
+ _data_type = data_type;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h
index 4d2ebddd647..059fdb25d06 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/invert_context.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/invert_context.h
@@ -4,8 +4,21 @@
#include "bundled_fields_context.h"
+namespace document {
+class DataType;
+class Document;
+class Field;
+}
+
+namespace search::index {
+class Schema;
+class SchemaIndexFields;
+}
+
namespace search::memoryindex {
+class DocumentInverterContext;
+
/*
* Context used by an InvertTask to invert a set of document fields
* into corresponding field inverters or by a RemoveTask to remove
@@ -17,12 +30,21 @@ namespace search::memoryindex {
*/
class InvertContext : public BundledFieldsContext
{
+ using IndexedFields = std::vector<std::unique_ptr<const document::Field>>;
std::vector<uint32_t> _pushers;
+ vespalib::string _document_field_names;
+ mutable IndexedFields _document_fields;
+ mutable IndexedFields _document_uri_fields;
+ mutable const document::DataType* _data_type;
public:
void add_pusher(uint32_t pusher_id);
InvertContext(vespalib::ISequencedTaskExecutor::ExecutorId id);
~InvertContext();
+ InvertContext(InvertContext&&);
const std::vector<uint32_t>& get_pushers() const noexcept { return _pushers; }
+ void set_data_type(const DocumentInverterContext& doc_inv_context, const document::Document& doc) const;
+ const IndexedFields& get_document_fields() const noexcept { return _document_fields; }
+ const IndexedFields& get_document_uri_fields() const noexcept { return _document_uri_fields; }
};
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp
index 2fb1ccf2444..223017ed149 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/invert_task.cpp
@@ -8,6 +8,22 @@
namespace search::memoryindex {
+using document::Document;
+using document::Field;
+
+namespace {
+
+std::unique_ptr<document::FieldValue>
+get_field_value(const Document& doc, const std::unique_ptr<const Field>& field)
+{
+ if (field) {
+ return doc.getValue(*field);
+ }
+ return {};
+}
+
+}
+
InvertTask::InvertTask(const DocumentInverterContext& inv_context, const InvertContext& context, const std::vector<std::unique_ptr<FieldInverter>>& inverters, const std::vector<std::unique_ptr<UrlFieldInverter>>& uri_inverters, uint32_t lid, const document::Document& doc)
: _inv_context(inv_context),
_context(context),
@@ -17,15 +33,14 @@ InvertTask::InvertTask(const DocumentInverterContext& inv_context, const InvertC
_uri_field_values(),
_lid(lid)
{
+ _context.set_data_type(_inv_context, doc);
_field_values.reserve(_context.get_fields().size());
_uri_field_values.reserve(_context.get_uri_fields().size());
- for (uint32_t field_id : _context.get_fields()) {
- _field_values.emplace_back(_inv_context.get_field_value(doc, field_id));
+ for (auto& document_field : _context.get_document_fields()) {
+ _field_values.emplace_back(get_field_value(doc, document_field));
}
- const auto& schema_index_fields = _inv_context.get_schema_index_fields();
- for (uint32_t uri_field_id : _context.get_uri_fields()) {
- uint32_t field_id = schema_index_fields._uriFields[uri_field_id]._all;
- _uri_field_values.emplace_back(_inv_context.get_field_value(doc, field_id));
+ for (auto& document_uri_field : _context.get_document_uri_fields()) {
+ _uri_field_values.emplace_back(get_field_value(doc, document_uri_field));
}
}