summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp')
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp117
1 files changed, 116 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
index 8fea82229c8..84aeff92bed 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_context.cpp
@@ -15,6 +15,110 @@ using document::Document;
using document::DocumentType;
using document::Field;
using vespalib::ISequencedTaskExecutor;
+using index::SchemaIndexFields;
+
+namespace {
+
+/*
+ * Append one context per distinct executor id to 'contexts' and
+ * register every text field and uri field with the context of the
+ * executor id that the sequenced task executor assigns to it.
+ */
+template <typename Context>
+void make_contexts(const SchemaIndexFields& schema_index_fields, ISequencedTaskExecutor& executor, std::vector<Context>& contexts)
+{
+    using ExecutorId = ISequencedTaskExecutor::ExecutorId;
+    // (executor id, is uri field, field id / uri field id)
+    using FieldEntry = std::tuple<ExecutorId, bool, uint32_t>;
+    std::vector<FieldEntry> entries;
+    for (uint32_t text_field_id : schema_index_fields._textFields) {
+        // TODO: Add bias when sharing sequenced task executor between document types
+        entries.emplace_back(executor.getExecutorId(text_field_id), false, text_field_id);
+    }
+    // Uri fields are identified by their position in _uriFields.
+    uint32_t uri_field_id = 0;
+    for (auto& uri_field : schema_index_fields._uriFields) {
+        // TODO: Add bias when sharing sequenced task executor between document types
+        entries.emplace_back(executor.getExecutorId(uri_field._all), true, uri_field_id);
+        ++uri_field_id;
+    }
+    // Sorting makes entries with the same executor id adjacent; within one
+    // executor id the plain fields come before the uri fields.
+    std::sort(entries.begin(), entries.end());
+    std::optional<ExecutorId> current_id;
+    for (auto& [executor_id, is_uri_field, id] : entries) {
+        // Open a new context whenever the executor id changes.
+        if (!current_id.has_value() || current_id.value() != executor_id) {
+            contexts.emplace_back(executor_id);
+            current_id = executor_id;
+        }
+        auto& context = contexts.back();
+        if (is_uri_field) {
+            context.add_uri_field(id);
+        } else {
+            context.add_field(id);
+        }
+    }
+}
+
+/*
+ * Lookup table from field id to the id of the pusher registered for
+ * that field. The table is sized at construction; a slot stays empty
+ * until add_mapping() assigns a pusher to it.
+ */
+class PusherMapping {
+    // Indexed by field id; an empty optional means no pusher assigned yet.
+    std::vector<std::optional<uint32_t>> _pushers;
+public:
+    // explicit: a bare size must not convert silently into a mapping.
+    explicit PusherMapping(size_t size);
+    ~PusherMapping();
+
+    /*
+     * Register pusher_id as the pusher for every field id in 'fields'.
+     * Each field id must be within the table and not already mapped.
+     */
+    void add_mapping(const std::vector<uint32_t>& fields, uint32_t pusher_id) {
+        for (auto field_id : fields) {
+            assert(field_id < _pushers.size());
+            auto& opt_pusher = _pushers[field_id];
+            assert(!opt_pusher.has_value());
+            opt_pusher = pusher_id;
+        }
+    }
+
+    /*
+     * Append to 'pushers' the pusher id registered for every field id
+     * in 'fields'. Each field id must already be mapped. Duplicates are
+     * not removed. Does not modify the mapping, hence const.
+     */
+    void use_mapping(const std::vector<uint32_t>& fields, std::vector<uint32_t>& pushers) const {
+        for (auto field_id : fields) {
+            assert(field_id < _pushers.size());
+            const auto& opt_pusher = _pushers[field_id];
+            assert(opt_pusher.has_value());
+            pushers.emplace_back(opt_pusher.value());
+        }
+    }
+};
+
+PusherMapping::PusherMapping(size_t size)
+    : _pushers(size)
+{
+}
+
+PusherMapping::~PusherMapping() = default;
+
+/*
+ * Connect contexts for inverting to contexts for pushing. If we use
+ * different sequenced task executors or add different biases to the
+ * getExecutorId() argument (to enable double buffering) then contexts
+ * for inverting and contexts for pushing will bundle different sets
+ * of fields, preventing a 1:1 mapping. If we use the same sequenced
+ * task executor and drop double buffering then we can simplify this
+ * to a 1:1 mapping.
+ */
+void connect_contexts(std::vector<InvertContext>& invert_contexts,
+                      const std::vector<PushContext>& push_contexts,
+                      uint32_t num_fields,
+                      uint32_t num_uri_fields)
+{
+    PusherMapping field_to_pusher(num_fields);
+    PusherMapping uri_field_to_pusher(num_uri_fields);
+    // A push context is identified by its position in push_contexts.
+    uint32_t next_pusher_id = 0;
+    for (const auto& push_context : push_contexts) {
+        field_to_pusher.add_mapping(push_context.get_fields(), next_pusher_id);
+        uri_field_to_pusher.add_mapping(push_context.get_uri_fields(), next_pusher_id);
+        ++next_pusher_id;
+    }
+    // Scratch buffer reused for every invert context.
+    std::vector<uint32_t> used_pushers;
+    for (auto& invert_context : invert_contexts) {
+        used_pushers.clear();
+        field_to_pusher.use_mapping(invert_context.get_fields(), used_pushers);
+        uri_field_to_pusher.use_mapping(invert_context.get_uri_fields(), used_pushers);
+        // Several fields may share a pusher; register each pusher once,
+        // in ascending id order.
+        std::sort(used_pushers.begin(), used_pushers.end());
+        used_pushers.erase(std::unique(used_pushers.begin(), used_pushers.end()), used_pushers.end());
+        for (auto pusher_id : used_pushers) {
+            invert_context.add_pusher(pusher_id);
+        }
+    }
+}
+
+}
void
DocumentInverterContext::add_field(const DocumentType& doc_type, uint32_t fieldId)
@@ -57,9 +161,12 @@ DocumentInverterContext::DocumentInverterContext(const index::Schema& schema,
_schema_index_fields(),
_invert_threads(invert_threads),
_push_threads(push_threads),
- _field_indexes(field_indexes)
+ _field_indexes(field_indexes),
+ _invert_contexts(),
+ _push_contexts()
{
_schema_index_fields.setup(schema);
+ setup_contexts();
}
// Defaulted destructor, defined out of line in this source file.
DocumentInverterContext::~DocumentInverterContext() = default;
@@ -83,4 +190,12 @@ DocumentInverterContext::get_field_value(const Document& doc, uint32_t field_id)
return {};
}
+/*
+ * Build the invert contexts and the push contexts from the schema
+ * index fields, then connect each invert context to the push
+ * contexts that handle its fields.
+ */
+void
+DocumentInverterContext::setup_contexts()
+{
+    make_contexts(_schema_index_fields, _invert_threads, _invert_contexts);
+    make_contexts(_schema_index_fields, _push_threads, _push_contexts);
+    const auto num_fields = _schema.getNumIndexFields();
+    const auto num_uri_fields = _schema_index_fields._uriFields.size();
+    connect_contexts(_invert_contexts, _push_contexts, num_fields, num_uri_fields);
+}
+
}