From 94d55bd32b3395d0dee9eacbc3a0c8573f0f3429 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 19 Mar 2024 12:28:45 +0100 Subject: Add virtual fields to index environment for streaming mode. --- .../src/apps/verify_ranksetup/verify_ranksetup.cpp | 2 ++ .../vespa/searchlib/query/streaming/queryterm.cpp | 6 ++++ .../vespa/searchlib/query/streaming/queryterm.h | 1 + .../query/streaming/same_element_query_node.cpp | 6 ++++ .../query/streaming/same_element_query_node.h | 1 + .../src/vespa/searchvisitor/indexenvironment.cpp | 32 +++++++++++++++++ .../src/vespa/searchvisitor/indexenvironment.h | 2 ++ .../src/vespa/searchvisitor/rankmanager.cpp | 42 +++++++++++++++++----- .../src/vespa/searchvisitor/rankmanager.h | 15 +++++--- .../src/vespa/searchvisitor/rankprocessor.cpp | 5 +-- .../src/vespa/searchvisitor/searchvisitor.cpp | 2 +- .../src/vespa/vsm/vsm/fieldsearchspec.cpp | 13 ++++++- .../src/vespa/vsm/vsm/fieldsearchspec.h | 4 ++- 13 files changed, 114 insertions(+), 17 deletions(-) diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp index 759792d205d..e78bcdc8757 100644 --- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp +++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp @@ -220,10 +220,12 @@ VerifyRankSetup::verifyConfig(const VerifyRanksetupConfig &myCfg, if (_searchMode == SearchMode::STREAMING) { streamingProto.set_ranking_assets_repo(repo); streamingProto.detectFields(vsmFieldsCfg); + streamingProto.add_virtual_fields(); factory = [&](const search::fef::Properties &properties) { auto indexEnv = streamingProto.clone(); indexEnv->getProperties().import(properties); + indexEnv->fixup_fields(); return indexEnv; }; } else { diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 07fc60d2243..90bf276af77 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -159,4 +159,10 @@ QueryTerm::as_equiv_query_node() const noexcept return nullptr; } +bool +QueryTerm::is_same_element_query_node() const noexcept +{ + return false; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 78b0a1fea7d..05b12804d52 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -108,6 +108,7 @@ public: virtual RegexpTerm* as_regexp_term() noexcept; virtual FuzzyTerm* as_fuzzy_term() noexcept; virtual const EquivQueryNode* as_equiv_query_node() const noexcept; + virtual bool is_same_element_query_node() const noexcept; virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env); protected: template diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp index cd9c693ca1c..fee1feb511d 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp @@ -95,4 +95,10 @@ SameElementQueryNode::multi_index_terms() const noexcept return true; } +bool +SameElementQueryNode::is_same_element_query_node() const noexcept +{ + return true; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h index 37fb3dbba52..87f5d06d35b 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h +++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h @@ -18,6 +18,7 @@ public: const HitList & evaluateHits(HitList & hl) const override; void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override; bool multi_index_terms() const noexcept override; + bool is_same_element_query_node() const noexcept override; }; } diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 726afcc959b..104309f50fa 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -3,6 +3,7 @@ #include "indexenvironment.h" #include #include +#include using namespace search::fef; @@ -39,6 +40,37 @@ IndexEnvironment::addField(const vespalib::string& name, return true; } +/* + * Ensure that array and map fields are known by the index + * environment, allowing the matches features to be used with the + * sameElement query operator. FieldSearchSpecMap::buildFromConfig() + * propagates the name to field id mapping for the added virtual + * fields. + */ +void +IndexEnvironment::add_virtual_fields() +{ + vespalib::hash_set vfields; + for (auto& field : _fields) { + vespalib::stringref name(field.name()); + auto pos = name.rfind('.'); + while (pos != vespalib::string::npos) { + name = name.substr(0, pos); + if (_fieldNames.contains(name)) { + break; + } + vfields.insert(name); + pos = name.rfind('.'); + } + } + for (auto& vfield : vfields) { + FieldInfo info(FieldType::VIRTUAL, FieldInfo::CollectionType::ARRAY, vfield, _fields.size()); + info.set_data_type(FieldInfo::DataType::COMBINED); + _fields.push_back(info); + _fieldNames[vfield] = info.id(); + } +} + void IndexEnvironment::fixup_fields() { diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index 50e6898262d..fdf5d7d870e 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -78,6 +78,8 @@ public: bool isAttribute, search::fef::FieldInfo::DataType data_type); + void add_virtual_fields(); + void fixup_fields(); search::fef::Properties & getProperties() { return _properties; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 3efeb8ef168..c1ca5daf1cb 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -78,20 +78,37 @@ IndexEnvPrototype::detectFields(const vespa::config::search::vsm::VsmfieldsConfi } } +void +IndexEnvPrototype::add_virtual_fields() +{ + _prototype.add_virtual_fields(); +} + namespace { FieldIdTList buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef::IIndexEnvironment & indexEnv, - const VsmfieldsConfig::Documenttype::IndexVector & indexes) + const VsmfieldsConfig::Documenttype::IndexVector & indexes, bool prefer_virtual_fields) { LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size()); FieldIdTList ifm; + if (prefer_virtual_fields) { + /* + * Stop at an existing virtual field when setting up views + * used by the same element query operator. + */ + auto info = indexEnv.getFieldByName(ci.name); + if (info != nullptr && info->type() == search::fef::FieldType::VIRTUAL) { + ifm.push_back(info->id()); + return ifm; + } + } for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) { LOG(spam, "Parsing field %s", cf.name.c_str()); auto foundIndex = std::find_if(indexes.begin(), indexes.end(), [&cf](const auto & v) { return v.name == cf.name;}); if ((foundIndex != indexes.end()) && (cf.name != ci.name)) { - FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes); + FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes, prefer_virtual_fields); ifm.insert(ifm.end(), sub.begin(), sub.end()); } else { const FieldInfo * info = indexEnv.getFieldByName(cf.name); @@ -111,15 +128,15 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef } void -RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields) +RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields) { for(const VsmfieldsConfig::Documenttype & di : fields->documenttype) { LOG(debug, "Looking through indexes for documenttype '%s'", di.name.c_str()); for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) { - FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index); - if (_views.find(ci.name) == _views.end()) { + FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index, prefer_virtual_fields); + if (views.find(ci.name) == views.end()) { std::sort(view.begin(), view.end()); // lowest field id first - _views[ci.name] = view; + views[ci.name] = view; } else { LOG(warning, "We already have a view for index '%s'. Drop the new view.", ci.name.c_str()); } @@ -127,6 +144,13 @@ RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields) } } +void +RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields) +{ + build_field_mappings(fields, _views, false); + build_field_mappings(fields, _same_element_views, true); +} + bool RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory) { @@ -170,7 +194,8 @@ RankManager::Snapshot::Snapshot() : _indexEnv(), _rankSetup(), _rpmap(), - _views() + _views(), + _same_element_views() { } @@ -181,7 +206,8 @@ RankManager::Snapshot::setup(const RankManager & rm) { VsmfieldsHandle fields = rm._vsmAdapter->getFieldsConfig(); _protoEnv.detectFields(*fields); - buildFieldMappings(fields); + _protoEnv.add_virtual_fields(); + build_field_mappings(fields); if (!initRankSetup(rm._blueprintFactory)) { return false; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h index 12785daeb89..52d44420ebc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h @@ -19,6 +19,7 @@ private: public: IndexEnvPrototype(); void detectFields(const vespa::config::search::vsm::VsmfieldsConfig &fields); + void add_virtual_fields(); void set_ranking_assets_repo(std::shared_ptr repo) { _prototype.set_ranking_assets_repo(std::move(repo)); } @@ -53,9 +54,11 @@ public: std::vector> _rankSetup; // rank setup per rank profile Map _rpmap; ViewMap _views; + ViewMap _same_element_views; void addProperties(const vespa::config::search::RankProfilesConfig & cfg); - void buildFieldMappings(const vsm::VsmfieldsHandle & fields); + void build_field_mappings(const vsm::VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields); + void build_field_mappings(const vsm::VsmfieldsHandle& fields); bool initRankSetup(const search::fef::BlueprintFactory & factory); bool setup(const RankManager & manager); int getIndex(const vespalib::string & key) const { @@ -74,9 +77,13 @@ public: const IndexEnvironment & getIndexEnvironment(const vespalib::string &rankProfile) const { return _indexEnv[getIndex(rankProfile)]; } - const View *getView(const vespalib::string & index) const { - auto itr = _views.find(index); - if (itr != _views.end()) { + const IndexEnvironment& get_proto_index_environment() const { + return _protoEnv.current(); + } + const View *getView(const vespalib::string & index, bool is_same_element) const { + auto& views = is_same_element ? _same_element_views : _views; + auto itr = views.find(index); + if (itr != views.end()) { return &itr->second; } return nullptr; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index cfaaac8b197..72c1ca60814 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,7 @@ RankProcessor::resolve_fields_from_children(QueryTermData& qtd, const MultiTerm& vespalib::hash_set field_ids; for (auto& subterm : mt.get_terms()) { vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, false); if (view != nullptr) { for (auto field_id : *view) { field_ids.insert(field_id); @@ -86,7 +87,7 @@ void RankProcessor::resolve_fields_from_term(QueryTermData& qtd, const search::streaming::QueryTerm& term) { vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, term.is_same_element_query_node()); if (view != nullptr) { for (auto field_id : *view) { qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index dc58b607848..3fdc117dc88 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -498,7 +498,7 @@ SearchVisitor::init(const Parameters & params) VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); // Create mapping from field name to field id, from field id to search spec, // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config(), _env->get_rank_manager_snapshot()->get_proto_index_environment()); auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs()); // Add extra elements to mapping from field name to field id _fieldSearchSpecMap.buildFromConfig(additionalFields); diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 1dbac859262..1ab1b16cb86 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,6 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include +#include #include #include #include @@ -284,7 +286,7 @@ normalize_mode(VsmfieldsConfig::Fieldspec::Normalize normalize_mode) { } void -FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) +FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env) { LOG(spam, "Parsing %zd fields", conf->fieldspec.size()); for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) { @@ -295,6 +297,15 @@ FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) _nameIdMap.add(cfs.name, fieldId); LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str()); } + /* + * Index env is based on same vsm fields config but has additional + * virtual fields, cf. IndexEnvironment::add_virtual_fields(). + */ + for (uint32_t field_id = specMap().size(); field_id < index_env.getNumFields(); ++field_id) { + auto& field = *index_env.getField(field_id); + assert(field.type() == search::fef::FieldType::VIRTUAL); + _nameIdMap.add(field.name(), field_id); + } LOG(spam, "Parsing %zd document types", conf->documenttype.size()); for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index e90aea1371b..5b5a6b9a783 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -5,6 +5,8 @@ #include #include +namespace search::fef { class IIndexEnvironment; } + namespace vsm { class FieldSearchSpec @@ -67,7 +69,7 @@ public: * and a mapping from field name to field id. It then iterates over all document types and index names * and creates a mapping from index name to list of field ids for each document type. **/ - void buildFromConfig(const VsmfieldsHandle & conf); + void buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env); /** * Iterates over the given field name vector adding extra elements to the mapping from field name to field id. -- cgit v1.2.3