diff options
Diffstat (limited to 'streamingvisitors')
8 files changed, 98 insertions, 17 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 726afcc959b..104309f50fa 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -3,6 +3,7 @@ #include "indexenvironment.h" #include <vespa/searchlib/fef/i_ranking_assets_repo.h> #include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/vespalib/stllike/hash_set.h> using namespace search::fef; @@ -39,6 +40,37 @@ IndexEnvironment::addField(const vespalib::string& name, return true; } +/* + * Ensure that array and map fields are known by the index + * environment, allowing the matches features to be used with the + * sameElement query operator. FieldSearchSpecMap::buildFromConfig() + * propagates the name to field id mapping for the added virtual + * fields. + */ +void +IndexEnvironment::add_virtual_fields() +{ + vespalib::hash_set<vespalib::string> vfields; + for (auto& field : _fields) { + vespalib::stringref name(field.name()); + auto pos = name.rfind('.'); + while (pos != vespalib::string::npos) { + name = name.substr(0, pos); + if (_fieldNames.contains(name)) { + break; + } + vfields.insert(name); + pos = name.rfind('.'); + } + } + for (auto& vfield : vfields) { + FieldInfo info(FieldType::VIRTUAL, FieldInfo::CollectionType::ARRAY, vfield, _fields.size()); + info.set_data_type(FieldInfo::DataType::COMBINED); + _fields.push_back(info); + _fieldNames[vfield] = info.id(); + } +} + void IndexEnvironment::fixup_fields() { diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index 50e6898262d..fdf5d7d870e 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -78,6 +78,8 @@ public: bool isAttribute, search::fef::FieldInfo::DataType data_type); + void add_virtual_fields(); + void fixup_fields(); search::fef::Properties & getProperties() { return _properties; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 3efeb8ef168..c1ca5daf1cb 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -78,20 +78,37 @@ IndexEnvPrototype::detectFields(const vespa::config::search::vsm::VsmfieldsConfi } } +void +IndexEnvPrototype::add_virtual_fields() +{ + _prototype.add_virtual_fields(); +} + namespace { FieldIdTList buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef::IIndexEnvironment & indexEnv, - const VsmfieldsConfig::Documenttype::IndexVector & indexes) + const VsmfieldsConfig::Documenttype::IndexVector & indexes, bool prefer_virtual_fields) { LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size()); FieldIdTList ifm; + if (prefer_virtual_fields) { + /* + * Stop at an existing virtual field when setting up views + * used by the same element query operator. + */ + auto info = indexEnv.getFieldByName(ci.name); + if (info != nullptr && info->type() == search::fef::FieldType::VIRTUAL) { + ifm.push_back(info->id()); + return ifm; + } + } for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) { LOG(spam, "Parsing field %s", cf.name.c_str()); auto foundIndex = std::find_if(indexes.begin(), indexes.end(), [&cf](const auto & v) { return v.name == cf.name;}); if ((foundIndex != indexes.end()) && (cf.name != ci.name)) { - FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes); + FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes, prefer_virtual_fields); ifm.insert(ifm.end(), sub.begin(), sub.end()); } else { const FieldInfo * info = indexEnv.getFieldByName(cf.name); @@ -111,15 +128,15 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef } void -RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields) +RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields) { for(const VsmfieldsConfig::Documenttype & di : fields->documenttype) { LOG(debug, "Looking through indexes for documenttype '%s'", di.name.c_str()); for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) { - FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index); - if (_views.find(ci.name) == _views.end()) { + FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index, prefer_virtual_fields); + if (views.find(ci.name) == views.end()) { std::sort(view.begin(), view.end()); // lowest field id first - _views[ci.name] = view; + views[ci.name] = view; } else { LOG(warning, "We already have a view for index '%s'. Drop the new view.", ci.name.c_str()); } @@ -127,6 +144,13 @@ RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields) } } +void +RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields) +{ + build_field_mappings(fields, _views, false); + build_field_mappings(fields, _same_element_views, true); +} + bool RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory) { @@ -170,7 +194,8 @@ RankManager::Snapshot::Snapshot() : _indexEnv(), _rankSetup(), _rpmap(), - _views() + _views(), + _same_element_views() { } @@ -181,7 +206,8 @@ RankManager::Snapshot::setup(const RankManager & rm) { VsmfieldsHandle fields = rm._vsmAdapter->getFieldsConfig(); _protoEnv.detectFields(*fields); - buildFieldMappings(fields); + _protoEnv.add_virtual_fields(); + build_field_mappings(fields); if (!initRankSetup(rm._blueprintFactory)) { return false; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h index 12785daeb89..52d44420ebc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h @@ -19,6 +19,7 @@ private: public: IndexEnvPrototype(); void detectFields(const vespa::config::search::vsm::VsmfieldsConfig &fields); + void add_virtual_fields(); void set_ranking_assets_repo(std::shared_ptr<const search::fef::IRankingAssetsRepo> repo) { _prototype.set_ranking_assets_repo(std::move(repo)); } @@ -53,9 +54,11 @@ public: std::vector<std::shared_ptr<const search::fef::RankSetup>> _rankSetup; // rank setup per rank profile Map _rpmap; ViewMap _views; + ViewMap _same_element_views; void addProperties(const vespa::config::search::RankProfilesConfig & cfg); - void buildFieldMappings(const vsm::VsmfieldsHandle & fields); + void build_field_mappings(const vsm::VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields); + void build_field_mappings(const vsm::VsmfieldsHandle& fields); bool initRankSetup(const search::fef::BlueprintFactory & factory); bool setup(const RankManager & manager); int getIndex(const vespalib::string & key) const { @@ -74,9 +77,13 @@ public: const IndexEnvironment & getIndexEnvironment(const vespalib::string &rankProfile) const { return _indexEnv[getIndex(rankProfile)]; } - const View *getView(const vespalib::string & index) const { - auto itr = _views.find(index); - if (itr != _views.end()) { + const IndexEnvironment& get_proto_index_environment() const { + return _protoEnv.current(); + } + const View *getView(const vespalib::string & index, bool is_same_element) const { + auto& views = is_same_element ? _same_element_views : _views; + auto itr = views.find(index); + if (itr != views.end()) { return &itr->second; } return nullptr; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index cfaaac8b197..72c1ca60814 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -7,6 +7,7 @@ #include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> #include <vespa/vsm/vsm/fieldsearchspec.h> +#include <vespa/vespalib/stllike/hash_set.h> #include <algorithm> #include <cmath> #include <vespa/log/log.h> @@ -61,7 +62,7 @@ RankProcessor::resolve_fields_from_children(QueryTermData& qtd, const MultiTerm& vespalib::hash_set<uint32_t> field_ids; for (auto& subterm : mt.get_terms()) { vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, false); if (view != nullptr) { for (auto field_id : *view) { field_ids.insert(field_id); @@ -86,7 +87,7 @@ void RankProcessor::resolve_fields_from_term(QueryTermData& qtd, const search::streaming::QueryTerm& term) { vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, term.is_same_element_query_node()); if (view != nullptr) { for (auto field_id : *view) { qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index dc58b607848..3fdc117dc88 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -498,7 +498,7 @@ SearchVisitor::init(const Parameters & params) VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); // Create mapping from field name to field id, from field id to search spec, // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config(), _env->get_rank_manager_snapshot()->get_proto_index_environment()); auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs()); // Add extra elements to mapping from field name to field id _fieldSearchSpecMap.buildFromConfig(additionalFields); diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 1dbac859262..1ab1b16cb86 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,6 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include <vespa/searchlib/fef/fieldinfo.h> +#include <vespa/searchlib/fef/iindexenvironment.h> #include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vsm/searcher/boolfieldsearcher.h> @@ -284,7 +286,7 @@ normalize_mode(VsmfieldsConfig::Fieldspec::Normalize normalize_mode) { } void -FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) +FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env) { LOG(spam, "Parsing %zd fields", conf->fieldspec.size()); for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) { @@ -295,6 +297,15 @@ FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) _nameIdMap.add(cfs.name, fieldId); LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str()); } + /* + * Index env is based on same vsm fields config but has additional + * virtual fields, cf. IndexEnvironment::add_virtual_fields(). + */ + for (uint32_t field_id = specMap().size(); field_id < index_env.getNumFields(); ++field_id) { + auto& field = *index_env.getField(field_id); + assert(field.type() == search::fef::FieldType::VIRTUAL); + _nameIdMap.add(field.name(), field_id); + } LOG(spam, "Parsing %zd document types", conf->documenttype.size()); for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index e90aea1371b..5b5a6b9a783 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -5,6 +5,8 @@ #include <vespa/vsm/searcher/fieldsearcher.h> #include <vespa/vsm/config/vsm-cfif.h> +namespace search::fef { class IIndexEnvironment; } + namespace vsm { class FieldSearchSpec @@ -67,7 +69,7 @@ public: * and a mapping from field name to field id. It then iterates over all document types and index names * and creates a mapping from index name to list of field ids for each document type. **/ - void buildFromConfig(const VsmfieldsHandle & conf); + void buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env); /** * Iterates over the given field name vector adding extra elements to the mapping from field name to field id. |