about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2024-03-19 12:28:45 +0100
committer Tor Egge <Tor.Egge@online.no> 2024-03-19 12:28:45 +0100
commit 94d55bd32b3395d0dee9eacbc3a0c8573f0f3429 (patch)
tree 45ab83501705b9008f1c50dcd188adfc411c4764
parent 2f663bd8757167d9d655f38d0116be4ec77fb266 (diff)
Add virtual fields to index environment for streaming mode.
-rw-r--r-- searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h 1
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp 32
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/indexenvironment.h 2
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp 42
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankmanager.h 15
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp 5
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp 13
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h 4
13 files changed, 114 insertions, 17 deletions
diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp
index 759792d205d..e78bcdc8757 100644
--- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp
+++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp
@@ -220,10 +220,12 @@ VerifyRankSetup::verifyConfig(const VerifyRanksetupConfig &myCfg,
if (_searchMode == SearchMode::STREAMING) {
streamingProto.set_ranking_assets_repo(repo);
streamingProto.detectFields(vsmFieldsCfg);
+ streamingProto.add_virtual_fields();
factory = [&](const search::fef::Properties &properties)
{
auto indexEnv = streamingProto.clone();
indexEnv->getProperties().import(properties);
+ indexEnv->fixup_fields();
return indexEnv;
};
} else {
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 07fc60d2243..90bf276af77 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -159,4 +159,10 @@ QueryTerm::as_equiv_query_node() const noexcept
return nullptr;
}
+bool
+QueryTerm::is_same_element_query_node() const noexcept
+{
+ return false;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 78b0a1fea7d..05b12804d52 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -108,6 +108,7 @@ public:
virtual RegexpTerm* as_regexp_term() noexcept;
virtual FuzzyTerm* as_fuzzy_term() noexcept;
virtual const EquivQueryNode* as_equiv_query_node() const noexcept;
+ virtual bool is_same_element_query_node() const noexcept;
virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env);
protected:
template <typename HitListType>
diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp
index cd9c693ca1c..fee1feb511d 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.cpp
@@ -95,4 +95,10 @@ SameElementQueryNode::multi_index_terms() const noexcept
return true;
}
+bool
+SameElementQueryNode::is_same_element_query_node() const noexcept
+{
+ return true;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h
index 37fb3dbba52..87f5d06d35b 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/same_element_query_node.h
@@ -18,6 +18,7 @@ public:
const HitList & evaluateHits(HitList & hl) const override;
void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const fef::IIndexEnvironment& index_env) override;
bool multi_index_terms() const noexcept override;
+ bool is_same_element_query_node() const noexcept override;
};
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
index 726afcc959b..104309f50fa 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
@@ -3,6 +3,7 @@
#include "indexenvironment.h"
#include <vespa/searchlib/fef/i_ranking_assets_repo.h>
#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/vespalib/stllike/hash_set.h>
using namespace search::fef;
@@ -39,6 +40,37 @@ IndexEnvironment::addField(const vespalib::string& name,
return true;
}
+/*
+ * Ensure that array and map fields are known by the index
+ * environment, allowing the matches features to be used with the
+ * sameElement query operator. FieldSearchSpecMap::buildFromConfig()
+ * propagates the name to field id mapping for the added virtual
+ * fields.
+ */
+void
+IndexEnvironment::add_virtual_fields()
+{
+ vespalib::hash_set<vespalib::string> vfields;
+ for (auto& field : _fields) {
+ vespalib::stringref name(field.name());
+ auto pos = name.rfind('.');
+ while (pos != vespalib::string::npos) {
+ name = name.substr(0, pos);
+ if (_fieldNames.contains(name)) {
+ break;
+ }
+ vfields.insert(name);
+ pos = name.rfind('.');
+ }
+ }
+ for (auto& vfield : vfields) {
+ FieldInfo info(FieldType::VIRTUAL, FieldInfo::CollectionType::ARRAY, vfield, _fields.size());
+ info.set_data_type(FieldInfo::DataType::COMBINED);
+ _fields.push_back(info);
+ _fieldNames[vfield] = info.id();
+ }
+}
+
void
IndexEnvironment::fixup_fields()
{
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
index 50e6898262d..fdf5d7d870e 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
@@ -78,6 +78,8 @@ public:
bool isAttribute,
search::fef::FieldInfo::DataType data_type);
+ void add_virtual_fields();
+
void fixup_fields();
search::fef::Properties & getProperties() { return _properties; }
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
index 3efeb8ef168..c1ca5daf1cb 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
@@ -78,20 +78,37 @@ IndexEnvPrototype::detectFields(const vespa::config::search::vsm::VsmfieldsConfi
}
}
+void
+IndexEnvPrototype::add_virtual_fields()
+{
+ _prototype.add_virtual_fields();
+}
+
namespace {
FieldIdTList
buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef::IIndexEnvironment & indexEnv,
- const VsmfieldsConfig::Documenttype::IndexVector & indexes)
+ const VsmfieldsConfig::Documenttype::IndexVector & indexes, bool prefer_virtual_fields)
{
LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size());
FieldIdTList ifm;
+ if (prefer_virtual_fields) {
+ /*
+ * Stop at an existing virtual field when setting up views
+ * used by the same element query operator.
+ */
+ auto info = indexEnv.getFieldByName(ci.name);
+ if (info != nullptr && info->type() == search::fef::FieldType::VIRTUAL) {
+ ifm.push_back(info->id());
+ return ifm;
+ }
+ }
for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) {
LOG(spam, "Parsing field %s", cf.name.c_str());
auto foundIndex = std::find_if(indexes.begin(), indexes.end(),
[&cf](const auto & v) { return v.name == cf.name;});
if ((foundIndex != indexes.end()) && (cf.name != ci.name)) {
- FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes);
+ FieldIdTList sub = buildFieldSet(*foundIndex, indexEnv, indexes, prefer_virtual_fields);
ifm.insert(ifm.end(), sub.begin(), sub.end());
} else {
const FieldInfo * info = indexEnv.getFieldByName(cf.name);
@@ -111,15 +128,15 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const search::fef
}
void
-RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields)
+RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields)
{
for(const VsmfieldsConfig::Documenttype & di : fields->documenttype) {
LOG(debug, "Looking through indexes for documenttype '%s'", di.name.c_str());
for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) {
- FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index);
- if (_views.find(ci.name) == _views.end()) {
+ FieldIdTList view = buildFieldSet(ci, _protoEnv.current(), di.index, prefer_virtual_fields);
+ if (views.find(ci.name) == views.end()) {
std::sort(view.begin(), view.end()); // lowest field id first
- _views[ci.name] = view;
+ views[ci.name] = view;
} else {
LOG(warning, "We already have a view for index '%s'. Drop the new view.", ci.name.c_str());
}
@@ -127,6 +144,13 @@ RankManager::Snapshot::buildFieldMappings(const VsmfieldsHandle & fields)
}
}
+void
+RankManager::Snapshot::build_field_mappings(const VsmfieldsHandle& fields)
+{
+ build_field_mappings(fields, _views, false);
+ build_field_mappings(fields, _same_element_views, true);
+}
+
bool
RankManager::Snapshot::initRankSetup(const BlueprintFactory & factory)
{
@@ -170,7 +194,8 @@ RankManager::Snapshot::Snapshot() :
_indexEnv(),
_rankSetup(),
_rpmap(),
- _views()
+ _views(),
+ _same_element_views()
{
}
@@ -181,7 +206,8 @@ RankManager::Snapshot::setup(const RankManager & rm)
{
VsmfieldsHandle fields = rm._vsmAdapter->getFieldsConfig();
_protoEnv.detectFields(*fields);
- buildFieldMappings(fields);
+ _protoEnv.add_virtual_fields();
+ build_field_mappings(fields);
if (!initRankSetup(rm._blueprintFactory)) {
return false;
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h
index 12785daeb89..52d44420ebc 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.h
@@ -19,6 +19,7 @@ private:
public:
IndexEnvPrototype();
void detectFields(const vespa::config::search::vsm::VsmfieldsConfig &fields);
+ void add_virtual_fields();
void set_ranking_assets_repo(std::shared_ptr<const search::fef::IRankingAssetsRepo> repo) {
_prototype.set_ranking_assets_repo(std::move(repo));
}
@@ -53,9 +54,11 @@ public:
std::vector<std::shared_ptr<const search::fef::RankSetup>> _rankSetup; // rank setup per rank profile
Map _rpmap;
ViewMap _views;
+ ViewMap _same_element_views;
void addProperties(const vespa::config::search::RankProfilesConfig & cfg);
- void buildFieldMappings(const vsm::VsmfieldsHandle & fields);
+ void build_field_mappings(const vsm::VsmfieldsHandle& fields, ViewMap& views, bool prefer_virtual_fields);
+ void build_field_mappings(const vsm::VsmfieldsHandle& fields);
bool initRankSetup(const search::fef::BlueprintFactory & factory);
bool setup(const RankManager & manager);
int getIndex(const vespalib::string & key) const {
@@ -74,9 +77,13 @@ public:
const IndexEnvironment & getIndexEnvironment(const vespalib::string &rankProfile) const {
return _indexEnv[getIndex(rankProfile)];
}
- const View *getView(const vespalib::string & index) const {
- auto itr = _views.find(index);
- if (itr != _views.end()) {
+ const IndexEnvironment& get_proto_index_environment() const {
+ return _protoEnv.current();
+ }
+ const View *getView(const vespalib::string & index, bool is_same_element) const {
+ auto& views = is_same_element ? _same_element_views : _views;
+ auto itr = views.find(index);
+ if (itr != views.end()) {
return &itr->second;
}
return nullptr;
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index cfaaac8b197..72c1ca60814 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -7,6 +7,7 @@
#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h>
#include <vespa/vsm/vsm/fieldsearchspec.h>
+#include <vespa/vespalib/stllike/hash_set.h>
#include <algorithm>
#include <cmath>
#include <vespa/log/log.h>
@@ -61,7 +62,7 @@ RankProcessor::resolve_fields_from_children(QueryTermData& qtd, const MultiTerm&
vespalib::hash_set<uint32_t> field_ids;
for (auto& subterm : mt.get_terms()) {
vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index());
- const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
+ const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, false);
if (view != nullptr) {
for (auto field_id : *view) {
field_ids.insert(field_id);
@@ -86,7 +87,7 @@ void
RankProcessor::resolve_fields_from_term(QueryTermData& qtd, const search::streaming::QueryTerm& term)
{
vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index());
- const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
+ const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName, term.is_same_element_query_node());
if (view != nullptr) {
for (auto field_id : *view) {
qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id));
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index dc58b607848..3fdc117dc88 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -498,7 +498,7 @@ SearchVisitor::init(const Parameters & params)
VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size()));
// Create mapping from field name to field id, from field id to search spec,
// and from index name to list of field ids
- _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
+ _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config(), _env->get_rank_manager_snapshot()->get_proto_index_environment());
auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs());
// Add extra elements to mapping from field name to field id
_fieldSearchSpecMap.buildFromConfig(additionalFields);
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index 1dbac859262..1ab1b16cb86 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "fieldsearchspec.h"
+#include <vespa/searchlib/fef/fieldinfo.h>
+#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/query/streaming/equiv_query_node.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vsm/searcher/boolfieldsearcher.h>
@@ -284,7 +286,7 @@ normalize_mode(VsmfieldsConfig::Fieldspec::Normalize normalize_mode) {
}
void
-FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
+FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env)
{
LOG(spam, "Parsing %zd fields", conf->fieldspec.size());
for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) {
@@ -295,6 +297,15 @@ FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
_nameIdMap.add(cfs.name, fieldId);
LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str());
}
+ /*
+ * Index env is based on same vsm fields config but has additional
+ * virtual fields, cf. IndexEnvironment::add_virtual_fields().
+ */
+ for (uint32_t field_id = specMap().size(); field_id < index_env.getNumFields(); ++field_id) {
+ auto& field = *index_env.getField(field_id);
+ assert(field.type() == search::fef::FieldType::VIRTUAL);
+ _nameIdMap.add(field.name(), field_id);
+ }
LOG(spam, "Parsing %zd document types", conf->documenttype.size());
for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) {
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
index e90aea1371b..5b5a6b9a783 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
@@ -5,6 +5,8 @@
#include <vespa/vsm/searcher/fieldsearcher.h>
#include <vespa/vsm/config/vsm-cfif.h>
+namespace search::fef { class IIndexEnvironment; }
+
namespace vsm {
class FieldSearchSpec
@@ -67,7 +69,7 @@ public:
* and a mapping from field name to field id. It then iterates over all document types and index names
* and creates a mapping from index name to list of field ids for each document type.
**/
- void buildFromConfig(const VsmfieldsHandle & conf);
+ void buildFromConfig(const VsmfieldsHandle & conf, const search::fef::IIndexEnvironment& index_env);
/**
* Iterates over the given field name vector adding extra elements to the mapping from field name to field id.