summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2023-04-26 15:44:12 +0200
committer: Tor Egge <Tor.Egge@online.no> 2023-04-26 15:53:51 +0200
commit: 9792400f4465d839de3987c70f0cc88cd74fdf49 (patch)
tree: 742e09b0a68446f0a7634a00227bd0f0703b1c98 /searchcore
parent: c4dacaddf9bd2aff43db43180b389488be7b9fc0 (diff)
Consolidate feature extraction between indexed and streaming search.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp 42
1 files changed, 7 insertions, 35 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
index 30958214b72..4f9e1f6d1f4 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
@@ -9,6 +9,7 @@
#include <vespa/vespalib/util/thread_bundle.h>
#include <vespa/searchlib/fef/feature_resolver.h>
#include <vespa/searchlib/fef/rank_program.h>
+#include <vespa/searchlib/fef/utils.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
using vespalib::Doom;
@@ -25,38 +26,9 @@ namespace proton::matching {
using OrderedDocs = ExtractFeatures::OrderedDocs;
using search::StringStringMap;
-namespace {
-
-auto extract_names(const FeatureResolver &resolver, const StringStringMap &renames) {
- std::vector<vespalib::string> result;
- result.reserve(resolver.num_features());
- for (size_t i = 0; i < resolver.num_features(); ++i) {
- vespalib::string name = resolver.name_of(i);
- auto iter = renames.find(name);
- if (iter != renames.end()) {
- name = iter->second;
- }
- result.emplace_back(name);
- }
- return result;
-}
+using FefUtils = search::fef::Utils;
-void extract_values(const FeatureResolver &resolver, uint32_t docid, FeatureSet::Value *dst) {
- for (uint32_t i = 0; i < resolver.num_features(); ++i) {
- if (resolver.is_object(i)) {
- auto obj = resolver.resolve(i).as_object(docid);
- if (!obj.get().type().is_double()) {
- vespalib::nbostream buf;
- encode_value(obj.get(), buf);
- dst[i].set_data(vespalib::Memory(buf.peek(), buf.size()));
- } else {
- dst[i].set_double(obj.get().as_double());
- }
- } else {
- dst[i].set_double(resolver.resolve(i).as_number(docid));
- }
- }
-}
+namespace {
struct MyChunk : Runnable {
const std::pair<uint32_t,uint32_t> *begin;
@@ -77,7 +49,7 @@ struct MyChunk : Runnable {
}
search.unpack(pos->first);
auto *dst = &result.values[pos->second * resolver.num_features()];
- extract_values(resolver, pos->first, dst);
+ FefUtils::extract_feature_values(resolver, pos->first, dst);
}
}
};
@@ -121,7 +93,7 @@ ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_progr
const Doom &doom, const StringStringMap &renames)
{
FeatureResolver resolver(rank_program.get_seeds(false));
- auto result = std::make_unique<FeatureSet>(extract_names(resolver, renames), docs.size());
+ auto result = std::make_unique<FeatureSet>(FefUtils::extract_feature_names(resolver, renames), docs.size());
if (!docs.empty()) {
search.initRange(docs.front(), docs.back()+1);
for (uint32_t docid: docs) {
@@ -130,7 +102,7 @@ ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_progr
}
search.unpack(docid);
auto *dst = result->getFeaturesByIndex(result->addDocId(docid));
- extract_values(resolver, docid, dst);
+ FefUtils::extract_feature_values(resolver, docid, dst);
}
}
return result;
@@ -143,7 +115,7 @@ ExtractFeatures::get_match_features(const MatchToolsFactory &mtf, const OrderedD
auto tools = mtf.createMatchTools();
tools->setup_match_features();
FeatureResolver resolver(tools->rank_program().get_seeds(false));
- result.names = extract_names(resolver, mtf.get_feature_rename_map());
+ result.names = FefUtils::extract_feature_names(resolver, mtf.get_feature_rename_map());
result.values.resize(result.names.size() * docs.size());
size_t num_threads = thread_bundle.size();
std::vector<Runnable::UP> chunks;