diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-04-26 15:44:12 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-04-26 15:53:51 +0200 |
commit | 9792400f4465d839de3987c70f0cc88cd74fdf49 (patch) | |
tree | 742e09b0a68446f0a7634a00227bd0f0703b1c98 /searchcore | |
parent | c4dacaddf9bd2aff43db43180b389488be7b9fc0 (diff) |
Consolidate feature extraction between indexed and streaming search.
Diffstat (limited to 'searchcore')
-rw-r--r-- | searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp | 42 |
1 file changed, 7 insertions, 35 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp index 30958214b72..4f9e1f6d1f4 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp @@ -9,6 +9,7 @@ #include <vespa/vespalib/util/thread_bundle.h> #include <vespa/searchlib/fef/feature_resolver.h> #include <vespa/searchlib/fef/rank_program.h> +#include <vespa/searchlib/fef/utils.h> #include <vespa/searchlib/queryeval/searchiterator.h> using vespalib::Doom; @@ -25,38 +26,9 @@ namespace proton::matching { using OrderedDocs = ExtractFeatures::OrderedDocs; using search::StringStringMap; -namespace { - -auto extract_names(const FeatureResolver &resolver, const StringStringMap &renames) { - std::vector<vespalib::string> result; - result.reserve(resolver.num_features()); - for (size_t i = 0; i < resolver.num_features(); ++i) { - vespalib::string name = resolver.name_of(i); - auto iter = renames.find(name); - if (iter != renames.end()) { - name = iter->second; - } - result.emplace_back(name); - } - return result; -} +using FefUtils = search::fef::Utils; -void extract_values(const FeatureResolver &resolver, uint32_t docid, FeatureSet::Value *dst) { - for (uint32_t i = 0; i < resolver.num_features(); ++i) { - if (resolver.is_object(i)) { - auto obj = resolver.resolve(i).as_object(docid); - if (!obj.get().type().is_double()) { - vespalib::nbostream buf; - encode_value(obj.get(), buf); - dst[i].set_data(vespalib::Memory(buf.peek(), buf.size())); - } else { - dst[i].set_double(obj.get().as_double()); - } - } else { - dst[i].set_double(resolver.resolve(i).as_number(docid)); - } - } -} +namespace { struct MyChunk : Runnable { const std::pair<uint32_t,uint32_t> *begin; @@ -77,7 +49,7 @@ struct MyChunk : Runnable { } search.unpack(pos->first); auto *dst = &result.values[pos->second * resolver.num_features()]; - 
extract_values(resolver, pos->first, dst); + FefUtils::extract_feature_values(resolver, pos->first, dst); } } }; @@ -121,7 +93,7 @@ ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_progr const Doom &doom, const StringStringMap &renames) { FeatureResolver resolver(rank_program.get_seeds(false)); - auto result = std::make_unique<FeatureSet>(extract_names(resolver, renames), docs.size()); + auto result = std::make_unique<FeatureSet>(FefUtils::extract_feature_names(resolver, renames), docs.size()); if (!docs.empty()) { search.initRange(docs.front(), docs.back()+1); for (uint32_t docid: docs) { @@ -130,7 +102,7 @@ ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_progr } search.unpack(docid); auto *dst = result->getFeaturesByIndex(result->addDocId(docid)); - extract_values(resolver, docid, dst); + FefUtils::extract_feature_values(resolver, docid, dst); } } return result; @@ -143,7 +115,7 @@ ExtractFeatures::get_match_features(const MatchToolsFactory &mtf, const OrderedD auto tools = mtf.createMatchTools(); tools->setup_match_features(); FeatureResolver resolver(tools->rank_program().get_seeds(false)); - result.names = extract_names(resolver, mtf.get_feature_rename_map()); + result.names = FefUtils::extract_feature_names(resolver, mtf.get_feature_rename_map()); result.values.resize(result.names.size() * docs.size()); size_t num_threads = thread_bundle.size(); std::vector<Runnable::UP> chunks; |