summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <3535158+havardpe@users.noreply.github.com>2021-11-08 12:27:13 +0100
committerGitHub <noreply@github.com>2021-11-08 12:27:13 +0100
commit996866ef40c42e1d6c26bc5be47eaf099d9f4bfd (patch)
treecb5b65c6dfa1fc7a469486ea4b2446061332ef10
parent5ee8c8f31df40c965eeff596bfbbb6d9942b3973 (diff)
parent3d41d24539d816c97f37052180c0ace16ba6cf8b (diff)
Merge pull request #19908 from vespa-engine/havardpe/refactor-to-reduce-duplication
refactor code to reduce duplication
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp84
1 files changed, 37 insertions, 47 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
index ef03fac2f6a..b3d59f9ac5f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp
@@ -24,6 +24,32 @@ using OrderedDocs = ExtractFeatures::OrderedDocs;
namespace {
+auto extract_names(const FeatureResolver &resolver) {
+ std::vector<vespalib::string> result;
+ result.reserve(resolver.num_features());
+ for (size_t i = 0; i < resolver.num_features(); ++i) {
+ result.emplace_back(resolver.name_of(i));
+ }
+ return result;
+}
+
+void extract_values(const FeatureResolver &resolver, uint32_t docid, FeatureSet::Value *dst) {
+ for (uint32_t i = 0; i < resolver.num_features(); ++i) {
+ if (resolver.is_object(i)) {
+ auto obj = resolver.resolve(i).as_object(docid);
+ if (!obj.get().type().is_double()) {
+ vespalib::nbostream buf;
+ encode_value(obj.get(), buf);
+ dst[i].set_data(vespalib::Memory(buf.peek(), buf.size()));
+ } else {
+ dst[i].set_double(obj.get().as_double());
+ }
+ } else {
+ dst[i].set_double(resolver.resolve(i).as_number(docid));
+ }
+ }
+}
+
struct MyChunk : Runnable {
const std::pair<uint32_t,uint32_t> *begin;
const std::pair<uint32_t,uint32_t> *end;
@@ -32,41 +58,26 @@ struct MyChunk : Runnable {
const std::pair<uint32_t,uint32_t> *end_in,
FeatureValues &result_in)
: begin(begin_in), end(end_in), result(result_in) {}
- void calculate_features(SearchIterator &search, FeatureResolver &resolver) {
- size_t num_features = result.names.size();
+ void calculate_features(SearchIterator &search, const FeatureResolver &resolver) {
assert(end > begin);
- assert(num_features == resolver.num_features());
+ assert(resolver.num_features() == result.names.size());
search.initRange(begin[0].first, end[-1].first + 1);
for (auto pos = begin; pos != end; ++pos) {
- uint32_t docid = pos->first;
- search.unpack(docid);
- auto * f = &result.values[pos->second * num_features];
- for (uint32_t i = 0; i < num_features; ++i) {
- if (resolver.is_object(i)) {
- auto obj = resolver.resolve(i).as_object(docid);
- if (!obj.get().type().is_double()) {
- vespalib::nbostream buf;
- encode_value(obj.get(), buf);
- f[i].set_data(vespalib::Memory(buf.peek(), buf.size()));
- } else {
- f[i].set_double(obj.get().as_double());
- }
- } else {
- f[i].set_double(resolver.resolve(i).as_number(docid));
- }
- }
+ search.unpack(pos->first);
+ auto *dst = &result.values[pos->second * resolver.num_features()];
+ extract_values(resolver, pos->first, dst);
}
}
};
struct FirstChunk : MyChunk {
SearchIterator &search;
- FeatureResolver &resolver;
+ const FeatureResolver &resolver;
FirstChunk(const std::pair<uint32_t,uint32_t> *begin_in,
const std::pair<uint32_t,uint32_t> *end_in,
FeatureValues &result_in,
SearchIterator &search_in,
- FeatureResolver &resolver_in)
+ const FeatureResolver &resolver_in)
: MyChunk(begin_in, end_in, result_in),
search(search_in),
resolver(resolver_in) {}
@@ -110,32 +121,14 @@ struct MyWork {
FeatureSet::UP
ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_program, const std::vector<uint32_t> &docs)
{
- std::vector<vespalib::string> featureNames;
FeatureResolver resolver(rank_program.get_seeds(false));
- featureNames.reserve(resolver.num_features());
- for (size_t i = 0; i < resolver.num_features(); ++i) {
- featureNames.emplace_back(resolver.name_of(i));
- }
- auto result = std::make_unique<FeatureSet>(featureNames, docs.size());
+ auto result = std::make_unique<FeatureSet>(extract_names(resolver), docs.size());
if (!docs.empty()) {
search.initRange(docs.front(), docs.back()+1);
for (uint32_t docid: docs) {
search.unpack(docid);
- auto * f = result->getFeaturesByIndex(result->addDocId(docid));
- for (uint32_t i = 0; i < featureNames.size(); ++i) {
- if (resolver.is_object(i)) {
- auto obj = resolver.resolve(i).as_object(docid);
- if (!obj.get().type().is_double()) {
- vespalib::nbostream buf;
- encode_value(obj.get(), buf);
- f[i].set_data(vespalib::Memory(buf.peek(), buf.size()));
- } else {
- f[i].set_double(obj.get().as_double());
- }
- } else {
- f[i].set_double(resolver.resolve(i).as_number(docid));
- }
- }
+ auto *dst = result->getFeaturesByIndex(result->addDocId(docid));
+ extract_values(resolver, docid, dst);
}
}
return result;
@@ -148,10 +141,7 @@ ExtractFeatures::get_match_features(const MatchToolsFactory &mtf, const OrderedD
auto tools = mtf.createMatchTools();
tools->setup_match_features();
FeatureResolver resolver(tools->rank_program().get_seeds(false));
- result.names.reserve(resolver.num_features());
- for (size_t i = 0; i < resolver.num_features(); ++i) {
- result.names.emplace_back(resolver.name_of(i));
- }
+ result.names = extract_names(resolver);
result.values.resize(result.names.size() * docs.size());
MyWork work(thread_bundle);
size_t per_thread = docs.size() / work.num_threads;