summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-02-11 11:41:22 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-02-13 17:37:08 +0000
commit 340d608aa37b5fff24a0916f0320eb131f3558e8 (patch)
tree 9d905ef5d25eb88665864f1a24616387f78938c6 /streamingvisitors
parent a2f05742236b0873bb6991b0134d6991d0b5d4b2 (diff)
- Add interface for getting summary features for a single hit too.
- Produce summaryhits for grouping results too.
Diffstat (limited to 'streamingvisitors')
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp 24
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/hitcollector.h 5
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp 13
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankprocessor.h 1
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp 21
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/searchvisitor.h 2
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h 4
7 files changed, 64 insertions, 6 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index 75eccaef83c..15017fa3334 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -190,6 +190,30 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram,
return retval;
}
+FeatureSet::SP
+HitCollector::getFeatureSet(IRankProgram &rankProgram,
+ search::DocumentIdT docId,
+ const FeatureResolver &resolver,
+ const search::StringStringMap &feature_rename_map)
+{
+ LOG(debug, "docId = %d, _hits.size = %zu", docId, _hits.size());
+ if (resolver.num_features() == 0 || _hits.empty()) {
+ return std::make_shared<FeatureSet>();
+ }
+ auto names = FefUtils::extract_feature_names(resolver, feature_rename_map);
+ FeatureSet::SP retval = std::make_shared<FeatureSet>(names, _hits.size());
+ for (const Hit & hit : _hits) {
+ LOG(debug, "Checking docId=%d", hit.getDocId());
+ if (docId == hit.getDocId()) {
+ rankProgram.run(docId, hit.getMatchData());
+ auto *f = retval->getFeaturesByIndex(retval->addDocId(docId));
+ FefUtils::extract_feature_values(resolver, docId, f);
+ return retval;
+ }
+ }
+ return retval;
+}
+
FeatureValues
HitCollector::get_match_features(IRankProgram& rank_program,
const FeatureResolver& resolver,
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
index 3a05f9dca86..f3d1b68aa3f 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -153,6 +153,11 @@ public:
const FeatureResolver &resolver,
const search::StringStringMap &feature_rename_map) const;
+ vespalib::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram,
+ search::DocumentIdT docId,
+ const FeatureResolver &resolver,
+ const search::StringStringMap &feature_rename_map);
+
vespalib::FeatureValues get_match_features(IRankProgram& rank_program,
const FeatureResolver& resolver,
const search::StringStringMap& feature_rename_map) const;
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index 167d5ecde4c..ac7a61e5888 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -256,6 +256,19 @@ RankProcessor::calculateFeatureSet()
return sf;
}
+FeatureSet::SP
+RankProcessor::calculateFeatureSet(search::DocumentIdT docId)
+{
+ LOG(debug, "Calculate feature set for docId = %d", docId);
+ RankProgram &rankProgram = *(_summaryProgram ? _summaryProgram : _rankProgram);
+ search::fef::FeatureResolver resolver(rankProgram.get_seeds(false));
+ LOG(debug, "Feature handles: numNames(%ld)", resolver.num_features());
+ RankProgramWrapper wrapper(*_match_data);
+ FeatureSet::SP sf = _hitCollector->getFeatureSet(wrapper, docId, resolver, _rankSetup.get_feature_rename_map());
+ LOG(debug, "Feature set: numFeatures(%u), numDocs(%u)", sf->numFeatures(), sf->numDocs());
+ return sf;
+}
+
FeatureValues
RankProcessor::calculate_match_features()
{
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
index b9ed07f1170..476ed013d23 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -77,6 +77,7 @@ public:
static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query);
void runRankProgram(uint32_t docId);
vespalib::FeatureSet::SP calculateFeatureSet();
+ vespalib::FeatureSet::SP calculateFeatureSet(search::DocumentIdT docId);
void fillSearchResult(vdslib::SearchResult & searchResult);
const search::fef::MatchData &getMatchData() const { return *_match_data; }
void setRankScore(double score) { _score = score; }
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index 9a0b720f054..28a2a521cf7 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -288,6 +288,7 @@ SearchVisitor::SearchVisitor(StorageComponent& component,
_env(get_search_environment_snapshot(vEnv, params)),
_params(params),
_init_called(false),
+ _collectGroupingHits(false),
_docSearchedCount(0),
_hitCount(0),
_hitsRejectedCount(0),
@@ -753,6 +754,14 @@ SearchVisitor::RankController::collectMatchedDocument(bool hasSorting,
}
}
+vespalib::FeatureSet::SP
+SearchVisitor::RankController::getFeatureSet(search::DocumentIdT docId) {
+ if (_hasRanking && !_rankSetup->getSummaryFeatures().empty()) {
+ return _rankProcessor->calculateFeatureSet(docId);
+ }
+ return {};
+}
+
void
SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback, vdslib::SearchResult & searchResult)
{
@@ -763,15 +772,13 @@ SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback
// calculate summary features and set them on the callback object
if (!_rankSetup->getSummaryFeatures().empty()) {
LOG(debug, "Calculate summary features");
- vespalib::FeatureSet::SP sf = _rankProcessor->calculateFeatureSet();
- docsumsStateCallback.setSummaryFeatures(sf);
+ docsumsStateCallback.setSummaryFeatures(_rankProcessor->calculateFeatureSet());
}
// calculate rank features and set them on the callback object
if (_dumpFeatures) {
LOG(debug, "Calculate rank features");
- vespalib::FeatureSet::SP rf = _dumpProcessor->calculateFeatureSet();
- docsumsStateCallback.setRankFeatures(rf);
+ docsumsStateCallback.setRankFeatures(_dumpProcessor->calculateFeatureSet());
}
}
}
@@ -996,6 +1003,9 @@ SearchVisitor::setupGrouping(const std::vector<char> & groupingBlob)
grouping.configureStaticStuff(stuff);
HitsResultPreparator preparator(_summaryGenerator);
grouping.select(preparator, preparator);
+ if (preparator.getNumHitsAggregators() > 0) {
+ _collectGroupingHits = true;
+ }
grouping.preAggregate(false);
if (!grouping.getAll() || (preparator.getNumHitsAggregators() == 0)) {
_groupingList.push_back(groupingPtr);
@@ -1082,6 +1092,9 @@ SearchVisitor::handleDocument(StorageDocument::SP documentSP)
_syntheticFieldsController.onDocumentMatch(document, documentId);
SingleDocumentStore single(document);
_summaryGenerator.setDocsumCache(single);
+ if (_collectGroupingHits) {
+ _summaryGenerator.getDocsumCallback().setSummaryFeatures(_rankController.getFeatureSet(document.getDocId()));
+ }
group(document.docDoc(), rp.getRankScore(), false);
} else {
_hitsRejectedCount++;
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index d8d97830244..33d5a14084f 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -210,6 +210,7 @@ private:
* @param docsumsStateCallback state object to store summary features and rank features.
**/
void onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback, vdslib::SearchResult & searchResult);
+ vespalib::FeatureSet::SP getFeatureSet(search::DocumentIdT docId);
};
/**
@@ -455,6 +456,7 @@ private:
std::shared_ptr<const SearchEnvironmentSnapshot> _env;
vdslib::Parameters _params;
bool _init_called;
+ bool _collectGroupingHits;
size_t _docSearchedCount;
size_t _hitCount;
size_t _hitsRejectedCount;
diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h
index d3b3abd5fbc..a135f1d9fa2 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h
+++ b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h
@@ -37,8 +37,8 @@ public:
void fillSummaryFeatures(GetDocsumsState& state) override;
void fillRankFeatures(GetDocsumsState& state) override;
std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) override;
- void setSummaryFeatures(const vespalib::FeatureSet::SP & sf) { _summaryFeatures = sf; }
- void setRankFeatures(const vespalib::FeatureSet::SP & rf) { _rankFeatures = rf; }
+ void setSummaryFeatures(vespalib::FeatureSet::SP sf) { _summaryFeatures = std::move(sf); }
+ void setRankFeatures(vespalib::FeatureSet::SP rf) { _rankFeatures = std::move(rf); }
void set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler);
~GetDocsumsStateCallback() override;
};