diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-02-11 11:41:22 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-02-13 17:37:08 +0000 |
commit | 340d608aa37b5fff24a0916f0320eb131f3558e8 (patch) | |
tree | 9d905ef5d25eb88665864f1a24616387f78938c6 | |
parent | a2f05742236b0873bb6991b0134d6991d0b5d4b2 (diff) |
- Add interface for getting summary features for a single hit too.
- Produce summaryhits for grouping results too.
7 files changed, 64 insertions, 6 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index 75eccaef83c..15017fa3334 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -190,6 +190,30 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram, return retval; } +FeatureSet::SP +HitCollector::getFeatureSet(IRankProgram &rankProgram, + search::DocumentIdT docId, + const FeatureResolver &resolver, + const search::StringStringMap &feature_rename_map) +{ + LOG(debug, "docId = %d, _hits.size = %zu", docId, _hits.size()); + if (resolver.num_features() == 0 || _hits.empty()) { + return std::make_shared<FeatureSet>(); + } + auto names = FefUtils::extract_feature_names(resolver, feature_rename_map); + FeatureSet::SP retval = std::make_shared<FeatureSet>(names, _hits.size()); + for (const Hit & hit : _hits) { + LOG(debug, "Checking docId=%d", hit.getDocId()); + if (docId == hit.getDocId()) { + rankProgram.run(docId, hit.getMatchData()); + auto *f = retval->getFeaturesByIndex(retval->addDocId(docId)); + FefUtils::extract_feature_values(resolver, docId, f); + return retval; + } + } + return retval; +} + FeatureValues HitCollector::get_match_features(IRankProgram& rank_program, const FeatureResolver& resolver, diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index 3a05f9dca86..f3d1b68aa3f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -153,6 +153,11 @@ public: const FeatureResolver &resolver, const search::StringStringMap &feature_rename_map) const; + vespalib::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram, + search::DocumentIdT docId, + const FeatureResolver &resolver, + const search::StringStringMap &feature_rename_map); + vespalib::FeatureValues 
get_match_features(IRankProgram& rank_program, const FeatureResolver& resolver, const search::StringStringMap& feature_rename_map) const; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 167d5ecde4c..ac7a61e5888 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -256,6 +256,19 @@ RankProcessor::calculateFeatureSet() return sf; } +FeatureSet::SP +RankProcessor::calculateFeatureSet(search::DocumentIdT docId) +{ + LOG(debug, "Calculate feature set for docId = %d", docId); + RankProgram &rankProgram = *(_summaryProgram ? _summaryProgram : _rankProgram); + search::fef::FeatureResolver resolver(rankProgram.get_seeds(false)); + LOG(debug, "Feature handles: numNames(%ld)", resolver.num_features()); + RankProgramWrapper wrapper(*_match_data); + FeatureSet::SP sf = _hitCollector->getFeatureSet(wrapper, docId, resolver, _rankSetup.get_feature_rename_map()); + LOG(debug, "Feature set: numFeatures(%u), numDocs(%u)", sf->numFeatures(), sf->numDocs()); + return sf; +} + FeatureValues RankProcessor::calculate_match_features() { diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index b9ed07f1170..476ed013d23 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -77,6 +77,7 @@ public: static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query); void runRankProgram(uint32_t docId); vespalib::FeatureSet::SP calculateFeatureSet(); + vespalib::FeatureSet::SP calculateFeatureSet(search::DocumentIdT docId); void fillSearchResult(vdslib::SearchResult & searchResult); const search::fef::MatchData &getMatchData() const { return *_match_data; } void setRankScore(double score) { _score = score; } diff --git 
a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 9a0b720f054..28a2a521cf7 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -288,6 +288,7 @@ SearchVisitor::SearchVisitor(StorageComponent& component, _env(get_search_environment_snapshot(vEnv, params)), _params(params), _init_called(false), + _collectGroupingHits(false), _docSearchedCount(0), _hitCount(0), _hitsRejectedCount(0), @@ -753,6 +754,14 @@ SearchVisitor::RankController::collectMatchedDocument(bool hasSorting, } } +vespalib::FeatureSet::SP +SearchVisitor::RankController::getFeatureSet(search::DocumentIdT docId) { + if (_hasRanking && !_rankSetup->getSummaryFeatures().empty()) { + return _rankProcessor->calculateFeatureSet(docId); + } + return {}; +} + void SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback, vdslib::SearchResult & searchResult) { @@ -763,15 +772,13 @@ SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback // calculate summary features and set them on the callback object if (!_rankSetup->getSummaryFeatures().empty()) { LOG(debug, "Calculate summary features"); - vespalib::FeatureSet::SP sf = _rankProcessor->calculateFeatureSet(); - docsumsStateCallback.setSummaryFeatures(sf); + docsumsStateCallback.setSummaryFeatures(_rankProcessor->calculateFeatureSet()); } // calculate rank features and set them on the callback object if (_dumpFeatures) { LOG(debug, "Calculate rank features"); - vespalib::FeatureSet::SP rf = _dumpProcessor->calculateFeatureSet(); - docsumsStateCallback.setRankFeatures(rf); + docsumsStateCallback.setRankFeatures(_dumpProcessor->calculateFeatureSet()); } } } @@ -996,6 +1003,9 @@ SearchVisitor::setupGrouping(const std::vector<char> & groupingBlob) grouping.configureStaticStuff(stuff); HitsResultPreparator 
preparator(_summaryGenerator); grouping.select(preparator, preparator); + if (preparator.getNumHitsAggregators() > 0) { + _collectGroupingHits = true; + } grouping.preAggregate(false); if (!grouping.getAll() || (preparator.getNumHitsAggregators() == 0)) { _groupingList.push_back(groupingPtr); @@ -1082,6 +1092,9 @@ SearchVisitor::handleDocument(StorageDocument::SP documentSP) _syntheticFieldsController.onDocumentMatch(document, documentId); SingleDocumentStore single(document); _summaryGenerator.setDocsumCache(single); + if (_collectGroupingHits) { + _summaryGenerator.getDocsumCallback().setSummaryFeatures(_rankController.getFeatureSet(document.getDocId())); + } group(document.docDoc(), rp.getRankScore(), false); } else { _hitsRejectedCount++; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index d8d97830244..33d5a14084f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -210,6 +210,7 @@ private: * @param docsumsStateCallback state object to store summary features and rank features. 
**/ void onCompletedVisiting(vsm::GetDocsumsStateCallback & docsumsStateCallback, vdslib::SearchResult & searchResult); + vespalib::FeatureSet::SP getFeatureSet(search::DocumentIdT docId); }; /** @@ -455,6 +456,7 @@ private: std::shared_ptr<const SearchEnvironmentSnapshot> _env; vdslib::Parameters _params; bool _init_called; + bool _collectGroupingHits; size_t _docSearchedCount; size_t _hitCount; size_t _hitsRejectedCount; diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h index d3b3abd5fbc..a135f1d9fa2 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h +++ b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h @@ -37,8 +37,8 @@ public: void fillSummaryFeatures(GetDocsumsState& state) override; void fillRankFeatures(GetDocsumsState& state) override; std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) override; - void setSummaryFeatures(const vespalib::FeatureSet::SP & sf) { _summaryFeatures = sf; } - void setRankFeatures(const vespalib::FeatureSet::SP & rf) { _rankFeatures = rf; } + void setSummaryFeatures(vespalib::FeatureSet::SP sf) { _summaryFeatures = std::move(sf); } + void setRankFeatures(vespalib::FeatureSet::SP rf) { _rankFeatures = std::move(rf); } void set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler); ~GetDocsumsStateCallback() override; }; |