From fbe9c1f4dddb9f7ca25964691c669f037d791df0 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 25 Apr 2023 11:09:40 +0200 Subject: Move search::FeatureValues to vespalib::FeatureValues in preparation for extending vdslib::SearchResult. --- .../src/tests/proton/matching/matching_test.cpp | 3 +- .../searchcore/proton/matching/docsum_matcher.cpp | 2 +- .../searchcore/proton/matching/docsum_matcher.h | 4 +- .../proton/matching/extract_features.cpp | 4 +- .../searchcore/proton/matching/extract_features.h | 6 +- .../searchcore/proton/matching/match_master.cpp | 2 +- .../vespa/searchcore/proton/matching/matcher.cpp | 2 +- .../src/vespa/searchcore/proton/matching/matcher.h | 6 +- .../common/summaryfeatures/summaryfeatures.cpp | 4 +- .../src/vespa/searchlib/common/CMakeLists.txt | 1 - .../src/vespa/searchlib/common/featureset.cpp | 90 ------------ searchlib/src/vespa/searchlib/common/featureset.h | 156 --------------------- searchlib/src/vespa/searchlib/engine/searchreply.h | 3 +- .../vespa/searchsummary/docsummary/docsumstate.h | 3 +- .../searchsummary/docsummary/rankfeaturesdfw.cpp | 2 + .../docsummary/summaryfeaturesdfw.cpp | 3 +- .../src/tests/hitcollector/hitcollector_test.cpp | 2 +- .../src/vespa/searchvisitor/hitcollector.cpp | 2 +- .../src/vespa/searchvisitor/hitcollector.h | 8 +- .../src/vespa/searchvisitor/rankprocessor.cpp | 2 +- .../src/vespa/searchvisitor/rankprocessor.h | 2 +- .../src/vespa/searchvisitor/searchvisitor.cpp | 4 +- streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h | 10 +- vespalib/src/vespa/vespalib/util/CMakeLists.txt | 1 + vespalib/src/vespa/vespalib/util/featureset.cpp | 90 ++++++++++++ vespalib/src/vespa/vespalib/util/featureset.h | 155 ++++++++++++++++++++ 26 files changed, 285 insertions(+), 282 deletions(-) delete mode 100644 searchlib/src/vespa/searchlib/common/featureset.cpp delete mode 100644 searchlib/src/vespa/searchlib/common/featureset.h create mode 100644 vespalib/src/vespa/vespalib/util/featureset.cpp create mode 100644 vespalib/src/vespa/vespalib/util/featureset.h diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index fd6f6af730c..4ad386afa3f 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +64,7 @@ using search::index::schema::DataType; using storage::spi::Timestamp; using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; +using vespalib::FeatureSet; using vespalib::nbostream; vespalib::ThreadBundle &ttb() { return vespalib::ThreadBundle::trivial(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp index fd5c3782b9a..6014df1c2f9 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -18,7 +18,6 @@ #include LOG_SETUP(".proton.matching.docsum_matcher"); -using search::FeatureSet; using search::MatchingElements; using search::MatchingElementsFields; using search::fef::FeatureResolver; @@ -29,6 +28,7 @@ using search::queryeval::IntermediateBlueprint; using search::queryeval::MatchingElementsSearch; using search::queryeval::SameElementBlueprint; using search::queryeval::SearchIterator; +using vespalib::FeatureSet; using AttrSearchCtx = search::attribute::ISearchContext; diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h index 006a443e539..bf99a6b1950 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h @@ -2,9 +2,9 @@ #pragma once -#include #include #include +#include #include #include @@ -21,7 +21,7 @@ class SearchSession; class DocsumMatcher { private: - using FeatureSet = search::FeatureSet; + using FeatureSet = vespalib::FeatureSet; using MatchingElementsFields = search::MatchingElementsFields; using MatchingElements = search::MatchingElements; diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp index 8f7970f5717..30958214b72 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp @@ -12,10 +12,10 @@ #include using vespalib::Doom; +using vespalib::FeatureSet; +using vespalib::FeatureValues; using vespalib::Runnable; using vespalib::ThreadBundle; -using search::FeatureSet; -using search::FeatureValues; using search::fef::FeatureResolver; using search::fef::RankProgram; using search::queryeval::SearchIterator; diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.h b/searchcore/src/vespa/searchcore/proton/matching/extract_features.h index 48c3476f164..09da89250a2 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/extract_features.h +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.h @@ -2,8 +2,8 @@ #pragma once -#include #include +#include #include namespace vespalib { class Doom; }; @@ -16,8 +16,8 @@ namespace proton::matching { class MatchToolsFactory; struct ExtractFeatures { - using FeatureSet = search::FeatureSet; - using FeatureValues = search::FeatureValues; + using FeatureSet = vespalib::FeatureSet; + using FeatureValues = vespalib::FeatureValues; using ThreadBundle = vespalib::ThreadBundle; using SearchIterator = search::queryeval::SearchIterator; using RankProgram = search::fef::RankProgram; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 3a43e9a118e..0bb183d1dc0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -18,7 +18,7 @@ namespace proton::matching { using namespace search::fef; using search::queryeval::SearchIterator; -using search::FeatureSet; +using vespalib::FeatureSet; using vespalib::ThreadBundle; using vespalib::Issue; diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 236964c2e6b..b393558638d 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -27,7 +27,6 @@ using namespace search::engine; using namespace search::grouping; using search::DocumentMetaData; using search::LidUsageStats; -using search::FeatureSet; using search::MatchingElementsFields; using search::MatchingElements; using search::attribute::IAttributeContext; @@ -39,6 +38,7 @@ using search::fef::indexproperties::hitcollector::ArraySize; using search::queryeval::Blueprint; using search::queryeval::SearchIterator; using vespalib::Doom; +using vespalib::FeatureSet; using vespalib::make_string_short::fmt; namespace proton::matching { diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.h b/searchcore/src/vespa/searchcore/proton/matching/matcher.h index bad56fe1c36..6507ffca2eb 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.h +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.h @@ -11,13 +11,13 @@ #include "viewresolver.h" #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -135,7 +135,7 @@ public: * @param attrCtx abstract view of attribute data * @return calculated summary features. **/ - search::FeatureSet::SP + vespalib::FeatureSet::SP getSummaryFeatures(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, SessionManager &sessionManager) const; @@ -149,7 +149,7 @@ public: * @param attrCtx abstract view of attribute data * @return calculated rank features. **/ - search::FeatureSet::SP + vespalib::FeatureSet::SP getRankFeatures(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, SessionManager &sessionManager) const; diff --git a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp index 834cdbef50d..73a81be9f90 100644 --- a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp +++ b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp @@ -2,9 +2,9 @@ #include LOG_SETUP("summaryfeatures_test"); #include -#include +#include -using namespace search; +using vespalib::FeatureSet; using vespalib::Memory; TEST_SETUP(Test); diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt index a7c8d56f11d..089151455f3 100644 --- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt @@ -9,7 +9,6 @@ vespa_add_library(searchlib_common OBJECT condensedbitvectors.cpp documentlocations.cpp documentsummary.cpp - featureset.cpp fileheadercontext.cpp flush_token.cpp geo_gcd.cpp diff --git a/searchlib/src/vespa/searchlib/common/featureset.cpp b/searchlib/src/vespa/searchlib/common/featureset.cpp deleted file mode 100644 index 5c8d4c6d9c4..00000000000 --- a/searchlib/src/vespa/searchlib/common/featureset.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "featureset.h" - -namespace search { - -FeatureSet::FeatureSet() - : _names(), - _docIds(), - _values() -{ -} - -FeatureSet::~FeatureSet() {} - -FeatureSet::FeatureSet(const StringVector &names, uint32_t expectDocs) - : _names(names), - _docIds(), - _values() -{ - _docIds.reserve(expectDocs); - _values.reserve(expectDocs * names.size()); -} - -bool -FeatureSet::equals(const FeatureSet &rhs) const -{ - return ((_docIds == rhs._docIds) && - (_values == rhs._values) && - (_names == rhs._names)); // do names last, as they are most likely to match -} - -uint32_t -FeatureSet::addDocId(uint32_t docId) -{ - _docIds.push_back(docId); - _values.resize(_names.size() * _docIds.size()); - return (_docIds.size() - 1); -} - -bool -FeatureSet::contains(const std::vector &docIds) const -{ - using ITR = std::vector::const_iterator; - ITR myPos = _docIds.begin(); - ITR myEnd = _docIds.end(); - ITR pos = docIds.begin(); - ITR end = docIds.end(); - - for (; pos != end; ++pos) { - while (myPos != myEnd && *myPos < *pos) { - ++myPos; - } - if (myPos == myEnd || *myPos != *pos) { - return false; - } - ++myPos; - } - return true; -} - -FeatureSet::Value * -FeatureSet::getFeaturesByIndex(uint32_t idx) -{ - if (idx >= _docIds.size()) { - return 0; - } - return &(_values[idx * _names.size()]); -} - -const FeatureSet::Value * -FeatureSet::getFeaturesByDocId(uint32_t docId) const -{ - uint32_t low = 0; - uint32_t hi = _docIds.size(); - while (low < hi) { - uint32_t pos = (low + hi) >> 1; - uint32_t val = _docIds[pos]; - if (val < docId) { - low = pos + 1; - } else if (val > docId) { - hi = pos; - } else { - return &(_values[pos * _names.size()]); - } - } - return 0; -} - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/featureset.h b/searchlib/src/vespa/searchlib/common/featureset.h deleted file mode 100644 index adda8a2728b..00000000000 --- a/searchlib/src/vespa/searchlib/common/featureset.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "feature.h" -#include -#include -#include -#include -#include - -namespace search { - -/** - * This class holds information about a set of features for a set of - * documents. - **/ -class FeatureSet -{ -public: - class Value { - private: - std::vector _data; - double _value; - public: - bool operator==(const Value &rhs) const { - return ((_data == rhs._data) && (_value == rhs._value)); - } - bool is_double() const { return _data.empty(); } - bool is_data() const { return !_data.empty(); } - double as_double() const { return _value; } - vespalib::Memory as_data() const { return vespalib::Memory(&_data[0], _data.size()); } - void set_double(double value) { - _data.clear(); - _value = value; - } - void set_data(vespalib::Memory data) { - _data.assign(data.data, data.data + data.size); - _value = 0.0; - } - }; - - using string = vespalib::string; - using StringVector = std::vector; -private: - StringVector _names; - std::vector _docIds; - std::vector _values; - - FeatureSet(const FeatureSet &); - FeatureSet & operator=(const FeatureSet &); - -public: - using SP = std::shared_ptr; - using UP = std::unique_ptr; - - /** - * Create a new object without any feature information. - **/ - FeatureSet(); - ~FeatureSet(); - - /** - * Create a new object that will contain information about the - * given features. - * - * @param names names of all features - * @param expectDocs the number of documents we expect to store information about - **/ - FeatureSet(const StringVector &names, uint32_t expectDocs); - - /** - * Check whether this object is equal to the given object. - * - * @return true if the objects are equal. - **/ - bool equals(const FeatureSet &rhs) const; - - /** - * Obtain the names of all the features tracked by this object. - * - * @return feature names - **/ - const StringVector &getNames() const { return _names; } - - /** - * Obtain the number of features this object contains information - * about. - * - * @return number of features - **/ - uint32_t numFeatures() const { return _names.size(); } - - /** - * Obtain the number of documents this object contains information - * about. - * - * @return number of documents. - **/ - uint32_t numDocs() const { return _docIds.size(); } - - /** - * Add a document to the set of documents this object contains - * information about. Documents must be added in ascending - * order. When a new document is added, all features are - * initialized to 0.0. The return value from this method can be - * used together with the @ref getFeaturesByIndex method to set - * the actual feature values. The ordering among features are - * assumed to be the same as in the name vector passed to the - * constructor. - * - * @return the index of the document just added - * @param docid the id of the document to add - **/ - uint32_t addDocId(uint32_t docid); - - /** - * Check whether this object contains information about the given - * set of documents. The given set of documents must be sorted on - * document id; lowest first. - * - * @return true if this object contains information about all the given documents - * @param docIds the documents we want information about - **/ - bool contains(const std::vector &docIds) const; - - /** - * Obtain the feature values belonging to a document based on the - * index into the internal docid array. This method is intended - * for use only when filling in the feature values during object - * initialization. - * - * @return pointer to features - * @param idx index into docid array - **/ - Value *getFeaturesByIndex(uint32_t idx); - - /** - * Obtain the feature values belonging to a document based on the - * docid value. This method is intended for lookup when generating - * the summary features or rank features docsum field. - * - * @return pointer to features - * @param docId docid value - **/ - const Value *getFeaturesByDocId(uint32_t docId) const; -}; - -// An even simpler feature container. Used to pass match features around. -struct FeatureValues { - using Value = FeatureSet::Value; - std::vector names; - std::vector values; // values.size() == names.size() * N -}; - -} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.h b/searchlib/src/vespa/searchlib/engine/searchreply.h index 8f862d8dcf7..6b0edca3086 100644 --- a/searchlib/src/vespa/searchlib/engine/searchreply.h +++ b/searchlib/src/vespa/searchlib/engine/searchreply.h @@ -6,8 +6,8 @@ #include #include #include -#include #include +#include #include namespace search::engine { @@ -15,6 +15,7 @@ namespace search::engine { class SearchReply { public: + using FeatureValues = vespalib::FeatureValues; using UP = std::unique_ptr; class Hit diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h index 2c644a243c8..a765208cb9e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h @@ -3,9 +3,9 @@ #pragma once #include "getdocsumargs.h" -#include #include #include +#include #include namespace juniper { @@ -48,6 +48,7 @@ protected: class GetDocsumsState { public: + using FeatureSet = vespalib::FeatureSet; const search::attribute::IAttributeVector * getAttribute(size_t index) const { return _attributes[index]; } GetDocsumArgs _args; // from getdocsums request diff --git a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp index bad1ad5a6f3..c5e823bf9f4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp @@ -5,6 +5,8 @@ #include #include +using vespalib::FeatureSet; + namespace search::docsummary { RankFeaturesDFW::RankFeaturesDFW() = default; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp index a680b01d887..a1b2d6b3af6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp @@ -5,8 +5,7 @@ #include #include -#include -LOG_SETUP(".searchlib.docsummary.summaryfeaturesdfw"); +using vespalib::FeatureSet; namespace search::docsummary { diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp index 6950c90f097..791ec01162f 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp @@ -285,7 +285,7 @@ HitCollectorTest::testFeatureSet() FeatureResolver resolver(rankProgram.get_resolver()); search::StringStringMap renames; renames["bar"] = "qux"; - search::FeatureSet::SP sf = hc.getFeatureSet(rankProgram, resolver, renames); + vespalib::FeatureSet::SP sf = hc.getFeatureSet(rankProgram, resolver, renames); EXPECT_EQUAL(sf->getNames().size(), 3u); EXPECT_EQUAL(sf->getNames()[0], "foo"); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index 10e6c6aa68a..7b4e3cb0208 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -10,8 +10,8 @@ #include LOG_SETUP(".searchvisitor.hitcollector"); -using search::FeatureSet; using search::fef::MatchData; +using vespalib::FeatureSet; using vdslib::SearchResult; namespace streaming { diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index 6ce7459adfd..2918f815811 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -2,13 +2,13 @@ #pragma once -#include #include #include #include #include #include #include +#include namespace search { namespace fef { class FeatureResolver; } } @@ -132,9 +132,9 @@ public: * @param rankProgram the rank program used to calculate all features. * @param resolver feature resolver, gives feature names and values **/ - search::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram, - const search::fef::FeatureResolver &resolver, - const search::StringStringMap &feature_rename_map); + vespalib::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram, + const search::fef::FeatureResolver &resolver, + const search::StringStringMap &feature_rename_map); }; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index ba97a708cc5..01b21edc1ba 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -10,7 +10,7 @@ #include LOG_SETUP(".searchvisitor.rankprocessor"); -using search::FeatureSet; +using vespalib::FeatureSet; using search::fef::FeatureHandle; using search::fef::ITermData; using search::fef::ITermFieldData; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index c541f62646e..c74a2d1e3ee 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -65,7 +65,7 @@ public: void unpackMatchData(uint32_t docId); static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query); void runRankProgram(uint32_t docId); - search::FeatureSet::SP calculateFeatureSet(); + vespalib::FeatureSet::SP calculateFeatureSet(); void fillSearchResult(vdslib::SearchResult & searchResult); const search::fef::MatchData &getMatchData() const { return *_match_data; } void setRankScore(double score) { _score = score; } diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 7dc0c05cfaa..8980bc1f54d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -664,14 +664,14 @@ SearchVisitor::RankController::onCompletedVisiting(vsm::GetDocsumsStateCallback // calculate summary features and set them on the callback object if (!_rankSetup->getSummaryFeatures().empty()) { LOG(debug, "Calculate summary features"); - search::FeatureSet::SP sf = _rankProcessor->calculateFeatureSet(); + vespalib::FeatureSet::SP sf = _rankProcessor->calculateFeatureSet(); docsumsStateCallback.setSummaryFeatures(sf); } // calculate rank features and set them on the callback object if (_dumpFeatures) { LOG(debug, "Calculate rank features"); - search::FeatureSet::SP rf = _dumpProcessor->calculateFeatureSet(); + vespalib::FeatureSet::SP rf = _dumpProcessor->calculateFeatureSet(); docsumsStateCallback.setRankFeatures(rf); } } diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h index 77ed9573e54..ba87ccfef05 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h +++ b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h @@ -5,11 +5,11 @@ #include #include #include -#include #include #include #include #include +#include using search::docsummary::ResultConfig; using search::docsummary::ResultClass; @@ -28,8 +28,8 @@ class IMatchingElementsFiller; class GetDocsumsStateCallback : public search::docsummary::GetDocsumsStateCallback { private: - search::FeatureSet::SP _summaryFeatures; - search::FeatureSet::SP _rankFeatures; + vespalib::FeatureSet::SP _summaryFeatures; + vespalib::FeatureSet::SP _rankFeatures; std::unique_ptr _matching_elements_filler; public: @@ -37,8 +37,8 @@ public: void fillSummaryFeatures(GetDocsumsState& state) override; void fillRankFeatures(GetDocsumsState& state) override; std::unique_ptr fill_matching_elements(const search::MatchingElementsFields& fields) override; - void setSummaryFeatures(const search::FeatureSet::SP & sf) { _summaryFeatures = sf; } - void setRankFeatures(const search::FeatureSet::SP & rf) { _rankFeatures = rf; } + void setSummaryFeatures(const vespalib::FeatureSet::SP & sf) { _summaryFeatures = sf; } + void setRankFeatures(const vespalib::FeatureSet::SP & rf) { _rankFeatures = rf; } void set_matching_elements_filler(std::unique_ptr matching_elements_filler); ~GetDocsumsStateCallback() override; }; diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 8ee3957af32..91365d446c1 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -31,6 +31,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT exceptions.cpp execution_profiler.cpp executor_idle_tracking.cpp + featureset.cpp file_area_freelist.cpp foregroundtaskexecutor.cpp gate.cpp diff --git a/vespalib/src/vespa/vespalib/util/featureset.cpp b/vespalib/src/vespa/vespalib/util/featureset.cpp new file mode 100644 index 00000000000..6ac90461cfb --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/featureset.cpp @@ -0,0 +1,90 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "featureset.h" + +namespace vespalib { + +FeatureSet::FeatureSet() + : _names(), + _docIds(), + _values() +{ +} + +FeatureSet::~FeatureSet() {} + +FeatureSet::FeatureSet(const StringVector &names, uint32_t expectDocs) + : _names(names), + _docIds(), + _values() +{ + _docIds.reserve(expectDocs); + _values.reserve(expectDocs * names.size()); +} + +bool +FeatureSet::equals(const FeatureSet &rhs) const +{ + return ((_docIds == rhs._docIds) && + (_values == rhs._values) && + (_names == rhs._names)); // do names last, as they are most likely to match +} + +uint32_t +FeatureSet::addDocId(uint32_t docId) +{ + _docIds.push_back(docId); + _values.resize(_names.size() * _docIds.size()); + return (_docIds.size() - 1); +} + +bool +FeatureSet::contains(const std::vector &docIds) const +{ + using ITR = std::vector::const_iterator; + ITR myPos = _docIds.begin(); + ITR myEnd = _docIds.end(); + ITR pos = docIds.begin(); + ITR end = docIds.end(); + + for (; pos != end; ++pos) { + while (myPos != myEnd && *myPos < *pos) { + ++myPos; + } + if (myPos == myEnd || *myPos != *pos) { + return false; + } + ++myPos; + } + return true; +} + +FeatureSet::Value * +FeatureSet::getFeaturesByIndex(uint32_t idx) +{ + if (idx >= _docIds.size()) { + return 0; + } + return &(_values[idx * _names.size()]); +} + +const FeatureSet::Value * +FeatureSet::getFeaturesByDocId(uint32_t docId) const +{ + uint32_t low = 0; + uint32_t hi = _docIds.size(); + while (low < hi) { + uint32_t pos = (low + hi) >> 1; + uint32_t val = _docIds[pos]; + if (val < docId) { + low = pos + 1; + } else if (val > docId) { + hi = pos; + } else { + return &(_values[pos * _names.size()]); + } + } + return 0; +} + +} diff --git a/vespalib/src/vespa/vespalib/util/featureset.h b/vespalib/src/vespa/vespalib/util/featureset.h new file mode 100644 index 00000000000..ae7a0c6932f --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/featureset.h @@ -0,0 +1,155 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include + +namespace vespalib { + +/** + * This class holds information about a set of features for a set of + * documents. + **/ +class FeatureSet +{ +public: + class Value { + private: + std::vector _data; + double _value; + public: + bool operator==(const Value &rhs) const { + return ((_data == rhs._data) && (_value == rhs._value)); + } + bool is_double() const { return _data.empty(); } + bool is_data() const { return !_data.empty(); } + double as_double() const { return _value; } + vespalib::Memory as_data() const { return vespalib::Memory(&_data[0], _data.size()); } + void set_double(double value) { + _data.clear(); + _value = value; + } + void set_data(vespalib::Memory data) { + _data.assign(data.data, data.data + data.size); + _value = 0.0; + } + }; + + using string = vespalib::string; + using StringVector = std::vector; +private: + StringVector _names; + std::vector _docIds; + std::vector _values; + + FeatureSet(const FeatureSet &); + FeatureSet & operator=(const FeatureSet &); + +public: + using SP = std::shared_ptr; + using UP = std::unique_ptr; + + /** + * Create a new object without any feature information. + **/ + FeatureSet(); + ~FeatureSet(); + + /** + * Create a new object that will contain information about the + * given features. + * + * @param names names of all features + * @param expectDocs the number of documents we expect to store information about + **/ + FeatureSet(const StringVector &names, uint32_t expectDocs); + + /** + * Check whether this object is equal to the given object. + * + * @return true if the objects are equal. + **/ + bool equals(const FeatureSet &rhs) const; + + /** + * Obtain the names of all the features tracked by this object. + * + * @return feature names + **/ + const StringVector &getNames() const { return _names; } + + /** + * Obtain the number of features this object contains information + * about. + * + * @return number of features + **/ + uint32_t numFeatures() const { return _names.size(); } + + /** + * Obtain the number of documents this object contains information + * about. + * + * @return number of documents. + **/ + uint32_t numDocs() const { return _docIds.size(); } + + /** + * Add a document to the set of documents this object contains + * information about. Documents must be added in ascending + * order. When a new document is added, all features are + * initialized to 0.0. The return value from this method can be + * used together with the @ref getFeaturesByIndex method to set + * the actual feature values. The ordering among features are + * assumed to be the same as in the name vector passed to the + * constructor. + * + * @return the index of the document just added + * @param docid the id of the document to add + **/ + uint32_t addDocId(uint32_t docid); + + /** + * Check whether this object contains information about the given + * set of documents. The given set of documents must be sorted on + * document id; lowest first. + * + * @return true if this object contains information about all the given documents + * @param docIds the documents we want information about + **/ + bool contains(const std::vector &docIds) const; + + /** + * Obtain the feature values belonging to a document based on the + * index into the internal docid array. This method is intended + * for use only when filling in the feature values during object + * initialization. + * + * @return pointer to features + * @param idx index into docid array + **/ + Value *getFeaturesByIndex(uint32_t idx); + + /** + * Obtain the feature values belonging to a document based on the + * docid value. This method is intended for lookup when generating + * the summary features or rank features docsum field. + * + * @return pointer to features + * @param docId docid value + **/ + const Value *getFeaturesByDocId(uint32_t docId) const; +}; + +// An even simpler feature container. Used to pass match features around. +struct FeatureValues { + using Value = FeatureSet::Value; + std::vector names; + std::vector values; // values.size() == names.size() * N +}; + +} -- cgit v1.2.3