diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-02-06 14:38:33 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-02-06 14:38:33 +0100 |
commit | 624bb3303a021ab3f3065c8d2be3d399054a82cf (patch) | |
tree | 90ee218ab735fa38ac1a5cc6c9bbe8afef0d68bb /streamingvisitors | |
parent | 9ae25d8c35f7a15fdb8f45eee4ca34afe8cd483a (diff) | |
Handle search::streaming::PhraseQueryNode as a leaf in the query tree.
Diffstat (limited to 'streamingvisitors')
5 files changed, 42 insertions, 156 deletions
diff --git a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp index 70b863e540b..2a4b9e1f869 100644 --- a/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp +++ b/streamingvisitors/src/tests/querywrapper/querywrapper_test.cpp @@ -26,7 +26,6 @@ void QueryWrapperTest::testQueryWrapper() { QueryNodeResultFactory empty; - PhraseQueryNode * null = NULL; { QueryBuilder<SimpleQueryNodeTypes> builder; builder.addAnd(3); @@ -48,42 +47,16 @@ QueryWrapperTest::testQueryWrapper() QueryTermList terms; q.getLeaves(terms); - ASSERT_TRUE(tl.size() == 5 && terms.size() == 5); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQUAL(tl[i].getTerm(), terms[i]); + ASSERT_TRUE(tl.size() == 3 && terms.size() == 3); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQUAL(tl[i], terms[i]); std::cout << "t[" << i << "]:" << terms[i] << std::endl; + auto phrase = dynamic_cast<PhraseQueryNode*>(terms[i]); + EXPECT_EQUAL(i == 1, phrase != nullptr); + if (i == 1) { + EXPECT_EQUAL(3u, phrase->get_terms().size()); + } } - - QueryNodeRefList phrases; - q.getPhrases(phrases); - for (size_t i = 0; i < phrases.size(); ++i) { - std::cout << "p[" << i << "]:" << phrases[i] << std::endl; - } - EXPECT_EQUAL(phrases.size(), 1u); - ASSERT_TRUE(phrases.size() == 1); - EXPECT_EQUAL(tl[0].getParent(), null); - EXPECT_EQUAL(tl[1].getParent(), phrases[0]); - EXPECT_EQUAL(tl[2].getParent(), phrases[0]); - EXPECT_EQUAL(tl[3].getParent(), phrases[0]); - EXPECT_EQUAL(tl[4].getParent(), null); - - EXPECT_EQUAL(tl[0].getIndex(), 0u); - EXPECT_EQUAL(tl[1].getIndex(), 0u); - EXPECT_EQUAL(tl[2].getIndex(), 1u); - EXPECT_EQUAL(tl[3].getIndex(), 2u); - EXPECT_EQUAL(tl[4].getIndex(), 0u); - - EXPECT_TRUE(!tl[0].isFirstPhraseTerm()); - EXPECT_TRUE( tl[1].isFirstPhraseTerm()); - EXPECT_TRUE(!tl[2].isFirstPhraseTerm()); - EXPECT_TRUE(!tl[3].isFirstPhraseTerm()); - EXPECT_TRUE(!tl[4].isFirstPhraseTerm()); - - EXPECT_TRUE(!tl[0].isPhraseTerm()); - 
EXPECT_TRUE( tl[1].isPhraseTerm()); - EXPECT_TRUE( tl[2].isPhraseTerm()); - EXPECT_TRUE( tl[3].isPhraseTerm()); - EXPECT_TRUE(!tl[4].isPhraseTerm()); } } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index 0abff37d622..a4a4b4e696f 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -67,7 +67,7 @@ RankProcessorTest::test_unpack_match_data_for_term_node(bool interleaved_feature build_query(builder); auto& term_list = _query_wrapper->getTermList(); EXPECT_EQ(1u, term_list.size()); - auto node = dynamic_cast<QueryTerm*>(term_list.front().getTerm()); + auto node = dynamic_cast<QueryTerm*>(term_list.front()); EXPECT_NE(nullptr, node); auto& qtd = static_cast<QueryTermData &>(node->getQueryItem()); auto& td = qtd.getTermData(); @@ -132,7 +132,7 @@ TEST_F(RankProcessorTest, unpack_match_data_for_nearest_neighbor_query_node) build_query(builder); auto& term_list = _query_wrapper->getTermList(); EXPECT_EQ(1u, term_list.size()); - auto node = dynamic_cast<NearestNeighborQueryNode*>(term_list.front().getTerm()); + auto node = dynamic_cast<NearestNeighborQueryNode*>(term_list.front()); EXPECT_NE(nullptr, node); MockRawScoreCalculator calc; node->set_raw_score_calc(&calc); diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp b/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp index 78229f401ad..aa0699a6552 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.cpp @@ -6,42 +6,10 @@ using namespace search::streaming; namespace streaming { -QueryWrapper::PhraseList::PhraseList(Query & query) - : _phrases() +QueryWrapper::QueryWrapper(Query & query) + : _termList() { - QueryNodeRefList phrases; - query.getPhrases(phrases); - for (size_t i = 0; i < phrases.size(); ++i) { - 
_phrases.push_back(static_cast<PhraseQueryNode *>(phrases[i])); - } -} - -PhraseQueryNode * -QueryWrapper::PhraseList::findPhrase(QueryTerm * term, size_t & index) -{ - for (size_t i = 0; i < _phrases.size(); ++i) { - auto& terms = _phrases[i]->get_terms(); - for (size_t j = 0; j < terms.size(); ++j) { - if (terms[j].get() == term) { - index = j; - return _phrases[i]; - } - } - } - return nullptr; -} - -QueryWrapper::QueryWrapper(Query & query) : - _phraseList(query), - _termList() -{ - QueryTermList leaves; - query.getLeaves(leaves); - for (size_t i = 0; i < leaves.size(); ++i) { - size_t index = 0; - PhraseQueryNode * parent = _phraseList.findPhrase(leaves[i], index); - _termList.push_back(Term(leaves[i], parent, index)); - } + query.getLeaves(_termList); } QueryWrapper::~QueryWrapper() = default; diff --git a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h index 420ff215833..6134bf27d71 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querywrapper.h +++ b/streamingvisitors/src/vespa/searchvisitor/querywrapper.h @@ -14,46 +14,9 @@ namespace streaming { class QueryWrapper { public: - class PhraseList { - private: - std::vector<search::streaming::PhraseQueryNode *> _phrases; - - public: - PhraseList(search::streaming::Query & query); - search::streaming::PhraseQueryNode * findPhrase(search::streaming::QueryTerm * term, size_t & index); - }; - - class Term { - private: - search::streaming::QueryTerm * _term; - search::streaming::PhraseQueryNode * _parent; - size_t _index; - - public: - Term() : - _term(nullptr), - _parent(nullptr), - _index(0) - { - } - Term(search::streaming::QueryTerm * term, search::streaming::PhraseQueryNode * parent, size_t index) : - _term(term), - _parent(parent), - _index(index) - { - } - search::streaming::QueryTerm * getTerm() { return _term; } - search::streaming::PhraseQueryNode * getParent() { return _parent; } - size_t getIndex() const { return _index; } - bool 
isPhraseTerm() const { return _parent != nullptr; } - bool isFirstPhraseTerm() const { return isPhraseTerm() && getIndex() == 0; } - bool isGeoPosTerm() const { return (_term != nullptr) && _term->isGeoLoc(); } - }; - - using TermList = std::vector<Term>; + using TermList = search::streaming::QueryTermList; private: - PhraseList _phraseList; TermList _termList; public: diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index bc78c24ba1b..3449df57513 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -61,46 +61,36 @@ RankProcessor::initQueryEnvironment() QueryWrapper::TermList & terms = _query.getTermList(); for (auto& term : terms) { - if (term.isGeoPosTerm()) { - const vespalib::string & fieldName = term.getTerm()->index(); - const vespalib::string & locStr = term.getTerm()->getTermString(); + if (term->isGeoLoc()) { + const vespalib::string & fieldName = term->index(); + const vespalib::string & locStr = term->getTermString(); _queryEnv.addGeoLocation(fieldName, locStr); } - if (!term.isPhraseTerm() || term.isFirstPhraseTerm()) { // register 1 term data per phrase - QueryTermData & qtd = dynamic_cast<QueryTermData &>(term.getTerm()->getQueryItem()); - - qtd.getTermData().setWeight(term.getTerm()->weight()); - qtd.getTermData().setUniqueId(term.getTerm()->uniqueId()); - if (term.isFirstPhraseTerm()) { - qtd.getTermData().setPhraseLength(term.getParent()->width()); - } else { - qtd.getTermData().setPhraseLength(1); - } - auto* nn_term = term.getTerm()->as_nearest_neighbor_query_node(); - if (nn_term != nullptr) { - qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); - } + QueryTermData & qtd = dynamic_cast<QueryTermData &>(term->getQueryItem()); + + qtd.getTermData().setWeight(term->weight()); + qtd.getTermData().setUniqueId(term->uniqueId()); + 
qtd.getTermData().setPhraseLength(term->width()); + auto* nn_term = term->as_nearest_neighbor_query_node(); + if (nn_term != nullptr) { + qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); + } - vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.getTerm()->index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); - if (view != nullptr) { - for (auto field_id : *view) { - qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); - } - } else { - LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", - getIndexName(term.getTerm()->index(), expandedIndexName).c_str()); + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term->index()); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + if (view != nullptr) { + for (auto field_id : *view) { + qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); } - - LOG(debug, "Setup query term '%s:%s' (%s)", - getIndexName(term.getTerm()->index(), expandedIndexName).c_str(), - term.getTerm()->getTerm(), - term.isFirstPhraseTerm() ? "phrase" : "term"); - _queryEnv.addTerm(&qtd.getTermData()); } else { - LOG(debug, "Ignore query term '%s:%s' (part of phrase)", - term.getTerm()->index().c_str(), term.getTerm()->getTerm()); + LOG(warning, "Could not find a view for index '%s'. 
Ranking no fields.", + getIndexName(term->index(), expandedIndexName).c_str()); } + + LOG(debug, "Setup query term '%s:%s'", + getIndexName(term->index(), expandedIndexName).c_str(), + term->getTerm()); + _queryEnv.addTerm(&qtd.getTermData()); } _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); _match_data = _mdLayout.createMatchData(); @@ -257,18 +247,10 @@ RankProcessor::unpackMatchData(uint32_t docId) void RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrapper& query) { - for (QueryWrapper::Term & term: query.getTermList()) { - if (!term.isPhraseTerm() || term.isFirstPhraseTerm()) { // consider 1 term data per phrase - bool isPhrase = term.isFirstPhraseTerm(); - QueryTermData & qtd = static_cast<QueryTermData &>(term.getTerm()->getQueryItem()); - const ITermData &td = qtd.getTermData(); - - if (isPhrase) { - term.getParent()->unpack_match_data(docid, td, matchData); - } else { - term.getTerm()->unpack_match_data(docid, td, matchData); - } - } + for (auto& term : query.getTermList()) { + QueryTermData & qtd = static_cast<QueryTermData &>(term->getQueryItem()); + const ITermData &td = qtd.getTermData(); + term->unpack_match_data(docid, td, matchData); } } |