diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2020-05-27 21:20:42 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2020-05-27 21:20:42 +0200 |
commit | a9443f824f27e62f179bf3a59818cf89591e65d1 (patch) | |
tree | 7a6e3791c0a3928e4adce4e87dc7cd1846948b16 | |
parent | 5a361cf67e4d7ffa64b8b451c05592d3f31ce3f9 (diff) |
Record need for normal term field match data for phrase splitter input.
3 files changed, 16 insertions, 10 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
index 593b8fb29ce..eb2915c4e90 100644
--- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
+++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
@@ -5,7 +5,7 @@
 namespace search::fef {
 
 void
-PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId)
+PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, uint32_t fieldId)
 {
     typedef search::fef::ITermFieldRangeAdapter FRA;
 
@@ -19,7 +19,7 @@ PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, st
                 prototype.setPhraseLength(1);
                 prototype.setUniqueId(term.getUniqueId());
                 prototype.addField(fieldId);
-                phraseTerms.push_back(PhraseTerm(term, _terms.size(), h));
+                _phrase_terms.push_back(PhraseTerm(term, _terms.size(), h));
                 for (uint32_t i = 0; i < term.getPhraseLength(); ++i) {
                     _terms.push_back(prototype);
                     _termIdxMap.push_back(TermIdx(_terms.size() - 1, true));
@@ -37,15 +37,14 @@ PhraseSplitterQueryEnv::PhraseSplitterQueryEnv(const IQueryEnvironment & queryEn
       _termIdxMap(),
       _maxHandle(0),
       _skipHandles(0),
-      _field_id(fieldId)
+      _field_id(fieldId),
+      _phrase_terms()
 {
     TermFieldHandle numHandles = 0; // how many handles existed in underlying data
-    std::vector<PhraseTerm> phraseTerms; // data about original phrase terms
-
     for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) {
         const ITermData *td = queryEnv.getTerm(i);
         assert(td != nullptr);
-        considerTerm(i, *td, phraseTerms, fieldId);
+        considerTerm(i, *td, fieldId);
         numHandles += td->numFields();
     }
 
@@ -57,8 +56,8 @@ PhraseSplitterQueryEnv::PhraseSplitterQueryEnv(const IQueryEnvironment & queryEn
         ++term_handle;
     }
 
-    for (uint32_t i = 0; i < phraseTerms.size(); ++i) {
-        const PhraseTerm &pterm = phraseTerms[i];
+    for (uint32_t i = 0; i < _phrase_terms.size(); ++i) {
+        const PhraseTerm &pterm = _phrase_terms[i];
         for (uint32_t j = 0; j < pterm.term.getPhraseLength(); ++j) {
             const ITermData &splitp_td = _terms[pterm.idx + j];
 
diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
index 9bdac380f1a..b2a3d416f5a 100644
--- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
+++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
@@ -25,13 +25,13 @@ private:
         bool splitted; // whether this term has been splitted or not
         TermIdx(uint32_t i, bool s) : idx(i), splitted(s) {}
     };
+public:
     struct PhraseTerm {
         const ITermData & term; // for original phrase
         uint32_t idx; // index into vector of our TermData objects
         TermFieldHandle orig_handle;
         PhraseTerm(const ITermData & t, uint32_t i, uint32_t h) : term(t), idx(i), orig_handle(h) {}
     };
-public:
     struct HowToCopy {
         TermFieldHandle orig_handle;
         TermFieldHandle split_handle;
@@ -45,8 +45,9 @@ private:
     TermFieldHandle _maxHandle; // the largest among original term field handles
     TermFieldHandle _skipHandles; // how many handles to skip
     uint32_t _field_id;
+    std::vector<PhraseTerm> _phrase_terms; // data about original phrase terms
 
-    void considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId);
+    void considerTerm(uint32_t termIdx, const ITermData &term, uint32_t fieldId);
 
 public:
     /**
@@ -82,6 +83,7 @@ public:
     uint32_t get_num_phrase_split_terms() const { return _terms.size(); }
     uint32_t get_field_id() const { return _field_id; }
     const std::vector<HowToCopy>& get_copy_info() const { return _copyInfo; }
+    const std::vector<PhraseTerm>& get_phrase_terms() const { return _phrase_terms; }
 };
 
 
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
index b80a9c9e085..b07498e8608 100644
--- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
@@ -15,6 +15,11 @@ PhraseSplitter::PhraseSplitter(const PhraseSplitterQueryEnv& phrase_splitter_que
     for (auto & term_match : _termMatches) {
         term_match.setFieldId(field_id);
     }
+    auto &phrase_terms = _phrase_splitter_query_env.get_phrase_terms();
+    for (const auto &phrase_term : phrase_terms) {
+        // Record that we need normal term field match data
+        (void) phrase_term.term.lookupField(field_id)->getHandle(MatchDataDetails::Normal);
+    }
 }
 
 PhraseSplitter::~PhraseSplitter() = default;
```