summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2020-05-27 22:36:15 +0200
committer: GitHub <noreply@github.com> 2020-05-27 22:36:15 +0200
commit: eb8322ea5255be916cff8b5e880ff8ddb73ab27c (patch)
tree: 7a6e3791c0a3928e4adce4e87dc7cd1846948b16
parent: 5a361cf67e4d7ffa64b8b451c05592d3f31ce3f9 (diff)
parent: a9443f824f27e62f179bf3a59818cf89591e65d1 (diff)
Merge pull request #13408 from vespa-engine/toregge/record-need-for-normal-term-field-match-data-for-phrase-splitter-input
Record need for normal term field match data for phrase splitter input.
-rw-r--r-- searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp | 15
-rw-r--r-- searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp | 5
3 files changed, 16 insertions, 10 deletions
diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
index 593b8fb29ce..eb2915c4e90 100644
--- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
+++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.cpp
@@ -5,7 +5,7 @@
namespace search::fef {
void
-PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId)
+PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, uint32_t fieldId)
{
typedef search::fef::ITermFieldRangeAdapter FRA;
@@ -19,7 +19,7 @@ PhraseSplitterQueryEnv::considerTerm(uint32_t termIdx, const ITermData &term, st
prototype.setPhraseLength(1);
prototype.setUniqueId(term.getUniqueId());
prototype.addField(fieldId);
- phraseTerms.push_back(PhraseTerm(term, _terms.size(), h));
+ _phrase_terms.push_back(PhraseTerm(term, _terms.size(), h));
for (uint32_t i = 0; i < term.getPhraseLength(); ++i) {
_terms.push_back(prototype);
_termIdxMap.push_back(TermIdx(_terms.size() - 1, true));
@@ -37,15 +37,14 @@ PhraseSplitterQueryEnv::PhraseSplitterQueryEnv(const IQueryEnvironment & queryEn
_termIdxMap(),
_maxHandle(0),
_skipHandles(0),
- _field_id(fieldId)
+ _field_id(fieldId),
+ _phrase_terms()
{
TermFieldHandle numHandles = 0; // how many handles existed in underlying data
- std::vector<PhraseTerm> phraseTerms; // data about original phrase terms
-
for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) {
const ITermData *td = queryEnv.getTerm(i);
assert(td != nullptr);
- considerTerm(i, *td, phraseTerms, fieldId);
+ considerTerm(i, *td, fieldId);
numHandles += td->numFields();
}
@@ -57,8 +56,8 @@ PhraseSplitterQueryEnv::PhraseSplitterQueryEnv(const IQueryEnvironment & queryEn
++term_handle;
}
- for (uint32_t i = 0; i < phraseTerms.size(); ++i) {
- const PhraseTerm &pterm = phraseTerms[i];
+ for (uint32_t i = 0; i < _phrase_terms.size(); ++i) {
+ const PhraseTerm &pterm = _phrase_terms[i];
for (uint32_t j = 0; j < pterm.term.getPhraseLength(); ++j) {
const ITermData &splitp_td = _terms[pterm.idx + j];
diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
index 9bdac380f1a..b2a3d416f5a 100644
--- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
+++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h
@@ -25,13 +25,13 @@ private:
bool splitted; // whether this term has been splitted or not
TermIdx(uint32_t i, bool s) : idx(i), splitted(s) {}
};
+public:
struct PhraseTerm {
const ITermData & term; // for original phrase
uint32_t idx; // index into vector of our TermData objects
TermFieldHandle orig_handle;
PhraseTerm(const ITermData & t, uint32_t i, uint32_t h) : term(t), idx(i), orig_handle(h) {}
};
-public:
struct HowToCopy {
TermFieldHandle orig_handle;
TermFieldHandle split_handle;
@@ -45,8 +45,9 @@ private:
TermFieldHandle _maxHandle; // the largest among original term field handles
TermFieldHandle _skipHandles; // how many handles to skip
uint32_t _field_id;
+ std::vector<PhraseTerm> _phrase_terms; // data about original phrase terms
- void considerTerm(uint32_t termIdx, const ITermData &term, std::vector<PhraseTerm> &phraseTerms, uint32_t fieldId);
+ void considerTerm(uint32_t termIdx, const ITermData &term, uint32_t fieldId);
public:
/**
@@ -82,6 +83,7 @@ public:
uint32_t get_num_phrase_split_terms() const { return _terms.size(); }
uint32_t get_field_id() const { return _field_id; }
const std::vector<HowToCopy>& get_copy_info() const { return _copyInfo; }
+ const std::vector<PhraseTerm>& get_phrase_terms() const { return _phrase_terms; }
};
diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
index b80a9c9e085..b07498e8608 100644
--- a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
+++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp
@@ -15,6 +15,11 @@ PhraseSplitter::PhraseSplitter(const PhraseSplitterQueryEnv& phrase_splitter_que
for (auto & term_match : _termMatches) {
term_match.setFieldId(field_id);
}
+ auto &phrase_terms = _phrase_splitter_query_env.get_phrase_terms();
+ for (const auto &phrase_term : phrase_terms) {
+ // Record that we need normal term field match data
+ (void) phrase_term.term.lookupField(field_id)->getHandle(MatchDataDetails::Normal);
+ }
}
PhraseSplitter::~PhraseSplitter() = default;