summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2020-05-04 11:25:46 +0200
committer GitHub <noreply@github.com> 2020-05-04 11:25:46 +0200
commit 44d195021c1742fdbddfcb6f9afac7fdd993a96a (patch)
tree 02a9eafb3b178bcf1aee696c588991812cd09b03
parent e3f135fc9ccad48719e462397915c6efeaf450b6 (diff)
parent 020de7058e91e2b8e36043e9f46449b0ccf1000a (diff)
Merge pull request #13136 from vespa-engine/toregge/unpack-interleaved-features-for-simple-phrase-search
Unpack interleaved features if they are needed in simple phrase search.
-rw-r--r-- searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp 43
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/fake_result.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/fake_search.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp 21
5 files changed, 75 insertions, 16 deletions
diff --git a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
index abaae7e4333..dfe2e2edbd9 100644
--- a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
+++ b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
@@ -43,7 +43,7 @@ class Test : public vespalib::TestApp {
void requireThatIteratorFindsSimplePhrase(bool useBlueprint);
void requireThatIteratorFindsLongPhrase(bool useBlueprint);
void requireThatStrictIteratorFindsNextMatch(bool useBlueprint);
- void requireThatPhrasesAreUnpacked(bool useBlueprint);
+ void requireThatPhrasesAreUnpacked(bool useBlueprint, bool unpack_normal_features, bool unpack_interleaved_features);
void requireThatTermsCanBeEvaluatedInPriorityOrder();
void requireThatBlueprintExposesFieldWithEstimate();
void requireThatBlueprintForcesPositionDataOnChildren();
@@ -63,13 +63,19 @@ Test::Main()
TEST_DO(requireThatIteratorFindsSimplePhrase(false));
TEST_DO(requireThatIteratorFindsLongPhrase(false));
TEST_DO(requireThatStrictIteratorFindsNextMatch(false));
- TEST_DO(requireThatPhrasesAreUnpacked(false));
+ TEST_DO(requireThatPhrasesAreUnpacked(false, true, false));
+ TEST_DO(requireThatPhrasesAreUnpacked(false, true, true));
+ TEST_DO(requireThatPhrasesAreUnpacked(false, false, false));
+ TEST_DO(requireThatPhrasesAreUnpacked(false, false, true));
TEST_DO(requireThatTermsCanBeEvaluatedInPriorityOrder());
TEST_DO(requireThatIteratorFindsSimplePhrase(true));
TEST_DO(requireThatIteratorFindsLongPhrase(true));
TEST_DO(requireThatStrictIteratorFindsNextMatch(true));
- TEST_DO(requireThatPhrasesAreUnpacked(true));
+ TEST_DO(requireThatPhrasesAreUnpacked(true, true, false));
+ TEST_DO(requireThatPhrasesAreUnpacked(true, true, true));
+ TEST_DO(requireThatPhrasesAreUnpacked(true, false, false));
+ TEST_DO(requireThatPhrasesAreUnpacked(true, false, true));
TEST_DO(requireThatBlueprintExposesFieldWithEstimate());
TEST_DO(requireThatBlueprintForcesPositionDataOnChildren());
TEST_DO(requireThatIteratorHonorsFutureDoom());
@@ -107,6 +113,7 @@ public:
void setStrict(bool strict) { _strict = strict; }
void setOrder(const vector<uint32_t> &order) { _order = order; }
const TermFieldMatchData &tmd() const { return *_md->resolveTermField(phrase_handle); }
+ TermFieldMatchData &writable_term_field_match_data() { return *_md->resolveTermField(phrase_handle); }
PhraseSearchTest &addTerm(const string &term, bool last) {
return addTerm(term, FakeResult()
@@ -156,7 +163,10 @@ public:
} else {
search::fef::TermFieldMatchDataArray childMatch;
for (size_t i = 0; i < _children.size(); ++i) {
- childMatch.add(_md->resolveTermField(childHandle(i)));
+ auto *child_term_field_match_data = _md->resolveTermField(childHandle(i));
+ child_term_field_match_data->setNeedInterleavedFeatures(tmd().needs_interleaved_features());
+ child_term_field_match_data->setNeedNormalFeatures(true);
+ childMatch.add(child_term_field_match_data);
}
SimplePhraseSearch::Children children;
for (size_t i = 0; i < _children.size(); ++i) {
@@ -264,21 +274,34 @@ void Test::requireThatStrictIteratorFindsNextMatch(bool useBlueprint) {
EXPECT_TRUE(search->isAtEnd());
}
-void Test::requireThatPhrasesAreUnpacked(bool useBlueprint) {
+void Test::requireThatPhrasesAreUnpacked(bool useBlueprint, bool unpack_normal_features, bool unpack_interleaved_features) {
PhraseSearchTest test;
test.addTerm("foo", FakeResult()
- .doc(doc_match).pos(1).pos(11).pos(21));
+ .doc(doc_match).pos(1).pos(11).pos(21).field_length(30).num_occs(3));
test.addTerm("bar", FakeResult()
- .doc(doc_match).pos(2).pos(16).pos(22));
+ .doc(doc_match).pos(2).pos(16).pos(22).field_length(30).num_occs(3));
+ test.writable_term_field_match_data().setNeedNormalFeatures(unpack_normal_features);
+ test.writable_term_field_match_data().setNeedInterleavedFeatures(unpack_interleaved_features);
test.fetchPostings(useBlueprint);
unique_ptr<SearchIterator> search(test.createSearch(useBlueprint));
EXPECT_TRUE(search->seek(doc_match));
search->unpack(doc_match);
EXPECT_EQUAL(doc_match, test.tmd().getDocId());
- EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end()));
- EXPECT_EQUAL(1u, test.tmd().begin()->getPosition());
- EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition());
+ if (unpack_normal_features) {
+ EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end()));
+ EXPECT_EQUAL(1u, test.tmd().begin()->getPosition());
+ EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition());
+ } else {
+ EXPECT_EQUAL(0, std::distance(test.tmd().begin(), test.tmd().end()));
+ }
+ if (unpack_interleaved_features) {
+ EXPECT_EQUAL(2u, test.tmd().getNumOccs());
+ EXPECT_EQUAL(30u, test.tmd().getFieldLength());
+ } else {
+ EXPECT_EQUAL(0u, test.tmd().getNumOccs());
+ EXPECT_EQUAL(0u, test.tmd().getFieldLength());
+ }
}
void Test::requireThatTermsCanBeEvaluatedInPriorityOrder() {
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.h b/searchlib/src/vespa/searchlib/queryeval/fake_result.h
index bbae5e027cf..1823ebbd616 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_result.h
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.h
@@ -33,11 +33,15 @@ public:
uint32_t docId;
std::vector<Element> elements;
feature_t rawScore;
- Document(uint32_t id) : docId(id), elements(), rawScore(0) {}
+ uint32_t field_length;
+ uint32_t num_occs;
+ Document(uint32_t id) : docId(id), elements(), rawScore(0), field_length(0), num_occs(0) {}
bool operator==(const Document &rhs) const {
return (docId == rhs.docId &&
elements == rhs.elements &&
- rawScore == rhs.rawScore);
+ rawScore == rhs.rawScore &&
+ field_length == rhs.field_length &&
+ num_occs == rhs.num_occs);
}
};
@@ -95,6 +99,16 @@ public:
return *this;
}
+ FakeResult &field_length(uint32_t field_length_) {
+ _documents.back().field_length = field_length_;
+ return *this;
+ }
+
+ FakeResult &num_occs(uint32_t num_occs_) {
+ _documents.back().num_occs = num_occs_;
+ return *this;
+ }
+
bool operator==(const FakeResult &rhs) const {
return _documents == rhs._documents;
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
index 4ee214b0f16..6f2d3cb6b2a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
@@ -47,6 +47,10 @@ FakeSearch::doUnpack(uint32_t docid)
if (is_attr()) {
_tfmda[0]->appendPosition(PosCtx(0, 0, sum_weight, 1));
}
+ if (_tfmda[0]->needs_interleaved_features()) {
+ _tfmda[0]->setNumOccs(doc.num_occs);
+ _tfmda[0]->setFieldLength(doc.field_length);
+ }
}
void
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp
index 2c63c96695a..c52cf6ddae1 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp
@@ -65,7 +65,10 @@ SimplePhraseBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmd
for (size_t i = 0; i < _terms.size(); ++i) {
const State &childState = _terms[i]->getState();
assert(childState.numFields() == 1);
- childMatch.add(childState.field(0).resolve(*md));
+ auto *child_term_field_match_data = childState.field(0).resolve(*md);
+ child_term_field_match_data->setNeedInterleavedFeatures(tfmda[0]->needs_interleaved_features());
+ child_term_field_match_data->setNeedNormalFeatures(true);
+ childMatch.add(child_term_field_match_data);
children[i] = _terms[i]->createSearch(*md, strict).release();
order_map.insert(std::make_pair(childState.estimate().estHits, i));
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
index 767cdba7d68..df0dff06582 100644
--- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp
@@ -102,16 +102,31 @@ public:
void fillPositions(TermFieldMatchData &tmd) {
if (_tmds.size() == 1) {
- for (const fef::TermFieldMatchDataPosition & pos : *_tmds[0]) {
- tmd.appendPosition(pos);
+ if (tmd.needs_normal_features()) {
+ for (const fef::TermFieldMatchDataPosition & pos : *_tmds[0]) {
+ tmd.appendPosition(pos);
+ }
+ }
+ if (tmd.needs_interleaved_features()) {
+ tmd.setNumOccs(_tmds[0]->size());
+ tmd.setFieldLength(_tmds[0]->getFieldLength());
}
} else {
+ const bool needs_normal_features = tmd.needs_normal_features();
+ uint32_t num_occs = 0;
while (iterator(_eval_order[0]) != end(_eval_order[0])) {
if (match()) {
- tmd.appendPosition(*iterator(0));
+ if (needs_normal_features) {
+ tmd.appendPosition(*iterator(0));
+ }
+ ++num_occs;
}
++iterator(_eval_order[0]);
}
+ if (tmd.needs_interleaved_features()) {
+ tmd.setNumOccs(num_occs);
+ tmd.setFieldLength(_tmds[0]->getFieldLength());
+ }
}
}
};