about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2020-05-14 23:47:36 +0200
committer Tor Egge <Tor.Egge@broadpark.no> 2020-05-15 11:57:52 +0200
commit a5b49bb9f29cd9724cc538b128901e3b698533f8 (patch)
tree 40a410eae3fa17968e0418b73ec56fd49078a4ea
parent d05fbb6d8eae73144cf6b5f4c5eb794f3b157389 (diff)
Unpack interleaved features if they are needed in equiv search.
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp 6
-rw-r--r-- searchlib/src/tests/queryeval/equiv/equiv_test.cpp 106
-rw-r--r-- searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp 49
-rw-r--r-- searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp 41
4 files changed, 162 insertions, 40 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index bec1691df23..eb6e49747a1 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1291,7 +1291,7 @@ TEST("require that children does not optimize when parents refuse them to") {
}
}
-TEST("require_that_unpack_optimization_is_overruled_by_equiv") {
+TEST("require_that_unpack_optimization_is_not_overruled_by_equiv") {
FieldSpecBaseList fields;
fields.add(FieldSpecBase(1, 1));
fields.add(FieldSpecBase(2, 2));
@@ -1322,7 +1322,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") {
EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
{
const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
- EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>",
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::SelectiveUnpack>",
e.getChildren()[0]->getClassName());
}
@@ -1332,7 +1332,7 @@ TEST("require_that_unpack_optimization_is_overruled_by_equiv") {
EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
{
const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
- EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>",
+ EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::NoUnpack>",
e.getChildren()[0]->getClassName());
}
}
diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp
index 219735105de..412130ecaab 100644
--- a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp
+++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp
@@ -19,7 +19,7 @@ protected:
EquivTest();
~EquivTest();
- void test_equiv(bool strict);
+ void test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features);
};
EquivTest::EquivTest() = default;
@@ -27,15 +27,15 @@ EquivTest::EquivTest() = default;
EquivTest::~EquivTest() = default;
void
-EquivTest::test_equiv(bool strict)
+EquivTest::test_equiv(bool strict, bool unpack_normal_features, bool unpack_interleaved_features)
{
FakeResult a;
FakeResult b;
FakeResult c;
- a.doc(5).pos(1);
- b.doc(5).pos(2);
- c.doc(5).pos(3).doc(10).pos(4);
+ a.doc(5).pos(1).len(30).field_length(30).num_occs(1);
+ b.doc(5).pos(2).len(30).field_length(30).num_occs(1);
+ c.doc(5).pos(3).len(30).field_length(30).num_occs(1).doc(10).pos(4).len(35).field_length(35).num_occs(1);
MatchDataLayout subLayout;
TermFieldHandle fbh11 = subLayout.allocTermField(1);
@@ -52,6 +52,11 @@ EquivTest::test_equiv(bool strict)
bp->addTerm(std::make_unique<FakeBlueprint>(FieldSpec("bar", 2, fbh22), c), 1.0);
MatchData::UP md = MatchData::makeTestInstance(100, 10);
+ for (uint32_t field_id = 1; field_id <= 2; ++field_id) {
+ TermFieldMatchData &data = *md->resolveTermField(field_id);
+ data.setNeedNormalFeatures(unpack_normal_features);
+ data.setNeedInterleavedFeatures(unpack_interleaved_features);
+ }
bp->fetchPostings(ExecuteInfo::create(strict));
SearchIterator::UP search = bp->createSearch(*md, strict);
search->initFullRange();
@@ -69,25 +74,43 @@ EquivTest::test_equiv(bool strict)
EXPECT_EQ(1u, data.getFieldId());
EXPECT_EQ(5u, data.getDocId());
FieldPositionsIterator itr = data.getIterator();
- EXPECT_EQ(1u, itr.size());
- ASSERT_TRUE(itr.valid());
- EXPECT_EQ(1u, itr.getPosition());
- itr.next();
+ if (unpack_normal_features) {
+ EXPECT_EQ(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(1u, itr.getPosition());
+ itr.next();
+ }
EXPECT_TRUE(!itr.valid());
+ if (unpack_interleaved_features) {
+ EXPECT_EQ(1u, data.getNumOccs());
+ EXPECT_EQ(30u, data.getFieldLength());
+ } else {
+ EXPECT_EQ(0u, data.getNumOccs());
+ EXPECT_EQ(0u, data.getFieldLength());
+ }
}
{
TermFieldMatchData &data = *md->resolveTermField(2);
EXPECT_EQ(2u, data.getFieldId());
EXPECT_EQ(5u, data.getDocId());
FieldPositionsIterator itr = data.getIterator();
- EXPECT_EQ(2u, itr.size());
- ASSERT_TRUE(itr.valid());
- EXPECT_EQ(2u, itr.getPosition());
- itr.next();
- ASSERT_TRUE(itr.valid());
- EXPECT_EQ(3u, itr.getPosition());
- itr.next();
+ if (unpack_normal_features) {
+ EXPECT_EQ(2u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(2u, itr.getPosition());
+ itr.next();
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(3u, itr.getPosition());
+ itr.next();
+ }
EXPECT_TRUE(!itr.valid());
+ if (unpack_interleaved_features) {
+ EXPECT_EQ(2u, data.getNumOccs());
+ EXPECT_EQ(30u, data.getFieldLength());
+ } else {
+ EXPECT_EQ(0u, data.getNumOccs());
+ EXPECT_EQ(0u, data.getFieldLength());
+ }
}
}
EXPECT_TRUE(!search->seek(7));
@@ -104,11 +127,20 @@ EquivTest::test_equiv(bool strict)
EXPECT_EQ(2u, data.getFieldId());
EXPECT_EQ(10u, data.getDocId());
FieldPositionsIterator itr = data.getIterator();
- EXPECT_EQ(1u, itr.size());
- ASSERT_TRUE(itr.valid());
- EXPECT_EQ(4u, itr.getPosition());
- itr.next();
+ if (unpack_normal_features) {
+ EXPECT_EQ(1u, itr.size());
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(4u, itr.getPosition());
+ itr.next();
+ }
EXPECT_TRUE(!itr.valid());
+ if (unpack_interleaved_features) {
+ EXPECT_EQ(1u, data.getNumOccs());
+ EXPECT_EQ(35u, data.getFieldLength());
+ } else {
+ EXPECT_EQ(0u, data.getNumOccs());
+ EXPECT_EQ(0u, data.getFieldLength());
+ }
}
}
EXPECT_TRUE(!search->seek(13));
@@ -122,12 +154,42 @@ EquivTest::test_equiv(bool strict)
TEST_F(EquivTest, nonstrict)
{
- test_equiv(false);
+ test_equiv(false, true, false);
}
TEST_F(EquivTest, strict)
{
- test_equiv(true);
+ test_equiv(true, true, false);
+}
+
+TEST_F(EquivTest, nonstrict_no_normal_no_interleaved)
+{
+ test_equiv(false, false, false);
+}
+
+TEST_F(EquivTest, strict_no_normal_no_interleaved)
+{
+ test_equiv(true, false, false);
+}
+
+TEST_F(EquivTest, nonstrict_no_normal_interleaved)
+{
+ test_equiv(false, false, true);
+}
+
+TEST_F(EquivTest, strict_no_normal_interleaved)
+{
+ test_equiv(true, false, true);
+}
+
+TEST_F(EquivTest, nonstrict_normal_interleaved)
+{
+ test_equiv(false, true, true);
+}
+
+TEST_F(EquivTest, strict_normal_interleaved)
+{
+ test_equiv(true, true, true);
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp
index 97cb829e30c..973e11fc0d2 100644
--- a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp
+++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp
@@ -41,31 +41,52 @@ TermMatchDataMerger::merge(uint32_t docid,
{
_scratch.clear();
bool wasMatch = false;
+ bool needs_normal_features = out.needs_normal_features();
+ bool needs_interleaved_features = out.needs_interleaved_features();
+ uint32_t num_occs = 0u;
+ uint16_t field_length = 0u;
for (size_t i = 0; i < in.size(); ++i) {
const TermFieldMatchData *md = in[i].matchData;
if (md->getDocId() == docid) {
- for (const TermFieldMatchDataPosition &iter : *md) {
- double exactness = in[i].exactness * iter.getMatchExactness();
- _scratch.push_back(iter);
- _scratch.back().setMatchExactness(exactness);
+ if (needs_normal_features) {
+ for (const TermFieldMatchDataPosition &iter : *md) {
+ double exactness = in[i].exactness * iter.getMatchExactness();
+ _scratch.push_back(iter);
+ _scratch.back().setMatchExactness(exactness);
+ }
+ }
+ if (needs_interleaved_features) {
+ num_occs += md->getNumOccs();
+ field_length = std::max(field_length, md->getFieldLength());
}
wasMatch = true;
}
}
if (wasMatch) {
out.reset(docid);
- if (_scratch.size() > 0) {
- std::sort(_scratch.begin(), _scratch.end(),
- TermFieldMatchDataPosition::compareWithExactness);
- TermFieldMatchDataPosition prev = _scratch[0];
- for (size_t i = 1; i < _scratch.size(); ++i) {
- const TermFieldMatchDataPosition &curr = _scratch[i];
- if (prev.key() < curr.key()) {
- out.appendPosition(prev);
- prev = curr;
+ if (needs_normal_features) {
+ num_occs = 0;
+ if (_scratch.size() > 0) {
+ std::sort(_scratch.begin(), _scratch.end(),
+ TermFieldMatchDataPosition::compareWithExactness);
+ TermFieldMatchDataPosition prev = _scratch[0];
+ for (size_t i = 1; i < _scratch.size(); ++i) {
+ const TermFieldMatchDataPosition &curr = _scratch[i];
+ if (prev.key() < curr.key()) {
+ out.appendPosition(prev);
+ prev = curr;
+ ++num_occs;
+ }
}
+ out.appendPosition(prev);
+ ++num_occs;
}
- out.appendPosition(prev);
+ }
+ if (needs_interleaved_features) {
+ constexpr uint32_t max_num_occs = std::numeric_limits<uint16_t>::max();
+ uint16_t capped_num_occs = std::min(num_occs, max_num_occs);
+ out.setNumOccs(std::min(capped_num_occs, field_length));
+ out.setFieldLength(field_length);
}
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp
index 08a05b25772..cf378c95487 100644
--- a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp
@@ -4,9 +4,42 @@
#include "equivsearch.h"
#include "field_spec.hpp"
#include <vespa/vespalib/objects/visit.hpp>
+#include <vespa/vespalib/stllike/hash_map.hpp>
namespace search::queryeval {
+namespace {
+
+class UnpackNeed
+{
+ bool _needs_normal_features;
+ bool _needs_interleaved_features;
+public:
+ UnpackNeed()
+ : _needs_normal_features(false),
+ _needs_interleaved_features(false)
+ {
+ }
+
+ void observe(const fef::TermFieldMatchData &output)
+ {
+ if (output.needs_normal_features()) {
+ _needs_normal_features = true;
+ }
+ if (output.needs_interleaved_features()) {
+ _needs_interleaved_features = true;
+ }
+ }
+
+ void notify(fef::TermFieldMatchData &input) const
+ {
+ input.setNeedNormalFeatures(_needs_normal_features);
+ input.setNeedInterleavedFeatures(_needs_interleaved_features);
+ }
+};
+
+};
+
EquivBlueprint::EquivBlueprint(const FieldSpecBaseList &fields,
fef::MatchDataLayout subtree_mdl)
: ComplexLeafBlueprint(fields),
@@ -26,10 +59,16 @@ EquivBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &outputs, bo
fef::MatchData::UP md = _layout.createMatchData();
MultiSearch::Children children(_terms.size());
fef::TermMatchDataMerger::Inputs childMatch;
+ vespalib::hash_map<uint16_t, UnpackNeed> unpack_needs(outputs.size());
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ unpack_needs[outputs[i]->getFieldId()].observe(*outputs[i]);
+ }
for (size_t i = 0; i < _terms.size(); ++i) {
const State &childState = _terms[i]->getState();
for (size_t j = 0; j < childState.numFields(); ++j) {
- childMatch.emplace_back(childState.field(j).resolve(*md), _exactness[i]);
+ auto *child_term_field_match_data = childState.field(j).resolve(*md);
+ unpack_needs[child_term_field_match_data->getFieldId()].notify(*child_term_field_match_data);
+ childMatch.emplace_back(child_term_field_match_data, _exactness[i]);
}
children[i] = _terms[i]->createSearch(*md, strict).release();
}