summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2018-05-29 23:05:00 +0200
committerGitHub <noreply@github.com>2018-05-29 23:05:00 +0200
commit8df85470c9d73c29132c4429549af1a453a95c02 (patch)
tree72b2c1080b7446a6b46357f089e0efdb597dc7f7
parent10019f0ca6c04f8959c06acf8919b0fce212f5cb (diff)
parentcbf48b1199f094e3643eb05cb44d81ccfc91e0b0 (diff)
Merge pull request #5992 from vespa-engine/havardpe/same-element-iterator
SameElement blueprint and search iterator
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/queryeval/same_element/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/queryeval/same_element/same_element_test.cpp99
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h43
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp117
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_search.h44
8 files changed, 396 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 3b321f4a12f..6e4a3ef1e1f 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -194,6 +194,7 @@ vespa_define_module(
src/tests/queryeval/multibitvectoriterator
src/tests/queryeval/parallel_weak_and
src/tests/queryeval/predicate
+ src/tests/queryeval/same_element
src/tests/queryeval/simple_phrase
src/tests/queryeval/sourceblender
src/tests/queryeval/sparse_vector_benchmark
diff --git a/searchlib/src/tests/queryeval/same_element/CMakeLists.txt b/searchlib/src/tests/queryeval/same_element/CMakeLists.txt
new file mode 100644
index 00000000000..97034c53376
--- /dev/null
+++ b/searchlib/src/tests/queryeval/same_element/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_same_element_test_app TEST
+ SOURCES
+ same_element_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_same_element_test_app COMMAND searchlib_same_element_test_app)
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
new file mode 100644
index 00000000000..e0c20949c8e
--- /dev/null
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -0,0 +1,99 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/same_element_blueprint.h>
+
+using namespace search::fef;
+using namespace search::queryeval;
+
+std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResult> &children) {
+ auto result = std::make_unique<SameElementBlueprint>();
+ for (size_t i = 0; i < children.size(); ++i) {
+ uint32_t field_id = i;
+ vespalib::string field_name = vespalib::make_string("f%u", field_id);
+ FieldSpec field = result->getNextChildField(field_name, field_id);
+ result->addTerm(std::make_unique<FakeBlueprint>(field, children[i]));
+ }
+ return result;
+}
+
+Blueprint::UP finalize(Blueprint::UP bp, bool strict) {
+ Blueprint::UP result = Blueprint::optimize(std::move(bp));
+ result->fetchPostings(strict);
+ result->freeze();
+ return result;
+}
+
+SimpleResult find_matches(const std::vector<FakeResult> &children) {
+ auto md = MatchData::makeTestInstance(0, 0);
+ auto bp = finalize(make_blueprint(children), false);
+ auto search = bp->createSearch(*md, false);
+ return SimpleResult().search(*search, 1000);
+}
+
+FakeResult make_result(const std::vector<std::pair<uint32_t,std::vector<uint32_t> > > &match_data) {
+ FakeResult result;
+ uint32_t pos_should_be_ignored = 0;
+ for (const auto &doc: match_data) {
+ result.doc(doc.first);
+ for (const auto &elem: doc.second) {
+ result.elem(elem).pos(++pos_should_be_ignored);
+ }
+ }
+ return result;
+}
+
+TEST("require that simple match can be found") {
+ auto a = make_result({{5, {1,3,7}}});
+ auto b = make_result({{5, {3,5,10}}});
+ SimpleResult result = find_matches({a, b});
+ SimpleResult expect({5});
+ EXPECT_EQUAL(result, expect);
+}
+
+TEST("require that children must match within same element") {
+ auto a = make_result({{5, {1,3,7}}});
+ auto b = make_result({{5, {2,5,10}}});
+ SimpleResult result = find_matches({a, b});
+ SimpleResult expect({});
+ EXPECT_EQUAL(result, expect);
+}
+
+TEST("require that strict iterator seeks to next hit") {
+ auto md = MatchData::makeTestInstance(0, 0);
+ auto a = make_result({{5, {1,2}}, {7, {1,2}}, {8, {1,2}}, {9, {1,2}}});
+ auto b = make_result({{5, {3}}, {6, {1,2}}, {7, {2,4}}, {9, {1}}});
+ auto bp = finalize(make_blueprint({a,b}), true);
+ auto search = bp->createSearch(*md, true);
+ search->initRange(1, 1000);
+ EXPECT_LESS(search->getDocId(), 1u);
+ EXPECT_TRUE(!search->seek(1));
+ EXPECT_EQUAL(search->getDocId(), 7u);
+ EXPECT_TRUE(search->seek(9));
+ EXPECT_EQUAL(search->getDocId(), 9u);
+ EXPECT_TRUE(!search->seek(10));
+ EXPECT_TRUE(search->isAtEnd());
+}
+
+TEST("require that results are estimated appropriately") {
+ auto a = make_result({{5, {0}}, {5, {0}}, {5, {0}}});
+ auto b = make_result({{5, {0}}, {5, {0}}});
+ auto c = make_result({{5, {0}}, {5, {0}}, {5, {0}}, {5, {0}}});
+ auto bp = finalize(make_blueprint({a,b,c}), true);
+ EXPECT_EQUAL(bp->getState().estimate().estHits, 2u);
+}
+
+TEST("require that children are sorted") {
+ auto a = make_result({{5, {0}}, {5, {0}}, {5, {0}}});
+ auto b = make_result({{5, {0}}, {5, {0}}});
+ auto c = make_result({{5, {0}}, {5, {0}}, {5, {0}}, {5, {0}}});
+ auto bp = finalize(make_blueprint({a,b,c}), true);
+ EXPECT_EQUAL(dynamic_cast<SameElementBlueprint&>(*bp).terms()[0]->getState().estimate().estHits, 2u);
+ EXPECT_EQUAL(dynamic_cast<SameElementBlueprint&>(*bp).terms()[1]->getState().estimate().estHits, 3u);
+ EXPECT_EQUAL(dynamic_cast<SameElementBlueprint&>(*bp).terms()[2]->getState().estimate().estHits, 4u);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index 683de780107..ecda6d6d6ef 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -33,6 +33,8 @@ vespa_add_library(searchlib_queryeval OBJECT
predicate_blueprint.cpp
predicate_search.cpp
ranksearch.cpp
+ same_element_blueprint.cpp
+ same_element_search.cpp
searchable.cpp
searchiterator.cpp
simple_phrase_blueprint.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
new file mode 100644
index 00000000000..a563a288396
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
@@ -0,0 +1,82 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "same_element_blueprint.h"
+#include "same_element_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/visit.hpp>
+#include <algorithm>
+#include <map>
+
+namespace search::queryeval {
+
+SameElementBlueprint::SameElementBlueprint()
+ : ComplexLeafBlueprint(FieldSpecBaseList()),
+ _estimate(),
+ _layout(),
+ _terms()
+{
+}
+
+FieldSpec
+SameElementBlueprint::getNextChildField(const vespalib::string &field_name, uint32_t field_id)
+{
+ return FieldSpec(field_name, field_id, _layout.allocTermField(field_id), false);
+}
+
+void
+SameElementBlueprint::addTerm(Blueprint::UP term)
+{
+ const State &childState = term->getState();
+ assert(childState.numFields() == 1);
+ HitEstimate childEst = childState.estimate();
+ if (_terms.empty() || (childEst < _estimate)) {
+ _estimate = childEst;
+ setEstimate(_estimate);
+ }
+ _terms.push_back(std::move(term));
+}
+
+void
+SameElementBlueprint::optimize_self()
+{
+ std::sort(_terms.begin(), _terms.end(),
+ [](const auto &a, const auto &b)
+ {
+ return (a->getState().estimate() < b->getState().estimate());
+ });
+}
+
+void
+SameElementBlueprint::fetchPostings(bool strict)
+{
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ _terms[i]->fetchPostings(strict && (i == 0));
+ }
+}
+
+SearchIterator::UP
+SameElementBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const
+{
+ (void) tfmda;
+ assert(!tfmda.valid());
+ fef::MatchData::UP md = _layout.createMatchData();
+ search::fef::TermFieldMatchDataArray childMatch;
+ std::vector<SearchIterator::UP> children(_terms.size());
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const State &childState = _terms[i]->getState();
+ assert(childState.numFields() == 1);
+ childMatch.add(childState.field(0).resolve(*md));
+ children[i] = _terms[i]->createSearch(*md, (strict && (i == 0)));
+ }
+ return std::make_unique<SameElementSearch>(std::move(md), std::move(children), childMatch, strict);
+}
+
+void
+SameElementBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ ComplexLeafBlueprint::visitMembers(visitor);
+ visit(visitor, "terms", _terms);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h
new file mode 100644
index 00000000000..050a2bc31d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h
@@ -0,0 +1,43 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchable.h"
+#include <vespa/searchlib/fef/matchdatalayout.h>
+
+namespace search::fef { class TermFieldMatchData; }
+
+namespace search::queryeval {
+
+class SameElementBlueprint : public ComplexLeafBlueprint
+{
+private:
+ HitEstimate _estimate;
+ fef::MatchDataLayout _layout;
+ std::vector<Blueprint::UP> _terms;
+
+public:
+ SameElementBlueprint();
+ SameElementBlueprint(const SameElementBlueprint &) = delete;
+ SameElementBlueprint &operator=(const SameElementBlueprint &) = delete;
+ ~SameElementBlueprint() = default;
+
+ // no match data
+ bool isWhiteList() const override { return true; }
+
+ // used by create visitor
+ FieldSpec getNextChildField(const vespalib::string &field_name, uint32_t field_id);
+
+ // used by create visitor
+ void addTerm(Blueprint::UP term);
+
+ void optimize_self() override;
+ void fetchPostings(bool strict) override;
+
+ SearchIteratorUP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+ bool strict) const override;
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ const std::vector<Blueprint::UP> &terms() const { return _terms; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp
new file mode 100644
index 00000000000..8f3fc9c350d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp
@@ -0,0 +1,117 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "same_element_search.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/visit.h>
+#include <vespa/vespalib/objects/visit.hpp>
+#include <algorithm>
+#include <functional>
+
+using TFMD = search::fef::TermFieldMatchData;
+
+namespace search::queryeval {
+
+namespace {
+
+template <typename It>
+int32_t try_match(const fef::TermFieldMatchDataArray &match, std::vector<It> &iterators, uint32_t cand) {
+ for (size_t i = 0; i < iterators.size(); ++i) {
+ while ((iterators[i] != match[i]->end()) && (iterators[i]->getElementId() < cand)) {
+ ++iterators[i];
+ }
+ if (iterators[i] == match[i]->end()) {
+ return -1;
+ }
+ if (iterators[i]->getElementId() != cand) {
+ return iterators[i]->getElementId();
+ }
+ }
+ return cand;
+}
+
+}
+
+bool
+SameElementSearch::check_docid_match(uint32_t docid)
+{
+ for (const auto &child: _children) {
+ if (!child->seek(docid)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void
+SameElementSearch::unpack_children(uint32_t docid)
+{
+ for (const auto &child: _children) {
+ child->doUnpack(docid);
+ }
+ for (size_t i = 0; i < _childMatch.size(); ++i) {
+ _iterators[i] = _childMatch[i]->begin();
+ }
+}
+
+bool
+SameElementSearch::check_element_match(uint32_t docid)
+{
+ unpack_children(docid);
+ int32_t cand = 0;
+ int32_t next = try_match(_childMatch, _iterators, cand);
+ while (next > cand) {
+ cand = next;
+ next = try_match(_childMatch, _iterators, cand);
+ }
+ return (cand == next);
+}
+
+SameElementSearch::SameElementSearch(fef::MatchData::UP md,
+ std::vector<SearchIterator::UP> children,
+ const fef::TermFieldMatchDataArray &childMatch,
+ bool strict)
+ : _md(std::move(md)),
+ _children(std::move(children)),
+ _childMatch(childMatch),
+ _iterators(childMatch.size()),
+ _strict(strict)
+{
+ assert(!_children.empty());
+ assert(_childMatch.valid());
+}
+
+void
+SameElementSearch::initRange(uint32_t begin_id, uint32_t end_id)
+{
+ SearchIterator::initRange(begin_id, end_id);
+ for (const auto &child: _children) {
+ child->initRange(begin_id, end_id);
+ }
+}
+
+void
+SameElementSearch::doSeek(uint32_t docid) {
+ if (check_docid_match(docid) && check_element_match(docid)) {
+ setDocId(docid);
+ } else if (_strict) {
+ docid = std::max(docid + 1, _children[0]->getDocId());
+ while (!isAtEnd(docid)) {
+ if (check_docid_match(docid) && check_element_match(docid)) {
+ setDocId(docid);
+ return;
+ }
+ docid = std::max(docid + 1, _children[0]->getDocId());
+ }
+ setAtEnd();
+ }
+}
+
+void
+SameElementSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+ SearchIterator::visitMembers(visitor);
+ visit(visitor, "children", _children);
+ visit(visitor, "strict", _strict);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
new file mode 100644
index 00000000000..6a116c76e73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
@@ -0,0 +1,44 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <memory>
+#include <vector>
+
+namespace search::queryeval {
+
+/**
+ * Search iterator for a collection of terms that need to match within
+ * the same element (array index).
+ */
+class SameElementSearch : public SearchIterator
+{
+private:
+ using It = fef::TermFieldMatchData::PositionsIterator;
+
+ fef::MatchData::UP _md;
+ std::vector<SearchIterator::UP> _children;
+ fef::TermFieldMatchDataArray _childMatch;
+ std::vector<It> _iterators;
+ bool _strict;
+
+ void unpack_children(uint32_t docid);
+ bool check_docid_match(uint32_t docid);
+ bool check_element_match(uint32_t docid);
+
+public:
+ SameElementSearch(fef::MatchData::UP md,
+ std::vector<SearchIterator::UP> children,
+ const fef::TermFieldMatchDataArray &childMatch,
+ bool strict);
+ void initRange(uint32_t begin_id, uint32_t end_id) override;
+ void doSeek(uint32_t docid) override;
+ void doUnpack(uint32_t) override {}
+ void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+};
+
+}