aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-05-18 18:31:44 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-05-18 22:54:51 +0000
commit723f2ae4228a82692831af2682942e701be7aa16 (patch)
tree8b43562b28f0da99d59c48f093ac013eb1a4a20a
parent38e14d3a40775f2e010046ae36cf0505f7b6e5a6 (diff)
- Handle more than 64k hits in the element vector.
- Avoid computing all vectors in full separately and instead do an incremental inline merge with. - Also avoid requiring the searchiterator aspect on the wrappers.
-rw-r--r--searchcore/src/tests/proton/matching/querynodes_test.cpp12
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp94
-rw-r--r--searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp128
-rw-r--r--searchlib/src/tests/queryeval/same_element/same_element_test.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/elementiterator.cpp47
-rw-r--r--searchlib/src/vespa/searchlib/attribute/elementiterator.h28
-rw-r--r--searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp41
-rw-r--r--searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h23
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/elementiterator.h48
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_search.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_search.h10
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp59
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_search.h18
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h53
-rw-r--r--searchlib/src/vespa/searchlib/test/fakedata/fakeword.h9
21 files changed, 424 insertions, 274 deletions
diff --git a/searchcore/src/tests/proton/matching/querynodes_test.cpp b/searchcore/src/tests/proton/matching/querynodes_test.cpp
index 5d01753dfb6..896b2f7e07f 100644
--- a/searchcore/src/tests/proton/matching/querynodes_test.cpp
+++ b/searchcore/src/tests/proton/matching/querynodes_test.cpp
@@ -48,6 +48,8 @@ using search::query::QueryBuilder;
using search::queryeval::AndNotSearch;
using search::queryeval::AndSearch;
using search::queryeval::Blueprint;
+using search::queryeval::ElementIteratorWrapper;
+using search::queryeval::ElementIterator;
using search::queryeval::EmptySearch;
using search::queryeval::FakeRequestContext;
using search::queryeval::FakeResult;
@@ -290,16 +292,14 @@ SearchIterator *getParent<ONear>(SearchIterator *a, SearchIterator *b) {
template <>
SearchIterator *getParent<SameElement>(SearchIterator *a, SearchIterator *b) {
- std::vector<SearchIterator::UP> children;
- children.emplace_back(a);
- children.emplace_back(b);
- TermFieldMatchDataArray data;
static TermFieldMatchData tmd;
+ std::vector<ElementIterator::UP> children;
+ children.emplace_back(std::make_unique<ElementIteratorWrapper>(SearchIterator::UP(a), tmd));
+ children.emplace_back(std::make_unique<ElementIteratorWrapper>(SearchIterator::UP(b), tmd));
// we only check how many term/field combinations
// are below the SameElement parent:
// two terms searching in one index field
- data.add(&tmd).add(&tmd);
- return new SameElementSearch(nullptr, std::move(children), data, true);
+ return new SameElementSearch(nullptr, std::move(children), true);
}
template <>
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 19cad2f3905..47b382af3f1 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -94,6 +94,7 @@ vespa_define_module(
src/tests/attribute/save_target
src/tests/attribute/searchable
src/tests/attribute/searchcontext
+ src/tests/attribute/searchcontextelementiterator
src/tests/attribute/sourceselector
src/tests/attribute/stringattribute
src/tests/attribute/tensorattribute
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 416ddb5fbc0..ffae7824668 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -1,12 +1,9 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/searchlib/attribute/attribute.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributeiterators.h>
-#include <vespa/searchlib/attribute/attributevector.hpp>
-#include <vespa/searchlib/attribute/elementiterator.h>
+#include <vespa/searchlib/attribute/searchcontextelementiterator.h>
#include <vespa/searchlib/attribute/flagattribute.h>
-#include <vespa/searchlib/attribute/multistringattribute.h>
#include <vespa/searchlib/attribute/singleboolattribute.h>
#include <vespa/searchlib/attribute/singlenumericattribute.h>
#include <vespa/searchlib/attribute/singlestringattribute.h>
@@ -22,6 +19,7 @@
#include <vespa/searchlib/test/searchiteratorverifier.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/compress.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
#include <vespa/log/log.h>
LOG_SETUP("searchcontext_test");
@@ -628,29 +626,24 @@ void SearchContextTest::testSearch(const ConfigMap & cfgs) {
template<typename T, typename A>
class Verifier : public search::test::SearchIteratorVerifier {
public:
- Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, const vespalib::string & name,
- const Config & cfg, bool withElementId);
+ Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString,
+ const vespalib::string & name, const Config & cfg);
~Verifier() override;
SearchIterator::UP
create(bool strict) const override {
_sc->fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0));
- auto search = _sc->createIterator(&_dummy, strict);
- if (_withElementId) {
- search = std::make_unique<attribute::ElementIterator>(std::move(search), *_sc, _dummy);
- }
- return search;
+ return _sc->createIterator(&_dummy, strict);
}
private:
mutable TermFieldMatchData _dummy;
- const bool _withElementId;
AttributePtr _attribute;
SearchContextPtr _sc;
};
template<typename T, typename A>
-Verifier<T, A>::Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString, const vespalib::string & name,
- const Config & cfg, bool withElementId)
- : _withElementId(withElementId),
+Verifier<T, A>::Verifier(const std::vector<T> & keys, const vespalib::string & keyAsString,
+ const vespalib::string & name, const Config & cfg)
+ : _dummy(),
_attribute(AttributeFactory::createAttribute(name + "-initrange", cfg)),
_sc()
{
@@ -670,22 +663,18 @@ Verifier<T, A>::~Verifier() = default;
template<typename T, typename A>
void SearchContextTest::testSearchIterator(const std::vector<T> & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs) {
-
- for (bool withElementId : {false, true} ) {
- for (const auto & cfg : cfgs) {
- {
- Verifier<T, A> verifier(keys, keyAsString, cfg.first, cfg.second, withElementId);
- verifier.verify();
- }
- {
- Config withFilter(cfg.second);
- withFilter.setIsFilter(true);
- Verifier<T, A> verifier(keys, keyAsString, cfg.first + "-filter", withFilter, withElementId);
- verifier.verify();
- }
+ for (const auto & cfg : cfgs) {
+ {
+ Verifier<T, A> verifier(keys, keyAsString, cfg.first, cfg.second);
+ verifier.verify();
+ }
+ {
+ Config withFilter(cfg.second);
+ withFilter.setIsFilter(true);
+ Verifier<T, A> verifier(keys, keyAsString, cfg.first + "-filter", withFilter);
+ verifier.verify();
}
}
-
}
void SearchContextTest::testSearchIteratorConformance() {
@@ -976,11 +965,13 @@ SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, Search
pos.setElementWeight(100);
md.appendPosition(pos);
- SearchBasePtr sb = sc.createIterator(&md, strict);
+ SearchBasePtr sbp = sc.createIterator(&md, strict);
+ SearchIterator & search = *sbp;
+ queryeval::ElementIterator::UP elemIt;
if (withElementId) {
- sb = std::make_unique<attribute::ElementIterator>(std::move(sb), sc, md);
+ elemIt = std::make_unique<attribute::SearchContextElementIterator>(std::move(sbp), sc);
}
- sb->initFullRange();
+ search.initFullRange();
std::vector<int32_t> weights(3);
if (attr->getCollectionType() == CollectionType::SINGLE ||
@@ -1000,41 +991,40 @@ SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, Search
}
// unpack and check weights
- sb->unpack(1);
- EXPECT_EQUAL(sb->getDocId(), 1u);
+ search.unpack(1);
+ EXPECT_EQUAL(search.getDocId(), 1u);
EXPECT_EQUAL(md.getDocId(), 1u);
EXPECT_EQUAL(md.getWeight(), weights[0]);
- sb->unpack(2);
- EXPECT_EQUAL(sb->getDocId(), 2u);
+ search.unpack(2);
+ EXPECT_EQUAL(search.getDocId(), 2u);
EXPECT_EQUAL(md.getDocId(), 2u);
if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) {
- EXPECT_EQUAL(2, md.end()- md.begin());
- EXPECT_EQUAL(md.begin()[0].getElementId(), 0u);
- EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1);
- EXPECT_EQUAL(md.begin()[1].getElementId(), 1u);
- EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1);
+ std::vector<uint32_t> elems;
+ elemIt->getElementIds(2, elems);
+ ASSERT_EQUAL(2u, elems.size());
+ EXPECT_EQUAL(0u,elems[0]);
+ EXPECT_EQUAL(1u,elems[1]);
} else {
EXPECT_EQUAL(md.getWeight(), weights[1]);
}
- sb->unpack(3);
- EXPECT_EQUAL(sb->getDocId(), 3u);
+ search.unpack(3);
+ EXPECT_EQUAL(search.getDocId(), 3u);
EXPECT_EQUAL(md.getDocId(), 3u);
if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) {
- EXPECT_EQUAL(3, md.end()- md.begin());
- EXPECT_EQUAL(md.begin()[0].getElementId(), 0u);
- EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1);
- EXPECT_EQUAL(md.begin()[1].getElementId(), 1u);
- EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1);
- EXPECT_EQUAL(md.begin()[2].getElementId(), 2u);
- EXPECT_EQUAL(md.begin()[2].getElementWeight(), 1);
+ std::vector<uint32_t> elems;
+ elemIt->getElementIds(3, elems);
+ ASSERT_EQUAL(3u, elems.size());
+ EXPECT_EQUAL(0u,elems[0]);
+ EXPECT_EQUAL(1u,elems[1]);
+ EXPECT_EQUAL(2u,elems[2]);
} else {
EXPECT_EQUAL(md.getWeight(), weights[2]);
}
if (extra) {
- sb->unpack(4);
- EXPECT_EQUAL(sb->getDocId(), 4u);
+ search.unpack(4);
+ EXPECT_EQUAL(search.getDocId(), 4u);
EXPECT_EQUAL(md.getDocId(), 4u);
EXPECT_EQUAL(md.getWeight(), 1);
}
diff --git a/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt
new file mode 100644
index 00000000000..1a27b446d30
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontextelementiterator/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attribute_searchcontextelementiterator_test_app TEST
+ SOURCES
+ searchcontextelementiterator_test.cpp
+ DEPENDS
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_attribute_searchcontextelementiterator_test_app COMMAND searchlib_attribute_searchcontextelementiterator_test_app)
diff --git a/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp b/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp
new file mode 100644
index 00000000000..400af6860ec
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontextelementiterator/searchcontextelementiterator_test.cpp
@@ -0,0 +1,128 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/searchcontextelementiterator.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/query/query_term_simple.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#
+
+#include <vespa/vespalib/gtest/gtest.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("searchcontextelementiterator_test");
+
+using namespace search::attribute;
+using namespace search;
+
+namespace {
+
+AttributeVector::SP
+createAndFillAttribute() {
+ AttributeFactory factory;
+ AttributeVector::SP attribute = factory.createAttribute("mva", Config(BasicType::INT32, CollectionType::ARRAY));
+ attribute->addDocs(6);
+ IntegerAttribute & ia = dynamic_cast<IntegerAttribute &>(*attribute);
+ ia.append(1, 3, 1);
+ for (int v : {1,2,3,1,2,3}) {
+ ia.append(2, v, 1);
+ }
+ for (int v : {1,2,3,4,5,1,2,3,4,5,6}) {
+ ia.append(4, v, 1);
+ }
+ ia.append(5, 5, 1);
+ attribute->commit();
+ return attribute;
+}
+
+queryeval::FakeResult
+createResult() {
+ queryeval::FakeResult result;
+ result.doc(2).elem(0).pos(7).pos(9)
+ .elem(3).pos(1);
+ result.doc(4).elem(0).pos(2)
+ .elem(5).pos(1).pos(2).pos(3);
+ return result;
+}
+
+void
+verifySeek(queryeval::ElementIterator & elemIt) {
+ elemIt.initFullRange();
+ EXPECT_FALSE(elemIt.seek(1));
+ EXPECT_TRUE(elemIt.seek(2));
+ EXPECT_FALSE(elemIt.seek(3));
+ EXPECT_TRUE(elemIt.seek(4));
+ EXPECT_FALSE(elemIt.seek(5));
+}
+
+void
+verifyGetElementIds(queryeval::ElementIterator & elemIt, const std::vector<std::vector<uint32_t>> & expectedALL) {
+ elemIt.initFullRange();
+ std::vector<uint32_t> elems;
+ for (uint32_t docId : {1,2,3,4,5}) {
+ const auto & expected = expectedALL[docId];
+ elems.clear();
+ EXPECT_EQ(expected.empty(), !elemIt.seek(docId));
+ assert(expected.empty() != elemIt.seek(docId));
+ if (elemIt.seek(docId)) {
+ elemIt.getElementIds(docId, elems);
+ EXPECT_EQ(expected.size(), elems.size());
+ EXPECT_EQ(expected, elems);
+ }
+ }
+}
+
+void
+verifyMergeElementIds(queryeval::ElementIterator & elemIt, std::vector<uint32_t> initial, const std::vector<std::vector<uint32_t>> & expectedALL) {
+ elemIt.initFullRange();
+ std::vector<uint32_t> elems;
+ for (uint32_t docId : {1,2,3,4,5}) {
+ const auto & expected = expectedALL[docId];
+ elems = initial;
+ EXPECT_EQ(expected.empty(), !elemIt.seek(docId));
+ assert(expected.empty() != elemIt.seek(docId));
+ if (elemIt.seek(docId)) {
+ elemIt.mergeElementIds(docId, elems);
+ EXPECT_EQ(expected.size(), elems.size());
+ EXPECT_EQ(expected, elems);
+ }
+ }
+}
+
+void
+verifyElementIterator(queryeval::ElementIterator & elemIt) {
+ verifySeek(elemIt);
+ std::vector<std::vector<uint32_t>> expectedALL = {{}, {}, {0, 3}, {}, {0, 5}, {}};
+ std::vector<std::vector<uint32_t>> expectedNONE = {{}, {}, {}, {}, {}, {}};
+ verifyGetElementIds(elemIt, expectedALL);
+ verifyMergeElementIds(elemIt, {0,1,2,3,4,5}, expectedALL);
+ //verifyMergeElementIds(elemIt, {}, expectedNONE);
+}
+
+}
+
+TEST(ElementIteratorTest, require_that_searchcontext)
+{
+ AttributeVector::SP attribute = createAndFillAttribute();
+ fef::TermFieldMatchData tfmd;
+
+ SearchContextParams params;
+ ISearchContext::UP sc = attribute->createSearchContext(std::make_unique<QueryTermSimple>("1", QueryTermSimple::SearchTerm::WORD), params);
+ SearchContextElementIterator elemIt(sc->createIterator(&tfmd, false), *sc);
+ verifyElementIterator(elemIt);
+}
+
+TEST(ElementIteratorTest, require_that_non_searchcontext)
+{
+ fef::TermFieldMatchData tfmd;
+ fef::TermFieldMatchDataArray tfmda;
+ tfmda.add(&tfmd);
+ queryeval::FakeResult result = createResult();
+ auto search = std::make_unique<queryeval::FakeSearch>("","","", result, tfmda);
+ queryeval::ElementIteratorWrapper wrapper(std::move(search), tfmd);
+ verifyElementIterator(wrapper);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
index 378e16480b8..db177b98b84 100644
--- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -8,12 +8,12 @@
#include <vespa/searchlib/queryeval/same_element_search.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchcommon/attribute/i_search_context.h>
-#include <vespa/searchlib/attribute/elementiterator.h>
+#include <vespa/searchlib/attribute/searchcontextelementiterator.h>
#include <vespa/vespalib/test/insertion_operators.h>
using namespace search::fef;
using namespace search::queryeval;
-using search::attribute::ElementIterator;
+using search::attribute::SearchContextElementIterator;
void verify_elements(SameElementSearch &se, uint32_t docid, const std::initializer_list<uint32_t> list) {
std::vector<uint32_t> expect(list);
@@ -99,11 +99,11 @@ TEST("require that strict iterator seeks to next hit") {
auto search = bp->createSearch(*md, true);
search->initRange(1, 1000);
EXPECT_LESS(search->getDocId(), 1u);
- EXPECT_TRUE(!search->seek(1));
+ EXPECT_FALSE(search->seek(1));
EXPECT_EQUAL(search->getDocId(), 7u);
EXPECT_TRUE(search->seek(9));
EXPECT_EQUAL(search->getDocId(), 9u);
- EXPECT_TRUE(!search->seek(10));
+ EXPECT_FALSE(search->seek(10));
EXPECT_TRUE(search->isAtEnd());
}
@@ -134,8 +134,8 @@ TEST("require that attribute iterators are wrapped for element unpacking") {
SameElementSearch *se = dynamic_cast<SameElementSearch*>(search.get());
ASSERT_TRUE(se != nullptr);
ASSERT_EQUAL(se->children().size(), 2u);
- EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[0].get()) != nullptr);
- EXPECT_TRUE(dynamic_cast<ElementIterator*>(se->children()[1].get()) != nullptr);
+ EXPECT_TRUE(dynamic_cast<SearchContextElementIterator*>(se->children()[0].get()) != nullptr);
+ EXPECT_TRUE(dynamic_cast<SearchContextElementIterator*>(se->children()[1].get()) != nullptr);
}
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index b9e3af8c729..0fffe19727c 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -34,7 +34,7 @@ vespa_add_library(searchlib_attribute OBJECT
defines.cpp
diversity.cpp
dociditerator.cpp
- elementiterator.cpp
+ searchcontextelementiterator.cpp
enumattribute.cpp
enumattributesaver.cpp
enumcomparator.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp b/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp
deleted file mode 100644
index 3be4c478376..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/elementiterator.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "elementiterator.h"
-#include <vespa/searchcommon/attribute/i_search_context.h>
-#include <vespa/searchlib/fef/termfieldmatchdata.h>
-
-using search::fef::TermFieldMatchDataPosition;
-
-namespace search::attribute {
-
-void
-ElementIterator::doSeek(uint32_t docid) {
- _search->doSeek(docid);
- setDocId(_search->getDocId());
-}
-
-void
-ElementIterator::doUnpack(uint32_t docid) {
- _tfmd.reset(docid);
- int32_t weight(0);
- for (int32_t id = _searchContext.find(docid, 0, weight); id >= 0; id = _searchContext.find(docid, id+1, weight)) {
- _tfmd.appendPosition(TermFieldMatchDataPosition(id, 0, weight, 1));
- }
-}
-
-vespalib::Trinary
-ElementIterator::is_strict() const {
- return _search->is_strict();
-}
-
-void
-ElementIterator::initRange(uint32_t beginid, uint32_t endid) {
- SearchIterator::initRange(beginid, endid);
- _search->initRange(beginid, endid);
- setDocId(_search->getDocId());
-}
-
-ElementIterator::ElementIterator(SearchIterator::UP search, const ISearchContext & sc, fef::TermFieldMatchData & tfmd)
- : _search(std::move(search)),
- _searchContext(sc),
- _tfmd(tfmd)
-{
-}
-
-ElementIterator::~ElementIterator() = default;
-
-}
diff --git a/searchlib/src/vespa/searchlib/attribute/elementiterator.h b/searchlib/src/vespa/searchlib/attribute/elementiterator.h
deleted file mode 100644
index 232139751b6..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/elementiterator.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/searchlib/queryeval/searchiterator.h>
-
-namespace search::fef { class TermFieldMatchData; }
-namespace search::attribute {
-
-class ISearchContext;
-
-class ElementIterator : public queryeval::SearchIterator
-{
-private:
- SearchIterator::UP _search;
- const ISearchContext & _searchContext;
- fef::TermFieldMatchData & _tfmd;
-
- void doSeek(uint32_t docid) override;
- void doUnpack(uint32_t docid) override;
- Trinary is_strict() const override;
- void initRange(uint32_t beginid, uint32_t endid) override;
-public:
- ElementIterator(SearchIterator::UP search, const ISearchContext & sc, fef::TermFieldMatchData & tfmd);
- ~ElementIterator();
-};
-
-}
diff --git a/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp
new file mode 100644
index 00000000000..8bfb07df938
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.cpp
@@ -0,0 +1,41 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "searchcontextelementiterator.h"
+#include <vespa/searchcommon/attribute/i_search_context.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+using search::fef::TermFieldMatchDataPosition;
+
+namespace search::attribute {
+
+void
+SearchContextElementIterator::getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) {
+ int32_t weight(0);
+ for (int32_t id = _searchContext.find(docId, 0, weight); id >= 0; id = _searchContext.find(docId, id+1, weight)) {
+ elementIds.push_back(id);
+ }
+}
+void
+SearchContextElementIterator::mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) {
+ size_t toKeep(0);
+ int32_t id(-1);
+ int32_t weight(0);
+ for (int32_t candidate : elementIds) {
+ if (candidate >= id) {
+ id = _searchContext.find(docId, candidate, weight);
+ if (id == candidate) {
+ elementIds[toKeep++] = candidate;
+ }
+ }
+ }
+ elementIds.resize(toKeep);
+}
+
+SearchContextElementIterator::SearchContextElementIterator(queryeval::SearchIterator::UP search, const ISearchContext & sc)
+ : ElementIterator(std::move(search)),
+ _searchContext(sc)
+{}
+
+SearchContextElementIterator::~SearchContextElementIterator() = default;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h
new file mode 100644
index 00000000000..c6c5aac1cd2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/searchcontextelementiterator.h
@@ -0,0 +1,23 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/elementiterator.h>
+
+namespace search::fef { class TermFieldMatchData; }
+namespace search::attribute {
+
+class ISearchContext;
+
+class SearchContextElementIterator : public queryeval::ElementIterator
+{
+private:
+ const ISearchContext & _searchContext;
+public:
+ void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override;
+ void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override;
+ SearchContextElementIterator(queryeval::SearchIterator::UP search, const ISearchContext & sc);
+ ~SearchContextElementIterator() override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index dc36e0a1f7e..723af890f0e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -9,6 +9,7 @@ vespa_add_library(searchlib_queryeval OBJECT
document_weight_search_iterator.cpp
dot_product_blueprint.cpp
dot_product_search.cpp
+ elementiterator.cpp
emptysearch.cpp
equiv_blueprint.cpp
equivsearch.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp
new file mode 100644
index 00000000000..750408fb242
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/elementiterator.cpp
@@ -0,0 +1,71 @@
+#include "elementiterator.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/vespalib/objects/objectvisitor.h>
+
+namespace search::queryeval {
+
+void
+ElementIterator::visitMembers(vespalib::ObjectVisitor &visitor) const {
+ visit(visitor, "iterator", _search.get());
+}
+
+ElementIteratorWrapper::ElementIteratorWrapper(SearchIterator::UP search, fef::TermFieldMatchData & tfmd)
+ : ElementIterator(std::move(search)),
+ _tfmd(tfmd)
+{}
+
+ElementIteratorWrapper::~ElementIteratorWrapper() = default;
+
+void
+ElementIteratorWrapper::getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) {
+ _search->unpack(docId);
+ int prevId(-1);
+ for (auto element : _tfmd) {
+ uint32_t id(element.getElementId());
+ if (prevId != int(id)) {
+ elementIds.push_back(id);
+ prevId = id;
+ }
+ }
+}
+
+void
+ElementIteratorWrapper::mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) {
+ _search->unpack(docId);
+ size_t toKeep(0);
+ int32_t id(-1);
+ auto it = _tfmd.begin();
+ for (int32_t candidate : elementIds) {
+ if (candidate >= id) {
+ while ((it < _tfmd.end()) && (candidate > int(it->getElementId()))) {
+ it++;
+ }
+ if (it == _tfmd.end()) break;
+ id = it->getElementId();
+ if (id == candidate) {
+ elementIds[toKeep++] = candidate;
+ }
+ }
+ }
+ elementIds.resize(toKeep);
+}
+
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::ElementIterator *obj)
+{
+ if (obj != 0) {
+ self.openStruct(name, "ElementIterator");
+ obj->visitMembers(self);
+ self.closeStruct();
+ } else {
+ self.visitNull(name);
+ }
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::ElementIterator &obj)
+{
+ visit(self, name, &obj);
+} \ No newline at end of file
diff --git a/searchlib/src/vespa/searchlib/queryeval/elementiterator.h b/searchlib/src/vespa/searchlib/queryeval/elementiterator.h
new file mode 100644
index 00000000000..a433c9be8f4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/elementiterator.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <vespa/searchlib/queryeval/searchiterator.h>
+
+namespace search::fef { class TermFieldMatchData; }
+
+namespace search::queryeval {
+
+class ElementIterator {
+public:
+ using UP = std::unique_ptr<ElementIterator>;
+ ElementIterator(SearchIterator::UP search) : _search(std::move(search)) { }
+ virtual ~ElementIterator() = default;
+ bool seek(uint32_t docId) {
+ return _search->seek(docId);
+ }
+ void initFullRange() {
+ _search->initFullRange();
+ }
+ void initRange(uint32_t beginid, uint32_t endid) {
+ _search->initRange(beginid, endid);
+ }
+ uint32_t getDocId() const {
+ return _search->getDocId();
+ }
+ virtual void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) = 0;
+ virtual void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) = 0;
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+protected:
+ SearchIterator::UP _search;
+};
+
+class ElementIteratorWrapper : public ElementIterator {
+public:
+ ElementIteratorWrapper(SearchIterator::UP search, fef::TermFieldMatchData & tfmd);
+ ~ElementIteratorWrapper() override;
+ void getElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override;
+ void mergeElementIds(uint32_t docId, std::vector<uint32_t> & elementIds) override;
+private:
+ fef::TermFieldMatchData & _tfmd;
+};
+
+}
+
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::ElementIterator &obj);
+void visit(vespalib::ObjectVisitor &self, const vespalib::string &name,
+ const search::queryeval::ElementIterator *obj);
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
index 6f2d3cb6b2a..8e2ec3eabed 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp
@@ -6,8 +6,7 @@
#include <vespa/vespalib/objects/visit.h>
#include <vespa/searchcommon/attribute/i_search_context.h>
-namespace search {
-namespace queryeval {
+namespace search::queryeval {
void
FakeSearch::doSeek(uint32_t docid)
@@ -61,5 +60,4 @@ FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
visit(visitor, "term", _term);
}
-} // namespace search::queryeval
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
index f5b95a94e99..4cbba5e5c24 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h
@@ -7,8 +7,7 @@
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchcommon/attribute/i_search_context.h>
-namespace search {
-namespace queryeval {
+namespace search::queryeval {
class FakeSearch : public SearchIterator
{
@@ -41,9 +40,12 @@ public:
bool is_attr() const { return (_ctx != nullptr); }
void doSeek(uint32_t docid) override;
void doUnpack(uint32_t docid) override;
+ void initRange(uint32_t begin, uint32_t end) override {
+ SearchIterator::initRange(begin, end);
+ _offset = 0;
+ }
const PostingInfo *getPostingInfo() const override { return _result.postingInfo(); }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
};
-} // namespace queryeval
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
index 22a392ca208..d1e0a2528e7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
@@ -4,7 +4,7 @@
#include "same_element_search.h"
#include "field_spec.hpp"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
-#include <vespa/searchlib/attribute/elementiterator.h>
+#include <vespa/searchlib/attribute/searchcontextelementiterator.h>
#include <vespa/vespalib/objects/visit.hpp>
#include <algorithm>
#include <map>
@@ -48,8 +48,7 @@ void
SameElementBlueprint::optimize_self()
{
std::sort(_terms.begin(), _terms.end(),
- [](const auto &a, const auto &b)
- {
+ [](const auto &a, const auto &b) {
return (a->getState().estimate() < b->getState().estimate());
});
}
@@ -66,34 +65,23 @@ std::unique_ptr<SameElementSearch>
SameElementBlueprint::create_same_element_search(bool strict) const
{
fef::MatchDataLayout my_layout = _layout;
- std::vector<fef::TermFieldHandle> extra_handles;
- for (size_t i = 0; i < _terms.size(); ++i) {
- const State &childState = _terms[i]->getState();
- assert(childState.numFields() == 1);
- extra_handles.push_back(my_layout.allocTermField(childState.field(0).getFieldId()));
- }
fef::MatchData::UP md = my_layout.createMatchData();
- search::fef::TermFieldMatchDataArray childMatch;
- std::vector<SearchIterator::UP> children(_terms.size());
+ std::vector<ElementIterator::UP> children(_terms.size());
for (size_t i = 0; i < _terms.size(); ++i) {
const State &childState = _terms[i]->getState();
SearchIterator::UP child = _terms[i]->createSearch(*md, (strict && (i == 0)));
const attribute::ISearchContext *context = _terms[i]->get_attribute_search_context();
if (context == nullptr) {
- children[i] = std::move(child);
- childMatch.add(childState.field(0).resolve(*md));
+ children[i] = std::make_unique<ElementIteratorWrapper>(std::move(child), *childState.field(0).resolve(*md));
} else {
- fef::TermFieldMatchData *child_tfmd = md->resolveTermField(extra_handles[i]);
- children[i] = std::make_unique<attribute::ElementIterator>(std::move(child), *context, *child_tfmd);
- childMatch.add(child_tfmd);
+ children[i] = std::make_unique<attribute::SearchContextElementIterator>(std::move(child), *context);
}
}
- return std::make_unique<SameElementSearch>(std::move(md), std::move(children), childMatch, strict);
+ return std::make_unique<SameElementSearch>(std::move(md), std::move(children), strict);
}
SearchIterator::UP
-SameElementBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
- bool strict) const
+SameElementBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const
{
(void) tfmda;
assert(!tfmda.valid());
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp
index 37f8eccadd9..18f42597121 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.cpp
@@ -11,26 +11,6 @@ using TFMD = search::fef::TermFieldMatchData;
namespace search::queryeval {
-namespace {
-
-template <typename It>
-int32_t try_match(const fef::TermFieldMatchDataArray &match, std::vector<It> &iterators, uint32_t cand) {
- for (size_t i = 0; i < iterators.size(); ++i) {
- while ((iterators[i] != match[i]->end()) && (iterators[i]->getElementId() < cand)) {
- ++iterators[i];
- }
- if (iterators[i] == match[i]->end()) {
- return -1;
- }
- if (iterators[i]->getElementId() != cand) {
- return iterators[i]->getElementId();
- }
- }
- return cand;
-}
-
-}
-
bool
SameElementSearch::check_docid_match(uint32_t docid)
{
@@ -43,41 +23,30 @@ SameElementSearch::check_docid_match(uint32_t docid)
}
void
-SameElementSearch::unpack_children(uint32_t docid)
+SameElementSearch::fetch_matching_elements(uint32_t docid, std::vector<uint32_t> & elems)
{
- for (const auto &child: _children) {
- child->doUnpack(docid);
- }
- for (size_t i = 0; i < _childMatch.size(); ++i) {
- _iterators[i] = _childMatch[i]->begin();
+ _children.front()->getElementIds(docid, elems);
+ for (auto it(_children.begin() + 1); it != _children.end(); it++) {
+ (*it)->mergeElementIds(docid, elems);
}
}
bool
SameElementSearch::check_element_match(uint32_t docid)
{
- unpack_children(docid);
- int32_t cand = 0;
- int32_t next = try_match(_childMatch, _iterators, cand);
- while (next > cand) {
- cand = next;
- next = try_match(_childMatch, _iterators, cand);
- }
- return (cand == next);
+ _matchingElements.clear();
+ fetch_matching_elements(docid, _matchingElements);
+ return !_matchingElements.empty();
}
SameElementSearch::SameElementSearch(fef::MatchData::UP md,
- std::vector<SearchIterator::UP> children,
- const fef::TermFieldMatchDataArray &childMatch,
+ std::vector<ElementIterator::UP> children,
bool strict)
: _md(std::move(md)),
_children(std::move(children)),
- _childMatch(childMatch),
- _iterators(childMatch.size()),
_strict(strict)
{
assert(!_children.empty());
- assert(_childMatch.valid());
}
void
@@ -118,17 +87,7 @@ void
SameElementSearch::find_matching_elements(uint32_t docid, std::vector<uint32_t> &dst)
{
if (check_docid_match(docid)) {
- unpack_children(docid);
- int32_t cand = 0;
- while (cand >= 0) {
- int32_t next = try_match(_childMatch, _iterators, cand);
- if (next == cand) {
- dst.push_back(cand);
- ++cand;
- } else {
- cand = next;
- }
- }
+ fetch_matching_elements(docid, dst);
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
index ae7ecab781d..e5f185e06e6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_search.h
@@ -2,7 +2,7 @@
#pragma once
-#include "searchiterator.h"
+#include "elementiterator.h"
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
@@ -21,26 +21,24 @@ class SameElementSearch : public SearchIterator
private:
using It = fef::TermFieldMatchData::PositionsIterator;
- fef::MatchData::UP _md;
- std::vector<SearchIterator::UP> _children;
- fef::TermFieldMatchDataArray _childMatch;
- std::vector<It> _iterators;
- bool _strict;
+ fef::MatchData::UP _md;
+ std::vector<ElementIterator::UP> _children;
+ std::vector<uint32_t> _matchingElements;
+ bool _strict;
- void unpack_children(uint32_t docid);
+ void fetch_matching_elements(uint32_t docid, std::vector<uint32_t> &dst);
bool check_docid_match(uint32_t docid);
bool check_element_match(uint32_t docid);
public:
SameElementSearch(fef::MatchData::UP md,
- std::vector<SearchIterator::UP> children,
- const fef::TermFieldMatchDataArray &childMatch,
+ std::vector<ElementIterator::UP> children,
bool strict);
void initRange(uint32_t begin_id, uint32_t end_id) override;
void doSeek(uint32_t docid) override;
void doUnpack(uint32_t) override {}
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- const std::vector<SearchIterator::UP> &children() const { return _children; }
+ const std::vector<ElementIterator::UP> &children() const { return _children; }
// used during docsum fetching to identify matching elements
// initRange must be called before use.
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
index 3fcc2427880..ac315c805d5 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
@@ -13,11 +13,7 @@ using search::fef::TermFieldMatchDataPosition;
#include <vector>
#include <string>
-namespace search
-{
-
-namespace fakedata
-{
+namespace search::fakedata {
/*
* Base class for faked posting list formats.
@@ -41,56 +37,36 @@ public:
/*
* Size of posting list, in bits.
*/
- virtual size_t
- bitSize() const = 0;
-
- virtual size_t
- skipBitSize() const;
-
- virtual size_t
- l1SkipBitSize() const;
-
- virtual size_t
- l2SkipBitSize() const;
-
- virtual size_t
- l3SkipBitSize() const;
-
- virtual size_t
- l4SkipBitSize() const;
-
- virtual bool
- hasWordPositions() const = 0;
-
+ virtual size_t bitSize() const = 0;
+ virtual size_t skipBitSize() const;
+ virtual size_t l1SkipBitSize() const;
+ virtual size_t l2SkipBitSize() const;
+ virtual size_t l3SkipBitSize() const;
+ virtual size_t l4SkipBitSize() const;
+ virtual bool hasWordPositions() const = 0;
virtual bool has_interleaved_features() const;
-
virtual bool enable_unpack_normal_features() const;
-
virtual bool enable_unpack_interleaved_features() const;
/*
* Single posting list performance, without feature unpack.
*/
- virtual int
- lowLevelSinglePostingScan() const = 0;
+ virtual int lowLevelSinglePostingScan() const = 0;
/*
* Single posting list performance, with feature unpack.
*/
- virtual int
- lowLevelSinglePostingScanUnpack() const = 0;
+ virtual int lowLevelSinglePostingScanUnpack() const = 0;
/*
* Two posting lists performance (same format) without feature unpack.
*/
- virtual int
- lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0;
+ virtual int lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0;
/*
* Two posting lists performance (same format) with feature unpack.
*/
- virtual int
- lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0;
+ virtual int lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0;
/*
@@ -105,7 +81,4 @@ public:
}
};
-} // namespace fakedata
-
-} // namespace search
-
+}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
index ffe9e2e54d0..97b0d3a44d3 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
@@ -12,9 +12,7 @@
#include <vespa/searchlib/diskindex/fieldreader.h>
#include <vespa/searchlib/diskindex/fieldwriter.h>
-namespace search {
-
-namespace fakedata {
+namespace search::fakedata {
/**
@@ -275,7 +273,4 @@ public:
void addDocIdBias(uint32_t docIdBias);
};
-} // namespace fakedata
-
-} // namespace search
-
+}