summaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2018-12-18 20:07:16 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2018-12-18 20:07:16 +0000
commitf3c9e8af1ef5086057f929342528ef4f793ecf31 (patch)
tree01ef37f7eeebe23e3ac0e144207df4d7a98da4ca /searchlib/src
parent90c861d4818ba4e8cf91f624656a24064e4995eb (diff)
Add inverted BitvectorIterator
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp145
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvectoriterator.h14
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h17
5 files changed, 148 insertions, 83 deletions
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index a97e817b28f..10bf5ab7dc2 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -1218,7 +1218,7 @@ TEST("require that children does not optimize when parents refuse them to") {
EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
{
const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
- EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName());
+ EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName());
EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName());
EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName());
}
@@ -1228,7 +1228,7 @@ TEST("require that children does not optimize when parents refuse them to") {
EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName());
{
const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search);
- EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName());
+ EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName());
EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName());
EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName());
}
diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
index 07f9a245617..6471b0f5c33 100644
--- a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
@@ -10,6 +10,7 @@ namespace search {
using fef::TermFieldMatchDataArray;
using fef::TermFieldMatchData;
+using vespalib::Trinary;
BitVectorIterator::BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
_docIdLimit(std::min(docIdLimit, bv.size())),
@@ -30,16 +31,6 @@ BitVectorIterator::initRange(uint32_t begin, uint32_t end)
}
void
-BitVectorIterator::doSeek(uint32_t docId)
-{
- if (__builtin_expect(docId >= _docIdLimit, false)) {
- setAtEnd();
- } else if (_bv.testBit(docId)) {
- setDocId(docId);
- }
-}
-
-void
BitVectorIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
{
SearchIterator::visitMembers(visitor);
@@ -54,83 +45,157 @@ BitVectorIterator::doUnpack(uint32_t docId)
_tfmd.resetOnlyDocId(docId);
}
-class BitVectorIteratorStrict : public BitVectorIterator
+template<bool inverse>
+class BitVectorIteratorT : public BitVectorIterator {
+public:
+ BitVectorIteratorT(const BitVector &other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData);
+
+ void doSeek(uint32_t docId) override;
+ BitVector::UP get_hits(uint32_t begin_id) override;
+ void or_hits_into(BitVector &result, uint32_t begin_id) override;
+ void and_hits_into(BitVector &result, uint32_t begin_id) override;
+ bool isInverted() const override { return inverse; }
+private:
+ bool isSet(uint32_t docId) const { return inverse == ! _bv.testBit(docId); }
+};
+
+template<bool inverse>
+BitVectorIteratorT<inverse>::BitVectorIteratorT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
+ BitVectorIterator(bv, docIdLimit, matchData)
+{
+}
+
+template<bool inverse>
+void
+BitVectorIteratorT<inverse>::doSeek(uint32_t docId)
+{
+ if (__builtin_expect(docId >= _docIdLimit, false)) {
+ setAtEnd();
+ } else if (isSet(docId)) {
+ setDocId(docId);
+ }
+}
+
+template<bool inverse>
+class BitVectorIteratorStrictT : public BitVectorIteratorT<inverse>
{
public:
- BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData);
+ BitVectorIteratorStrictT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData);
private:
void initRange(uint32_t begin, uint32_t end) override;
void doSeek(uint32_t docId) override;
Trinary is_strict() const override { return Trinary::True; }
+ uint32_t getNextBit(uint32_t docId) const {
+ return inverse ? ! this->_bv.getNextFalseBit(docId) : this->_bv.getNextTrueBit(docId);
+ }
+ uint32_t getFirstBit(uint32_t docId) const {
+ return inverse ? ! this->_bv.getFirstFalseBit(docId) : this->_bv.getFirstTrueBit(docId);
+ }
};
-BitVectorIteratorStrict::BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
- BitVectorIterator(bv, docIdLimit, matchData)
+template<bool inverse>
+BitVectorIteratorStrictT<inverse>::BitVectorIteratorStrictT(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) :
+ BitVectorIteratorT<inverse>(bv, docIdLimit, matchData)
{
}
+template<bool inverse>
void
-BitVectorIteratorStrict::doSeek(uint32_t docId)
+BitVectorIteratorStrictT<inverse>::doSeek(uint32_t docId)
{
- if (__builtin_expect(docId >= _docIdLimit, false)) {
- setAtEnd();
+ if (__builtin_expect(docId >= this->_docIdLimit, false)) {
+ this->setAtEnd();
return;
}
- docId = _bv.getNextTrueBit(docId);
- if (__builtin_expect(docId >= _docIdLimit, false)) {
- setAtEnd();
+ docId = getNextBit(docId);
+ if (__builtin_expect(docId >= this->_docIdLimit, false)) {
+ this->setAtEnd();
} else {
- setDocId(docId);
+ this->setDocId(docId);
}
}
+template<bool inverse>
void
-BitVectorIteratorStrict::initRange(uint32_t begin, uint32_t end)
+BitVectorIteratorStrictT<inverse>::initRange(uint32_t begin, uint32_t end)
{
BitVectorIterator::initRange(begin, end);
- if (!isAtEnd()) {
- uint32_t docId = _bv.getFirstTrueBit(begin);
- if (docId >= _docIdLimit) {
- setAtEnd();
+ if (!this->isAtEnd()) {
+ uint32_t docId = getFirstBit(begin);
+ if (docId >= this->_docIdLimit) {
+ this->setAtEnd();
} else {
- setDocId(docId);
+ this->setDocId(docId);
}
}
}
-queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict)
+queryeval::SearchIterator::UP
+BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict)
{
assert(matchData.size() == 1);
return create(bv, bv->size(), *matchData[0], strict);
}
-queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict)
+
+queryeval::SearchIterator::UP
+BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict)
{
- if (bv == NULL) {
- return UP(new queryeval::EmptySearch());
+ if (bv == nullptr) {
+ return std::make_unique<queryeval::EmptySearch>();
} else if (strict) {
- return UP(new BitVectorIteratorStrict(*bv, docIdLimit, matchData));
+ return std::make_unique<BitVectorIteratorStrictT<false>>(*bv, docIdLimit, matchData);
} else {
- return UP(new BitVectorIterator(*bv, docIdLimit, matchData));
+ return std::make_unique<BitVectorIteratorT<false>>(*bv, docIdLimit, matchData);
}
}
-BitVector::UP BitVectorIterator::get_hits(uint32_t begin_id) {
+queryeval::SearchIterator::UP
+BitVectorIterator::createInverse(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict)
+{
+ if (bv == nullptr) {
+ return std::make_unique<queryeval::EmptySearch>();
+ } else if (strict) {
+ return std::make_unique<BitVectorIteratorStrictT<true>>(*bv, docIdLimit, matchData);
+ } else {
+ return std::make_unique<BitVectorIteratorT<true>>(*bv, docIdLimit, matchData);
+ }
+}
+
+
+template<bool inverse>
+BitVector::UP
+BitVectorIteratorT<inverse>::get_hits(uint32_t begin_id) {
BitVector::UP result = BitVector::create(_bv, begin_id, getEndId());
+ if (inverse) {
+ result->notSelf();
+ }
if (begin_id < getDocId()) {
result->clearInterval(begin_id, getDocId());
}
return result;
}
-void BitVectorIterator::or_hits_into(BitVector &result, uint32_t begin_id) {
- (void) begin_id;
- result.orWith(_bv);
+template<bool inverse>
+void
+BitVectorIteratorT<inverse>::or_hits_into(BitVector &result, uint32_t) {
+ if (inverse) {
+ result.notSelf();
+ result.andWith(_bv);
+ result.notSelf();
+ } else {
+ result.orWith(_bv);
+ }
}
-void BitVectorIterator::and_hits_into(BitVector &result, uint32_t begin_id) {
- (void) begin_id;
- result.andWith(_bv);
+template<bool inverse>
+void
+BitVectorIteratorT<inverse>::and_hits_into(BitVector &result, uint32_t) {
+ if (inverse) {
+ result.andNotWith(_bv);
+ } else {
+ result.andWith(_bv);
+ }
}
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h
index f21a983b3a3..ee899d708f6 100644
--- a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h
+++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h
@@ -13,28 +13,28 @@ namespace fef { class TermFieldMatchData; }
class BitVectorIterator : public queryeval::SearchIterator
{
protected:
- BitVectorIterator(const BitVector & other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData);
+ BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, fef::TermFieldMatchData &matchData);
void initRange(uint32_t begin, uint32_t end) override;
uint32_t _docIdLimit;
const BitVector & _bv;
private:
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- void doSeek(uint32_t docId) override;
void doUnpack(uint32_t docId) override;
- BitVector::UP get_hits(uint32_t begin_id) override;
- void or_hits_into(BitVector &result, uint32_t begin_id) override;
- void and_hits_into(BitVector &result, uint32_t begin_id) override;
bool isBitVector() const override { return true; }
fef::TermFieldMatchData &_tfmd;
public:
- const void * getBitValues() const { return _bv.getStart(); }
+ virtual bool isInverted() const = 0;
+ const void *getBitValues() const { return _bv.getStart(); }
Trinary is_strict() const override { return Trinary::False; }
virtual bool isStrict() const { return (is_strict() == Trinary::True); }
uint32_t getDocIdLimit() const { return _docIdLimit; }
static UP create(const BitVector *const other, const fef::TermFieldMatchDataArray &matchData, bool strict);
- static UP create(const BitVector *const other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData, bool strict);
+ static UP create(const BitVector *const other, uint32_t docIdLimit,
+ fef::TermFieldMatchData &matchData, bool strict);
+ static UP createInverse(const BitVector *const other, uint32_t docIdLimit,
+ fef::TermFieldMatchData &matchData, bool strict);
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
index 1495b446bd7..4254ee70b05 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -11,8 +11,7 @@
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/util/optimized.h>
-namespace search {
-namespace queryeval {
+namespace search::queryeval {
namespace {
@@ -111,8 +110,8 @@ typedef MultiBitVectorIteratorStrict<Or> OrBVIteratorStrict;
bool hasAtLeast2Bitvectors(const MultiSearch::Children & children)
{
size_t count(0);
- for (auto it(children.begin()); it != children.end(); it++) {
- if ((*it)->isBitVector()) {
+ for (const SearchIterator * search : children) {
+ if (search->isBitVector()) {
count++;
}
}
@@ -137,26 +136,25 @@ MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(const Children & children
_numDocs(std::numeric_limits<unsigned int>::max()),
_lastValue(0),
_lastMaxDocIdLimit(0),
- _bvs(children.size())
+ _bvs()
{
+ _bvs.reserve(children.size());
for (size_t i(0); i < children.size(); i++) {
- const BitVectorIterator * bv = static_cast<const BitVectorIterator *>(children[i]);
- _bvs[i] = reinterpret_cast<const Word *>(bv->getBitValues());
+ const auto * bv = static_cast<const BitVectorIterator *>(children[i]);
+ _bvs.emplace_back(reinterpret_cast<const Word *>(bv->getBitValues()), bv->isInverted());
_numDocs = std::min(_numDocs, bv->getDocIdLimit());
}
}
-MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase()
-{
-}
+MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase() = default;
SearchIterator::UP
MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate)
{
(void) estimate;
if (filter->isBitVector() && acceptExtraFilter()) {
- const BitVectorIterator & bv = static_cast<const BitVectorIterator &>(*filter);
- _bvs.push_back(reinterpret_cast<const Word *>(bv.getBitValues()));
+ const auto & bv = static_cast<const BitVectorIterator &>(*filter);
+ _bvs.emplace_back(reinterpret_cast<const Word *>(bv.getBitValues()), bv.isInverted());
insert(getChildren().size(), std::move(filter));
_lastMaxDocIdLimit = 0; // force reload
}
@@ -170,8 +168,7 @@ MultiBitVectorIteratorBase::doUnpack(uint32_t docid)
MultiSearch::doUnpack(docid);
} else {
auto &children = getChildren();
- _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);},
- children.size());
+ _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);}, children.size());
}
}
@@ -179,7 +176,7 @@ SearchIterator::UP
MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt)
{
if (parentIt->isSourceBlender()) {
- SourceBlenderSearch & parent(static_cast<SourceBlenderSearch &>(*parentIt));
+ auto & parent(static_cast<SourceBlenderSearch &>(*parentIt));
for (size_t i(0); i < parent.getNumChildren(); i++) {
parent.setChild(i, optimize(parent.steal(i)));
}
@@ -192,7 +189,7 @@ MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt)
SearchIterator::UP
MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt)
{
- MultiSearch & parent(static_cast<MultiSearch &>(*parentIt));
+ auto & parent(static_cast<MultiSearch &>(*parentIt));
if (canOptimize(parent)) {
MultiSearch::Children stolen;
std::vector<size_t> _unpackIndex;
@@ -218,24 +215,24 @@ MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt)
SearchIterator::UP next;
if (parent.isAnd()) {
if (strict) {
- next.reset(new AndBVIteratorStrict(stolen));
+ next = std::make_unique<AndBVIteratorStrict>(stolen);
} else {
- next.reset(new AndBVIterator(stolen));
+ next = std::make_unique<AndBVIterator>(stolen);
}
} else if (parent.isOr()) {
if (strict) {
- next.reset(new OrBVIteratorStrict(stolen));
+ next = std::make_unique<OrBVIteratorStrict>(stolen);
} else {
- next.reset(new OrBVIterator(stolen));
+ next = std::make_unique<OrBVIterator>(stolen);
}
} else if (parent.isAndNot()) {
if (strict) {
- next.reset(new OrBVIteratorStrict(stolen));
+ next = std::make_unique<OrBVIteratorStrict>(stolen);
} else {
- next.reset(new OrBVIterator(stolen));
+ next = std::make_unique<OrBVIterator>(stolen);
}
}
- MultiBitVectorIteratorBase & nextM(static_cast<MultiBitVectorIteratorBase &>(*next));
+ auto & nextM(static_cast<MultiBitVectorIteratorBase &>(*next));
for (size_t index : _unpackIndex) {
nextM.addUnpackIndex(index);
}
@@ -245,14 +242,12 @@ MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt)
parent.insert(insertPosition, std::move(next));
}
}
- MultiSearch::Children & toOptimize(const_cast<MultiSearch::Children &>(parent.getChildren()));
- for (size_t i(0); i < toOptimize.size(); i++) {
- toOptimize[i] = optimize(MultiSearch::UP(toOptimize[i])).release();
+ auto & toOptimize(const_cast<MultiSearch::Children &>(parent.getChildren()));
+ for (SearchIterator * & search : toOptimize) {
+ search = optimize(MultiSearch::UP(search)).release();
}
return parentIt;
}
}
-
-} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
index 15fa044474c..d21026993d7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -6,8 +6,7 @@
#include "unpackinfo.h"
#include <vespa/searchlib/common/bitword.h>
-namespace search {
-namespace queryeval {
+namespace search::queryeval {
class MultiBitVectorIteratorBase : public MultiSearch, protected BitWord
{
@@ -22,11 +21,19 @@ public:
static SearchIterator::UP optimize(SearchIterator::UP parent);
protected:
MultiBitVectorIteratorBase(const Children & children);
+ class MetaWord {
+ public:
+ MetaWord(const Word * words, bool inverted) : _words(words), _inverted(inverted) { }
+ Word operator [] (uint32_t index) const { return _inverted ? ~_words[index] : _words[index]; }
+ private:
+ const Word * _words;
+ bool _inverted;
+ };
uint32_t _numDocs;
Word _lastValue; // Last value computed
uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation.
- std::vector<const Word *> _bvs;
+ std::vector<MetaWord> _bvs;
private:
virtual bool acceptExtraFilter() const = 0;
UP andWith(UP filter, uint32_t estimate) override;
@@ -35,6 +42,4 @@ private:
static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent);
};
-} // namespace queryeval
-} // namespace search
-
+}