about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@oath.com> 2018-05-07 15:56:58 +0200
committer Henning Baldersheim <balder@oath.com> 2018-05-08 10:40:06 +0200
commit 166a0dc167b0fa87cd62d74b5b77ece472e68bce (patch)
tree 850844ed8fa002032b6df1967c096d3427b4ae8f
parent 0196b41f4e5c11188f020a17a4c48ecfcf638293 (diff)
Use the prefilter if present.
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/diversity.h 41
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/posting_list_merger.h 29
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/btree/btreeiterator.hpp 7
7 files changed, 60 insertions, 45 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/diversity.h b/searchlib/src/vespa/searchlib/attribute/diversity.h
index fe2874a65a1..3425b17cb6e 100644
--- a/searchlib/src/vespa/searchlib/attribute/diversity.h
+++ b/searchlib/src/vespa/searchlib/attribute/diversity.h
@@ -4,6 +4,7 @@
#include "singleenumattribute.h"
#include "singlenumericattribute.h"
+#include <vespa/searchlib/common/prefilter.h>
#include <vespa/vespalib/stllike/hash_map.h>
/**
@@ -157,17 +158,25 @@ template <typename DictRange, typename PostingStore, typename Fetcher, typename
void diversify_3(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
const Fetcher &diversity, size_t max_per_group,
size_t cutoff_max_groups, bool cutoff_strict,
- Result &result, std::vector<size_t> &fragments)
+ Result &result, std::vector<size_t> &fragments, const PreFilter * preFilter)
{
+ (void) preFilter;
DictRange range(range_in);
using DataType = typename PostingStore::DataType;
using KeyDataType = typename PostingStore::KeyDataType;
DiversityFilter<Fetcher, Result> filter(diversity, max_per_group, cutoff_max_groups, cutoff_strict, result, wanted_hits);
while (range.has_next() && (result.size() < wanted_hits)) {
typename DictRange::Next dict_entry(range);
- posting.foreach_frozen(dict_entry.get().getData(),
- [&](uint32_t key, const DataType &data)
- { filter.push_back(KeyDataType(key, data)); });
+ if (preFilter) {
+ posting.foreach_frozen(dict_entry.get().getData(),
+ [&](uint32_t key, const DataType &data)
+ { if (preFilter->keep(key)) { filter.push_back(KeyDataType(key, data)); }});
+ } else {
+ posting.foreach_frozen(dict_entry.get().getData(),
+ [&](uint32_t key, const DataType &data)
+ { filter.push_back(KeyDataType(key, data)); });
+ }
+
if (fragments.back() < result.size()) {
fragments.push_back(result.size());
}
@@ -178,34 +187,34 @@ template <typename DictRange, typename PostingStore, typename Result>
void diversify_2(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
const IAttributeVector &diversity_attr, size_t max_per_group,
size_t cutoff_max_groups, bool cutoff_strict,
- Result &result, std::vector<size_t> &fragments)
+ Result &result, std::vector<size_t> &fragments, const PreFilter * filter)
{
if (diversity_attr.hasEnum()) { // must handle enum first
FetchEnumFast fastEnum(diversity_attr);
if (fastEnum.valid()) {
- diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
} else {
- diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
}
} else if (diversity_attr.isIntegerType()) {
FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int32_t> > > fastInt32(diversity_attr);
FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int64_t> > > fastInt64(diversity_attr);
if (fastInt32.valid()) {
- diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
} else if (fastInt64.valid()) {
- diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
} else {
- diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
}
} else if (diversity_attr.isFloatingPointType()) {
FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<float> > > fastFloat(diversity_attr);
FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<double> > > fastDouble(diversity_attr);
if (fastFloat.valid()) {
- diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
} else if (fastDouble.valid()) {
- diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
} else {
- diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+ diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments, filter);
}
}
}
@@ -214,14 +223,14 @@ template <typename DictItr, typename PostingStore, typename Result>
void diversify(bool forward, const DictItr &lower, const DictItr &upper, const PostingStore &posting, size_t wanted_hits,
const IAttributeVector &diversity_attr, size_t max_per_group,
size_t cutoff_max_groups, bool cutoff_strict,
- Result &array, std::vector<size_t> &fragments)
+ Result &array, std::vector<size_t> &fragments, const PreFilter * filter)
{
if (forward) {
diversify_2(ForwardRange<DictItr>(lower, upper), posting, wanted_hits,
- diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+ diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments, filter);
} else {
diversify_2(ReverseRange<DictItr>(lower, upper), posting, wanted_hits,
- diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+ diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments, filter);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp
index d022e806b91..855ea77684e 100644
--- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.cpp
@@ -9,15 +9,14 @@ PostingListMerger<DataT>::PostingListMerger(uint32_t docIdLimit)
: _array(),
_startPos(),
_bitVector(),
+ _preFilter(nullptr),
_docIdLimit(docIdLimit),
_arrayValid(false)
{
}
template <typename DataT>
-PostingListMerger<DataT>::~PostingListMerger()
-{
-}
+PostingListMerger<DataT>::~PostingListMerger() = default;
template <typename DataT>
void
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
index 8568661dfdd..b2fb1126b32 100644
--- a/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
+++ b/searchlib/src/vespa/searchlib/attribute/posting_list_merger.h
@@ -4,6 +4,7 @@
#include <vespa/searchlib/btree/btree_key_data.h>
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/common/prefilter.h>
#include <vespa/vespalib/util/arrayref.h>
namespace search::attribute {
@@ -19,11 +20,12 @@ class PostingListMerger
using PostingVector = std::vector<Posting>;
using StartVector = std::vector<size_t>;
- PostingVector _array;
- StartVector _startPos;
+ PostingVector _array;
+ StartVector _startPos;
std::shared_ptr<BitVector> _bitVector;
- uint32_t _docIdLimit;
- bool _arrayValid;
+ const search::PreFilter *_preFilter;
+ uint32_t _docIdLimit;
+ bool _arrayValid;
PostingVector &merge(PostingVector &v, PostingVector &temp, const StartVector &startPos) __attribute__((noinline));
public:
@@ -31,6 +33,7 @@ public:
~PostingListMerger();
+ void setPreFilter(const search::PreFilter *filter) { _preFilter = filter; }
void reserveArray(uint32_t postingsCount, size_t postingsSize);
void allocBitVector();
void merge();
@@ -46,8 +49,13 @@ public:
void addToArray(const PostingListType & postingList)
{
PostingVector &array = _array;
- postingList.foreach([&array](uint32_t key, const DataT &data)
- { array.emplace_back(key, data); });
+ if (_preFilter) {
+ postingList.foreach([&array, filter=_preFilter](uint32_t key, const DataT &data)
+ { if (filter->keep(key)) { array.emplace_back(key, data); }} );
+ } else {
+ postingList.foreach([&array](uint32_t key, const DataT &data)
+ { array.emplace_back(key, data); });
+ }
if (_startPos.back() < array.size()) {
_startPos.push_back(array.size());
}
@@ -58,8 +66,13 @@ public:
{
BitVector &bv = *_bitVector;
uint32_t limit = _docIdLimit;
- postingList.foreach_key([&bv, limit](uint32_t key)
- { if (__builtin_expect(key < limit, true)) { bv.setBit(key); } });
+ if (_preFilter) {
+ postingList.foreach_key([&bv, limit, filter=_preFilter](uint32_t key)
+ { if (filter->keep(key) && __builtin_expect(key < limit, true)) { bv.setBit(key); } });
+ } else {
+ postingList.foreach_key([&bv, limit](uint32_t key)
+ { if (__builtin_expect(key < limit, true)) { bv.setBit(key); } });
+ }
}
// Until diversity handling has been rewritten
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
index f9a638a6090..c71470fc98c 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp
@@ -6,11 +6,7 @@
#include "diversity.hpp"
#include <vespa/searchlib/btree/btreeiterator.hpp>
-
-
-namespace search {
-
-namespace attribute {
+namespace search::attribute {
using btree::BTreeNode;
@@ -85,7 +81,4 @@ template class PostingListSearchContextT<int32_t>;
template class PostingListFoldedSearchContextT<btree::BTreeNoLeafData>;
template class PostingListFoldedSearchContextT<int32_t>;
-
-} // namespace attribute
-
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index f8349b6ad36..f47e3217743 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -121,7 +121,8 @@ protected:
void fetchPostings(bool strict, const PreFilter * filter) override;
// this will be called instead of the fetchPostings function in some cases
- void diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr,
+ void diversify(bool forward, size_t wanted_hits, const search::PreFilter * filter,
+ const IAttributeVector &diversity_attr,
size_t max_per_group, size_t cutoff_groups, bool cutoff_strict);
std::unique_ptr<queryeval::SearchIterator>
@@ -228,7 +229,7 @@ private:
if (params().diversityAttribute() != nullptr) {
bool forward = (this->getRangeLimit() > 0);
size_t wanted_hits = std::abs(this->getRangeLimit());
- PostingListSearchContextT<DataT>::diversify(forward, wanted_hits,
+ PostingListSearchContextT<DataT>::diversify(forward, wanted_hits, filter,
*(params().diversityAttribute()), this->getMaxPerGroup(),
params().diversityCutoffGroups(), params().diversityCutoffStrict());
} else {
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 92dda8174fc..3d7ece0211b 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -121,6 +121,7 @@ PostingListSearchContextT<DataT>::fetchPostings(bool strict, const PreFilter * f
}
if (strict && !fallbackToFiltering()) {
size_t sum(countHits());
+ _merger.setPreFilter(filter);
if (sum < _docIdLimit / 64) {
_merger.reserveArray(_uniqueValues, sum);
fillArray();
@@ -135,14 +136,16 @@ PostingListSearchContextT<DataT>::fetchPostings(bool strict, const PreFilter * f
template <typename DataT>
void
-PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr,
+PostingListSearchContextT<DataT>::diversify(bool forward, size_t wanted_hits, const search::PreFilter * filter,
+ const IAttributeVector &diversity_attr,
size_t max_per_group, size_t cutoff_groups, bool cutoff_strict)
{
assert(!_fetchPostingsDone);
_fetchPostingsDone = true;
_merger.reserveArray(128, wanted_hits);
diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, diversity_attr,
- max_per_group, cutoff_groups, cutoff_strict, _merger.getWritableArray(), _merger.getWritableStartPos());
+ max_per_group, cutoff_groups, cutoff_strict,
+ _merger.getWritableArray(), _merger.getWritableStartPos(), filter);
_merger.merge();
}
diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
index 741121aebab..849359dece9 100644
--- a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
+++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp
@@ -7,8 +7,7 @@
#include "btreenode.hpp"
#include <vespa/vespalib/stllike/asciistream.h>
-namespace search {
-namespace btree {
+namespace search::btree {
#define STRICT_BTREE_ITERATOR_SEEK
@@ -1375,6 +1374,4 @@ BTreeIterator<KeyT, DataT, AggrT, CompareT, TraitsT>::adjustGivenEntriesToRightL
}
}
-} // namespace search::btree
-} // namespace search
-
+}