summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-06-19 09:30:13 +0000
committerGeir Storli <geirst@verizonmedia.com>2019-06-19 13:06:30 +0000
commitd995dda4234ed90e4c9c46688c54c6be409e46bb (patch)
treed38657ce96b6e21fdb8256df365b19d0f3db49df /searchlib
parent07d9b0abf288a3926dbb309b6db9c23b32fa30e7 (diff)
Create interface for a memory field index.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/memoryindex/field_index/field_index_test.cpp112
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp80
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.h38
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h26
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/i_field_index.h47
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h6
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp67
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h3
-rw-r--r--searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h33
-rw-r--r--searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h2
10 files changed, 250 insertions, 164 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 0f1c966ad5d..15d97d314a1 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -197,6 +197,25 @@ assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr)
return assertPostingList(ss.str(), itr);
}
+FieldIndex::PostingList::Iterator
+find_in_field_index(const vespalib::stringref word, // Test helper: returns the posting list iterator for 'word' in the field index stored under fieldId.
+ uint32_t fieldId,
+ const FieldIndexCollection& fic)
+{
+ auto* field_index = dynamic_cast<FieldIndex*>(fic.getFieldIndex(fieldId)); // getFieldIndex() hands back an IFieldIndex*; find() is only available on the concrete FieldIndex.
+ assert(field_index != nullptr); // dynamic_cast yields nullptr when the stored index is not a FieldIndex.
+ return field_index->find(word);
+}
+
+FieldIndex::PostingList::ConstIterator
+find_frozen_in_field_index(const vespalib::stringref word, // Test helper: same lookup as find_in_field_index(), but goes through FieldIndex::findFrozen().
+ uint32_t fieldId,
+ const FieldIndexCollection& fic)
+{
+ auto* field_index = dynamic_cast<FieldIndex*>(fic.getFieldIndex(fieldId)); // getFieldIndex() hands back an IFieldIndex*; findFrozen() is only available on the concrete FieldIndex.
+ assert(field_index != nullptr); // dynamic_cast yields nullptr when the stored index is not a FieldIndex.
+ return field_index->findFrozen(word);
+}
namespace {
@@ -332,7 +351,7 @@ public:
bool assertPosting(const vespalib::string &word,
uint32_t fieldId) {
std::vector<uint32_t> exp = _mock.find(word, fieldId);
- PostingConstItr itr = _fieldIndexes.find(word, fieldId);
+ PostingConstItr itr = find_in_field_index(word, fieldId, _fieldIndexes);
bool result = assertPostingList(exp, itr);
EXPECT_TRUE(result);
return result;
@@ -487,6 +506,8 @@ make_multi_field_schema()
return result;
}
+
+
struct FieldIndexCollectionTest : public ::testing::Test {
Schema schema;
FieldIndexCollection fic;
@@ -496,6 +517,11 @@ struct FieldIndexCollectionTest : public ::testing::Test {
{
}
~FieldIndexCollectionTest() {}
+
+ FieldIndex::PostingList::Iterator find(const vespalib::stringref word, // Fixture shortcut standing in for the find() that this commit removes from FieldIndexCollection.
+ uint32_t fieldId) const {
+ return find_in_field_index(word, fieldId, fic); // Delegates to the free helper using the fixture's own collection.
+ }
};
TEST_F(FieldIndexTest, require_that_fresh_insert_works)
@@ -529,12 +555,12 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_mult
WrapInserter(fic, 0).word("a").add(10).word("b").add(11).add(15).flush();
WrapInserter(fic, 1).word("a").add(5).word("b").add(12).flush();
EXPECT_EQ(4u, fic.getNumUniqueWords());
- EXPECT_TRUE(assertPostingList("[10]", fic.find("a", 0)));
- EXPECT_TRUE(assertPostingList("[5]", fic.find("a", 1)));
- EXPECT_TRUE(assertPostingList("[11,15]", fic.find("b", 0)));
- EXPECT_TRUE(assertPostingList("[12]", fic.find("b", 1)));
- EXPECT_TRUE(assertPostingList("[]", fic.find("a", 2)));
- EXPECT_TRUE(assertPostingList("[]", fic.find("c", 0)));
+ EXPECT_TRUE(assertPostingList("[10]", find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[5]", find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[11,15]", find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[12]", find("b", 1)));
+ EXPECT_TRUE(assertPostingList("[]", find("a", 2)));
+ EXPECT_TRUE(assertPostingList("[]", find("c", 0)));
}
TEST_F(FieldIndexTest, require_that_remove_works)
@@ -622,16 +648,16 @@ TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists)
{
WrapInserter(fic, 0).word("a").add(1, getFeatures(4, 2)).flush();
EXPECT_TRUE(assertPostingList("[1{4:0,1}]",
- fic.find("a", 0),
+ find("a", 0),
featureStorePtr(fic, 0)));
WrapInserter(fic, 0).word("b").add(2, getFeatures(5, 1)).
add(3, getFeatures(6, 2)).flush();
EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]",
- fic.find("b", 0),
+ find("b", 0),
featureStorePtr(fic, 0)));
WrapInserter(fic, 1).word("c").add(4, getFeatures(7, 2)).flush();
EXPECT_TRUE(assertPostingList("[4{7:0,1}]",
- fic.find("c", 1),
+ find("c", 1),
featureStorePtr(fic, 1)));
}
@@ -764,6 +790,12 @@ public:
_inv(_schema, _invertThreads, _pushThreads, _fic)
{
}
+ PostingList::Iterator find(const vespalib::stringref word, uint32_t fieldId) const { // Fixture shortcut: posting list lookup in _fic for the given word and field.
+ return find_in_field_index(word, fieldId, _fic);
+ }
+ PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t fieldId) const { // Fixture shortcut: frozen posting list lookup in _fic for the given word and field.
+ return find_frozen_in_field_index(word, fieldId, _fic);
+ }
};
class BasicInverterTest : public InverterTest {
@@ -922,12 +954,12 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
TermFieldMatchDataArray matchData;
matchData.add(&tfmd);
{
- PostingIterator itr(_fic.findFrozen("not", 0), featureStoreRef(_fic, 0), 0, matchData);
+ PostingIterator itr(findFrozen("not", 0), featureStoreRef(_fic, 0), 0, matchData);
itr.initFullRange();
EXPECT_TRUE(itr.isAtEnd());
}
{
- PostingIterator itr(_fic.findFrozen("a", 0), featureStoreRef(_fic, 0), 0, matchData);
+ PostingIterator itr(findFrozen("a", 0), featureStoreRef(_fic, 0), 0, matchData);
itr.initFullRange();
EXPECT_EQ(10u, itr.getDocId());
itr.unpack(10);
@@ -944,19 +976,19 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
EXPECT_TRUE(itr.isAtEnd());
}
{
- PostingIterator itr(_fic.findFrozen("x", 0), featureStoreRef(_fic, 0), 0, matchData);
+ PostingIterator itr(findFrozen("x", 0), featureStoreRef(_fic, 0), 0, matchData);
itr.initFullRange();
EXPECT_TRUE(itr.isAtEnd());
}
{
- PostingIterator itr(_fic.findFrozen("x", 1), featureStoreRef(_fic, 1), 1, matchData);
+ PostingIterator itr(findFrozen("x", 1), featureStoreRef(_fic, 1), 1, matchData);
itr.initFullRange();
EXPECT_EQ(30u, itr.getDocId());
itr.unpack(30);
EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(tfmd.getIterator(), true, true));
}
{
- PostingIterator itr(_fic.findFrozen("x", 2), featureStoreRef(_fic, 2), 2, matchData);
+ PostingIterator itr(findFrozen("x", 2), featureStoreRef(_fic, 2), 2, matchData);
itr.initFullRange();
EXPECT_EQ(30u, itr.getDocId());
itr.unpack(30);
@@ -964,7 +996,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(tfmd.getIterator(), true, true));
}
{
- PostingIterator itr(_fic.findFrozen("x", 3), featureStoreRef(_fic, 3), 3, matchData);
+ PostingIterator itr(findFrozen("x", 3), featureStoreRef(_fic, 3), 3, matchData);
itr.initFullRange();
EXPECT_EQ(30u, itr.getDocId());
itr.unpack(30);
@@ -994,20 +1026,20 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo
myPushDocument(_inv);
_pushThreads.sync();
- EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 0)));
- EXPECT_TRUE(assertPostingList("[1,2]", _fic.find("b", 0)));
- EXPECT_TRUE(assertPostingList("[2]", _fic.find("c", 0)));
- EXPECT_TRUE(assertPostingList("[1]", _fic.find("a", 1)));
- EXPECT_TRUE(assertPostingList("[1]", _fic.find("c", 1)));
+ EXPECT_TRUE(assertPostingList("[1]", find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[1,2]", find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", find("c", 0)));
+ EXPECT_TRUE(assertPostingList("[1]", find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[1]", find("c", 1)));
myremove(1, _inv, _invertThreads);
_pushThreads.sync();
- EXPECT_TRUE(assertPostingList("[]", _fic.find("a", 0)));
- EXPECT_TRUE(assertPostingList("[2]", _fic.find("b", 0)));
- EXPECT_TRUE(assertPostingList("[2]", _fic.find("c", 0)));
- EXPECT_TRUE(assertPostingList("[]", _fic.find("a", 1)));
- EXPECT_TRUE(assertPostingList("[]", _fic.find("c", 1)));
+ EXPECT_TRUE(assertPostingList("[]", find("a", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", find("b", 0)));
+ EXPECT_TRUE(assertPostingList("[2]", find("c", 0)));
+ EXPECT_TRUE(assertPostingList("[]", find("a", 1)));
+ EXPECT_TRUE(assertPostingList("[]", find("c", 1)));
}
Schema
@@ -1161,7 +1193,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
matchData.add(&tfmd);
{
uint32_t fieldId = _schema.getIndexFieldId("iu");
- PostingIterator itr(_fic.findFrozen("not", fieldId),
+ PostingIterator itr(findFrozen("not", fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1169,7 +1201,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
}
{
uint32_t fieldId = _schema.getIndexFieldId("iu");
- PostingIterator itr(_fic.findFrozen("example", fieldId),
+ PostingIterator itr(findFrozen("example", fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1181,7 +1213,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
}
{
uint32_t fieldId = _schema.getIndexFieldId("iau");
- PostingIterator itr(_fic.findFrozen("example", fieldId),
+ PostingIterator itr(findFrozen("example", fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1194,7 +1226,7 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
}
{
uint32_t fieldId = _schema.getIndexFieldId("iwu");
- PostingIterator itr(_fic.findFrozen("example", fieldId),
+ PostingIterator itr(findFrozen("example", fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1247,16 +1279,16 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working)
matchData.add(&tfmd);
uint32_t fieldId = _schema.getIndexFieldId("f0");
{
- PostingIterator itr(_fic.findFrozen("not", fieldId),
+ PostingIterator itr(findFrozen("not", fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
EXPECT_TRUE(itr.isAtEnd());
}
{
- PostingIterator itr(_fic.findFrozen("我就"
- "是那个",
- fieldId),
+ PostingIterator itr(findFrozen("我就"
+ "是那个",
+ fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1267,9 +1299,9 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working)
EXPECT_TRUE(itr.isAtEnd());
}
{
- PostingIterator itr(_fic.findFrozen("大灰"
- "狼",
- fieldId),
+ PostingIterator itr(findFrozen("大灰"
+ "狼",
+ fieldId),
featureStoreRef(_fic, fieldId),
fieldId, matchData);
itr.initFullRange();
@@ -1315,9 +1347,9 @@ struct RemoverTest : public FieldIndexCollectionTest {
void assertPostingLists(const vespalib::string &e1,
const vespalib::string &e2,
const vespalib::string &e3) {
- EXPECT_TRUE(assertPostingList(e1, fic.find("a", 1)));
- EXPECT_TRUE(assertPostingList(e2, fic.find("a", 2)));
- EXPECT_TRUE(assertPostingList(e3, fic.find("b", 1)));
+ EXPECT_TRUE(assertPostingList(e1, find("a", 1)));
+ EXPECT_TRUE(assertPostingList(e2, find("a", 2)));
+ EXPECT_TRUE(assertPostingList(e3, find("b", 1)));
}
void remove(uint32_t docId) {
DocumentInverter inv(schema, _invertThreads, _pushThreads, fic);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 7e40a4a2aa1..e492e3e7eee 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -2,21 +2,33 @@
#include "field_index.h"
#include "ordered_field_index_inserter.h"
-#include <vespa/vespalib/util/stringfmt.h>
-#include <vespa/vespalib/util/exceptions.h>
+#include "posting_iterator.h"
#include <vespa/searchlib/bitcompression/posocccompression.h>
+#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreenode.hpp>
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/btree/btreenodestore.hpp>
-#include <vespa/vespalib/btree/btreestore.hpp>
-#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreeroot.hpp>
-#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/vespalib/util/array.hpp>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.memoryindex.field_index");
+
+using search::fef::TermFieldMatchDataArray;
using search::index::DocIdAndFeatures;
-using search::index::WordDocElementFeatures;
using search::index::Schema;
+using search::index::WordDocElementFeatures;
+using search::queryeval::BooleanMatchIteratorWrapper;
+using search::queryeval::FieldSpecBase;
+using search::queryeval::SearchIterator;
+using search::queryeval::SimpleLeafBlueprint;
+using vespalib::GenerationHandler;
namespace search::memoryindex {
@@ -225,6 +237,62 @@ FieldIndex::getMemoryUsage() const
return usage;
}
+namespace {
+
+class MemoryTermBlueprint : public SimpleLeafBlueprint { // Leaf blueprint that searches one posting list taken from a memory field index.
+private:
+ GenerationHandler::Guard _guard; // Generation guard supplied by the creator and kept as a member alongside the iterator.
+ FieldIndex::PostingList::ConstIterator _posting_itr; // Posting list iterator supplied by the creator; copied into every PostingIterator created below.
+ const FeatureStore& _feature_store; // Held by reference; passed on to PostingIterator.
+ const uint32_t _field_id;
+ const bool _use_bit_vector; // When true, createLeafSearch() wraps the iterator in a BooleanMatchIteratorWrapper.
+
+public:
+ MemoryTermBlueprint(GenerationHandler::Guard&& guard,
+ FieldIndex::PostingList::ConstIterator posting_itr,
+ const FeatureStore& feature_store,
+ const FieldSpecBase& field,
+ uint32_t field_id,
+ bool use_bit_vector)
+ : SimpleLeafBlueprint(field),
+ _guard(), // Default-constructed here; the incoming guard is move-assigned in the body.
+ _posting_itr(posting_itr),
+ _feature_store(feature_store),
+ _field_id(field_id),
+ _use_bit_vector(use_bit_vector)
+ {
+ _guard = std::move(guard);
+ HitEstimate estimate(_posting_itr.size(), !_posting_itr.valid()); // Estimate is the posting list size; the flag is set when the iterator is not valid.
+ setEstimate(estimate);
+ }
+
+ SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray& tfmda, bool) const override { // The bool argument is unnamed and not used.
+ auto result = std::make_unique<PostingIterator>(_posting_itr, _feature_store, _field_id, tfmda);
+ if (_use_bit_vector) {
+ LOG(debug, "Return BooleanMatchIteratorWrapper: field_id(%u), doc_count(%zu)",
+ _field_id, _posting_itr.size());
+ return std::make_unique<BooleanMatchIteratorWrapper>(std::move(result), tfmda); // The wrapper takes ownership of the PostingIterator.
+ }
+ LOG(debug, "Return PostingIterator: field_id(%u), doc_count(%zu)",
+ _field_id, _posting_itr.size());
+ return result;
+ }
+};
+
+}
+
+std::unique_ptr<queryeval::SimpleLeafBlueprint>
+FieldIndex::make_term_blueprint(const vespalib::string& term, // Builds the blueprint used to search for 'term' in this field index.
+ const queryeval::FieldSpecBase& field,
+ uint32_t field_id)
+{
+ auto guard = takeGenerationGuard(); // Taken before the lookup; ownership is moved into the blueprint below.
+ auto posting_itr = findFrozen(term);
+ bool use_bit_vector = field.isFilter(); // Filter fields get the BooleanMatchIteratorWrapper variant of the search iterator.
+ return std::make_unique<MemoryTermBlueprint>(std::move(guard), posting_itr, getFeatureStore(),
+ field, field_id, use_bit_vector);
+}
+
}
namespace search::btree {
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 27ba6b26a37..9c97ebf3e85 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -4,8 +4,9 @@
#include "feature_store.h"
#include "field_index_remover.h"
-#include "word_store.h"
+#include "i_field_index.h"
#include "posting_list_entry.h"
+#include "word_store.h"
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/index/indexbuilder.h>
@@ -18,10 +19,10 @@
namespace search::memoryindex {
-class OrderedFieldIndexInserter;
+class IOrderedFieldIndexInserter;
/**
- * Memory index for a single field using lock-free B-Trees in underlying components.
+ * Implementation of memory index for a single field using lock-free B-Trees in underlying components.
*
* It consists of the following components:
* - WordStore containing all unique words in this field (across all documents).
@@ -33,7 +34,7 @@ class OrderedFieldIndexInserter;
*
* Elements in the three stores are accessed using 32-bit references / handles.
*/
-class FieldIndex {
+class FieldIndex : public IFieldIndex {
public:
// Mapping from docid -> feature ref
using PostingListEntryType = PostingListEntry<false>;
@@ -93,7 +94,7 @@ private:
FeatureStore _featureStore;
uint32_t _fieldId;
FieldIndexRemover _remover;
- std::unique_ptr<OrderedFieldIndexInserter> _inserter;
+ std::unique_ptr<IOrderedFieldIndexInserter> _inserter;
index::FieldLengthCalculator _calculator;
public:
@@ -114,11 +115,11 @@ public:
PostingList::ConstIterator
findFrozen(const vespalib::stringref word) const;
- uint64_t getNumUniqueWords() const { return _numUniqueWords; }
- const FeatureStore & getFeatureStore() const { return _featureStore; }
- const WordStore &getWordStore() const { return _wordStore; }
- OrderedFieldIndexInserter &getInserter() const { return *_inserter; }
- index::FieldLengthCalculator &get_calculator() { return _calculator; }
+ uint64_t getNumUniqueWords() const override { return _numUniqueWords; }
+ const FeatureStore & getFeatureStore() const override { return _featureStore; }
+ const WordStore &getWordStore() const override { return _wordStore; }
+ IOrderedFieldIndexInserter &getInserter() override { return *_inserter; }
+ index::FieldLengthCalculator &get_calculator() override { return _calculator; }
private:
void freeze() {
@@ -147,27 +148,30 @@ private:
}
public:
- GenerationHandler::Guard takeGenerationGuard() {
+ GenerationHandler::Guard takeGenerationGuard() override {
return _generationHandler.takeGuard();
}
- void
- compactFeatures();
+ void compactFeatures() override;
- void dump(search::index::IndexBuilder & indexBuilder);
+ void dump(search::index::IndexBuilder & indexBuilder) override;
- vespalib::MemoryUsage getMemoryUsage() const;
+ vespalib::MemoryUsage getMemoryUsage() const override;
DictionaryTree &getDictionaryTree() { return _dict; }
PostingListStore &getPostingListStore() { return _postingListStore; }
- FieldIndexRemover &getDocumentRemover() { return _remover; }
+ FieldIndexRemover &getDocumentRemover() override { return _remover; }
- void commit() {
+ void commit() override {
_remover.flush();
freeze();
transferHoldLists();
incGeneration();
trimHoldLists();
}
+
+ std::unique_ptr<queryeval::SimpleLeafBlueprint> make_term_blueprint(const vespalib::string& term,
+ const queryeval::FieldSpecBase& field,
+ uint32_t field_id) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
index 53f42658d0a..a737175d346 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
@@ -3,9 +3,14 @@
#pragma once
#include "i_field_index_collection.h"
-#include "field_index.h"
+#include "i_field_index.h"
+#include <memory>
+#include <vector>
-namespace search::index { class IFieldLengthInspector; }
+namespace search::index {
+ class IFieldLengthInspector;
+ class Schema;
+}
namespace search::memoryindex {
@@ -19,26 +24,15 @@ class FieldInverter;
* for a given word in a given field.
*/
class FieldIndexCollection : public IFieldIndexCollection {
-public:
- using PostingList = FieldIndex::PostingList;
-
private:
using GenerationHandler = vespalib::GenerationHandler;
- std::vector<std::unique_ptr<FieldIndex>> _fieldIndexes;
+ std::vector<std::unique_ptr<IFieldIndex>> _fieldIndexes;
uint32_t _numFields;
public:
FieldIndexCollection(const index::Schema& schema, const index::IFieldLengthInspector& inspector);
~FieldIndexCollection();
- PostingList::Iterator find(const vespalib::stringref word,
- uint32_t fieldId) const {
- return _fieldIndexes[fieldId]->find(word);
- }
-
- PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t fieldId) const {
- return _fieldIndexes[fieldId]->findFrozen(word);
- }
uint64_t getNumUniqueWords() const {
uint64_t numUniqueWords = 0;
@@ -52,11 +46,11 @@ public:
vespalib::MemoryUsage getMemoryUsage() const;
- FieldIndex *getFieldIndex(uint32_t fieldId) const {
+ IFieldIndex *getFieldIndex(uint32_t fieldId) const {
return _fieldIndexes[fieldId].get();
}
- const std::vector<std::unique_ptr<FieldIndex>> &getFieldIndexes() const { return _fieldIndexes; }
+ const std::vector<std::unique_ptr<IFieldIndex>> &getFieldIndexes() const { return _fieldIndexes; }
uint32_t getNumFields() const { return _numFields; }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h
new file mode 100644
index 00000000000..86082c08d36
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h
@@ -0,0 +1,47 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/vespalib/util/generationhandler.h>
+#include <vespa/vespalib/util/memoryusage.h>
+
+namespace search::index {
+class FieldLengthCalculator;
+class IndexBuilder;
+}
+
+namespace search::memoryindex {
+
+class FeatureStore;
+class FieldIndexRemover;
+class IOrderedFieldIndexInserter;
+class WordStore;
+
+/**
+ * Interface for a memory index for a single field as seen from the FieldIndexCollection.
+ */
+class IFieldIndex {
+public:
+ virtual ~IFieldIndex() {} // Virtual: FieldIndexCollection owns implementations through std::unique_ptr<IFieldIndex>.
+
+ virtual uint64_t getNumUniqueWords() const = 0;
+ virtual vespalib::MemoryUsage getMemoryUsage() const = 0;
+ virtual const FeatureStore& getFeatureStore() const = 0;
+ virtual const WordStore& getWordStore() const = 0;
+ virtual IOrderedFieldIndexInserter& getInserter() = 0; // Non-const and returns the inserter interface rather than the concrete OrderedFieldIndexInserter.
+ virtual FieldIndexRemover& getDocumentRemover() = 0;
+ virtual index::FieldLengthCalculator& get_calculator() = 0;
+ virtual void compactFeatures() = 0;
+ virtual void dump(search::index::IndexBuilder& indexBuilder) = 0;
+
+ virtual std::unique_ptr<queryeval::SimpleLeafBlueprint> make_term_blueprint(const vespalib::string& term, // Creates the leaf blueprint for searching 'term' in this field; callers no longer need the posting list types.
+ const queryeval::FieldSpecBase& field,
+ uint32_t field_id) = 0;
+
+ // Should only be directly used by unit tests
+ virtual vespalib::GenerationHandler::Guard takeGenerationGuard() = 0;
+ virtual void commit() = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h
index cf10db3c4d8..4da0844da58 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/datastore/entryref.h>
#include <cstdint>
namespace search::index { class DocIdAndFeatures; }
@@ -30,6 +31,11 @@ public:
virtual void add(uint32_t docId, const index::DocIdAndFeatures &features) = 0;
/**
+ * Returns the reference to the current word (only used by unit tests).
+ */
+ virtual datastore::EntryRef getWordRef() const = 0;
+
+ /**
* Remove (word, docId) tuple.
*/
virtual void remove(uint32_t docId) = 0;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index 6686745f8c2..d3d3004100c 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -3,16 +3,15 @@
#include "document_inverter.h"
#include "field_index_collection.h"
#include "memory_index.h"
-#include "posting_iterator.h"
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
#include <vespa/document/fieldvalue/document.h>
-#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+#include <vespa/searchlib/index/field_length_calculator.h>
#include <vespa/searchlib/index/schemautil.h>
-#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchlib/queryeval/leaf_blueprints.h>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.memoryindex.memory_index");
@@ -20,19 +19,17 @@ LOG_SETUP(".searchlib.memoryindex.memory_index");
using document::ArrayFieldValue;
using document::WeightedSetFieldValue;
using vespalib::LockGuard;
-using vespalib::GenerationHandler;
namespace search {
-using fef::TermFieldMatchDataArray;
using index::FieldLengthInfo;
using index::IFieldLengthInspector;
using index::IndexBuilder;
using index::Schema;
using index::SchemaUtil;
-using query::NumberTerm;
using query::LocationTerm;
using query::Node;
+using query::NumberTerm;
using query::PredicateQuery;
using query::PrefixTerm;
using query::RangeTerm;
@@ -40,16 +37,12 @@ using query::RegExpTerm;
using query::StringTerm;
using query::SubstringTerm;
using query::SuffixTerm;
-using queryeval::SearchIterator;
-using queryeval::Searchable;
-using queryeval::CreateBlueprintVisitorHelper;
using queryeval::Blueprint;
-using queryeval::BooleanMatchIteratorWrapper;
+using queryeval::CreateBlueprintVisitorHelper;
using queryeval::EmptyBlueprint;
-using queryeval::FieldSpecBase;
-using queryeval::FieldSpecBaseList;
using queryeval::FieldSpec;
using queryeval::IRequestContext;
+using queryeval::Searchable;
}
@@ -141,47 +134,6 @@ MemoryIndex::dump(IndexBuilder &indexBuilder)
namespace {
-class MemTermBlueprint : public queryeval::SimpleLeafBlueprint {
-private:
- GenerationHandler::Guard _genGuard;
- FieldIndex::PostingList::ConstIterator _pitr;
- const FeatureStore &_featureStore;
- const uint32_t _fieldId;
- const bool _useBitVector;
-
-public:
- MemTermBlueprint(GenerationHandler::Guard &&genGuard,
- FieldIndex::PostingList::ConstIterator pitr,
- const FeatureStore &featureStore,
- const FieldSpecBase &field,
- uint32_t fieldId,
- bool useBitVector)
- : SimpleLeafBlueprint(field),
- _genGuard(),
- _pitr(pitr),
- _featureStore(featureStore),
- _fieldId(fieldId),
- _useBitVector(useBitVector)
- {
- _genGuard = std::move(genGuard);
- HitEstimate estimate(_pitr.size(), !_pitr.valid());
- setEstimate(estimate);
- }
-
- SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override {
- auto search = std::make_unique<PostingIterator>(_pitr, _featureStore, _fieldId, tfmda);
- if (_useBitVector) {
- LOG(debug, "Return BooleanMatchIteratorWrapper: fieldId(%u), docCount(%zu)",
- _fieldId, _pitr.size());
- return std::make_unique<BooleanMatchIteratorWrapper>(std::move(search), tfmda);
- }
- LOG(debug, "Return PostingIterator: fieldId(%u), docCount(%zu)",
- _fieldId, _pitr.size());
- return search;
- }
-
-};
-
/**
* Determines the correct Blueprint to use.
**/
@@ -207,13 +159,8 @@ public:
const vespalib::string termStr = queryeval::termAsString(n);
LOG(debug, "searching for '%s' in '%s'",
termStr.c_str(), _field.getName().c_str());
- FieldIndex *fieldIndex = _fieldIndexes.getFieldIndex(_fieldId);
- GenerationHandler::Guard genGuard = fieldIndex->takeGenerationGuard();
- FieldIndex::PostingList::ConstIterator pitr = fieldIndex->findFrozen(termStr);
- bool useBitVector = _field.isFilter();
- setResult(std::make_unique<MemTermBlueprint>(std::move(genGuard), pitr,
- fieldIndex->getFeatureStore(),
- _field, _fieldId, useBitVector));
+ IFieldIndex* fieldIndex = _fieldIndexes.getFieldIndex(_fieldId);
+ setResult(fieldIndex->make_term_blueprint(termStr, _field, _fieldId));
}
void visit(LocationTerm &n) override { visitTerm(n); }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h
index 9d6be2bcd94..529b1d6d6a7 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h
@@ -72,8 +72,7 @@ public:
*/
void rewind() override;
- // Used by unit test
- datastore::EntryRef getWordRef() const;
+ datastore::EntryRef getWordRef() const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
index 9680da7af11..c0ea7be0ce1 100644
--- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h
@@ -14,15 +14,14 @@ class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter {
bool _show_interleaved_features;
uint32_t _fieldId;
- void
- addComma()
- {
+ void addComma() {
if (!_first) {
_ss << ",";
} else {
_first = false;
}
}
+
public:
OrderedFieldIndexInserter()
: _ss(),
@@ -33,23 +32,17 @@ public:
{
}
- virtual void
- setNextWord(const vespalib::stringref word) override
- {
+ virtual void setNextWord(const vespalib::stringref word) override {
addComma();
_ss << "w=" << word;
}
- void
- setFieldId(uint32_t fieldId)
- {
+ void setFieldId(uint32_t fieldId) {
_fieldId = fieldId;
}
- virtual void
- add(uint32_t docId,
- const index::DocIdAndFeatures &features) override
- {
+ virtual void add(uint32_t docId,
+ const index::DocIdAndFeatures &features) override {
(void) features;
addComma();
_ss << "a=" << docId;
@@ -85,9 +78,9 @@ public:
}
}
- virtual void
- remove(uint32_t docId) override
- {
+ virtual datastore::EntryRef getWordRef() const override { return datastore::EntryRef(); }
+
+ virtual void remove(uint32_t docId) override {
addComma();
_ss << "r=" << docId;
}
@@ -99,15 +92,11 @@ public:
_ss << "f=" << _fieldId;
}
- std::string
- toStr() const
- {
+ std::string toStr() const {
return _ss.str();
}
- void
- reset()
- {
+ void reset() {
_ss.str("");
_first = true;
_verbose = false;
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h
index eeb09898aa2..647f624daea 100644
--- a/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/wrap_inserter.h
@@ -12,7 +12,7 @@ namespace search::memoryindex::test {
*/
class WrapInserter {
private:
- OrderedFieldIndexInserter& _inserter;
+ IOrderedFieldIndexInserter& _inserter;
public:
WrapInserter(FieldIndexCollection& field_indexes, uint32_t field_id)