summary refs log tree commit diff stats
path: root/searchlib/src
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-06-21 11:28:35 +0000
committer Geir Storli <geirst@verizonmedia.com> 2019-06-21 11:30:50 +0000
commit bad7460816a4f3e229be227b04c4cbaa27049ed0 (patch)
tree 8049ac203e682c7a73d7fbb71e1b60929db2b267 /searchlib/src
parent b729f40270612be3074ecd1ae584a0b997a88b23 (diff)
Support unpacking of interleaved features in memory posting list iterators.
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 379
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index_base.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp 41
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h 3
8 files changed, 269 insertions, 167 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 90e33714cac..ca02573bea2 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -40,9 +40,7 @@ using vespalib::GenerationHandler;
namespace memoryindex {
using test::WrapInserter;
-using FieldIndexType = FieldIndex<false>;
-using PostingList = FieldIndexType::PostingList;
-using PostingConstItr = PostingList::ConstIterator;
+using NormalFieldIndex = FieldIndex<false>;
class MyBuilder : public IndexBuilder {
private:
@@ -131,11 +129,21 @@ public:
}
};
+struct SimpleMatchData {
+ TermFieldMatchData term;
+ TermFieldMatchDataArray array;
+ SimpleMatchData() : term(), array() {
+ array.add(&term);
+ }
+ ~SimpleMatchData() {}
+};
+
std::string
-toString(FieldPositionsIterator posItr,
+toString(const SimpleMatchData& match_data,
bool hasElements = false,
bool hasWeights = false)
{
+ auto posItr = match_data.term.getIterator();
std::stringstream ss;
ss << "{";
ss << posItr.getFieldLength() << ":";
@@ -156,16 +164,15 @@ toString(FieldPositionsIterator posItr,
return ss.str();
}
+template <typename PostingIteratorType>
bool
assertPostingList(const std::string &exp,
- PostingConstItr itr,
+ PostingIteratorType itr,
const FeatureStore *store = nullptr)
{
std::stringstream ss;
FeatureStore::DecodeContextCooked decoder(nullptr);
- TermFieldMatchData tfmd;
- TermFieldMatchDataArray matchData;
- matchData.add(&tfmd);
+ SimpleMatchData match_data;
ss << "[";
for (size_t i = 0; itr.valid(); ++itr, ++i) {
if (i > 0) ss << ",";
@@ -175,8 +182,8 @@ assertPostingList(const std::string &exp,
EntryRef ref(itr.getData().get_features());
store->setupForField(0, decoder);
store->setupForUnpackFeatures(ref, decoder);
- decoder.unpackFeatures(matchData, docId);
- ss << toString(tfmd.getIterator());
+ decoder.unpackFeatures(match_data.array, docId);
+ ss << toString(match_data);
}
}
ss << "]";
@@ -185,8 +192,9 @@ assertPostingList(const std::string &exp,
return result;
}
+template <typename PostingIteratorType>
bool
-assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr)
+assertPostingList(std::vector<uint32_t> &exp, PostingIteratorType itr)
{
std::stringstream ss;
ss << "[";
@@ -198,21 +206,25 @@ assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr)
return assertPostingList(ss.str(), itr);
}
-FieldIndexType::PostingList::Iterator
+template <bool interleaved_features>
+typename FieldIndex<interleaved_features>::PostingList::Iterator
find_in_field_index(const vespalib::stringref word,
uint32_t field_id,
const FieldIndexCollection& fic)
{
+ using FieldIndexType = FieldIndex<interleaved_features>;
auto* field_index = dynamic_cast<FieldIndexType*>(fic.getFieldIndex(field_id));
assert(field_index != nullptr);
return field_index->find(word);
}
-FieldIndexType::PostingList::ConstIterator
+template <bool interleaved_features>
+typename FieldIndex<interleaved_features>::PostingList::ConstIterator
find_frozen_in_field_index(const vespalib::stringref word,
uint32_t field_id,
const FieldIndexCollection& fic)
{
+ using FieldIndexType = FieldIndex<interleaved_features>;
auto* field_index = dynamic_cast<FieldIndexType*>(fic.getFieldIndex(field_id));
assert(field_index != nullptr);
return field_index->findFrozen(word);
@@ -352,7 +364,7 @@ public:
bool assertPosting(const vespalib::string &word,
uint32_t fieldId) {
std::vector<uint32_t> exp = _mock.find(word, fieldId);
- PostingConstItr itr = find_in_field_index(word, fieldId, _fieldIndexes);
+ auto itr = find_in_field_index<false>(word, fieldId, _fieldIndexes);
bool result = assertPostingList(exp, itr);
EXPECT_TRUE(result);
return result;
@@ -410,7 +422,7 @@ public:
{
}
- MyDrainRemoves(FieldIndexType& field_index)
+ MyDrainRemoves(IFieldIndex& field_index)
: _remover(field_index.getDocumentRemover())
{
}
@@ -486,6 +498,7 @@ make_single_field_schema()
return result;
}
+template <typename FieldIndexType>
struct FieldIndexTest : public ::testing::Test {
Schema schema;
FieldIndexType idx;
@@ -494,12 +507,166 @@ struct FieldIndexTest : public ::testing::Test {
idx(schema, 0)
{
}
+ ~FieldIndexTest() {}
SearchIterator::UP search(const vespalib::stringref word,
- const TermFieldMatchDataArray& match_data) {
- return make_search_iterator<false>(idx.find(word), idx.getFeatureStore(), 0, match_data);
+ const SimpleMatchData& match_data) {
+ return make_search_iterator<FieldIndexType::has_interleaved_features>(idx.find(word), idx.getFeatureStore(), 0, match_data.array);
}
};
+using FieldIndexTestTypes = ::testing::Types<FieldIndex<false>, FieldIndex<true>>;
+TYPED_TEST_CASE(FieldIndexTest, FieldIndexTestTypes);
+
+// Disable warnings emitted by gtest generated files when using typed tests
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+
+TYPED_TEST(FieldIndexTest, require_that_fresh_insert_works)
+{
+ EXPECT_TRUE(assertPostingList("[]", this->idx.find("a")));
+ EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a")));
+ EXPECT_EQ(0u, this->idx.getNumUniqueWords());
+ WrapInserter(this->idx).word("a").add(10).flush();
+ EXPECT_TRUE(assertPostingList("[10]", this->idx.find("a")));
+ EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a")));
+ this->idx.commit();
+ EXPECT_TRUE(assertPostingList("[10]", this->idx.findFrozen("a")));
+ EXPECT_EQ(1u, this->idx.getNumUniqueWords());
+}
+
+TYPED_TEST(FieldIndexTest, require_that_append_insert_works)
+{
+ WrapInserter(this->idx).word("a").add(10).flush().rewind().
+ word("a").add(5).flush();
+ EXPECT_TRUE(assertPostingList("[5,10]", this->idx.find("a")));
+ EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a")));
+ WrapInserter(this->idx).rewind().word("a").add(20).flush();
+ EXPECT_TRUE(assertPostingList("[5,10,20]", this->idx.find("a")));
+ EXPECT_TRUE(assertPostingList("[]", this->idx.findFrozen("a")));
+ this->idx.commit();
+ EXPECT_TRUE(assertPostingList("[5,10,20]", this->idx.findFrozen("a")));
+}
+
+TYPED_TEST(FieldIndexTest, require_that_remove_works)
+{
+ WrapInserter(this->idx).word("a").remove(10).flush();
+ EXPECT_TRUE(assertPostingList("[]", this->idx.find("a")));
+ WrapInserter(this->idx).add(10).add(20).add(30).flush();
+ EXPECT_TRUE(assertPostingList("[10,20,30]", this->idx.find("a")));
+ WrapInserter(this->idx).rewind().word("a").remove(10).flush();
+ EXPECT_TRUE(assertPostingList("[20,30]", this->idx.find("a")));
+ WrapInserter(this->idx).remove(20).flush();
+ EXPECT_TRUE(assertPostingList("[30]", this->idx.find("a")));
+ WrapInserter(this->idx).remove(30).flush();
+ EXPECT_TRUE(assertPostingList("[]", this->idx.find("a")));
+ EXPECT_EQ(1u, this->idx.getNumUniqueWords());
+ MyDrainRemoves(this->idx).drain(10);
+ WrapInserter(this->idx).rewind().word("a").add(10).flush();
+ EXPECT_TRUE(assertPostingList("[10]", this->idx.find("a")));
+}
+
+void
+addElement(DocIdAndFeatures &f,
+ uint32_t elemLen,
+ uint32_t numOccs,
+ int32_t weight = 1)
+{
+ f.elements().emplace_back(f.elements().size(), weight, elemLen);
+ f.elements().back().setNumOccs(numOccs);
+ for (uint32_t i = 0; i < numOccs; ++i) {
+ f.word_positions().emplace_back(i);
+ }
+}
+
+DocIdAndFeatures
+getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1)
+{
+ DocIdAndFeatures f;
+ addElement(f, elemLen, numOccs, weight);
+ f.set_num_occs(numOccs);
+ f.set_field_length(elemLen);
+ return f;
+}
+
+TYPED_TEST(FieldIndexTest, require_that_posting_iterator_is_working)
+{
+ WrapInserter(this->idx).word("a").add(10, getFeatures(4, 1)).
+ add(20, getFeatures(5, 2)).
+ add(30, getFeatures(6, 1)).
+ add(40, getFeatures(7, 2)).flush();
+ SimpleMatchData match_data;
+ {
+ auto itr = this->search("not", match_data);
+ itr->initFullRange();
+ EXPECT_TRUE(itr->isAtEnd());
+ }
+ {
+ auto itr = this->search("a", match_data);
+ itr->initFullRange();
+ EXPECT_EQ(10u, itr->getDocId());
+ itr->unpack(10);
+ EXPECT_EQ("{4:0}", toString(match_data));
+ EXPECT_TRUE(!itr->seek(25));
+ EXPECT_EQ(30u, itr->getDocId());
+ itr->unpack(30);
+ EXPECT_EQ("{6:0}", toString(match_data));
+ EXPECT_TRUE(itr->seek(40));
+ EXPECT_EQ(40u, itr->getDocId());
+ itr->unpack(40);
+ EXPECT_EQ("{7:0,1}", toString(match_data));
+ EXPECT_TRUE(!itr->seek(41));
+ EXPECT_TRUE(itr->isAtEnd());
+ }
+}
+
+#pragma GCC diagnostic pop
+
+struct FieldIndexInterleavedFeaturesTest : public FieldIndexTest<FieldIndex<true>> {
+ SimpleMatchData match_data;
+ FieldIndexInterleavedFeaturesTest()
+ : FieldIndexTest<FieldIndex<true>>()
+ {
+ WrapInserter(idx).word("a").add(10, getFeatures(5, 2)).flush();
+ }
+ void
+ expect_features_unpacked(const std::string& exp_field_positions,
+ uint32_t exp_num_occs,
+ uint32_t exp_field_length) {
+ auto itr = search("a", match_data);
+ itr->initFullRange();
+ EXPECT_EQ(10u, itr->getDocId());
+ itr->unpack(10);
+ EXPECT_EQ(exp_field_positions, toString(match_data));
+ EXPECT_EQ(exp_num_occs, match_data.term.getNumOccs());
+ EXPECT_EQ(exp_field_length, match_data.term.getFieldLength());
+ EXPECT_TRUE(!itr->seek(11));
+ EXPECT_TRUE(itr->isAtEnd());
+ }
+};
+
+TEST_F(FieldIndexInterleavedFeaturesTest, only_normal_features_are_unpacked)
+{
+ match_data.term.setNeedNormalFeatures(true);
+ match_data.term.setNeedInterleavedFeatures(false);
+ expect_features_unpacked("{5:0,1}", 0, 0);
+}
+
+TEST_F(FieldIndexInterleavedFeaturesTest, only_interleaved_features_are_unpacked)
+{
+ match_data.term.setNeedNormalFeatures(false);
+ match_data.term.setNeedInterleavedFeatures(true);
+ expect_features_unpacked("{1000000:}", 2, 5);
+}
+
+TEST_F(FieldIndexInterleavedFeaturesTest, both_normal_and_interleaved_features_are_unpacked)
+{
+ match_data.term.setNeedNormalFeatures(true);
+ match_data.term.setNeedInterleavedFeatures(true);
+ expect_features_unpacked("{5:0,1}", 2, 5);
+}
+
Schema
make_multi_field_schema()
{
@@ -511,8 +678,6 @@ make_multi_field_schema()
return result;
}
-
-
struct FieldIndexCollectionTest : public ::testing::Test {
Schema schema;
FieldIndexCollection fic;
@@ -523,38 +688,12 @@ struct FieldIndexCollectionTest : public ::testing::Test {
}
~FieldIndexCollectionTest() {}
- FieldIndexType::PostingList::Iterator find(const vespalib::stringref word,
- uint32_t fieldId) const {
- return find_in_field_index(word, fieldId, fic);
+ NormalFieldIndex::PostingList::Iterator find(const vespalib::stringref word,
+ uint32_t field_id) const {
+ return find_in_field_index<false>(word, field_id, fic);
}
};
-TEST_F(FieldIndexTest, require_that_fresh_insert_works)
-{
- EXPECT_TRUE(assertPostingList("[]", idx.find("a")));
- EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a")));
- EXPECT_EQ(0u, idx.getNumUniqueWords());
- WrapInserter(idx).word("a").add(10).flush();
- EXPECT_TRUE(assertPostingList("[10]", idx.find("a")));
- EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a")));
- idx.commit();
- EXPECT_TRUE(assertPostingList("[10]", idx.findFrozen("a")));
- EXPECT_EQ(1u, idx.getNumUniqueWords());
-}
-
-TEST_F(FieldIndexTest, require_that_append_insert_works)
-{
- WrapInserter(idx).word("a").add(10).flush().rewind().
- word("a").add(5).flush();
- EXPECT_TRUE(assertPostingList("[5,10]", idx.find("a")));
- EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a")));
- WrapInserter(idx).rewind().word("a").add(20).flush();
- EXPECT_TRUE(assertPostingList("[5,10,20]", idx.find("a")));
- EXPECT_TRUE(assertPostingList("[]", idx.findFrozen("a")));
- idx.commit();
- EXPECT_TRUE(assertPostingList("[5,10,20]", idx.findFrozen("a")));
-}
-
TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_multiple_fields_can_exist)
{
WrapInserter(fic, 0).word("a").add(10).word("b").add(11).add(15).flush();
@@ -568,24 +707,6 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_posting_lists_across_mult
EXPECT_TRUE(assertPostingList("[]", find("c", 0)));
}
-TEST_F(FieldIndexTest, require_that_remove_works)
-{
- WrapInserter(idx).word("a").remove(10).flush();
- EXPECT_TRUE(assertPostingList("[]", idx.find("a")));
- WrapInserter(idx).add(10).add(20).add(30).flush();
- EXPECT_TRUE(assertPostingList("[10,20,30]", idx.find("a")));
- WrapInserter(idx).rewind().word("a").remove(10).flush();
- EXPECT_TRUE(assertPostingList("[20,30]", idx.find("a")));
- WrapInserter(idx).remove(20).flush();
- EXPECT_TRUE(assertPostingList("[30]", idx.find("a")));
- WrapInserter(idx).remove(30).flush();
- EXPECT_TRUE(assertPostingList("[]", idx.find("a")));
- EXPECT_EQ(1u, idx.getNumUniqueWords());
- MyDrainRemoves(idx).drain(10);
- WrapInserter(idx).rewind().word("a").add(10).flush();
- EXPECT_TRUE(assertPostingList("[10]", idx.find("a")));
-}
-
TEST_F(FieldIndexCollectionTest, require_that_multiple_insert_and_remove_works)
{
MyInserter inserter(schema);
@@ -628,27 +749,6 @@ TEST_F(FieldIndexCollectionTest, require_that_multiple_insert_and_remove_works)
EXPECT_TRUE(inserter.assertPostings());
}
-void
-addElement(DocIdAndFeatures &f,
- uint32_t elemLen,
- uint32_t numOccs,
- int32_t weight = 1)
-{
- f.elements().emplace_back(f.elements().size(), weight, elemLen);
- f.elements().back().setNumOccs(numOccs);
- for (uint32_t i = 0; i < numOccs; ++i) {
- f.word_positions().emplace_back(i);
- }
-}
-
-DocIdAndFeatures
-getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1)
-{
- DocIdAndFeatures f;
- addElement(f, elemLen, numOccs, weight);
- return f;
-}
-
TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists)
{
WrapInserter(fic, 0).word("a").add(1, getFeatures(4, 2)).flush();
@@ -666,39 +766,6 @@ TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists)
featureStorePtr(fic, 1)));
}
-TEST_F(FieldIndexTest, require_that_posting_iterator_is_working)
-{
- WrapInserter(idx).word("a").add(10, getFeatures(4, 1)).
- add(20, getFeatures(5, 2)).
- add(30, getFeatures(6, 1)).
- add(40, getFeatures(7, 2)).flush();
- TermFieldMatchData tfmd;
- TermFieldMatchDataArray matchData;
- matchData.add(&tfmd);
- {
- auto itr = search("not", matchData);
- itr->initFullRange();
- EXPECT_TRUE(itr->isAtEnd());
- }
- {
- auto itr = search("a", matchData);
- itr->initFullRange();
- EXPECT_EQ(10u, itr->getDocId());
- itr->unpack(10);
- EXPECT_EQ("{4:0}", toString(tfmd.getIterator()));
- EXPECT_TRUE(!itr->seek(25));
- EXPECT_EQ(30u, itr->getDocId());
- itr->unpack(30);
- EXPECT_EQ("{6:0}", toString(tfmd.getIterator()));
- EXPECT_TRUE(itr->seek(40));
- EXPECT_EQ(40u, itr->getDocId());
- itr->unpack(40);
- EXPECT_EQ("{7:0,1}", toString(tfmd.getIterator()));
- EXPECT_TRUE(!itr->seek(41));
- EXPECT_TRUE(itr->isAtEnd());
- }
-}
-
TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_working)
{
MyBuilder b(schema);
@@ -791,16 +858,16 @@ public:
_inv(_schema, _invertThreads, _pushThreads, _fic)
{
}
- PostingList::Iterator find(const vespalib::stringref word, uint32_t field_id) const {
- return find_in_field_index(word, field_id, _fic);
+ NormalFieldIndex::PostingList::Iterator find(const vespalib::stringref word, uint32_t field_id) const {
+ return find_in_field_index<false>(word, field_id, _fic);
}
- PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t field_id) const {
- return find_frozen_in_field_index(word, field_id, _fic);
+ NormalFieldIndex::PostingList::ConstIterator findFrozen(const vespalib::stringref word, uint32_t field_id) const {
+ return find_frozen_in_field_index<false>(word, field_id, _fic);
}
SearchIterator::UP search(const vespalib::stringref word, uint32_t field_id,
- const TermFieldMatchDataArray& match_data) {
+ const SimpleMatchData& match_data) {
return make_search_iterator<false>(findFrozen(word, field_id), featureStoreRef(_fic, field_id),
- field_id, match_data);
+ field_id, match_data.array);
}
};
@@ -956,58 +1023,56 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working)
afterStats._activeBuffers,
afterStats._holdBuffers);
- TermFieldMatchData tfmd;
- TermFieldMatchDataArray matchData;
- matchData.add(&tfmd);
+ SimpleMatchData match_data;
{
- auto itr = search("not", 0, matchData);
+ auto itr = search("not", 0, match_data);
itr->initFullRange();
EXPECT_TRUE(itr->isAtEnd());
}
{
- auto itr = search("a", 0, matchData);
+ auto itr = search("a", 0, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
- EXPECT_EQ("{4:0}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{4:0}", toString(match_data));
EXPECT_TRUE(!itr->seek(25));
EXPECT_EQ(30u, itr->getDocId());
itr->unpack(30);
- EXPECT_EQ("{6:0}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{6:0}", toString(match_data));
EXPECT_TRUE(itr->seek(40));
EXPECT_EQ(40u, itr->getDocId());
itr->unpack(40);
- EXPECT_EQ("{7:0,1,4}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{7:0,1,4}", toString(match_data));
EXPECT_TRUE(!itr->seek(41));
EXPECT_TRUE(itr->isAtEnd());
}
{
- auto itr = search("x", 0, matchData);
+ auto itr = search("x", 0, match_data);
itr->initFullRange();
EXPECT_TRUE(itr->isAtEnd());
}
{
- auto itr = search("x", 1, matchData);
+ auto itr = search("x", 1, match_data);
itr->initFullRange();
EXPECT_EQ(30u, itr->getDocId());
itr->unpack(30);
- EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(tfmd.getIterator(), true, true));
+ EXPECT_EQ("{6:2[e=0,w=1,l=6]}", toString(match_data, true, true));
}
{
- auto itr = search("x", 2, matchData);
+ auto itr = search("x", 2, match_data);
itr->initFullRange();
EXPECT_EQ(30u, itr->getDocId());
itr->unpack(30);
// weight is hardcoded to 1 for new style il doc array field
- EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(tfmd.getIterator(), true, true));
+ EXPECT_EQ("{2:1[e=0,w=1,l=2]}", toString(match_data, true, true));
}
{
- auto itr = search("x", 3, matchData);
+ auto itr = search("x", 3, match_data);
itr->initFullRange();
EXPECT_EQ(30u, itr->getDocId());
itr->unpack(30);
EXPECT_EQ("{2:1[e=0,w=6,l=2]}",
- toString(tfmd.getIterator(), true, true));
+ toString(match_data, true, true));
}
}
@@ -1194,44 +1259,42 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
_pushThreads.sync();
- TermFieldMatchData tfmd;
- TermFieldMatchDataArray matchData;
- matchData.add(&tfmd);
+ SimpleMatchData match_data;
{
uint32_t fieldId = _schema.getIndexFieldId("iu");
- auto itr = search("not", fieldId, matchData);
+ auto itr = search("not", fieldId, match_data);
itr->initFullRange();
EXPECT_TRUE(itr->isAtEnd());
}
{
uint32_t fieldId = _schema.getIndexFieldId("iu");
- auto itr = search("example", fieldId, matchData);
+ auto itr = search("example", fieldId, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
- EXPECT_EQ("{9:2}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{9:2}", toString(match_data));
EXPECT_TRUE(!itr->seek(25));
EXPECT_TRUE(itr->isAtEnd());
}
{
uint32_t fieldId = _schema.getIndexFieldId("iau");
- auto itr = search("example", fieldId, matchData);
+ auto itr = search("example", fieldId, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
EXPECT_EQ("{9:2[e=0,l=9]}",
- toString(tfmd.getIterator(), true, false));
+ toString(match_data, true, false));
EXPECT_TRUE(!itr->seek(25));
EXPECT_TRUE(itr->isAtEnd());
}
{
uint32_t fieldId = _schema.getIndexFieldId("iwu");
- auto itr = search("example", fieldId, matchData);
+ auto itr = search("example", fieldId, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
EXPECT_EQ("{9:2[e=0,w=4,l=9]}",
- toString(tfmd.getIterator(), true, true));
+ toString(match_data, true, true));
EXPECT_TRUE(!itr->seek(25));
EXPECT_TRUE(itr->isAtEnd());
}
@@ -1272,34 +1335,32 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working)
_pushThreads.sync();
- TermFieldMatchData tfmd;
- TermFieldMatchDataArray matchData;
- matchData.add(&tfmd);
+ SimpleMatchData match_data;
uint32_t fieldId = _schema.getIndexFieldId("f0");
{
- auto itr = search("not", fieldId, matchData);
+ auto itr = search("not", fieldId, match_data);
itr->initFullRange();
EXPECT_TRUE(itr->isAtEnd());
}
{
auto itr = search("我就"
"是那个",
- fieldId, matchData);
+ fieldId, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
- EXPECT_EQ("{2:0}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{2:0}", toString(match_data));
EXPECT_TRUE(!itr->seek(25));
EXPECT_TRUE(itr->isAtEnd());
}
{
auto itr = search("大灰"
"狼",
- fieldId, matchData);
+ fieldId, match_data);
itr->initFullRange();
EXPECT_EQ(10u, itr->getDocId());
itr->unpack(10);
- EXPECT_EQ("{2:1}", toString(tfmd.getIterator()));
+ EXPECT_EQ("{2:1}", toString(match_data));
EXPECT_TRUE(!itr->seek(25));
EXPECT_TRUE(itr->isAtEnd());
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 37a558b17a3..4d18e0a2690 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -293,8 +293,8 @@ FieldIndex<interleaved_features>::make_term_blueprint(const vespalib::string& te
(std::move(guard), posting_itr, getFeatureStore(), field, field_id, use_bit_vector);
}
-template
-class FieldIndex<false>;
+template class FieldIndex<false>;
+template class FieldIndex<true>;
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 05665945800..324de79d9e8 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -32,6 +32,8 @@ class IOrderedFieldIndexInserter;
template <bool interleaved_features>
class FieldIndex : public FieldIndexBase {
public:
+ static constexpr bool has_interleaved_features = interleaved_features;
+
// Mapping from docid -> feature ref
using PostingListEntryType = PostingListEntry<interleaved_features>;
using PostingList = btree::BTreeRoot<uint32_t, PostingListEntryType, search::btree::NoAggregated>;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp
index ee1fee3d935..7bf20151b11 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.cpp
@@ -32,5 +32,7 @@ FieldIndexBase::FieldIndexBase(const index::Schema& schema, uint32_t fieldId,
{
}
+FieldIndexBase::~FieldIndexBase() = default;
+
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h
index 7efec1f2ae8..9c6bf823023 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_base.h
@@ -99,6 +99,7 @@ public:
FieldIndexBase(const index::Schema& schema, uint32_t fieldId);
FieldIndexBase(const index::Schema& schema, uint32_t fieldId, const index::FieldLengthInfo& info);
+ ~FieldIndexBase();
uint64_t getNumUniqueWords() const override { return _numUniqueWords; }
const FeatureStore& getFeatureStore() const override { return _featureStore; }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
index 0d2bb1f5371..c75087e8577 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp
@@ -155,7 +155,7 @@ OrderedFieldIndexInserter<interleaved_features>::getWordRef() const
return _dItr.getKey()._wordRef;
}
-template
-class OrderedFieldIndexInserter<false>;
+template class OrderedFieldIndexInserter<false>;
+template class OrderedFieldIndexInserter<true>;
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
index 7c6ceb26f31..ccb410a4e10 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/posting_iterator.cpp
@@ -126,8 +126,10 @@ PostingIterator<interleaved_features, unpack_normal_features, unpack_interleaved
_feature_decoder.unpackFeatures(_matchData, docId);
setUnpacked();
}
- if (unpack_interleaved_features) {
- // TODO: implement
+ if (interleaved_features && unpack_interleaved_features) {
+ auto* tfmd = _matchData[0];
+ tfmd->setNumOccs(_itr.getData().get_num_occs());
+ tfmd->setFieldLength(_itr.getData().get_field_length());
}
}
@@ -139,8 +141,24 @@ make_search_iterator(typename FieldIndex<interleaved_features>::PostingList::Con
const fef::TermFieldMatchDataArray& match_data)
{
assert(match_data.size() == 1);
- return std::make_unique<PostingIterator<interleaved_features, true, false>>
- (itr, feature_store, field_id, match_data);
+ auto* tfmd = match_data[0];
+ if (tfmd->needs_normal_features()) {
+ if (tfmd->needs_interleaved_features()) {
+ return std::make_unique<PostingIterator<interleaved_features, true, true>>
+ (itr, feature_store, field_id, match_data);
+ } else {
+ return std::make_unique<PostingIterator<interleaved_features, true, false>>
+ (itr, feature_store, field_id, match_data);
+ }
+ } else {
+ if (tfmd->needs_interleaved_features()) {
+ return std::make_unique<PostingIterator<interleaved_features, false, true>>
+ (itr, feature_store, field_id, match_data);
+ } else {
+ return std::make_unique<PostingIterator<interleaved_features, false, false>>
+ (itr, feature_store, field_id, match_data);
+ }
+ }
}
template
@@ -150,9 +168,24 @@ make_search_iterator<false>(typename FieldIndex<false>::PostingList::ConstIterat
uint32_t,
const fef::TermFieldMatchDataArray&);
+template
+queryeval::SearchIterator::UP
+make_search_iterator<true>(typename FieldIndex<true>::PostingList::ConstIterator,
+ const FeatureStore&,
+ uint32_t,
+ const fef::TermFieldMatchDataArray&);
+
template class PostingIteratorBase<false>;
+template class PostingIteratorBase<true>;
+template class PostingIterator<false, false, false>;
+template class PostingIterator<false, false, true>;
template class PostingIterator<false, true, false>;
+template class PostingIterator<false, true, true>;
+template class PostingIterator<true, false, false>;
+template class PostingIterator<true, false, true>;
+template class PostingIterator<true, true, false>;
+template class PostingIterator<true, true, true>;
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h
index 33cfda32843..f58a62ddef5 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/posting_list_entry.h
@@ -74,4 +74,7 @@ public:
void update_features(datastore::EntryRef features) const { _features = features; }
};
+template class PostingListEntry<false>;
+template class PostingListEntry<true>;
+
}