diff options
Diffstat (limited to 'searchlib')
35 files changed, 486 insertions, 510 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index f03a7cfd445..6d1695155a8 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -175,9 +175,9 @@ vespa_define_module( src/tests/ld-library-path src/tests/memoryindex/compact_document_words_store src/tests/memoryindex/datastore - src/tests/memoryindex/dictionary src/tests/memoryindex/document_remover src/tests/memoryindex/documentinverter + src/tests/memoryindex/field_index src/tests/memoryindex/fieldinverter src/tests/memoryindex/memoryindex src/tests/memoryindex/urlfieldinverter diff --git a/searchlib/abi-spec.json b/searchlib/abi-spec.json index b8c51f4e33d..0b9cb06d2a5 100644 --- a/searchlib/abi-spec.json +++ b/searchlib/abi-spec.json @@ -1382,7 +1382,7 @@ "public void <init>(java.util.Map)", "public void <init>(java.util.Map, java.util.Map)", "public com.yahoo.searchlib.rankingexpression.ExpressionFunction getFunction(java.lang.String)", - "protected java.util.Map functions()", + "protected final com.google.common.collect.ImmutableMap functions()", "public java.lang.String getBinding(java.lang.String)", "public com.yahoo.searchlib.rankingexpression.rule.FunctionReferenceContext withBindings(java.util.Map)" ], diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionReferenceContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionReferenceContext.java index ed1e2838717..084bfe65e06 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionReferenceContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionReferenceContext.java @@ -61,7 +61,7 @@ public class FunctionReferenceContext { */ public ExpressionFunction getFunction(String name) { return functions.get(name); } - protected Map<String, ExpressionFunction> functions() { return functions; } + protected final ImmutableMap<String, ExpressionFunction> functions() { return functions; } /** Returns the resolution of an argument, or null if it isn't defined in this context */ public String getBinding(String name) { return bindings.get(name); } diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java index 0b68e71c21a..4acc1a85490 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java @@ -3,12 +3,10 @@ package com.yahoo.searchlib.rankingexpression.rule; import com.google.common.collect.ImmutableMap; import com.yahoo.searchlib.rankingexpression.ExpressionFunction; -import com.yahoo.searchlib.rankingexpression.RankingExpression; import com.yahoo.tensor.TensorType; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -95,7 +93,7 @@ public class SerializationContext extends FunctionReferenceContext { @Override public SerializationContext withBindings(Map<String, String> bindings) { - return new SerializationContext(functions().values(), bindings, this.serializedFunctions); + return new SerializationContext(functions(), bindings, this.serializedFunctions); } public Map<String, String> serializedFunctions() { return serializedFunctions; } diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index 809688bdb2e..fb6535c4a70 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -1,5 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/diskindex/diskindex.h> #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> @@ -7,16 +12,11 @@ #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/index/docbuilder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/btree/btreeroot.hpp> -#include <vespa/searchlib/btree/btreenodeallocator.hpp> -#include <vespa/searchlib/btree/btreenode.hpp> -#include <vespa/searchlib/memoryindex/dictionary.h> #include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/field_index_collection.h> #include <vespa/searchlib/memoryindex/postingiterator.h> -#include <vespa/searchlib/diskindex/diskindex.h> -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/searchlib/util/filekit.h> -#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/vespalib/testkit/testapp.h> #include <vespa/log/log.h> LOG_SETUP("fusion_test"); @@ -27,7 +27,7 @@ using document::Document; using fef::FieldPositionsIterator; using fef::TermFieldMatchData; using fef::TermFieldMatchDataArray; -using memoryindex::Dictionary; +using memoryindex::FieldIndexCollection; using memoryindex::DocumentInverter; using queryeval::SearchIterator; using search::common::FileHeaderContext; @@ -54,9 +54,9 @@ public: namespace { void -myPushDocument(DocumentInverter &inv, Dictionary &d) +myPushDocument(DocumentInverter &inv, FieldIndexCollection &fieldIndexes) { - inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); } } @@ -274,7 +274,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, addField("f0").addField("f1"). addField("f2").addField("f3"). addField("f4")); - Dictionary d(schema); + FieldIndexCollection fic(schema); DocBuilder b(schema); SequencedTaskExecutor invertThreads(2); SequencedTaskExecutor pushThreads(2); @@ -301,7 +301,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, doc = b.endDocument(); inv.invertDocument(10, *doc); invertThreads.sync(); - myPushDocument(inv, d); + myPushDocument(inv, fic); pushThreads.sync(); b.startDocument("doc::11"). @@ -311,7 +311,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, doc = b.endDocument(); inv.invertDocument(11, *doc); invertThreads.sync(); - myPushDocument(inv, d); + myPushDocument(inv, fic); pushThreads.sync(); b.startDocument("doc::12"). @@ -321,14 +321,14 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, doc = b.endDocument(); inv.invertDocument(12, *doc); invertThreads.sync(); - myPushDocument(inv, d); + myPushDocument(inv, fic); pushThreads.sync(); IndexBuilder ib(schema); vespalib::string dump2dir = prefix + "dump2"; ib.setPrefix(dump2dir); uint32_t numDocs = 12 + 1; - uint32_t numWords = d.getNumUniqueWords(); + uint32_t numWords = fic.getNumUniqueWords(); bool dynamicKPosOcc = false; TuneFileIndexing tuneFileIndexing; TuneFileSearch tuneFileSearch; @@ -341,7 +341,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, if (readmmap) tuneFileSearch._read.setWantMemoryMap(); ib.open(numDocs, numWords, tuneFileIndexing, fileHeaderContext); - d.dump(ib); + fic.dump(ib); ib.close(); vespalib::string tsName = dump2dir + "/.teststamp"; diff --git a/searchlib/src/tests/memoryindex/dictionary/.gitignore b/searchlib/src/tests/memoryindex/dictionary/.gitignore deleted file mode 100644 index d404d7d7063..00000000000 --- a/searchlib/src/tests/memoryindex/dictionary/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -.depend -Makefile -dictionary_test -dump -/urldump -searchlib_dictionary_test_app diff --git a/searchlib/src/tests/memoryindex/field_index/.gitignore b/searchlib/src/tests/memoryindex/field_index/.gitignore new file mode 100644 index 00000000000..e1a6c049431 --- /dev/null +++ b/searchlib/src/tests/memoryindex/field_index/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +field_index_test +dump +/urldump +searchlib_field_index_test_app diff --git a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt b/searchlib/src/tests/memoryindex/field_index/CMakeLists.txt index 5866e1d3413..767097b99db 100644 --- a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt +++ b/searchlib/src/tests/memoryindex/field_index/CMakeLists.txt @@ -1,9 +1,9 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_dictionary_test_app TEST +vespa_add_executable(searchlib_field_index_test_app TEST SOURCES - dictionary_test.cpp + field_index_test.cpp DEPENDS searchlib searchlib_test ) -vespa_add_test(NAME searchlib_dictionary_test_app COMMAND searchlib_dictionary_test_app) +vespa_add_test(NAME searchlib_field_index_test_app COMMAND searchlib_field_index_test_app) diff --git a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 2c4fdc35ac6..373ed7fd311 100644 --- a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -1,5 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> @@ -7,14 +10,11 @@ #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/index/docbuilder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/btree/btreeroot.hpp> -#include <vespa/searchlib/btree/btreenodeallocator.hpp> -#include <vespa/searchlib/memoryindex/dictionary.h> #include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/field_index_collection.h> #include <vespa/searchlib/memoryindex/fieldinverter.h> -#include <vespa/searchlib/memoryindex/postingiterator.h> #include <vespa/searchlib/memoryindex/ordereddocumentinserter.h> -#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/memoryindex/postingiterator.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/vespalib/testkit/testapp.h> @@ -37,7 +37,7 @@ using vespalib::GenerationHandler; namespace memoryindex { -typedef Dictionary::PostingList PostingList; +typedef FieldIndex::PostingList PostingList; typedef PostingList::ConstIterator PostingConstItr; class MyBuilder : public IndexBuilder { @@ -240,17 +240,17 @@ namespace { /** - * MockDictionary is a simple mockup of memory index, used to verify - * that we get correct posting lists from real memory index. + * A simple mockup of a memory field index, used to verify + * that we get correct posting lists from real memory field index. */ -class MockDictionary +class MockFieldIndex { std::map<std::pair<vespalib::string, uint32_t>, std::set<uint32_t>> _dict; vespalib::string _word; uint32_t _fieldId; public: - ~MockDictionary(); + ~MockFieldIndex(); void setNextWord(const vespalib::string &word) { @@ -296,7 +296,7 @@ public: } }; -MockDictionary::~MockDictionary() = default; +MockFieldIndex::~MockFieldIndex() = default; /** * MockWordStoreScan is a helper class to ensure that previous word is @@ -335,6 +335,7 @@ public: }; MockWordStoreScan::~MockWordStoreScan() = default; + /** * MyInserter performs insertions on both a mockup version of memory index * and a real memory index. Mockup version is used to calculate expected @@ -343,8 +344,8 @@ MockWordStoreScan::~MockWordStoreScan() = default; class MyInserter { MockWordStoreScan _wordStoreScan; - MockDictionary _mock; - Dictionary _d; + MockFieldIndex _mock; + FieldIndexCollection _fieldIndexes; DocIdAndPosOccFeatures _features; IOrderedDocumentInserter *_documentInserter; @@ -352,7 +353,7 @@ public: MyInserter(const Schema &schema) : _wordStoreScan(), _mock(), - _d(schema), + _fieldIndexes(schema), _features(), _documentInserter(nullptr) { @@ -374,7 +375,7 @@ public: if (_documentInserter != nullptr) { _documentInserter->flush(); } - _documentInserter = &_d.getFieldIndex(fieldId)->getInserter(); + _documentInserter = &_fieldIndexes.getFieldIndex(fieldId)->getInserter(); _documentInserter->rewind(); _mock.setNextField(fieldId); } @@ -398,7 +399,7 @@ public: uint32_t fieldId) { std::vector<uint32_t> exp = _mock.find(word, fieldId); - PostingConstItr itr = _d.find(word, fieldId); + PostingConstItr itr = _fieldIndexes.find(word, fieldId); return EXPECT_TRUE(assertPostingList(exp, itr)); } @@ -431,20 +432,20 @@ public: uint32_t getNumUniqueWords() { - return _d.getNumUniqueWords(); + return _fieldIndexes.getNumUniqueWords(); } - Dictionary &getDict() { return _d; } + FieldIndexCollection &getFieldIndexes() { return _fieldIndexes; } }; MyInserter::~MyInserter() = default; void -myremove(uint32_t docId, DocumentInverter &inv, Dictionary &d, +myremove(uint32_t docId, DocumentInverter &inv, FieldIndexCollection &fieldIndexes, ISequencedTaskExecutor &invertThreads) { inv.removeDocument(docId); invertThreads.sync(); - inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); } @@ -452,8 +453,8 @@ class WrapInserter { OrderedDocumentInserter &_inserter; public: - WrapInserter(Dictionary &d, uint32_t fieldId) - : _inserter(d.getFieldIndex(fieldId)->getInserter()) + WrapInserter(FieldIndexCollection &fieldIndexes, uint32_t fieldId) + : _inserter(fieldIndexes.getFieldIndex(fieldId)->getInserter()) { } @@ -508,8 +509,8 @@ class MyDrainRemoves : IDocumentRemoveListener public: virtual void remove(const vespalib::stringref, uint32_t) override { } - MyDrainRemoves(Dictionary &d, uint32_t fieldId) - : _remover(d.getFieldIndex(fieldId)->getDocumentRemover()) + MyDrainRemoves(FieldIndexCollection &fieldIndexes, uint32_t fieldId) + : _remover(fieldIndexes.getFieldIndex(fieldId)->getDocumentRemover()) { } @@ -520,43 +521,43 @@ public: }; void -myPushDocument(DocumentInverter &inv, Dictionary &d) +myPushDocument(DocumentInverter &inv, FieldIndexCollection &fieldIndexes) { - inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); + inv.pushDocuments(fieldIndexes, std::shared_ptr<IDestructorCallback>()); } const FeatureStore * -featureStorePtr(const Dictionary &d, uint32_t fieldId) +featureStorePtr(const FieldIndexCollection &fieldIndexes, uint32_t fieldId) { - return &d.getFieldIndex(fieldId)->getFeatureStore(); + return &fieldIndexes.getFieldIndex(fieldId)->getFeatureStore(); } const FeatureStore & -featureStoreRef(const Dictionary &d, uint32_t fieldId) +featureStoreRef(const FieldIndexCollection &fieldIndexes, uint32_t fieldId) { - return d.getFieldIndex(fieldId)->getFeatureStore(); + return fieldIndexes.getFieldIndex(fieldId)->getFeatureStore(); } DataStoreBase::MemStats -getFeatureStoreMemStats(const Dictionary &d) +getFeatureStoreMemStats(const FieldIndexCollection &fieldIndexes) { DataStoreBase::MemStats res; - uint32_t numFields = d.getNumFields(); + uint32_t numFields = fieldIndexes.getNumFields(); for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { DataStoreBase::MemStats stats = - d.getFieldIndex(fieldId)->getFeatureStore().getMemStats(); + fieldIndexes.getFieldIndex(fieldId)->getFeatureStore().getMemStats(); res += stats; } return res; } -void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads) +void myCommit(FieldIndexCollection &fieldIndexes, ISequencedTaskExecutor &pushThreads) { uint32_t fieldId = 0; - for (auto &fieldIndex : d.getFieldIndexes()) { + for (auto &fieldIndex : fieldIndexes.getFieldIndexes()) { pushThreads.execute(fieldId, [fieldIndex(fieldIndex.get())]() { fieldIndex->commit(); }); @@ -567,10 +568,10 @@ void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads) void -myCompactFeatures(Dictionary &d, ISequencedTaskExecutor &pushThreads) +myCompactFeatures(FieldIndexCollection &fieldIndexes, ISequencedTaskExecutor &pushThreads) { uint32_t fieldId = 0; - for (auto &fieldIndex : d.getFieldIndexes()) { + for (auto &fieldIndex : fieldIndexes.getFieldIndexes()) { pushThreads.execute(fieldId, [fieldIndex(fieldIndex.get())]() { fieldIndex->compactFeatures(); }); @@ -593,67 +594,69 @@ struct Fixture const Schema & getSchema() const { return _schema; } }; +// TODO: Rewrite most tests to use FieldIndex directly instead of going via FieldIndexCollection. + TEST_F("requireThatFreshInsertWorks", Fixture) { - Dictionary d(f.getSchema()); + FieldIndexCollection fic(f.getSchema()); SequencedTaskExecutor pushThreads(2); - EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); - EXPECT_EQUAL(0u, d.getNumUniqueWords()); - WrapInserter(d, 0).word("a").add(10).flush(); - EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); - myCommit(d, pushThreads); - EXPECT_TRUE(assertPostingList("[10]", d.findFrozen("a", 0))); - EXPECT_EQUAL(1u, d.getNumUniqueWords()); + EXPECT_TRUE(assertPostingList("[]", fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", fic.findFrozen("a", 0))); + EXPECT_EQUAL(0u, fic.getNumUniqueWords()); + WrapInserter(fic, 0).word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", fic.findFrozen("a", 0))); + myCommit(fic, pushThreads); + EXPECT_TRUE(assertPostingList("[10]", fic.findFrozen("a", 0))); + EXPECT_EQUAL(1u, fic.getNumUniqueWords()); } TEST_F("requireThatAppendInsertWorks", Fixture) { - Dictionary d(f.getSchema()); + FieldIndexCollection fic(f.getSchema()); SequencedTaskExecutor pushThreads(2); - WrapInserter(d, 0).word("a").add(10).flush().rewind(). + WrapInserter(fic, 0).word("a").add(10).flush().rewind(). word("a").add(5).flush(); - EXPECT_TRUE(assertPostingList("[5,10]", d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); - WrapInserter(d, 0).rewind().word("a").add(20).flush(); - EXPECT_TRUE(assertPostingList("[5,10,20]", d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); - myCommit(d, pushThreads); - EXPECT_TRUE(assertPostingList("[5,10,20]", d.findFrozen("a", 0))); + EXPECT_TRUE(assertPostingList("[5,10]", fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", fic.findFrozen("a", 0))); + WrapInserter(fic, 0).rewind().word("a").add(20).flush(); + EXPECT_TRUE(assertPostingList("[5,10,20]", fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", fic.findFrozen("a", 0))); + myCommit(fic, pushThreads); + EXPECT_TRUE(assertPostingList("[5,10,20]", fic.findFrozen("a", 0))); } TEST_F("requireThatMultiplePostingListsCanExist", Fixture) { - Dictionary d(f.getSchema()); - WrapInserter(d, 0).word("a").add(10).word("b").add(11).add(15).flush(); - WrapInserter(d, 1).word("a").add(5).word("b").add(12).flush(); - EXPECT_EQUAL(4u, d.getNumUniqueWords()); - EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[5]", d.find("a", 1))); - EXPECT_TRUE(assertPostingList("[11,15]", d.find("b", 0))); - EXPECT_TRUE(assertPostingList("[12]", d.find("b", 1))); - EXPECT_TRUE(assertPostingList("[]", d.find("a", 2))); - EXPECT_TRUE(assertPostingList("[]", d.find("c", 0))); + FieldIndexCollection fic(f.getSchema()); + WrapInserter(fic, 0).word("a").add(10).word("b").add(11).add(15).flush(); + WrapInserter(fic, 1).word("a").add(5).word("b").add(12).flush(); + EXPECT_EQUAL(4u, fic.getNumUniqueWords()); + EXPECT_TRUE(assertPostingList("[10]", fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[5]", fic.find("a", 1))); + EXPECT_TRUE(assertPostingList("[11,15]", fic.find("b", 0))); + EXPECT_TRUE(assertPostingList("[12]", fic.find("b", 1))); + EXPECT_TRUE(assertPostingList("[]", fic.find("a", 2))); + EXPECT_TRUE(assertPostingList("[]", fic.find("c", 0))); } TEST_F("requireThatRemoveWorks", Fixture) { - Dictionary d(f.getSchema()); - WrapInserter(d, 0).word("a").remove(10).flush(); - EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); - WrapInserter(d, 0).add(10).add(20).add(30).flush(); - EXPECT_TRUE(assertPostingList("[10,20,30]", d.find("a", 0))); - WrapInserter(d, 0).rewind().word("a").remove(10).flush(); - EXPECT_TRUE(assertPostingList("[20,30]", d.find("a", 0))); - WrapInserter(d, 0).remove(20).flush(); - EXPECT_TRUE(assertPostingList("[30]", d.find("a", 0))); - WrapInserter(d, 0).remove(30).flush(); - EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); - EXPECT_EQUAL(1u, d.getNumUniqueWords()); - MyDrainRemoves(d, 0).drain(10); - WrapInserter(d, 0).rewind().word("a").add(10).flush(); - EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + FieldIndexCollection fic(f.getSchema()); + WrapInserter(fic, 0).word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[]", fic.find("a", 0))); + WrapInserter(fic, 0).add(10).add(20).add(30).flush(); + EXPECT_TRUE(assertPostingList("[10,20,30]", fic.find("a", 0))); + WrapInserter(fic, 0).rewind().word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[20,30]", fic.find("a", 0))); + WrapInserter(fic, 0).remove(20).flush(); + EXPECT_TRUE(assertPostingList("[30]", fic.find("a", 0))); + WrapInserter(fic, 0).remove(30).flush(); + EXPECT_TRUE(assertPostingList("[]", fic.find("a", 0))); + EXPECT_EQUAL(1u, fic.getNumUniqueWords()); + MyDrainRemoves(fic, 0).drain(10); + WrapInserter(fic, 0).rewind().word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", fic.find("a", 0))); } TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture) @@ -675,7 +678,7 @@ TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture) EXPECT_TRUE(inserter.assertPostings()); inserter.rewind(); for (uint32_t fi = 0; fi < numFields; ++fi) { - MyDrainRemoves drainRemoves(inserter.getDict(), fi); + MyDrainRemoves drainRemoves(inserter.getFieldIndexes(), fi); for (uint32_t di = 0; di < 'z' * 2 + 1; ++di) { drainRemoves.drain(di); } @@ -723,20 +726,20 @@ getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) TEST_F("requireThatFeaturesAreInPostingLists", Fixture) { - Dictionary d(f.getSchema()); - WrapInserter(d, 0).word("a").add(1, getFeatures(4, 2)).flush(); + FieldIndexCollection fic(f.getSchema()); + WrapInserter(fic, 0).word("a").add(1, getFeatures(4, 2)).flush(); EXPECT_TRUE(assertPostingList("[1{4:0,1}]", - d.find("a", 0), - featureStorePtr(d, 0))); - WrapInserter(d, 0).word("b").add(2, getFeatures(5, 1)). + fic.find("a", 0), + featureStorePtr(fic, 0))); + WrapInserter(fic, 0).word("b").add(2, getFeatures(5, 1)). add(3, getFeatures(6, 2)).flush(); EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]", - d.find("b", 0), - featureStorePtr(d, 0))); - WrapInserter(d, 1).word("c").add(4, getFeatures(7, 2)).flush(); + fic.find("b", 0), + featureStorePtr(fic, 0))); + WrapInserter(fic, 1).word("c").add(4, getFeatures(7, 2)).flush(); EXPECT_TRUE(assertPostingList("[4{7:0,1}]", - d.find("c", 1), - featureStorePtr(d, 1))); + fic.find("c", 1), + featureStorePtr(fic, 1))); } class Verifier : public SearchIteratorVerifier { @@ -748,20 +751,20 @@ public: (void) strict; TermFieldMatchDataArray matchData; matchData.add(&_tfmd); - return std::make_unique<PostingIterator>(_dictionary.find("a", 0), featureStoreRef(_dictionary, 0), 0, matchData); + return std::make_unique<PostingIterator>(_fieldIndexes.find("a", 0), featureStoreRef(_fieldIndexes, 0), 0, matchData); } private: mutable TermFieldMatchData _tfmd; - Dictionary _dictionary; + FieldIndexCollection _fieldIndexes; }; Verifier::Verifier(const Schema & schema) : _tfmd(), - _dictionary(schema) + _fieldIndexes(schema) { - WrapInserter inserter(_dictionary, 0); + WrapInserter inserter(_fieldIndexes, 0); inserter.word("a"); for (uint32_t docId : getExpectedDocIds()) { inserter.add(docId); @@ -778,8 +781,8 @@ TEST_F("require that postingiterator conforms", Fixture) { TEST_F("requireThatPostingIteratorIsWorking", Fixture) { - Dictionary d(f.getSchema()); - WrapInserter(d, 0).word("a").add(10, getFeatures(4, 1)). + FieldIndexCollection fic(f.getSchema()); + WrapInserter(fic, 0).word("a").add(10, getFeatures(4, 1)). add(20, getFeatures(5, 2)). add(30, getFeatures(6, 1)). add(40, getFeatures(7, 2)).flush(); @@ -787,15 +790,15 @@ TEST_F("requireThatPostingIteratorIsWorking", Fixture) TermFieldMatchDataArray matchData; matchData.add(&tfmd); { - PostingIterator itr(d.find("not", 0), - featureStoreRef(d, 0), + PostingIterator itr(fic.find("not", 0), + featureStoreRef(fic, 0), 0, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(d.find("a", 0), - featureStoreRef(d, 0), + PostingIterator itr(fic.find("a", 0), + featureStoreRef(fic, 0), 0, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -834,28 +837,28 @@ TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) EXPECT_EQUAL("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr()); } { - Dictionary d(f.getSchema()); + FieldIndexCollection fic(f.getSchema()); MyBuilder b(f.getSchema()); DocIdAndFeatures df; - WrapInserter(d, 1).word("a").add(5, getFeatures(2, 1)). + WrapInserter(fic, 1).word("a").add(5, getFeatures(2, 1)). add(7, getFeatures(3, 2)). word("b").add(5, getFeatures(12, 2)).flush(); df = getFeatures(4, 1); addElement(df, 5, 2); - WrapInserter(d, 2).word("a").add(5, df); + WrapInserter(fic, 2).word("a").add(5, df); df = getFeatures(6, 1); addElement(df, 7, 2); - WrapInserter(d, 2).add(7, df).flush(); + WrapInserter(fic, 2).add(7, df).flush(); df = getFeatures(8, 1, 12); addElement(df, 9, 2, 13); - WrapInserter(d, 3).word("a").add(5, df); + WrapInserter(fic, 3).word("a").add(5, df); df = getFeatures(10, 1, 14); addElement(df, 11, 2, 15); - WrapInserter(d, 3).add(7, df).flush(); + WrapInserter(fic, 3).add(7, df).flush(); - d.dump(b); + fic.dump(b); EXPECT_EQUAL("f=0[]," "f=1[w=a[d=5[e=0,w=1,l=2[0]],d=7[e=0,w=1,l=3[0,1]]]," @@ -867,13 +870,13 @@ TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) b.toStr()); } { // test word with no docs - Dictionary d(f.getSchema()); - WrapInserter(d, 0).word("a").add(2, getFeatures(2, 1)). + FieldIndexCollection fic(f.getSchema()); + WrapInserter(fic, 0).word("a").add(2, getFeatures(2, 1)). word("b").add(4, getFeatures(4, 1)).flush().rewind(). word("a").remove(2).flush(); { MyBuilder b(f.getSchema()); - d.dump(b); + fic.dump(b); EXPECT_EQUAL("f=0[w=b[d=4[e=0,w=1,l=4[0]]]],f=1[],f=2[],f=3[]", b.toStr()); } @@ -883,7 +886,7 @@ TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; b.open(5, 2, tuneFileIndexing, fileHeaderContext); - d.dump(b); + fic.dump(b); b.close(); } } @@ -891,19 +894,19 @@ TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) template <typename FixtureBase> -class DictionaryFixture : public FixtureBase +class FieldIndexFixture : public FixtureBase { public: using FixtureBase::getSchema; - Dictionary _d; + FieldIndexCollection _fic; DocBuilder _b; SequencedTaskExecutor _invertThreads; SequencedTaskExecutor _pushThreads; DocumentInverter _inv; - DictionaryFixture() + FieldIndexFixture() : FixtureBase(), - _d(getSchema()), + _fic(getSchema()), _b(getSchema()), _invertThreads(2), _pushThreads(2), @@ -913,7 +916,7 @@ public: }; -TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) +TEST_F("requireThatInversionIsWorking", FieldIndexFixture<Fixture>) { Document::UP doc; @@ -924,7 +927,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) doc = f._b.endDocument(); f._inv.invertDocument(10, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); f._b.startDocument("doc::20"); @@ -934,7 +937,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) doc = f._b.endDocument(); f._inv.invertDocument(20, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); f._b.startDocument("doc::30"); @@ -965,7 +968,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) doc = f._b.endDocument(); f._inv.invertDocument(30, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); f._b.startDocument("doc::40"); @@ -976,7 +979,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) doc = f._b.endDocument(); f._inv.invertDocument(40, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); f._b.startDocument("doc::999"); @@ -1006,12 +1009,12 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) for (uint32_t docId = 10000; docId < 20000; ++docId) { f._inv.invertDocument(docId, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); } f._pushThreads.sync(); - DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._d); + DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._fic); LOG(info, "Before feature compaction: allocElems=%zu, usedElems=%zu" ", deadElems=%zu, holdElems=%zu" @@ -1024,14 +1027,14 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) beforeStats._freeBuffers, beforeStats._activeBuffers, beforeStats._holdBuffers); - myCompactFeatures(f._d, f._pushThreads); + myCompactFeatures(f._fic, f._pushThreads); std::vector<std::unique_ptr<GenerationHandler::Guard>> guards; - for (auto &fieldIndex : f._d.getFieldIndexes()) { + for (auto &fieldIndex : f._fic.getFieldIndexes()) { guards.push_back(std::make_unique<GenerationHandler::Guard> (fieldIndex->takeGenerationGuard())); } - myCommit(f._d, f._pushThreads); - DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._d); + myCommit(f._fic, f._pushThreads); + DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._fic); LOG(info, "During feature compaction: allocElems=%zu, usedElems=%zu" ", deadElems=%zu, holdElems=%zu" @@ -1045,8 +1048,8 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) duringStats._activeBuffers, duringStats._holdBuffers); guards.clear(); - myCommit(f._d, f._pushThreads); - DataStoreBase::MemStats afterStats = getFeatureStoreMemStats(f._d); + myCommit(f._fic, f._pushThreads); + DataStoreBase::MemStats afterStats = getFeatureStoreMemStats(f._fic); LOG(info, "After feature compaction: allocElems=%zu, usedElems=%zu" ", deadElems=%zu, holdElems=%zu" @@ -1064,12 +1067,12 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) TermFieldMatchDataArray matchData; matchData.add(&tfmd); { - PostingIterator itr(f._d.findFrozen("not", 0), featureStoreRef(f._d, 0), 0, matchData); + PostingIterator itr(f._fic.findFrozen("not", 0), featureStoreRef(f._fic, 0), 0, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(f._d.findFrozen("a", 0), featureStoreRef(f._d, 0), 0, matchData); + PostingIterator itr(f._fic.findFrozen("a", 0), featureStoreRef(f._fic, 0), 0, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); itr.unpack(10); @@ -1086,19 +1089,19 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(f._d.findFrozen("x", 0), featureStoreRef(f._d, 0), 0, matchData); + PostingIterator itr(f._fic.findFrozen("x", 0), featureStoreRef(f._fic, 0), 0, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { - PostingIterator itr(f._d.findFrozen("x", 1), featureStoreRef(f._d, 1), 1, matchData); + PostingIterator itr(f._fic.findFrozen("x", 1), featureStoreRef(f._fic, 1), 1, matchData); itr.initFullRange(); EXPECT_EQUAL(30u, itr.getDocId()); itr.unpack(30); EXPECT_EQUAL("{6:2[e=0,w=1,l=6]}", toString(tfmd.getIterator(), true, true)); } { - PostingIterator itr(f._d.findFrozen("x", 2), featureStoreRef(f._d, 2), 2, matchData); + PostingIterator itr(f._fic.findFrozen("x", 2), featureStoreRef(f._fic, 2), 2, matchData); itr.initFullRange(); EXPECT_EQUAL(30u, itr.getDocId()); itr.unpack(30); @@ -1106,7 +1109,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) EXPECT_EQUAL("{2:1[e=0,w=1,l=2]}", toString(tfmd.getIterator(), true, true)); } { - PostingIterator itr(f._d.findFrozen("x", 3), featureStoreRef(f._d, 3), 3, matchData); + PostingIterator itr(f._fic.findFrozen("x", 3), featureStoreRef(f._fic, 3), 3, matchData); itr.initFullRange(); EXPECT_EQUAL(30u, itr.getDocId()); itr.unpack(30); @@ -1116,7 +1119,7 @@ TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) } TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", - DictionaryFixture<Fixture>) + FieldIndexFixture<Fixture>) { Document::UP doc; @@ -1126,7 +1129,7 @@ TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", Document::UP doc1 = f._b.endDocument(); f._inv.invertDocument(1, *doc1.get()); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); f._b.startDocument("doc::2"); @@ -1134,23 +1137,23 @@ TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", Document::UP doc2 = f._b.endDocument(); f._inv.invertDocument(2, *doc2.get()); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); - EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[1,2]", f._d.find("b", 0))); - EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); - EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 1))); - EXPECT_TRUE(assertPostingList("[1]", f._d.find("c", 1))); + EXPECT_TRUE(assertPostingList("[1]", f._fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[1,2]", f._fic.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._fic.find("c", 0))); + EXPECT_TRUE(assertPostingList("[1]", f._fic.find("a", 1))); + EXPECT_TRUE(assertPostingList("[1]", f._fic.find("c", 1))); - myremove(1, f._inv, f._d, f._invertThreads); + myremove(1, f._inv, f._fic, f._invertThreads); f._pushThreads.sync(); - EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 0))); - EXPECT_TRUE(assertPostingList("[2]", f._d.find("b", 0))); - EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); - EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 1))); - EXPECT_TRUE(assertPostingList("[]", f._d.find("c", 1))); + EXPECT_TRUE(assertPostingList("[]", f._fic.find("a", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._fic.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._fic.find("c", 0))); + EXPECT_TRUE(assertPostingList("[]", f._fic.find("a", 1))); + EXPECT_TRUE(assertPostingList("[]", f._fic.find("c", 1))); } class UriFixture @@ -1168,7 +1171,7 @@ public: }; -TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) +TEST_F("requireThatUriIndexingIsWorking", FieldIndexFixture<UriFixture>) { Document::UP doc; @@ -1295,7 +1298,7 @@ TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) doc = f._b.endDocument(); f._inv.invertDocument(10, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); @@ -1304,16 +1307,16 @@ TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) matchData.add(&tfmd); { uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); - PostingIterator itr(f._d.findFrozen("not", fieldId), - featureStoreRef(f._d, fieldId), + PostingIterator itr(f._fic.findFrozen("not", fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); - PostingIterator itr(f._d.findFrozen("example", fieldId), - featureStoreRef(f._d, fieldId), + PostingIterator itr(f._fic.findFrozen("example", fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -1324,8 +1327,8 @@ TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) } { uint32_t fieldId = f.getSchema().getIndexFieldId("iau"); - PostingIterator itr(f._d.findFrozen("example", fieldId), - featureStoreRef(f._d, fieldId), + PostingIterator itr(f._fic.findFrozen("example", fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -1337,8 +1340,8 @@ TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) } { uint32_t fieldId = f.getSchema().getIndexFieldId("iwu"); - PostingIterator itr(f._d.findFrozen("example", fieldId), - featureStoreRef(f._d, fieldId), + PostingIterator itr(f._fic.findFrozen("example", fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -1353,9 +1356,9 @@ TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) dib.setPrefix("urldump"); TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; - dib.open(11, f._d.getNumUniqueWords(), tuneFileIndexing, + dib.open(11, f._fic.getNumUniqueWords(), tuneFileIndexing, fileHeaderContext); - f._d.dump(dib); + f._fic.dump(dib); dib.close(); } } @@ -1373,7 +1376,7 @@ public: const Schema & getSchema() const { return _schema; } }; -TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) +TEST_F("requireThatCjkIndexingIsWorking", FieldIndexFixture<SingleFieldFixture>) { Document::UP doc; @@ -1387,7 +1390,7 @@ TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) doc = f._b.endDocument(); f._inv.invertDocument(10, *doc); f._invertThreads.sync(); - myPushDocument(f._inv, f._d); + myPushDocument(f._inv, f._fic); f._pushThreads.sync(); @@ -1396,18 +1399,18 @@ TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) matchData.add(&tfmd); { uint32_t fieldId = f.getSchema().getIndexFieldId("i"); - PostingIterator itr(f._d.findFrozen("not", fieldId), - featureStoreRef(f._d, fieldId), + PostingIterator itr(f._fic.findFrozen("not", fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_TRUE(itr.isAtEnd()); } { uint32_t fieldId = f.getSchema().getIndexFieldId("i"); - PostingIterator itr(f._d.findFrozen("我就" + PostingIterator itr(f._fic.findFrozen("我就" "是那个", fieldId), - featureStoreRef(f._d, fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -1418,10 +1421,10 @@ TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) } { uint32_t fieldId = f.getSchema().getIndexFieldId("i"); - PostingIterator itr(f._d.findFrozen("大灰" + PostingIterator itr(f._fic.findFrozen("大灰" "狼", fieldId), - featureStoreRef(f._d, fieldId), + featureStoreRef(f._fic, fieldId), fieldId, matchData); itr.initFullRange(); EXPECT_EQUAL(10u, itr.getDocId()); @@ -1434,7 +1437,7 @@ TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) void insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t docId, - Dictionary &dict) + FieldIndexCollection &dict) { EntryRef wordRef = WrapInserter(dict, fieldId).rewind().word(word). add(docId).flush().getWordRef(); @@ -1445,7 +1448,7 @@ insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t do TEST_F("require that insert tells which word ref that was inserted", Fixture) { - Dictionary d(f.getSchema()); + FieldIndexCollection d(f.getSchema()); insertAndAssertTuple("a", 1, 11, d); insertAndAssertTuple("b", 1, 11, d); insertAndAssertTuple("a", 2, 11, d); @@ -1457,14 +1460,14 @@ TEST_F("require that insert tells which word ref that was inserted", Fixture) struct RemoverFixture : public Fixture { - Dictionary _d; + FieldIndexCollection _fic; SequencedTaskExecutor _invertThreads; SequencedTaskExecutor _pushThreads; RemoverFixture() : Fixture(), - _d(getSchema()), + _fic(getSchema()), _invertThreads(2), _pushThreads(2) { @@ -1472,24 +1475,24 @@ struct RemoverFixture : public Fixture void assertPostingLists(const vespalib::string &e1, const vespalib::string &e2, const vespalib::string &e3) { - EXPECT_TRUE(assertPostingList(e1, _d.find("a", 1))); - EXPECT_TRUE(assertPostingList(e2, _d.find("a", 2))); - EXPECT_TRUE(assertPostingList(e3, _d.find("b", 1))); + EXPECT_TRUE(assertPostingList(e1, _fic.find("a", 1))); + EXPECT_TRUE(assertPostingList(e2, _fic.find("a", 2))); + EXPECT_TRUE(assertPostingList(e3, _fic.find("b", 1))); } void remove(uint32_t docId) { DocumentInverter inv(getSchema(), _invertThreads, _pushThreads); - myremove(docId, inv, _d, _invertThreads); + myremove(docId, inv, _fic, _invertThreads); _pushThreads.sync(); - EXPECT_FALSE(_d.getFieldIndex(0u)->getDocumentRemover(). + EXPECT_FALSE(_fic.getFieldIndex(0u)->getDocumentRemover(). getStore().get(docId).valid()); } }; TEST_F("require that document remover can remove several documents", RemoverFixture) { - WrapInserter(f._d, 1).word("a").add(11).add(13).add(15). + WrapInserter(f._fic, 1).word("a").add(11).add(13).add(15). word("b").add(11).add(15).flush(); - WrapInserter(f._d, 2).word("a").add(11).add(13).flush(); + WrapInserter(f._fic, 2).word("a").add(11).add(13).flush(); f.assertPostingLists("[11,13,15]", "[11,13]", "[11,15]"); f.remove(13); @@ -1504,8 +1507,8 @@ TEST_F("require that document remover can remove several documents", RemoverFixt TEST_F("require that removal of non-existing document does not do anything", RemoverFixture) { - WrapInserter(f._d, 1).word("a").add(11).word("b").add(11).flush(); - WrapInserter(f._d, 2).word("a").add(11).flush(); + WrapInserter(f._fic, 1).word("a").add(11).word("b").add(11).flush(); + WrapInserter(f._fic, 2).word("a").add(11).flush(); f.assertPostingLists("[11]", "[11]", "[11]"); f.remove(13); f.assertPostingLists("[11]", "[11]", "[11]"); diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt index f1127a3f554..ffcd7ebd975 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -2,12 +2,12 @@ vespa_add_library(searchlib_memoryindex OBJECT SOURCES compact_document_words_store.cpp - dictionary.cpp documentinverter.cpp document_remover.cpp featurestore.cpp + field_index.cpp + field_index_collection.cpp fieldinverter.cpp - memoryfieldindex.cpp memoryindex.cpp ordereddocumentinserter.cpp postingiterator.cpp diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp index ec8fab1991b..e2d089626b1 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp @@ -7,10 +7,9 @@ #include <vespa/log/log.h> LOG_SETUP(".memoryindex.compact_document_words_store"); -namespace search { -namespace memoryindex { +namespace search::memoryindex { -typedef CompactDocumentWordsStore::Builder Builder; +using Builder = CompactDocumentWordsStore::Builder; namespace { @@ -59,7 +58,7 @@ CompactDocumentWordsStore::Iterator::nextWord() } CompactDocumentWordsStore::Iterator::Iterator() - : _buf(NULL), + : _buf(nullptr), _remainingWords(0), _wordRef(0), _valid(false) @@ -173,6 +172,5 @@ CompactDocumentWordsStore::getMemoryUsage() const } -} // namespace memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h index d3bb2220dc5..ced7ec241bd 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h @@ -23,8 +23,8 @@ public: class Builder { public: - typedef std::unique_ptr<Builder> UP; - typedef vespalib::Array<datastore::EntryRef> WordRefVector; + using UP = std::unique_ptr<Builder>; + using WordRefVector = vespalib::Array<datastore::EntryRef>; private: uint32_t _docId; @@ -66,8 +66,8 @@ public: class Store { public: - typedef datastore::DataStoreT<datastore::EntryRefT<22> > DataStoreType; - typedef DataStoreType::RefType RefType; + using DataStoreType = datastore::DataStoreT<datastore::EntryRefT<22>>; + using RefType = DataStoreType::RefType; private: DataStoreType _store; @@ -82,7 +82,7 @@ public: MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); } }; - typedef vespalib::hash_map<uint32_t, datastore::EntryRef> DocumentWordsMap; + using DocumentWordsMap = vespalib::hash_map<uint32_t, datastore::EntryRef>; private: DocumentWordsMap _docs; diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp index 6ab17d5c524..67b519bbadc 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp @@ -4,11 +4,10 @@ #include "wordstore.h" #include <vespa/searchlib/common/sort.h> -namespace search { -namespace memoryindex { +namespace search::memoryindex { -typedef CompactDocumentWordsStore::Builder Builder; -typedef CompactDocumentWordsStore::Iterator Iterator; +using Builder = CompactDocumentWordsStore::Builder; +using Iterator = CompactDocumentWordsStore::Iterator; DocumentRemover::DocumentRemover(const WordStore &wordStore) : _store(), @@ -49,11 +48,11 @@ DocumentRemover::flush() } ShiftBasedRadixSorter<WordFieldDocTuple, WordFieldDocTuple::Radix, std::less<WordFieldDocTuple>, 24, true>:: radix_sort(WordFieldDocTuple::Radix(), std::less<WordFieldDocTuple>(), &_wordFieldDocTuples[0], _wordFieldDocTuples.size(), 16); - Builder::UP builder(new Builder(_wordFieldDocTuples[0]._docId)); + auto builder = std::make_unique<Builder>(_wordFieldDocTuples[0]._docId); for (const auto &tuple : _wordFieldDocTuples) { if (builder->docId() != tuple._docId) { _store.insert(*builder); - builder.reset(new Builder(tuple._docId)); + builder = std::make_unique<Builder>(tuple._docId); } builder->insert(tuple._wordRef); } @@ -62,5 +61,4 @@ DocumentRemover::flush() } -} // namespace memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.h b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h index dbd5bf5d472..5d44a666ff9 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_remover.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h @@ -4,8 +4,7 @@ #include "compact_document_words_store.h" #include "i_document_insert_listener.h" -namespace search { -namespace memoryindex { +namespace search::memoryindex { class IDocumentRemoveListener; class WordStore; @@ -59,6 +58,5 @@ public: void flush() override; }; -} // namespace memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp index 08d77fa8dce..1501ff7d2fc 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp @@ -1,21 +1,21 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "documentinverter.h" +#include "field_index_collection.h" #include "fieldinverter.h" -#include "urlfieldinverter.h" -#include "dictionary.h" #include "ordereddocumentinserter.h" -#include <vespa/document/datatype/urldatatype.h> +#include "urlfieldinverter.h" #include <vespa/document/annotation/alternatespanlist.h> -#include <vespa/searchlib/util/url.h> -#include <stdexcept> -#include <vespa/vespalib/text/utf8.h> -#include <vespa/vespalib/text/lowercase.h> -#include <vespa/searchlib/common/sort.h> +#include <vespa/document/datatype/urldatatype.h> #include <vespa/document/repo/fixedtyperepo.h> #include <vespa/searchlib/common/isequencedtaskexecutor.h> -#include <vespa/log/log.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/searchlib/util/url.h> +#include <vespa/vespalib/text/lowercase.h> +#include <vespa/vespalib/text/utf8.h> +#include <stdexcept> +#include <vespa/log/log.h> LOG_SETUP(".memoryindex.documentinverter"); namespace search::memoryindex { @@ -95,7 +95,7 @@ DocumentInverter::addFieldPath(const document::DocumentType &docType, _schema.getIndexField(fieldId).getName().c_str(), docType.getName().c_str()); } else { - fp.reset(new Field(docType.getField(_schema.getIndexField(fieldId).getName()))); + fp = std::make_unique<Field>(docType.getField(_schema.getIndexField(fieldId).getName())); } _indexedFieldPaths[fieldId] = std::move(fp); } @@ -177,14 +177,13 @@ DocumentInverter::removeDocument(uint32_t docId) void -DocumentInverter::pushDocuments(Dictionary &dict, - const std::shared_ptr<IDestructorCallback> & - onWriteDone) +DocumentInverter::pushDocuments(FieldIndexCollection &fieldIndexes, + const std::shared_ptr<IDestructorCallback> &onWriteDone) { - auto indexFieldIterator = dict.getFieldIndexes().begin(); + auto indexFieldIterator = fieldIndexes.getFieldIndexes().begin(); uint32_t fieldId = 0; for (auto &inverter : _inverters) { - MemoryFieldIndex &fieldIndex(**indexFieldIterator); + FieldIndex &fieldIndex(**indexFieldIterator); DocumentRemover &remover(fieldIndex.getDocumentRemover()); OrderedDocumentInserter &inserter(fieldIndex.getInserter()); _pushThreads.execute(fieldId, diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h index e92c0810ea7..fa8d13d98fc 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h @@ -22,7 +22,7 @@ namespace search::memoryindex { class FieldInverter; class UrlFieldInverter; -class Dictionary; +class FieldIndexCollection; class DocumentInverter { @@ -37,9 +37,8 @@ private: void invertNormalDocTextField(size_t fieldId, const document::FieldValue &field); void invertNormalDocUriField(const index::UriField &handle, const document::FieldValue &field); - //typedef document::FieldPath FieldPath; - typedef document::Field FieldPath; - typedef std::vector<std::unique_ptr<FieldPath> > IndexedFieldPaths; + using FieldPath = document::Field; + using IndexedFieldPaths = std::vector<std::unique_ptr<FieldPath>>; IndexedFieldPaths _indexedFieldPaths; const document::DataType * _dataType; @@ -70,11 +69,9 @@ public: ~DocumentInverter(); /** - * Push inverted documents to memory index structure. - * - * @param dict dictionary + * Push inverted documents to memory field indexes. */ - void pushDocuments(Dictionary &dict, const std::shared_ptr<IDestructorCallback> &onWriteDone); + void pushDocuments(FieldIndexCollection &fieldIndexes, const std::shared_ptr<IDestructorCallback> &onWriteDone); /** * Invert a document. diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp index 9fdb23876d8..c032bb33217 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp @@ -73,9 +73,9 @@ FeatureStore::moveFeatures(datastore::EntryRef ref, uint64_t bitLen) FeatureStore::FeatureStore(const Schema &schema) : _store(), - _f(NULL), + _f(nullptr), _fctx(_f), - _d(NULL), + _d(nullptr), _fieldsParams(), _schema(schema), _type(RefType::align(1u), MIN_BUFFER_ARRAYS, @@ -88,7 +88,7 @@ FeatureStore::FeatureStore(const Schema &schema) _fieldsParams.resize(_schema.getNumIndexFields()); SchemaUtil::IndexIterator it(_schema); - for(; it.isValid(); ++it) { + for (; it.isValid(); ++it) { _fieldsParams[it.getIndex()].setSchemaParams(_schema, it.getIndex()); } _store.addType(&_type); diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h index f3c2ad6cd03..ef75b9f6d31 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h +++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h @@ -12,17 +12,16 @@ namespace search::memoryindex { class FeatureStore { public: - typedef datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2> > DataStoreType; - typedef DataStoreType::RefType RefType; - typedef bitcompression::EG2PosOccEncodeContext<true> EncodeContext; - typedef bitcompression::EG2PosOccDecodeContextCooked<true> - DecodeContextCooked; - typedef vespalib::GenerationHandler::generation_t generation_t; + using DataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2>>; + using RefType = DataStoreType::RefType; + using EncodeContext = bitcompression::EG2PosOccEncodeContext<true>; + using DecodeContextCooked = bitcompression::EG2PosOccDecodeContextCooked<true>; + using generation_t = vespalib::GenerationHandler::generation_t; private: - typedef index::Schema Schema; - typedef index::DocIdAndFeatures DocIdAndFeatures; - typedef bitcompression::PosOccFieldsParams PosOccFieldsParams; + using Schema = index::Schema; + using DocIdAndFeatures = index::DocIdAndFeatures; + using PosOccFieldsParams = bitcompression::PosOccFieldsParams; static const uint32_t DECODE_SAFETY = 16; diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index 4e02c3cb09d..4d42b9ae493 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -1,6 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "memoryfieldindex.h" +#include "field_index.h" #include "ordereddocumentinserter.h" #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/exceptions.h> @@ -23,13 +23,13 @@ namespace search::memoryindex { using datastore::EntryRef; vespalib::asciistream & -operator<<(vespalib::asciistream & os, const MemoryFieldIndex::WordKey & rhs) +operator<<(vespalib::asciistream & os, const FieldIndex::WordKey & rhs) { os << "wr(" << rhs._wordRef.ref() << ")"; return os; } -MemoryFieldIndex::MemoryFieldIndex(const Schema & schema, uint32_t fieldId) +FieldIndex::FieldIndex(const Schema & schema, uint32_t fieldId) : _wordStore(), _numUniqueWords(0), _generationHandler(), @@ -41,7 +41,7 @@ MemoryFieldIndex::MemoryFieldIndex(const Schema & schema, uint32_t fieldId) _inserter(std::make_unique<OrderedDocumentInserter>(*this)) { } -MemoryFieldIndex::~MemoryFieldIndex() +FieldIndex::~FieldIndex() { _postingListStore.disableFreeLists(); _postingListStore.disableElemHoldList(); @@ -68,8 +68,8 @@ MemoryFieldIndex::~MemoryFieldIndex() trimHoldLists(); } -MemoryFieldIndex::PostingList::Iterator -MemoryFieldIndex::find(const vespalib::stringref word) const +FieldIndex::PostingList::Iterator +FieldIndex::find(const vespalib::stringref word) const { DictionaryTree::Iterator itr = _dict.find(WordKey(EntryRef()), KeyComp(_wordStore, word)); if (itr.valid()) { @@ -78,10 +78,10 @@ MemoryFieldIndex::find(const vespalib::stringref word) const return PostingList::Iterator(); } -MemoryFieldIndex::PostingList::ConstIterator -MemoryFieldIndex::findFrozen(const vespalib::stringref word) const +FieldIndex::PostingList::ConstIterator +FieldIndex::findFrozen(const vespalib::stringref word) const { - DictionaryTree::ConstIterator itr = _dict.getFrozenView().find(WordKey(EntryRef()), KeyComp(_wordStore, word)); + auto itr = _dict.getFrozenView().find(WordKey(EntryRef()), KeyComp(_wordStore, word)); if (itr.valid()) { return _postingListStore.beginFrozen(EntryRef(itr.getData())); } @@ -90,23 +90,24 @@ MemoryFieldIndex::findFrozen(const vespalib::stringref word) const void -MemoryFieldIndex::compactFeatures() +FieldIndex::compactFeatures() { std::vector<uint32_t> toHold; toHold = _featureStore.startCompact(); - DictionaryTree::Iterator itr(_dict.begin()); + auto itr = _dict.begin(); uint32_t packedIndex = _fieldId; for (; itr.valid(); ++itr) { PostingListStore::RefType pidx(EntryRef(itr.getData())); - if (!pidx.valid()) + if (!pidx.valid()) { continue; + } uint32_t clusterSize = _postingListStore.getClusterSize(pidx); if (clusterSize == 0) { const PostingList *tree = _postingListStore.getTreeEntry(pidx); - PostingList::Iterator it(tree->begin(_postingListStore.getAllocator())); - for (; it.valid(); ++it) { - EntryRef oldFeatures(it.getData()); + auto pitr = tree->begin(_postingListStore.getAllocator()); + for (; pitr.valid(); ++pitr) { + EntryRef oldFeatures(pitr.getData()); // Filter on which buffers to move features from when // performing incremental compaction. @@ -117,7 +118,7 @@ MemoryFieldIndex::compactFeatures() std::atomic_thread_fence(std::memory_order_release); // Ugly, ugly due to const_cast in iterator - it.writeData(newFeatures.ref()); + pitr.writeData(newFeatures.ref()); } } else { const PostingListKeyDataType *shortArray = _postingListStore.getKeyDataEntry(pidx, clusterSize); @@ -139,31 +140,32 @@ MemoryFieldIndex::compactFeatures() } } } - typedef GenerationHandler::generation_t generation_t; + using generation_t = GenerationHandler::generation_t; _featureStore.finishCompact(toHold); generation_t generation = _generationHandler.getCurrentGeneration(); _featureStore.transferHoldLists(generation); } void -MemoryFieldIndex::dump(search::index::IndexBuilder & indexBuilder) +FieldIndex::dump(search::index::IndexBuilder & indexBuilder) { vespalib::stringref word; - FeatureStore::DecodeContextCooked decoder(NULL); + FeatureStore::DecodeContextCooked decoder(nullptr); DocIdAndFeatures features; vespalib::Array<uint32_t> wordMap(_numUniqueWords + 1, 0); _featureStore.setupForField(_fieldId, decoder); - for (DictionaryTree::Iterator itr = _dict.begin(); itr.valid(); ++itr) { + for (auto itr = _dict.begin(); itr.valid(); ++itr) { const WordKey & wk = itr.getKey(); PostingListStore::RefType plist(EntryRef(itr.getData())); word = _wordStore.getWord(wk._wordRef); - if (!plist.valid()) + if (!plist.valid()) { continue; + } indexBuilder.startWord(word); uint32_t clusterSize = _postingListStore.getClusterSize(plist); if (clusterSize == 0) { const PostingList *tree = _postingListStore.getTreeEntry(plist); - PostingList::Iterator pitr = tree->begin(_postingListStore.getAllocator()); + auto pitr = tree->begin(_postingListStore.getAllocator()); assert(pitr.valid()); for (; pitr.valid(); ++pitr) { uint32_t docId = pitr.getKey(); @@ -218,7 +220,7 @@ MemoryFieldIndex::dump(search::index::IndexBuilder & indexBuilder) MemoryUsage -MemoryFieldIndex::getMemoryUsage() const +FieldIndex::getMemoryUsage() const { MemoryUsage usage; usage.merge(_wordStore.getMemoryUsage()); @@ -234,78 +236,78 @@ MemoryFieldIndex::getMemoryUsage() const namespace search::btree { template -class BTreeNodeDataWrap<memoryindex::MemoryFieldIndex::WordKey, BTreeDefaultTraits::LEAF_SLOTS>; +class BTreeNodeDataWrap<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::LEAF_SLOTS>; template -class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey, BTreeDefaultTraits::INTERNAL_SLOTS>; +class BTreeNodeT<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::INTERNAL_SLOTS>; #if 0 template -class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeT<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::LEAF_SLOTS>; #endif template -class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeTT<memoryindex::FieldIndex::WordKey, datastore::EntryRef, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS>; template -class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeTT<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::LEAF_SLOTS>; template -class BTreeInternalNode<memoryindex::MemoryFieldIndex::WordKey, +class BTreeInternalNode<memoryindex::FieldIndex::WordKey, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS>; template -class BTreeLeafNode<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeLeafNode<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::LEAF_SLOTS>; template -class BTreeNodeStore<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeStore<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; template -class BTreeIterator<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeIterator<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; template -class BTree<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTree<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; template -class BTreeRoot<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeRoot<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; template -class BTreeRootBase<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeRootBase<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; template -class BTreeNodeAllocator<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeAllocator<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 5b4ab7eb4fd..4a27e30b47a 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -17,19 +17,29 @@ namespace search::memoryindex { class OrderedDocumentInserter; -/* + +/** * Memory index for a single field. + * + * It consists of the following components: + * - WordStore containing all unique words in this field (across all documents). + * - B-Tree dictionary that maps from unique word (32-bit ref) -> posting list (32-bit ref). + * - B-Tree posting lists that maps from document id (32-bit) -> features (32-bit ref). + * - BTreeStore containing all the posting lists. + * - FeatureStore containing information on where a (word, document) pair matched this field. + * This information is unpacked and used during ranking. + * + * Elements in the three stores are accessed using 32-bit references / handles. */ -class MemoryFieldIndex { +class FieldIndex { public: - typedef btree::BTreeRoot<uint32_t, uint32_t, search::btree::NoAggregated> - PostingList; // docid -> feature ref - typedef btree::BTreeStore<uint32_t, uint32_t, - search::btree::NoAggregated, - std::less<uint32_t>, - btree::BTreeDefaultTraits> PostingListStore; - typedef PostingListStore::KeyDataType PostingListKeyDataType; - + // Mapping from docid -> feature ref + using PostingList = btree::BTreeRoot<uint32_t, uint32_t, search::btree::NoAggregated>; + using PostingListStore = btree::BTreeStore<uint32_t, uint32_t, + search::btree::NoAggregated, + std::less<uint32_t>, + btree::BTreeDefaultTraits>; + using PostingListKeyDataType = PostingListStore::KeyDataType; struct WordKey { datastore::EntryRef _wordRef; @@ -69,12 +79,12 @@ public: } }; - typedef uint32_t PostingListPtr; - typedef btree::BTree<WordKey, PostingListPtr, - search::btree::NoAggregated, - const KeyComp> DictionaryTree; + using PostingListPtr = uint32_t; + using DictionaryTree = btree::BTree<WordKey, PostingListPtr, + search::btree::NoAggregated, + const KeyComp>; private: - typedef vespalib::GenerationHandler GenerationHandler; + using GenerationHandler = vespalib::GenerationHandler; WordStore _wordStore; uint64_t _numUniqueWords; @@ -98,8 +108,8 @@ public: return _featureStore.addFeatures(_fieldId, features).first; } - MemoryFieldIndex(const index::Schema &schema, uint32_t fieldId); - ~MemoryFieldIndex(); + FieldIndex(const index::Schema &schema, uint32_t fieldId); + ~FieldIndex(); PostingList::Iterator find(const vespalib::stringref word) const; PostingList::ConstIterator @@ -187,80 +197,80 @@ public: namespace search::btree { extern template -class BTreeNodeDataWrap<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeDataWrap<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::LEAF_SLOTS>; extern template -class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeT<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::INTERNAL_SLOTS>; #if 0 extern template -class BTreeNodeT<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeT<memoryindex::FieldIndex::WordKey, BTreeDefaultTraits::LEAF_SLOTS>; #endif extern template -class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey, +class BTreeNodeTT<memoryindex::FieldIndex::WordKey, datastore::EntryRef, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS>; extern template -class BTreeNodeTT<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeTT<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::LEAF_SLOTS>; extern template -class BTreeInternalNode<memoryindex::MemoryFieldIndex::WordKey, +class BTreeInternalNode<memoryindex::FieldIndex::WordKey, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS>; extern template -class BTreeLeafNode<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeLeafNode<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::LEAF_SLOTS>; extern template -class BTreeNodeStore<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeStore<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; extern template -class BTreeIterator<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeIterator<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; extern template -class BTree<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTree<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; extern template -class BTreeRoot<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeRoot<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, - const memoryindex::MemoryFieldIndex::KeyComp, + const memoryindex::FieldIndex::KeyComp, BTreeDefaultTraits>; extern template -class BTreeRootBase<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeRootBase<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; extern template -class BTreeNodeAllocator<memoryindex::MemoryFieldIndex::WordKey, - memoryindex::MemoryFieldIndex::PostingListPtr, +class BTreeNodeAllocator<memoryindex::FieldIndex::WordKey, + memoryindex::FieldIndex::PostingListPtr, search::btree::NoAggregated, BTreeDefaultTraits::INTERNAL_SLOTS, BTreeDefaultTraits::LEAF_SLOTS>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp index accf227b96c..45431f0e8ef 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp @@ -1,6 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "dictionary.h" +#include "field_index_collection.h" #include "fieldinverter.h" #include <vespa/searchlib/bitcompression/posocccompression.h> @@ -15,7 +15,7 @@ #include <vespa/vespalib/util/exceptions.h> #include <vespa/log/log.h> -LOG_SETUP(".searchlib.memoryindex.dictionary"); +LOG_SETUP(".searchlib.memoryindex.field_index_collection"); namespace search { @@ -26,23 +26,23 @@ using index::Schema; namespace memoryindex { -Dictionary::Dictionary(const Schema & schema) +FieldIndexCollection::FieldIndexCollection(const Schema & schema) : _fieldIndexes(), _numFields(schema.getNumIndexFields()) { for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) { - auto fieldIndex = std::make_unique<MemoryFieldIndex>(schema, fieldId); + auto fieldIndex = std::make_unique<FieldIndex>(schema, fieldId); _fieldIndexes.push_back(std::move(fieldIndex)); } } -Dictionary::~Dictionary() +FieldIndexCollection::~FieldIndexCollection() { } void -Dictionary::dump(search::index::IndexBuilder &indexBuilder) +FieldIndexCollection::dump(search::index::IndexBuilder &indexBuilder) { for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) { indexBuilder.startField(fieldId); @@ -52,7 +52,7 @@ Dictionary::dump(search::index::IndexBuilder &indexBuilder) } MemoryUsage -Dictionary::getMemoryUsage() const +FieldIndexCollection::getMemoryUsage() const { MemoryUsage usage; for (auto &fieldIndex : _fieldIndexes) { @@ -62,6 +62,5 @@ Dictionary::getMemoryUsage() const } -} // namespace search::memoryindex - -} // namespace search +} +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h index e9db79c1bd8..3b8e63626bf 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/dictionary.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h @@ -2,26 +2,32 @@ #pragma once -#include "memoryfieldindex.h" +#include "field_index.h" namespace search::memoryindex { class IDocumentRemoveListener; class FieldInverter; -class Dictionary { +/** + * The collection of all field indexes that are part of a memory index. + * + * Provides functions to create a posting list iterator (used for searching) + * for a given word in a given field. + */ +class FieldIndexCollection { public: - using PostingList = MemoryFieldIndex::PostingList; + using PostingList = FieldIndex::PostingList; private: - typedef vespalib::GenerationHandler GenerationHandler; + using GenerationHandler = vespalib::GenerationHandler; - std::vector<std::unique_ptr<MemoryFieldIndex> > _fieldIndexes; + std::vector<std::unique_ptr<FieldIndex>> _fieldIndexes; uint32_t _numFields; public: - Dictionary(const index::Schema &schema); - ~Dictionary(); + FieldIndexCollection(const index::Schema &schema); + ~FieldIndexCollection(); PostingList::Iterator find(const vespalib::stringref word, uint32_t fieldId) const { @@ -46,11 +52,11 @@ public: MemoryUsage getMemoryUsage() const; - MemoryFieldIndex *getFieldIndex(uint32_t fieldId) const { + FieldIndex *getFieldIndex(uint32_t fieldId) const { return _fieldIndexes[fieldId].get(); } - const std::vector<std::unique_ptr<MemoryFieldIndex> > & + const std::vector<std::unique_ptr<FieldIndex>> & getFieldIndexes() const { return _fieldIndexes; } uint32_t getNumFields() const { return _numFields; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp index 78f33b9fae8..fa261a4e90a 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp @@ -21,9 +21,7 @@ #include <vespa/document/annotation/spantree.h> #include <vespa/document/annotation/spantreevisitor.h> -namespace search { - -namespace memoryindex { +namespace search::memoryindex { using document::AlternateSpanList; using document::Annotation; @@ -112,11 +110,12 @@ FieldInverter::processAnnotations(const StringFieldValue &value) _terms.clear(); StringFieldValue::SpanTrees spanTrees = value.getSpanTrees(); const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME); - if (tree == NULL) { + if (tree == nullptr) { /* This is wrong unless field is exact match */ const vespalib::string &text = value.getValue(); - if (text.empty()) + if (text.empty()) { return; + } uint32_t wordRef = saveWord(text); if (wordRef != 0u) { add(wordRef); @@ -251,8 +250,9 @@ FieldInverter::saveWord(const vespalib::stringref word) const size_t wordsSize = _words.size(); // assert((wordsSize & 3) == 0); // Check alignment size_t len = word.size(); - if (len == 0) + if (len == 0) { return 0u; + } const size_t fullyPaddedSize = (wordsSize + 4 + len + 1 + 3) & ~3; _words.reserve(vespalib::roundUp2inN(fullyPaddedSize)); @@ -275,7 +275,7 @@ uint32_t FieldInverter::saveWord(const document::FieldValue &fv) { assert(fv.getClass().id() == StringFieldValue::classId); - typedef std::pair<const char*, size_t> RawRef; + using RawRef = std::pair<const char*, size_t>; RawRef sRef = fv.getAsRaw(); return saveWord(vespalib::stringref(sRef.first, sRef.second)); } @@ -307,8 +307,7 @@ FieldInverter::processNormalDocArrayTextField(const ArrayFieldValue &field) for (;el < ele; ++el) { const FieldValue &elfv = field[el]; assert(elfv.getClass().id() == StringFieldValue::classId); - const StringFieldValue &element = - static_cast<const StringFieldValue &>(elfv); + const auto &element = static_cast<const StringFieldValue &>(elfv); startElement(1); processAnnotations(element); endElement(); @@ -324,7 +323,7 @@ FieldInverter::processNormalDocWeightedSetTextField(const WeightedSetFieldValue const FieldValue &xweight = *el.second; assert(key.getClass().id() == StringFieldValue::classId); assert(xweight.getClass().id() == IntFieldValue::classId); - const StringFieldValue &element = static_cast<const StringFieldValue &>(key); + const auto &element = static_cast<const StringFieldValue &>(key); int32_t weight = xweight.getAsInt(); startElement(weight); processAnnotations(element); @@ -374,8 +373,9 @@ FieldInverter::moveNotAbortedDocs(uint32_t &dstIdx, { assert(nextTrimIdx >= srcIdx); uint32_t size = nextTrimIdx - srcIdx; - if (size == 0) + if (size == 0) { return; + } assert(dstIdx < srcIdx); assert(srcIdx < _positions.size()); assert(srcIdx + size <= _positions.size()); @@ -440,7 +440,7 @@ FieldInverter::invertNormalDocTextField(const FieldValue &val) break; case CollectionType::WEIGHTEDSET: if (cInfo.id() == WeightedSetFieldValue::classId) { - const WeightedSetFieldValue &wset = static_cast<const WeightedSetFieldValue &>(val); + const auto &wset = static_cast<const WeightedSetFieldValue &>(val); if (wset.getNestedType() == *DataType::STRING) { processNormalDocWeightedSetTextField(wset); } else { @@ -452,7 +452,7 @@ FieldInverter::invertNormalDocTextField(const FieldValue &val) break; case CollectionType::ARRAY: if (cInfo.id() == ArrayFieldValue::classId) { - const ArrayFieldValue &arr = static_cast<const ArrayFieldValue&>(val); + const auto &arr = static_cast<const ArrayFieldValue&>(val); if (arr.getNestedType() == *DataType::STRING) { processNormalDocArrayTextField(arr); } else { @@ -569,7 +569,5 @@ FieldInverter::pushDocuments(IOrderedDocumentInserter &inserter) } -} // namespace memoryindex - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h index 96108a50f77..69cfd370041 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h @@ -13,11 +13,7 @@ #include <vespa/searchlib/bitcompression/posocccompression.h> #include <vespa/document/annotation/span.h> -namespace search -{ - -namespace memoryindex -{ +namespace search::memoryindex { class IOrderedDocumentInserter; class DocumentRemover; @@ -97,7 +93,7 @@ private: FieldInverter &operator=(const FieldInverter &) = delete; FieldInverter &operator=(const FieldInverter &&) = delete; - typedef vespalib::Array<char> WordBuffer; + using WordBuffer = vespalib::Array<char>; class ElemInfo { @@ -118,9 +114,8 @@ private: } }; - typedef std::vector<ElemInfo> ElemInfoVec; - - typedef std::vector<PosInfo> PosInfoVec; + using ElemInfoVec = std::vector<ElemInfo>; + using PosInfoVec = std::vector<PosInfo>; class CompareWordRef { @@ -189,8 +184,8 @@ private: std::vector<uint32_t> _elementWordRefs; std::vector<uint32_t> _wordRefs; - typedef std::pair<document::Span, const document::FieldValue *> SpanTerm; - typedef std::vector<SpanTerm> SpanTermVector; + using SpanTerm = std::pair<document::Span, const document::FieldValue *>; + using SpanTermVector = std::vector<SpanTerm>; SpanTermVector _terms; // info about aborted and pending documents. @@ -442,7 +437,5 @@ public: } }; -} // namespace memoryindex - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h index a37f87aed97..194a98ef8ba 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h @@ -2,8 +2,7 @@ #pragma once #include <vespa/searchlib/datastore/entryref.h> -namespace search { -namespace memoryindex { +namespace search::memoryindex { /** * Interface used to track which {wordRef, fieldId} pairs that are @@ -17,7 +16,5 @@ public: virtual void flush() = 0; }; - -} // namespace memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp index 4c9ea41183a..90036c83efb 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp @@ -1,17 +1,18 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "documentinverter.h" +#include "field_index_collection.h" #include "memoryindex.h" #include "postingiterator.h" -#include "documentinverter.h" #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/document.h> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> #include <vespa/searchlib/index/schemautil.h> -#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/queryeval/leaf_blueprints.h> -#include <vespa/searchlib/common/sequencedtaskexecutor.h> -#include <vespa/searchlib/btree/btreenodeallocator.hpp> #include <vespa/log/log.h> LOG_SETUP(".searchlib.memoryindex.memoryindex"); @@ -61,7 +62,7 @@ MemoryIndex::MemoryIndex(const Schema &schema, _inverter0(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads)), _inverter1(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads)), _inverter(_inverter0.get()), - _dictionary(std::make_unique<Dictionary>(_schema)), + _fieldIndexes(std::make_unique<FieldIndexCollection>(_schema)), _frozen(false), _maxDocId(0), // docId 0 is reserved _numDocs(0), @@ -113,7 +114,7 @@ MemoryIndex::commit(const std::shared_ptr<IDestructorCallback> &onWriteDone) { _invertThreads.sync(); // drain inverting into this inverter _pushThreads.sync(); // drain use of other inverter - _inverter->pushDocuments(*_dictionary, onWriteDone); + _inverter->pushDocuments(*_fieldIndexes, onWriteDone); flipInverter(); } @@ -133,7 +134,7 @@ MemoryIndex::freeze() void MemoryIndex::dump(IndexBuilder &indexBuilder) { - _dictionary->dump(indexBuilder); + _fieldIndexes->dump(indexBuilder); } namespace { @@ -142,14 +143,14 @@ class MemTermBlueprint : public queryeval::SimpleLeafBlueprint { private: GenerationHandler::Guard _genGuard; - Dictionary::PostingList::ConstIterator _pitr; + FieldIndex::PostingList::ConstIterator _pitr; const FeatureStore &_featureStore; const uint32_t _fieldId; const bool _useBitVector; public: MemTermBlueprint(GenerationHandler::Guard &&genGuard, - Dictionary::PostingList::ConstIterator pitr, + FieldIndex::PostingList::ConstIterator pitr, const FeatureStore &featureStore, const FieldSpecBase &field, uint32_t fieldId, @@ -168,11 +169,11 @@ public: SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override { - SearchIterator::UP search(new PostingIterator(_pitr, _featureStore, _fieldId, tfmda)); + auto search = std::make_unique<PostingIterator>(_pitr, _featureStore, _fieldId, tfmda); if (_useBitVector) { LOG(debug, "Return BooleanMatchIteratorWrapper: fieldId(%u), docCount(%zu)", _fieldId, _pitr.size()); - return SearchIterator::UP(new BooleanMatchIteratorWrapper(std::move(search), tfmda)); + return std::make_unique<BooleanMatchIteratorWrapper>(std::move(search), tfmda); } LOG(debug, "Return PostingIterator: fieldId(%u), docCount(%zu)", _fieldId, _pitr.size()); @@ -189,32 +190,31 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper private: const FieldSpec &_field; const uint32_t _fieldId; - Dictionary & _dictionary; + FieldIndexCollection &_fieldIndexes; public: CreateBlueprintVisitor(Searchable &searchable, const IRequestContext & requestContext, const FieldSpec &field, uint32_t fieldId, - Dictionary &dictionary) + FieldIndexCollection &fieldIndexes) : CreateBlueprintVisitorHelper(searchable, field, requestContext), _field(field), _fieldId(fieldId), - _dictionary(dictionary) {} + _fieldIndexes(fieldIndexes) {} template <class TermNode> void visitTerm(TermNode &n) { const vespalib::string termStr = queryeval::termAsString(n); LOG(debug, "searching for '%s' in '%s'", termStr.c_str(), _field.getName().c_str()); - MemoryFieldIndex *fieldIndex = _dictionary.getFieldIndex(_fieldId); + FieldIndex *fieldIndex = _fieldIndexes.getFieldIndex(_fieldId); GenerationHandler::Guard genGuard = fieldIndex->takeGenerationGuard(); - Dictionary::PostingList::ConstIterator pitr - = fieldIndex->findFrozen(termStr); + FieldIndex::PostingList::ConstIterator pitr = fieldIndex->findFrozen(termStr); bool useBitVector = _field.isFilter(); - setResult(make_UP(new MemTermBlueprint(std::move(genGuard), pitr, - fieldIndex->getFeatureStore(), - _field, _fieldId, useBitVector))); + setResult(std::make_unique<MemTermBlueprint>(std::move(genGuard), pitr, + fieldIndex->getFeatureStore(), + _field, _fieldId, useBitVector)); } void visit(LocationTerm &n) override { visitTerm(n); } @@ -241,9 +241,9 @@ MemoryIndex::createBlueprint(const IRequestContext & requestContext, { uint32_t fieldId = _schema.getIndexFieldId(field.getName()); if (fieldId == Schema::UNKNOWN_FIELD_ID || _hiddenFields[fieldId]) { - return Blueprint::UP(new EmptyBlueprint(field)); + return std::make_unique<EmptyBlueprint>(field); } - CreateBlueprintVisitor visitor(*this, requestContext, field, fieldId, *_dictionary); + CreateBlueprintVisitor visitor(*this, requestContext, field, fieldId, *_fieldIndexes); const_cast<Node &>(term).accept(visitor); return visitor.getResult(); } @@ -252,28 +252,30 @@ MemoryUsage MemoryIndex::getMemoryUsage() const { MemoryUsage usage; - usage.merge(_dictionary->getMemoryUsage()); + usage.merge(_fieldIndexes->getMemoryUsage()); return usage; } uint64_t MemoryIndex::getNumWords() const { - return _dictionary->getNumUniqueWords(); + return _fieldIndexes->getNumUniqueWords(); } void MemoryIndex::pruneRemovedFields(const Schema &schema) { LockGuard lock(_lock); - if (_prunedSchema.get() == NULL) { - Schema::UP newSchema = Schema::intersect(_schema, schema); - if (_schema == *newSchema) + if (_prunedSchema.get() == nullptr) { + auto newSchema = Schema::intersect(_schema, schema); + if (_schema == *newSchema) { return; + } _prunedSchema.reset(newSchema.release()); } else { - Schema::UP newSchema = Schema::intersect(*_prunedSchema, schema); - if (*_prunedSchema == *newSchema) + auto newSchema = Schema::intersect(*_prunedSchema, schema); + if (*_prunedSchema == *newSchema) { return; + } _prunedSchema.reset(newSchema.release()); } SchemaUtil::IndexIterator i(_schema); diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h index d12e844f35f..621c72d56a3 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h @@ -17,7 +17,7 @@ namespace document { class Document; } namespace search::memoryindex { class DocumentInverter; -class Dictionary; +class FieldIndexCollection; /** * Lock-free implementation of a memory-based index @@ -32,7 +32,7 @@ private: std::unique_ptr<DocumentInverter> _inverter0; std::unique_ptr<DocumentInverter> _inverter1; DocumentInverter *_inverter; - std::unique_ptr<Dictionary> _dictionary; + std::unique_ptr<FieldIndexCollection> _fieldIndexes; bool _frozen; uint32_t _maxDocId; uint32_t _numDocs; @@ -65,11 +65,8 @@ private: void flipInverter(); public: - /** - * Convenience type defs. - */ - typedef std::unique_ptr<MemoryIndex> UP; - typedef std::shared_ptr<MemoryIndex> SP; + using UP = std::unique_ptr<MemoryIndex>; + using SP = std::shared_ptr<MemoryIndex>; /** * Create a new memory index based on the given schema. diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp index 1f15bcf1c75..3c4fca5b044 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp @@ -27,7 +27,7 @@ const vespalib::string emptyWord = ""; } -OrderedDocumentInserter::OrderedDocumentInserter(MemoryFieldIndex &fieldIndex) +OrderedDocumentInserter::OrderedDocumentInserter(FieldIndex &fieldIndex) : _word(), _prevDocId(noDocId), _prevAdd(false), @@ -106,7 +106,7 @@ OrderedDocumentInserter::setNextWord(const vespalib::stringref word) void OrderedDocumentInserter::add(uint32_t docId, - const index::DocIdAndFeatures &features) + const index::DocIdAndFeatures &features) { assert(docId != noDocId); assert(_prevDocId == noDocId || _prevDocId < docId || diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h index 9645c3890e2..328346e9eee 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h @@ -3,7 +3,7 @@ #pragma once #include "iordereddocumentinserter.h" -#include "memoryfieldindex.h" +#include "field_index.h" #include <limits> namespace search::memoryindex { @@ -12,7 +12,7 @@ class IDocumentInsertListener; /** - * Class for inserting updates to MemoryFieldIndex in an ordered manner + * Class for inserting updates to FieldIndex in an ordered manner * (single pass scan of dictionary tree) * * Insert order must be properly sorted, by (word, docId) @@ -22,12 +22,12 @@ class OrderedDocumentInserter : public IOrderedDocumentInserter vespalib::stringref _word; uint32_t _prevDocId; bool _prevAdd; - using DictionaryTree = MemoryFieldIndex::DictionaryTree; - using PostingListStore = MemoryFieldIndex::PostingListStore; - using KeyComp = MemoryFieldIndex::KeyComp; - using WordKey = MemoryFieldIndex::WordKey; - using PostingListKeyDataType = MemoryFieldIndex::PostingListKeyDataType; - MemoryFieldIndex &_fieldIndex; + using DictionaryTree = FieldIndex::DictionaryTree; + using PostingListStore = FieldIndex::PostingListStore; + using KeyComp = FieldIndex::KeyComp; + using WordKey = FieldIndex::WordKey; + using PostingListKeyDataType = FieldIndex::PostingListKeyDataType; + FieldIndex &_fieldIndex; DictionaryTree::Iterator _dItr; IDocumentInsertListener &_listener; @@ -47,7 +47,7 @@ class OrderedDocumentInserter : public IOrderedDocumentInserter void flushWord(); public: - OrderedDocumentInserter(MemoryFieldIndex &fieldIndex); + OrderedDocumentInserter(FieldIndex &fieldIndex); ~OrderedDocumentInserter() override; void setNextWord(const vespalib::stringref word) override; void add(uint32_t docId, const index::DocIdAndFeatures &features) override; diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp index 1f55aa466b1..ca56299f906 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp @@ -10,17 +10,16 @@ #include <vespa/log/log.h> LOG_SETUP(".searchlib.memoryindex.postingiterator"); -namespace search { -namespace memoryindex { +namespace search::memoryindex { -PostingIterator::PostingIterator(Dictionary::PostingList::ConstIterator itr, +PostingIterator::PostingIterator(FieldIndex::PostingList::ConstIterator itr, const FeatureStore & featureStore, uint32_t packedIndex, const fef::TermFieldMatchDataArray & matchData) : queryeval::RankedSearchIteratorBase(matchData), _itr(itr), _featureStore(featureStore), - _featureDecoder(NULL) + _featureDecoder(nullptr) { _featureStore.setupForField(packedIndex, _featureDecoder); } @@ -69,7 +68,5 @@ PostingIterator::doUnpack(uint32_t docId) setUnpacked(); } - -} // namespace search::memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h index 4960a3f299b..2838c65c5eb 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h +++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h @@ -2,19 +2,18 @@ #pragma once -#include "dictionary.h" +#include "field_index.h" #include <vespa/searchlib/queryeval/iterators.h> -namespace search { -namespace memoryindex { +namespace search::memoryindex { /** - * Search iterator for memory index posting list. - **/ + * Search iterator for memory field index posting list. + */ class PostingIterator : public queryeval::RankedSearchIteratorBase { private: - Dictionary::PostingList::ConstIterator _itr; + FieldIndex::PostingList::ConstIterator _itr; const FeatureStore &_featureStore; FeatureStore::DecodeContextCooked _featureDecoder; @@ -27,7 +26,7 @@ public: * @param packedIndex the field or field collection owning features. * @param matchData the match data to unpack features into. **/ - PostingIterator(Dictionary::PostingList::ConstIterator itr, + PostingIterator(FieldIndex::PostingList::ConstIterator itr, const FeatureStore &featureStore, uint32_t packedIndex, const fef::TermFieldMatchDataArray &matchData); @@ -39,6 +38,5 @@ public: Trinary is_strict() const override { return Trinary::True; } }; -} // namespace search::memoryindex -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp index be216f7c2ba..2c290f17782 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp @@ -16,9 +16,7 @@ #include <vespa/log/log.h> LOG_SETUP(".memoryindex.urlfieldinverter"); -namespace search { - -namespace memoryindex { +namespace search::memoryindex { namespace { @@ -46,8 +44,7 @@ lowercaseToken(vespalib::string &dest, const char *src, size_t srcSize) return dest.size(); } - -} // namespace +} using document::ArrayFieldValue; @@ -128,8 +125,9 @@ UrlFieldInverter::processUrlSubField(FieldInverter *inverter, bool addAnchors) { const FieldValue::UP sfv = field.getValue(subField); - if (!sfv) + if (!sfv) { return; + } if (!sfv->inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) { LOG(error, "Illegal field type %s for URL subfield %s, expected string", @@ -137,7 +135,7 @@ UrlFieldInverter::processUrlSubField(FieldInverter *inverter, vespalib::string(subField).data()); return; } - const StringFieldValue &value = static_cast<const StringFieldValue &>(*sfv); + const auto &value = static_cast<const StringFieldValue &>(*sfv); if (addAnchors) { inverter->addWord(HOSTNAME_BEGIN); } @@ -172,11 +170,10 @@ UrlFieldInverter::processUrlField(const FieldValue &url_field) return; } assert(url_field.getClass().id() == StructFieldValue::classId); - const StructFieldValue &field = - static_cast<const StructFieldValue &>(url_field); + const auto &field = static_cast<const StructFieldValue &>(url_field); const FieldValue::UP all_val = field.getValue("all"); - if (all_val.get() == NULL) { + if (all_val.get() == nullptr) { if (_useAnnotations) { // New style, use annotations processAnnotatedUrlField(field); @@ -190,12 +187,11 @@ UrlFieldInverter::processUrlField(const FieldValue &url_field) all_val->getDataType()->getName().c_str()); return; } - const StringFieldValue &all_sfv = - static_cast<const StringFieldValue &>(*all_val); + const auto &all_sfv = static_cast<const StringFieldValue &>(*all_val); if (_useAnnotations) { StringFieldValue::SpanTrees trees = all_sfv.getSpanTrees(); const SpanTree *tree = StringFieldValue::findTree(trees, SPANTREE_NAME); - if (tree != NULL) { + if (tree != nullptr) { // New style, use annotations processAnnotatedUrlField(field); return; @@ -320,7 +316,7 @@ UrlFieldInverter::invertUrlField(const FieldValue &val) break; case CollectionType::WEIGHTEDSET: if (cInfo.id() == WeightedSetFieldValue::classId) { - const WeightedSetFieldValue &wset = static_cast<const WeightedSetFieldValue &>(val); + const auto &wset = static_cast<const WeightedSetFieldValue &>(val); if (isUriType(wset.getNestedType())) { processWeightedSetUrlField(wset); } else { @@ -332,7 +328,7 @@ UrlFieldInverter::invertUrlField(const FieldValue &val) break; case CollectionType::ARRAY: if (cInfo.id() == ArrayFieldValue::classId) { - const ArrayFieldValue &arr = static_cast<const ArrayFieldValue&>(val); + const auto &arr = static_cast<const ArrayFieldValue&>(val); if (isUriType(arr.getNestedType())) { processArrayUrlField(arr); } else { @@ -394,7 +390,5 @@ UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType, } -} // namespace memoryindex - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h index 74f96cd40c3..c902feaf5a6 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h @@ -5,11 +5,7 @@ #include <vespa/searchcommon/common/datatype.h> #include <vespa/document/fieldvalue/structfieldvalue.h> -namespace search -{ - -namespace memoryindex -{ +namespace search::memoryindex { class FieldInverter; @@ -71,7 +67,4 @@ public: } }; - -} // namespace memoryindex - -} // namespace search +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/wordstore.h b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h index a9e941e04d1..b909f26157f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/wordstore.h +++ b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h @@ -10,8 +10,8 @@ namespace search::memoryindex { class WordStore { public: - typedef datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2> > DataStoreType; - typedef DataStoreType::RefType RefType; + using DataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2>>; + using RefType = DataStoreType::RefType; private: DataStoreType _store; diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h index ede14f237b2..7fa46fc7531 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h @@ -4,8 +4,8 @@ #include "fakeword.h" #include "fakeposting.h" #include "fpfactory.h" -#include <vespa/searchlib/memoryindex/dictionary.h> #include <vespa/searchlib/memoryindex/featurestore.h> +#include <vespa/searchlib/memoryindex/field_index.h> #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/bitcompression/posocccompression.h> @@ -15,7 +15,7 @@ namespace fakedata { class FakeMemTreeOccMgr : public FakeWord::RandomizedWriter { public: - typedef memoryindex::Dictionary::PostingList Tree; + typedef memoryindex::FieldIndex::PostingList Tree; typedef Tree::NodeAllocatorType NodeAllocator; typedef memoryindex::FeatureStore FeatureStore; typedef datastore::EntryRef EntryRef; |