summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp 25
-rw-r--r-- searchcore/src/tests/proton/index/fusionrunner_test.cpp 12
-rw-r--r-- searchcore/src/tests/proton/index/indexmanager_test.cpp 7
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/indexmanager.cpp 6
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/indexmanager.h 4
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp 16
-rw-r--r-- searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h 10
-rw-r--r-- searchcorespi/src/vespa/searchcorespi/index/iindexmaintaineroperations.h 6
-rw-r--r-- searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp 6
-rw-r--r-- searchlib/src/apps/tests/memoryindexstress_test.cpp 46
-rw-r--r-- searchlib/src/tests/diskindex/fusion/fusion_test.cpp 14
-rw-r--r-- searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp 8
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 12
-rw-r--r-- searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp 36
-rw-r--r-- searchlib/src/vespa/searchlib/index/field_length_calculator.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/index/field_length_info.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.cpp 12
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp 18
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp 21
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/test/index/mock_field_length_inspector.h 19
23 files changed, 224 insertions, 99 deletions
diff --git a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
index d0b19d77181..9a343667fd6 100644
--- a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
+++ b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp
@@ -1,27 +1,29 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/log/log.h>
-LOG_SETUP("feed_and_search_test");
#include <vespa/document/datatype/datatype.h>
#include <vespa/document/fieldvalue/document.h>
#include <vespa/document/fieldvalue/fieldvalue.h>
-#include <vespa/searchlib/memoryindex/memory_index.h>
+#include <vespa/searchlib/common/documentsummary.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
#include <vespa/searchlib/diskindex/diskindex.h>
+#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/memoryindex/memory_index.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/searchlib/query/base.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/queryeval/blueprint.h>
-#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <sstream>
-#include <vespa/searchlib/diskindex/fusion.h>
-#include <vespa/searchlib/common/documentsummary.h>
-#include <vespa/searchlib/common/sequencedtaskexecutor.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("feed_and_search_test");
using document::DataType;
using document::Document;
@@ -32,25 +34,26 @@ using search::TuneFileSearch;
using search::diskindex::DiskIndex;
using search::diskindex::IndexBuilder;
using search::diskindex::SelectorArray;
+using search::docsummary::DocumentSummary;
using search::fef::FieldPositionsIterator;
using search::fef::MatchData;
using search::fef::MatchDataLayout;
using search::fef::TermFieldHandle;
using search::fef::TermFieldMatchData;
using search::index::DocBuilder;
-using search::index::Schema;
using search::index::DummyFileHeaderContext;
+using search::index::Schema;
+using search::index::test::MockFieldLengthInspector;
using search::memoryindex::MemoryIndex;
using search::query::SimpleStringTerm;
using search::queryeval::Blueprint;
+using search::queryeval::FakeRequestContext;
using search::queryeval::FieldSpec;
using search::queryeval::FieldSpecList;
using search::queryeval::SearchIterator;
using search::queryeval::Searchable;
-using search::queryeval::FakeRequestContext;
using std::ostringstream;
using vespalib::string;
-using search::docsummary::DocumentSummary;
namespace {
@@ -148,7 +151,7 @@ void Test::requireThatMemoryIndexCanBeDumpedAndSearched() {
vespalib::ThreadStackExecutor sharedExecutor(2, 0x10000);
search::SequencedTaskExecutor indexFieldInverter(2);
search::SequencedTaskExecutor indexFieldWriter(2);
- MemoryIndex memory_index(schema, indexFieldInverter, indexFieldWriter);
+ MemoryIndex memory_index(schema, MockFieldLengthInspector(), indexFieldInverter, indexFieldWriter);
DocBuilder doc_builder(schema);
Document::UP doc = buildDocument(doc_builder, doc_id1, word1);
diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
index 8f9944e178e..25e7a4ffa6b 100644
--- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp
+++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
@@ -1,19 +1,19 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Unit tests for fusionrunner.
+#include <vespa/fastos/file.h>
#include <vespa/searchcore/proton/index/indexmanager.h>
#include <vespa/searchcore/proton/server/executorthreadingservice.h>
#include <vespa/searchcorespi/index/fusionrunner.h>
-#include <vespa/searchlib/memoryindex/memory_index.h>
+#include <vespa/searchlib/common/isequencedtaskexecutor.h>
#include <vespa/searchlib/diskindex/diskindex.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/index/docbuilder.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
-#include <vespa/searchlib/common/isequencedtaskexecutor.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/fastos/file.h>
#include <set>
using document::Document;
@@ -35,6 +35,7 @@ using search::index::DocBuilder;
using search::index::DummyFileHeaderContext;
using search::index::Schema;
using search::index::schema::DataType;
+using search::index::test::MockFieldLengthInspector;
using search::memoryindex::MemoryIndex;
using search::query::SimpleStringTerm;
using search::queryeval::Blueprint;
@@ -171,7 +172,8 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) {
Schema schema = getSchema();
DocBuilder doc_builder(schema);
- MemoryIndex memory_index(schema, _threadingService.indexFieldInverter(),
+ MemoryIndex memory_index(schema, MockFieldLengthInspector(),
+ _threadingService.indexFieldInverter(),
_threadingService.indexFieldWriter());
addDocument(doc_builder, memory_index, *_selector, id, id + 0, term);
addDocument(doc_builder, memory_index, *_selector, id, id + 1, "bar");
diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp
index 9ceb85981c0..4149d563bf9 100644
--- a/searchcore/src/tests/proton/index/indexmanager_test.cpp
+++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp
@@ -19,6 +19,7 @@
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/queryeval/isourceselector.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/searchlib/util/dirtraverse.h>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/testkit/testapp.h>
@@ -41,6 +42,7 @@ using search::index::DocBuilder;
using search::index::DummyFileHeaderContext;
using search::index::Schema;
using search::index::schema::DataType;
+using search::index::test::MockFieldLengthInspector;
using vespalib::makeLambdaTask;
using search::memoryindex::CompactWordsStore;
using search::memoryindex::FieldIndexCollection;
@@ -58,8 +60,7 @@ using namespace searchcorespi::index;
namespace {
-class IndexManagerDummyReconfigurer : public searchcorespi::IIndexManager::Reconfigurer
-{
+class IndexManagerDummyReconfigurer : public searchcorespi::IIndexManager::Reconfigurer {
virtual bool
reconfigure(vespalib::Closure0<bool>::UP closure) override
{
@@ -360,7 +361,7 @@ TEST_F("requireThatSourceSelectorIsFlushed", Fixture) {
TEST_F("requireThatFlushStatsAreCalculated", Fixture) {
Schema schema(getSchema());
- FieldIndexCollection fic(schema);
+ FieldIndexCollection fic(schema, MockFieldLengthInspector());
SequencedTaskExecutor invertThreads(2);
SequencedTaskExecutor pushThreads(2);
search::memoryindex::DocumentInverter inverter(schema, invertThreads,
diff --git a/searchcore/src/vespa/searchcore/proton/index/indexmanager.cpp b/searchcore/src/vespa/searchcore/proton/index/indexmanager.cpp
index b74cc2c603f..8e838414015 100644
--- a/searchcore/src/vespa/searchcore/proton/index/indexmanager.cpp
+++ b/searchcore/src/vespa/searchcore/proton/index/indexmanager.cpp
@@ -37,9 +37,11 @@ IndexManager::MaintainerOperations::MaintainerOperations(const FileHeaderContext
}
IMemoryIndex::SP
-IndexManager::MaintainerOperations::createMemoryIndex(const Schema &schema, SerialNum serialNum)
+IndexManager::MaintainerOperations::createMemoryIndex(const Schema& schema,
+ const IFieldLengthInspector& inspector,
+ SerialNum serialNum)
{
- return std::make_shared<MemoryIndexWrapper>(schema, _fileHeaderContext, _tuneFileIndexing,
+ return std::make_shared<MemoryIndexWrapper>(schema, inspector, _fileHeaderContext, _tuneFileIndexing,
_threadingService, serialNum);
}
diff --git a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h
index 4f2d03c81e6..b14912239a3 100644
--- a/searchcore/src/vespa/searchcore/proton/index/indexmanager.h
+++ b/searchcore/src/vespa/searchcore/proton/index/indexmanager.h
@@ -48,7 +48,9 @@ public:
size_t cacheSize,
searchcorespi::index::IThreadingService &threadingService);
- IMemoryIndex::SP createMemoryIndex(const Schema &schema, SerialNum serialNum) override;
+ IMemoryIndex::SP createMemoryIndex(const Schema& schema,
+ const IFieldLengthInspector& inspector,
+ SerialNum serialNum) override;
IDiskIndex::SP loadDiskIndex(const vespalib::string &indexDir) override;
IDiskIndex::SP reloadDiskIndex(const IDiskIndex &oldIndex) override;
bool runFusion(const Schema &schema, const vespalib::string &outputDir,
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
index 3d1e04196a6..d206388ca04 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
@@ -17,13 +17,13 @@ using vespalib::IllegalStateException;
namespace proton {
-MemoryIndexWrapper::MemoryIndexWrapper(const search::index::Schema &schema,
- const search::common::FileHeaderContext &fileHeaderContext,
- const TuneFileIndexing &tuneFileIndexing,
- searchcorespi::index::IThreadingService &
- threadingService,
+MemoryIndexWrapper::MemoryIndexWrapper(const search::index::Schema& schema,
+ const search::index::IFieldLengthInspector& inspector,
+ const search::common::FileHeaderContext& fileHeaderContext,
+ const TuneFileIndexing& tuneFileIndexing,
+ searchcorespi::index::IThreadingService& threadingService,
search::SerialNum serialNum)
- : _index(schema, threadingService.indexFieldInverter(),
+ : _index(schema, inspector, threadingService.indexFieldInverter(),
threadingService.indexFieldWriter()),
_serialNum(serialNum),
_fileHeaderContext(fileHeaderContext),
@@ -62,9 +62,7 @@ MemoryIndexWrapper::accept(searchcorespi::IndexSearchableVisitor &visitor) const
FieldLengthInfo
MemoryIndexWrapper::get_field_length_info(const vespalib::string& field_name) const
{
- // TODO: implement
- (void) field_name;
- return FieldLengthInfo();
+ return _index.get_field_length_info(field_name);
}
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
index d94a259eb24..dfebaff20f3 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.h
@@ -24,11 +24,11 @@ private:
const search::TuneFileIndexing _tuneFileIndexing;
public:
- MemoryIndexWrapper(const search::index::Schema &schema,
- const search::common::FileHeaderContext &fileHeaderContext,
- const search::TuneFileIndexing &tuneFileIndexing,
- searchcorespi::index::IThreadingService &
- threadingService,
+ MemoryIndexWrapper(const search::index::Schema& schema,
+ const search::index::IFieldLengthInspector& inspector,
+ const search::common::FileHeaderContext& fileHeaderContext,
+ const search::TuneFileIndexing& tuneFileIndexing,
+ searchcorespi::index::IThreadingService& threadingService,
SerialNum serialNum);
/**
diff --git a/searchcorespi/src/vespa/searchcorespi/index/iindexmaintaineroperations.h b/searchcorespi/src/vespa/searchcorespi/index/iindexmaintaineroperations.h
index 507ccf9483b..99f17b12b79 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/iindexmaintaineroperations.h
+++ b/searchcorespi/src/vespa/searchcorespi/index/iindexmaintaineroperations.h
@@ -6,6 +6,7 @@
#include <vespa/searchcommon/common/schema.h>
#include <vespa/searchlib/common/serialnum.h>
#include <vespa/searchlib/diskindex/docidmapper.h>
+#include <vespa/searchlib/index/i_field_length_inspector.h>
namespace searchcorespi::index {
@@ -13,6 +14,7 @@ namespace searchcorespi::index {
* Interface for operations needed by an index maintainer.
*/
struct IIndexMaintainerOperations {
+ using IFieldLengthInspector = search::index::IFieldLengthInspector;
using Schema = search::index::Schema;
using SelectorArray = search::diskindex::SelectorArray;
virtual ~IIndexMaintainerOperations() {}
@@ -20,7 +22,9 @@ struct IIndexMaintainerOperations {
/**
* Creates a new memory index using the given schema.
*/
- virtual IMemoryIndex::SP createMemoryIndex(const Schema &schema, search::SerialNum serialNum) = 0;
+ virtual IMemoryIndex::SP createMemoryIndex(const Schema& schema,
+ const IFieldLengthInspector& inspector,
+ search::SerialNum serialNum) = 0;
/**
* Loads a disk index from the given directory.
diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
index acde26ad554..a174592eb55 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
@@ -870,11 +870,11 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config,
_selector.reset(getSourceSelector().cloneAndSubtract(ost.str(), id_diff).release());
assert(_last_fusion_id == _selector->getBaseId());
}
- _current_index = operations.createMemoryIndex(_schema, _current_serial_num);
_current_index_id = getNewAbsoluteId() - _last_fusion_id;
assert(_current_index_id < ISourceSelector::SOURCE_LIMIT);
_selector->setDefaultSource(_current_index_id);
ISearchableIndexCollection::UP sourceList(loadDiskIndexes(spec, ISearchableIndexCollection::UP(new IndexCollection(_selector))));
+ _current_index = operations.createMemoryIndex(_schema, *sourceList, _current_serial_num);
LOG(debug, "Index manager created with flushed serial num %" PRIu64, _flush_serial_num);
sourceList->append(_current_index_id, _current_index);
sourceList->setCurrentIndex(_current_index_id);
@@ -900,7 +900,7 @@ IndexMaintainer::initFlush(SerialNum serialNum, searchcorespi::FlushStats * stat
_current_serial_num = std::max(_current_serial_num, serialNum);
}
- IMemoryIndex::SP new_index(_operations.createMemoryIndex(getSchema(), _current_serial_num));
+ IMemoryIndex::SP new_index(_operations.createMemoryIndex(getSchema(), *_current_index, _current_serial_num));
FlushArgs args;
args.stats = stats;
scheduleCommit();
@@ -1208,7 +1208,7 @@ IndexMaintainer::setSchema(const Schema & schema, SerialNum serialNum)
{
assert(_ctx.getThreadingService().master().isCurrentThread());
pruneRemovedFields(schema, serialNum);
- IMemoryIndex::SP new_index(_operations.createMemoryIndex(schema, _current_serial_num));
+ IMemoryIndex::SP new_index(_operations.createMemoryIndex(schema, *_current_index, _current_serial_num));
SetSchemaArgs args;
args._newSchema = schema;
diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp
index 2ef8448db8b..1571cef630b 100644
--- a/searchlib/src/apps/tests/memoryindexstress_test.cpp
+++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp
@@ -1,28 +1,31 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/searchlib/memoryindex/memory_index.h>
+
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+#include <vespa/searchlib/common/scheduletaskcallback.h>
+#include <vespa/searchlib/common/sequencedtaskexecutor.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/index/i_field_length_inspector.h>
+#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
#include <vespa/searchlib/queryeval/fake_search.h>
#include <vespa/searchlib/queryeval/fake_searchable.h>
-#include <vespa/searchlib/queryeval/fake_requestcontext.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/stringfmt.h>
-#include <vespa/searchlib/common/sequencedtaskexecutor.h>
-#include <vespa/searchlib/common/scheduletaskcallback.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
-#include <vespa/document/repo/documenttyperepo.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/fieldvalue/document.h>
-#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/document/repo/configbuilder.h>
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/document/annotation/spanlist.h>
-#include <vespa/document/annotation/spantree.h>
-#include <vespa/searchlib/util/rand48.h>
#include <vespa/log/log.h>
LOG_SETUP("memoryindexstress_test");
@@ -36,17 +39,18 @@ using document::FieldValue;
using document::Span;
using document::SpanList;
using document::StringFieldValue;
+using namespace search::fef;
+using namespace search::index;
+using namespace search::memoryindex;
+using namespace search::queryeval;
using search::ScheduleTaskCallback;
using search::index::schema::DataType;
-using vespalib::makeLambdaTask;
using search::query::Node;
using search::query::SimplePhrase;
using search::query::SimpleStringTerm;
-using namespace search::fef;
-using namespace search::index;
-using namespace search::memoryindex;
-using namespace search::queryeval;
+using search::index::test::MockFieldLengthInspector;
using vespalib::asciistream;
+using vespalib::makeLambdaTask;
namespace {
@@ -189,8 +193,6 @@ Node::UP makePhrase(const std::string &term1, const std::string &term2) {
} // namespace
-
-
struct Fixture {
Schema schema;
DocumentTypeRepo repo;
@@ -249,7 +251,7 @@ Fixture::Fixture(uint32_t readThreads)
_executor(1, 128 * 1024),
_invertThreads(2),
_pushThreads(2),
- index(schema, _invertThreads, _pushThreads),
+ index(schema, MockFieldLengthInspector(), _invertThreads, _pushThreads),
_readThreads(readThreads),
_writer(1, 128 * 1024),
_readers(readThreads, 128 * 1024),
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index 339e196c9bf..694af2f1ad1 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -1,8 +1,5 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/btree/btreenode.hpp>
-#include <vespa/vespalib/btree/btreenodeallocator.hpp>
-#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/searchlib/common/sequencedtaskexecutor.h>
#include <vespa/searchlib/diskindex/diskindex.h>
#include <vespa/searchlib/diskindex/fusion.h>
@@ -15,9 +12,13 @@
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/posting_iterator.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/searchlib/util/filekit.h>
-#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <vespa/vespalib/btree/btreenode.hpp>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/log/log.h>
LOG_SETUP("fusion_test");
@@ -28,12 +29,13 @@ using document::Document;
using fef::FieldPositionsIterator;
using fef::TermFieldMatchData;
using fef::TermFieldMatchDataArray;
-using memoryindex::FieldIndexCollection;
using memoryindex::DocumentInverter;
+using memoryindex::FieldIndexCollection;
using queryeval::SearchIterator;
using search::common::FileHeaderContext;
using search::index::schema::CollectionType;
using search::index::schema::DataType;
+using search::index::test::MockFieldLengthInspector;
using namespace index;
@@ -268,7 +270,7 @@ Test::requireThatFusionIsWorking(const vespalib::string &prefix, bool directio,
addField("f0").addField("f1").
addField("f2").addField("f3").
addField("f4"));
- FieldIndexCollection fic(schema);
+ FieldIndexCollection fic(schema, MockFieldLengthInspector());
DocBuilder b(schema);
SequencedTaskExecutor invertThreads(2);
SequencedTaskExecutor pushThreads(2);
diff --git a/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp b/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
index c99d241cbc0..8ec1655e372 100644
--- a/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
+++ b/searchlib/src/tests/index/field_length_calculator/field_length_calculator_test.cpp
@@ -63,6 +63,14 @@ TEST(FieldLengthCalculatorTest, average_until_max_num_samples)
EXPECT_EQ(max_num_samples, calc.get_num_samples());
}
+TEST(FieldLengthCalculatorTest, calculator_can_return_info_object)
+{
+ FieldLengthCalculator calc(3, 5);
+ auto info = calc.get_info();
+ EXPECT_EQ(3, info.get_average_field_length());
+ EXPECT_EQ(5, info.get_num_samples());
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index 95861643f84..234cf9b5e84 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -1,7 +1,5 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/btree/btreenodeallocator.hpp>
-#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/searchlib/common/sequencedtaskexecutor.h>
#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
@@ -16,7 +14,10 @@
#include <vespa/searchlib/memoryindex/field_inverter.h>
#include <vespa/searchlib/memoryindex/ordered_field_index_inserter.h>
#include <vespa/searchlib/memoryindex/posting_iterator.h>
+#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
#include <vespa/searchlib/test/memoryindex/wrap_inserter.h>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/log/log.h>
@@ -33,6 +34,7 @@ using document::Document;
using queryeval::SearchIterator;
using search::index::schema::CollectionType;
using search::index::schema::DataType;
+using search::index::test::MockFieldLengthInspector;
using vespalib::GenerationHandler;
namespace memoryindex {
@@ -294,7 +296,7 @@ public:
MyInserter(const Schema &schema)
: _wordStoreScan(),
_mock(),
- _fieldIndexes(schema),
+ _fieldIndexes(schema, MockFieldLengthInspector()),
_features(),
_inserter(nullptr)
{
@@ -490,7 +492,7 @@ struct FieldIndexCollectionTest : public ::testing::Test {
FieldIndexCollection fic;
FieldIndexCollectionTest()
: schema(make_multi_field_schema()),
- fic(schema)
+ fic(schema, MockFieldLengthInspector())
{
}
~FieldIndexCollectionTest() {}
@@ -755,7 +757,7 @@ public:
InverterTest(const Schema& schema)
: _schema(schema),
- _fic(_schema),
+ _fic(_schema, MockFieldLengthInspector()),
_b(_schema),
_invertThreads(2),
_pushThreads(2),
diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
index 4695f0e6e00..dd4bb2cef7f 100644
--- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/i_field_length_inspector.h>
#include <vespa/searchlib/memoryindex/memory_index.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h>
@@ -24,6 +25,8 @@ using document::Document;
using document::FieldValue;
using search::ScheduleTaskCallback;
using search::index::schema::DataType;
+using search::index::FieldLengthInfo;
+using search::index::IFieldLengthInspector;
using vespalib::makeLambdaTask;
using search::query::Node;
using search::query::SimplePhrase;
@@ -35,12 +38,25 @@ using namespace search::queryeval;
//-----------------------------------------------------------------------------
-struct MySetup {
+struct MySetup : public IFieldLengthInspector {
Schema schema;
+ std::map<vespalib::string, FieldLengthInfo> field_lengths;
MySetup &field(const std::string &name) {
schema.addIndexField(Schema::IndexField(name, DataType::STRING));
return *this;
}
+ MySetup& field_length(const vespalib::string& field_name, const FieldLengthInfo& info) {
+ field_lengths[field_name] = info;
+ return *this;
+ }
+ FieldLengthInfo get_field_length_info(const vespalib::string& field_name) const override {
+ auto itr = field_lengths.find(field_name);
+ if (itr != field_lengths.end()) {
+ return itr->second;
+ }
+ return FieldLengthInfo();
+ }
+
};
//-----------------------------------------------------------------------------
@@ -109,7 +125,7 @@ Index::Index(const MySetup &setup)
_executor(1, 128 * 1024),
_invertThreads(2),
_pushThreads(2),
- index(schema, _invertThreads, _pushThreads),
+ index(schema, setup, _invertThreads, _pushThreads),
builder(schema),
docid(1),
currentField()
@@ -445,4 +461,20 @@ TEST(MemoryIndexTest, require_that_we_can_fake_bit_vector)
}
}
+TEST(MemoryIndexTest, field_length_info_can_be_retrieved_per_field)
+{
+ Index index(MySetup().field(title).field(body)
+ .field_length("title", FieldLengthInfo(3, 5))
+ .field_length("body", FieldLengthInfo(7, 11)));
+
+ EXPECT_EQ(3, index.index.get_field_length_info("title").get_average_field_length());
+ EXPECT_EQ(5, index.index.get_field_length_info("title").get_num_samples());
+
+ EXPECT_EQ(7, index.index.get_field_length_info("body").get_average_field_length());
+ EXPECT_EQ(11, index.index.get_field_length_info("body").get_num_samples());
+
+ EXPECT_EQ(0, index.index.get_field_length_info("na").get_average_field_length());
+ EXPECT_EQ(0, index.index.get_field_length_info("na").get_num_samples());
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/index/field_length_calculator.h b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
index acbbe38321a..4d4858e38bc 100644
--- a/searchlib/src/vespa/searchlib/index/field_length_calculator.h
+++ b/searchlib/src/vespa/searchlib/index/field_length_calculator.h
@@ -2,8 +2,10 @@
#pragma once
+#include "field_length_info.h"
#include <atomic>
#include <algorithm>
+#include <cstdint>
namespace search::index {
@@ -29,16 +31,28 @@ public:
{
}
+ FieldLengthCalculator(const FieldLengthInfo& info, uint32_t max_num_samples = 100000)
+ : _average_field_length(info.get_average_field_length()),
+ _num_samples(std::min(info.get_num_samples(), max_num_samples)),
+ _max_num_samples(max_num_samples)
+ {
+ }
+
double get_average_field_length() const { return _average_field_length.load(std::memory_order_relaxed); }
uint32_t get_num_samples() const { return _num_samples; }
- uint32_t get_max_num_samples() { return _max_num_samples; }
-
+ uint32_t get_max_num_samples() const { return _max_num_samples; }
+
+ FieldLengthInfo get_info() const {
+ return FieldLengthInfo(get_average_field_length(), get_num_samples());
+ }
+
void add_field_length(uint32_t field_length) {
if (_num_samples < _max_num_samples) {
++_num_samples;
}
_average_field_length.store((_average_field_length.load(std::memory_order_relaxed) * (_num_samples - 1) + field_length) / _num_samples, std::memory_order_relaxed);
}
+
};
}
diff --git a/searchlib/src/vespa/searchlib/index/field_length_info.h b/searchlib/src/vespa/searchlib/index/field_length_info.h
index 3ae3d38d86e..929e12beba9 100644
--- a/searchlib/src/vespa/searchlib/index/field_length_info.h
+++ b/searchlib/src/vespa/searchlib/index/field_length_info.h
@@ -2,6 +2,8 @@
#pragma once
+#include <cstdint>
+
namespace search::index {
/**
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 2243df41b0b..e2e1c99a9b9 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -43,7 +43,12 @@ operator<<(vespalib::asciistream & os, const FieldIndex::WordKey & rhs)
return os;
}
-FieldIndex::FieldIndex(const Schema & schema, uint32_t fieldId)
+FieldIndex::FieldIndex(const index::Schema& schema, uint32_t fieldId)
+ : FieldIndex(schema, fieldId, index::FieldLengthInfo())
+{
+}
+
+FieldIndex::FieldIndex(const index::Schema& schema, uint32_t fieldId, const index::FieldLengthInfo& info)
: _wordStore(),
_numUniqueWords(0),
_generationHandler(),
@@ -53,8 +58,9 @@ FieldIndex::FieldIndex(const Schema & schema, uint32_t fieldId)
_fieldId(fieldId),
_remover(_wordStore),
_inserter(std::make_unique<OrderedFieldIndexInserter>(*this)),
- _calculator()
-{ }
+ _calculator(info)
+{
+}
FieldIndex::~FieldIndex()
{
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 85685a5e1d1..dba57f553b5 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -104,7 +104,8 @@ public:
return _featureStore.addFeatures(_fieldId, features).first;
}
- FieldIndex(const index::Schema &schema, uint32_t fieldId);
+ FieldIndex(const index::Schema& schema, uint32_t fieldId); // Same as the overload below, called with a default-constructed index::FieldLengthInfo.
+ FieldIndex(const index::Schema& schema, uint32_t fieldId, const index::FieldLengthInfo& info); // 'info' is passed to the field length calculator member (_calculator) on construction.
~FieldIndex();
PostingList::Iterator find(const vespalib::stringref word) const;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
index b75ea7f0a70..40b1e8f360f 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
@@ -4,16 +4,16 @@
#include "field_inverter.h"
#include "ordered_field_index_inserter.h"
#include <vespa/searchlib/bitcompression/posocccompression.h>
-
+#include <vespa/searchlib/index/i_field_length_inspector.h>
+#include <vespa/vespalib/btree/btree.hpp>
+#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreenode.hpp>
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/btree/btreenodestore.hpp>
-#include <vespa/vespalib/btree/btreestore.hpp>
-#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreeroot.hpp>
-#include <vespa/vespalib/btree/btree.hpp>
-#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.memoryindex.field_index_collection");
@@ -22,17 +22,19 @@ LOG_SETUP(".searchlib.memoryindex.field_index_collection");
namespace search {
using index::DocIdAndFeatures;
-using index::WordDocElementFeatures;
+using index::IFieldLengthInspector;
using index::Schema;
+using index::WordDocElementFeatures;
namespace memoryindex {
-FieldIndexCollection::FieldIndexCollection(const Schema & schema)
+FieldIndexCollection::FieldIndexCollection(const Schema& schema, const IFieldLengthInspector& inspector) // Creates one FieldIndex per index field in 'schema'.
: _fieldIndexes(),
_numFields(schema.getNumIndexFields())
{
for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) {
- auto fieldIndex = std::make_unique<FieldIndex>(schema, fieldId);
+ const auto& field = schema.getIndexField(fieldId); // Only its name is used: the inspector is queried by field name.
+ auto fieldIndex = std::make_unique<FieldIndex>(schema, fieldId, inspector.get_field_length_info(field.getName())); // Each FieldIndex is constructed with the info the inspector reports for its field.
_fieldIndexes.push_back(std::move(fieldIndex));
}
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
index d5212c41819..53f42658d0a 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
@@ -5,6 +5,8 @@
#include "i_field_index_collection.h"
#include "field_index.h"
+namespace search::index { class IFieldLengthInspector; }
+
namespace search::memoryindex {
class IFieldIndexRemoveListener;
@@ -27,7 +29,7 @@ private:
uint32_t _numFields;
public:
- FieldIndexCollection(const index::Schema &schema);
+ FieldIndexCollection(const index::Schema& schema, const index::IFieldLengthInspector& inspector); // 'inspector' is asked, by field name, for the field length info each index field's FieldIndex is constructed with.
~FieldIndexCollection();
PostingList::Iterator find(const vespalib::stringref word,
uint32_t fieldId) const {
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index e2cac316580..6686745f8c2 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -25,6 +25,8 @@ using vespalib::GenerationHandler;
namespace search {
using fef::TermFieldMatchDataArray;
+using index::FieldLengthInfo;
+using index::IFieldLengthInspector;
using index::IndexBuilder;
using index::Schema;
using index::SchemaUtil;
@@ -53,13 +55,14 @@ using queryeval::IRequestContext;
namespace search::memoryindex {
-MemoryIndex::MemoryIndex(const Schema &schema,
- ISequencedTaskExecutor &invertThreads,
- ISequencedTaskExecutor &pushThreads)
+MemoryIndex::MemoryIndex(const Schema& schema,
+ const IFieldLengthInspector& inspector,
+ ISequencedTaskExecutor& invertThreads,
+ ISequencedTaskExecutor& pushThreads)
: _schema(schema),
_invertThreads(invertThreads),
_pushThreads(pushThreads),
- _fieldIndexes(std::make_unique<FieldIndexCollection>(_schema)),
+ _fieldIndexes(std::make_unique<FieldIndexCollection>(_schema, inspector)),
_inverter0(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)),
_inverter1(std::make_unique<DocumentInverter>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)),
_inverter(_inverter0.get()),
@@ -290,4 +293,14 @@ MemoryIndex::getPrunedSchema() const
return _prunedSchema;
}
+/**
+ * Report the field length info currently held by the calculator of the
+ * index field called 'field_name'.
+ *
+ * @param field_name name that is looked up among the schema's index fields.
+ * @return the calculator's info, or a default-constructed FieldLengthInfo
+ *         when the schema lookup yields Schema::UNKNOWN_FIELD_ID.
+ */
+FieldLengthInfo
+MemoryIndex::get_field_length_info(const vespalib::string& field_name) const
+{
+    const uint32_t id = _schema.getIndexFieldId(field_name);
+    if (id == Schema::UNKNOWN_FIELD_ID) {
+        // No such index field: there is no calculator to consult.
+        return FieldLengthInfo();
+    }
+    return _fieldIndexes->get_calculator(id).get_info();
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index c350da31c54..44252aa2cdc 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -2,13 +2,17 @@
#pragma once
+#include <vespa/searchcommon/common/schema.h>
#include <vespa/searchlib/common/idestructorcallback.h>
+#include <vespa/searchlib/index/field_length_info.h>
#include <vespa/searchlib/queryeval/searchable.h>
-#include <vespa/searchcommon/common/schema.h>
#include <vespa/vespalib/stllike/hash_set.h>
#include <vespa/vespalib/util/memoryusage.h>
-namespace search::index { class IndexBuilder; }
+namespace search::index {
+ class IFieldLengthInspector;
+ class IndexBuilder;
+}
namespace search { class ISequencedTaskExecutor; }
@@ -82,13 +86,15 @@ public:
* Create a new memory index based on the given schema.
*
* @param schema the schema with which text and uri fields to keep in the index.
+ * @param inspector the inspector used to lookup initial field length info for all index fields.
* @param invertThreads the executor with threads for doing document inverting.
* @param pushThreads the executor with threads for doing pushing of changes (inverted documents)
* to corresponding field indexes.
*/
- MemoryIndex(const index::Schema &schema,
- ISequencedTaskExecutor &invertThreads,
- ISequencedTaskExecutor &pushThreads);
+ MemoryIndex(const index::Schema& schema,
+ const index::IFieldLengthInspector& inspector,
+ ISequencedTaskExecutor& invertThreads,
+ ISequencedTaskExecutor& pushThreads);
~MemoryIndex();
@@ -165,6 +171,8 @@ public:
vespalib::MemoryUsage getMemoryUsage() const;
uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; }
+
+ search::index::FieldLengthInfo get_field_length_info(const vespalib::string& field_name) const; // Info from the named index field's calculator; a default-constructed FieldLengthInfo when the schema lookup yields UNKNOWN_FIELD_ID.
};
}
diff --git a/searchlib/src/vespa/searchlib/test/index/mock_field_length_inspector.h b/searchlib/src/vespa/searchlib/test/index/mock_field_length_inspector.h
new file mode 100644
index 00000000000..4954e732862
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/index/mock_field_length_inspector.h
@@ -0,0 +1,19 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/index/i_field_length_inspector.h>
+
+namespace search::index::test {
+
+/**
+ * Mock of IFieldLengthInspector returning empty field info for all fields.
+ *
+ * The override is declared public so that tests can call it directly on a
+ * MockFieldLengthInspector object as well as through an
+ * IFieldLengthInspector reference.
+ */
+class MockFieldLengthInspector : public IFieldLengthInspector {
+public:
+    /**
+     * @param field_name ignored; every field gets the same answer.
+     * @return a default-constructed FieldLengthInfo.
+     */
+    FieldLengthInfo get_field_length_info(const vespalib::string& field_name) const override {
+        (void) field_name;
+        return FieldLengthInfo();
+    }
+};
+
+}