diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2019-01-25 11:06:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-25 11:06:46 +0100 |
commit | 00863d967d0eb56bb73b0418b91c82f1615445f5 (patch) | |
tree | ae1a6775c6f540be30770b0b90755f4624e8f9ae | |
parent | 8a864f26c96240f829f1a65d9105e3c8734f2d92 (diff) | |
parent | 2b682b9a6dab1dd27ca9fae8cb62cfc678f5bafa (diff) |
Merge pull request #8172 from vespa-engine/toregge/cap-fixed-source-selector-on-load
Cap sources in FixedSourceSelector on load.
8 files changed, 59 insertions, 15 deletions
diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp index 650b263ff75..be41aa96efd 100644 --- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp +++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp @@ -165,6 +165,7 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) { _fusion_spec.flush_ids.push_back(id); } const string index_dir = ost.str(); + _selector->setDefaultSource(id - _selector->getBaseId()); Schema schema = getSchema(); DocBuilder doc_builder(schema); diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp index b5d3086d11e..009bf5d16d9 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp @@ -43,9 +43,9 @@ FusionRunner::~FusionRunner() { namespace { void readSelectorArray(const string &selector_name, SelectorArray &selector_array, - const vector<uint8_t> &id_map, uint32_t base_id) { + const vector<uint8_t> &id_map, uint32_t base_id, uint32_t fusion_id) { FixedSourceSelector::UP selector = - FixedSourceSelector::load(selector_name); + FixedSourceSelector::load(selector_name, fusion_id); if (base_id != selector->getBaseId()) { selector = selector->cloneAndSubtract("tmp_for_fusion", base_id - selector->getBaseId()); } @@ -115,7 +115,7 @@ FusionRunner::fuse(const FusionSpec &fusion_spec, const string selector_name = IndexDiskLayout::getSelectorFileName(_diskLayout.getFlushDir(fusion_id)); SelectorArray selector_array; - readSelectorArray(selector_name, selector_array, id_map, fusion_spec.last_fusion_id); + readSelectorArray(selector_name, selector_array, id_map, fusion_spec.last_fusion_id, fusion_id); if (!operations.runFusion(_schema, fusion_dir, sources, selector_array, lastSerialNum)) { return 0; diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp index 6a7f4a14f2a..440cc8bc605 100644 --- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp +++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp @@ -471,6 +471,7 @@ IndexMaintainer::doneInitFlush(FlushArgs *args, IMemoryIndex::SP *new_index) // XXX: Overflow issue in source selector _current_index_id = getNewAbsoluteId() - _last_fusion_id; assert(_current_index_id < ISourceSelector::SOURCE_LIMIT); + _selector->setDefaultSource(_current_index_id); _source_selector_changes = 0; } _current_index = *new_index; @@ -747,6 +748,7 @@ IndexMaintainer::doneSetSchema(SetSchemaArgs &args, IMemoryIndex::SP &newIndex) // XXX: Overflow issue in source selector _current_index_id = getNewAbsoluteId() - _last_fusion_id; assert(_current_index_id < ISourceSelector::SOURCE_LIMIT); + _selector->setDefaultSource(_current_index_id); // Extra index to flush next time flushing is performed _frozenMemoryIndexes.emplace_back(args._oldIndex, freezeSerialNum, std::move(saveInfo), oldAbsoluteId); } @@ -849,7 +851,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config, _lastFlushTime = search::FileKit::getModificationTime(latest_index_dir); _current_serial_num = _flush_serial_num; const string selector = IndexDiskLayout::getSelectorFileName(latest_index_dir); - _selector.reset(FixedSourceSelector::load(selector).release()); + _selector.reset(FixedSourceSelector::load(selector, _next_id - 1).release()); } else { _flush_serial_num = 0; _selector.reset(new FixedSourceSelector(0, "sourceselector", 1)); @@ -866,6 +868,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config, _current_index = operations.createMemoryIndex(_schema, _current_serial_num); _current_index_id = getNewAbsoluteId() - _last_fusion_id; assert(_current_index_id < ISourceSelector::SOURCE_LIMIT); + _selector->setDefaultSource(_current_index_id); ISearchableIndexCollection::UP sourceList(loadDiskIndexes(spec, ISearchableIndexCollection::UP(new IndexCollection(_selector)))); LOG(debug, "Index manager created with flushed serial num %" PRIu64, _flush_serial_num); sourceList->append(_current_index_id, _current_index); diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp index 3ad3607ab23..54fb7ffd387 100644 --- a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp +++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp @@ -32,12 +32,16 @@ const uint32_t default_source = 7; const uint32_t invalid_source = (uint8_t)search::attribute::getUndefined<int8_t>(); const uint32_t base_id = 42; +uint8_t capSource(uint8_t source, uint8_t defaultSource, bool cap) { + return (cap ? std::min(source, defaultSource) : source); +} + class Test : public vespalib::TestApp { public: int Main() override; private: - void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector); + void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector, bool cap); void testFixed(const DocSource *docSource, size_t sz); template <typename SelectorType> void requireThatSelectorCanCloneAndSubtract(); @@ -87,26 +91,26 @@ void Test::testFixed(const DocSource *docSource, size_t sz) EXPECT_EQUAL(10u, selector.getDocIdLimit()); // EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1)); setSources(selector); - testSourceSelector(docSource, sz, selector.getDefaultSource(), selector); + testSourceSelector(docSource, sz, selector.getDefaultSource(), selector, false); EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit()); } void Test::testSourceSelector(const DocSource *docSource, size_t sz, - uint8_t defaultSource, ISourceSelector &selector) + uint8_t defaultSource, ISourceSelector &selector, bool cap) { { auto it(selector.createIterator()); for (size_t i = 0; i < sz; ++i) { - EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId)); + EXPECT_EQUAL((uint32_t)capSource(docSource[i].source, defaultSource, cap), (uint32_t)it->getSource(docSource[i].docId)); } } { auto it(selector.createIterator()); for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) { if (i != docSource[j].docId) { - EXPECT_EQUAL(defaultSource, it->getSource(i)); + EXPECT_EQUAL((uint32_t)defaultSource, (uint32_t)it->getSource(i)); } else { - EXPECT_EQUAL(docSource[j].source, it->getSource(i)); + EXPECT_EQUAL((uint32_t)capSource(docSource[j].source, defaultSource, cap), (uint32_t)it->getSource(i)); ++j; } } @@ -163,8 +167,8 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace) selector.extractSaveInfo(base_file_name); save_info->save(TuneFileAttributes(), DummyFileHeaderContext()); typename SelectorType::UP - selector2(SelectorType::load(base_file_name)); - testSourceSelector(docs, arraysize(docs) - compactLidSpace, default_source, *selector2); + selector2(SelectorType::load(base_file_name, default_source + base_id)); + testSourceSelector(docs, arraysize(docs) - compactLidSpace, default_source, *selector2, true); EXPECT_EQUAL(base_id, selector2->getBaseId()); if (compactLidSpace) { EXPECT_EQUAL(maxDocId - 4, selector2->getDocIdLimit()); diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp index 29dcad9705c..e3e6eca93b4 100644 --- a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp @@ -10,6 +10,22 @@ namespace search { namespace { attribute::Config getConfig() { return attribute::Config(attribute::BasicType::INT8); } + +uint32_t +capSelector(queryeval::sourceselector::Iterator::SourceStore &store, queryeval::Source defaultSource) +{ + uint32_t committedDocIdLimit = store.getCommittedDocIdLimit(); + uint32_t cappedSources = 0; + for (uint32_t docId = 0; docId < committedDocIdLimit; ++docId) { + queryeval::Source source = store.getFast(docId); + if (source > defaultSource) { + ++cappedSources; + store.set(docId, defaultSource); + } + } + return cappedSources; +} + } FixedSourceSelector::Iterator::Iterator(const FixedSourceSelector & sourceSelector) : @@ -50,16 +66,27 @@ FixedSourceSelector::cloneAndSubtract(const vespalib::string & attrBaseFileName, } FixedSourceSelector::UP -FixedSourceSelector::load(const vespalib::string & baseFileName) +FixedSourceSelector::load(const vespalib::string & baseFileName, uint32_t currentId) { LoadInfo::UP info = extractLoadInfo(baseFileName); info->load(); + uint32_t defaultSource = currentId - info->header()._baseId; + assert(defaultSource < SOURCE_LIMIT); + if (defaultSource != info->header()._defaultSource) { + LOG(info, "Default source mismatch: header says %u, should be %u selector %s", + (uint32_t) info->header()._defaultSource, defaultSource, + baseFileName.c_str()); + } FixedSourceSelector::UP selector(new FixedSourceSelector( - info->header()._defaultSource, + defaultSource, info->header()._baseFileName, 0)); selector->setBaseId(info->header()._baseId); selector->_source.load(); + uint32_t cappedSources = capSelector(selector->_source, selector->getDefaultSource()); + if (cappedSources > 0) { + LOG(warning, "%u sources capped in source selector %s", cappedSources, baseFileName.c_str()); + } return selector; } diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h index f92bb1e0eb2..055d216dda1 100644 --- a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h +++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h @@ -33,7 +33,7 @@ public: ~FixedSourceSelector(); FixedSourceSelector::UP cloneAndSubtract(const vespalib::string & attrBaseFileName, uint32_t diff); - static FixedSourceSelector::UP load(const vespalib::string & baseFileName); + static FixedSourceSelector::UP load(const vespalib::string & baseFileName, uint32_t currentId); // Inherit doc from ISourceSelector void setSource(uint32_t docId, queryeval::Source source) final override; diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp index 1e0659c92e3..e19a76b58ba 100644 --- a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp @@ -10,4 +10,12 @@ ISourceSelector::ISourceSelector(Source defaultSource) : assert(defaultSource < SOURCE_LIMIT); } +void +ISourceSelector::setDefaultSource(Source source) +{ + assert(source < SOURCE_LIMIT); + assert(source >= _defaultSource); + _defaultSource = source; +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h index 7151a785c26..8d92cd5293e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h +++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h @@ -56,6 +56,7 @@ protected: public: void setBaseId(uint32_t baseId) { _baseId = baseId; } uint32_t getBaseId() const { return _baseId; } + void setDefaultSource(Source source); Source getDefaultSource() const { return _defaultSource; } /** * Set the source to be used for a given document. |