summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2019-01-25 11:06:46 +0100
committerGitHub <noreply@github.com>2019-01-25 11:06:46 +0100
commit00863d967d0eb56bb73b0418b91c82f1615445f5 (patch)
treeae1a6775c6f540be30770b0b90755f4624e8f9ae
parent8a864f26c96240f829f1a65d9105e3c8734f2d92 (diff)
parent2b682b9a6dab1dd27ca9fae8cb62cfc678f5bafa (diff)
Merge pull request #8172 from vespa-engine/toregge/cap-fixed-source-selector-on-load
Cap sources in FixedSourceSelector on load.
-rw-r--r--searchcore/src/tests/proton/index/fusionrunner_test.cpp1
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp6
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp5
-rw-r--r--searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/isourceselector.h1
8 files changed, 59 insertions, 15 deletions
diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
index 650b263ff75..be41aa96efd 100644
--- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp
+++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
@@ -165,6 +165,7 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) {
_fusion_spec.flush_ids.push_back(id);
}
const string index_dir = ost.str();
+ _selector->setDefaultSource(id - _selector->getBaseId());
Schema schema = getSchema();
DocBuilder doc_builder(schema);
diff --git a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp
index b5d3086d11e..009bf5d16d9 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/fusionrunner.cpp
@@ -43,9 +43,9 @@ FusionRunner::~FusionRunner() {
namespace {
void readSelectorArray(const string &selector_name, SelectorArray &selector_array,
- const vector<uint8_t> &id_map, uint32_t base_id) {
+ const vector<uint8_t> &id_map, uint32_t base_id, uint32_t fusion_id) {
FixedSourceSelector::UP selector =
- FixedSourceSelector::load(selector_name);
+ FixedSourceSelector::load(selector_name, fusion_id);
if (base_id != selector->getBaseId()) {
selector = selector->cloneAndSubtract("tmp_for_fusion", base_id - selector->getBaseId());
}
@@ -115,7 +115,7 @@ FusionRunner::fuse(const FusionSpec &fusion_spec,
const string selector_name = IndexDiskLayout::getSelectorFileName(_diskLayout.getFlushDir(fusion_id));
SelectorArray selector_array;
- readSelectorArray(selector_name, selector_array, id_map, fusion_spec.last_fusion_id);
+ readSelectorArray(selector_name, selector_array, id_map, fusion_spec.last_fusion_id, fusion_id);
if (!operations.runFusion(_schema, fusion_dir, sources, selector_array, lastSerialNum)) {
return 0;
diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
index 6a7f4a14f2a..440cc8bc605 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/indexmaintainer.cpp
@@ -471,6 +471,7 @@ IndexMaintainer::doneInitFlush(FlushArgs *args, IMemoryIndex::SP *new_index)
// XXX: Overflow issue in source selector
_current_index_id = getNewAbsoluteId() - _last_fusion_id;
assert(_current_index_id < ISourceSelector::SOURCE_LIMIT);
+ _selector->setDefaultSource(_current_index_id);
_source_selector_changes = 0;
}
_current_index = *new_index;
@@ -747,6 +748,7 @@ IndexMaintainer::doneSetSchema(SetSchemaArgs &args, IMemoryIndex::SP &newIndex)
// XXX: Overflow issue in source selector
_current_index_id = getNewAbsoluteId() - _last_fusion_id;
assert(_current_index_id < ISourceSelector::SOURCE_LIMIT);
+ _selector->setDefaultSource(_current_index_id);
// Extra index to flush next time flushing is performed
_frozenMemoryIndexes.emplace_back(args._oldIndex, freezeSerialNum, std::move(saveInfo), oldAbsoluteId);
}
@@ -849,7 +851,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config,
_lastFlushTime = search::FileKit::getModificationTime(latest_index_dir);
_current_serial_num = _flush_serial_num;
const string selector = IndexDiskLayout::getSelectorFileName(latest_index_dir);
- _selector.reset(FixedSourceSelector::load(selector).release());
+ _selector.reset(FixedSourceSelector::load(selector, _next_id - 1).release());
} else {
_flush_serial_num = 0;
_selector.reset(new FixedSourceSelector(0, "sourceselector", 1));
@@ -866,6 +868,7 @@ IndexMaintainer::IndexMaintainer(const IndexMaintainerConfig &config,
_current_index = operations.createMemoryIndex(_schema, _current_serial_num);
_current_index_id = getNewAbsoluteId() - _last_fusion_id;
assert(_current_index_id < ISourceSelector::SOURCE_LIMIT);
+ _selector->setDefaultSource(_current_index_id);
ISearchableIndexCollection::UP sourceList(loadDiskIndexes(spec, ISearchableIndexCollection::UP(new IndexCollection(_selector))));
LOG(debug, "Index manager created with flushed serial num %" PRIu64, _flush_serial_num);
sourceList->append(_current_index_id, _current_index);
diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
index 3ad3607ab23..54fb7ffd387 100644
--- a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
+++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
@@ -32,12 +32,16 @@ const uint32_t default_source = 7;
const uint32_t invalid_source = (uint8_t)search::attribute::getUndefined<int8_t>();
const uint32_t base_id = 42;
+uint8_t capSource(uint8_t source, uint8_t defaultSource, bool cap) {
+ return (cap ? std::min(source, defaultSource) : source);
+}
+
class Test : public vespalib::TestApp
{
public:
int Main() override;
private:
- void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector);
+ void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector, bool cap);
void testFixed(const DocSource *docSource, size_t sz);
template <typename SelectorType>
void requireThatSelectorCanCloneAndSubtract();
@@ -87,26 +91,26 @@ void Test::testFixed(const DocSource *docSource, size_t sz)
EXPECT_EQUAL(10u, selector.getDocIdLimit());
// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1));
setSources(selector);
- testSourceSelector(docSource, sz, selector.getDefaultSource(), selector);
+ testSourceSelector(docSource, sz, selector.getDefaultSource(), selector, false);
EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit());
}
void Test::testSourceSelector(const DocSource *docSource, size_t sz,
- uint8_t defaultSource, ISourceSelector &selector)
+ uint8_t defaultSource, ISourceSelector &selector, bool cap)
{
{
auto it(selector.createIterator());
for (size_t i = 0; i < sz; ++i) {
- EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId));
+ EXPECT_EQUAL((uint32_t)capSource(docSource[i].source, defaultSource, cap), (uint32_t)it->getSource(docSource[i].docId));
}
}
{
auto it(selector.createIterator());
for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) {
if (i != docSource[j].docId) {
- EXPECT_EQUAL(defaultSource, it->getSource(i));
+ EXPECT_EQUAL((uint32_t)defaultSource, (uint32_t)it->getSource(i));
} else {
- EXPECT_EQUAL(docSource[j].source, it->getSource(i));
+ EXPECT_EQUAL((uint32_t)capSource(docSource[j].source, defaultSource, cap), (uint32_t)it->getSource(i));
++j;
}
}
@@ -163,8 +167,8 @@ Test::requireThatSelectorCanSaveAndLoad(bool compactLidSpace)
selector.extractSaveInfo(base_file_name);
save_info->save(TuneFileAttributes(), DummyFileHeaderContext());
typename SelectorType::UP
- selector2(SelectorType::load(base_file_name));
- testSourceSelector(docs, arraysize(docs) - compactLidSpace, default_source, *selector2);
+ selector2(SelectorType::load(base_file_name, default_source + base_id));
+ testSourceSelector(docs, arraysize(docs) - compactLidSpace, default_source, *selector2, true);
EXPECT_EQUAL(base_id, selector2->getBaseId());
if (compactLidSpace) {
EXPECT_EQUAL(maxDocId - 4, selector2->getDocIdLimit());
diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp
index 29dcad9705c..e3e6eca93b4 100644
--- a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp
@@ -10,6 +10,22 @@ namespace search {
namespace {
attribute::Config getConfig() { return attribute::Config(attribute::BasicType::INT8); }
+
+uint32_t
+capSelector(queryeval::sourceselector::Iterator::SourceStore &store, queryeval::Source defaultSource)
+{
+ uint32_t committedDocIdLimit = store.getCommittedDocIdLimit();
+ uint32_t cappedSources = 0;
+ for (uint32_t docId = 0; docId < committedDocIdLimit; ++docId) {
+ queryeval::Source source = store.getFast(docId);
+ if (source > defaultSource) {
+ ++cappedSources;
+ store.set(docId, defaultSource);
+ }
+ }
+ return cappedSources;
+}
+
}
FixedSourceSelector::Iterator::Iterator(const FixedSourceSelector & sourceSelector) :
@@ -50,16 +66,27 @@ FixedSourceSelector::cloneAndSubtract(const vespalib::string & attrBaseFileName,
}
FixedSourceSelector::UP
-FixedSourceSelector::load(const vespalib::string & baseFileName)
+FixedSourceSelector::load(const vespalib::string & baseFileName, uint32_t currentId)
{
LoadInfo::UP info = extractLoadInfo(baseFileName);
info->load();
+ uint32_t defaultSource = currentId - info->header()._baseId;
+ assert(defaultSource < SOURCE_LIMIT);
+ if (defaultSource != info->header()._defaultSource) {
+ LOG(info, "Default source mismatch: header says %u, should be %u selector %s",
+ (uint32_t) info->header()._defaultSource, defaultSource,
+ baseFileName.c_str());
+ }
FixedSourceSelector::UP selector(new FixedSourceSelector(
- info->header()._defaultSource,
+ defaultSource,
info->header()._baseFileName,
0));
selector->setBaseId(info->header()._baseId);
selector->_source.load();
+ uint32_t cappedSources = capSelector(selector->_source, selector->getDefaultSource());
+ if (cappedSources > 0) {
+ LOG(warning, "%u sources capped in source selector %s", cappedSources, baseFileName.c_str());
+ }
return selector;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h
index f92bb1e0eb2..055d216dda1 100644
--- a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h
+++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h
@@ -33,7 +33,7 @@ public:
~FixedSourceSelector();
FixedSourceSelector::UP cloneAndSubtract(const vespalib::string & attrBaseFileName, uint32_t diff);
- static FixedSourceSelector::UP load(const vespalib::string & baseFileName);
+ static FixedSourceSelector::UP load(const vespalib::string & baseFileName, uint32_t currentId);
// Inherit doc from ISourceSelector
void setSource(uint32_t docId, queryeval::Source source) final override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
index 1e0659c92e3..e19a76b58ba 100644
--- a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
@@ -10,4 +10,12 @@ ISourceSelector::ISourceSelector(Source defaultSource) :
assert(defaultSource < SOURCE_LIMIT);
}
+void
+ISourceSelector::setDefaultSource(Source source)
+{
+ assert(source < SOURCE_LIMIT);
+ assert(source >= _defaultSource);
+ _defaultSource = source;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
index 7151a785c26..8d92cd5293e 100644
--- a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
@@ -56,6 +56,7 @@ protected:
public:
void setBaseId(uint32_t baseId) { _baseId = baseId; }
uint32_t getBaseId() const { return _baseId; }
+ void setDefaultSource(Source source);
Source getDefaultSource() const { return _defaultSource; }
/**
* Set the source to be used for a given document.