diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2020-04-24 14:31:22 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2020-04-24 15:37:45 +0200 |
commit | c09675a41768642d37390c00cb05ecfc533ca5d1 (patch) | |
tree | 38e5dd7d1725a8d6651894d408ea1e7d158263e2 | |
parent | 24eaf1642c012eb2f9f76a39102e87fb82874321 (diff) |
Ignore hnsw index attribute save file if major parameters are changed.
3 files changed, 109 insertions, 17 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.h b/searchcommon/src/vespa/searchcommon/attribute/config.h index 836fcfed84a..df5aa9e217a 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/config.h +++ b/searchcommon/src/vespa/searchcommon/attribute/config.h @@ -71,6 +71,10 @@ public: _hnsw_index_params = params; return *this; } + Config& clear_hnsw_index_params() { + _hnsw_index_params.reset(); + return *this; + } /** * Enable attribute posting list to consist of a bitvector in diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 39a7e53ca8c..a008bc73e93 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -215,6 +215,7 @@ struct Fixture { Config _cfg; vespalib::string _name; vespalib::string _typeSpec; + bool _use_mock_index; std::unique_ptr<NearestNeighborIndexFactory> _index_factory; std::shared_ptr<TensorAttribute> _tensorAttr; std::shared_ptr<AttributeVector> _attr; @@ -229,27 +230,45 @@ struct Fixture { _cfg(BasicType::TENSOR, CollectionType::SINGLE), _name(attr_name), _typeSpec(typeSpec), - _index_factory(std::make_unique<DefaultNearestNeighborIndexFactory>()), + _use_mock_index(use_mock_index), + _index_factory(), _tensorAttr(), _attr(), _denseTensors(false), _useDenseTensorAttribute(useDenseTensorAttribute) { - _cfg.setTensorType(ValueType::from_spec(typeSpec)); + if (enable_hnsw_index) { + _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean)); + } + setup(); + } + + ~Fixture() {} + + void setup() { + _cfg.setTensorType(ValueType::from_spec(_typeSpec)); if (_cfg.tensorType().is_dense()) { _denseTensors = true; } - if (enable_hnsw_index) { - _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean)); - if (use_mock_index) { - _index_factory = std::make_unique<MockNearestNeighborIndexFactory>(); - } + if (_use_mock_index) { + _index_factory = std::make_unique<MockNearestNeighborIndexFactory>(); + } else { + _index_factory = std::make_unique<DefaultNearestNeighborIndexFactory>(); } _tensorAttr = makeAttr(); _attr = _tensorAttr; _attr->addReservedDoc(); } - ~Fixture() {} + + void set_hnsw_index_params(const HnswIndexParams ¶ms) { + _cfg.set_hnsw_index_params(params); + setup(); + } + + void disable_hnsw_index() { + _cfg.clear_hnsw_index_params(); + setup(); + } std::shared_ptr<TensorAttribute> makeAttr() { if (_useDenseTensorAttribute) { @@ -364,6 +383,9 @@ struct Fixture { return denseSpec; } + void set_example_tensors(); + void assert_example_tensors(); + void save_example_tensors_with_mock_index(); void testEmptyAttribute(); void testSetTensorValue(); void testSaveLoad(); @@ -372,6 +394,30 @@ struct Fixture { void testEmptyTensor(); }; + +void +Fixture::set_example_tensors() +{ + set_tensor(1, vec_2d(3, 5)); + set_tensor(2, vec_2d(7, 9)); +} + +void +Fixture::assert_example_tensors() +{ + assertGetTensor(vec_2d(3, 5), 1); + assertGetTensor(vec_2d(7, 9), 2); +} + +void +Fixture::save_example_tensors_with_mock_index() +{ + set_example_tensors(); + mock_index().save_index_with_value(123); + save(); + EXPECT_TRUE(vespalib::fileExists(_name + ".nnidx")); +} + void Fixture::testEmptyAttribute() { @@ -664,11 +710,7 @@ TEST_F("Memory usage is extracted from index when updating stats on attribute", TEST_F("Nearest neighbor index can be saved to disk and then loaded from file", DenseTensorAttributeMockIndex) { - f.set_tensor(1, vec_2d(3, 5)); - f.set_tensor(2, vec_2d(7, 9)); - f.mock_index().save_index_with_value(123); - f.save(); - EXPECT_TRUE(vespalib::fileExists(attr_name + ".nnidx")); + f.save_example_tensors_with_mock_index(); f.load(); // index is loaded from saved file auto& index = f.mock_index(); @@ -678,8 +720,7 @@ TEST_F("Nearest neighbor index can be saved to disk and then loaded from file", TEST_F("onLoad() reconstructs nearest neighbor index if save file does not exists", DenseTensorAttributeMockIndex) { - f.set_tensor(1, vec_2d(3, 5)); - f.set_tensor(2, vec_2d(7, 9)); + f.set_example_tensors(); f.save(); EXPECT_FALSE(vespalib::fileExists(attr_name + ".nnidx")); @@ -689,5 +730,36 @@ TEST_F("onLoad() reconstructs nearest neighbor index if save file does not exist index.expect_adds({{1, {3, 5}}, {2, {7, 9}}}); } +TEST_F("onLoads() ignores saved nearest neighbor index if not enabled in config", DenseTensorAttributeMockIndex) +{ + f.save_example_tensors_with_mock_index(); + f.disable_hnsw_index(); + f.load(); + f.assert_example_tensors(); + EXPECT_EQUAL(f.as_dense_tensor().nearest_neighbor_index(), nullptr); +} + +TEST_F("onLoad() ignores saved nearest neightbor index if major index parameters are changed", DenseTensorAttributeMockIndex) +{ + f.save_example_tensors_with_mock_index(); + f.set_hnsw_index_params(HnswIndexParams(5, 20, DistanceMetric::Euclidean)); + f.load(); + f.assert_example_tensors(); + auto& index = f.mock_index(); + EXPECT_EQUAL(0, index.get_index_value()); + index.expect_adds({{1, {3, 5}}, {2, {7, 9}}}); +} + +TEST_F("onLoad() uses saved nearest neightbor index if only minor index parameters are changed", DenseTensorAttributeMockIndex) +{ + f.save_example_tensors_with_mock_index(); + f.set_hnsw_index_params(HnswIndexParams(4, 21, DistanceMetric::Euclidean)); + f.load(); + f.assert_example_tensors(); + auto& index = f.mock_index(); + EXPECT_EQUAL(123, index.get_index_value()); + index.expect_adds({}); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 68ce0c1bb00..c9ed4039655 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -59,6 +59,21 @@ TensorReader::is_present() { return true; } +bool +can_use_index_save_file(const search::attribute::Config &config, const search::attribute::AttributeHeader &header) +{ + if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) { + return false; + } + const auto &config_params = config.hnsw_index_params().value(); + const auto &header_params = header.get_hnsw_index_params().value(); + if ((config_params.max_links_per_node() != header_params.max_links_per_node()) || + (config_params.distance_metric() != header_params.distance_metric())) { + return false; + } + return true; +} + } void @@ -152,6 +167,7 @@ DenseTensorAttribute::onLoad() return false; } bool has_index_file = LoadUtils::file_exists(*this, DenseTensorAttributeSaver::index_file_suffix()); + bool use_index_file = has_index_file && _index && can_use_index_save_file(getConfig(), search::attribute::AttributeHeader::extractTags(tensorReader.getDatHeader())); setCreateSerialNum(tensorReader.getCreateSerialNum()); assert(tensorReader.getVersion() == DENSE_TENSOR_ATTRIBUTE_VERSION); @@ -165,7 +181,7 @@ DenseTensorAttribute::onLoad() auto raw = _denseTensorStore.allocRawBuffer(); tensorReader.readTensor(raw.data, _denseTensorStore.getBufSize()); _refVector.push_back(raw.ref); - if (_index && !has_index_file) { + if (_index && !use_index_file) { // This ensures that get_vector() (via getTensor()) is able to find the newly added tensor. setCommittedDocIdLimit(lid + 1); _index->add_document(lid); @@ -176,7 +192,7 @@ DenseTensorAttribute::onLoad() } setNumDocs(numDocs); setCommittedDocIdLimit(numDocs); - if (_index && has_index_file) { + if (_index && use_index_file) { auto buffer = LoadUtils::loadFile(*this, DenseTensorAttributeSaver::index_file_suffix()); if (!_index->load(*buffer)) { return false; |