summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2020-04-24 14:31:22 +0200
committerTor Egge <Tor.Egge@broadpark.no>2020-04-24 15:37:45 +0200
commitc09675a41768642d37390c00cb05ecfc533ca5d1 (patch)
tree38e5dd7d1725a8d6651894d408ea1e7d158263e2
parent24eaf1642c012eb2f9f76a39102e87fb82874321 (diff)
Ignore hnsw index attribute save file if major parameters are changed.
-rw-r--r--searchcommon/src/vespa/searchcommon/attribute/config.h4
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp102
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp20
3 files changed, 109 insertions, 17 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.h b/searchcommon/src/vespa/searchcommon/attribute/config.h
index 836fcfed84a..df5aa9e217a 100644
--- a/searchcommon/src/vespa/searchcommon/attribute/config.h
+++ b/searchcommon/src/vespa/searchcommon/attribute/config.h
@@ -71,6 +71,10 @@ public:
_hnsw_index_params = params;
return *this;
}
+ Config& clear_hnsw_index_params() {
+ _hnsw_index_params.reset();
+ return *this;
+ }
/**
* Enable attribute posting list to consist of a bitvector in
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 39a7e53ca8c..a008bc73e93 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -215,6 +215,7 @@ struct Fixture {
Config _cfg;
vespalib::string _name;
vespalib::string _typeSpec;
+ bool _use_mock_index;
std::unique_ptr<NearestNeighborIndexFactory> _index_factory;
std::shared_ptr<TensorAttribute> _tensorAttr;
std::shared_ptr<AttributeVector> _attr;
@@ -229,27 +230,45 @@ struct Fixture {
_cfg(BasicType::TENSOR, CollectionType::SINGLE),
_name(attr_name),
_typeSpec(typeSpec),
- _index_factory(std::make_unique<DefaultNearestNeighborIndexFactory>()),
+ _use_mock_index(use_mock_index),
+ _index_factory(),
_tensorAttr(),
_attr(),
_denseTensors(false),
_useDenseTensorAttribute(useDenseTensorAttribute)
{
- _cfg.setTensorType(ValueType::from_spec(typeSpec));
+ if (enable_hnsw_index) {
+ _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean));
+ }
+ setup();
+ }
+
+ ~Fixture() {}
+
+ void setup() {
+ _cfg.setTensorType(ValueType::from_spec(_typeSpec));
if (_cfg.tensorType().is_dense()) {
_denseTensors = true;
}
- if (enable_hnsw_index) {
- _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean));
- if (use_mock_index) {
- _index_factory = std::make_unique<MockNearestNeighborIndexFactory>();
- }
+ if (_use_mock_index) {
+ _index_factory = std::make_unique<MockNearestNeighborIndexFactory>();
+ } else {
+ _index_factory = std::make_unique<DefaultNearestNeighborIndexFactory>();
}
_tensorAttr = makeAttr();
_attr = _tensorAttr;
_attr->addReservedDoc();
}
- ~Fixture() {}
+
+ void set_hnsw_index_params(const HnswIndexParams &params) {
+ _cfg.set_hnsw_index_params(params);
+ setup();
+ }
+
+ void disable_hnsw_index() {
+ _cfg.clear_hnsw_index_params();
+ setup();
+ }
std::shared_ptr<TensorAttribute> makeAttr() {
if (_useDenseTensorAttribute) {
@@ -364,6 +383,9 @@ struct Fixture {
return denseSpec;
}
+ void set_example_tensors();
+ void assert_example_tensors();
+ void save_example_tensors_with_mock_index();
void testEmptyAttribute();
void testSetTensorValue();
void testSaveLoad();
@@ -372,6 +394,30 @@ struct Fixture {
void testEmptyTensor();
};
+
+void
+Fixture::set_example_tensors()
+{
+ set_tensor(1, vec_2d(3, 5));
+ set_tensor(2, vec_2d(7, 9));
+}
+
+void
+Fixture::assert_example_tensors()
+{
+ assertGetTensor(vec_2d(3, 5), 1);
+ assertGetTensor(vec_2d(7, 9), 2);
+}
+
+void
+Fixture::save_example_tensors_with_mock_index()
+{
+ set_example_tensors();
+ mock_index().save_index_with_value(123);
+ save();
+ EXPECT_TRUE(vespalib::fileExists(_name + ".nnidx"));
+}
+
void
Fixture::testEmptyAttribute()
{
@@ -664,11 +710,7 @@ TEST_F("Memory usage is extracted from index when updating stats on attribute",
TEST_F("Nearest neighbor index can be saved to disk and then loaded from file", DenseTensorAttributeMockIndex)
{
- f.set_tensor(1, vec_2d(3, 5));
- f.set_tensor(2, vec_2d(7, 9));
- f.mock_index().save_index_with_value(123);
- f.save();
- EXPECT_TRUE(vespalib::fileExists(attr_name + ".nnidx"));
+ f.save_example_tensors_with_mock_index();
f.load(); // index is loaded from saved file
auto& index = f.mock_index();
@@ -678,8 +720,7 @@ TEST_F("Nearest neighbor index can be saved to disk and then loaded from file",
TEST_F("onLoad() reconstructs nearest neighbor index if save file does not exists", DenseTensorAttributeMockIndex)
{
- f.set_tensor(1, vec_2d(3, 5));
- f.set_tensor(2, vec_2d(7, 9));
+ f.set_example_tensors();
f.save();
EXPECT_FALSE(vespalib::fileExists(attr_name + ".nnidx"));
@@ -689,5 +730,36 @@ TEST_F("onLoad() reconstructs nearest neighbor index if save file does not exist
index.expect_adds({{1, {3, 5}}, {2, {7, 9}}});
}
+TEST_F("onLoads() ignores saved nearest neighbor index if not enabled in config", DenseTensorAttributeMockIndex)
+{
+ f.save_example_tensors_with_mock_index();
+ f.disable_hnsw_index();
+ f.load();
+ f.assert_example_tensors();
+ EXPECT_EQUAL(f.as_dense_tensor().nearest_neighbor_index(), nullptr);
+}
+
+TEST_F("onLoad() ignores saved nearest neightbor index if major index parameters are changed", DenseTensorAttributeMockIndex)
+{
+ f.save_example_tensors_with_mock_index();
+ f.set_hnsw_index_params(HnswIndexParams(5, 20, DistanceMetric::Euclidean));
+ f.load();
+ f.assert_example_tensors();
+ auto& index = f.mock_index();
+ EXPECT_EQUAL(0, index.get_index_value());
+ index.expect_adds({{1, {3, 5}}, {2, {7, 9}}});
+}
+
+TEST_F("onLoad() uses saved nearest neightbor index if only minor index parameters are changed", DenseTensorAttributeMockIndex)
+{
+ f.save_example_tensors_with_mock_index();
+ f.set_hnsw_index_params(HnswIndexParams(4, 21, DistanceMetric::Euclidean));
+ f.load();
+ f.assert_example_tensors();
+ auto& index = f.mock_index();
+ EXPECT_EQUAL(123, index.get_index_value());
+ index.expect_adds({});
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 68ce0c1bb00..c9ed4039655 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -59,6 +59,21 @@ TensorReader::is_present() {
return true;
}
+bool
+can_use_index_save_file(const search::attribute::Config &config, const search::attribute::AttributeHeader &header)
+{
+ if (!config.hnsw_index_params().has_value() || !header.get_hnsw_index_params().has_value()) {
+ return false;
+ }
+ const auto &config_params = config.hnsw_index_params().value();
+ const auto &header_params = header.get_hnsw_index_params().value();
+ if ((config_params.max_links_per_node() != header_params.max_links_per_node()) ||
+ (config_params.distance_metric() != header_params.distance_metric())) {
+ return false;
+ }
+ return true;
+}
+
}
void
@@ -152,6 +167,7 @@ DenseTensorAttribute::onLoad()
return false;
}
bool has_index_file = LoadUtils::file_exists(*this, DenseTensorAttributeSaver::index_file_suffix());
+ bool use_index_file = has_index_file && _index && can_use_index_save_file(getConfig(), search::attribute::AttributeHeader::extractTags(tensorReader.getDatHeader()));
setCreateSerialNum(tensorReader.getCreateSerialNum());
assert(tensorReader.getVersion() == DENSE_TENSOR_ATTRIBUTE_VERSION);
@@ -165,7 +181,7 @@ DenseTensorAttribute::onLoad()
auto raw = _denseTensorStore.allocRawBuffer();
tensorReader.readTensor(raw.data, _denseTensorStore.getBufSize());
_refVector.push_back(raw.ref);
- if (_index && !has_index_file) {
+ if (_index && !use_index_file) {
// This ensures that get_vector() (via getTensor()) is able to find the newly added tensor.
setCommittedDocIdLimit(lid + 1);
_index->add_document(lid);
@@ -176,7 +192,7 @@ DenseTensorAttribute::onLoad()
}
setNumDocs(numDocs);
setCommittedDocIdLimit(numDocs);
- if (_index && has_index_file) {
+ if (_index && use_index_file) {
auto buffer = LoadUtils::loadFile(*this, DenseTensorAttributeSaver::index_file_suffix());
if (!_index->load(*buffer)) {
return false;