diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-02-20 15:15:22 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-20 15:15:22 +0100 |
commit | 8b9ddc1fea064f2851f540b9fdeff94d12c8ffa4 (patch) | |
tree | 94f8270a8a21d066ebd0b67e48272ab41ea0e60a /searchlib/src/tests | |
parent | f9661fed45e23cd170c77c668e30852f2f42d5b4 (diff) | |
parent | e59c8adf957ee3147ce6e8d90757c5882c494fb2 (diff) |
Merge pull request #12287 from vespa-engine/geirst/integrate-nearest-neighbor-index-in-dense-tensor-attribute
Integrate nearest neighbor index in dense tensor attribute
Diffstat (limited to 'searchlib/src/tests')
3 files changed, 275 insertions, 83 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index 7d09b2aa0b8..850a967ed3d 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -278,6 +278,22 @@ AttributeManagerTest::testConfigConvert() AttributeVector::Config out = ConfigConverter::convert(a); EXPECT_EQUAL("tensor(x[5])", out.tensorType().to_spec()); } + { // hnsw index params (enabled) + CACA a; + a.index.hnsw.enabled = true; + a.index.hnsw.maxlinkspernode = 32; + a.index.hnsw.neighborstoexploreatinsert = 300; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.hnsw_index_params().has_value()); + EXPECT_EQUAL(32u, out.hnsw_index_params().value().max_links_per_node()); + EXPECT_EQUAL(300u, out.hnsw_index_params().value().neighbors_to_explore_at_insert()); + } + { // hnsw index params (disabled) + CACA a; + a.index.hnsw.enabled = false; + auto out = ConfigConverter::convert(a); + EXPECT_FALSE(out.hnsw_index_params().has_value()); + } } bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) { diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 7e0fcdc0ccc..644230cb340 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -1,34 +1,48 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/document/base/exceptions.h> -#include <vespa/searchlib/tensor/tensor_attribute.h> -#include <vespa/searchlib/tensor/generic_tensor_attribute.h> -#include <vespa/searchlib/tensor/dense_tensor_attribute.h> -#include <vespa/searchlib/attribute/attributeguard.h> -#include <vespa/eval/tensor/tensor.h> -#include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/eval/tensor/default_tensor_engine.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/data/fileheader.h> +#include <vespa/eval/tensor/dense/dense_tensor.h> +#include <vespa/eval/tensor/tensor.h> #include <vespa/fastos/file.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h> +#include <vespa/searchlib/tensor/dense_tensor_attribute.h> +#include <vespa/searchlib/tensor/doc_vector_access.h> +#include <vespa/searchlib/tensor/generic_tensor_attribute.h> +#include <vespa/searchlib/tensor/hnsw_index.h> +#include <vespa/searchlib/tensor/nearest_neighbor_index.h> +#include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h> +#include <vespa/searchlib/tensor/tensor_attribute.h> +#include <vespa/vespalib/data/fileheader.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <vespa/vespalib/testkit/test_kit.h> + #include <vespa/log/log.h> LOG_SETUP("tensorattribute_test"); using document::WrongTensorTypeException; -using search::tensor::TensorAttribute; -using search::tensor::DenseTensorAttribute; -using search::tensor::GenericTensorAttribute; using search::AttributeGuard; using search::AttributeVector; -using vespalib::eval::ValueType; +using search::attribute::HnswIndexParams; +using search::tensor::DefaultNearestNeighborIndexFactory; +using search::tensor::DenseTensorAttribute; +using search::tensor::DocVectorAccess; +using search::tensor::GenericTensorAttribute; +using search::tensor::HnswIndex; +using search::tensor::NearestNeighborIndex; +using search::tensor::NearestNeighborIndexFactory; +using search::tensor::TensorAttribute; using vespalib::eval::TensorSpec; -using vespalib::tensor::Tensor; -using vespalib::tensor::DenseTensor; +using vespalib::eval::ValueType; using vespalib::tensor::DefaultTensorEngine; +using vespalib::tensor::DenseTensor; +using vespalib::tensor::Tensor; + +using DoubleVector = std::vector<double>; -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { static bool operator==(const Tensor &lhs, const Tensor &rhs) { @@ -36,10 +50,10 @@ static bool operator==(const Tensor &lhs, const Tensor &rhs) } } -} vespalib::string sparseSpec("tensor(x{},y{})"); vespalib::string denseSpec("tensor(x[2],y[3])"); +vespalib::string vec_2d_spec("tensor(x[2])"); Tensor::UP createTensor(const TensorSpec &spec) { auto value = DefaultTensorEngine::ref().from_spec(spec); @@ -52,6 +66,78 @@ Tensor::UP createTensor(const TensorSpec &spec) { return Tensor::UP(tensor); } +TensorSpec +vec_2d(double x0, double x1) +{ + return TensorSpec(vec_2d_spec).add({{"x", 0}}, x0).add({{"x", 1}}, x1); +} + +class MockNearestNeighborIndex : public NearestNeighborIndex { +private: + using Entry = std::pair<uint32_t, DoubleVector>; + using EntryVector = std::vector<Entry>; + + const DocVectorAccess& _vectors; + EntryVector _adds; + EntryVector _removes; + +public: + MockNearestNeighborIndex(const DocVectorAccess& vectors) + : _vectors(vectors), + _adds(), + _removes() + { + } + void clear() { + _adds.clear(); + _removes.clear(); + } + void expect_empty_add() const { + EXPECT_TRUE(_adds.empty()); + } + void expect_add(uint32_t exp_docid, const DoubleVector& exp_vector) const { + EXPECT_EQUAL(1u, _adds.size()); + EXPECT_EQUAL(exp_docid, _adds.back().first); + EXPECT_EQUAL(exp_vector, _adds.back().second); + } + void expect_adds(const EntryVector &exp_adds) const { + EXPECT_EQUAL(exp_adds, _adds); + } + void expect_empty_remove() const { + EXPECT_TRUE(_removes.empty()); + } + void expect_remove(uint32_t exp_docid, const DoubleVector& exp_vector) const { + EXPECT_EQUAL(1u, _removes.size()); + EXPECT_EQUAL(exp_docid, _removes.back().first); + EXPECT_EQUAL(exp_vector, _removes.back().second); + } + void add_document(uint32_t docid) override { + auto vector = _vectors.get_vector(docid).typify<double>(); + _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); + } + void remove_document(uint32_t docid) override { + auto vector = _vectors.get_vector(docid).typify<double>(); + _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); + } + std::vector<uint32_t> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) override { + (void) k; + (void) vector; + (void) explore_k; + return std::vector<uint32_t>(); + } +}; + +class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory { + + std::unique_ptr<NearestNeighborIndex> make(const DocVectorAccess& vectors, + ValueType::CellType cell_type, + const search::attribute::HnswIndexParams& params) const override { + (void) params; + assert(cell_type == ValueType::CellType::DOUBLE); + return std::make_unique<MockNearestNeighborIndex>(vectors); + } +}; + struct Fixture { using BasicType = search::attribute::BasicType; @@ -61,16 +147,20 @@ struct Fixture Config _cfg; vespalib::string _name; vespalib::string _typeSpec; + std::unique_ptr<NearestNeighborIndexFactory> _index_factory; std::shared_ptr<TensorAttribute> _tensorAttr; std::shared_ptr<AttributeVector> _attr; bool _denseTensors; bool _useDenseTensorAttribute; Fixture(const vespalib::string &typeSpec, - bool useDenseTensorAttribute = false) + bool useDenseTensorAttribute = false, + bool enable_hnsw_index = false, + bool use_mock_index = false) : _cfg(BasicType::TENSOR, CollectionType::SINGLE), _name("test"), _typeSpec(typeSpec), + _index_factory(std::make_unique<DefaultNearestNeighborIndexFactory>()), _tensorAttr(), _attr(), _denseTensors(false), @@ -80,20 +170,40 @@ struct Fixture if (_cfg.tensorType().is_dense()) { _denseTensors = true; } + if (enable_hnsw_index) { + _cfg.set_hnsw_index_params(HnswIndexParams(4, 20)); + if (use_mock_index) { + _index_factory = std::make_unique<MockNearestNeighborIndexFactory>(); + } + } _tensorAttr = makeAttr(); _attr = _tensorAttr; _attr->addReservedDoc(); } + ~Fixture() {} std::shared_ptr<TensorAttribute> makeAttr() { if (_useDenseTensorAttribute) { assert(_denseTensors); - return std::make_shared<DenseTensorAttribute>(_name, _cfg); + return std::make_shared<DenseTensorAttribute>(_name, _cfg, *_index_factory); } else { return std::make_shared<GenericTensorAttribute>(_name, _cfg); } } + const DenseTensorAttribute& as_dense_tensor() const { + auto result = dynamic_cast<const DenseTensorAttribute*>(_tensorAttr.get()); + assert(result != nullptr); + return *result; + } + + MockNearestNeighborIndex& mock_index() { + assert(as_dense_tensor().nearest_neighbor_index() != nullptr); + auto mock_index = dynamic_cast<const MockNearestNeighborIndex*>(as_dense_tensor().nearest_neighbor_index()); + assert(mock_index != nullptr); + return *const_cast<MockNearestNeighborIndex*>(mock_index); + } + void ensureSpace(uint32_t docId) { while (_attr->getNumDocs() <= docId) { uint32_t newDocId = 0u; @@ -108,7 +218,15 @@ struct Fixture _attr->commit(); } - void setTensor(uint32_t docId, const Tensor &tensor) { + void set_tensor(uint32_t docid, const TensorSpec &spec) { + set_tensor_internal(docid, *createTensor(spec)); + } + + void set_empty_tensor(uint32_t docid) { + set_tensor_internal(docid, *_tensorAttr->getEmptyTensor()); + } + + void set_tensor_internal(uint32_t docId, const Tensor &tensor) { ensureSpace(docId); _tensorAttr->setTensor(docId, tensor); _attr->commit(); @@ -119,27 +237,18 @@ struct Fixture return _attr->getStatus(); } - void - assertGetNoTensor(uint32_t docId) { + void assertGetNoTensor(uint32_t docId) { AttributeGuard guard(_attr); Tensor::UP actTensor = _tensorAttr->getTensor(docId); EXPECT_FALSE(actTensor); } - void - assertGetTensor(const Tensor &expTensor, uint32_t docId) - { + void assertGetTensor(const TensorSpec &expSpec, uint32_t docId) { + Tensor::UP expTensor = createTensor(expSpec); AttributeGuard guard(_attr); Tensor::UP actTensor = _tensorAttr->getTensor(docId); EXPECT_TRUE(static_cast<bool>(actTensor)); - EXPECT_EQUAL(expTensor, *actTensor); - } - - void - assertGetTensor(const TensorSpec &expSpec, uint32_t docId) - { - Tensor::UP expTensor = createTensor(expSpec); - assertGetTensor(*expTensor, docId); + EXPECT_EQUAL(*expTensor, *actTensor); } void save() { @@ -154,23 +263,20 @@ struct Fixture EXPECT_TRUE(loadok); } - Tensor::UP expDenseTensor3() const - { - return createTensor(TensorSpec(denseSpec) - .add({{"x", 0}, {"y", 1}}, 11) - .add({{"x", 1}, {"y", 2}}, 0)); + TensorSpec expDenseTensor3() const { + return TensorSpec(denseSpec) + .add({{"x", 0}, {"y", 1}}, 11) + .add({{"x", 1}, {"y", 2}}, 0); } - Tensor::UP expDenseFillTensor() const - { - return createTensor(TensorSpec(denseSpec) - .add({{"x", 0}, {"y", 0}}, 5) - .add({{"x", 1}, {"y", 2}}, 0)); + TensorSpec expDenseFillTensor() const { + return TensorSpec(denseSpec) + .add({{"x", 0}, {"y", 0}}, 5) + .add({{"x", 1}, {"y", 2}}, 0); } - Tensor::UP expEmptyDenseTensor() const - { - return createTensor(TensorSpec(denseSpec)); + TensorSpec expEmptyDenseTensor() const { + return TensorSpec(denseSpec); } vespalib::string expEmptyDenseTensorSpec() const { @@ -200,21 +306,21 @@ Fixture::testSetTensorValue() EXPECT_EQUAL(5u, _attr->getNumDocs()); EXPECT_EQUAL(5u, _attr->getCommittedDocIdLimit()); TEST_DO(assertGetNoTensor(4)); - EXPECT_EXCEPTION(setTensor(4, *createTensor(TensorSpec("double"))), + EXPECT_EXCEPTION(set_tensor(4, TensorSpec("double")), WrongTensorTypeException, "but other tensor type is 'double'"); TEST_DO(assertGetNoTensor(4)); - setTensor(4, *_tensorAttr->getEmptyTensor()); + set_empty_tensor(4); if (_denseTensors) { - TEST_DO(assertGetTensor(*expEmptyDenseTensor(), 4)); - setTensor(3, *expDenseTensor3()); - TEST_DO(assertGetTensor(*expDenseTensor3(), 3)); + TEST_DO(assertGetTensor(expEmptyDenseTensor(), 4)); + set_tensor(3, expDenseTensor3()); + TEST_DO(assertGetTensor(expDenseTensor3(), 3)); } else { TEST_DO(assertGetTensor(TensorSpec(sparseSpec), 4)); - setTensor(3, *createTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", ""}}, 11))); + set_tensor(3, TensorSpec(sparseSpec) + .add({{"x", ""}, {"y", ""}}, 11)); TEST_DO(assertGetTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", ""}}, 11), 3)); + .add({{"x", ""}, {"y", ""}}, 11), 3)); } TEST_DO(assertGetNoTensor(2)); TEST_DO(clearTensor(3)); @@ -225,23 +331,23 @@ void Fixture::testSaveLoad() { ensureSpace(4); - setTensor(4, *_tensorAttr->getEmptyTensor()); + set_empty_tensor(4); if (_denseTensors) { - setTensor(3, *expDenseTensor3()); + set_tensor(3, expDenseTensor3()); } else { - setTensor(3, *createTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", "1"}}, 11))); + set_tensor(3, TensorSpec(sparseSpec) + .add({{"x", ""}, {"y", "1"}}, 11)); } TEST_DO(save()); TEST_DO(load()); EXPECT_EQUAL(5u, _attr->getNumDocs()); EXPECT_EQUAL(5u, _attr->getCommittedDocIdLimit()); if (_denseTensors) { - TEST_DO(assertGetTensor(*expDenseTensor3(), 3)); - TEST_DO(assertGetTensor(*expEmptyDenseTensor(), 4)); + TEST_DO(assertGetTensor(expDenseTensor3(), 3)); + TEST_DO(assertGetTensor(expEmptyDenseTensor(), 4)); } else { TEST_DO(assertGetTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", "1"}}, 11), 3)); + .add({{"x", ""}, {"y", "1"}}, 11), 3)); TEST_DO(assertGetTensor(TensorSpec(sparseSpec), 4)); } TEST_DO(assertGetNoTensor(2)); @@ -256,29 +362,28 @@ Fixture::testCompaction() return; } ensureSpace(4); - Tensor::UP emptytensor = _tensorAttr->getEmptyTensor(); - Tensor::UP emptyxytensor = createTensor(TensorSpec(sparseSpec)); - Tensor::UP simpletensor = createTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", "1"}}, 11)); - Tensor::UP filltensor = createTensor(TensorSpec(sparseSpec) - .add({{"x", ""}, {"y", ""}}, 5)); + TensorSpec empty_xy_tensor(sparseSpec); + TensorSpec simple_tensor = TensorSpec(sparseSpec) + .add({{"x", ""}, {"y", "1"}}, 11); + TensorSpec fill_tensor = TensorSpec(sparseSpec) + .add({{"x", ""}, {"y", ""}}, 5); if (_denseTensors) { - emptyxytensor = expEmptyDenseTensor(); - simpletensor = expDenseTensor3(); - filltensor = expDenseFillTensor(); + empty_xy_tensor = expEmptyDenseTensor(); + simple_tensor = expDenseTensor3(); + fill_tensor = expDenseFillTensor(); } - setTensor(4, *emptytensor); - setTensor(3, *simpletensor); - setTensor(2, *filltensor); + set_empty_tensor(4); + set_tensor(3, simple_tensor); + set_tensor(2, fill_tensor); clearTensor(2); - setTensor(2, *filltensor); + set_tensor(2, fill_tensor); search::attribute::Status oldStatus = getStatus(); search::attribute::Status newStatus = oldStatus; uint64_t iter = 0; uint64_t iterLimit = 100000; for (; iter < iterLimit; ++iter) { clearTensor(2); - setTensor(2, *filltensor); + set_tensor(2, fill_tensor); newStatus = getStatus(); if (newStatus.getUsed() < oldStatus.getUsed()) { break; @@ -290,9 +395,9 @@ Fixture::testCompaction() "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64, iter, oldStatus.getUsed(), newStatus.getUsed()); TEST_DO(assertGetNoTensor(1)); - TEST_DO(assertGetTensor(*filltensor, 2)); - TEST_DO(assertGetTensor(*simpletensor, 3)); - TEST_DO(assertGetTensor(*emptyxytensor, 4)); + TEST_DO(assertGetTensor(fill_tensor, 2)); + TEST_DO(assertGetTensor(simple_tensor, 3)); + TEST_DO(assertGetTensor(empty_xy_tensor, 4)); } void @@ -357,4 +462,73 @@ TEST("Test dense tensors with dense tensor attribute") testAll([]() { return std::make_shared<Fixture>(denseSpec, true); }); } +TEST_F("Hnsw index is NOT instantiated in dense tensor attribute by default", + Fixture(vec_2d_spec, true, false)) +{ + const auto& tensor = f.as_dense_tensor(); + EXPECT_TRUE(tensor.nearest_neighbor_index() == nullptr); +} + +TEST_F("Hnsw index is instantiated in dense tensor attribute when specified in config", + Fixture(vec_2d_spec, true, true)) +{ + const auto& tensor = f.as_dense_tensor(); + ASSERT_TRUE(tensor.nearest_neighbor_index() != nullptr); + auto hnsw_index = dynamic_cast<const HnswIndex*>(tensor.nearest_neighbor_index()); + ASSERT_TRUE(hnsw_index != nullptr); + + const auto& cfg = hnsw_index->config(); + EXPECT_EQUAL(8u, cfg.max_links_at_level_0()); + EXPECT_EQUAL(4u, cfg.max_links_at_hierarchic_levels()); + EXPECT_EQUAL(20u, cfg.neighbors_to_explore_at_construction()); + EXPECT_TRUE(cfg.heuristic_select_neighbors()); +} + +class DenseTensorAttributeMockIndex : public Fixture { +public: + DenseTensorAttributeMockIndex() : Fixture(vec_2d_spec, true, true, true) {} +}; + +TEST_F("setTensor() updates nearest neighbor index", DenseTensorAttributeMockIndex) +{ + auto& index = f.mock_index(); + + f.set_tensor(1, vec_2d(3, 5)); + index.expect_add(1, {3, 5}); + index.expect_empty_remove(); + index.clear(); + + // Replaces previous value. + f.set_tensor(1, vec_2d(7, 9)); + index.expect_remove(1, {3, 5}); + index.expect_add(1, {7, 9}); +} + +TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex) +{ + auto& index = f.mock_index(); + + // Nothing to clear. + f.clearTensor(1); + index.expect_empty_remove(); + index.expect_empty_add(); + + // Clears previous value. + f.set_tensor(1, vec_2d(3, 5)); + index.clear(); + f.clearTensor(1); + index.expect_remove(1, {3, 5}); + index.expect_empty_add(); +} + +TEST_F("onLoad() updates nearest neighbor index", DenseTensorAttributeMockIndex) +{ + f.set_tensor(1, vec_2d(3, 5)); + f.set_tensor(2, vec_2d(7, 9)); + f.save(); + f.load(); + auto& index = f.mock_index(); + index.expect_adds({{1, {3, 5}}, {2, {7, 9}}}); +} + TEST_MAIN() { TEST_RUN_ALL(); vespalib::unlink("test.dat"); } diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index c6246bb8434..cd0d4bcaad0 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -48,8 +48,7 @@ using HnswIndexUP = std::unique_ptr<HnswIndex>; class HnswIndexTest : public ::testing::Test { public: FloatVectors vectors; - FloatSqEuclideanDistance distance_func; - LevelGenerator level_generator; + LevelGenerator* level_generator; HnswIndexUP index; HnswIndexTest() @@ -62,11 +61,14 @@ public: .set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5}); } void init(bool heuristic_select_neighbors) { - index = std::make_unique<HnswIndex>(vectors, distance_func, level_generator, + auto generator = std::make_unique<LevelGenerator>(); + level_generator = generator.get(); + index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(), + std::move(generator), HnswIndex::Config(2, 1, 10, heuristic_select_neighbors)); } void add_document(uint32_t docid, uint32_t max_level = 0) { - level_generator.level = max_level; + level_generator->level = max_level; index->add_document(docid); } void remove_document(uint32_t docid) { |