summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-02-20 12:57:06 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-02-20 12:57:06 +0000
commitfd74211c61afbdac5cec8b7fa9956f53d909b8c4 (patch)
tree29834fde87b80d3f5df8bb1ee76a8b9a98b2a17c /searchlib
parenta04b22c8b07d9d03d374c1732126f4502a981319 (diff)
Update nearest neighbor index when tensor values change.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp157
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h17
3 files changed, 187 insertions, 11 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 69d9e3c8393..88118ca4204 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -6,12 +6,17 @@
#include <vespa/eval/tensor/tensor.h>
#include <vespa/fastos/file.h>
#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
+#include <vespa/searchlib/tensor/doc_vector_access.h>
#include <vespa/searchlib/tensor/generic_tensor_attribute.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
+#include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h>
#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vespalib/test/insertion_operators.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/log/log.h>
@@ -21,9 +26,13 @@ using document::WrongTensorTypeException;
using search::AttributeGuard;
using search::AttributeVector;
using search::attribute::HnswIndexParams;
+using search::tensor::DefaultNearestNeighborIndexFactory;
using search::tensor::DenseTensorAttribute;
+using search::tensor::DocVectorAccess;
using search::tensor::GenericTensorAttribute;
using search::tensor::HnswIndex;
+using search::tensor::NearestNeighborIndex;
+using search::tensor::NearestNeighborIndexFactory;
using search::tensor::TensorAttribute;
using vespalib::eval::TensorSpec;
using vespalib::eval::ValueType;
@@ -31,6 +40,8 @@ using vespalib::tensor::DefaultTensorEngine;
using vespalib::tensor::DenseTensor;
using vespalib::tensor::Tensor;
+using DoubleVector = std::vector<double>;
+
namespace vespalib::tensor {
static bool operator==(const Tensor &lhs, const Tensor &rhs)
@@ -42,6 +53,7 @@ static bool operator==(const Tensor &lhs, const Tensor &rhs)
vespalib::string sparseSpec("tensor(x{},y{})");
vespalib::string denseSpec("tensor(x[2],y[3])");
+vespalib::string vec_2d_spec("tensor(x[2])");
Tensor::UP createTensor(const TensorSpec &spec) {
auto value = DefaultTensorEngine::ref().from_spec(spec);
@@ -54,6 +66,78 @@ Tensor::UP createTensor(const TensorSpec &spec) {
return Tensor::UP(tensor);
}
+TensorSpec
+vec_2d(double x0, double x1)
+{
+ return TensorSpec(vec_2d_spec).add({{"x", 0}}, x0).add({{"x", 1}}, x1);
+}
+
+class MockNearestNeighborIndex : public NearestNeighborIndex {
+private:
+ using Entry = std::pair<uint32_t, DoubleVector>;
+ using EntryVector = std::vector<Entry>;
+
+ const DocVectorAccess& _vectors;
+ EntryVector _adds;
+ EntryVector _removes;
+
+public:
+ MockNearestNeighborIndex(const DocVectorAccess& vectors)
+ : _vectors(vectors),
+ _adds(),
+ _removes()
+ {
+ }
+ void clear() {
+ _adds.clear();
+ _removes.clear();
+ }
+ void expect_empty_add() const {
+ EXPECT_TRUE(_adds.empty());
+ }
+ void expect_add(uint32_t exp_docid, const DoubleVector& exp_vector) const {
+ EXPECT_EQUAL(1u, _adds.size());
+ EXPECT_EQUAL(exp_docid, _adds.back().first);
+ EXPECT_EQUAL(exp_vector, _adds.back().second);
+ }
+ void expect_adds(const EntryVector &exp_adds) const {
+ EXPECT_EQUAL(exp_adds, _adds);
+ }
+ void expect_empty_remove() const {
+ EXPECT_TRUE(_removes.empty());
+ }
+ void expect_remove(uint32_t exp_docid, const DoubleVector& exp_vector) const {
+ EXPECT_EQUAL(1u, _removes.size());
+ EXPECT_EQUAL(exp_docid, _removes.back().first);
+ EXPECT_EQUAL(exp_vector, _removes.back().second);
+ }
+ void add_document(uint32_t docid) override {
+ auto vector = _vectors.get_vector(docid).typify<double>();
+ _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
+ }
+ void remove_document(uint32_t docid) override {
+ auto vector = _vectors.get_vector(docid).typify<double>();
+ _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
+ }
+ std::vector<uint32_t> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) override {
+ (void) k;
+ (void) vector;
+ (void) explore_k;
+ return std::vector<uint32_t>();
+ }
+};
+
+class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory {
+
+ std::unique_ptr<NearestNeighborIndex> make(const DocVectorAccess& vectors,
+ ValueType::CellType cell_type,
+ const search::attribute::HnswIndexParams& params) const override {
+ (void) params;
+ assert(cell_type == ValueType::CellType::DOUBLE);
+ return std::make_unique<MockNearestNeighborIndex>(vectors);
+ }
+};
+
struct Fixture
{
using BasicType = search::attribute::BasicType;
@@ -63,6 +147,7 @@ struct Fixture
Config _cfg;
vespalib::string _name;
vespalib::string _typeSpec;
+ std::unique_ptr<NearestNeighborIndexFactory> _index_factory;
std::shared_ptr<TensorAttribute> _tensorAttr;
std::shared_ptr<AttributeVector> _attr;
bool _denseTensors;
@@ -70,10 +155,12 @@ struct Fixture
Fixture(const vespalib::string &typeSpec,
bool useDenseTensorAttribute = false,
- bool enable_hnsw_index = false)
+ bool enable_hnsw_index = false,
+ bool use_mock_index = false)
: _cfg(BasicType::TENSOR, CollectionType::SINGLE),
_name("test"),
_typeSpec(typeSpec),
+ _index_factory(std::make_unique<DefaultNearestNeighborIndexFactory>()),
_tensorAttr(),
_attr(),
_denseTensors(false),
@@ -85,16 +172,20 @@ struct Fixture
}
if (enable_hnsw_index) {
_cfg.set_hnsw_index_params(HnswIndexParams(4, 20));
+ if (use_mock_index) {
+ _index_factory = std::make_unique<MockNearestNeighborIndexFactory>();
+ }
}
_tensorAttr = makeAttr();
_attr = _tensorAttr;
_attr->addReservedDoc();
}
+ ~Fixture() {}
std::shared_ptr<TensorAttribute> makeAttr() {
if (_useDenseTensorAttribute) {
assert(_denseTensors);
- return std::make_shared<DenseTensorAttribute>(_name, _cfg);
+ return std::make_shared<DenseTensorAttribute>(_name, _cfg, *_index_factory);
} else {
return std::make_shared<GenericTensorAttribute>(_name, _cfg);
}
@@ -106,6 +197,13 @@ struct Fixture
return *result;
}
+ MockNearestNeighborIndex& mock_index() {
+ assert(as_dense_tensor().nearest_neighbor_index() != nullptr);
+ auto mock_index = dynamic_cast<const MockNearestNeighborIndex*>(as_dense_tensor().nearest_neighbor_index());
+ assert(mock_index != nullptr);
+ return *const_cast<MockNearestNeighborIndex*>(mock_index);
+ }
+
void ensureSpace(uint32_t docId) {
while (_attr->getNumDocs() <= docId) {
uint32_t newDocId = 0u;
@@ -120,6 +218,10 @@ struct Fixture
_attr->commit();
}
+ void set_tensor(uint32_t docid, const TensorSpec &spec) {
+ setTensor(docid, *createTensor(spec));
+ }
+
void setTensor(uint32_t docId, const Tensor &tensor) {
ensureSpace(docId);
_tensorAttr->setTensor(docId, tensor);
@@ -370,14 +472,14 @@ TEST("Test dense tensors with dense tensor attribute")
}
TEST_F("Hnsw index is NOT instantiated in dense tensor attribute by default",
- Fixture("tensor(x[2])", true, false))
+ Fixture(vec_2d_spec, true, false))
{
const auto& tensor = f.as_dense_tensor();
EXPECT_TRUE(tensor.nearest_neighbor_index() == nullptr);
}
TEST_F("Hnsw index is instantiated in dense tensor attribute when specified in config",
- Fixture("tensor(x[2])", true, true))
+ Fixture(vec_2d_spec, true, true))
{
const auto& tensor = f.as_dense_tensor();
ASSERT_TRUE(tensor.nearest_neighbor_index() != nullptr);
@@ -391,4 +493,51 @@ TEST_F("Hnsw index is instantiated in dense tensor attribute when specified in c
EXPECT_TRUE(cfg.heuristic_select_neighbors());
}
+class DenseTensorAttributeMockIndex : public Fixture {
+public:
+ DenseTensorAttributeMockIndex() : Fixture(vec_2d_spec, true, true, true) {}
+};
+
+TEST_F("setTensor() updates nearest neighbor index", DenseTensorAttributeMockIndex)
+{
+ auto& index = f.mock_index();
+
+ f.set_tensor(1, vec_2d(3, 5));
+ index.expect_add(1, {3, 5});
+ index.expect_empty_remove();
+ index.clear();
+
+ // Replaces previous value.
+ f.set_tensor(1, vec_2d(7, 9));
+ index.expect_remove(1, {3, 5});
+ index.expect_add(1, {7, 9});
+}
+
+TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex)
+{
+ auto& index = f.mock_index();
+
+ // Nothing to clear.
+ f.clearTensor(1);
+ index.expect_empty_remove();
+ index.expect_empty_add();
+
+ // Clears previous value.
+ f.set_tensor(1, vec_2d(3, 5));
+ index.clear();
+ f.clearTensor(1);
+ index.expect_remove(1, {3, 5});
+ index.expect_empty_add();
+}
+
+TEST_F("onLoad() updates nearest neighbor index", DenseTensorAttributeMockIndex)
+{
+ f.set_tensor(1, vec_2d(3, 5));
+ f.set_tensor(2, vec_2d(7, 9));
+ f.save();
+ f.load();
+ auto& index = f.mock_index();
+ index.expect_adds({{1, {3, 5}}, {2, {7, 9}}});
+}
+
TEST_MAIN() { TEST_RUN_ALL(); vespalib::unlink("test.dat"); }
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index d4786cc2e1a..171340e07f1 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -56,6 +56,14 @@ TensorReader::is_present() {
}
+void
+DenseTensorAttribute::consider_remove_from_index(DocId docid)
+{
+ if (_index && _refVector[docid].valid()) {
+ _index->remove_document(docid);
+ }
+}
+
DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg,
const NearestNeighborIndexFactory& index_factory)
: TensorAttribute(baseFileName, cfg, _denseTensorStore),
@@ -74,12 +82,23 @@ DenseTensorAttribute::~DenseTensorAttribute()
_tensorStore.clearHoldLists();
}
+uint32_t
+DenseTensorAttribute::clearDoc(DocId docId)
+{
+ consider_remove_from_index(docId);
+ return TensorAttribute::clearDoc(docId);
+}
+
void
DenseTensorAttribute::setTensor(DocId docId, const Tensor &tensor)
{
checkTensorType(tensor);
+ consider_remove_from_index(docId);
EntryRef ref = _denseTensorStore.setTensor(tensor);
setTensorRef(docId, ref);
+ if (_index) {
+ _index->add_document(docId);
+ }
}
@@ -125,6 +144,11 @@ DenseTensorAttribute::onLoad()
auto raw = _denseTensorStore.allocRawBuffer();
tensorReader.readTensor(raw.data, _denseTensorStore.getBufSize());
_refVector.push_back(raw.ref);
+ if (_index) {
+ // This ensures that get_vector() (via getTensor()) is able to find the newly added tensor.
+ setCommittedDocIdLimit(lid + 1);
+ _index->add_document(lid);
+ }
} else {
_refVector.push_back(EntryRef());
}
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index 74c93aca212..f9a8a81b56b 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -23,18 +23,21 @@ private:
DenseTensorStore _denseTensorStore;
std::unique_ptr<NearestNeighborIndex> _index;
+ void consider_remove_from_index(DocId docid);
+
public:
DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg,
const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory());
virtual ~DenseTensorAttribute();
// Implements TensorAttribute
- virtual void setTensor(DocId docId, const Tensor &tensor) override;
- virtual std::unique_ptr<Tensor> getTensor(DocId docId) const override;
- virtual void getTensor(DocId docId, vespalib::tensor::MutableDenseTensorView &tensor) const override;
- virtual bool onLoad() override;
- virtual std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override;
- virtual void compactWorst() override;
- virtual uint32_t getVersion() const override;
+ uint32_t clearDoc(DocId docId) override;
+ void setTensor(DocId docId, const Tensor &tensor) override;
+ std::unique_ptr<Tensor> getTensor(DocId docId) const override;
+ void getTensor(DocId docId, vespalib::tensor::MutableDenseTensorView &tensor) const override;
+ bool onLoad() override;
+ std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override;
+ void compactWorst() override;
+ uint32_t getVersion() const override;
// Implements DocVectorAccess
vespalib::tensor::TypedCells get_vector(uint32_t docid) const override;