summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2022-06-13 13:21:58 +0200
committerGitHub <noreply@github.com>2022-06-13 13:21:58 +0200
commit728487b2d7a8bacca478840feef908523159b4bb (patch)
treeeae871410e6dca876df2d393b4620611ee520668 /searchlib
parent9ed4500b380ce77c1dc9f303006c02ba8096ee42 (diff)
Revert "Don't make changes to the hnsw index when the inserted tensor is unchanged"
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp30
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h30
8 files changed, 7 insertions, 90 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 72b2f1e320a..bf0b74b0003 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -157,12 +157,6 @@ public:
void expect_empty_add() const {
EXPECT_TRUE(_adds.empty());
}
- void expect_empty_prepare_add() const {
- EXPECT_TRUE(_prepare_adds.empty());
- }
- void expect_empty_complete_add() const {
- EXPECT_TRUE(_complete_adds.empty());
- }
void expect_entry(uint32_t exp_docid, const DoubleVector& exp_vector, const EntryVector& entries) const {
EXPECT_EQUAL(1u, entries.size());
EXPECT_EQUAL(exp_docid, entries.back().first);
@@ -887,30 +881,6 @@ TEST_F("nearest neighbor index can be updated in two phases", DenseTensorAttribu
}
}
-TEST_F("nearest neighbor index is NOT updated when tensor value is unchanged", DenseTensorAttributeMockIndex)
-{
- auto& index = f.mock_index();
- {
- auto vec_a = vec_2d(3, 5);
- auto prepare_result = f.prepare_set_tensor(1, vec_a);
- index.expect_prepare_add(1, {3, 5});
- f.complete_set_tensor(1, vec_a, std::move(prepare_result));
- f.assertGetTensor(vec_a, 1);
- index.expect_complete_add(1, {3, 5});
- }
- index.clear();
- {
- // Replaces previous value with the same value
- auto vec_b = vec_2d(3, 5);
- auto prepare_result = f.prepare_set_tensor(1, vec_b);
- EXPECT_TRUE(prepare_result.get() == nullptr);
- index.expect_empty_prepare_add();
- f.complete_set_tensor(1, vec_b, std::move(prepare_result));
- f.assertGetTensor(vec_b, 1);
- index.expect_empty_complete_add();
- }
-}
-
TEST_F("clearDoc() updates nearest neighbor index", DenseTensorAttributeMockIndex)
{
auto& index = f.mock_index();
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index c713b3ef335..2fdb73fcf96 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -8,9 +8,9 @@
#include "tensor_attribute.hpp"
#include <vespa/eval/eval/value.h>
#include <vespa/fastlib/io/bufferedfile.h>
-#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/load_utils.h>
#include <vespa/searchlib/attribute/readerbase.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/data/slime/inserter.h>
#include <vespa/vespalib/util/cpu_usage.h>
#include <vespa/vespalib/util/lambdatask.h>
@@ -102,16 +102,10 @@ BlobSequenceReader::is_present() {
}
-bool
-DenseTensorAttribute::tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const
-{
- auto old_tensor = extract_cells_ref(docid);
- return _comp.equals(old_tensor, new_tensor.cells());
-}
-
void
DenseTensorAttribute::internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor)
{
+ checkTensorType(tensor);
consider_remove_from_index(docid);
EntryRef ref = _denseTensorStore.setTensor(tensor);
setTensorRef(docid, ref);
@@ -158,8 +152,7 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con
const NearestNeighborIndexFactory& index_factory)
: TensorAttribute(baseFileName, cfg, _denseTensorStore),
_denseTensorStore(cfg.tensorType(), get_memory_allocator()),
- _index(),
- _comp(cfg.tensorType())
+ _index()
{
if (cfg.hnsw_index_params().has_value()) {
auto tensor_type = cfg.tensorType();
@@ -187,7 +180,6 @@ DenseTensorAttribute::clearDoc(DocId docId)
void
DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor)
{
- checkTensorType(tensor);
internal_set_tensor(docId, tensor);
if (_index) {
_index->add_document(docId);
@@ -197,26 +189,16 @@ DenseTensorAttribute::setTensor(DocId docId, const vespalib::eval::Value &tensor
std::unique_ptr<PrepareResult>
DenseTensorAttribute::prepare_set_tensor(DocId docid, const vespalib::eval::Value& tensor) const
{
- checkTensorType(tensor);
if (_index) {
- if (tensor_is_unchanged(docid, tensor)) {
- // Don't make changes to the nearest neighbor index when the inserted tensor is unchanged.
- // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point.
- return {};
- }
return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard());
}
- return {};
+ return std::unique_ptr<PrepareResult>();
}
void
DenseTensorAttribute::complete_set_tensor(DocId docid, const vespalib::eval::Value& tensor,
std::unique_ptr<PrepareResult> prepare_result)
{
- if (_index && !prepare_result) {
- // The tensor is unchanged.
- return;
- }
internal_set_tensor(docid, tensor);
if (_index) {
_index->complete_add_document(docid, std::move(prepare_result));
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index 1138a4f4433..da7a88af1be 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -6,7 +6,6 @@
#include "dense_tensor_store.h"
#include "doc_vector_access.h"
#include "tensor_attribute.h"
-#include "typed_cells_comparator.h"
#include <memory>
namespace search::tensor {
@@ -21,9 +20,7 @@ class DenseTensorAttribute : public TensorAttribute, public DocVectorAccess {
private:
DenseTensorStore _denseTensorStore;
std::unique_ptr<NearestNeighborIndex> _index;
- TypedCellsComparator _comp;
- bool tensor_is_unchanged(DocId docid, const vespalib::eval::Value& new_tensor) const;
void internal_set_tensor(DocId docid, const vespalib::eval::Value& tensor);
void consider_remove_from_index(DocId docid);
vespalib::MemoryUsage update_stat() override;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 2ee1b268449..e82f31df38e 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -436,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid,
if (max_nodes < _cfg.min_size_before_two_phase()) {
// the first documents added will do all work in write thread
// to ensure they are linked together:
- return std::make_unique<PreparedFirstAddDoc>();
+ return std::unique_ptr<PrepareResult>();
}
PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard));
return std::make_unique<PreparedAddDoc>(std::move(op));
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 3f5a9d514ed..72a10724ff1 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -152,8 +152,6 @@ protected:
const BitVector *filter, uint32_t explore_k,
double distance_threshold) const;
- struct PreparedFirstAddDoc : public PrepareResult {};
-
struct PreparedAddDoc : public PrepareResult {
using ReadGuard = vespalib::GenerationHandler::Guard;
uint32_t docid;
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index 78c58e86a3b..add5184c4eb 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -135,7 +135,7 @@ TensorAttribute::addDoc(DocId &docId)
}
void
-TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor) const
+TensorAttribute::checkTensorType(const vespalib::eval::Value &tensor)
{
const ValueType &fieldTensorType = getConfig().tensorType();
const ValueType &tensorType = tensor.type();
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
index c8aa42c6133..ae6a4a302ea 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
@@ -32,7 +32,7 @@ protected:
template <typename RefType>
void doCompactWorst();
- void checkTensorType(const vespalib::eval::Value &tensor) const;
+ void checkTensorType(const vespalib::eval::Value &tensor);
void setTensorRef(DocId docId, EntryRef ref);
virtual vespalib::MemoryUsage update_stat();
virtual vespalib::MemoryUsage memory_usage() const;
diff --git a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h b/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h
deleted file mode 100644
index d1c890be961..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/typed_cells_comparator.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/eval/eval/cell_type.h>
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/eval/eval/value_type.h>
-#include <cstring>
-
-namespace search::tensor {
-
-/**
- * Comparator used to compare two vespalib::eval::TypedCells instances.
- *
- * The caller must first validate that they are of the same vespalib::eval::ValueType.
- */
-class TypedCellsComparator {
-private:
- size_t _mem_size;
-
-public:
- TypedCellsComparator(const vespalib::eval::ValueType& type)
- : _mem_size(vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), type.dense_subspace_size()))
- {}
- bool equals(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const {
- return std::memcmp(lhs.data, rhs.data, _mem_size) == 0;
- }
-};
-
-}