aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-02-25 14:32:41 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-02-25 14:34:28 +0000
commitffe28ab39fe90676dd0f29f00c75bdb4014e2159 (patch)
tree5981c4f613f4765a03cc39efae6822dbda681454 /searchlib
parent7bb08d44728e9b3d3eb2c1334e7e252aee7a5e7e (diff)
Add proper memory management to hnsw index and integrate with dense tensor attribute.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp18
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp8
7 files changed, 100 insertions, 10 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 5089743a54a..761d403ac6c 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/eval/tensor/tensor.h>
#include <vespa/fastos/file.h>
#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attribute_read_guard.h>
#include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
@@ -41,6 +42,7 @@ using vespalib::tensor::DenseTensor;
using vespalib::tensor::Tensor;
using DoubleVector = std::vector<double>;
+using generation_t = vespalib::GenerationHandler::generation_t;
namespace vespalib::tensor {
@@ -80,12 +82,16 @@ private:
const DocVectorAccess& _vectors;
EntryVector _adds;
EntryVector _removes;
+ generation_t _transfer_gen;
+ generation_t _trim_gen;
public:
MockNearestNeighborIndex(const DocVectorAccess& vectors)
: _vectors(vectors),
_adds(),
- _removes()
+ _removes(),
+ _transfer_gen(std::numeric_limits<generation_t>::max()),
+ _trim_gen(std::numeric_limits<generation_t>::max())
{
}
void clear() {
@@ -111,6 +117,9 @@ public:
EXPECT_EQUAL(exp_docid, _removes.back().first);
EXPECT_EQUAL(exp_vector, _removes.back().second);
}
+ generation_t get_transfer_gen() const { return _transfer_gen; }
+ generation_t get_trim_gen() const { return _trim_gen; }
+
void add_document(uint32_t docid) override {
auto vector = _vectors.get_vector(docid).typify<double>();
_adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
@@ -119,6 +128,12 @@ public:
auto vector = _vectors.get_vector(docid).typify<double>();
_removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
}
+ void transfer_hold_lists(generation_t current_gen) override {
+ _transfer_gen = current_gen;
+ }
+ void trim_hold_lists(generation_t first_used_gen) override {
+ _trim_gen = first_used_gen;
+ }
std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override {
(void) k;
(void) vector;
@@ -232,6 +247,10 @@ struct Fixture
_attr->commit();
}
+ generation_t get_current_gen() const {
+ return _attr->getCurrentGeneration();
+ }
+
search::attribute::Status getStatus() {
_attr->commit(true);
return _attr->getStatus();
@@ -531,4 +550,33 @@ TEST_F("onLoad() updates nearest neighbor index", DenseTensorAttributeMockIndex)
index.expect_adds({{1, {3, 5}}, {2, {7, 9}}});
}
+
+TEST_F("commit() ensures transfer and trim hold lists on nearest neighbor index", DenseTensorAttributeMockIndex)
+{
+ auto& index = f.mock_index();
+ TensorSpec spec = vec_2d(3, 5);
+
+ f.set_tensor(1, spec);
+ generation_t gen_1 = f.get_current_gen();
+ EXPECT_EQUAL(gen_1 - 1, index.get_transfer_gen());
+ EXPECT_EQUAL(gen_1, index.get_trim_gen());
+
+ generation_t gen_2 = 0;
+ {
+ // Takes guard on gen_1
+ auto guard = f._attr->makeReadGuard(false);
+ f.set_tensor(2, spec);
+ gen_2 = f.get_current_gen();
+ EXPECT_GREATER(gen_2, gen_1);
+ EXPECT_EQUAL(gen_2 - 1, index.get_transfer_gen());
+ EXPECT_EQUAL(gen_1, index.get_trim_gen());
+ }
+
+ f.set_tensor(3, spec);
+ generation_t gen_3 = f.get_current_gen();
+ EXPECT_GREATER(gen_3, gen_2);
+ EXPECT_EQUAL(gen_3 - 1, index.get_transfer_gen());
+ EXPECT_EQUAL(gen_3, index.get_trim_gen());
+}
+
TEST_MAIN() { TEST_RUN_ALL(); vespalib::unlink("test.dat"); }
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 171340e07f1..229c6c2c34f 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -183,6 +183,26 @@ DenseTensorAttribute::getVersion() const
return DENSE_TENSOR_ATTRIBUTE_VERSION;
}
+void
+DenseTensorAttribute::onGenerationChange(generation_t next_gen)
+{
+ // TODO: Change onGenerationChange() to send current generation instead of next generation.
+ // This applies for entire attribute vector code.
+ TensorAttribute::onGenerationChange(next_gen);
+ if (_index) {
+ _index->transfer_hold_lists(next_gen - 1);
+ }
+}
+
+void
+DenseTensorAttribute::removeOldGenerations(generation_t first_used_gen)
+{
+ TensorAttribute::removeOldGenerations(first_used_gen);
+ if (_index) {
+ _index->trim_hold_lists(first_used_gen);
+ }
+}
+
vespalib::tensor::TypedCells
DenseTensorAttribute::get_vector(uint32_t docid) const
{
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index f9a8a81b56b..84a60376fe7 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -29,7 +29,7 @@ public:
DenseTensorAttribute(vespalib::stringref baseFileName, const Config& cfg,
const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory());
virtual ~DenseTensorAttribute();
- // Implements TensorAttribute
+ // Implements AttributeVector and ITensorAttribute
uint32_t clearDoc(DocId docId) override;
void setTensor(DocId docId, const Tensor &tensor) override;
std::unique_ptr<Tensor> getTensor(DocId docId) const override;
@@ -38,6 +38,8 @@ public:
std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override;
void compactWorst() override;
uint32_t getVersion() const override;
+ void onGenerationChange(generation_t next_gen) override;
+ void removeOldGenerations(generation_t first_used_gen) override;
// Implements DocVectorAccess
vespalib::tensor::TypedCells get_vector(uint32_t docid) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 54779408b37..e0b999bf5c5 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -310,6 +310,24 @@ HnswIndex::remove_document(uint32_t docid)
_node_refs[docid].store_release(invalid);
}
+void
+HnswIndex::transfer_hold_lists(generation_t current_gen)
+{
+ // Note: RcuVector transfers hold lists as part of reallocation based on current generation.
+ // We need to set the next generation here, as it is incremented on a higher level right after this call.
+ _node_refs.setGeneration(current_gen + 1);
+ _nodes.transferHoldLists(current_gen);
+ _links.transferHoldLists(current_gen);
+}
+
+void
+HnswIndex::trim_hold_lists(generation_t first_used_gen)
+{
+ _node_refs.removeOldGenerations(first_used_gen);
+ _nodes.trimHoldLists(first_used_gen);
+ _links.trimHoldLists(first_used_gen);
+}
+
struct NeighborsByDocId {
bool operator() (const NearestNeighborIndex::Neighbor &lhs,
const NearestNeighborIndex::Neighbor &rhs)
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index a8129032c11..afbb3207fc2 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -133,9 +133,13 @@ public:
const Config& config() const { return _cfg; }
+ // Implements NearestNeighborIndex
void add_document(uint32_t docid) override;
void remove_document(uint32_t docid) override;
+ void transfer_hold_lists(generation_t current_gen) override;
+ void trim_hold_lists(generation_t first_used_gen) override;
std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override;
+
FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const;
// TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index f933af0147e..003daa3185e 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -5,6 +5,7 @@
#include <cstdint>
#include <vector>
#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <vespa/vespalib/util/generationhandler.h>
namespace search::tensor {
@@ -13,6 +14,7 @@ namespace search::tensor {
*/
class NearestNeighborIndex {
public:
+ using generation_t = vespalib::GenerationHandler::generation_t;
struct Neighbor {
uint32_t docid;
double distance;
@@ -24,9 +26,13 @@ public:
virtual ~NearestNeighborIndex() {}
virtual void add_document(uint32_t docid) = 0;
virtual void remove_document(uint32_t docid) = 0;
+ virtual void transfer_hold_lists(generation_t current_gen) = 0;
+ virtual void trim_hold_lists(generation_t first_used_gen) = 0;
+
virtual std::vector<Neighbor> find_top_k(uint32_t k,
vespalib::tensor::TypedCells vector,
uint32_t explore_k) const = 0;
+
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index 89b8e77e136..0b9628e6872 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -71,7 +71,6 @@ TensorAttribute::TensorAttribute(vespalib::stringref name, const Config &cfg, Te
{
}
-
TensorAttribute::~TensorAttribute() = default;
const ITensorAttribute *
@@ -93,7 +92,6 @@ TensorAttribute::clearDoc(DocId docId)
return 0u;
}
-
void
TensorAttribute::onCommit()
{
@@ -110,7 +108,6 @@ TensorAttribute::onCommit()
}
}
-
void
TensorAttribute::onUpdateStat()
{
@@ -126,7 +123,6 @@ TensorAttribute::onUpdateStat()
total.allocatedBytesOnHold());
}
-
void
TensorAttribute::removeOldGenerations(generation_t firstUsed)
{
@@ -141,7 +137,6 @@ TensorAttribute::onGenerationChange(generation_t generation)
_tensorStore.transferHoldLists(generation - 1);
}
-
bool
TensorAttribute::addDoc(DocId &docId)
{
@@ -209,7 +204,6 @@ TensorAttribute::clearDocs(DocId lidLow, DocId lidLimit)
}
}
-
void
TensorAttribute::onShrinkLidSpace()
{
@@ -220,14 +214,12 @@ TensorAttribute::onShrinkLidSpace()
setNumDocs(committedDocIdLimit);
}
-
uint32_t
TensorAttribute::getVersion() const
{
return TENSOR_ATTRIBUTE_VERSION;
}
-
TensorAttribute::RefCopyVector
TensorAttribute::getRefCopy() const
{