summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-10-14 14:18:00 +0200
committerTor Egge <Tor.Egge@online.no>2022-10-14 14:18:00 +0200
commitc41b01a5525e138f64317c185e3604c6249a70c7 (patch)
tree25e2764a9334fbf2609dbc7f36dc51f38fd26101 /searchlib
parent903bb98de0de610e155d9cc318b52ef89126489e (diff)
Skip reclaim_labels on original buffer when it has been copied as part of compaction
or fallback copy (due to datastore buffer fallback resize).
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp34
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h32
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp2
6 files changed, 50 insertions, 27 deletions
diff --git a/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp
index 55598a1b11f..bda229f8074 100644
--- a/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp
+++ b/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp
@@ -137,10 +137,9 @@ TensorBufferOperationsTest::assert_store_copy_load(const TensorSpec& tensor_spec
{
auto buf = store_tensor(tensor_spec);
auto buf2 = buf;
- _ops.copied_labels(buf2);
- EXPECT_EQ(buf, buf2);
- _ops.reclaim_labels(buf);
+ _ops.copied_labels(buf);
EXPECT_NE(buf, buf2);
+ _ops.reclaim_labels(buf);
buf.clear();
auto loaded_spec = load_tensor_spec(buf2);
_ops.reclaim_labels(buf2);
diff --git a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp
index cdd4d35c1df..cb074348f08 100644
--- a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp
@@ -56,7 +56,7 @@ LargeSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, E
auto& old_elem = old_elems[i];
new (new_elems + i) ArrayType(old_elem);
if (!old_elem.empty()) {
- _ops.copied_labels({old_elem.data(), old_elem.size()});
+ _ops.copied_labels(unconstify(vespalib::ConstArrayRef<char>(old_elem.data(), old_elem.size())));
}
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp
index adbd3dee2b7..ba27f017022 100644
--- a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp
@@ -46,7 +46,7 @@ SmallSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, E
memcpy(newBuffer, oldBuffer, numElems);
const char *elem = static_cast<const char *>(oldBuffer);
while (numElems >= getArraySize()) {
- _ops.copied_labels(vespalib::ConstArrayRef<char>(elem, getArraySize()));
+ _ops.copied_labels(unconstify(vespalib::ConstArrayRef<char>(elem, getArraySize())));
elem += getArraySize();
numElems -= getArraySize();
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
index 22298354444..b496acd19b8 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
@@ -6,7 +6,7 @@
#include <vespa/eval/eval/value_codec.h>
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/streamed/streamed_value_view.h>
-#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/util/atomic.h>
#include <vespa/vespalib/util/shared_string_repo.h>
#include <algorithm>
@@ -85,16 +85,29 @@ TensorBufferOperations::TensorBufferOperations(const vespalib::eval::ValueType&
TensorBufferOperations::~TensorBufferOperations() = default;
uint32_t
-TensorBufferOperations::get_num_subspaces(ConstArrayRef<char> buf) const noexcept
+TensorBufferOperations::get_num_subspaces_and_flag(ConstArrayRef<char> buf) const noexcept
{
assert(buf.size() >= get_num_subspaces_size());
- return *reinterpret_cast<const uint32_t*>(buf.data());
+ const uint32_t& num_subspaces_and_flag_ref = *reinterpret_cast<const uint32_t*>(buf.data());
+ return vespalib::atomic::load_ref_relaxed(num_subspaces_and_flag_ref);
+}
+
+uint32_t
+TensorBufferOperations::get_num_subspaces_and_flag_and_set_flag(ArrayRef<char> buf) const noexcept
+{
+ uint32_t& num_subspaces_and_flag_ref = *reinterpret_cast<uint32_t*>(buf.data());
+ auto num_subspaces_and_flag = vespalib::atomic::load_ref_relaxed(num_subspaces_and_flag_ref);
+ if (!get_skip_reclaim_labels(num_subspaces_and_flag)) {
+ vespalib::atomic::store_ref_relaxed(num_subspaces_and_flag_ref, (num_subspaces_and_flag | skip_reclaim_labels_mask));
+ }
+ return num_subspaces_and_flag;
}
void
TensorBufferOperations::store_tensor(ArrayRef<char> buf, const vespalib::eval::Value& tensor)
{
uint32_t num_subspaces = tensor.index().size();
+ assert(num_subspaces <= num_subspaces_mask);
auto labels_end_offset = get_labels_offset() + get_labels_mem_size(num_subspaces);
auto cells_size = num_subspaces * _dense_subspace_size;
auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes
@@ -148,23 +161,22 @@ TensorBufferOperations::make_fast_view(ConstArrayRef<char> buf, const vespalib::
}
void
-TensorBufferOperations::copied_labels(ConstArrayRef<char> buf) const
+TensorBufferOperations::copied_labels(ArrayRef<char> buf) const
{
- auto num_subspaces = get_num_subspaces(buf);
- ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
- for (auto& label : labels) {
- SharedStringRepo::unsafe_copy(label); // Source buffer has an existing ref
- }
+ (void) get_num_subspaces_and_flag_and_set_flag(buf);
}
void
TensorBufferOperations::reclaim_labels(ArrayRef<char> buf) const
{
- auto num_subspaces = get_num_subspaces(buf);
+ auto num_subspaces_and_flag = get_num_subspaces_and_flag_and_set_flag(buf);
+ if (get_skip_reclaim_labels(num_subspaces_and_flag)) {
+ return;
+ }
+ auto num_subspaces = get_num_subspaces(num_subspaces_and_flag);
ArrayRef<string_id> labels(reinterpret_cast<string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
for (auto& label : labels) {
SharedStringRepo::unsafe_reclaim(label);
- label = string_id(); // Clear label to avoid double reclaim
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 18c7bc84ab2..451770614ba 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -4,15 +4,12 @@
#include <vespa/eval/eval/cell_type.h>
#include <vespa/vespalib/datastore/aligner.h>
+#include <vespa/vespalib/util/arrayref.h>
#include <vespa/vespalib/util/string_id.h>
#include <cstddef>
#include <memory>
-namespace vespalib {
-template <typename T> class ArrayRef;
-template <typename T> class ConstArrayRef;
-class nbostream;
-}
+namespace vespalib { class nbostream; }
namespace vespalib::eval {
struct Value;
@@ -27,7 +24,10 @@ namespace search::tensor {
*
* Layout of buffer is:
*
- * num_subspaces - number of subspaces
+ * num_subspaces_and_flag
+ * 31 least significant bits is num_subspaces - number of subspaces
+ * 1 bit to signal that reclaim_labels should be a noop (i.e. buffer has been copied
+ * as part of compaction or fallback copy (due to datastore buffer fallback resize)).
* labels[num_subspaces * _num_mapped_dimensions] - array of labels for sparse dimensions
* padding - to align start of cells
* cells[num_subspaces * _dense_subspaces_size] - array of tensor cell values
@@ -50,6 +50,8 @@ class TensorBufferOperations
static constexpr size_t CELLS_ALIGNMENT = 16;
static constexpr size_t CELLS_ALIGNMENT_MEM_SIZE_MIN = 32;
+ static constexpr uint32_t num_subspaces_mask = ((1u << 31) - 1);
+ static constexpr uint32_t skip_reclaim_labels_mask = (1u << 31);
static constexpr size_t get_num_subspaces_size() noexcept { return sizeof(uint32_t); }
static constexpr size_t get_labels_offset() noexcept { return get_num_subspaces_size(); }
@@ -65,7 +67,17 @@ class TensorBufferOperations
size_t get_cells_offset(uint32_t num_subspaces, auto aligner) const noexcept {
return aligner.align(get_labels_offset() + get_labels_mem_size(num_subspaces));
}
- uint32_t get_num_subspaces(vespalib::ConstArrayRef<char> buf) const noexcept;
+ uint32_t get_num_subspaces_and_flag(vespalib::ConstArrayRef<char> buf) const noexcept;
+ uint32_t get_num_subspaces_and_flag_and_set_flag(vespalib::ArrayRef<char> buf) const noexcept;
+ static uint32_t get_num_subspaces(uint32_t num_subspaces_and_flag) noexcept {
+ return num_subspaces_and_flag & num_subspaces_mask;
+ }
+ static bool get_skip_reclaim_labels(uint32_t num_subspaces_and_flag) noexcept {
+ return (num_subspaces_and_flag & skip_reclaim_labels_mask) != 0;
+ }
+ uint32_t get_num_subspaces(vespalib::ConstArrayRef<char> buf) const noexcept {
+ return get_num_subspaces(get_num_subspaces_and_flag(buf));
+ }
public:
size_t get_array_size(uint32_t num_subspaces) const noexcept {
auto cells_mem_size = get_cells_mem_size(num_subspaces);
@@ -81,9 +93,9 @@ public:
void store_tensor(vespalib::ArrayRef<char> buf, const vespalib::eval::Value& tensor);
std::unique_ptr<vespalib::eval::Value> make_fast_view(vespalib::ConstArrayRef<char> buf, const vespalib::eval::ValueType& tensor_type) const;
- // Increase reference counts for labels after copying tensor buffer
- void copied_labels(vespalib::ConstArrayRef<char> buf) const;
- // Decrease reference counts for labels and invalidate them
+ // Mark that reclaim_labels should be skipped for old buffer after copying tensor buffer
+ void copied_labels(vespalib::ArrayRef<char> buf) const;
+ // Decrease reference counts for labels and set skip flag unless skip flag is set.
void reclaim_labels(vespalib::ArrayRef<char> buf) const;
// Serialize stored tensor to target (used when saving attribute)
void encode_stored_tensor(vespalib::ConstArrayRef<char> buf, const vespalib::eval::ValueType& type, vespalib::nbostream& target) const;
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
index 6b96a91ec1c..ff7ad781d75 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp
@@ -55,7 +55,7 @@ TensorBufferStore::move_on_compact(EntryRef ref)
}
auto buf = _array_store.get(ref);
auto new_ref = _array_store.add(buf);
- _ops.copied_labels(buf);
+ _ops.copied_labels(unconstify(buf));
return new_ref;
}