diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-10-14 14:18:00 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-10-14 14:18:00 +0200 |
commit | c41b01a5525e138f64317c185e3604c6249a70c7 (patch) | |
tree | 25e2764a9334fbf2609dbc7f36dc51f38fd26101 /searchlib | |
parent | 903bb98de0de610e155d9cc318b52ef89126489e (diff) |
Skip reclaim_labels on original buffer when it has been copied as part of compaction
or fallback copy (due to datastore buffer fallback resize).
Diffstat (limited to 'searchlib')
6 files changed, 50 insertions, 27 deletions
diff --git a/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp index 55598a1b11f..bda229f8074 100644 --- a/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp +++ b/searchlib/src/tests/tensor/tensor_buffer_operations/tensor_buffer_operations_test.cpp @@ -137,10 +137,9 @@ TensorBufferOperationsTest::assert_store_copy_load(const TensorSpec& tensor_spec { auto buf = store_tensor(tensor_spec); auto buf2 = buf; - _ops.copied_labels(buf2); - EXPECT_EQ(buf, buf2); - _ops.reclaim_labels(buf); + _ops.copied_labels(buf); EXPECT_NE(buf, buf2); + _ops.reclaim_labels(buf); buf.clear(); auto loaded_spec = load_tensor_spec(buf2); _ops.reclaim_labels(buf2); diff --git a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp index cdd4d35c1df..cb074348f08 100644 --- a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp +++ b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp @@ -56,7 +56,7 @@ LargeSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, E auto& old_elem = old_elems[i]; new (new_elems + i) ArrayType(old_elem); if (!old_elem.empty()) { - _ops.copied_labels({old_elem.data(), old_elem.size()}); + _ops.copied_labels(unconstify(vespalib::ConstArrayRef<char>(old_elem.data(), old_elem.size()))); } } } diff --git a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp index adbd3dee2b7..ba27f017022 100644 --- a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp +++ b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp @@ -46,7 +46,7 @@ SmallSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, E memcpy(newBuffer, oldBuffer, numElems); const char *elem = static_cast<const char *>(oldBuffer); while (numElems >= getArraySize()) { - _ops.copied_labels(vespalib::ConstArrayRef<char>(elem, getArraySize())); + _ops.copied_labels(unconstify(vespalib::ConstArrayRef<char>(elem, getArraySize()))); elem += getArraySize(); numElems -= getArraySize(); } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp index 22298354444..b496acd19b8 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp @@ -6,7 +6,7 @@ #include <vespa/eval/eval/value_codec.h> #include <vespa/eval/eval/value_type.h> #include <vespa/eval/streamed/streamed_value_view.h> -#include <vespa/vespalib/util/arrayref.h> +#include <vespa/vespalib/util/atomic.h> #include <vespa/vespalib/util/shared_string_repo.h> #include <algorithm> @@ -85,16 +85,29 @@ TensorBufferOperations::TensorBufferOperations(const vespalib::eval::ValueType& TensorBufferOperations::~TensorBufferOperations() = default; uint32_t -TensorBufferOperations::get_num_subspaces(ConstArrayRef<char> buf) const noexcept +TensorBufferOperations::get_num_subspaces_and_flag(ConstArrayRef<char> buf) const noexcept { assert(buf.size() >= get_num_subspaces_size()); - return *reinterpret_cast<const uint32_t*>(buf.data()); + const uint32_t& num_subspaces_and_flag_ref = *reinterpret_cast<const uint32_t*>(buf.data()); + return vespalib::atomic::load_ref_relaxed(num_subspaces_and_flag_ref); +} + +uint32_t +TensorBufferOperations::get_num_subspaces_and_flag_and_set_flag(ArrayRef<char> buf) const noexcept +{ + uint32_t& num_subspaces_and_flag_ref = *reinterpret_cast<uint32_t*>(buf.data()); + auto num_subspaces_and_flag = vespalib::atomic::load_ref_relaxed(num_subspaces_and_flag_ref); + if (!get_skip_reclaim_labels(num_subspaces_and_flag)) { + vespalib::atomic::store_ref_relaxed(num_subspaces_and_flag_ref, (num_subspaces_and_flag | skip_reclaim_labels_mask)); + } + return num_subspaces_and_flag; } void TensorBufferOperations::store_tensor(ArrayRef<char> buf, const vespalib::eval::Value& tensor) { uint32_t num_subspaces = tensor.index().size(); + assert(num_subspaces <= num_subspaces_mask); auto labels_end_offset = get_labels_offset() + get_labels_mem_size(num_subspaces); auto cells_size = num_subspaces * _dense_subspace_size; auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes @@ -148,23 +161,22 @@ TensorBufferOperations::make_fast_view(ConstArrayRef<char> buf, const vespalib:: } void -TensorBufferOperations::copied_labels(ConstArrayRef<char> buf) const +TensorBufferOperations::copied_labels(ArrayRef<char> buf) const { - auto num_subspaces = get_num_subspaces(buf); - ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - for (auto& label : labels) { - SharedStringRepo::unsafe_copy(label); // Source buffer has an existing ref - } + (void) get_num_subspaces_and_flag_and_set_flag(buf); } void TensorBufferOperations::reclaim_labels(ArrayRef<char> buf) const { - auto num_subspaces = get_num_subspaces(buf); + auto num_subspaces_and_flag = get_num_subspaces_and_flag_and_set_flag(buf); + if (get_skip_reclaim_labels(num_subspaces_and_flag)) { + return; + } + auto num_subspaces = get_num_subspaces(num_subspaces_and_flag); ArrayRef<string_id> labels(reinterpret_cast<string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); for (auto& label : labels) { SharedStringRepo::unsafe_reclaim(label); - label = string_id(); // Clear label to avoid double reclaim } } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 18c7bc84ab2..451770614ba 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -4,15 +4,12 @@ #include <vespa/eval/eval/cell_type.h> #include <vespa/vespalib/datastore/aligner.h> +#include <vespa/vespalib/util/arrayref.h> #include <vespa/vespalib/util/string_id.h> #include <cstddef> #include <memory> -namespace vespalib { -template <typename T> class ArrayRef; -template <typename T> class ConstArrayRef; -class nbostream; -} +namespace vespalib { class nbostream; } namespace vespalib::eval { struct Value; @@ -27,7 +24,10 @@ namespace search::tensor { * * Layout of buffer is: * - * num_subspaces - number of subspaces + * num_subspaces_and_flag + * 31 least significant bits is num_subspaces - number of subspaces + * 1 bit to signal that reclaim_labels should be a noop (i.e. buffer has been copied + * as part of compaction or fallback copy (due to datastore buffer fallback resize)). * labels[num_subspaces * _num_mapped_dimensions] - array of labels for sparse dimensions * padding - to align start of cells * cells[num_subspaces * _dense_subspaces_size] - array of tensor cell values @@ -50,6 +50,8 @@ class TensorBufferOperations static constexpr size_t CELLS_ALIGNMENT = 16; static constexpr size_t CELLS_ALIGNMENT_MEM_SIZE_MIN = 32; + static constexpr uint32_t num_subspaces_mask = ((1u << 31) - 1); + static constexpr uint32_t skip_reclaim_labels_mask = (1u << 31); static constexpr size_t get_num_subspaces_size() noexcept { return sizeof(uint32_t); } static constexpr size_t get_labels_offset() noexcept { return get_num_subspaces_size(); } @@ -65,7 +67,17 @@ class TensorBufferOperations size_t get_cells_offset(uint32_t num_subspaces, auto aligner) const noexcept { return aligner.align(get_labels_offset() + get_labels_mem_size(num_subspaces)); } - uint32_t get_num_subspaces(vespalib::ConstArrayRef<char> buf) const noexcept; + uint32_t get_num_subspaces_and_flag(vespalib::ConstArrayRef<char> buf) const noexcept; + uint32_t get_num_subspaces_and_flag_and_set_flag(vespalib::ArrayRef<char> buf) const noexcept; + static uint32_t get_num_subspaces(uint32_t num_subspaces_and_flag) noexcept { + return num_subspaces_and_flag & num_subspaces_mask; + } + static bool get_skip_reclaim_labels(uint32_t num_subspaces_and_flag) noexcept { + return (num_subspaces_and_flag & skip_reclaim_labels_mask) != 0; + } + uint32_t get_num_subspaces(vespalib::ConstArrayRef<char> buf) const noexcept { + return get_num_subspaces(get_num_subspaces_and_flag(buf)); + } public: size_t get_array_size(uint32_t num_subspaces) const noexcept { auto cells_mem_size = get_cells_mem_size(num_subspaces); @@ -81,9 +93,9 @@ public: void store_tensor(vespalib::ArrayRef<char> buf, const vespalib::eval::Value& tensor); std::unique_ptr<vespalib::eval::Value> make_fast_view(vespalib::ConstArrayRef<char> buf, const vespalib::eval::ValueType& tensor_type) const; - // Increase reference counts for labels after copying tensor buffer - void copied_labels(vespalib::ConstArrayRef<char> buf) const; - // Decrease reference counts for labels and invalidate them + // Mark that reclaim_labels should be skipped for old buffer after copying tensor buffer + void copied_labels(vespalib::ArrayRef<char> buf) const; + // Decrease reference counts for labels and set skip flag unless skip flag is set. void reclaim_labels(vespalib::ArrayRef<char> buf) const; // Serialize stored tensor to target (used when saving attribute) void encode_stored_tensor(vespalib::ConstArrayRef<char> buf, const vespalib::eval::ValueType& type, vespalib::nbostream& target) const; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp index 6b96a91ec1c..ff7ad781d75 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp @@ -55,7 +55,7 @@ TensorBufferStore::move_on_compact(EntryRef ref) } auto buf = _array_store.get(ref); auto new_ref = _array_store.add(buf); - _ops.copied_labels(buf); + _ops.copied_labels(unconstify(buf)); return new_ref; } |