diff options
6 files changed, 146 insertions, 99 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index e3028773b75..663c760fc44 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -104,6 +104,7 @@ vespa_add_library(searchlib_attribute OBJECT postinglisttraits.cpp postingstore.cpp predicate_attribute.cpp + raw_buffer_store.cpp raw_buffer_type_mapper.cpp raw_multi_value_read_view.cpp readerbase.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp new file mode 100644 index 00000000000..74894728ff4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.cpp @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "raw_buffer_store.h" +#include <vespa/vespalib/datastore/array_store.hpp> +#include <cassert> + +using vespalib::alloc::MemoryAllocator; +using vespalib::datastore::EntryRef; + +namespace { + +constexpr float ALLOC_GROW_FACTOR = 0.2; + +} + +namespace search::attribute { + +RawBufferStore::RawBufferStore(std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator, uint32_t max_small_buffer_type_id, double grow_factor) + : _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_buffer_type_id, + RawBufferTypeMapper(max_small_buffer_type_id, grow_factor), + MemoryAllocator::HUGEPAGE_SIZE, + MemoryAllocator::PAGE_SIZE, + 8_Ki, ALLOC_GROW_FACTOR), + std::move(allocator), RawBufferTypeMapper(max_small_buffer_type_id, grow_factor)) +{ +} + +RawBufferStore::~RawBufferStore() = default; + +vespalib::ConstArrayRef<char> +RawBufferStore::get(EntryRef ref) const +{ + auto array = _array_store.get(ref); + uint32_t size = 0; + assert(array.size() >= sizeof(size)); + memcpy(&size, array.data(), sizeof(size)); + assert(array.size() >= sizeof(size) + size); + return {array.data() + sizeof(size), size}; +} + +EntryRef +RawBufferStore::set(vespalib::ConstArrayRef<char> raw) +{ + uint32_t size = raw.size(); + if (size == 0) { + return EntryRef(); + } + size_t buffer_size = raw.size() + sizeof(size); + auto& mapper = _array_store.get_mapper(); + auto type_id = mapper.get_type_id(buffer_size); + auto array_size = (type_id != 0) ? mapper.get_array_size(type_id) : buffer_size; + assert(array_size >= buffer_size); + auto ref = _array_store.allocate(array_size); + auto buf = _array_store.get_writable(ref); + memcpy(buf.data(), &size, sizeof(size)); + memcpy(buf.data() + sizeof(size), raw.data(), size); + if (array_size > buffer_size) { + memset(buf.data() + buffer_size, 0, array_size - buffer_size); + } + return ref; +} + +std::unique_ptr<vespalib::datastore::ICompactionContext> +RawBufferStore::start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy) +{ + return _array_store.compact_worst(compaction_strategy); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h new file mode 100644 index 00000000000..60132c70852 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/raw_buffer_store.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/array_store.h> +#include "raw_buffer_type_mapper.h" + +namespace search::attribute { + +/** + * Class handling storage of raw values in an array store. A stored entry + * starts with 4 bytes that contains the size of the raw value. + */ +class RawBufferStore +{ + using EntryRef = vespalib::datastore::EntryRef; + using RefType = vespalib::datastore::EntryRefT<19>; + using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, RawBufferTypeMapper>; + using generation_t = vespalib::GenerationHandler::generation_t; + + ArrayStoreType _array_store; +public: + RawBufferStore(std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator, uint32_t max_small_buffer_type_id, double grow_factor); + ~RawBufferStore(); + EntryRef set(vespalib::ConstArrayRef<char> raw); + vespalib::ConstArrayRef<char> get(EntryRef ref) const; + void remove(EntryRef ref) { _array_store.remove(ref); } + vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy) { return _array_store.update_stat(compaction_strategy); } + bool consider_compact() const noexcept { return _array_store.consider_compact(); } + std::unique_ptr<vespalib::datastore::ICompactionContext> start_compact(const vespalib::datastore::CompactionStrategy& compaction_strategy); + void reclaim_memory(generation_t oldest_used_gen) { _array_store.reclaim_memory(oldest_used_gen); } + void assign_generation(generation_t current_gen) { _array_store.assign_generation(current_gen); } +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp index a37400ed88e..9746929c666 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.cpp @@ -9,8 +9,6 @@ using vespalib::datastore::EntryRef; namespace { -constexpr float ALLOC_GROW_FACTOR = 0.2; - constexpr double mapper_grow_factor = 1.03; constexpr uint32_t max_small_buffer_type_id = 500u; @@ -22,12 +20,7 @@ namespace search::attribute { SingleRawAttribute::SingleRawAttribute(const vespalib::string& name, const Config& config) : NotImplementedAttribute(name, config), _ref_vector(config.getGrowStrategy(), getGenerationHolder()), - _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_buffer_type_id, - RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor), - MemoryAllocator::HUGEPAGE_SIZE, - MemoryAllocator::PAGE_SIZE, - 8_Ki, ALLOC_GROW_FACTOR), - get_memory_allocator(), RawBufferTypeMapper(max_small_buffer_type_id, mapper_grow_factor)) + _raw_store(get_memory_allocator(), max_small_buffer_type_id, mapper_grow_factor) { } @@ -39,7 +32,7 @@ SingleRawAttribute::~SingleRawAttribute() void SingleRawAttribute::reclaim_memory(generation_t oldest_used_gen) { - _array_store.reclaim_memory(oldest_used_gen); + _raw_store.reclaim_memory(oldest_used_gen); getGenerationHolder().reclaim(oldest_used_gen); } @@ -47,7 +40,7 @@ void SingleRawAttribute::before_inc_generation(generation_t current_gen) { getGenerationHolder().assign_generation(current_gen); - _array_store.assign_generation(current_gen); + _raw_store.assign_generation(current_gen); } bool @@ -70,8 +63,8 @@ void SingleRawAttribute::onCommit() { incGeneration(); - if (_array_store.consider_compact()) { - auto context = _array_store.compact_worst(getConfig().getCompactionStrategy()); + if (_raw_store.consider_compact()) { + auto context = _raw_store.start_compact(getConfig().getCompactionStrategy()); if (context) { context->compact(vespalib::ArrayRef<AtomicEntryRef>(&_ref_vector[0], _ref_vector.size())); } @@ -96,23 +89,12 @@ vespalib::MemoryUsage SingleRawAttribute::update_stat() { vespalib::MemoryUsage result = _ref_vector.getMemoryUsage(); - result.merge(_array_store.update_stat(getConfig().getCompactionStrategy())); + result.merge(_raw_store.update_stat(getConfig().getCompactionStrategy())); result.mergeGenerationHeldBytes(getGenerationHolder().get_held_bytes()); return result; } vespalib::ConstArrayRef<char> -SingleRawAttribute::get_raw(EntryRef ref) const -{ - auto array = _array_store.get(ref); - uint32_t size = 0; - assert(array.size() >= sizeof(size)); - memcpy(&size, array.data(), sizeof(size)); - assert(array.size() >= sizeof(size) + size); - return {array.data() + sizeof(size), size}; -} - -vespalib::ConstArrayRef<char> SingleRawAttribute::get_raw(DocId docid) const { EntryRef ref; @@ -122,42 +104,20 @@ SingleRawAttribute::get_raw(DocId docid) const if (!ref.valid()) { return {}; } - return get_raw(ref); -} - -EntryRef -SingleRawAttribute::set_raw(vespalib::ConstArrayRef<char> raw) -{ - uint32_t size = raw.size(); - if (size == 0) { - return EntryRef(); - } - size_t buffer_size = raw.size() + sizeof(size); - auto& mapper = _array_store.get_mapper(); - auto type_id = mapper.get_type_id(buffer_size); - auto array_size = (type_id != 0) ? mapper.get_array_size(type_id) : buffer_size; - assert(array_size >= buffer_size); - auto ref = _array_store.allocate(array_size); - auto buf = _array_store.get_writable(ref); - memcpy(buf.data(), &size, sizeof(size)); - memcpy(buf.data() + sizeof(size), raw.data(), size); - if (array_size > buffer_size) { - memset(buf.data() + buffer_size, 0, array_size - buffer_size); - } - return ref; + return _raw_store.get(ref); } void SingleRawAttribute::set_raw(DocId docid, vespalib::ConstArrayRef<char> raw) { - auto ref = set_raw(raw); + auto ref = _raw_store.set(raw); assert(docid < _ref_vector.size()); updateUncommittedDocIdLimit(docid); auto& elem_ref = _ref_vector[docid]; EntryRef old_ref(elem_ref.load_relaxed()); elem_ref.store_release(ref); if (old_ref.valid()) { - _array_store.remove(old_ref); + _raw_store.remove(old_ref); } } @@ -169,7 +129,7 @@ SingleRawAttribute::clearDoc(DocId docId) EntryRef old_ref(elem_ref.load_relaxed()); elem_ref.store_relaxed(EntryRef()); if (old_ref.valid()) { - _array_store.remove(old_ref); + _raw_store.remove(old_ref); return 1u; } return 0u; diff --git a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h index 876acc9ad58..d7ea321a3d4 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/single_raw_attribute.h @@ -3,8 +3,7 @@ #pragma once #include "not_implemented_attribute.h" -#include "raw_buffer_type_mapper.h" -#include <vespa/vespalib/datastore/array_store.h> +#include "raw_buffer_store.h" #include <vespa/vespalib/util/rcuvector.h> namespace search::attribute { @@ -17,17 +16,12 @@ class SingleRawAttribute : public NotImplementedAttribute using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; using EntryRef = vespalib::datastore::EntryRef; using RefVector = vespalib::RcuVectorBase<AtomicEntryRef>; - using RefType = vespalib::datastore::EntryRefT<19>; - using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, RawBufferTypeMapper>; RefVector _ref_vector; - ArrayStoreType _array_store; - vespalib::datastore::CompactionSpec _compaction_spec; + RawBufferStore _raw_store; vespalib::MemoryUsage update_stat(); EntryRef acquire_entry_ref(DocId docid) const noexcept { return _ref_vector.acquire_elem_ref(docid).load_acquire(); } - EntryRef set_raw(vespalib::ConstArrayRef<char> raw); - vespalib::ConstArrayRef<char> get_raw(EntryRef ref) const; public: SingleRawAttribute(const vespalib::string& name, const Config& config); ~SingleRawAttribute() override; diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 4d40e93477f..9e9196dbee7 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -12,6 +12,7 @@ #include <vespa/document/base/exceptions.h> #include <vespa/vespalib/stllike/asciistream.h> #include <sstream> +#include <optional> #include <vespa/log/log.h> LOG_SETUP(".visitoroperation"); @@ -88,7 +89,7 @@ VisitorOperation::VisitorOperation( { const std::vector<document::BucketId>& buckets = m->getBuckets(); - if (buckets.size() > 0) { + if (!buckets.empty()) { _superBucket = SuperBucketInfo(buckets[0]); } @@ -114,12 +115,10 @@ VisitorOperation::getLastBucketVisited() LOG(spam, "getLastBucketVisited(): Sub bucket count: %zu", _superBucket.subBucketsVisitOrder.size()); - for (uint32_t i=0; i<_superBucket.subBucketsVisitOrder.size(); i++) { - auto found = _superBucket.subBuckets.find(_superBucket.subBucketsVisitOrder[i]); + for (const auto& sub_bucket : _superBucket.subBucketsVisitOrder) { + auto found = _superBucket.subBuckets.find(sub_bucket); assert(found != _superBucket.subBuckets.end()); - LOG(spam, "%s => %s", - found->first.toString().c_str(), - found->second.toString().c_str()); + LOG(spam, "%s => %s", found->first.toString().c_str(), found->second.toString().c_str()); if (found->second.done) { foundDone = true; @@ -151,9 +150,7 @@ VisitorOperation::timeLeft() const noexcept { const auto elapsed = _operationTimer.getElapsedTime(); - - LOG(spam, - "Checking if visitor has timed out: elapsed=%" PRId64 " ms, timeout=%" PRId64 " ms", + LOG(spam, "Checking if visitor has timed out: elapsed=%" PRId64 " ms, timeout=%" PRId64 " ms", vespalib::count_ms(elapsed), vespalib::count_ms(_msg->getTimeout())); @@ -168,7 +165,7 @@ void VisitorOperation::markCompleted(const document::BucketId& bid, const api::ReturnCode& code) { - VisitBucketMap::iterator found = _superBucket.subBuckets.find(bid); + auto found = _superBucket.subBuckets.find(bid); assert(found != _superBucket.subBuckets.end()); BucketInfo& info = found->second; @@ -196,11 +193,11 @@ VisitorOperation::onReceive( DistributorStripeMessageSender& sender, const api::StorageReply::SP& r) { - api::CreateVisitorReply& reply = static_cast<api::CreateVisitorReply&>(*r); + auto& reply = dynamic_cast<api::CreateVisitorReply&>(*r); _trace.add(reply.steal_trace()); - SentMessagesMap::iterator iter = _sentMessages.find(reply.getMsgId()); + auto iter = _sentMessages.find(reply.getMsgId()); assert(iter != _sentMessages.end()); api::CreateVisitorCommand& storageVisitor = *iter->second; @@ -223,9 +220,8 @@ VisitorOperation::onReceive( } // else: will lose code for non-critical events, degenerates to "not found". - for (uint32_t i = 0; i < storageVisitor.getBuckets().size(); i++) { - const document::BucketId& bid(storageVisitor.getBuckets()[i]); - markCompleted(bid, result); + for (const auto& bucket : storageVisitor.getBuckets()) { + markCompleted(bucket, result); } _sentMessages.erase(iter); @@ -234,15 +230,14 @@ VisitorOperation::onReceive( namespace { -class VisitorVerificationException -{ +class VisitorVerificationException { public: VisitorVerificationException(api::ReturnCode::Result result, vespalib::stringref message) : _code(result, message) {} - const api::ReturnCode& getReturnCode() const { + const api::ReturnCode& getReturnCode() const noexcept { return _code; } @@ -438,10 +433,11 @@ namespace { struct NextEntryFinder : public BucketDatabase::EntryProcessor { bool _first; document::BucketId _last; - std::unique_ptr<document::BucketId> _next; + std::optional<document::BucketId> _next; - NextEntryFinder(const document::BucketId& id) - : _first(true), _last(id), _next() {} + explicit NextEntryFinder(const document::BucketId& id) noexcept + : _first(true), _last(id), _next() + {} bool process(const BucketDatabase::ConstEntryRef& e) override { document::BucketId bucket(e.getBucketId()); @@ -450,27 +446,26 @@ struct NextEntryFinder : public BucketDatabase::EntryProcessor { _first = false; return true; } else { - _next.reset(new document::BucketId(bucket)); + _next.emplace(bucket); return false; } } }; -std::unique_ptr<document::BucketId> -getBucketIdAndLast( - BucketDatabase& database, - const document::BucketId& super, - const document::BucketId& last) +std::optional<document::BucketId> +getBucketIdAndLast(BucketDatabase& database, + const document::BucketId& super, + const document::BucketId& last) { if (!super.contains(last)) { NextEntryFinder proc(super); database.forEach(proc, super); - return std::move(proc._next); + return proc._next; } else { NextEntryFinder proc(last); database.forEach(proc, last); - return std::move(proc._next); + return proc._next; } } @@ -481,12 +476,12 @@ VisitorOperation::expandBucketContained() { uint32_t maxBuckets = _msg->getMaxBucketsPerVisitor(); - std::unique_ptr<document::BucketId> bid = getBucketIdAndLast( + std::optional<document::BucketId> bid = getBucketIdAndLast( _bucketSpace.getBucketDatabase(), _superBucket.bid, _lastBucket); - while (bid.get() && _superBucket.subBuckets.size() < maxBuckets) { + while (bid.has_value() && _superBucket.subBuckets.size() < maxBuckets) { if (!_superBucket.bid.contains(*bid)) { LOG(spam, "Iterating: Found bucket %s is not contained in bucket %s", @@ -502,7 +497,7 @@ VisitorOperation::expandBucketContained() bid = getBucketIdAndLast(_bucketSpace.getBucketDatabase(), _superBucket.bid, *bid); } - bool doneExpand = (!bid.get() || !_superBucket.bid.contains(*bid)); + bool doneExpand = (!bid.has_value() || !_superBucket.bid.contains(*bid)); return doneExpand; } @@ -541,15 +536,8 @@ VisitorOperation::expandBucket() namespace { -bool -alreadyTried(const std::vector<uint16_t>& triedNodes, uint16_t node) -{ - for (uint32_t j = 0; j < triedNodes.size(); j++) { - if (triedNodes[j] == node) { - return true; - } - } - return false; +[[nodiscard]] bool alreadyTried(const std::vector<uint16_t>& triedNodes, uint16_t node) noexcept { + return std::find(triedNodes.begin(), triedNodes.end(), node) != triedNodes.end(); } int |