From 158e69f96f5aced5655048ef97182ce535ad4190 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 12 Feb 2024 17:23:20 +0100 Subject: Add extra logging when saving predicate attribute. --- .../src/tests/predicate/predicate_index_test.cpp | 6 ++++-- searchlib/src/tests/predicate/simple_index_test.cpp | 3 ++- .../searchlib/attribute/predicate_attribute.cpp | 20 +++++++++++++++++++- .../vespa/searchlib/predicate/predicate_index.cpp | 9 ++++++--- .../src/vespa/searchlib/predicate/predicate_index.h | 15 ++++++++++++++- .../src/vespa/searchlib/predicate/simple_index.h | 13 ++++++++++++- .../src/vespa/searchlib/predicate/simple_index.hpp | 9 ++++++++- 7 files changed, 65 insertions(+), 10 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp index 40b650e489a..8e72fc1dbd9 100644 --- a/searchlib/src/tests/predicate/predicate_index_test.cpp +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -292,7 +292,8 @@ TEST("require that PredicateIndex can be (de)serialized") { index.commit(); vespalib::DataBuffer buffer; - index.serialize(buffer); + PredicateIndex::SerializeStats dummy_stats; + index.serialize(buffer, dummy_stats); uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, @@ -336,7 +337,8 @@ TEST("require that DocumentFeaturesStore is restored on deserialization") { EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); indexFeature(index, doc_id, min_feature, {{hash, interval}}, {{hash2, bounds}}); vespalib::DataBuffer buffer; - index.serialize(buffer); + PredicateIndex::SerializeStats dummy_stats; + index.serialize(buffer, dummy_stats); uint32_t doc_id_limit; DocIdLimitFinder finder(doc_id_limit); PredicateIndex index2(generation_holder, dummy_provider, simple_index_config, diff --git a/searchlib/src/tests/predicate/simple_index_test.cpp b/searchlib/src/tests/predicate/simple_index_test.cpp index 8cd36a26f6e..c37d377e668 100644 --- a/searchlib/src/tests/predicate/simple_index_test.cpp +++ b/searchlib/src/tests/predicate/simple_index_test.cpp @@ -176,7 +176,8 @@ TEST_FF("require that SimpleIndex can be serialized and deserialized.", Fixture, } f1.commit(); vespalib::DataBuffer buffer; - f1.index().serialize(buffer, MyDataSerializer()); + SimpleIndex::SerializeStats dummy_stats; + f1.index().serialize(buffer, MyDataSerializer(), dummy_stats); MyObserver observer; MyDataDeserializer deserializer; f2.index().deserialize(buffer, deserializer, observer, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp index ddf71063306..1f596699bca 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp @@ -141,8 +141,11 @@ PredicateAttribute::before_inc_generation(generation_t current_gen) void PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) { LOG(info, "Saving predicate attribute version %d", getVersion()); + vespalib::string name(getBaseFileName()); + PredicateIndex::SerializeStats stats; IAttributeSaveTarget::Buffer buffer(saveTarget.datWriter().allocBuf(4_Ki)); - _index->serialize(*buffer); + _index->serialize(*buffer, stats); + size_t predicate_index_len = buffer->getDataLen(); uint32_t highest_doc_id = static_cast(_min_feature.size() - 1); buffer->writeInt32(highest_doc_id); for (size_t i = 1; i <= highest_doc_id; ++i) { @@ -152,6 +155,21 @@ PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) { buffer->writeInt16(_interval_range_vector[i]); } buffer->writeInt16(_max_interval_range); + auto min_feature_and_interval_range_vector_len = buffer->getDataLen() - predicate_index_len; + auto total_len = buffer->getDataLen(); + LOG(info, "Serialized predicate attribute %s: " + "{features=%zu, zeros=%zu, " + "interval={dictionary=%zu, btrees=%zu, bytes=%zu}, " + "interval_with_bounds={dictionary %zu, btrees=%zu, bytes=%zu}, " + "predicate-index_len=%zu, " + "min_feature and interval_range_vector=%zu, total=%zu}", + name.c_str(), + stats._features_len, stats._zeroes_len, + stats._interval._dictionary_size, stats._interval._btree_count, stats._interval._bytes, + stats._interval_with_bounds._dictionary_size, stats._interval_with_bounds._btree_count, stats._interval_with_bounds._bytes, + predicate_index_len, + min_feature_and_interval_range_vector_len, + total_len); saveTarget.datWriter().writeBuf(std::move(buffer)); } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp index c24c2f53f1d..7e3d62640cf 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp @@ -126,17 +126,20 @@ PredicateIndex::PredicateIndex(GenerationHolder &genHolder, PredicateIndex::~PredicateIndex() = default; void -PredicateIndex::serialize(DataBuffer &buffer) const { +PredicateIndex::serialize(DataBuffer &buffer, SerializeStats& stats) const { _features_store.serialize(buffer); + stats._features_len = buffer.getDataLen(); + auto old_len = buffer.getDataLen(); buffer.writeInt16(_arity); buffer.writeInt32(_zero_constraint_docs.size()); for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) { buffer.writeInt32(it.getKey()); } + stats._zeroes_len = buffer.getDataLen() - old_len; IntervalSerializer interval_serializer(_interval_store); - _interval_index.serialize(buffer, interval_serializer); + _interval_index.serialize(buffer, interval_serializer, stats._interval); IntervalSerializer bounds_serializer(_interval_store); - _bounds_index.serialize(buffer, bounds_serializer); + _bounds_index.serialize(buffer, bounds_serializer, stats._interval_with_bounds); } void diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h index 439187bccd7..351fa3a1a9f 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h @@ -37,6 +37,19 @@ public: using GenerationHolder = vespalib::GenerationHolder; using BTreeIterator = SimpleIndex::BTreeIterator; using VectorIterator = SimpleIndex::VectorIterator; + struct SerializeStats { + size_t _features_len; + size_t _zeroes_len; + IntervalIndex::SerializeStats _interval; + BoundsIndex::SerializeStats _interval_with_bounds; + SerializeStats() + : _features_len(0), + _zeroes_len(0), + _interval(), + _interval_with_bounds() + { + } + }; private: uint32_t _arity; const DocIdLimitProvider &_limit_provider; @@ -66,7 +79,7 @@ public: SimpleIndexDeserializeObserver<> & observer, uint32_t version); ~PredicateIndex() override; - void serialize(vespalib::DataBuffer &buffer) const; + void serialize(vespalib::DataBuffer &buffer, SerializeStats& stats) const; void onDeserializationCompleted(); void indexEmptyDocument(uint32_t doc_id); diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h index 3290aaf929e..0fb6ce6e9db 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -136,6 +136,17 @@ public: using PostingVector = vespalib::RcuVectorBase; using VectorStore = vespalib::btree::BTree, vespalib::btree::NoAggregated>; using VectorIterator = PostingVectorIterator; + struct SerializeStats { + size_t _dictionary_size; + size_t _btree_count; + size_t _bytes; + SerializeStats() + : _dictionary_size(0), + _btree_count(0), + _bytes(0) + { + } + }; private: using GenerationHolder = vespalib::GenerationHolder; @@ -176,7 +187,7 @@ public: ~SimpleIndex(); void serialize(vespalib::DataBuffer &buffer, - const PostingSerializer &serializer) const; + const PostingSerializer &serializer, SerializeStats& stats) const; void deserialize(vespalib::DataBuffer &buffer, PostingDeserializer &deserializer, SimpleIndexDeserializeObserver &observer, uint32_t version); diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp index 46441a33692..0b5c8cbdb62 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp @@ -69,9 +69,12 @@ SimpleIndex::~SimpleIndex() { template void -SimpleIndex::serialize(vespalib::DataBuffer &buffer, const PostingSerializer &serializer) const { +SimpleIndex::serialize(vespalib::DataBuffer &buffer, const PostingSerializer &serializer, SerializeStats& stats) const { assert(sizeof(Key) <= sizeof(uint64_t)); assert(sizeof(DocId) <= sizeof(uint32_t)); + stats = SerializeStats(); + stats._dictionary_size = _dictionary.size(); + auto old_size = buffer.getDataLen(); buffer.writeInt32(_dictionary.size()); for (auto it = _dictionary.begin(); it.valid(); ++it) { vespalib::datastore::EntryRef ref = it.getData(); @@ -79,12 +82,16 @@ SimpleIndex::serialize(vespalib::DataBuffer &buffer, const auto posting_it = _btree_posting_lists.begin(ref); if (!posting_it.valid()) continue; + if (posting_it.size() > 8u) { + ++stats._btree_count; + } buffer.writeInt64(it.getKey()); // Key for (; posting_it.valid(); ++posting_it) { buffer.writeInt32(posting_it.getKey()); // DocId serializer.serialize(posting_it.getData(), buffer); } } + stats._bytes = buffer.getDataLen() - old_size; } template -- cgit v1.2.3