From dd097b175f164d93da7765e827bcdd6fa0a006b6 Mon Sep 17 00:00:00 2001
From: Tor Egge <Tor.Egge@online.no>
Date: Fri, 26 Apr 2024 12:33:32 +0200
Subject: Expose imported attributes in metrics.

---
 .../proton/server/documentdb_metrics_updater.cpp   | 16 ++++++
 .../proton/server/fast_access_doc_subdb.cpp        | 66 +++++++++++++---------
 .../bitvector_search_cache_test.cpp                | 13 +++++
 .../imported_search_context_test.cpp               |  4 ++
 .../searchlib/attribute/bitvector_search_cache.cpp | 27 ++++++++-
 .../searchlib/attribute/bitvector_search_cache.h   |  4 ++
 .../attribute/imported_attribute_vector.cpp        | 12 ++++
 .../attribute/imported_attribute_vector.h          |  3 +
 8 files changed, 117 insertions(+), 28 deletions(-)

diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp
index dd66c7ceb46..3c7d197d5e1 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp
@@ -9,19 +9,23 @@
 #include <vespa/searchcommon/attribute/status.h>
 #include <vespa/searchcore/proton/attribute/attribute_usage_filter.h>
 #include <vespa/searchcore/proton/attribute/i_attribute_manager.h>
+#include <vespa/searchcore/proton/attribute/imported_attributes_repo.h>
 #include <vespa/searchcore/proton/docsummary/isummarymanager.h>
 #include <vespa/searchcore/proton/matching/matching_stats.h>
 #include <vespa/searchcore/proton/metrics/documentdb_job_trackers.h>
 #include <vespa/searchcore/proton/metrics/executor_threading_service_stats.h>
 #include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/imported_attribute_vector.h>
 #include <vespa/vespalib/stllike/cache_stats.h>
 #include <vespa/searchlib/util/searchable_stats.h>
 #include <vespa/vespalib/util/memoryusage.h>
+#include <vespa/vespalib/util/size_literals.h>
 
 #include <vespa/log/log.h>
 LOG_SETUP(".proton.server.documentdb_metrics_updater");
 
 using search::LidUsageStats;
+using search::attribute::ImportedAttributeVector;
 using vespalib::CacheStats;
 using vespalib::MemoryUsage;
 
@@ -141,6 +145,18 @@ fillTempAttributeMetrics(TempAttributeMetrics &totalMetrics,
                     fillTempAttributeMetrics(*subMetrics, attr->getName(), memoryUsage, bitVectors);
                 }
             }
+            auto imported = attrMgr->getImportedAttributes();
+            if (imported != nullptr) {
+                std::vector<std::shared_ptr<ImportedAttributeVector>> i_list;
+                imported->getAll(i_list);
+                for (const auto& attr : i_list) {
+                    auto memory_usage = attr->get_memory_usage();
+                    fillTempAttributeMetrics(totalMetrics,  attr->getName(), memory_usage, 0);
+                    if (subMetrics != nullptr) {
+                        fillTempAttributeMetrics(*subMetrics,  attr->getName(), memory_usage, 0);
+                    }
+                }
+            }
         }
     }
 }
diff --git a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp
index a2d68ad8920..4972cc790c5 100644
--- a/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/fast_access_doc_subdb.cpp
@@ -12,9 +12,11 @@
 #include <vespa/searchcore/proton/attribute/attribute_manager_initializer.h>
 #include <vespa/searchcore/proton/attribute/attribute_writer.h>
 #include <vespa/searchcore/proton/attribute/filter_attribute_manager.h>
+#include <vespa/searchcore/proton/attribute/imported_attributes_repo.h>
 #include <vespa/searchcore/proton/common/alloc_config.h>
 #include <vespa/searchcore/proton/reprocessing/attribute_reprocessing_initializer.h>
 #include <vespa/searchcore/proton/reprocessing/reprocess_documents_task.h>
+#include <vespa/searchlib/attribute/imported_attribute_vector.h>
 #include <vespa/vespalib/util/destructor_callbacks.h>
 
 #include <vespa/log/log.h>
@@ -23,6 +25,7 @@ LOG_SETUP(".proton.server.fast_access_doc_subdb");
 using search::AttributeGuard;
 using search::AttributeVector;
 using search::SerialNum;
+using search::attribute::ImportedAttributeVector;
 using search::index::Schema;
 using proton::initializer::InitializerTask;
 using searchcorespi::IFlushTarget;
@@ -85,16 +88,38 @@ FastAccessDocSubDB::createAttributeManagerInitializer(const DocumentDBConfig &co
                                                          attrMgrResult);
 }
 
+namespace {
+
+// Gathers the names from getAttributeListAll() plus those of any imported attributes.
+vespalib::hash_set<vespalib::string>
+get_attribute_names(const proton::IAttributeManager& mgr)
+{
+    vespalib::hash_set<vespalib::string> names;
+    std::vector<AttributeGuard> guards;
+    mgr.getAttributeListAll(guards);
+    for (const auto& guard : guards) {
+        names.insert(guard->getName());
+    }
+    if (auto repo = mgr.getImportedAttributes(); repo != nullptr) {
+        std::vector<std::shared_ptr<ImportedAttributeVector>> imported;
+        repo->getAll(imported);
+        for (const auto& imported_attr : imported) {
+            names.insert(imported_attr->getName());
+        }
+    }
+    return names;
+}
+
+}
+
 void
 FastAccessDocSubDB::setupAttributeManager(AttributeManager::SP attrMgrResult)
 {
     if (_addMetrics) {
         // register attribute metrics
-        std::vector<AttributeGuard> list;
-        attrMgrResult->getAttributeListAll(list);
+        auto list = get_attribute_names(*attrMgrResult);
         for (const auto &attr : list) {
-            const AttributeVector &v = *attr;
-            _metricsWireService.addAttribute(_subAttributeMetrics, v.getName());
+            _metricsWireService.addAttribute(_subAttributeMetrics, attr);
         }
     }
     _initAttrMgr = attrMgrResult;
@@ -141,33 +166,20 @@ void
 FastAccessDocSubDB::reconfigureAttributeMetrics(const proton::IAttributeManager &newMgr,
                                                 const proton::IAttributeManager &oldMgr)
 {
-    std::set<vespalib::string> toAdd;
-    std::set<vespalib::string> toRemove;
-    std::vector<AttributeGuard> newList;
-    std::vector<AttributeGuard> oldList;
-    newMgr.getAttributeList(newList);
-    oldMgr.getAttributeList(oldList);
-    for (const auto &newAttr : newList) {
-        if (std::find_if(oldList.begin(),
-                         oldList.end(),
-                         AttributeGuardComp(newAttr->getName())) ==
-            oldList.end()) {
-            toAdd.insert(newAttr->getName());
-        }
-    }
-    for (const auto &oldAttr : oldList) {
-        if (std::find_if(newList.begin(),
-                         newList.end(),
-                         AttributeGuardComp(oldAttr->getName())) ==
-            newList.end()) {
-            toRemove.insert(oldAttr->getName());
+    auto old_list = get_attribute_names(oldMgr);
+    auto new_list = get_attribute_names(newMgr);
+
+    for (const auto &attrName : new_list) {
+        if (old_list.contains(attrName)) {
+            continue;
         }
-    }
-    for (const auto &attrName : toAdd) {
         LOG(debug, "reconfigureAttributeMetrics(): addAttribute='%s'", attrName.c_str());
         _metricsWireService.addAttribute(_subAttributeMetrics, attrName);
     }
-    for (const auto &attrName : toRemove) {
+    for (const auto &attrName : old_list) {
+        if (new_list.contains(attrName)) {
+            continue;
+        }
         LOG(debug, "reconfigureAttributeMetrics(): removeAttribute='%s'", attrName.c_str());
         _metricsWireService.removeAttribute(_subAttributeMetrics, attrName);
     }
diff --git a/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp b/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
index d51ec22a54a..1d66eefaff7 100644
--- a/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector_search_cache/bitvector_search_cache_test.cpp
@@ -3,6 +3,7 @@
 #include <vespa/vespalib/testkit/test_kit.h>
 #include <vespa/searchlib/attribute/bitvector_search_cache.h>
 #include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/util/memoryusage.h>
 
 using namespace search;
 using namespace search::attribute;
@@ -31,9 +32,13 @@ struct Fixture {
 TEST_F("require that bit vectors can be inserted and retrieved", Fixture)
 {
     EXPECT_EQUAL(0u, f.cache.size());
+    auto old_mem_usage = f.cache.get_memory_usage();
     f.cache.insert("foo", f.entry1);
     f.cache.insert("bar", f.entry2);
     EXPECT_EQUAL(2u, f.cache.size());
+    auto new_mem_usage = f.cache.get_memory_usage();
+    EXPECT_LESS(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+    EXPECT_LESS(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
 
     EXPECT_EQUAL(f.entry1, f.cache.find("foo"));
     EXPECT_EQUAL(f.entry2, f.cache.find("bar"));
@@ -43,9 +48,13 @@ TEST_F("require that bit vectors can be inserted and retrieved", Fixture)
 TEST_F("require that insert() doesn't replace existing bit vector", Fixture)
 {
     f.cache.insert("foo", f.entry1);
+    auto old_mem_usage = f.cache.get_memory_usage(); // baseline taken after the first insert
     f.cache.insert("foo", f.entry2);
+    auto new_mem_usage = f.cache.get_memory_usage(); // taken after the second insert with the same term
     EXPECT_EQUAL(1u, f.cache.size());
     EXPECT_EQUAL(f.entry1, f.cache.find("foo"));
+    EXPECT_EQUAL(old_mem_usage.usedBytes(), new_mem_usage.usedBytes()); // an insert that does not take effect must leave the accounting untouched
+    EXPECT_EQUAL(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
 }
 
 TEST_F("require that cache can be cleared", Fixture)
@@ -53,11 +62,15 @@ TEST_F("require that cache can be cleared", Fixture)
     f.cache.insert("foo", f.entry1);
     f.cache.insert("bar", f.entry2);
     EXPECT_EQUAL(2u, f.cache.size());
+    auto old_mem_usage = f.cache.get_memory_usage();
     f.cache.clear();
+    auto new_mem_usage = f.cache.get_memory_usage();
 
     EXPECT_EQUAL(0u, f.cache.size());
     EXPECT_TRUE(f.cache.find("foo").get() == nullptr);
     EXPECT_TRUE(f.cache.find("bar").get() == nullptr);
+    EXPECT_GREATER(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+    EXPECT_GREATER(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
 }
 
 TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
index 41ec377dece..7c38c322bc8 100644
--- a/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
+++ b/searchlib/src/tests/attribute/imported_search_context/imported_search_context_test.cpp
@@ -508,6 +508,7 @@ assertBitVector(const std::vector<uint32_t> &expDocIds, const BitVector &bitVect
 TEST_F("Entry is inserted into search cache if bit vector posting list is used", SearchCacheFixture)
 {
     EXPECT_EQUAL(0u, f.imported_attr->getSearchCache()->size());
+    auto old_mem_usage = f.imported_attr->get_memory_usage();
     auto ctx = f.create_context(word_term("5678"));
     ctx->fetchPostings(queryeval::ExecuteInfo::FULL, true);
     TermFieldMatchData match;
@@ -515,6 +516,9 @@ TEST_F("Entry is inserted into search cache if bit vector posting list is used",
     TEST_DO(f.assertSearch({3, 5}, *iter));
 
     EXPECT_EQUAL(1u, f.imported_attr->getSearchCache()->size());
+    auto new_mem_usage = f.imported_attr->get_memory_usage();
+    EXPECT_LESS(old_mem_usage.usedBytes(), new_mem_usage.usedBytes());
+    EXPECT_LESS(old_mem_usage.allocatedBytes(), new_mem_usage.allocatedBytes());
     auto cacheEntry = f.imported_attr->getSearchCache()->find("5678");
     EXPECT_EQUAL(cacheEntry->docIdLimit, f.get_imported_attr()->getNumDocs());
     TEST_DO(assertBitVector({3, 5}, *cacheEntry->bitVector));
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
index e20d02afe50..6762c0516b2 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.cpp
@@ -3,6 +3,7 @@
 #include "bitvector_search_cache.h"
 #include <vespa/searchlib/common/bitvector.h>
 #include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/memoryusage.h>
 #include <mutex>
 
 namespace search::attribute {
@@ -10,6 +11,7 @@ namespace search::attribute {
 BitVectorSearchCache::BitVectorSearchCache()
     : _mutex(),
       _size(0),
+      _entries_extra_memory_usage(0),
       _cache()
 {}
 
@@ -18,9 +20,19 @@ BitVectorSearchCache::~BitVectorSearchCache() = default;
 void
 BitVectorSearchCache::insert(const vespalib::string &term, std::shared_ptr<Entry> entry)
 {
+    size_t entry_extra_memory_usage = 0; // sized before the lock is taken and before entry is moved from
+    if (entry) {
+        entry_extra_memory_usage = sizeof(Entry);
+        if (entry->bitVector) {
+            entry_extra_memory_usage += entry->bitVector->getFileBytes(); // NOTE(review): presumably used as an approximation of the bit vector's in-memory size - confirm
+        }
+    }
     std::unique_lock guard(_mutex);
-    _cache.insert(std::make_pair(term, std::move(entry)));
+    auto ins_res = _cache.insert(std::make_pair(term, std::move(entry)));
     _size.store(_cache.size());
+    if (ins_res.second) { // only add the entry's bytes when the insert actually took place
+        _entries_extra_memory_usage += entry_extra_memory_usage;
+    }
 }
 
 std::shared_ptr<BitVectorSearchCache::Entry>
@@ -36,12 +48,25 @@ BitVectorSearchCache::find(const vespalib::string &term) const
     return {};
 }
 
+// Reports allocated/used bytes for this object, its hash map and the entries accounted by insert().
+vespalib::MemoryUsage
+BitVectorSearchCache::get_memory_usage() const
+{
+    std::shared_lock guard(_mutex); // read-only access: a shared lock does not block other shared-lock holders
+    // sizeof(_cache) is subtracted from the object size, presumably because the map's own figures already include it.
+    const size_t self_and_entries = sizeof(BitVectorSearchCache) - sizeof(_cache) + _entries_extra_memory_usage;
+    const size_t allocated = self_and_entries + _cache.getMemoryConsumption();
+    const size_t used = self_and_entries + _cache.getMemoryUsed();
+    return vespalib::MemoryUsage(allocated, used, 0, 0);
+}
+
 void
 BitVectorSearchCache::clear()
 {
     std::unique_lock guard(_mutex);
     _cache.clear();
     _size.store(0ul, std::memory_order_relaxed);
+    _entries_extra_memory_usage = 0; // the map was just cleared, so no per-entry bytes remain to account for
 }
 
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
index 233f8315aaf..3a38cdcea26 100644
--- a/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
+++ b/searchlib/src/vespa/searchlib/attribute/bitvector_search_cache.h
@@ -10,6 +10,8 @@
 #include <atomic>
 
 namespace search { class BitVector; }
+namespace vespalib { class MemoryUsage; }
+
 namespace search::attribute {
 
 /**
@@ -37,6 +39,7 @@ private:
 
     mutable std::shared_mutex _mutex;
     std::atomic<uint64_t>     _size;
+    size_t                    _entries_extra_memory_usage;
     Cache _cache;
 
 public:
@@ -45,6 +48,7 @@ public:
     void insert(const vespalib::string &term, std::shared_ptr<Entry> entry);
     std::shared_ptr<Entry> find(const vespalib::string &term) const;
     size_t size() const { return _size.load(std::memory_order_relaxed); }
+    vespalib::MemoryUsage get_memory_usage() const;
     void clear();
 };
 
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
index 029dc155785..f6a33165f0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.cpp
@@ -3,6 +3,7 @@
 #include "imported_attribute_vector.h"
 #include "imported_attribute_vector_read_guard.h"
 #include "imported_search_context.h"
+#include <vespa/vespalib/util/memoryusage.h>
 
 namespace search::attribute {
 
@@ -58,4 +59,15 @@ void ImportedAttributeVector::clearSearchCache() {
     }
 }
 
+// Memory usage of this object itself, merged with that of the search cache when one is set.
+vespalib::MemoryUsage ImportedAttributeVector::get_memory_usage() const
+{
+    constexpr auto own_bytes = sizeof(ImportedAttributeVector);
+    vespalib::MemoryUsage usage(own_bytes, own_bytes, 0, 0);
+    if (_search_cache) {
+        usage.merge(_search_cache->get_memory_usage());
+    }
+    return usage;
+}
+
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
index bd018df5273..5b68957b7f5 100644
--- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
+++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector.h
@@ -6,6 +6,8 @@
 #include <vespa/searchcommon/attribute/i_document_meta_store_context.h>
 #include <vespa/vespalib/stllike/string.h>
 
+namespace vespalib { class MemoryUsage; }
+
 namespace search::attribute {
 
 class BitVectorSearchCache;
@@ -62,6 +64,7 @@ public:
 
     std::unique_ptr<AttributeReadGuard> makeReadGuard(bool stableEnumGuard) const override;
     virtual std::unique_ptr<AttributeReadGuard> makeReadGuard(std::shared_ptr<MetaStoreReadGuard> targetMetaStoreReadGuard, bool stableEnumGuard) const;
+    vespalib::MemoryUsage get_memory_usage() const;
 
 protected:
     vespalib::string                           _name;
-- 
cgit v1.2.3