diff options
Diffstat (limited to 'searchcore')
3 files changed, 153 insertions, 15 deletions
diff --git a/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp b/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp index 98dca748533..a1ca69e54d7 100644 --- a/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp @@ -6,7 +6,9 @@ #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/size_literals.h> #include <filesystem> +#include <fstream> #include <vespa/log/log.h> LOG_SETUP("attribute_directory_test"); @@ -45,6 +47,10 @@ bool hasWriter(const std::unique_ptr<AttributeDirectory::Writer> &writer) { return static_cast<bool>(writer); } +void create_directory(const vespalib::string& path) { + std::filesystem::create_directory(std::filesystem::path(path)); +} + } class Fixture : public DirectoryHandler { @@ -134,7 +140,7 @@ public: EXPECT_TRUE(hasAttributeDir(dir)); auto writer = dir->getWriter(); writer->createInvalidSnapshot(serialNum); - std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(serialNum))); + create_directory(writer->getSnapshotDir(serialNum)); writer->markValidSnapshot(serialNum); assertAttributeDiskDir("foo"); } @@ -160,7 +166,7 @@ public: auto dir = createFooAttrDir(); auto writer = dir->getWriter(); writer->createInvalidSnapshot(serialNum); - std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(serialNum))); + create_directory(writer->getSnapshotDir(serialNum)); writer->markValidSnapshot(serialNum); } @@ -208,10 +214,10 @@ TEST_F(AttributeDirectoryTest, can_prune_attribute_snapshots) assertNotAttributeDiskDir("foo"); auto writer = dir->getWriter(); writer->createInvalidSnapshot(2); - std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(2))); + create_directory(writer->getSnapshotDir(2)); writer->markValidSnapshot(2); writer->createInvalidSnapshot(4); - std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(4))); + create_directory(writer->getSnapshotDir(4)); writer->markValidSnapshot(4); writer.reset(); assertAttributeDiskDir("foo"); @@ -262,9 +268,9 @@ TEST_F(AttributeDirectoryTest, attribute_directory_is_not_removed_due_to_pruning TEST(BasicDirectoryTest, initial_state_tracks_disk_layout) { - std::filesystem::create_directory(std::filesystem::path("attributes")); - std::filesystem::create_directory(std::filesystem::path("attributes/foo")); - std::filesystem::create_directory(std::filesystem::path("attributes/bar")); + create_directory("attributes"); + create_directory("attributes/foo"); + create_directory("attributes/bar"); IndexMetaInfo fooInfo("attributes/foo"); IndexMetaInfo barInfo("attributes/bar"); fooInfo.addSnapshot({true, 4, "snapshot-4"}); @@ -290,8 +296,8 @@ TEST(BasicDirectoryTest, initial_state_tracks_disk_layout) TEST_F(AttributeDirectoryTest, snapshot_removal_removes_correct_snapshot_directory) { setupFooSnapshots(5); - std::filesystem::create_directory(std::filesystem::path(getSnapshotDir("foo", 5))); - std::filesystem::create_directory(std::filesystem::path(getSnapshotDir("foo", 6))); + create_directory(getSnapshotDir("foo", 5)); + create_directory(getSnapshotDir("foo", 6)); assertSnapshotDir("foo", 5); assertSnapshotDir("foo", 6); invalidateFooSnapshots(false); @@ -316,6 +322,86 @@ TEST_F(AttributeDirectoryTest, can_get_nonblocking_writer) EXPECT_FALSE(hasWriter(writer)); } +class TransientDiskUsageFixture : public Fixture { +public: + std::shared_ptr<AttributeDirectory> dir; + std::unique_ptr<AttributeDirectory::Writer> writer; + TransientDiskUsageFixture() + : dir(createFooAttrDir()), + writer(dir->getWriter()) + { + } + ~TransientDiskUsageFixture() {} + void create_invalid_snapshot(SerialNum serial_num) { + writer->createInvalidSnapshot(serial_num); + create_directory(writer->getSnapshotDir(serial_num)); + } + void create_valid_snapshot(SerialNum serial_num, size_t num_bytes_in_file) { + create_invalid_snapshot(serial_num); + write_snapshot_file(serial_num, num_bytes_in_file); + writer->markValidSnapshot(serial_num); + } + void write_snapshot_file(SerialNum serial_num, size_t num_bytes) { + std::ofstream file; + file.open(writer->getSnapshotDir(serial_num) + "/file.dat"); + std::string data(num_bytes, 'X'); + file.write(data.data(), num_bytes); + file.close(); + } + size_t transient_disk_usage() const { + return dir->get_transient_resource_usage().disk(); + } +}; + +class TransientDiskUsageTest : public TransientDiskUsageFixture, public testing::Test {}; + +TEST_F(TransientDiskUsageTest, disk_usage_of_snapshots_can_count_towards_transient_usage) +{ + create_invalid_snapshot(3); + EXPECT_EQ(0, transient_disk_usage()); + write_snapshot_file(3, 64); + // Note: search::DirectoryTraverse rounds each file size up to a block size of 4 KiB. + // Writing of snapshot 3 is ongoing and counts towards transient disk usage. + EXPECT_EQ(4_Ki, transient_disk_usage()); + writer->markValidSnapshot(3); + // Snapshot 3 is now the best and does NOT count towards transient disk usage. + EXPECT_EQ(0, transient_disk_usage()); + + create_invalid_snapshot(5); + EXPECT_EQ(0, transient_disk_usage()); + write_snapshot_file(5, 4_Ki + 1); + // Writing of snapshot 5 is ongoing and counts towards transient disk usage. + EXPECT_EQ(8_Ki, transient_disk_usage()); + writer->markValidSnapshot(5); + // Snapshot 5 is now the best and only 3 counts towards transient disk usage. + EXPECT_EQ(4_Ki, transient_disk_usage()); + + // Snapshot 3 is removed. + writer->invalidateOldSnapshots(); + writer->removeInvalidSnapshots(); + EXPECT_EQ(0, transient_disk_usage()); +} + +TEST(TransientDiskUsageLoadTest, disk_usage_of_snapshots_are_calculated_when_loading) +{ + { + TransientDiskUsageFixture f; + f.cleanup(false); + f.create_valid_snapshot(3, 64); + f.create_valid_snapshot(5, 4_Ki + 1); + f.writer->invalidateOldSnapshots(); + EXPECT_EQ(4_Ki, f.transient_disk_usage()); + } + { + TransientDiskUsageFixture f; + // Snapshot 5 is the best and only 3 counts towards transient disk usage. + EXPECT_EQ(4_Ki, f.transient_disk_usage()); + // Snapshot 3 is removed. + f.writer->removeInvalidSnapshots(); + EXPECT_EQ(0, f.transient_disk_usage()); + } +} + } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp index 8228174d6fd..91f22635513 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp @@ -2,6 +2,7 @@ #include "attribute_directory.h" #include "attributedisklayout.h" +#include <vespa/searchlib/util/dirtraverse.h> #include <vespa/searchlib/util/filekit.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/stllike/asciistream.h> @@ -36,7 +37,8 @@ AttributeDirectory::AttributeDirectory(const std::shared_ptr<AttributeDiskLayout _writer(nullptr), _mutex(), _cv(), - _snapInfo(getDirName()) + _snapInfo(getDirName()), + _disk_sizes() { _snapInfo.load(); SerialNum flushedSerialNum = getFlushedSerialNum(); @@ -44,6 +46,10 @@ AttributeDirectory::AttributeDirectory(const std::shared_ptr<AttributeDiskLayout vespalib::string dirName = getSnapshotDir(flushedSerialNum); _lastFlushTime = search::FileKit::getModificationTime(dirName); } + for (const auto& snapshot : _snapInfo.snapshots()) { + search::DirectoryTraverse dirt(getSnapshotDir(snapshot.syncToken)); + _disk_sizes[snapshot.syncToken] = dirt.GetTreeSize(); + } } AttributeDirectory::~AttributeDirectory() @@ -100,7 +106,7 @@ AttributeDirectory::saveSnapInfo() } vespalib::string -AttributeDirectory::getSnapshotDir(search::SerialNum serialNum) +AttributeDirectory::getSnapshotDir(search::SerialNum serialNum) const { vespalib::string dirName(getDirName()); return dirName + "/" + getSnapshotDirComponent(serialNum); @@ -118,6 +124,7 @@ AttributeDirectory::createInvalidSnapshot(SerialNum serialNum) { std::lock_guard<std::mutex> guard(_mutex); _snapInfo.addSnapshot(newSnap); + _disk_sizes[serialNum] = std::nullopt; } saveSnapInfo(); } @@ -135,6 +142,12 @@ AttributeDirectory::markValidSnapshot(SerialNum serialNum) vespalib::string snapshotDir(getSnapshotDir(serialNum)); vespalib::File::sync(snapshotDir); vespalib::File::sync(dirname(snapshotDir)); + search::DirectoryTraverse dirt(snapshotDir); + uint64_t size_on_disk = dirt.GetTreeSize(); + { + std::lock_guard<std::mutex> guard(_mutex); + _disk_sizes[serialNum] = size_on_disk; + } saveSnapInfo(); } @@ -188,6 +201,7 @@ AttributeDirectory::removeInvalidSnapshots() std::lock_guard<std::mutex> guard(_mutex); for (const auto &serialNum : toRemove) { _snapInfo.removeSnapshot(serialNum); + _disk_sizes.erase(serialNum); } } saveSnapInfo(); @@ -267,4 +281,33 @@ AttributeDirectory::Writer::~Writer() _dir._cv.notify_all(); } +TransientResourceUsage +AttributeDirectory::get_transient_resource_usage() const +{ + uint64_t total_size_on_disk = 0; + std::vector<SerialNum> to_traverse; + { + std::lock_guard<std::mutex> guard(_mutex); + auto best = _snapInfo.getBestSnapshot(); + // All snapshots except the best one count towards transient disk usage. + for (const auto& snapshot : _disk_sizes) { + auto serial_num = snapshot.first; + if (serial_num != best.syncToken) { + if (snapshot.second.has_value()) { + // The size of this snapshot has already been calculated. + total_size_on_disk += snapshot.second.value(); + } else { + // Writing of this snapshot is ongoing and the size must be calculated now. + to_traverse.push_back(serial_num); + } + } + } + } + for (auto serial_num : to_traverse) { + search::DirectoryTraverse dirt(getSnapshotDir(serial_num)); + total_size_on_disk += dirt.GetTreeSize(); + } + return {total_size_on_disk, 0}; +} + } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h index 59d5166d5f9..5531a32cee5 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h @@ -2,12 +2,15 @@ #pragma once -#include <vespa/vespalib/stllike/string.h> -#include <vespa/vespalib/util/time.h> +#include <vespa/searchcore/proton/common/i_transient_resource_usage_provider.h> #include <vespa/searchlib/common/indexmetainfo.h> #include <vespa/searchlib/common/serialnum.h> -#include <mutex> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/time.h> #include <condition_variable> +#include <mutex> +#include <optional> +#include <unordered_map> namespace proton { @@ -24,6 +27,10 @@ public: using SerialNum = search::SerialNum; private: + // Keeps track of the disk size (in bytes) for attribute snapshots. + // The disk size is calculated and set when a snapshot is marked as valid. + using SnapshotDiskSizes = std::unordered_map<SerialNum, std::optional<uint64_t>>; + std::weak_ptr<AttributeDiskLayout> _diskLayout; const vespalib::string _name; vespalib::system_time _lastFlushTime; @@ -31,9 +38,10 @@ private: mutable std::mutex _mutex; std::condition_variable _cv; search::IndexMetaInfo _snapInfo; + SnapshotDiskSizes _disk_sizes; void saveSnapInfo(); - vespalib::string getSnapshotDir(SerialNum serialNum); + vespalib::string getSnapshotDir(SerialNum serialNum) const; void setLastFlushTime(vespalib::system_time lastFlushTime); void createInvalidSnapshot(SerialNum serialNum); void markValidSnapshot(SerialNum serialNum); @@ -83,6 +91,7 @@ public: vespalib::system_time getLastFlushTime() const; bool empty() const; vespalib::string getAttributeFileName(SerialNum serialNum); + TransientResourceUsage get_transient_resource_usage() const; }; } // namespace proton |