summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@yahooinc.com>2022-12-02 15:52:01 +0100
committerGitHub <noreply@github.com>2022-12-02 15:52:01 +0100
commit42c05e4dc000a82a9cbe2d00604bd17d708a2cd2 (patch)
tree13e39660724e8c27025cc04989444ac928da29b1
parent0f5a6d241394b7b064debdd38fabd58c7c64f94d (diff)
parent844c6bf22f2e0234f4298e99c02dd4b7c41fd2fe (diff)
Merge pull request #25094 from vespa-engine/geirst/attribute-directory-transient-disk-usage
Report transient disk usage for attribute disk snapshots.
-rw-r--r--searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp104
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp47
-rw-r--r--searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h17
3 files changed, 153 insertions, 15 deletions
diff --git a/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp b/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp
index 98dca748533..a1ca69e54d7 100644
--- a/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp
+++ b/searchcore/src/tests/proton/attribute/attribute_directory/attribute_directory_test.cpp
@@ -6,7 +6,9 @@
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/size_literals.h>
#include <filesystem>
+#include <fstream>
#include <vespa/log/log.h>
LOG_SETUP("attribute_directory_test");
@@ -45,6 +47,10 @@ bool hasWriter(const std::unique_ptr<AttributeDirectory::Writer> &writer) {
return static_cast<bool>(writer);
}
+void create_directory(const vespalib::string& path) {
+ std::filesystem::create_directory(std::filesystem::path(path));
+}
+
}
class Fixture : public DirectoryHandler {
@@ -134,7 +140,7 @@ public:
EXPECT_TRUE(hasAttributeDir(dir));
auto writer = dir->getWriter();
writer->createInvalidSnapshot(serialNum);
- std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(serialNum)));
+ create_directory(writer->getSnapshotDir(serialNum));
writer->markValidSnapshot(serialNum);
assertAttributeDiskDir("foo");
}
@@ -160,7 +166,7 @@ public:
auto dir = createFooAttrDir();
auto writer = dir->getWriter();
writer->createInvalidSnapshot(serialNum);
- std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(serialNum)));
+ create_directory(writer->getSnapshotDir(serialNum));
writer->markValidSnapshot(serialNum);
}
@@ -208,10 +214,10 @@ TEST_F(AttributeDirectoryTest, can_prune_attribute_snapshots)
assertNotAttributeDiskDir("foo");
auto writer = dir->getWriter();
writer->createInvalidSnapshot(2);
- std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(2)));
+ create_directory(writer->getSnapshotDir(2));
writer->markValidSnapshot(2);
writer->createInvalidSnapshot(4);
- std::filesystem::create_directory(std::filesystem::path(writer->getSnapshotDir(4)));
+ create_directory(writer->getSnapshotDir(4));
writer->markValidSnapshot(4);
writer.reset();
assertAttributeDiskDir("foo");
@@ -262,9 +268,9 @@ TEST_F(AttributeDirectoryTest, attribute_directory_is_not_removed_due_to_pruning
TEST(BasicDirectoryTest, initial_state_tracks_disk_layout)
{
- std::filesystem::create_directory(std::filesystem::path("attributes"));
- std::filesystem::create_directory(std::filesystem::path("attributes/foo"));
- std::filesystem::create_directory(std::filesystem::path("attributes/bar"));
+ create_directory("attributes");
+ create_directory("attributes/foo");
+ create_directory("attributes/bar");
IndexMetaInfo fooInfo("attributes/foo");
IndexMetaInfo barInfo("attributes/bar");
fooInfo.addSnapshot({true, 4, "snapshot-4"});
@@ -290,8 +296,8 @@ TEST(BasicDirectoryTest, initial_state_tracks_disk_layout)
TEST_F(AttributeDirectoryTest, snapshot_removal_removes_correct_snapshot_directory)
{
setupFooSnapshots(5);
- std::filesystem::create_directory(std::filesystem::path(getSnapshotDir("foo", 5)));
- std::filesystem::create_directory(std::filesystem::path(getSnapshotDir("foo", 6)));
+ create_directory(getSnapshotDir("foo", 5));
+ create_directory(getSnapshotDir("foo", 6));
assertSnapshotDir("foo", 5);
assertSnapshotDir("foo", 6);
invalidateFooSnapshots(false);
@@ -316,6 +322,86 @@ TEST_F(AttributeDirectoryTest, can_get_nonblocking_writer)
EXPECT_FALSE(hasWriter(writer));
}
+class TransientDiskUsageFixture : public Fixture {
+public:
+ std::shared_ptr<AttributeDirectory> dir;
+ std::unique_ptr<AttributeDirectory::Writer> writer;
+ TransientDiskUsageFixture()
+ : dir(createFooAttrDir()),
+ writer(dir->getWriter())
+ {
+ }
+ ~TransientDiskUsageFixture() {}
+ void create_invalid_snapshot(SerialNum serial_num) {
+ writer->createInvalidSnapshot(serial_num);
+ create_directory(writer->getSnapshotDir(serial_num));
+ }
+ void create_valid_snapshot(SerialNum serial_num, size_t num_bytes_in_file) {
+ create_invalid_snapshot(serial_num);
+ write_snapshot_file(serial_num, num_bytes_in_file);
+ writer->markValidSnapshot(serial_num);
+ }
+ void write_snapshot_file(SerialNum serial_num, size_t num_bytes) {
+ std::ofstream file;
+ file.open(writer->getSnapshotDir(serial_num) + "/file.dat");
+ std::string data(num_bytes, 'X');
+ file.write(data.data(), num_bytes);
+ file.close();
+ }
+ size_t transient_disk_usage() const {
+ return dir->get_transient_resource_usage().disk();
+ }
+};
+
+class TransientDiskUsageTest : public TransientDiskUsageFixture, public testing::Test {};
+
+TEST_F(TransientDiskUsageTest, disk_usage_of_snapshots_can_count_towards_transient_usage)
+{
+ create_invalid_snapshot(3);
+ EXPECT_EQ(0, transient_disk_usage());
+ write_snapshot_file(3, 64);
+ // Note: search::DirectoryTraverse rounds each file size up to a block size of 4 KiB.
+ // Writing of snapshot 3 is ongoing and counts towards transient disk usage.
+ EXPECT_EQ(4_Ki, transient_disk_usage());
+ writer->markValidSnapshot(3);
+ // Snapshot 3 is now the best and does NOT count towards transient disk usage.
+ EXPECT_EQ(0, transient_disk_usage());
+
+ create_invalid_snapshot(5);
+ EXPECT_EQ(0, transient_disk_usage());
+ write_snapshot_file(5, 4_Ki + 1);
+ // Writing of snapshot 5 is ongoing and counts towards transient disk usage.
+ EXPECT_EQ(8_Ki, transient_disk_usage());
+ writer->markValidSnapshot(5);
+ // Snapshot 5 is now the best and only 3 counts towards transient disk usage.
+ EXPECT_EQ(4_Ki, transient_disk_usage());
+
+ // Snapshot 3 is removed.
+ writer->invalidateOldSnapshots();
+ writer->removeInvalidSnapshots();
+ EXPECT_EQ(0, transient_disk_usage());
+}
+
+TEST(TransientDiskUsageLoadTest, disk_usage_of_snapshots_are_calculated_when_loading)
+{
+ {
+ TransientDiskUsageFixture f;
+ f.cleanup(false);
+ f.create_valid_snapshot(3, 64);
+ f.create_valid_snapshot(5, 4_Ki + 1);
+ f.writer->invalidateOldSnapshots();
+ EXPECT_EQ(4_Ki, f.transient_disk_usage());
+ }
+ {
+ TransientDiskUsageFixture f;
+ // Snapshot 5 is the best and only 3 counts towards transient disk usage.
+ EXPECT_EQ(4_Ki, f.transient_disk_usage());
+ // Snapshot 3 is removed.
+ f.writer->removeInvalidSnapshots();
+ EXPECT_EQ(0, f.transient_disk_usage());
+ }
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp
index 8228174d6fd..91f22635513 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.cpp
@@ -2,6 +2,7 @@
#include "attribute_directory.h"
#include "attributedisklayout.h"
+#include <vespa/searchlib/util/dirtraverse.h>
#include <vespa/searchlib/util/filekit.h>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/asciistream.h>
@@ -36,7 +37,8 @@ AttributeDirectory::AttributeDirectory(const std::shared_ptr<AttributeDiskLayout
_writer(nullptr),
_mutex(),
_cv(),
- _snapInfo(getDirName())
+ _snapInfo(getDirName()),
+ _disk_sizes()
{
_snapInfo.load();
SerialNum flushedSerialNum = getFlushedSerialNum();
@@ -44,6 +46,10 @@ AttributeDirectory::AttributeDirectory(const std::shared_ptr<AttributeDiskLayout
vespalib::string dirName = getSnapshotDir(flushedSerialNum);
_lastFlushTime = search::FileKit::getModificationTime(dirName);
}
+ for (const auto& snapshot : _snapInfo.snapshots()) {
+ search::DirectoryTraverse dirt(getSnapshotDir(snapshot.syncToken));
+ _disk_sizes[snapshot.syncToken] = dirt.GetTreeSize();
+ }
}
AttributeDirectory::~AttributeDirectory()
@@ -100,7 +106,7 @@ AttributeDirectory::saveSnapInfo()
}
vespalib::string
-AttributeDirectory::getSnapshotDir(search::SerialNum serialNum)
+AttributeDirectory::getSnapshotDir(search::SerialNum serialNum) const
{
vespalib::string dirName(getDirName());
return dirName + "/" + getSnapshotDirComponent(serialNum);
@@ -118,6 +124,7 @@ AttributeDirectory::createInvalidSnapshot(SerialNum serialNum)
{
std::lock_guard<std::mutex> guard(_mutex);
_snapInfo.addSnapshot(newSnap);
+ _disk_sizes[serialNum] = std::nullopt;
}
saveSnapInfo();
}
@@ -135,6 +142,12 @@ AttributeDirectory::markValidSnapshot(SerialNum serialNum)
vespalib::string snapshotDir(getSnapshotDir(serialNum));
vespalib::File::sync(snapshotDir);
vespalib::File::sync(dirname(snapshotDir));
+ search::DirectoryTraverse dirt(snapshotDir);
+ uint64_t size_on_disk = dirt.GetTreeSize();
+ {
+ std::lock_guard<std::mutex> guard(_mutex);
+ _disk_sizes[serialNum] = size_on_disk;
+ }
saveSnapInfo();
}
@@ -188,6 +201,7 @@ AttributeDirectory::removeInvalidSnapshots()
std::lock_guard<std::mutex> guard(_mutex);
for (const auto &serialNum : toRemove) {
_snapInfo.removeSnapshot(serialNum);
+ _disk_sizes.erase(serialNum);
}
}
saveSnapInfo();
@@ -267,4 +281,33 @@ AttributeDirectory::Writer::~Writer()
_dir._cv.notify_all();
}
+TransientResourceUsage
+AttributeDirectory::get_transient_resource_usage() const
+{
+ uint64_t total_size_on_disk = 0;
+ std::vector<SerialNum> to_traverse;
+ {
+ std::lock_guard<std::mutex> guard(_mutex);
+ auto best = _snapInfo.getBestSnapshot();
+ // All snapshots except the best one count towards transient disk usage.
+ for (const auto& snapshot : _disk_sizes) {
+ auto serial_num = snapshot.first;
+ if (serial_num != best.syncToken) {
+ if (snapshot.second.has_value()) {
+ // The size of this snapshot has already been calculated.
+ total_size_on_disk += snapshot.second.value();
+ } else {
+ // Writing of this snapshot is ongoing and the size must be calculated now.
+ to_traverse.push_back(serial_num);
+ }
+ }
+ }
+ }
+ for (auto serial_num : to_traverse) {
+ search::DirectoryTraverse dirt(getSnapshotDir(serial_num));
+ total_size_on_disk += dirt.GetTreeSize();
+ }
+ return {total_size_on_disk, 0};
+}
+
} // namespace proton
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h
index 59d5166d5f9..5531a32cee5 100644
--- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h
+++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_directory.h
@@ -2,12 +2,15 @@
#pragma once
-#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/util/time.h>
+#include <vespa/searchcore/proton/common/i_transient_resource_usage_provider.h>
#include <vespa/searchlib/common/indexmetainfo.h>
#include <vespa/searchlib/common/serialnum.h>
-#include <mutex>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/time.h>
#include <condition_variable>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
namespace proton {
@@ -24,6 +27,10 @@ public:
using SerialNum = search::SerialNum;
private:
+ // Keeps track of the disk size (in bytes) for attribute snapshots.
+ // The disk size is calculated and set when a snapshot is marked as valid.
+ using SnapshotDiskSizes = std::unordered_map<SerialNum, std::optional<uint64_t>>;
+
std::weak_ptr<AttributeDiskLayout> _diskLayout;
const vespalib::string _name;
vespalib::system_time _lastFlushTime;
@@ -31,9 +38,10 @@ private:
mutable std::mutex _mutex;
std::condition_variable _cv;
search::IndexMetaInfo _snapInfo;
+ SnapshotDiskSizes _disk_sizes;
void saveSnapInfo();
- vespalib::string getSnapshotDir(SerialNum serialNum);
+ vespalib::string getSnapshotDir(SerialNum serialNum) const;
void setLastFlushTime(vespalib::system_time lastFlushTime);
void createInvalidSnapshot(SerialNum serialNum);
void markValidSnapshot(SerialNum serialNum);
@@ -83,6 +91,7 @@ public:
vespalib::system_time getLastFlushTime() const;
bool empty() const;
vespalib::string getAttributeFileName(SerialNum serialNum);
+ TransientResourceUsage get_transient_resource_usage() const;
};
} // namespace proton