aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-11-28 15:13:47 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2022-11-28 15:19:06 +0000
commitcd75ec563465a63df5b11deed0ee7205f4e46e3a (patch)
tree023da6f4b245982a2b284198c672be282252803f
parent326b765c033dd1083de7eeeffc2f40df8cbe1734 (diff)
Avoid mass pending GC on config enable edge
If a system is running in a stable state with no GC enabled, per-bucket last GC timestamps in the DB will end up further and further in the past. If GC is then enabled in config, we must ensure that GC timestamps are reset to the current time to avoid suddenly ending up with _every single_ bucket having exceeded its GC deadline, causing pending GC en masse. Resetting is edge-triggered, so it should not happen if GC is enabled in both the old and new configs.
-rw-r--r--storage/src/tests/distributor/statecheckerstest.cpp11
-rw-r--r--storage/src/tests/distributor/top_level_distributor_test.cpp77
-rw-r--r--storage/src/vespa/storage/distributor/distributor_stripe.cpp15
-rw-r--r--storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp37
-rw-r--r--storage/src/vespa/storage/distributor/stripe_bucket_db_updater.h2
5 files changed, 136 insertions, 6 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp
index 0fc148eb549..98ee1fbe8e6 100644
--- a/storage/src/tests/distributor/statecheckerstest.cpp
+++ b/storage/src/tests/distributor/statecheckerstest.cpp
@@ -1371,6 +1371,12 @@ std::string StateCheckersTest::testGarbageCollection(
uint32_t checkInterval, uint32_t lastChangeTime,
bool includePriority, bool includeSchedulingPri)
{
+ GarbageCollectionStateChecker checker;
+ auto cfg = make_config();
+ cfg->setGarbageCollection("music", std::chrono::seconds(checkInterval));
+ cfg->setLastGarbageCollectionChangeTime(vespalib::steady_time(std::chrono::seconds(lastChangeTime)));
+ configure_stripe(cfg);
+ // Insert after stripe configuration to avoid GC timestamp being implicitly reset
BucketDatabase::Entry e(document::BucketId(17, 0));
e.getBucketInfo().addNode(BucketCopy(prevTimestamp, 0,
api::BucketInfo(3,3,3)),
@@ -1378,11 +1384,6 @@ std::string StateCheckersTest::testGarbageCollection(
e.getBucketInfo().setLastGarbageCollectionTime(prevTimestamp);
getBucketDatabase().update(e);
- GarbageCollectionStateChecker checker;
- auto cfg = make_config();
- cfg->setGarbageCollection("music", std::chrono::seconds(checkInterval));
- cfg->setLastGarbageCollectionChangeTime(vespalib::steady_time(std::chrono::seconds(lastChangeTime)));
- configure_stripe(cfg);
NodeMaintenanceStatsTracker statsTracker;
StateChecker::Context c(node_context(), operation_context(),
getDistributorBucketSpace(), statsTracker,
diff --git a/storage/src/tests/distributor/top_level_distributor_test.cpp b/storage/src/tests/distributor/top_level_distributor_test.cpp
index 2c19652b4c2..a64e94a5e66 100644
--- a/storage/src/tests/distributor/top_level_distributor_test.cpp
+++ b/storage/src/tests/distributor/top_level_distributor_test.cpp
@@ -132,6 +132,18 @@ struct TopLevelDistributorTest : Test, TopLevelDistributorTestUtil {
EXPECT_EQ(stripe->non_activation_maintenance_is_inhibited(), inhibited);
}
}
+
+ void set_bucket_last_gc_time(const BucketId& bucket_id, uint32_t last_gc_time) {
+ auto db_entry = get_bucket(bucket_id);
+ db_entry->setLastGarbageCollectionTime(last_gc_time);
+ stripe_bucket_database(stripe_index_of_bucket(bucket_id)).update(db_entry);
+ }
+
+ uint32_t get_bucket_last_gc_time(const BucketId& bucket_id) const {
+ auto db_entry = get_bucket(bucket_id);
+ return db_entry->getLastGarbageCollectionTime();
+ }
+
};
TopLevelDistributorTest::TopLevelDistributorTest()
@@ -620,4 +632,69 @@ TEST_F(TopLevelDistributorTest, maintenance_safe_time_not_triggered_if_state_tra
ASSERT_NO_FATAL_FAILURE(assert_all_stripes_are_maintenance_inhibited(false));
}
+/**
+ * If a system is running in a stable state with no GC enabled, per-bucket last GC timestamps
+ * in the DB will end up further and further in the past. If GC is then enabled in config,
+ * we must ensure that GC timestamps are reset to the current time to avoid suddenly ending
+ * up with _every single_ bucket having exceeded its GC deadline, causing pending GC en masse.
+ *
+ * Resetting is edge-triggered, so it should not happen if GC is enabled in both the old
+ * and new configs.
+ */
+TEST_F(TopLevelDistributorTest, gc_timestamps_reset_to_current_time_on_gc_enabled_edge) {
+ setup_distributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2");
+ fake_clock().setAbsoluteTimeInSeconds(1234);
+
+ BucketId b1(16, 1);
+ BucketId b2(16, 2);
+ BucketId b3(16, 3);
+
+ add_nodes_to_stripe_bucket_db(b1, "0=1/1/1/t/a");
+ add_nodes_to_stripe_bucket_db(b2, "0=2/2/2/t/a");
+ add_nodes_to_stripe_bucket_db(b3, "0=3/3/3/t/a");
+
+ // Reconfigure GC interval from 0 (disabled) to 3600 (enabled).
+ auto cfg = current_distributor_config();
+ cfg.garbagecollection.interval = 3600;
+ cfg.garbagecollection.selectiontoremove = "true";
+ reconfigure(cfg);
+
+ // GC timestamps must be set to the current time to avoid a flood of GC ops caused by
+ // all buckets suddenly implicitly exceeding their GC deadline.
+ ASSERT_EQ(get_bucket_last_gc_time(b1), 1234);
+ ASSERT_EQ(get_bucket_last_gc_time(b2), 1234);
+ ASSERT_EQ(get_bucket_last_gc_time(b3), 1234);
+}
+
+TEST_F(TopLevelDistributorTest, gc_timestamps_not_reset_to_current_time_when_gc_enabled_in_old_and_new_configs) {
+ setup_distributor(Redundancy(2), NodeCount(2), "version:1 distributor:1 storage:2");
+ fake_clock().setAbsoluteTimeInSeconds(1234);
+
+ auto cfg = current_distributor_config();
+ cfg.garbagecollection.interval = 3600;
+ cfg.garbagecollection.selectiontoremove = "true";
+ reconfigure(cfg);
+
+ BucketId b1(16, 1);
+ BucketId b2(16, 2);
+ BucketId b3(16, 3);
+
+ add_nodes_to_stripe_bucket_db(b1, "0=1/1/1/t/a");
+ set_bucket_last_gc_time(b1, 1001);
+ add_nodes_to_stripe_bucket_db(b2, "0=2/2/2/t/a");
+ set_bucket_last_gc_time(b2, 1002);
+ add_nodes_to_stripe_bucket_db(b3, "0=3/3/3/t/a");
+ set_bucket_last_gc_time(b3, 1003);
+
+ // Change in GC interval, but no enabling-edge
+ cfg = current_distributor_config();
+ cfg.garbagecollection.interval = 1800;
+ reconfigure(cfg);
+
+ // No changes in GC time
+ ASSERT_EQ(get_bucket_last_gc_time(b1), 1001);
+ ASSERT_EQ(get_bucket_last_gc_time(b2), 1002);
+ ASSERT_EQ(get_bucket_last_gc_time(b3), 1003);
+}
+
}
diff --git a/storage/src/vespa/storage/distributor/distributor_stripe.cpp b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
index a811e043678..cb1c935082e 100644
--- a/storage/src/vespa/storage/distributor/distributor_stripe.cpp
+++ b/storage/src/vespa/storage/distributor/distributor_stripe.cpp
@@ -769,11 +769,26 @@ void DistributorStripe::mark_maintenance_tick_as_no_longer_inhibited() noexcept
_inhibited_maintenance_tick_count = 0;
}
+namespace {
+
+bool config_change_has_gc_enable_edge(const DistributorConfiguration& old_config,
+ const DistributorConfiguration& new_config) noexcept {
+ return ((old_config.getGarbageCollectionInterval().count() == 0) &&
+ (new_config.getGarbageCollectionInterval().count() != 0));
+}
+
+}
+
void
DistributorStripe::update_total_distributor_config(std::shared_ptr<const DistributorConfiguration> config)
{
+ auto old_config = std::move(_total_config);
_total_config = std::move(config);
propagate_config_snapshot_to_internal_components();
+ if (config_change_has_gc_enable_edge(*old_config, *_total_config)) {
+ LOG(debug, "GC has been enabled at reconfig edge; resetting last GC for all buckets to current time");
+ _bucketDBUpdater.reset_all_last_gc_timestamps_to_current_time();
+ }
}
void
diff --git a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp
index 3f7a0fc3d12..5a584f7c332 100644
--- a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp
+++ b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.cpp
@@ -1,7 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "stripe_bucket_db_updater.h"
-#include "bucket_db_prune_elision.h"
#include "bucket_space_distribution_context.h"
#include "top_level_distributor.h"
#include "distributor_bucket_space.h"
@@ -800,4 +799,40 @@ StripeBucketDBUpdater::MergingNodeRemover::storage_node_is_available(uint16_t in
StripeBucketDBUpdater::MergingNodeRemover::~MergingNodeRemover() = default;
+namespace {
+
+class MergingGcTimeSetter : public BucketDatabase::MergingProcessor {
+ // time_point would be preferable, but the internal DB representation is seconds since epoch
+ std::chrono::seconds _last_gc_at_secs_from_epoch;
+public:
+ explicit MergingGcTimeSetter(std::chrono::seconds gc_time_point) noexcept
+ : _last_gc_at_secs_from_epoch(gc_time_point) {
+ }
+
+ ~MergingGcTimeSetter() override = default;
+
+ Result merge(BucketDatabase::Merger& merger) override {
+ auto& entry = merger.current_entry();
+ // TODO widen internal GC time type...!
+ entry->setLastGarbageCollectionTime(static_cast<uint32_t>(_last_gc_at_secs_from_epoch.count()));
+ return Result::Update;
+ }
+
+};
+
+}
+
+void StripeBucketDBUpdater::reset_all_last_gc_timestamps_to_current_time() {
+ // Epochs are expected to be identical between clock types
+ // TODO remove framework clock types in favor of std::chrono
+ auto now_from_epoch = std::chrono::seconds(_node_ctx.clock().getTimeInSeconds().getTime());
+ MergingGcTimeSetter gc_time_setter(now_from_epoch);
+
+ auto& repo = _op_ctx.bucket_space_repo();
+ for (auto& bucket_space : repo) {
+ auto& bucket_db = bucket_space.second->getBucketDatabase();
+ bucket_db.merge(gc_time_setter);
+ }
+}
+
} // distributor
diff --git a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.h b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.h
index 0837a3bc1f3..7a34ed03a3c 100644
--- a/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.h
+++ b/storage/src/vespa/storage/distributor/stripe_bucket_db_updater.h
@@ -71,6 +71,8 @@ public:
}
OperationRoutingSnapshot read_snapshot_for_bucket(const document::Bucket&) const;
+
+ void reset_all_last_gc_timestamps_to_current_time();
private:
class MergeReplyGuard {
public: