aboutsummaryrefslogtreecommitdiffstats
path: root/storage/src/vespa/storage/frameworkimpl/thread/deadlockdetector.h
blob: 93487baa71da4818b66d51e51b799f764492a848 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * @class storage::DeadLockDetector
 * @ingroup common
 *
 * Threads register with the deadlock detector and call registerTick
 * periodically. If they do not tick often enough, the deadlock detector
 * will shut down the node.
 *
 * @brief A class for detecting whether storage has entered a deadlock.
 */

#pragma once

#include "appkiller.h"
#include <vespa/storage/common/distributorcomponent.h>
#include <vespa/storage/common/servicelayercomponent.h>
#include <vespa/storageframework/generic/status/htmlstatusreporter.h>
#include <vespa/storageframework/generic/thread/threadpool.h>
#include <vespa/storageframework/generic/thread/runnable.h>
#include <vespa/storageframework/generic/thread/thread.h>
#include <atomic>
#include <condition_variable>
#include <map>
#include <memory>
#include <mutex>

namespace storage {

// Forward declaration of the thread type that ThreadVisitor::visitThread()
// and handleDeadlock() take by const reference.
// NOTE(review): thread.h is already included above and framework::Thread::UP
// is used as a member type below, so this declaration is presumably
// redundant — confirm before removing.
namespace framework { class Thread; }

// Implements framework::Runnable (run()) and framework::HtmlStatusReporter
// (reportHtmlStatus()) through private inheritance, so neither base interface
// is exposed to users of this type. Only the trivial accessors are defined
// inline; everything else is defined out of line.
struct DeadLockDetector : private framework::Runnable,
                          private framework::HtmlStatusReporter
{
    // Value type of the _states map and the state handed to
    // ThreadVisitor::visitThread().
    // NOTE(review): presumably OK -> WARNED -> HALTED mirrors the warn and
    // fail thresholds below — confirm against the implementation.
    enum State { OK, WARNED, HALTED };

    // `killer` defaults to a RealAppKiller; callers may inject another
    // AppKiller implementation (presumably for testing — confirm).
    DeadLockDetector(StorageComponentRegister&,
                     AppKiller::UP killer = std::make_unique<RealAppKiller>());
    ~DeadLockDetector() override;

    void enableWarning(bool enable); // Thread-safe
    void enableShutdown(bool enable); // Thread-safe

    // There are no data read/write dependencies on either _processSlack or
    // _waitSlack, so relaxed atomic operations suffice for these accessors.
    void setProcessSlack(vespalib::duration slack) noexcept {
        _processSlack.store(slack, std::memory_order_relaxed);
    }
    [[nodiscard]] vespalib::duration getProcessSlack() const noexcept {
        return _processSlack.load(std::memory_order_relaxed);
    }
    void setWaitSlack(vespalib::duration slack) noexcept {
        _waitSlack.store(slack, std::memory_order_relaxed);
    }
    [[nodiscard]] vespalib::duration getWaitSlack() const noexcept {
        return _waitSlack.load(std::memory_order_relaxed);
    }

    // These utility functions are public as internal anonymous classes are
    // using them. Can also be useful for whitebox testing.

    // Callback interface for visitThreads(). `state` is passed by non-const
    // reference, so an implementation may update it.
    struct ThreadVisitor {
        virtual ~ThreadVisitor() = default;
        virtual void visitThread(const framework::Thread& thread, State& state) = 0;
    };
    // NOTE(review): presumably invokes the visitor once per registered thread
    // while holding _lock (both _lock and _states are mutable, which is what
    // lets this be const) — confirm against the implementation.
    void visitThreads(ThreadVisitor&) const;

    // Threshold predicates with parallel signatures: the time to evaluate at,
    // the thread's properties and its latest tick data.
    // NOTE(review): presumably the process/wait slack values above feed into
    // these checks — confirm against the implementation.
    bool isAboveFailThreshold(vespalib::steady_time time,
                              const framework::ThreadProperties& tp,
                              const framework::ThreadTickData& tick) const;
    bool isAboveWarnThreshold(vespalib::steady_time time,
                              const framework::ThreadProperties& tp,
                              const framework::ThreadTickData& tick) const;
    // NOTE(review): `warnOnly` presumably selects between only reporting the
    // stuck thread and letting _killer take the process down — confirm.
    void handleDeadlock(vespalib::steady_time currentTime,
                        const framework::Thread& deadlocked_thread,
                        const vespalib::string& id,
                        const framework::ThreadProperties& tp,
                        const framework::ThreadTickData& tick,
                        bool warnOnly);

    // Note: returned value may change between calls due to reconfiguration by other threads
    [[nodiscard]] bool warning_enabled_relaxed() const noexcept {
        return _enableWarning.load(std::memory_order_relaxed);
    }
    [[nodiscard]] bool shutdown_enabled_relaxed() const noexcept {
        return _enableShutdown.load(std::memory_order_relaxed);
    }

private:
    // NOTE: member order is significant for construction/destruction order;
    // keep it in sync with the constructor's initializer list.
    AppKiller::UP _killer;
    // State per string key (presumably the thread id — confirm). Mutable so
    // that const members such as visitThreads() can update it.
    mutable std::map<vespalib::string, State> _states;
    mutable std::mutex      _lock;
    std::condition_variable _cond;
    // Read with relaxed loads by the *_relaxed() accessors above.
    std::atomic<bool> _enableWarning;
    std::atomic<bool> _enableShutdown;
    // Accessed only through the relaxed slack getters/setters above.
    std::atomic<vespalib::duration> _processSlack;
    std::atomic<vespalib::duration> _waitSlack;
    // NOTE(review): presumably only one of the two owned components is set,
    // with _component pointing at it without owning it — confirm.
    DistributorComponent::UP _dComponent;
    ServiceLayerComponent::UP _slComponent;
    StorageComponent* _component;
    framework::Thread::UP _thread;

    // framework::Runnable entry point.
    void run(framework::ThreadHandle&) override;
    // framework::HtmlStatusReporter hook; writes the status page to `out`.
    void reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath&) const override;
    vespalib::string getBucketLockInfo() const;
};

}