blob: 9aafdb8b236ffe08deb003359be51888725e4fbd (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.cluster;
import java.util.logging.Logger;
import com.yahoo.search.result.ErrorMessage;
/**
* A node monitor is responsible for maintaining the state of a monitored node.
* It has the following properties:
* <ul>
* <li>A node is taken out of operation if it fails</li>
* <li>A node is put back in operation when it responds correctly again
* <i>responseAfterFailLimit</i> times <b>unless</b>
* it has failed <i>failQuarantineLimit</i>. In the latter case it won't
* be put into operation again before that time period has expired</li>
* </ul>
*
* @author bratseth
*/
public abstract class BaseNodeMonitor<T> {

    protected static Logger log = Logger.getLogger(BaseNodeMonitor.class.getName());

    /** The object representing the monitored node */
    protected T node;

    /** Whether the monitored node is currently considered to be working. Starts out as true. */
    protected boolean isWorking = true;

    /** Whether this node is quarantined for instability */
    protected boolean isQuarantined = false;

    /** The last time this node failed, in ms */
    protected long failedAt = 0;

    /** The last time this node responded (failed or succeeded), in ms */
    protected long respondedAt = 0;

    /** The last time this node responded successfully, in ms */
    protected long succeededAt = 0;

    /**
     * The configuration of this monitor. This class never assigns it, so it must be
     * set by the subclass before {@link #isIdle()} is called.
     */
    protected MonitorConfiguration configuration;

    /** Is the node we monitor part of an internal Vespa cluster or not. Fixed at construction. */
    private final boolean internal;

    /**
     * Creates a node monitor.
     *
     * @param internal whether the monitored node is part of an internal Vespa cluster
     */
    public BaseNodeMonitor(boolean internal) {
        this.internal = internal;
    }

    /** Returns the object representing the monitored node */
    public T getNode() { return node; }

    /**
     * Returns whether this node is currently in a state suitable
     * for receiving traffic, as far as this monitor knows.
     */
    public boolean isWorking() { return isWorking; }

    /** Returns whether this node is currently quarantined for instability */
    public boolean isQuarantined() { return isQuarantined; }

    /**
     * Called when this node fails.
     *
     * @param error a description of the error
     */
    public abstract void failed(ErrorMessage error);

    /**
     * Called when a response is received from this node. If the node was
     * quarantined and it has been in that state for more than QuarantineTime
     * milliseconds, it is taken out of quarantine.
     *
     * If it is not in quarantine but is not working, it may be set to working
     * if this method is called at least responseAfterFailLimit times.
     */
    public abstract void responded();

    /**
     * Returns whether this node is idle, that is whether the time passed since
     * {@link #respondedAt} is at least the idle limit of the configuration.
     * Requires {@link #configuration} to be set.
     */
    public boolean isIdle() {
        return (now() - respondedAt) >= configuration.getIdleLimit();
    }

    /** Returns the current time in ms. Protected so that subclasses may substitute another time source. */
    protected long now() {
        return System.currentTimeMillis();
    }

    /**
     * Thread-safely changes the state of this node if required.
     *
     * @param working whether the node should now be considered working
     * @param explanation a description of why the state is being set
     */
    protected abstract void setWorking(boolean working, String explanation);

    /** Returns whether or not this is monitoring an internal node, as given at construction. */
    public boolean isInternal() { return internal; }

}
|