aboutsummaryrefslogtreecommitdiffstats
path: root/client/go/internal/admin/vespa-wrapper/configserver/check.go
diff options
context:
space:
mode:
author Arne Juul <arnej@yahooinc.com> 2023-05-12 10:33:01 +0000
committer Arne Juul <arnej@yahooinc.com> 2023-05-12 10:33:01 +0000
commit 93cdbc4255040bca62645df21d7b210910cdd6ac (patch)
tree 772c0bc29d142af119b11525ef6e4348f897782e /client/go/internal/admin/vespa-wrapper/configserver/check.go
parent 5a80d77bc5d6df103e9beb7ca9b21fd8b8670234 (diff)
wait for ping OK to majority of configserver hosts
Diffstat (limited to 'client/go/internal/admin/vespa-wrapper/configserver/check.go')
-rw-r--r-- client/go/internal/admin/vespa-wrapper/configserver/check.go 93
1 files changed, 93 insertions, 0 deletions
diff --git a/client/go/internal/admin/vespa-wrapper/configserver/check.go b/client/go/internal/admin/vespa-wrapper/configserver/check.go
index a0248dd128f..2a444019261 100644
--- a/client/go/internal/admin/vespa-wrapper/configserver/check.go
+++ b/client/go/internal/admin/vespa-wrapper/configserver/check.go
@@ -5,10 +5,15 @@ package configserver
import (
"fmt"
+ "os"
+ "strings"
+ "time"
"github.com/vespa-engine/vespa/client/go/internal/admin/defaults"
+ "github.com/vespa-engine/vespa/client/go/internal/admin/envvars"
"github.com/vespa-engine/vespa/client/go/internal/admin/trace"
"github.com/vespa-engine/vespa/client/go/internal/util"
+ "github.com/vespa-engine/vespa/client/go/internal/vespa"
)
func checkIsConfigserver(myname string) {
@@ -22,3 +27,91 @@ func checkIsConfigserver(myname string) {
trace.Warning("only these hosts should run a config server:", onlyHosts)
util.JustExitMsg(fmt.Sprintf("this host [%s] should not run a config server", myname))
}
+
+// pingChecker runs the external "ping" command against a set of hosts and
+// remembers the most recent result for every host name it has pinged.
+type pingChecker struct {
+	// hostNames lists the hosts that pingAll pings.
+	hostNames []string
+	// lastErr maps a host name to the error from its latest ping (nil on success).
+	lastErr map[string]error
+	// lastOut maps a host name to the output of its latest ping,
+	// with one trailing newline removed.
+	lastOut map[string]string
+	// backticks is used to run the ping command and capture its output.
+	backticks util.BackTicks
+}
+
+// ping runs "ping -c 1 -q <hostname>" (one request, quiet output), stores
+// the resulting error and output under hostname, and reports whether the
+// command completed without error.
+func (pc *pingChecker) ping(hostname string) bool {
+	output, pingErr := pc.backticks.Run("ping", "-c", "1", "-q", hostname)
+	// keep the output even on failure so printErrors can show it later
+	pc.lastOut[hostname] = strings.TrimSuffix(output, "\n")
+	pc.lastErr[hostname] = pingErr
+	succeeded := pingErr == nil
+	return succeeded
+}
+
+// pingAll pings each host in hostNames once, in order, refreshing the
+// results recorded for those hosts.
+func (pc *pingChecker) pingAll() {
+	for idx := range pc.hostNames {
+		// the boolean result is not needed here; ping records the outcome itself
+		_ = pc.ping(pc.hostNames[idx])
+	}
+}
+
+// countOk returns how many of the hosts in hostNames answered their most
+// recent ping successfully.
+//
+// Only names listed in hostNames are considered: lastErr may also hold
+// results for other names that were passed directly to ping, and those must
+// not count towards the total. A host that has not been pinged yet has no
+// entry in lastErr and is not counted either.
+func (pc *pingChecker) countOk() int {
+	isOk := 0
+	for _, hn := range pc.hostNames {
+		// a plain lookup yields a nil error for a missing key, so the
+		// presence flag is needed to tell "succeeded" from "never pinged"
+		if err, seen := pc.lastErr[hn]; seen && err == nil {
+			isOk++
+		}
+	}
+	return isOk
+}
+
+// requiredOk returns the smallest number of hosts that forms a strict
+// majority of hostNames (more than half of them).
+func (pc *pingChecker) requiredOk() int {
+	total := len(pc.hostNames)
+	majority := total/2 + 1
+	return majority
+}
+
+// printErrors logs one warning per host name whose latest ping failed,
+// showing the error together with the captured command output.
+func (pc *pingChecker) printErrors() {
+	for host, pingErr := range pc.lastErr {
+		if pingErr == nil {
+			// latest ping of this host succeeded, nothing to report
+			continue
+		}
+		trace.Warning("failed to 'ping' host:", host, "=>", pingErr, "command output:", pc.lastOut[host])
+	}
+}
+
+func waitForDnsResolving() {
+ onlyHosts := defaults.VespaConfigserverHosts()
+ if len(onlyHosts) < 2 {
+ // no wait in single-node case
+ return
+ }
+ if os.Getenv(envvars.VESPA_SKIP_PING) != "" {
+ trace.Debug("skipping DNS resolution check")
+ return
+ }
+ helper := pingChecker{
+ hostNames: onlyHosts,
+ lastErr: make(map[string]error),
+ lastOut: make(map[string]string),
+ backticks: util.BackTicksWithStderr,
+ }
+ myname, _ := vespa.FindOurHostname()
+ if !helper.ping(myname) {
+ trace.Warning("self-ping failed, consider skipping this check")
+ }
+ trace.Debug("check DNS resolution, require", helper.requiredOk(), "OK answers")
+ for i := 0; i < 180; i++ {
+ helper.pingAll()
+ isOk := helper.countOk()
+ if isOk >= helper.requiredOk() {
+ if i > 2 || isOk < len(onlyHosts) {
+ trace.Info("successful 'ping' of", isOk, "configservers after", i, "retries")
+ }
+ helper.printErrors()
+ return
+ }
+ if i%10 == 2 {
+ trace.Warning("waiting for successful 'ping' of configservers", onlyHosts)
+ }
+ if i%40 == 3 {
+ helper.printErrors()
+ }
+ if i == 2 {
+ trace.Warning(fmt.Sprintf("set %s=true in environment to skip this check", envvars.VESPA_SKIP_PING))
+ }
+ time.Sleep(1000 * time.Millisecond)
+ }
+ util.JustExitMsg("Giving up waiting for working 'ping' of enough configservers")
+}