From c64501aab06da35778cb2f6daa186218c133aaca Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 3 Feb 2023 15:38:43 +0100 Subject: Move clusterstate to admin --- .../internal/admin/clusterstate/cluster_state.go | 122 ++++++++++++++++ .../go/internal/admin/clusterstate/detect_model.go | 67 +++++++++ .../admin/clusterstate/get_cluster_state.go | 77 ++++++++++ .../internal/admin/clusterstate/get_node_state.go | 100 +++++++++++++ .../go/internal/admin/clusterstate/known_state.go | 35 +++++ .../go/internal/admin/clusterstate/model_config.go | 126 +++++++++++++++++ client/go/internal/admin/clusterstate/options.go | 149 ++++++++++++++++++++ client/go/internal/admin/clusterstate/run_curl.go | 85 +++++++++++ .../internal/admin/clusterstate/set_node_state.go | 155 +++++++++++++++++++++ .../go/internal/admin/clusterstate/show_hidden.go | 36 +++++ client/go/internal/admin/script-utils/main.go | 2 +- 11 files changed, 953 insertions(+), 1 deletion(-) create mode 100644 client/go/internal/admin/clusterstate/cluster_state.go create mode 100644 client/go/internal/admin/clusterstate/detect_model.go create mode 100644 client/go/internal/admin/clusterstate/get_cluster_state.go create mode 100644 client/go/internal/admin/clusterstate/get_node_state.go create mode 100644 client/go/internal/admin/clusterstate/known_state.go create mode 100644 client/go/internal/admin/clusterstate/model_config.go create mode 100644 client/go/internal/admin/clusterstate/options.go create mode 100644 client/go/internal/admin/clusterstate/run_curl.go create mode 100644 client/go/internal/admin/clusterstate/set_node_state.go create mode 100644 client/go/internal/admin/clusterstate/show_hidden.go (limited to 'client/go/internal/admin') diff --git a/client/go/internal/admin/clusterstate/cluster_state.go b/client/go/internal/admin/clusterstate/cluster_state.go new file mode 100644 index 00000000000..7317e9a8a3a --- /dev/null +++ b/client/go/internal/admin/clusterstate/cluster_state.go @@ -0,0 +1,122 @@ 
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "bytes" + "encoding/json" + "fmt" + "strings" + + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/util" +) + +// common struct used various places in the clustercontroller REST api: +type StateAndReason struct { + State string `json:"state"` + Reason string `json:"reason"` +} + +func (s *StateAndReason) writeTo(buf *strings.Builder) { + buf.WriteString(s.State) + if s.Reason != "" { + buf.WriteString(" [reason: ") + buf.WriteString(s.Reason) + buf.WriteString("]") + } +} + +// cluster state as returned by the clustercontroller REST api: +type ClusterState struct { + State struct { + Generated StateAndReason `json:"generated"` + } `json:"state"` + Service map[string]struct { + Node map[string]struct { + Attributes struct { + HierarchicalGroup string `json:"hierarchical-group"` + } `json:"attributes"` + State struct { + Generated StateAndReason `json:"generated"` + Unit StateAndReason `json:"unit"` + User StateAndReason `json:"user"` + } `json:"state"` + Metrics struct { + BucketCount int `json:"bucket-count"` + UniqueDocumentCount int `json:"unique-document-count"` + UniqueDocumentTotalSize int `json:"unique-document-total-size"` + } `json:"metrics"` + } `json:"node"` + } `json:"service"` + DistributionStates struct { + Published struct { + Baseline string `json:"baseline"` + BucketSpaces []struct { + Name string `json:"name"` + State string `json:"state"` + } `json:"bucket-spaces"` + } `json:"published"` + } `json:"distribution-states"` +} + +func (cs *ClusterState) String() string { + if cs == nil { + return "nil" + } + var buf strings.Builder + buf.WriteString("cluster state: ") + cs.State.Generated.writeTo(&buf) + for n, s := range cs.Service { + buf.WriteString("\n ") 
+ buf.WriteString(n) + buf.WriteString(": [") + for nn, node := range s.Node { + buf.WriteString("\n ") + buf.WriteString(nn) + buf.WriteString(" -> {generated: ") + node.State.Generated.writeTo(&buf) + buf.WriteString("} {unit: ") + node.State.Unit.writeTo(&buf) + buf.WriteString("} {user: ") + node.State.User.writeTo(&buf) + buf.WriteString("}") + } + } + buf.WriteString("\n") + return buf.String() +} + +func (model *VespaModelConfig) getClusterState(cluster string) (*ClusterState, *ClusterControllerSpec) { + errs := make([]string, 0, 0) + ccs := model.findClusterControllers() + if len(ccs) == 0 { + trace.Trace("No cluster controllers found in vespa model:", model) + errs = append(errs, "No cluster controllers found in vespa model config") + } + for _, cc := range ccs { + url := fmt.Sprintf("http://%s:%d/cluster/v2/%s/?recursive=true", + cc.host, cc.port, cluster) + var buf bytes.Buffer + err := curlGet(url, &buf) + if err != nil { + errs = append(errs, "could not get: "+url) + continue + } + codec := json.NewDecoder(&buf) + var parsedJson ClusterState + err = codec.Decode(&parsedJson) + if err != nil { + trace.Trace("Could not parse JSON >>>", buf.String(), "<<< from", url) + errs = append(errs, "Bad JSON from "+url+" was: "+buf.String()) + continue + } + // success: + return &parsedJson, &cc + } + // no success: + util.JustExitMsg(fmt.Sprint(errs)) + panic("unreachable") +} diff --git a/client/go/internal/admin/clusterstate/detect_model.go b/client/go/internal/admin/clusterstate/detect_model.go new file mode 100644 index 00000000000..bb1192d4106 --- /dev/null +++ b/client/go/internal/admin/clusterstate/detect_model.go @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "strconv" + "strings" + + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/util" + "github.com/vespa-engine/vespa/client/go/internal/vespa" +) + +func getConfigServerHosts(s string) []string { + if s != "" { + return []string{s} + } + backticks := util.BackTicksForwardStderr + got, err := backticks.Run(vespa.FindHome()+"/bin/vespa-print-default", "configservers") + res := strings.Fields(got) + if err != nil || len(res) < 1 { + util.JustExitMsg("bad configservers: " + got) + } + trace.Debug("found", len(res), "configservers:", res) + return res +} + +func getConfigServerPort(i int) int { + if i > 0 { + return i + } + backticks := util.BackTicksForwardStderr + got, err := backticks.Run(vespa.FindHome()+"/bin/vespa-print-default", "configserver_rpc_port") + if err == nil { + i, err = strconv.Atoi(strings.TrimSpace(got)) + } + if err != nil || i < 1 { + util.JustExitMsg("bad configserver_rpc_port: " + got) + } + trace.Debug("found configservers rpc port:", i) + return i +} + +func detectModel(opts *Options) *VespaModelConfig { + vespa.LoadDefaultEnv() + cfgHosts := getConfigServerHosts(opts.ConfigServerHost) + cfgPort := getConfigServerPort(opts.ConfigServerPort) + for _, cfgHost := range cfgHosts { + args := []string{ + "-j", + "-n", "cloud.config.model", + "-i", "admin/model", + "-p", strconv.Itoa(cfgPort), + "-s", cfgHost, + } + backticks := util.BackTicksForwardStderr + data, err := backticks.Run(vespa.FindHome()+"/bin/vespa-get-config", args...) 
+ parsed := parseModelConfig(data) + if err == nil && parsed != nil { + return parsed + } + } + util.JustExitMsg("could not get model config") + panic("unreachable") +} diff --git a/client/go/internal/admin/clusterstate/get_cluster_state.go b/client/go/internal/admin/clusterstate/get_cluster_state.go new file mode 100644 index 00000000000..505235a284e --- /dev/null +++ b/client/go/internal/admin/clusterstate/get_cluster_state.go @@ -0,0 +1,77 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// code for the "vespa-get-cluster-state" command +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "fmt" + "os" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/client/go/internal/admin/envvars" + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/cli/build" +) + +func NewGetClusterStateCmd() *cobra.Command { + var ( + curOptions Options + ) + cmd := &cobra.Command{ + Use: "vespa-get-cluster-state [-h] [-v] [-f] [-c cluster]", + Short: "Get the cluster state of a given cluster.", + Long: `Usage: get-cluster-state [Options]`, + Version: build.Version, + Args: cobra.MaximumNArgs(0), + CompletionOptions: cobra.CompletionOptions{DisableDefaultCmd: true}, + Run: func(cmd *cobra.Command, args []string) { + curOptions.NodeIndex = AllNodes + runGetClusterState(&curOptions) + }, + } + addCommonOptions(cmd, &curOptions) + return cmd +} + +func runGetClusterState(opts *Options) { + if opts.Silent { + trace.Silent() + } + if opts.NoColors || os.Getenv(envvars.TERM) == "" { + color.NoColor = true + } + trace.Debug("run getClusterState with: ", opts) + m := detectModel(opts) + trace.Debug("model:", m) + sss := m.findSelectedServices(opts) + clusters := make(map[string]*ClusterState) + for _, s := range sss { + trace.Debug("found service: ", s) + if 
clusters[s.cluster] == nil { + state, _ := m.getClusterState(s.cluster) + trace.Debug("cluster ", s.cluster, state) + clusters[s.cluster] = state + } + } + for k, v := range clusters { + globalState := v.State.Generated.State + if globalState == "up" { + fmt.Printf("Cluster %s:\n", k) + } else { + fmt.Printf("Cluster %s is %s. Too few nodes available.\n", k, color.HiRedString("%s", globalState)) + } + for serviceType, serviceList := range v.Service { + for dn, dv := range serviceList.Node { + nodeState := dv.State.Generated.State + if nodeState == "up" { + fmt.Printf("%s/%s/%s: %v\n", k, serviceType, dn, nodeState) + } else { + fmt.Printf("%s/%s/%s: %v\n", k, serviceType, dn, color.HiRedString(nodeState)) + } + } + } + } +} diff --git a/client/go/internal/admin/clusterstate/get_node_state.go b/client/go/internal/admin/clusterstate/get_node_state.go new file mode 100644 index 00000000000..6d45e377a72 --- /dev/null +++ b/client/go/internal/admin/clusterstate/get_node_state.go @@ -0,0 +1,100 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// code for the "vespa-get-node-state" command +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "fmt" + "os" + "strconv" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/client/go/internal/admin/envvars" + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/cli/build" +) + +const ( + longdesc = `Retrieve the state of one or more storage services from the fleet controller. Will list the state of the locally running services, possibly restricted to less by options.` + header = `Shows the various states of one or more nodes in a Vespa Storage cluster. There exist three different type of node states. They are: + + Unit state - The state of the node seen from the cluster controller. 
+ User state - The state we want the node to be in. By default up. Can be + set by administrators or by cluster controller when it + detects nodes that are behaving badly. + Generated state - The state of a given node in the current cluster state. + This is the state all the other nodes know about. This + state is a product of the other two states and cluster + controller logic to keep the cluster stable.` +) + +func NewGetNodeStateCmd() *cobra.Command { + var ( + curOptions Options + ) + cmd := &cobra.Command{ + Use: "vespa-get-node-state [-h] [-v] [-c cluster] [-t type] [-i index]", + Short: "Get the state of a node.", + Long: longdesc + "\n\n" + header, + Version: build.Version, + Args: cobra.MaximumNArgs(0), + CompletionOptions: cobra.CompletionOptions{DisableDefaultCmd: true}, + Run: func(cmd *cobra.Command, args []string) { + runGetNodeState(&curOptions) + }, + } + addCommonOptions(cmd, &curOptions) + cmd.Flags().StringVarP(&curOptions.NodeType, "type", "t", "", + "Node type - can either be 'storage' or 'distributor'. If not specified, the operation will use state for both types.") + cmd.Flags().IntVarP(&curOptions.NodeIndex, "index", "i", OnlyLocalNode, + "Node index. 
If not specified, all nodes found running on this host will be used.") + return cmd +} + +func runGetNodeState(opts *Options) { + if opts.Silent { + trace.Silent() + } + if opts.NoColors || os.Getenv(envvars.TERM) == "" { + color.NoColor = true + } + trace.Info(header) + m := detectModel(opts) + sss := m.findSelectedServices(opts) + clusters := make(map[string]*ClusterState) + for _, s := range sss { + state := clusters[s.cluster] + if state == nil { + state, _ = m.getClusterState(s.cluster) + clusters[s.cluster] = state + } + if state == nil { + trace.Warning("no state for cluster: ", s.cluster) + continue + } + if nodes, ok := state.Service[s.serviceType]; ok { + for name, node := range nodes.Node { + if name == strconv.Itoa(s.index) { + fmt.Printf("\n%s/%s.%s:\n", s.cluster, s.serviceType, name) + dumpState(node.State.Unit, "Unit") + dumpState(node.State.Generated, "Generated") + dumpState(node.State.User, "User") + } + } + } else { + trace.Warning("no nodes for service type: ", s.serviceType) + continue + } + + } +} + +func dumpState(s StateAndReason, tag string) { + if s.State != "up" { + s.State = color.HiRedString(s.State) + } + fmt.Printf("%s: %s: %s\n", tag, s.State, s.Reason) +} diff --git a/client/go/internal/admin/clusterstate/known_state.go b/client/go/internal/admin/clusterstate/known_state.go new file mode 100644 index 00000000000..60a4ada7711 --- /dev/null +++ b/client/go/internal/admin/clusterstate/known_state.go @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Author: arnej

// utilities to get and manipulate node states in a storage cluster

// KnownState is the name of a node state that may be requested for a node.
type KnownState string

// these are all the valid node states:
const (
	StateUp          KnownState = "up"
	StateDown        KnownState = "down"
	StateMaintenance KnownState = "maintenance"
	StateRetired     KnownState = "retired"
)

// knownState verifies that s is one of the known states and returns it as a
// KnownState. For any other input it returns KnownState("unknown") and an
// error naming the accepted alternatives.
func knownState(s string) (KnownState, error) {
	alternatives := []KnownState{
		StateUp,
		StateDown,
		StateMaintenance,
		StateRetired,
	}
	for _, v := range alternatives {
		if s == string(v) {
			return v, nil
		}
	}
	// The message names the offending argument and carries no trailing
	// newline, so callers decide how to terminate the line.
	return KnownState("unknown"), fmt.Errorf("<Wanted State> must be one of %v, was %s", alternatives, s)
}
+// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "encoding/json" + "sort" + "strings" + + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" +) + +type VespaModelConfig struct { + VespaVersion string `json:"vespaVersion"` + Hosts []struct { + Name string `json:"name"` + Services []struct { + Name string `json:"name"` + Type string `json:"type"` + Configid string `json:"configid"` + Clustertype string `json:"clustertype"` + Clustername string `json:"clustername"` + Index int `json:"index"` + Ports []struct { + Number int `json:"number"` + Tags string `json:"tags"` + } `json:"ports"` + } `json:"services"` + } `json:"hosts"` +} + +func (m *VespaModelConfig) String() string { + if m == nil { + return "nil" + } + var buf strings.Builder + buf.WriteString("vespa version: ") + buf.WriteString(m.VespaVersion) + for _, h := range m.Hosts { + buf.WriteString("\n host: ") + buf.WriteString(h.Name) + for _, s := range h.Services { + buf.WriteString("\n service: ") + buf.WriteString(s.Name) + buf.WriteString(" type: ") + buf.WriteString(s.Type) + buf.WriteString(" cluster: ") + buf.WriteString(s.Clustername) + } + buf.WriteString("\n") + } + buf.WriteString("\n") + return buf.String() +} + +type ClusterControllerSpec struct { + host string + port int +} + +func parseModelConfig(input string) *VespaModelConfig { + codec := json.NewDecoder(strings.NewReader(input)) + var parsedJson VespaModelConfig + err := codec.Decode(&parsedJson) + if err != nil { + trace.Trace("could not decode JSON >>>", input, "<<< error:", err) + return nil + } + return &parsedJson +} + +func (m *VespaModelConfig) findClusterControllers() []ClusterControllerSpec { + res := make([]ClusterControllerSpec, 0, 1) + for _, h := range m.Hosts { + for _, s := range h.Services { + if s.Type == "container-clustercontroller" { + for _, p := range s.Ports { + if strings.Contains(p.Tags, "state") { + res = append(res, 
ClusterControllerSpec{ + host: h.Name, port: p.Number, + }) + } + } + } + } + } + return res +} + +func (m *VespaModelConfig) findSelectedServices(opts *Options) []serviceSpec { + res := make([]serviceSpec, 0, 5) + for _, h := range m.Hosts { + for _, s := range h.Services { + spec := serviceSpec{ + cluster: s.Clustername, + serviceType: s.Type, + index: s.Index, + host: h.Name, + } + if s.Type == "storagenode" { + // simplify: + spec.serviceType = "storage" + } + if opts.wantService(spec) { + res = append(res, spec) + } + } + } + sort.Slice(res, func(i, j int) bool { + a := res[i] + b := res[j] + if a.cluster != b.cluster { + return a.cluster < b.cluster + } + if a.serviceType != b.serviceType { + return a.serviceType < b.serviceType + } + if a.index != b.index { + return a.index < b.index + } + return a.host < b.host + }) + return res +} diff --git a/client/go/internal/admin/clusterstate/options.go b/client/go/internal/admin/clusterstate/options.go new file mode 100644 index 00000000000..b58562f9abe --- /dev/null +++ b/client/go/internal/admin/clusterstate/options.go @@ -0,0 +1,149 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "strconv" + "strings" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/vespa" +) + +const ( + OnlyLocalNode int = -2 + AllNodes int = -1 +) + +type Options struct { + Verbose int + Silent bool + ShowHidden showHiddenFlag + Force bool + NoColors bool + SafeMode bool + NoWait bool + Cluster string + ConfigServerHost string + ConfigServerPort int + ConfigRequestTimeout int + NodeType string + NodeIndex int + WantedState string +} + +func (v *Options) String() string { + var buf strings.Builder + buf.WriteString("command-line options [") + if v.Verbose > 0 { + buf.WriteString(" verbosity=") + buf.WriteString(strconv.Itoa(v.Verbose)) + } + if v.Silent { + buf.WriteString(" silent") + } + if v.ShowHidden.showHidden { + buf.WriteString(" show-hidden") + } + if v.Force { + buf.WriteString(color.HiYellowString(" force=true")) + } + if v.NoColors { + buf.WriteString(" no-colors") + } + if v.SafeMode { + buf.WriteString(" safe-mode") + } + if v.NoWait { + buf.WriteString(color.HiYellowString(" no-wait=true")) + } + if v.Cluster != "" { + buf.WriteString(" cluster=") + buf.WriteString(v.Cluster) + } + if v.ConfigServerHost != "" { + buf.WriteString(" config-server=") + buf.WriteString(v.ConfigServerHost) + } + if v.ConfigServerPort != 0 { + buf.WriteString(" config-server-port=") + buf.WriteString(strconv.Itoa(v.ConfigServerPort)) + } + if v.ConfigRequestTimeout != 90 { + buf.WriteString(" config-request-timeout=") + buf.WriteString(strconv.Itoa(v.ConfigRequestTimeout)) + } + if v.NodeType != "" { + buf.WriteString(" node-type=") + buf.WriteString(v.NodeType) + } + if v.NodeIndex >= 0 { + buf.WriteString(" node-index=") + buf.WriteString(strconv.Itoa(int(v.NodeIndex))) + } + if v.WantedState != "" { + buf.WriteString(" 
WantedState=") + buf.WriteString(v.WantedState) + } + buf.WriteString(" ]") + return buf.String() +} + +type serviceSpec struct { + cluster string + serviceType string + index int + host string +} + +func (o *Options) wantService(s serviceSpec) bool { + if o.Cluster != "" && o.Cluster != s.cluster { + return false + } + if o.NodeType == "" { + if s.serviceType != "storage" && s.serviceType != "distributor" { + return false + } + } else if o.NodeType != s.serviceType { + return false + } + switch o.NodeIndex { + case OnlyLocalNode: + myName, _ := vespa.FindOurHostname() + return s.host == "localhost" || s.host == myName + case AllNodes: + return true + case s.index: + return true + default: + return false + } +} + +func addCommonOptions(cmd *cobra.Command, curOptions *Options) { + cmd.Flags().BoolVar(&curOptions.NoColors, "nocolors", false, "Do not use ansi colors in print.") + cmd.Flags().BoolVarP(&curOptions.Silent, "silent", "s", false, "Create less verbose output.") + cmd.Flags().CountVarP(&curOptions.Verbose, "verbose", "v", "Create more verbose output.") + cmd.Flags().IntVar(&curOptions.ConfigRequestTimeout, "config-request-timeout", 90, "Timeout of config request") + cmd.Flags().IntVar(&curOptions.ConfigServerPort, "config-server-port", 0, "Port to connect to config server on") + cmd.Flags().StringVar(&curOptions.ConfigServerHost, "config-server", "", "Host name of config server to query") + cmd.Flags().StringVarP(&curOptions.Cluster, "cluster", "c", "", + "Cluster name. 
If unspecified, and vespa is installed on current node, information will be attempted auto-extracted") + cmd.Flags().MarkHidden("config-request-timeout") + cmd.Flags().MarkHidden("config-server-port") + cmd.Flags().MarkHidden("nocolors") + curOptions.ShowHidden.cmd = cmd + flag := cmd.Flags().VarPF(&curOptions.ShowHidden, "show-hidden", "", "Also show hidden undocumented debug options.") + flag.NoOptDefVal = "true" + cobra.OnInitialize(func() { + if curOptions.Silent { + trace.Silent() + } else { + trace.AdjustVerbosity(curOptions.Verbose) + } + }) +} diff --git a/client/go/internal/admin/clusterstate/run_curl.go b/client/go/internal/admin/clusterstate/run_curl.go new file mode 100644 index 00000000000..1dcb31528e1 --- /dev/null +++ b/client/go/internal/admin/clusterstate/run_curl.go @@ -0,0 +1,85 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "bytes" + "fmt" + "io" + "os" + "os/exec" + "strings" + + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/curl" + "github.com/vespa-engine/vespa/client/go/internal/vespa" +) + +func curlCommand(url string, args []string) (*curl.Command, error) { + tls, err := vespa.LoadTlsConfig() + if err != nil { + return nil, err + } + if tls != nil && strings.HasPrefix(url, "http:") { + url = "https:" + url[5:] + } + cmd, err := curl.RawArgs(url, args...) + if err != nil { + return nil, err + } + if tls != nil { + if tls.DisableHostnameValidation { + cmd, err = curl.RawArgs(url, append(args, "--insecure")...) 
+ if err != nil { + return nil, err + } + } + cmd.PrivateKey = tls.Files.PrivateKey + cmd.Certificate = tls.Files.Certificates + cmd.CaCertificate = tls.Files.CaCertificates + } + return cmd, err +} + +func curlGet(url string, output io.Writer) error { + cmd, err := curlCommand(url, commonCurlArgs()) + if err != nil { + return err + } + trace.Trace("running curl:", cmd.String()) + err = cmd.Run(output, os.Stderr) + return err +} + +func curlPost(url string, input []byte) (string, error) { + cmd, err := curlCommand(url, commonCurlArgs()) + cmd.Method = "POST" + cmd.Header("Content-Type", "application/json") + cmd.WithBodyInput(bytes.NewReader(input)) + var out bytes.Buffer + trace.Debug("POST input: " + string(input)) + trace.Trace("running curl:", cmd.String()) + err = cmd.Run(&out, os.Stderr) + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + if ee.ProcessState.ExitCode() == 7 { + return "", fmt.Errorf("HTTP request to %s failed, could not connect", url) + } + } + return "", fmt.Errorf("HTTP request failed with curl %s", err.Error()) + } + return out.String(), err +} + +func commonCurlArgs() []string { + return []string{ + "-A", "vespa-cluster-state", + "--silent", + "--show-error", + "--connect-timeout", "30", + "--max-time", "1200", + "--write-out", "\n%{http_code}", + } +} diff --git a/client/go/internal/admin/clusterstate/set_node_state.go b/client/go/internal/admin/clusterstate/set_node_state.go new file mode 100644 index 00000000000..2a6869c84f5 --- /dev/null +++ b/client/go/internal/admin/clusterstate/set_node_state.go @@ -0,0 +1,155 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// code for the "vespa-set-node-state" command +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "encoding/json" + "fmt" + "os" + "strconv" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/client/go/internal/admin/envvars" + "github.com/vespa-engine/vespa/client/go/internal/admin/trace" + "github.com/vespa-engine/vespa/client/go/internal/cli/build" + "github.com/vespa-engine/vespa/client/go/internal/util" +) + +const ( + usageSetNodeState = `vespa-set-node-state [Options] [Description] + +Arguments: + Wanted State : User state to set. This must be one of up, down, maintenance or retired. + Description : Give a reason for why you are altering the user state, which will show up in various admin tools. (Use double quotes to give a reason + with whitespace in it)` + + longSetNodeState = `Set the user state of a node. This will set the generated state to the user state if the user state is "better" than the generated state that would +have been created if the user state was up. 
For instance, a node that is currently in initializing state can be forced into down state, while a node +that is currently down can not be forced into retired state, but can be forced into maintenance state.` +) + +func NewSetNodeStateCmd() *cobra.Command { + var ( + curOptions Options + ) + cmd := &cobra.Command{ + Use: usageSetNodeState, + Short: "vespa-set-node-state [Options] [Description]", + Long: longSetNodeState, + Version: build.Version, + Args: func(cmd *cobra.Command, args []string) error { + switch { + case len(args) < 1: + return fmt.Errorf("Missing ") + case len(args) > 2: + return fmt.Errorf("Too many arguments, maximum is 2") + } + _, err := knownState(args[0]) + return err + }, + CompletionOptions: cobra.CompletionOptions{DisableDefaultCmd: true}, + Run: func(cmd *cobra.Command, args []string) { + runSetNodeState(&curOptions, args) + }, + } + addCommonOptions(cmd, &curOptions) + cmd.Flags().BoolVarP(&curOptions.Force, "force", "f", false, + "Force execution") + cmd.Flags().BoolVarP(&curOptions.NoWait, "no-wait", "n", false, + "Do not wait for node state changes to be visible in the cluster before returning.") + cmd.Flags().BoolVarP(&curOptions.SafeMode, "safe", "a", false, + "Only carries out state changes if deemed safe by the cluster controller.") + cmd.Flags().StringVarP(&curOptions.NodeType, "type", "t", "", + "Node type - can either be 'storage' or 'distributor'. If not specified, the operation will set state for both types.") + cmd.Flags().IntVarP(&curOptions.NodeIndex, "index", "i", OnlyLocalNode, + "Node index. 
If not specified, all nodes found running on this host will be used.") + cmd.Flags().MarkHidden("no-wait") + return cmd +} + +func runSetNodeState(opts *Options, args []string) { + if opts.Silent { + trace.Silent() + } + if opts.NoColors || os.Getenv(envvars.TERM) == "" { + color.NoColor = true + } + wanted, err := knownState(args[0]) + if err != nil { + util.JustExitWith(err) + } + reason := "" + if len(args) > 1 { + reason = args[1] + } + if !opts.Force && wanted == StateMaintenance && opts.NodeType != "storage" { + fmt.Println(color.HiYellowString( + `Setting the distributor to maintenance mode may have severe consequences for feeding! +Please specify -t storage to only set the storage node to maintenance mode, or -f to override this error.`)) + return + } + m := detectModel(opts) + sss := m.findSelectedServices(opts) + if len(sss) == 0 { + fmt.Println(color.HiYellowString("Attempted setting of user state for no nodes")) + return + } + for _, s := range sss { + _, cc := m.getClusterState(s.cluster) + cc.setNodeUserState(s, wanted, reason, opts) + } +} + +type SetNodeStateJson struct { + State struct { + User StateAndReason `json:"user"` + } `json:"state"` + ResponseWait string `json:"response-wait,omitempty"` + Condition string `json:"condition,omitempty"` +} + +func splitResultCode(s string) (int, string) { + for idx := len(s); idx > 0; { + idx-- + if s[idx] == '\n' { + resCode, err := strconv.Atoi(s[idx+1:]) + if err != nil { + return -1, s + } + return resCode, s[:idx] + } + } + return -1, s +} + +func (cc *ClusterControllerSpec) setNodeUserState(s serviceSpec, wanted KnownState, reason string, opts *Options) error { + var request SetNodeStateJson + request.State.User.State = string(wanted) + request.State.User.Reason = reason + if opts.NoWait { + request.ResponseWait = "no-wait" + } + if opts.SafeMode { + request.Condition = "safe" + } + jsonBytes, err := json.Marshal(request) + if err != nil { + util.JustExitWith(err) + } + url := 
fmt.Sprintf("http://%s:%d/cluster/v2/%s/%s/%d", + cc.host, cc.port, + s.cluster, s.serviceType, s.index) + result, err := curlPost(url, jsonBytes) + resCode, output := splitResultCode(result) + if resCode < 200 || resCode >= 300 { + fmt.Println(color.HiYellowString("failed with HTTP code %d", resCode)) + fmt.Println(output) + } else { + fmt.Print(output, "OK\n") + } + return err +} diff --git a/client/go/internal/admin/clusterstate/show_hidden.go b/client/go/internal/admin/clusterstate/show_hidden.go new file mode 100644 index 00000000000..8c0ef61bf18 --- /dev/null +++ b/client/go/internal/admin/clusterstate/show_hidden.go @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Author: arnej + +// utilities to get and manipulate node states in a storage cluster +package clusterstate + +import ( + "strconv" + + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +// handle CLI flag --show-hidden + +type showHiddenFlag struct { + showHidden bool + cmd *cobra.Command +} + +func (v *showHiddenFlag) Type() string { + return "" +} + +func (v *showHiddenFlag) String() string { + return strconv.FormatBool(v.showHidden) +} + +func (v *showHiddenFlag) Set(val string) error { + b, err := strconv.ParseBool(val) + v.showHidden = b + v.cmd.Flags().VisitAll(func(f *pflag.Flag) { f.Hidden = false }) + return err +} + +func (v *showHiddenFlag) IsBoolFlag() bool { return true } diff --git a/client/go/internal/admin/script-utils/main.go b/client/go/internal/admin/script-utils/main.go index a6016c86291..b0bd260d082 100644 --- a/client/go/internal/admin/script-utils/main.go +++ b/client/go/internal/admin/script-utils/main.go @@ -8,13 +8,13 @@ import ( "os" "strings" + "github.com/vespa-engine/vespa/client/go/internal/admin/clusterstate" "github.com/vespa-engine/vespa/client/go/internal/admin/jvm" "github.com/vespa-engine/vespa/client/go/internal/admin/script-utils/configserver" 
"github.com/vespa-engine/vespa/client/go/internal/admin/script-utils/logfmt" "github.com/vespa-engine/vespa/client/go/internal/admin/script-utils/services" "github.com/vespa-engine/vespa/client/go/internal/admin/script-utils/standalone" "github.com/vespa-engine/vespa/client/go/internal/admin/script-utils/startcbinary" - "github.com/vespa-engine/vespa/client/go/internal/cli/cmd/clusterstate" "github.com/vespa-engine/vespa/client/go/internal/cli/cmd/deploy" "github.com/vespa-engine/vespa/client/go/internal/util" "github.com/vespa-engine/vespa/client/go/internal/vespa" -- cgit v1.2.3