aboutsummaryrefslogtreecommitdiffstats
path: root/configd/src/apps/sentinel/config-handler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'configd/src/apps/sentinel/config-handler.cpp')
-rw-r--r--configd/src/apps/sentinel/config-handler.cpp638
1 files changed, 638 insertions, 0 deletions
diff --git a/configd/src/apps/sentinel/config-handler.cpp b/configd/src/apps/sentinel/config-handler.cpp
new file mode 100644
index 00000000000..dedcfc5595a
--- /dev/null
+++ b/configd/src/apps/sentinel/config-handler.cpp
@@ -0,0 +1,638 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+#include <list>
+#include <algorithm>
+
+
+#include <vespa/log/log.h>
+LOG_SETUP(".config-handler");
+LOG_RCSID("$Id$");
+
+#include "config-handler.h"
+#include "service.h"
+#include "command-connection.h"
+#include "output-connection.h"
+
+#include <vespa/vespalib/net/simple_metric_snapshot.h>
+#include <vespa/vespalib/net/socket_address.h>
+
+namespace config {
+namespace sentinel {
+
+// Open a listening socket on the given local port and return its file
+// descriptor. The descriptor is switched to non-blocking mode and marked
+// close-on-exec. If the socket cannot be set up the failure is logged and
+// the process exits.
+int
+ConfigHandler::listen(int port) {
+    auto listener = vespalib::SocketAddress::select_local(port).listen();
+    if (!listener) {
+        LOG(error, "Fatal: listen on command control socket failed: %s",
+            strerror(errno));
+        EV_STOPPING("config-sentinel", "listen on command control socket failed");
+        exit(EXIT_FAILURE);
+    }
+    // From here on we work with the raw descriptor; callers store it and
+    // close() it themselves.
+    const int sock = listener.release();
+    const int statusFlags = fcntl(sock, F_GETFL);
+    fcntl(sock, F_SETFL, statusFlags | O_NONBLOCK);
+    fcntl(sock, F_SETFD, FD_CLOEXEC);
+    return sock;
+}
+
+// (Re)bind the command socket to the given port.
+//
+// A port of 0 means "use the default": 19098, unless the environment
+// variable VESPA_SENTINEL_PORT is set, in which case its value is used.
+// Any port outside [1,65535] is fatal: the error is logged and the
+// process exits. On success the previous command socket is closed, a new
+// one is opened via listen(), and _boundPort is updated.
+void
+ConfigHandler::configure_port(int port)
+{
+    if (port == 0) {
+        port = 19098;
+        const char *portString = getenv("VESPA_SENTINEL_PORT");
+        if (portString) {
+            // Validate in the wide type before narrowing to int, so that
+            // an out-of-range value cannot wrap around into a valid port.
+            const long parsed = strtol(portString, NULL, 10);
+            if (parsed < 1 || parsed > 65535) {
+                LOG(error, "Fatal: bad VESPA_SENTINEL_PORT '%s', expected range [1,65535]",
+                    portString);
+                EV_STOPPING("config-sentinel", "bad port");
+                exit(EXIT_FAILURE);
+            }
+            port = static_cast<int>(parsed);
+        }
+    }
+    if (port <= 0 || port > 65535) {
+        LOG(error, "Fatal: bad port %d, expected range [1,65535]", port);
+        EV_STOPPING("config-sentinel", "bad port");
+        exit(EXIT_FAILURE);
+    }
+    LOG(debug, "Config-sentinel accepts connections on port %d", port);
+    // Close the old socket first so the same port can be bound again.
+    close(_commandSocket);
+    _commandSocket = listen(port);
+    _boundPort = port;
+}
+
+// Construct a handler with no services and no connections. The command
+// socket is opened with listen(0) while _boundPort stays 0; the start
+// metrics record the construction time as their start time.
+ConfigHandler::ConfigHandler()
+    : _subscriber(),
+      _services(),
+      _connections(),
+      _outputConnections(),
+      _boundPort(0),
+      _commandSocket(listen(0)),
+      _startMetrics()
+{
+    const time_t now = time(NULL);
+    _startMetrics.startedTime = now;
+}
+
+// Terminate all services with terminate(false), free every command and
+// output connection still held, and close the command socket.
+ConfigHandler::~ConfigHandler()
+{
+    terminateServices(false);
+    for (CommandConnection *connection : _connections) {
+        delete connection;
+    }
+    for (OutputConnection *output : _outputConnections) {
+        delete output;
+    }
+    close(_commandSocket);
+}
+
+// Call terminate(catchable) on every known service. When printDebug is
+// set, an info line is logged first for each service that is running.
+void
+ConfigHandler::terminateServices(bool catchable, bool printDebug)
+{
+    for (ServiceMap::value_type &entry : _services) {
+        Service::LP &service = entry.second;
+        const bool announce = printDebug && service->isRunning();
+        if (announce) {
+            LOG(info, "%s: killing", service->name().c_str());
+        }
+        service->terminate(catchable);
+    }
+}
+
+
+// Shut down all services.
+//
+// First calls terminate(true) on every service and gives them up to 58
+// seconds to exit, polling doWork() at most every 200ms. If services are
+// still running after that, terminate(false) is sent, retrying up to 10
+// times. Returns true when doWork() reports that no service is running
+// any more.
+bool
+ConfigHandler::terminate()
+{
+    terminateServices(true);
+    struct timeval endTime;
+    gettimeofday(&endTime, NULL);
+    endTime.tv_sec += 58;
+    struct timeval tv = {0, 0};
+
+    while (tv.tv_sec >= 0 && doWork()) {
+        // tv = endTime - now
+        gettimeofday(&tv, NULL);
+        tv.tv_sec = endTime.tv_sec - tv.tv_sec;
+        tv.tv_usec = endTime.tv_usec - tv.tv_usec;
+
+        // Both microsecond fields are below one million, so the difference
+        // can only underflow, never overflow. Borrow one full second
+        // (1000000us) so tv_usec is a valid, non-negative value for select().
+        if (tv.tv_usec < 0) {
+            tv.tv_usec += 1000000;
+            tv.tv_sec -= 1;
+        }
+
+        if (tv.tv_sec < 0) {
+            // Deadline has passed.
+            break;
+        }
+
+        if (tv.tv_sec > 0 || tv.tv_usec > 200000) {
+            // Never wait more than 200ms per select regardless
+            tv.tv_sec = 0;
+            tv.tv_usec = 200000;
+        }
+
+        // Any child exiting will send SIGCHLD and break this select so
+        // we handle the children exiting even quicker..
+        select(0, NULL, NULL, NULL, &tv);
+    }
+    for (int retry = 0; retry < 10 && doWork(); ++retry) {
+        LOG(warning, "some services refuse to terminate cleanly, sending KILL");
+        terminateServices(false, true);
+    }
+    return !doWork();
+}
+
+// Subscribe to SentinelConfig for the given config id and keep the
+// resulting handle in _sentinelHandle for later use by doConfigure().
+void
+ConfigHandler::subscribe(const std::string & configId)
+{
+    _sentinelHandle =
+        _subscriber.subscribe<SentinelConfig>(configId);
+}
+
+// Apply the current SentinelConfig.
+//
+// Rebinds the command port if the configured one differs from the bound
+// one, then builds a new service map: services already known by name are
+// kept and reconfigured, new names get a fresh Service. Running services
+// that are no longer configured are asked to terminate before the new
+// map replaces the old one. Finally the config generation is reported to
+// the state API.
+void
+ConfigHandler::doConfigure()
+{
+    std::unique_ptr<SentinelConfig> snapshot(_sentinelHandle->getConfig());
+    const SentinelConfig &config = *snapshot;
+
+    if (config.port.telnet != _boundPort) {
+        configure_port(config.port.telnet);
+        _stateApi.bound(_boundPort);
+    }
+
+    LOG(debug, "ConfigHandler::configure() %d config elements, tenant(%s), application(%s), instance(%s)",
+        static_cast<int>(config.service.size()),
+        config.application.tenant.c_str(),
+        config.application.name.c_str(),
+        config.application.instance.c_str());
+
+    ServiceMap updated;
+    for (const SentinelConfig::Service &serviceConfig : config.service) {
+        const vespalib::string name(serviceConfig.name);
+        auto existing = _services.find(name);
+        if (existing != _services.end()) {
+            // Known service: carry it over and hand it the new config.
+            updated[name] = existing->second;
+            existing->second->reconfigure(serviceConfig);
+        } else {
+            updated[name] = Service::LP(new Service(serviceConfig, config.application, _outputConnections, _startMetrics));
+        }
+    }
+    stopOldServicesNotInMap(updated);
+    _services.swap(updated);
+
+    vespalib::ComponentConfigProducer::Config current("sentinel", _subscriber.getGeneration(), "ok");
+    _stateApi.myComponents.addConfig(current);
+}
+
+// Ask every running service that is present in _services but missing
+// from newServices to terminate, using terminate(true).
+void
+ConfigHandler::stopOldServicesNotInMap(const ServiceMap & newServices)
+{
+    for (ServiceMap::value_type &entry : _services) {
+        const bool stillConfigured = (newServices.find(entry.first) != newServices.end());
+        if (stillConfigured) {
+            continue;
+        }
+        Service::LP &service = entry.second;
+        if (service->isRunning()) {
+            service->terminate(true);
+        }
+    }
+}
+
+
+// Run one round of housekeeping: pick up a new config generation if one
+// is available, then process commands, service output and exited
+// children, and give the start metrics a chance to log.
+//
+// Returns true if any service is running afterwards, false otherwise.
+int
+ConfigHandler::doWork()
+{
+    const bool haveNewConfig = _subscriber.nextGeneration(0);
+    if (haveNewConfig) {
+        doConfigure();
+    }
+
+    handleCommands();
+    handleOutputs();
+    handleChildDeaths();
+    _startMetrics.maybeLog();
+
+    // Look for at least one active service.
+    bool anyRunning = false;
+    for (ServiceMap::value_type &entry : _services) {
+        if (entry.second->isRunning()) {
+            anyRunning = true;
+            break;
+        }
+    }
+    return anyRunning;
+}
+
+
+// Reap every child process that has exited, without blocking. The owning
+// service (looked up by pid) is told about the exit status; an exit of a
+// pid that belongs to no known service is logged as a warning and
+// reported as a stopped "unknown" component.
+void
+ConfigHandler::handleChildDeaths()
+{
+    for (;;) {
+        int status;
+        const pid_t pid = waitpid(-1, &status, WNOHANG);
+        if (pid <= 0) {
+            // Nothing (more) to reap right now.
+            break;
+        }
+        Service::LP service = serviceByPid(pid);
+        if (service.get() == NULL) {
+            LOG(warning, "Unknown child pid %d exited (wait-status = %d)",
+                (int)pid, status);
+            EV_STOPPED("unknown", pid, status);
+            continue;
+        }
+        LOG(debug, "pid %d finished, Service:%s", (int)pid,
+            service->name().c_str());
+        service->youExited(status);
+    }
+}
+
+// Add every descriptor this handler wants to watch to *fds: all output
+// connections, the command socket, and all command connections, in that
+// order. *maxNum is raised as needed so that it ends up greater than the
+// highest descriptor added.
+//
+// Negative descriptors are skipped. Descriptors at or above FD_SETSIZE
+// are skipped too (with a warning), since FD_SET() on such a value is
+// undefined.
+void
+ConfigHandler::updateActiveFdset(fd_set *fds, int *maxNum)
+{
+    auto watch = [fds, maxNum](int fd) {
+        if (fd < 0) {
+            return;
+        }
+        if (fd >= FD_SETSIZE) {
+            LOG(warning, "fd %d is too large for select() (limit %d), not watching it",
+                fd, (int)FD_SETSIZE);
+            return;
+        }
+        FD_SET(fd, fds);
+        if (fd >= *maxNum) {
+            *maxNum = fd + 1;
+        }
+    };
+
+    for (OutputConnection *output : _outputConnections) {
+        watch(output->fd());
+    }
+    watch(_commandSocket);
+    for (CommandConnection *connection : _connections) {
+        watch(connection->fd());
+    }
+}
+
+// Let every output connection process its pending output, in list order.
+// Connections that report being finished afterwards are deleted and
+// removed from _outputConnections; the others keep their relative order.
+void
+ConfigHandler::handleOutputs()
+{
+    std::list<OutputConnection *>::iterator pos = _outputConnections.begin();
+    while (pos != _outputConnections.end()) {
+        OutputConnection *output = *pos;
+        output->handleOutput();
+        if (!output->isFinished()) {
+            ++pos;
+            continue;
+        }
+        LOG(debug, "Output is finished...");
+        delete output;
+        pos = _outputConnections.erase(pos);
+    }
+}
+
+// Accept all pending command connections, then run the queued commands
+// of every connection. Connections that are finished afterwards are
+// deleted and removed from _connections.
+void
+ConfigHandler::handleCommands()
+{
+    // The listening socket is non-blocking (see listen()), so accept()
+    // fails once there are no more pending connections. The peer address
+    // is not used, so none is requested; this also avoids having to reset
+    // a value-result length argument before every call.
+    int fd;
+    while ((fd = accept(_commandSocket, NULL, NULL)) >= 0) {
+        LOG(debug, "Got new command connection!");
+        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
+        // Same as for the listening socket: do not leak the connection
+        // into programs exec'ed by this process.
+        fcntl(fd, F_SETFD, FD_CLOEXEC);
+        _connections.push_back(new CommandConnection(fd));
+    }
+
+    std::list<CommandConnection *>::iterator pos = _connections.begin();
+    while (pos != _connections.end()) {
+        CommandConnection *connection = *pos;
+        handleCommand(connection);
+        if (connection->isFinished()) {
+            LOG(debug, "Connection is finished..");
+            delete connection;
+            pos = _connections.erase(pos);
+        } else {
+            ++pos;
+        }
+    }
+}
+
+// Return the service whose pid() equals the given pid, or a null
+// Service::LP if there is none.
+Service::LP
+ConfigHandler::serviceByPid(pid_t pid)
+{
+    for (ServiceMap::value_type &entry : _services) {
+        if (entry.second->pid() == pid) {
+            return entry.second;
+        }
+    }
+    return Service::LP(NULL);
+}
+
+// Look up a service by its exact name; returns a null Service::LP when no
+// service with that name is known.
+Service::LP
+ConfigHandler::serviceByName(const vespalib::string & name)
+{
+    auto match = _services.find(name);
+    if (match == _services.end()) {
+        return Service::LP(NULL);
+    }
+    return match->second;
+}
+
+
+// Split a command line in place into a command word and its arguments.
+//
+// The first whitespace-delimited word of `line` is lower-cased and
+// NUL-terminated; `cmd` is set to point at it. `args` is set to point at
+// the first non-whitespace character after that word, or at the
+// terminating NUL when there is nothing more. The buffer behind `line`
+// is modified.
+void
+splitCommand(char *line, char *&cmd, char *&args)
+{
+    cmd = line;
+    // The <ctype.h> functions require a value representable as unsigned
+    // char (or EOF); a plain char holding a byte >= 0x80 may be negative,
+    // so convert before calling them.
+    while (*line && !isspace(static_cast<unsigned char>(*line))) {
+        *line = static_cast<char>(tolower(static_cast<unsigned char>(*line)));
+        ++line;
+    }
+    if (*line) {
+        *line++ = '\0';
+        while (*line && isspace(static_cast<unsigned char>(*line))) {
+            ++line;
+        }
+    }
+    args = line;
+}
+
+
+// Execute every complete command currently available on the connection.
+// Each line is split into a lower-cased command word and its arguments
+// and dispatched to the matching do*() handler; an unrecognised command
+// gets an error reply on the connection.
+void
+ConfigHandler::handleCommand(CommandConnection *c)
+{
+    for (char *line = c->getCommand(); line; line = c->getCommand()) {
+        LOG(debug, "Got command from connection: '%s'", line);
+
+        char *cmd;
+        char *args;
+        splitCommand(line, cmd, args);
+        LOG(debug, "Command is '%s', args is '%s'", cmd, args);
+
+        auto is = [cmd](const char *name) { return strcmp(cmd, name) == 0; };
+
+        if (is("ls")) {
+            doLs(c, args);
+        } else if (is("get")) {
+            doGet(c, args);
+        } else if (is("restart")) {
+            doRestart(c, args);
+        } else if (is("forcerestart")) {
+            doRestart(c, args, true);
+        } else if (is("start")) {
+            doStart(c, args);
+        } else if (is("stop")) {
+            doStop(c, args);
+        } else if (is("forcestop")) {
+            doStop(c, args, true);
+        } else if (is("auto")) {
+            doAuto(c, args);
+        } else if (is("manual")) {
+            doManual(c, args);
+        } else if (is("quit")) {
+            doQuit(c, args);
+        } else {
+            c->printf("ERROR: Unknown cmd '%s' (ls/restart/start/stop/auto/manual/quit)\n", cmd);
+        }
+    }
+}
+
+// Publish the sentinel's own metrics to the state API: first the values
+// for the last snapshot period, then the totals since this handler was
+// started. Both sets carry a restart count and a running-services gauge.
+void
+ConfigHandler::updateMetrics()
+{
+    const char *const restartsName = "sentinel.restarts";
+    const char *const restartsDesc = "how many times sentinel restarted a service";
+    const char *const runningName = "sentinel.running";
+    const char *const runningDesc = "how many services the sentinel has running currently";
+
+    vespalib::SimpleMetricSnapshot snapshot(_startMetrics.snapshotStart, _startMetrics.snapshotEnd);
+    snapshot.addCount(restartsName, restartsDesc, _startMetrics.totalRestartsLastSnapshot);
+    snapshot.addGauge(runningName, runningDesc, _startMetrics.currentlyRunningServices);
+    _stateApi.myMetrics.setMetrics(snapshot.asString());
+
+    vespalib::SimpleMetricSnapshot totals(_startMetrics.startedTime, time(NULL));
+    totals.addCount(restartsName, restartsDesc, _startMetrics.totalRestartsCounter);
+    totals.addGauge(runningName, runningDesc, _startMetrics.currentlyRunningServices);
+    _stateApi.myMetrics.setTotalMetrics(totals.asString());
+}
+
+// Answer a "get <path>" command with a minimal HTTP/1.0 response.
+//
+// A path that does not start with '/' yields 400. Otherwise the metrics
+// are refreshed and the state API is asked for the path: a non-empty
+// answer is sent as a 200 JSON response, an empty one yields 404. The
+// connection is then finished, and getCommand() is called until the
+// connection reports that it is finished.
+void
+ConfigHandler::doGet(CommandConnection *c, char *args)
+{
+    char *path;
+    char *extra;
+    splitCommand(args, path, extra);
+
+    if (path[0] != '/') {
+        c->printf("HTTP/1.0 400 Bad URL\r\nContent-Type: text/plain; charset=ASCII\r\n\r\nThis web server only has metrics\r\n");
+    } else {
+        updateMetrics();
+        vespalib::string response = _stateApi.get(path);
+        if (response.size() == 0) {
+            c->printf("HTTP/1.0 404 Not found\r\n"
+                      "Content-Type: text/plain; charset=ASCII\r\n\r\n"
+                      "This web server only has metrics\r\n");
+        } else {
+            c->printf("HTTP/1.0 200 OK\r\n"
+                      "Content-Type: application/json; charset=ASCII\r\n\r\n");
+            c->printf("%s", response.c_str());
+            c->printf("\r\n");
+        }
+    }
+
+    c->finish();
+    while (!c->isFinished()) {
+        c->getCommand();
+    }
+}
+
+// List services on the connection, one line per service, followed by an
+// empty line. With empty args all services are listed; otherwise only
+// the service whose name equals args.
+void
+ConfigHandler::doLs(CommandConnection *c, char *args)
+{
+    for (ServiceMap::value_type &entry : _services) {
+        Service::LP &service = entry.second;
+        const bool wanted = (*args == '\0') || (strcmp(args, service->name().c_str()) == 0);
+        if (!wanted) {
+            continue;
+        }
+        const SentinelConfig::Service &config = service->serviceConfig();
+        const char *mode = service->isAutomatic() ? "AUTO" : "MANUAL";
+        const char *autostart = config.autostart ? "TRUE" : "FALSE";
+        const char *autorestart = config.autorestart ? "TRUE" : "FALSE";
+        c->printf("%s state=%s mode=%s pid=%d exitstatus=%d "
+                  "autostart=%s autorestart=%s id=\"%s\"\n",
+                  service->name().c_str(), service->stateName(),
+                  mode,
+                  service->pid(), service->exitStatus(),
+                  autostart,
+                  autorestart,
+                  config.id.c_str());
+    }
+    c->printf("\n");
+}
+
+// Handle "quit": say goodbye and mark the connection as finished.
+// Any arguments are ignored.
+void
+ConfigHandler::doQuit(CommandConnection *c, char * /* args */)
+{
+    c->printf("Exiting.\n");
+    c->finish();
+}
+
+// Handle "start <service>": start the named service unless it is unknown
+// or already running. The restart penalty is reset before starting. The
+// outcome is reported on the connection.
+void
+ConfigHandler::doStart(CommandConnection *c, char *args)
+{
+    Service::LP service = serviceByName(args);
+    if (service.get() == NULL) {
+        c->printf("Cannot find any service named '%s'\n", args);
+        return;
+    }
+
+    if (service->isRunning()) {
+        c->printf("ERROR: %s is already running as pid %d!\n", args,
+                  service->pid());
+        return;
+    }
+
+    service->resetRestartPenalty();
+    service->start();
+    const char *mode = service->isAutomatic() ? "AUTO" : "MANUAL";
+    c->printf("%s started as pid %d, mode=%s\n", args, service->pid(), mode);
+}
+
+// Handle "restart <service>": the non-forced variant of doRestart().
+void
+ConfigHandler::doRestart(CommandConnection *c, char *args)
+{
+    const bool force = false;
+    doRestart(c, args, force);
+}
+
+// Handle "restart"/"forcerestart" for the named service.
+//
+// A service that is not running is simply started (after resetting its
+// restart penalty). A running service is only terminated when it is in
+// automatic mode and configured with autorestart; otherwise an error is
+// reported. The termination uses terminate(!force).
+void
+ConfigHandler::doRestart(CommandConnection *c, char *args, bool force)
+{
+    Service::LP service = serviceByName(args);
+    if (service.get() == NULL) {
+        c->printf("Cannot find any service named '%s'\n", args);
+        return;
+    }
+
+    if (!service->isRunning()) {
+        service->resetRestartPenalty();
+        service->start();
+        const char *mode = service->isAutomatic() ? "AUTO" : "MANUAL";
+        c->printf("%s started as pid %d, mode=%s\n", args, service->pid(), mode);
+        return;
+    }
+
+    if (!service->isAutomatic()) {
+        c->printf("ERROR: %s is in MANUAL mode, use stop+start\n", args);
+        return;
+    }
+
+    const SentinelConfig::Service &config = service->serviceConfig();
+    if (config.autorestart) {
+        c->printf("terminating service %s pid %d, will be autorestarted\n",
+                  args, service->pid());
+        service->terminate(!force);
+    } else {
+        c->printf("ERROR: %s does not autorestart, use stop+start\n", args);
+    }
+}
+
+// Handle "stop <service>": the non-forced variant of doStop().
+void
+ConfigHandler::doStop(CommandConnection *c, char *args)
+{
+    const bool force = false;
+    doStop(c, args, force);
+}
+
+// Handle "stop"/"forcestop" for the named service.
+//
+// Refuses when the service is unknown, not running, or in automatic mode
+// with autorestart configured. Otherwise the service is terminated with
+// terminate(!force).
+void
+ConfigHandler::doStop(CommandConnection *c, char *args, bool force)
+{
+    Service::LP service = serviceByName(args);
+    if (service.get() == NULL) {
+        c->printf("Cannot find any service named '%s'\n", args);
+        return;
+    }
+
+    if (!service->isRunning()) {
+        c->printf("%s is not running, it is in state %s. Cannot stop.\n",
+                  service->name().c_str(), service->stateName());
+        return;
+    }
+
+    const SentinelConfig::Service &config = service->serviceConfig();
+    const bool wouldComeBack = service->isAutomatic() && config.autorestart;
+    if (wouldComeBack) {
+        c->printf("ERROR: %s in AUTO mode. Use restart, or manual+stop.\n",
+                  args);
+        return;
+    }
+
+    c->printf("Stopping %s.\n", args);
+    service->terminate(!force);
+}
+
+// Handle "auto <service>": put the named service into automatic mode.
+// If it is not running and its config has autostart or autorestart set,
+// it is started right away. The outcome is reported on the connection.
+void
+ConfigHandler::doAuto(CommandConnection *c, char *args)
+{
+    Service::LP service = serviceByName(args);
+    if (service.get() == NULL) {
+        c->printf("Cannot find any service named '%s'\n", args);
+        return;
+    }
+
+    if (service->isAutomatic()) {
+        c->printf("%s is already automatic.\n", args);
+        return;
+    }
+
+    service->setAutomatic(true);
+    const SentinelConfig::Service &config = service->serviceConfig();
+    if (service->isRunning()) {
+        c->printf("%s is now automatic again (and running).\n", args);
+        return;
+    }
+    if (config.autostart || config.autorestart) {
+        service->start();
+        c->printf("%s is now automatic again (and started).\n", args);
+        return;
+    }
+    c->printf("%s is now automatic again (but not started)\n", args);
+}
+
+
+// Handle "manual <service>": put the named service into manual mode and
+// report on the connection whether it is still running. A service that
+// is already manual, or unknown, only gets a message.
+void
+ConfigHandler::doManual(CommandConnection *c, char *args)
+{
+    Service::LP service = serviceByName(args);
+    if (service.get() == NULL) {
+        c->printf("Cannot find any service named '%s'\n", args);
+        return;
+    }
+
+    if (!service->isAutomatic()) {
+        c->printf("%s is already manual.\n", args);
+    } else {
+        service->setAutomatic(false);
+        if (service->isRunning()) {
+            c->printf("%s is now manual (but still running).\n", args);
+        } else {
+            // The reply used to carry an unbalanced ')'.
+            c->printf("%s is now manual.\n", args);
+        }
+    }
+}
+
+
+} // end namespace config::sentinel
+} // end namespace config