diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /configd |
Publish
Diffstat (limited to 'configd')
40 files changed, 2008 insertions, 0 deletions
diff --git a/configd/.gitignore b/configd/.gitignore new file mode 100644 index 00000000000..a9b20e8992d --- /dev/null +++ b/configd/.gitignore @@ -0,0 +1,2 @@ +Makefile +Testing diff --git a/configd/AUTHORS b/configd/AUTHORS new file mode 100644 index 00000000000..f7c84236a0f --- /dev/null +++ b/configd/AUTHORS @@ -0,0 +1,3 @@ +Finn Arne Gangstad <finnag@yahoo-inc.com> +Lars Christian Jensen <Lars.Jensen@europe.yahoo-inc.com> +Arne Henrik Juul <arnej27959> diff --git a/configd/CMakeLists.txt b/configd/CMakeLists.txt new file mode 100644 index 00000000000..66be2fde277 --- /dev/null +++ b/configd/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_define_module( + APPS + src/apps/sentinel + src/apps/su + + TESTS + src/tests/messages + src/tests/configd +) diff --git a/configd/OWNERS b/configd/OWNERS new file mode 100644 index 00000000000..67cd2820bb8 --- /dev/null +++ b/configd/OWNERS @@ -0,0 +1 @@ +arnej27959 diff --git a/configd/README b/configd/README new file mode 100644 index 00000000000..abf0612d76d --- /dev/null +++ b/configd/README @@ -0,0 +1 @@ +VESPA Config System local daemons diff --git a/configd/src/.gitignore b/configd/src/.gitignore new file mode 100644 index 00000000000..2129b57c8a4 --- /dev/null +++ b/configd/src/.gitignore @@ -0,0 +1,4 @@ +Makefile.ini +config_command.sh +configd.mak +project.dsw diff --git a/configd/src/apps/sentinel/.gitignore b/configd/src/apps/sentinel/.gitignore new file mode 100644 index 00000000000..bc87d9cebdc --- /dev/null +++ b/configd/src/apps/sentinel/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +config-sentinel diff --git a/configd/src/apps/sentinel/CMakeLists.txt b/configd/src/apps/sentinel/CMakeLists.txt new file mode 100644 index 00000000000..27a5d052d72 --- /dev/null +++ b/configd/src/apps/sentinel/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +vespa_add_executable(configd_config-sentinel_app + SOURCES + sentinel.cpp + service.cpp + config-handler.cpp + line-splitter.cpp + command-connection.cpp + output-connection.cpp + metrics.cpp + state-api.cpp + OUTPUT_NAME config-sentinel + INSTALL sbin + DEPENDS + configdefinitions +) diff --git a/configd/src/apps/sentinel/command-connection.cpp b/configd/src/apps/sentinel/command-connection.cpp new file mode 100644 index 00000000000..9b35d801ecb --- /dev/null +++ b/configd/src/apps/sentinel/command-connection.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <unistd.h> +#include <stdarg.h> +#include <cstdio> +#include <cstring> +#include <sys/socket.h> + +#include "command-connection.h" +#include "line-splitter.h" + +namespace config { +namespace sentinel { + +CommandConnection::CommandConnection(int f) + : _fd(f), + _lines(f) +{ +} + +bool +CommandConnection::isFinished() const +{ + return _lines.eof(); +} + +char * +CommandConnection::getCommand() +{ + return _lines.getLine(); +} + +CommandConnection::~CommandConnection() +{ + close(_fd); +} + +void +CommandConnection::finish() +{ + ::shutdown(_fd, SHUT_RDWR); +} + +int +CommandConnection::printf(const char *fmt, ...) +{ + char buf[10000]; + va_list args; + va_start(args, fmt); + + int ret = vsnprintf(buf, sizeof buf, fmt, args); + va_end(args); + + write(_fd, buf, strlen(buf)); + return ret; +} + +} // end namespace config::sentinel +} // end namespace config diff --git a/configd/src/apps/sentinel/command-connection.h b/configd/src/apps/sentinel/command-connection.h new file mode 100644 index 00000000000..355ee4e72b1 --- /dev/null +++ b/configd/src/apps/sentinel/command-connection.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "line-splitter.h" + +namespace config { +namespace sentinel { + +class CommandConnection { +private: + int _fd; + LineSplitter _lines; + + // Unused constructors/assignment operator: + CommandConnection(); + CommandConnection(const CommandConnection&); + CommandConnection& operator =(const CommandConnection&); + +public: + explicit CommandConnection(int fd); + ~CommandConnection(); + bool isFinished() const; + char *getCommand(); + int printf(const char *fmt, ...) __attribute__((format(printf, 2, 3))); + void finish(); + int fd() const { return _fd; } +}; + +} // end namespace sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/config-handler.cpp b/configd/src/apps/sentinel/config-handler.cpp new file mode 100644 index 00000000000..dedcfc5595a --- /dev/null +++ b/configd/src/apps/sentinel/config-handler.cpp @@ -0,0 +1,638 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <sys/types.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> + +#include <list> +#include <algorithm> + + +#include <vespa/log/log.h> +LOG_SETUP(".config-handler"); +LOG_RCSID("$Id$"); + +#include "config-handler.h" +#include "service.h" +#include "command-connection.h" +#include "output-connection.h" + +#include <vespa/vespalib/net/simple_metric_snapshot.h> +#include <vespa/vespalib/net/socket_address.h> + +namespace config { +namespace sentinel { + +int +ConfigHandler::listen(int port) { + auto handle = vespalib::SocketAddress::select_local(port).listen(); + if (!handle) { + LOG(error, "Fatal: listen on command control socket failed: %s", + strerror(errno)); + EV_STOPPING("config-sentinel", "listen on command control socket failed"); + exit(EXIT_FAILURE); + } + int fd = handle.release(); + fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); + fcntl(fd, F_SETFD, FD_CLOEXEC); + return fd; +} + +void +ConfigHandler::configure_port(int port) +{ + if (port == 0) { + port = 19098; + const char *portString = getenv("VESPA_SENTINEL_PORT"); + if (portString) { + port = strtoul(portString, NULL, 10); + } + } + if (port <= 0 || port > 65535) { + LOG(error, "Fatal: bad port %d, expected range [1,65535]", port); + EV_STOPPING("config-sentinel", "bad port"); + exit(EXIT_FAILURE); + } + LOG(debug, "Config-sentinel accepts connections on port %d", port); + close(_commandSocket); + _commandSocket = listen(port); + _boundPort = port; +} + +ConfigHandler::ConfigHandler() + : _subscriber(), + _services(), + _connections(), + _outputConnections(), + _boundPort(0), + _commandSocket(listen(0)), + _startMetrics() +{ + _startMetrics.startedTime = time(NULL); +} + +ConfigHandler::~ConfigHandler() +{ + terminateServices(false); + std::list<CommandConnection *>::iterator i; + for (i = _connections.begin(); i != _connections.end(); ++i) + { + delete 
*i; + } + std::list<OutputConnection *>::iterator it; + for (it = _outputConnections.begin(); it != _outputConnections.end(); ++it) + { + delete *it; + } + close(_commandSocket); +} + +void +ConfigHandler::terminateServices(bool catchable, bool printDebug) +{ + for (ServiceMap::iterator it(_services.begin()), mt(_services.end()); it != mt; it++) { + Service::LP service = it->second; + if (printDebug && service->isRunning()) { + LOG(info, "%s: killing", service->name().c_str()); + } + service->terminate(catchable); + } +} + + +bool +ConfigHandler::terminate() +{ + // Call terminate(true) for all services. + // Give them 58 seconds to exit cleanly, then terminate(false) all + // of them. + terminateServices(true); + struct timeval endTime; + gettimeofday(&endTime, NULL); + endTime.tv_sec += 58; + struct timeval tv = {0, 0}; + + while (tv.tv_sec >= 0 && doWork()) { + gettimeofday(&tv, NULL); + tv.tv_sec = endTime.tv_sec - tv.tv_sec; + tv.tv_usec = endTime.tv_usec - tv.tv_usec; + + if (tv.tv_usec >= 1000000) { + tv.tv_usec -= 1000000; + tv.tv_sec += 1; + } else if (tv.tv_usec < 0) { + tv.tv_usec += 100000; + tv.tv_sec -= 1; + } + + if (tv.tv_sec < 0) { + break; + } + + if (tv.tv_sec > 0 || tv.tv_usec > 200000) { + // Never wait more than 200ms per select regardless + tv.tv_sec = 0; + tv.tv_usec = 200000; + } + + // Any child exiting will send SIGCHLD and break this select so + // we handle the children exiting even quicker.. 
+ select(0, NULL, NULL, NULL, &tv); + } + for (int retry = 0; retry < 10 && doWork(); ++retry) { + LOG(warning, "some services refuse to terminate cleanly, sending KILL"); + terminateServices(false, true); + } + return !doWork(); +} + +void +ConfigHandler::subscribe(const std::string & configId) +{ + _sentinelHandle = _subscriber.subscribe<SentinelConfig>(configId); +} + +void +ConfigHandler::doConfigure() +{ + std::unique_ptr<SentinelConfig> cfg(_sentinelHandle->getConfig()); + const SentinelConfig& config(*cfg); + + if (config.port.telnet != _boundPort) { + configure_port(config.port.telnet); + _stateApi.bound(_boundPort); + } + + LOG(debug, "ConfigHandler::configure() %d config elements, tenant(%s), application(%s), instance(%s)", + (int)config.service.size(), config.application.tenant.c_str(), config.application.name.c_str(), + config.application.instance.c_str()); + ServiceMap services; + for (unsigned int i = 0; i < config.service.size(); ++i) { + const SentinelConfig::Service& serviceConfig = config.service[i]; + const vespalib::string name(serviceConfig.name); + ServiceMap::iterator found(_services.find(name)); + if (found == _services.end()) { + services[name] = Service::LP(new Service(serviceConfig, config.application, _outputConnections, _startMetrics)); + } else { + services[name] = found->second; + found->second->reconfigure(serviceConfig); + } + } + stopOldServicesNotInMap(services); + _services.swap(services); + vespalib::ComponentConfigProducer::Config current("sentinel", _subscriber.getGeneration(), "ok"); + _stateApi.myComponents.addConfig(current); +} + +void +ConfigHandler::stopOldServicesNotInMap(const ServiceMap & newServices) +{ + for (ServiceMap::iterator it(_services.begin()), mt(_services.end()); it != mt; it++) { + const vespalib::string & key(it->first); + if (newServices.find(key) == newServices.end()) { + Service::LP service = it->second; + if (service->isRunning()) { + service->terminate(true); + } + } + } +} + + +int 
+ConfigHandler::doWork() +{ + // Return true if there are any running services, false if not. + + if (_subscriber.nextGeneration(0)) { + doConfigure(); + } + + handleCommands(); + handleOutputs(); + handleChildDeaths(); + _startMetrics.maybeLog(); + + // Check for active services. + for (ServiceMap::iterator it(_services.begin()), mt(_services.end()); it != mt; it++) { + if (it->second->isRunning()) { + return true; + } + } + return false; +} + + +void +ConfigHandler::handleChildDeaths() +{ + // See if any of our child processes have exited, and take + // the appropriate action. + int status; + pid_t pid; + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { + // A child process has exited. find it. + Service::LP service = serviceByPid(pid); + if (service.get() != NULL) { + LOG(debug, "pid %d finished, Service:%s", (int)pid, + service->name().c_str()); + service->youExited(status); + } else { + LOG(warning, "Unknown child pid %d exited (wait-status = %d)", + (int)pid, status); + EV_STOPPED("unknown", pid, status); + } + } +} + +void +ConfigHandler::updateActiveFdset(fd_set *fds, int *maxNum) +{ + std::list<OutputConnection *>::const_iterator + src = _outputConnections.begin(); + // ### _Possibly put an assert here if fd is > 1023??? 
+ while (src != _outputConnections.end()) { + OutputConnection *c = *src; + ++src; + int fd = c->fd(); + if (fd >= 0) { + FD_SET(fd, fds); + if (fd >= *maxNum) { + *maxNum = fd + 1; + } + } + } + FD_SET(_commandSocket, fds); + if (_commandSocket >= *maxNum) { + *maxNum = _commandSocket + 1; + } + + std::list<CommandConnection *>::const_iterator + connections = _connections.begin(); + + while (connections != _connections.end()) { + CommandConnection *c = *connections; + ++connections; + int fd = c->fd(); + if (fd != -1) { + FD_SET(fd, fds); + if (fd >= *maxNum) { + *maxNum = fd + 1; + } + } + } +} + +void +ConfigHandler::handleOutputs() +{ + std::list<OutputConnection *>::iterator dst; + std::list<OutputConnection *>::const_iterator src; + + src = _outputConnections.begin(); + dst = _outputConnections.begin(); + while (src != _outputConnections.end()) { + OutputConnection *c = *src; + ++src; + c->handleOutput(); + if (c->isFinished()) { + LOG(debug, "Output is finished..."); + delete c; + } else { + *dst = c; + ++dst; + } + } + _outputConnections.erase(dst, _outputConnections.end()); +} + +void +ConfigHandler::handleCommands() +{ + // Accept new command connections, and read commands. 
// Split a command line in place into a command word and its arguments.
//
// line: NUL-terminated, writable buffer; it is modified.
// cmd:  set to the start of line.  The first word (everything up to the
//       first whitespace character) is lower-cased and NUL-terminated.
// args: set to the first non-whitespace character after the command
//       word, or to the terminating NUL when there are no arguments.
//       The argument text itself is left untouched.
void
splitCommand(char *line, char *&cmd, char *&args)
{
    cmd = line;

    // The <ctype.h> functions require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined
    // behaviour, so every byte is converted first.
    while (*line && !isspace(static_cast<unsigned char>(*line))) {
        *line = static_cast<char>(tolower(static_cast<unsigned char>(*line)));
        ++line;
    }

    if (*line) {
        *line++ = '\0'; // terminate the command word
        while (*line && isspace(static_cast<unsigned char>(*line))) {
            ++line;     // skip the separator run
        }
    }
    args = line;
}
args); + } else if (strcmp(cmd, "forcerestart") == 0) { + doRestart(c, args, true); + } else if (strcmp(cmd, "start") == 0) { + doStart(c, args); + } else if (strcmp(cmd, "stop") == 0) { + doStop(c, args); + } else if (strcmp(cmd, "forcestop") == 0) { + doStop(c, args, true); + } else if (strcmp(cmd, "auto") == 0) { + doAuto(c, args); + } else if (strcmp(cmd, "manual") == 0) { + doManual(c, args); + } else if (strcmp(cmd, "quit") == 0) { + doQuit(c, args); + } else { + c->printf("ERROR: Unknown cmd '%s' " + "(ls/restart/start/stop/auto/manual/quit)\n", cmd); + } + } +} + +void +ConfigHandler::updateMetrics() +{ + vespalib::SimpleMetricSnapshot snapshot(_startMetrics.snapshotStart, _startMetrics.snapshotEnd); + snapshot.addCount("sentinel.restarts", "how many times sentinel restarted a service", + _startMetrics.totalRestartsLastSnapshot); + snapshot.addGauge("sentinel.running", "how many services the sentinel has running currently", + _startMetrics.currentlyRunningServices); + _stateApi.myMetrics.setMetrics(snapshot.asString()); + + vespalib::SimpleMetricSnapshot totals(_startMetrics.startedTime, time(NULL)); + totals.addCount("sentinel.restarts", "how many times sentinel restarted a service", + _startMetrics.totalRestartsCounter); + totals.addGauge("sentinel.running", "how many services the sentinel has running currently", + _startMetrics.currentlyRunningServices); + _stateApi.myMetrics.setTotalMetrics(totals.asString()); + +} + +void +ConfigHandler::doGet(CommandConnection *c, char *args) +{ + char *path, *extra; + splitCommand(args, path, extra); + if (path[0] == '/') { + updateMetrics(); + vespalib::string response = _stateApi.get(path); + if (response.size() > 0) { + c->printf("HTTP/1.0 200 OK\r\n" + "Content-Type: application/json; charset=ASCII\r\n\r\n"); + c->printf("%s", response.c_str()); + c->printf("\r\n"); + } else { + c->printf("HTTP/1.0 404 Not found\r\n" + "Content-Type: text/plain; charset=ASCII\r\n\r\n" + "This web server only has metrics\r\n"); + 
} + } else { + c->printf("HTTP/1.0 400 Bad URL\r\nContent-Type: text/plain; charset=ASCII\r\n\r\nThis web server only has metrics\r\n"); + } + c->finish(); + while (! c->isFinished()) { + c->getCommand(); + } +} + +void +ConfigHandler::doLs(CommandConnection *c, char *args) +{ + for (ServiceMap::iterator it(_services.begin()), mt(_services.end()); it != mt; it++) { + Service::LP service = it->second; + if (*args && strcmp(args, service->name().c_str()) != 0) { + continue; + } + const SentinelConfig::Service& config = service->serviceConfig(); + c->printf("%s state=%s mode=%s pid=%d exitstatus=%d " + "autostart=%s autorestart=%s id=\"%s\"\n", + service->name().c_str(), service->stateName(), + service->isAutomatic() ? "AUTO" : "MANUAL", + service->pid(), service->exitStatus(), + config.autostart ? "TRUE" : "FALSE", + config.autorestart ? "TRUE" : "FALSE", + config.id.c_str()); + } + c->printf("\n"); +} + +void +ConfigHandler::doQuit(CommandConnection *c, char *) +{ + c->printf("Exiting.\n"); + c->finish(); +} + +void +ConfigHandler::doStart(CommandConnection *c, char *args) +{ + Service::LP service = serviceByName(args); + if (service.get() == NULL) { + c->printf("Cannot find any service named '%s'\n", args); + return; + } + + if (service->isRunning()) { + c->printf("ERROR: %s is already running as pid %d!\n", args, + service->pid()); + } else { + service->resetRestartPenalty(); + service->start(); + c->printf("%s started as pid %d, mode=%s\n", args, service->pid(), + service->isAutomatic() ? 
"AUTO" : "MANUAL"); + } +} + +void +ConfigHandler::doRestart(CommandConnection *c, char *args) +{ + doRestart(c, args, false); +} + +void +ConfigHandler::doRestart(CommandConnection *c, char *args, bool force) +{ + Service::LP service = serviceByName(args); + if (service.get() == NULL) { + c->printf("Cannot find any service named '%s'\n", args); + return; + } + + if (!service->isRunning()) { + service->resetRestartPenalty(); + service->start(); + c->printf("%s started as pid %d, mode=%s\n", args, service->pid(), + service->isAutomatic() ? "AUTO" : "MANUAL"); + return; + } + + if (!service->isAutomatic()) { + c->printf("ERROR: %s is in MANUAL mode, use stop+start\n", args); + return; + } + const SentinelConfig::Service& config = service->serviceConfig(); + if (!config.autorestart) { + c->printf("ERROR: %s does not autorestart, use stop+start\n", args); + return; + } + c->printf("terminating service %s pid %d, will be autorestarted\n", + args, service->pid()); + service->terminate(!force); +} + +void +ConfigHandler::doStop(CommandConnection *c, char *args) +{ + doStop(c, args, false); +} + +void +ConfigHandler::doStop(CommandConnection *c, char *args, bool force) +{ + Service::LP service = serviceByName(args); + if (service.get() == NULL) { + c->printf("Cannot find any service named '%s'\n", args); + return; + } + + if (!service->isRunning()) { + c->printf("%s is not running, it is in state %s. Cannot stop.\n", + service->name().c_str(), service->stateName()); + return; + } + const SentinelConfig::Service& config = service->serviceConfig(); + if (service->isAutomatic() && config.autorestart) { + c->printf("ERROR: %s in AUTO mode. 
Use restart, or manual+stop.\n", + args); + return; + } + c->printf("Stopping %s.\n", args); + service->terminate(!force); +} + +void +ConfigHandler::doAuto(CommandConnection *c, char *args) +{ + Service::LP service = serviceByName(args); + if (service.get() == NULL) { + c->printf("Cannot find any service named '%s'\n", args); + return; + } + + if (service->isAutomatic()) { + c->printf("%s is already automatic.\n", args); + } else { + service->setAutomatic(true); + const SentinelConfig::Service& config = service->serviceConfig(); + if (service->isRunning()) { + c->printf("%s is now automatic again (and running).\n", args); + } else if (config.autostart || config.autorestart) { + service->start(); + c->printf("%s is now automatic again (and started).\n", args); + } else { + c->printf("%s is now automatic again (but not started)\n", args); + } + } +} + + +void +ConfigHandler::doManual(CommandConnection *c, char *args) +{ + Service::LP service = serviceByName(args); + if (service.get() == NULL) { + c->printf("Cannot find any service named '%s'\n", args); + return; + } + + if (!service->isAutomatic()) { + c->printf("%s is already manual.\n", args); + } else { + service->setAutomatic(false); + if (service->isRunning()) { + c->printf("%s is now manual (but still running).\n", args); + } else { + c->printf("%s is now manual).\n", args); + } + } +} + + +} // end namespace config::sentinel +} // end namespace config diff --git a/configd/src/apps/sentinel/config-handler.h b/configd/src/apps/sentinel/config-handler.h new file mode 100644 index 00000000000..ffd5af7ef4c --- /dev/null +++ b/configd/src/apps/sentinel/config-handler.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <sys/types.h> +#include <sys/select.h> + +#include <list> +#include <vespa/vespalib/util/hashmap.h> + +#include <vespa/config-sentinel.h> +#include <vespa/config/config.h> + +#include "service.h" +#include "metrics.h" +#include "state-api.h" + +using cloud::config::SentinelConfig; +using config::ConfigSubscriber; +using config::ConfigHandle; + +namespace config { +namespace sentinel { + +class CommandConnection; +class OutputConnection; + +class ConfigHandler { +private: + typedef std::map<vespalib::string, Service::LP> ServiceMap; + + ConfigSubscriber _subscriber; + ConfigHandle<SentinelConfig>::UP _sentinelHandle; + ServiceMap _services; + std::list<CommandConnection *> _connections; + std::list<OutputConnection *> _outputConnections; + int _boundPort; + int _commandSocket; + StartMetrics _startMetrics; + StateApi _stateApi; + + ConfigHandler(const ConfigHandler&); + ConfigHandler& operator =(const ConfigHandler&); + + Service::LP serviceByPid(pid_t pid); + Service::LP serviceByName(const vespalib::string & name); + void handleCommands(); + void handleCommand(CommandConnection *c); + void handleOutputs(); + void handleChildDeaths(); + + static int listen(int port); + void configure_port(int port); + + void updateMetrics(); + + void doGet(CommandConnection *c, char *args); + void doLs(CommandConnection *c, char *args); + void doRestart(CommandConnection *c, char *args); + void doRestart(CommandConnection *c, char *args, bool force); + void doStart(CommandConnection *c, char *args); + void doStop(CommandConnection *c, char *args); + void doStop(CommandConnection *c, char *args, bool force); + void doAuto(CommandConnection *c, char *args); + void doManual(CommandConnection *c, char *args); + void doQuit(CommandConnection *c, char *args); + + void terminateServices(bool catchable, bool printDebug = false); + void stopOldServicesNotInMap(const ServiceMap & newServices); + + void doConfigure(); + +public: + ConfigHandler(); + virtual 
~ConfigHandler(); + void subscribe(const std::string & configId); + bool terminate(); + int doWork(); + void updateActiveFdset(fd_set *fds, int *maxNum); +}; + + +} // end namespace config::sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/line-splitter.cpp b/configd/src/apps/sentinel/line-splitter.cpp new file mode 100644 index 00000000000..8e990475b99 --- /dev/null +++ b/configd/src/apps/sentinel/line-splitter.cpp @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + + +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <algorithm> +#include <cstdlib> + +#include <unistd.h> + +#include "line-splitter.h" + +namespace config { +namespace sentinel { + + +LineSplitter::LineSplitter(int fd) + : _fd(fd), + _size(8192), + _buffer(static_cast<char *>(malloc(_size))), + _readPos(0), + _writePos(0), + _eof(false) +{ +} + +LineSplitter::~LineSplitter() +{ + free(_buffer); +} + +bool +LineSplitter::resize() +{ + _size = _size * 2; + _buffer = static_cast<char *>(realloc(_buffer, _size)); + return (_buffer != NULL); +} + + +bool +LineSplitter::fill() +{ + // Check if we have read to end + int leftToWrite = _writePos - _readPos; + if (leftToWrite == 0) { + _writePos = 0; + _readPos = 0; + } else if (_readPos > 0) { // Move to front + memmove(_buffer, &_buffer[_readPos], leftToWrite); + _writePos -= _readPos; + _readPos = 0; + } + + // If buffer is full, resize it + if (_writePos >= _size) { + if (!resize()) { + _eof = true; + shutdown(_fd, SHUT_RD); + return false; + } + } + + int readLen = read(_fd, &_buffer[_writePos], _size - _writePos); + if (readLen == -1) { + if (errno != EINTR && errno != EAGAIN) { + _eof = true; + } + } else if (readLen == 0) { + _eof = true; + if (_buffer[_writePos] != '\n') { + _buffer[_writePos++] = '\n'; // Fake a final separator + } + } else { + _writePos += readLen; + } + + return readLen > 
0; +} + +char * +LineSplitter::getLine() +{ + do { + int bufLen = _writePos - _readPos; + + if (bufLen > 0) { + char *start = &_buffer[_readPos]; + char *end = static_cast<char *>(memchr(start, '\n', bufLen)); + if (end) { + *end = '\0'; + if (end - start > 0 && end[-1] == '\r') { + // Get rid of carriage return as well. + end[-1] = '\0'; + } + _readPos = (end - _buffer) + 1; + return start; + } + } + } while (!_eof && fill()); + return NULL; +} + +} // end namespace config::sentinel +} // end namespace config + + diff --git a/configd/src/apps/sentinel/line-splitter.h b/configd/src/apps/sentinel/line-splitter.h new file mode 100644 index 00000000000..28d183f3e15 --- /dev/null +++ b/configd/src/apps/sentinel/line-splitter.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +namespace config { +namespace sentinel { + +class LineSplitter { +private: + int _fd; + int _size; + char *_buffer; + int _readPos; + int _writePos; + bool _eof; + + LineSplitter(); + LineSplitter& operator =(const LineSplitter&); + LineSplitter(const LineSplitter&); + + bool resize(); + bool fill(); + +public: + explicit LineSplitter(int fd); + char *getLine(); + bool eof() const { return _eof; } + + ~LineSplitter(); +}; + +} // end namespace config::sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/metrics.cpp b/configd/src/apps/sentinel/metrics.cpp new file mode 100644 index 00000000000..14527221643 --- /dev/null +++ b/configd/src/apps/sentinel/metrics.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".metrics"); + +#include "metrics.h" + +namespace config { +namespace sentinel { + +StartMetrics::StartMetrics() + : currentlyRunningServices(0), totalRestartsCounter(0), totalRestartsLastPeriod(0), + lastLoggedTime(0), + totalRestartsLastSnapshot(0), + snapshotStart(0), + snapshotEnd(0) +{ + snapshotEnd = time(NULL); + lastLoggedTime = snapshotEnd - 55; +} + +void +StartMetrics::output() +{ + EV_VALUE("currently_running_services", currentlyRunningServices); + EV_VALUE("total_restarts_last_period", totalRestartsLastPeriod); + EV_COUNT("total_restarts_counter", totalRestartsCounter); +} + +void +StartMetrics::reset(unsigned long curTime) +{ + totalRestartsLastSnapshot = totalRestartsLastPeriod; + snapshotStart = snapshotEnd; + snapshotEnd = curTime; + totalRestartsLastPeriod = 0; + lastLoggedTime = curTime; +} + +void +StartMetrics::maybeLog() +{ + uint32_t curTime = time(NULL); + if (curTime > lastLoggedTime + 59) { + output(); + reset(curTime); + } +} + +} // end namespace config::sentinel +} // end namespace config diff --git a/configd/src/apps/sentinel/metrics.h b/configd/src/apps/sentinel/metrics.h new file mode 100644 index 00000000000..709ee9103d9 --- /dev/null +++ b/configd/src/apps/sentinel/metrics.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <sys/time.h> + +namespace config { +namespace sentinel { + +struct StartMetrics { + unsigned long currentlyRunningServices; + unsigned long totalRestartsCounter; + unsigned long totalRestartsLastPeriod; + long lastLoggedTime; + unsigned long totalRestartsLastSnapshot; + long snapshotStart; + long snapshotEnd; + long startedTime; + + StartMetrics(); + + void output(); + void reset(unsigned long curTime); + void maybeLog(); +}; + +} // end namespace config::sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/output-connection.cpp b/configd/src/apps/sentinel/output-connection.cpp new file mode 100644 index 00000000000..4524e15da72 --- /dev/null +++ b/configd/src/apps/sentinel/output-connection.cpp @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <unistd.h> +#include <stdarg.h> +#include <cstdio> +#include <cstring> + +#include <vespa/log/log.h> +LOG_SETUP(""); +#include <vespa/log/llparser.h> + +#include "output-connection.h" +#include "line-splitter.h" + +namespace config { +namespace sentinel { + +OutputConnection::OutputConnection(int f, ns_log::LLParser *p) + : _fd(f), + _lines(f), + _parser(p) +{ +} + +bool +OutputConnection::isFinished() const +{ + return _lines.eof(); +} + +void +OutputConnection::handleOutput() +{ + while (1) { + char *line = _lines.getLine(); + if (!line) { + return; + } + LOG(spam, "Got Output from connection: '%s'", line); + _parser->doInput(line); + } +} + +OutputConnection::~OutputConnection() +{ + close(_fd); + delete _parser; +} + +} // end namespace config::sentinel +} // end namespace config diff --git a/configd/src/apps/sentinel/output-connection.h b/configd/src/apps/sentinel/output-connection.h new file mode 100644 index 00000000000..c8b7178e07a --- /dev/null +++ b/configd/src/apps/sentinel/output-connection.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "line-splitter.h" + +namespace ns_log { class LLParser; } + +namespace config { +namespace sentinel { + +class OutputConnection { +private: + int _fd; + LineSplitter _lines; + ns_log::LLParser *_parser; + + // Unused constructors/assignment operator: + OutputConnection(); + OutputConnection(const OutputConnection&); + OutputConnection& operator =(const OutputConnection&); + +public: + explicit OutputConnection(int fd, ns_log::LLParser *p); + ~OutputConnection(); + bool isFinished() const; + void handleOutput(); + int fd() const { return _fd; } +}; + +} // end namespace sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/sentinel-tester.sh b/configd/src/apps/sentinel/sentinel-tester.sh new file mode 100755 index 00000000000..f1c28a6f342 --- /dev/null +++ b/configd/src/apps/sentinel/sentinel-tester.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+sleep 1 +kill -SEGV $$ diff --git a/configd/src/apps/sentinel/sentinel.cfg b/configd/src/apps/sentinel/sentinel.cfg new file mode 100644 index 00000000000..8284afed959 --- /dev/null +++ b/configd/src/apps/sentinel/sentinel.cfg @@ -0,0 +1,16 @@ +service[3] +service[0].command "./sentinel-tester.sh" +service[0].name "sentinel-tester" +service[0].autostart true +service[0].autorestart true +service[0].id "whatever" +service[1].command "/usr/bin/true" +service[1].name "true" +service[1].autostart true +service[1].autorestart true +service[1].id "whatever" +service[2].command "/bin/sleep 137" +service[2].name "sleep" +service[2].autostart true +service[2].autorestart true +service[2].id "whatever" diff --git a/configd/src/apps/sentinel/sentinel.cpp b/configd/src/apps/sentinel/sentinel.cpp new file mode 100644 index 00000000000..c9185bcf01f --- /dev/null +++ b/configd/src/apps/sentinel/sentinel.cpp @@ -0,0 +1,145 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <sys/types.h> +#include <signal.h> +#include <cstring> +#include <unistd.h> +#include <sys/time.h> + +#include <vespa/defaults.h> +#include <vespa/log/log.h> +LOG_SETUP("config-sentinel"); +LOG_RCSID("$Id$"); + +#include <vespa/config-sentinel.h> + +#include "config-handler.h" + +using namespace config; + +static int sigPermanent(int sig, void(*handler)(int)); + +static void gracefulShutdown(int sig); +static void sigchldHandler(int sig); + +sig_atomic_t stop = 0; +static sig_atomic_t pendingWait = 0; + +int +main(int argc, char **argv) +{ + int c = getopt(argc, argv, "c:"); + if (c != 'c') { + LOG(error, "Usage: %s -c <config-id>", argv[0]); + EV_STOPPING("config-sentinel", "Bad arguments on command line"); + exit(EXIT_FAILURE); + } + + const char *configId = strdup(optarg); + + const char *rootDir = getenv("ROOT"); + if (!rootDir) { + rootDir = vespa::Defaults::vespaHome(); + LOG(warning, "ROOT is not set, using %s", rootDir); + setenv("ROOT", rootDir, 1); + } + + if (chdir(rootDir) == -1) { + LOG(error, "Fatal: Cannot cd to $ROOT (%s)", rootDir); + EV_STOPPING("config-sentinel", "Cannot cd to $ROOT"); + exit(EXIT_FAILURE); + } + + EV_STARTED("config-sentinel"); + + sigPermanent(SIGPIPE, SIG_IGN); + sigPermanent(SIGTERM, gracefulShutdown); + sigPermanent(SIGINT, gracefulShutdown); + sigPermanent(SIGCHLD, sigchldHandler); + if (setenv("LC_ALL", "C", 1) != 0) { + LOG(error, "Unable to set locale"); + exit(EXIT_FAILURE); + } + setlocale(LC_ALL, "C"); + + sentinel::ConfigHandler handler; + + LOG(debug, "Reading configuration"); + try { + handler.subscribe(configId); + } catch (InvalidConfigException& ex) { + LOG(error, "Fatal: Invalid configuration, please check your setup: %s", ex.getMessage().c_str()); + EV_STOPPING("config-sentinel", ex.what()); + exit(EXIT_FAILURE); + } catch (ConfigRuntimeException& ex) { + LOG(error, "Fatal: Could not get config, please check your setup: %s", ex.getMessage().c_str()); + EV_STOPPING("config-sentinel", ex.what()); + 
exit(EXIT_FAILURE); + } + + struct timeval lastTv; + gettimeofday(&lastTv, NULL); + while (!stop) { + try { + pendingWait = 0; + handler.doWork(); // Check for child procs & commands + } catch (InvalidConfigException& ex) { + LOG(warning, "Configuration problem: (ignoring): %s", + ex.what()); + } + if (!pendingWait) { + int maxNum = 0; + fd_set fds; + FD_ZERO(&fds); + handler.updateActiveFdset(&fds, &maxNum); + + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + + if (!pendingWait) { + select(maxNum, &fds, NULL, NULL, &tv); + } + } + + struct timeval tv; + gettimeofday(&tv, NULL); + double delta = tv.tv_sec - lastTv.tv_sec + + 1e-6 * tv.tv_usec - lastTv.tv_usec; + if (delta < 0.01) { + usleep(12500); // Avoid busy looping; + } + lastTv = tv; + } + + int rv = handler.terminate(); + + EV_STOPPING("config-sentinel", "normal exit"); + return rv; +} + +static void +gracefulShutdown(int sig) +{ + (void)sig; + stop = 1; +} + +static void +sigchldHandler(int sig) +{ + (void)sig; + pendingWait = 1; +} + +static int +sigPermanent(int sig, void(*handler)(int)) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; // no SA_RESTART! + sa.sa_handler = handler; + return sigaction(sig, &sa, NULL); +} + diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp new file mode 100644 index 00000000000..9a1f3dc9c82 --- /dev/null +++ b/configd/src/apps/sentinel/service.cpp @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/wait.h> + +#include <vespa/log/log.h> +LOG_SETUP(".service", "$Id$"); +#include <vespa/log/llparser.h> + +#include "service.h" +#include "output-connection.h" + +extern sig_atomic_t stop; + +namespace config { +namespace sentinel { + + +Service::Service(const SentinelConfig::Service& service, const SentinelConfig::Application& application, + std::list<OutputConnection *> &ocs, StartMetrics &metrics) + : _pid(-1), + _rawState(READY), + _state(_rawState), + _exitStatus(0), + _config(new SentinelConfig::Service(service)), + _isAutomatic(true), + _restartPenalty(0), + _last_start(0), + _application(application), + _outputConnections(ocs), + _metrics(metrics) +{ + LOG(debug, "%s: created", name().c_str()); + LOG(debug, "autostart: %s", _config->autostart ? "YES" : "NO"); + LOG(debug, " restart: %s", _config->autorestart ? "YES" : "NO"); + LOG(debug, " command: %s", _config->command.c_str()); + LOG(debug, " configid: %s", _config->id.c_str()); + + if (_config->autostart) { + start(); + } +} + +void +Service::reconfigure(const SentinelConfig::Service& config) +{ + if (config.command != _config->command) { + LOG(debug, "%s: reconfigured command '%s' -> '%s' - this will " + "take effect at next restart", name().c_str(), + _config->command.c_str(), config.command.c_str()); + } + if (config.autostart != _config->autostart) { + LOG(debug, "%s: reconfigured autostart %s", name().c_str(), + config.autostart ? "OFF -> ON" : "ON -> OFF"); + } + if (config.autorestart != _config->autorestart) { + LOG(debug, "%s: reconfigured autorestart %s", name().c_str(), + config.autorestart ? 
"OFF -> ON" : "ON -> OFF"); + } + if (config.id != _config->id) { + LOG(warning, "%s: reconfigured config id '%s' -> '%s' - signaling service restart", + name().c_str(), _config->id.c_str(), config.id.c_str()); + terminate(true); + } + + delete _config; + _config = new SentinelConfig::Service(config); + + if (_isAutomatic + && ((_config->autostart && _state == READY) + || (_config->autorestart && _state == FINISHED))) + { + LOG(debug, "%s: Restarting due to new config", name().c_str()); + start(); + } +} + +Service::~Service() +{ + terminate(false); + delete _config; +} + +int +Service::terminate(bool catchable) +{ + if (isRunning()) { + runPreShutdownCommand(); + LOG(debug, "%s: terminate(%s)", name().c_str(), catchable ? "cleanly" : "NOW"); + resetRestartPenalty(); + if (catchable) { + setState(TERMINATING); + int ret = kill(_pid, SIGTERM); + LOG(debug, "%s: kill -SIGTERM %d: %s", name().c_str(), (int)_pid, + ret == 0 ? "OK" : strerror(errno)); + return ret; + } else { + setState(KILLING); + kill(_pid, SIGCONT); // if it was stopped for some reason + int ret = kill(_pid, SIGKILL); + LOG(debug, "%s: kill -SIGKILL %d: %s", name().c_str(), (int)_pid, + ret == 0 ? "OK" : strerror(errno)); + return ret; + } + } + + return 0; // Not running, so all is ok. 
+} + +void +Service::runPreShutdownCommand() +{ + if (_config->preShutdownCommand.length() > 0) { + LOG(debug, "%s: runPreShutdownCommand(%s)", name().c_str(), _config->preShutdownCommand.c_str()); + runCommand(_config->preShutdownCommand); + } +} + +void +Service::runCommand(const std::string & command) +{ + int ret = system(command.c_str()); + if (ret != 0) { + LOG(info, "%s: unable to run showdown command (%s): %d (%s)", name().c_str(), command.c_str(), ret, strerror(ret)); + } +} + +int +Service::start() +{ + // make sure the service does not restart in a tight loop: + time_t now = time(0); + int diff = now - _last_start; + if (diff < 10) { + incrementRestartPenalty(); + now += _restartPenalty; // will delay start this much + } + _last_start = now; + +// make a pipe, close the good ends of it, mark it close-on-exec +// if exec fails, write a complaint on the fd (which will then be read +// by mother program). +// +// Return 0 on success, -1 on failure + setState(STARTING); + + int pipes[2]; + int err = pipe(pipes); + int stdoutpipes[2]; + err |= pipe(stdoutpipes); + int stderrpipes[2]; + err |= pipe(stderrpipes); + + if (err == -1) { + LOG(error, "%s: Attempted to start, but pipe() failed: %s", name().c_str(), + strerror(errno)); + setState(FAILED); + return -1; + } + + fflush(NULL); + _pid = fork(); + if (_pid == -1) { + LOG(error, "%s: Attempted to start, but fork() failed: %s", name().c_str(), + strerror(errno)); + setState(FAILED); + close(pipes[0]); + close(pipes[1]); + close(stdoutpipes[0]); + close(stdoutpipes[1]); + close(stderrpipes[0]); + close(stderrpipes[1]); + return -1; + } + + if (_pid == 0) { + close(pipes[0]); // Close reading end + close(stdoutpipes[0]); + close(stderrpipes[0]); + + close(1); + dup2(stdoutpipes[1], 1); + close(stdoutpipes[1]); + + close(2); + dup2(stderrpipes[1], 2); + close(stderrpipes[1]); + + LOG(debug, "%s: Started as pid %d", name().c_str(), + static_cast<int>(getpid())); + signal(SIGTERM, SIG_DFL); + signal(SIGINT, 
SIG_DFL); + if (stop) { + kill(getpid(), SIGTERM); + } + if (_restartPenalty > 0) { + LOG(debug, "%s: Applying %u sec restart penalty", name().c_str(), + _restartPenalty); + sleep(_restartPenalty); + } + EV_STARTING(name().c_str()); + runChild(pipes); // This function should not return. + _exit(EXIT_FAILURE); + } + + close(pipes[1]); // close writing end + close(stdoutpipes[1]); + close(stderrpipes[1]); + + // do not call ensureChildRuns, as the pipe magic did not work as intended + // This also ensures that the process does not wait while the service process waits in penalty. + // ensureChildRuns(pipes[0]); // This will wait until the execl goes through + setState(RUNNING); + _metrics.currentlyRunningServices++; + close(pipes[0]); // close reading end + + using ns_log::LLParser; + LLParser *p = new LLParser(); + p->setService(_config->name.c_str()); + p->setComponent("stdout"); + p->setPid(_pid); + fcntl(stdoutpipes[0], F_SETFL, + fcntl(stdoutpipes[0], F_GETFL) | O_NONBLOCK); + OutputConnection *c = new OutputConnection(stdoutpipes[0], p); + _outputConnections.push_back(c); + + p = new LLParser(); + p->setService(_config->name.c_str()); + p->setComponent("stderr"); + p->setPid(_pid); + p->setDefaultLevel(ns_log::Logger::warning); + fcntl(stderrpipes[0], F_SETFL, + fcntl(stderrpipes[0], F_GETFL) | O_NONBLOCK); + c = new OutputConnection(stderrpipes[0], p); + _outputConnections.push_back(c); + + return (_state == RUNNING) ? 0 : -1; +} + + +// TODO: Garbage collect this, since it did not work as intended when execl'ing /bin/sh +void +Service::ensureChildRuns(int fd) +{ + char buf[200]; + int len; + do { + len = read(fd, buf, sizeof buf); + } while (len == -1 && errno == EINTR); + if (len > 0) { + // Failed to do an execl.. pick up the remains + _exitStatus = 0; + waitpid(_pid, &_exitStatus, 0); + setState(FAILED); + } else { + setState(RUNNING); + } +} + + +void +Service::youExited(int status) +{ + // Someone did a waitpid() and figured out that we exited. 
+ _exitStatus = status; + if (WIFEXITED(status)) { + LOG(debug, "%s: Exited with exit code %d", name().c_str(), + WEXITSTATUS(status)); + EV_STOPPED(name().c_str(), _pid, WEXITSTATUS(status)); + setState(FINISHED); + } else if (WIFSIGNALED(status)) { + bool expectedDeath = (_state == KILLING || _state == TERMINATING + || _state == KILLED || _state == TERMINATED); + if (expectedDeath) { + EV_STOPPED(name().c_str(), _pid, WTERMSIG(status)); + LOG(debug, "%s: Exited expectedly by signal %d", name().c_str(), + WTERMSIG(status)); + } else { + EV_CRASH(name().c_str(), _pid, WTERMSIG(status)); + setState(FAILED); + } + } else if (WIFSTOPPED(status)) { + LOG(warning, "%s: STOPPED by signal %d!", name().c_str(), WSTOPSIG(status)); + setState(FAILED); + } else { + LOG(error, "%s: Weird exit code %d", name().c_str(), status); + setState(FAILED); + } + _metrics.currentlyRunningServices--; + + if (_state == TERMINATING) { + setState(TERMINATED); + } else if (_state == KILLING) { + setState(KILLED); + } + if (_isAutomatic && _config->autorestart && !stop) { + // ### Implement some rate limiting here maybe? 
+ LOG(debug, "%s: Has autorestart flag, restarting.", name().c_str()); + setState(READY); + _metrics.totalRestartsCounter++; + _metrics.totalRestartsLastPeriod++; + start(); + } +} + +void +Service::runChild(int pipes[2]) +{ + // child process - this should exec or signal error + for (int n = 3; n < 1024; ++n) { // Close all open fds on exec() + fcntl(n, F_SETFD, FD_CLOEXEC); + } + + // TODO: Garbage collect the clever pipes magic, as it does not work when the execl target is /bin/sh + fcntl(pipes[1], F_SETFD, FD_CLOEXEC); // close on exec() + + // Set up environment + setenv("VESPA_SERVICE_NAME", _config->name.c_str(), 1); + setenv("VESPA_CONFIG_ID", _config->id.c_str(), 1); + setenv("VESPA_APPLICATION_TENANT", _application.tenant.c_str(), 1); + setenv("VESPA_APPLICATION_NAME", _application.name.c_str(), 1); + setenv("VESPA_APPLICATION_ENVIRONMENT", _application.environment.c_str(), 1); + setenv("VESPA_APPLICATION_REGION", _application.region.c_str(), 1); + setenv("VESPA_APPLICATION_INSTANCE", _application.instance.c_str(), 1); + if (_config->affinity.cpuSocket >= 0) { + setenv("VESPA_AFFINITY_CPU_SOCKET", std::to_string(_config->affinity.cpuSocket).c_str(), 1); + } + // ROOT is already set + + // Set up file descriptor 0 (1 and 2 should be setup already) + close(0); + int fd = open("/dev/null", O_RDONLY | O_NOCTTY, 0666); + if (fd != 0) { + char buf[200]; + snprintf(buf, sizeof buf, "open /dev/null for fd 0: got %d " + "(%s)", fd, strerror(errno)); + write(pipes[1], buf, strlen(buf)); + _exit(EXIT_FAILURE); + } + fcntl(0, F_SETFD, 0); // Don't close on exec + + execl("/bin/sh", "/bin/sh", "-c", _config->command.c_str(), NULL); + + char buf[200]; + snprintf(buf, sizeof buf, "exec error: %s for /bin/sh -c '%s'", + strerror(errno), _config->command.c_str()); + write(pipes[1], buf, strlen(buf)); + _exit(EXIT_FAILURE); +} + +const vespalib::string & +Service::name() const +{ + return _config->name; +} + +bool +Service::isRunning() const +{ + switch (_state) { + case 
READY: + case FINISHED: + case KILLED: + case TERMINATED: + case FAILED: + return false; + + case STARTING: + case RUNNING: + case TERMINATING: + case KILLING: + return true; + } + return true; // this will not be reached +} + +void +Service::setAutomatic(bool autoStatus) +{ + _isAutomatic = autoStatus; + resetRestartPenalty(); +} + + +void +Service::incrementRestartPenalty() +{ + if (_restartPenalty < MAX_RESTART_PENALTY) { + _restartPenalty++; + } else { + _restartPenalty = MAX_RESTART_PENALTY; + } +} + + +void +Service::setState(ServiceState state) +{ + if (state != _state) { + LOG(debug, "%s: %s->%s", name().c_str(), stateName(_state), stateName(state)); + _rawState = state; + } + + // penalize failed services + if (state == FAILED) { + incrementRestartPenalty(); + } +} + +const char * +Service::stateName(ServiceState state) const +{ + switch (state) { + case READY: return "READY"; + case STARTING: return "STARTING"; + case RUNNING: return "RUNNING"; + case TERMINATING: return "TERMINATING"; + case KILLING: return "KILLING"; + case FINISHED: return "FINISHED"; + case TERMINATED: return "TERMINATED"; + case KILLED: return "KILLED"; + case FAILED: return "FAILED"; + } + return "--BAD--"; +} + + +} // end namespace sentinel +} // end namespace config diff --git a/configd/src/apps/sentinel/service.h b/configd/src/apps/sentinel/service.h new file mode 100644 index 00000000000..0021221b17e --- /dev/null +++ b/configd/src/apps/sentinel/service.h @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/vespalib/util/linkedptr.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/config-sentinel.h> +#include <list> + +#include "metrics.h" + +using cloud::config::SentinelConfig; + +namespace config { +namespace sentinel { + +class OutputConnection; + +class Service +{ +private: + Service(const Service &); + Service& operator=(const Service &); + + pid_t _pid; + enum ServiceState { READY, STARTING, RUNNING, TERMINATING, KILLING, + FINISHED, TERMINATED, KILLED, FAILED } _rawState; + const enum ServiceState& _state; + int _exitStatus; + SentinelConfig::Service *_config; + bool _isAutomatic; + + static const unsigned int MAX_RESTART_PENALTY = 60; + unsigned int _restartPenalty; + time_t _last_start; + + void runChild(int pipes[2]) __attribute__((noreturn)); + void ensureChildRuns(int fd); + void setState(ServiceState state); + void runPreShutdownCommand(); + void runCommand(const std::string & command); + const char *stateName(ServiceState state) const; + + const SentinelConfig::Application _application; + std::list<OutputConnection *> &_outputConnections; + + StartMetrics &_metrics; + +public: + typedef vespalib::LinkedPtr<Service> LP; + ~Service(); + Service(const SentinelConfig::Service& config, + const SentinelConfig::Application& application, + std::list<OutputConnection *> &ocs, + StartMetrics &metrics); + void reconfigure(const SentinelConfig::Service& config); + int pid() const { return _pid; } + int terminate(bool catchable); + int start(); + void youExited(int status); // Call this if waitpid says it exited + const vespalib::string & name() const; + const char *stateName() const { return stateName(_state); } + bool isRunning() const; + int exitStatus() const { return _exitStatus; } + const SentinelConfig::Service& serviceConfig() const { return *_config; } + void setAutomatic(bool autoStatus); + bool isAutomatic() const { return _isAutomatic; } + void resetRestartPenalty() { _restartPenalty = 0; } + void 
incrementRestartPenalty(); +}; + +} // end namespace sentinel +} // end namespace config + diff --git a/configd/src/apps/sentinel/state-api.cpp b/configd/src/apps/sentinel/state-api.cpp new file mode 100644 index 00000000000..0f1a901ec6b --- /dev/null +++ b/configd/src/apps/sentinel/state-api.cpp @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "state-api.h" +#include <vespa/vespalib/util/host_name.h> +#include <vespa/vespalib/util/stringfmt.h> + +namespace { + +std::map<vespalib::string, vespalib::string> noParams; + +} // namespace <unnamed> + + +namespace config { +namespace sentinel { + +vespalib::string +StateApi::get(const char *path) const +{ + return myStateApi.get(host_and_port, path, noParams); +} + +void +StateApi::bound(int port) +{ + host_and_port = vespalib::make_string("%s:%d", vespalib::HostName::get().c_str(), port); +} + +} // namespace config::sentinel +} // namespace config diff --git a/configd/src/apps/sentinel/state-api.h b/configd/src/apps/sentinel/state-api.h new file mode 100644 index 00000000000..59e614a26cc --- /dev/null +++ b/configd/src/apps/sentinel/state-api.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include <vespa/vespalib/net/state_api.h>
+#include <vespa/vespalib/net/simple_metrics_producer.h>
+#include <vespa/vespalib/net/simple_health_producer.h>
+#include <vespa/vespalib/net/simple_component_config_producer.h>
+
+namespace config {
+namespace sentinel {
+
+/**
+ * Bundles the health, metrics and component-config producers with the
+ * vespalib::StateApi built on top of them.
+ * The producers are declared before myStateApi because its constructor
+ * takes them as arguments.
+ */
+struct StateApi {
+    vespalib::string host_and_port;     // assigned by bound()
+    vespalib::SimpleHealthProducer myHealth;
+    vespalib::SimpleMetricsProducer myMetrics;
+    vespalib::SimpleComponentConfigProducer myComponents;
+    vespalib::StateApi myStateApi;
+
+    StateApi() : myStateApi(myHealth, myMetrics, myComponents) {}
+
+    // Forwards 'path' to myStateApi together with host_and_port.
+    vespalib::string get(const char *path) const;
+    // Stores "<hostname>:<port>" in host_and_port.
+    void bound(int port);
+};
+
+} // namespace config::sentinel
+} // namespace config
diff --git a/configd/src/apps/su/.gitignore b/configd/src/apps/su/.gitignore
new file mode 100644
index 00000000000..343f531c8c1
--- /dev/null
+++ b/configd/src/apps/su/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/run-as-yahoo
diff --git a/configd/src/apps/su/CMakeLists.txt b/configd/src/apps/su/CMakeLists.txt
new file mode 100644
index 00000000000..8883fe2344f
--- /dev/null
+++ b/configd/src/apps/su/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(configd_run-as-yahoo_app
+    SOURCES
+    main.cpp
+    OUTPUT_NAME run-as-yahoo
+    INSTALL bin
+    DEPENDS
+)
diff --git a/configd/src/apps/su/main.cpp b/configd/src/apps/su/main.cpp
new file mode 100644
index 00000000000..7baa726d630
--- /dev/null
+++ b/configd/src/apps/su/main.cpp
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <pwd.h> +#include <grp.h> + +/** + * small utility to use instead of "su" when we want to just + * switch to the "yahoo" user without any more fuss + **/ + +int main(int argc, char** argv) +{ + if (argc < 2) { + fprintf(stderr, "missing arguments, usage: run-as-yahoo <cmd> [args ...]"); + exit(1); + } + struct passwd *p = getpwnam("yahoo"); + if (p == NULL) { + perror("FATAL error: user 'yahoo' missing in passwd file"); + exit(1); + } + gid_t g = p->pw_gid; + uid_t u = p->pw_uid; + + if (setgid(g) != 0) { + perror("FATAL error: could not change group id"); + exit(1); + } + size_t listsize = 1; + gid_t grouplist[1] = { g }; + if (setgroups(listsize, grouplist) != 0) { + perror("FATAL error: could not setgroups"); + exit(1); + } + if (setuid(u) != 0) { + perror("FATAL error: could not change user id"); + exit(1); + } + execvp(argv[1], &argv[1]); + perror("FATAL error: execvp failed"); + exit(1); +} diff --git a/configd/src/testlist.txt b/configd/src/testlist.txt new file mode 100644 index 00000000000..51a90719d43 --- /dev/null +++ b/configd/src/testlist.txt @@ -0,0 +1,2 @@ +tests/messages +tests/configd diff --git a/configd/src/tests/configd/.gitignore b/configd/src/tests/configd/.gitignore new file mode 100644 index 00000000000..cd635735b8c --- /dev/null +++ b/configd/src/tests/configd/.gitignore @@ -0,0 +1,4 @@ +.depend* +Makefile +configd_test +/tmp.log diff --git a/configd/src/tests/configd/CMakeLists.txt b/configd/src/tests/configd/CMakeLists.txt new file mode 100644 index 00000000000..59f87a30390 --- /dev/null +++ b/configd/src/tests/configd/CMakeLists.txt @@ -0,0 +1,2 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+add_test(NAME configd_configd_test COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/run-sentinel.sh)
diff --git a/configd/src/tests/configd/DESC b/configd/src/tests/configd/DESC
new file mode 100644
index 00000000000..b46cf2c5ba3
--- /dev/null
+++ b/configd/src/tests/configd/DESC
@@ -0,0 +1 @@
+Unit tests for the config daemon (aka sentinel)
diff --git a/configd/src/tests/configd/FILES b/configd/src/tests/configd/FILES
new file mode 100644
index 00000000000..63c2a3e08ca
--- /dev/null
+++ b/configd/src/tests/configd/FILES
@@ -0,0 +1 @@
+run-sentinel.sh
diff --git a/configd/src/tests/configd/run-sentinel.sh b/configd/src/tests/configd/run-sentinel.sh
new file mode 100755
index 00000000000..97ba32d34e8
--- /dev/null
+++ b/configd/src/tests/configd/run-sentinel.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Started without "-c <config-id>" the sentinel must exit with a failure status.
+../../apps/sentinel/config-sentinel > tmp.log 2>&1
+if [ $? -eq 0 ]; then
+    echo "Should need argument"
+    exit 1
+fi
+
+# ... and its captured output must contain the usage text.
+if ! grep -q Usage tmp.log ; then
+    echo "Missing usage in log:"
+    cat tmp.log
+    exit 1
+fi
+
+exit 0
diff --git a/configd/src/tests/messages/.gitignore b/configd/src/tests/messages/.gitignore
new file mode 100644
index 00000000000..7e625100dc2
--- /dev/null
+++ b/configd/src/tests/messages/.gitignore
@@ -0,0 +1,4 @@
+.depend*
+Makefile
+messages_test
+configd_messages_test_app
diff --git a/configd/src/tests/messages/CMakeLists.txt b/configd/src/tests/messages/CMakeLists.txt
new file mode 100644
index 00000000000..fed4f9fe470
--- /dev/null
+++ b/configd/src/tests/messages/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(configd_messages_test_app
+    SOURCES
+    messages.cpp
+    DEPENDS
+    vespalog
+    vespalib
+)
+vespa_add_test(NAME configd_messages_test_app COMMAND configd_messages_test_app)
diff --git a/configd/src/tests/messages/DESC b/configd/src/tests/messages/DESC
new file mode 100644
index 00000000000..089612b9cae
--- /dev/null
+++ b/configd/src/tests/messages/DESC
@@ -0,0 +1 @@
+Unit tests for the configserver protocol.
diff --git a/configd/src/tests/messages/FILES b/configd/src/tests/messages/FILES
new file mode 100644
index 00000000000..139bb6fb364
--- /dev/null
+++ b/configd/src/tests/messages/FILES
@@ -0,0 +1 @@
+messages.cpp
diff --git a/configd/src/tests/messages/messages.cpp b/configd/src/tests/messages/messages.cpp
new file mode 100644
index 00000000000..b2627f66453
--- /dev/null
+++ b/configd/src/tests/messages/messages.cpp
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/log/log.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+LOG_SETUP("messages_test");
+
+// Placeholder test application: Main() only initialises and finishes the
+// test framework, it contains no checks of its own.
+class MessagesTest : public vespalib::TestApp
+{
+public:
+    MessagesTest() { }
+    int Main();
+};
+
+// Runs TEST_INIT immediately followed by TEST_DONE.
+int MessagesTest::Main()
+{
+    TEST_INIT("messages_test");
+    TEST_DONE();
+}
+
+TEST_APPHOOK(MessagesTest);
diff --git a/configd/testrun/.gitignore b/configd/testrun/.gitignore
new file mode 100644
index 00000000000..559f57dccbe
--- /dev/null
+++ b/configd/testrun/.gitignore
@@ -0,0 +1,9 @@
+test-report.html
+test-report.html.*
+test.*.*.desc
+test.*.*.file.*
+test.*.*.files.html
+test.*.*.log
+tmp.*
+/test.*.*.result
+Makefile |