diff options
71 files changed, 787 insertions, 2828 deletions
diff --git a/bundle-plugin/pom.xml b/bundle-plugin/pom.xml index 4c39d32131a..839551f5b07 100644 --- a/bundle-plugin/pom.xml +++ b/bundle-plugin/pom.xml @@ -24,6 +24,11 @@ </dependency> <dependency> <groupId>org.apache.maven</groupId> + <artifactId>maven-archiver</artifactId> + <version>3.1.1</version> + </dependency> + <dependency> + <groupId>org.apache.maven</groupId> <artifactId>maven-model</artifactId> </dependency> <dependency> @@ -36,10 +41,6 @@ <scope>provided</scope> </dependency> <dependency> - <groupId>org.apache.maven</groupId> - <artifactId>maven-project</artifactId> - </dependency> - <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <scope>test</scope> @@ -71,10 +72,6 @@ <artifactId>asm</artifactId> </dependency> <dependency> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - </dependency> - <dependency> <groupId>org.twdata.maven</groupId> <artifactId>mojo-executor</artifactId> </dependency> diff --git a/bundle-plugin/src/main/scala/com/yahoo/container/plugin/mojo/AssembleContainerPluginMojo.scala b/bundle-plugin/src/main/scala/com/yahoo/container/plugin/mojo/AssembleContainerPluginMojo.scala index 15c2a2ed35a..67bce48601c 100644 --- a/bundle-plugin/src/main/scala/com/yahoo/container/plugin/mojo/AssembleContainerPluginMojo.scala +++ b/bundle-plugin/src/main/scala/com/yahoo/container/plugin/mojo/AssembleContainerPluginMojo.scala @@ -8,6 +8,7 @@ import java.util.zip.ZipEntry import com.yahoo.container.plugin.util.{Files, JarFiles} import org.apache.maven.archiver.{MavenArchiveConfiguration, MavenArchiver} +import org.apache.maven.execution.MavenSession import org.apache.maven.plugin.AbstractMojo import org.apache.maven.plugins.annotations.{Component, Mojo, Parameter, ResolutionScope} import org.apache.maven.project.MavenProject @@ -27,6 +28,9 @@ class AssembleContainerPluginMojo extends AbstractMojo { @Parameter(defaultValue = "${project}") var project: MavenProject = null + @Parameter(defaultValue = "${session}", readonly = true, required = true) + var session: MavenSession = null + @Component(role = classOf[Archiver], hint = "jar") var jarArchiver: JarArchiver = null @@ -71,7 +75,7 @@ class AssembleContainerPluginMojo extends AbstractMojo { val mavenArchiver = new MavenArchiver mavenArchiver.setArchiver(jarArchiver) mavenArchiver.setOutputFile(jarFile) - mavenArchiver.createArchive(project, archiveConfiguration) + mavenArchiver.createArchive(session, project, archiveConfiguration) } private def addDependencies() { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/TenantHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/TenantHandlerTest.java index d5f4dd573c6..21e68b02a37 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/TenantHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/TenantHandlerTest.java @@ -97,7 +97,7 @@ public class TenantHandlerTest extends TenantTest { handler.handleDELETE(HttpRequest.createTestRequest("http://deploy.example.yahoo.com:80/application/v2/tenant/" + a, Method.DELETE)); fail(); } catch (BadRequestException e) { - assertThat(e.getMessage(), is("Cannot delete tenant 'a', as it has active applications: [tenant 'a', application 'foo', instance 'default']")); + assertThat(e.getMessage(), is("Cannot delete tenant 'a', as it has active applications: [a.foo]")); } } diff --git a/container-disc/pom.xml b/container-disc/pom.xml index 265dee1093c..5d19e9ecffe 100644 --- a/container-disc/pom.xml +++ b/container-disc/pom.xml @@ -219,7 +219,6 @@ <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> - <version>1.2.1</version> <executions> <execution> <phase>validate</phase> diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Container.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Container.java index 062f0503615..7f47b638dde 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Container.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Container.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.dockerapi; -import java.time.Instant; import java.util.Objects; /** @@ -13,31 +12,18 @@ public class Container { public final ContainerName name; public final State state; public final int pid; - public final Instant created; public Container( final String hostname, final DockerImage image, final ContainerName containerName, final State state, - final int pid, - final String created) { + final int pid) { this.hostname = hostname; this.image = image; this.name = containerName; this.state = state; this.pid = pid; - this.created = Instant.parse(created); - } - - // For testing only - public Container( - final String hostname, - final DockerImage image, - final ContainerName containerName, - final State state, - final int pid) { - this(hostname, image, containerName, state, pid, "2017-02-13T13:45:12.133713371Z"); } @Override @@ -49,8 +35,7 @@ public class Container { return Objects.equals(hostname, other.hostname) && Objects.equals(image, other.image) && Objects.equals(name, other.name) - && Objects.equals(pid, other.pid) - && Objects.equals(created, other.created); + && Objects.equals(pid, other.pid); } @Override @@ -66,7 +51,6 @@ public class Container { + " name=" + name + " state=" + state + " pid=" + pid - + " created=" + created + "}"; } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java index bd06794c733..c198c1b0a6f 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java @@ -23,7 +23,6 @@ import com.github.dockerjava.core.command.PullImageResultCallback; import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory; import com.google.inject.Inject; import com.yahoo.log.LogLevel; -import com.yahoo.net.HostName; import com.yahoo.vespa.hosted.dockerapi.metrics.CounterWrapper; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; @@ -375,8 +374,7 @@ public class DockerImpl implements Docker { new DockerImage(response.getConfig().getImage()), new ContainerName(decode(response.getName())), Container.State.valueOf(response.getState().getStatus().toUpperCase()), - response.getState().getPid(), - response.getCreated() + response.getState().getPid() )) .map(Stream::of) .orElse(Stream.empty()); @@ -525,10 +523,7 @@ public class DockerImpl implements Docker { } private void setMetrics(MetricReceiverWrapper metricReceiver) { - Dimensions dimensions = new Dimensions.Builder() - .add("host", HostName.getLocalhost()) - .add("role", "docker").build(); - + Dimensions dimensions = new Dimensions.Builder().add("role", "docker").build(); numberOfDockerDaemonFails = metricReceiver.declareCounter(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "daemon.api_fails"); } } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/DimensionMetrics.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/DimensionMetrics.java new file mode 100644 index 00000000000..2cccee1e066 --- /dev/null +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/DimensionMetrics.java @@ -0,0 +1,67 @@ +// Copyright 2017 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.dockerapi.metrics; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * @author freva + */ +public class DimensionMetrics { + private final static ObjectMapper objectMapper = new ObjectMapper(); + + private final String application; + private final Dimensions dimensions; + private final Map<String, Number> metrics; + + DimensionMetrics(String application, Dimensions dimensions, Map<String, Number> metrics) { + this.application = application; + this.dimensions = dimensions; + this.metrics = metrics; + } + + Map<String, Object> getMetrics() { + final Map<String, Object> routing = new HashMap<>(); + final Map<String, Object> routingYamas = new HashMap<>(); + routing.put("yamas", routingYamas); + routingYamas.put("namespaces", Collections.singletonList("Vespa")); + + Map<String, Object> report = new HashMap<>(); + report.put("application", application); + report.put("dimensions", dimensions.dimensionsMap); + report.put("metrics", metrics); + report.put("routing", routing); + return report; + } + + public String toSecretAgentReport() throws JsonProcessingException { + Map<String, Object> report = getMetrics(); + report.put("timestamp", System.currentTimeMillis() / 1000); + + return objectMapper.writeValueAsString(report); + } + + public static class Builder { + private final String application; + private final Dimensions dimensions; + private final Map<String, Number> metrics = new HashMap<>(); + + public Builder(String application, Dimensions dimensions) { + this.application = application; + this.dimensions = dimensions; + } + + public Builder withMetric(String metricName, Number metricValue) { + metrics.put(metricName, metricValue); + return this; + } + + public DimensionMetrics build() { + return new DimensionMetrics(application, dimensions, metrics); + } + } +} diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java index 231ddbc07f1..31edf49e80e 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java @@ -1,14 +1,11 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.dockerapi.metrics; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.Inject; import com.yahoo.metrics.simple.MetricReceiver; import com.yahoo.metrics.simple.Point; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -27,9 +24,7 @@ public class MetricReceiverWrapper { // Application names used public static final String APPLICATION_DOCKER = "docker"; public static final String APPLICATION_NODE = "vespa.node"; - public static final String APPLICATION_HOST_LIFE = "host_life"; - private final static ObjectMapper objectMapper = new ObjectMapper(); private final Object monitor = new Object(); private final Map<String, ApplicationMetrics> applicationMetrics = new HashMap<>(); // key is application name private final MetricReceiver metricReceiver; @@ -72,22 +67,13 @@ public class MetricReceiverWrapper { } } - public void unsetMetricsForContainer(String hostname) { - synchronized (monitor) { - applicationMetrics.values() - .forEach(m -> m.metricsByDimensions.keySet() - .removeIf(d -> d.dimensionsMap.containsKey("host") && - d.dimensionsMap.get("host").equals(hostname))); - } - } - public List<DimensionMetrics> getAllMetrics() { synchronized (monitor) { List<DimensionMetrics> dimensionMetrics = new ArrayList<>(); - applicationMetrics.entrySet() - .forEach(e -> e.getValue().metricsByDimensions().entrySet().stream() - .map(entry -> new DimensionMetrics(e.getKey(), entry.getKey(), entry.getValue())) - .forEach(dimensionMetrics::add)); + applicationMetrics.forEach((application, applicationMetrics) -> applicationMetrics.metricsByDimensions().entrySet().stream() + .map(entry -> new DimensionMetrics(application, entry.getKey(), + entry.getValue().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, value -> value.getValue().getValue())))) + .forEach(dimensionMetrics::add)); return dimensionMetrics; } } @@ -96,11 +82,11 @@ public class MetricReceiverWrapper { public Set<Map<String, Object>> getAllMetricsRaw() { synchronized (monitor) { Set<Map<String, Object>> dimensionMetrics = new HashSet<>(); - applicationMetrics.entrySet() - .forEach(e -> e.getValue().metricsByDimensions().entrySet().stream() - .map(entry -> new DimensionMetrics(e.getKey(), entry.getKey(), entry.getValue())) - .map(DimensionMetrics::getMetrics) - .forEach(dimensionMetrics::add)); + applicationMetrics.forEach((application, applicationMetrics) -> applicationMetrics.metricsByDimensions().entrySet().stream() + .map(entry -> new DimensionMetrics(application, entry.getKey(), + entry.getValue().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, value -> value.getValue().getValue())))) + .map(DimensionMetrics::getMetrics) + .forEach(dimensionMetrics::add)); return dimensionMetrics; } } @@ -114,40 +100,6 @@ public class MetricReceiverWrapper { } } - public class DimensionMetrics { - private final String application; - private final Dimensions dimensions; - private final Map<String, Object> metrics; - - DimensionMetrics(String application, Dimensions dimensions, Map<String, MetricValue> metricValues) { - this.application = application; - this.dimensions = dimensions; - this.metrics = metricValues.entrySet().stream().collect( - Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getValue())); - } - - private Map<String, Object> getMetrics() { - final Map<String, Object> routing = new HashMap<>(); - final Map<String, Object> routingYamas = new HashMap<>(); - routing.put("yamas", routingYamas); - routingYamas.put("namespaces", Arrays.asList("Vespa")); - - Map<String, Object> report = new HashMap<>(); - report.put("application", application); - report.put("dimensions", dimensions.dimensionsMap); - report.put("metrics", metrics); - report.put("routing", routing); - return report; - } - - public String toSecretAgentReport() throws JsonProcessingException { - Map<String, Object> report = getMetrics(); - report.put("timestamp", System.currentTimeMillis() / 1000); - - return objectMapper.writeValueAsString(report); - } - } - private Map<Dimensions, Map<String, MetricValue>> getOrCreateApplicationMetrics(String application) { if (! applicationMetrics.containsKey(application)) { ApplicationMetrics metrics = new ApplicationMetrics(); diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 67a8dafdda5..0bd34978621 100644 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -1,9 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. FROM vespabuild -COPY vespa-ci-internal.sh /vespa-ci-internal.sh COPY tmp/vespa.spec /vespa.spec - RUN yum-builddep -y /vespa.spec # Build using non-root user 'vespabuilder' @@ -13,3 +11,5 @@ USER vespabuilder # Java requires proper locale for unicode ENV LANG en_US.UTF-8 + +COPY vespa-ci-internal.sh /vespa-ci-internal.sh diff --git a/docker/vespa-ci-internal.sh b/docker/vespa-ci-internal.sh index c1de874d5ca..1e5446f2905 100755 --- a/docker/vespa-ci-internal.sh +++ b/docker/vespa-ci-internal.sh @@ -13,16 +13,16 @@ GIT_COMMIT=$1 SOURCE_DIR=~/vespa BUILD_DIR=~/build NUM_CORES=$(nproc --all) -NUM_THREADS=$((${NUM_CORES} + ${NUM_CORES}/2)) +NUM_THREADS=$((${NUM_CORES} * 2)) mkdir "${SOURCE_DIR}" mkdir "${BUILD_DIR}" git clone --no-checkout --local --no-hardlinks file:///vespa "${SOURCE_DIR}" cd "${SOURCE_DIR}" -git -c advice.detachedHead=false checkout --detach ${GIT_COMMIT} +git -c advice.detachedHead=false checkout ${GIT_COMMIT} source /opt/rh/devtoolset-6/enable || true sh ./bootstrap.sh full -MAVEN_OPTS="-Xms128m -Xmx512m" mvn install +MAVEN_OPTS="-Xms128m -Xmx512m" mvn -T ${NUM_THREADS} install cd "${BUILD_DIR}" cmake3 -DCMAKE_INSTALL_PREFIX=/opt/vespa \ -DJAVA_HOME=/usr/lib/jvm/java-openjdk \ diff --git a/docker/vespa-ci.sh b/docker/vespa-ci.sh index 3e990bb2991..beb5944233b 100755 --- a/docker/vespa-ci.sh +++ b/docker/vespa-ci.sh @@ -23,4 +23,4 @@ cp -p ../dist/vespa.spec tmp/vespa.spec docker build -t "$CI_DOCKER_IMAGE" -f Dockerfile.ci . docker run --rm -v $(pwd)/..:/vespa --entrypoint /vespa-ci-internal.sh "$CI_DOCKER_IMAGE" "$GIT_COMMIT" \ - 2>&1 | tee vespa-ci-${GIT_COMMIT}-$(date +%Y-%m-%dT%H:%M:%S%z).log + 2>&1 | tee vespa-ci-$(date +%Y-%m-%dT%H:%M:%S%z).log diff --git a/fastos/README b/fastos/README new file mode 100644 index 00000000000..ed9afabffb8 --- /dev/null +++ b/fastos/README @@ -0,0 +1,4 @@ +Old OS abstraction layer + +obsolete, to be replaced with implementations using +standard C++14 threads and newer unix networking APIs diff --git a/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.cpp b/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.cpp index 78f5751b2ef..92777ef7b18 100644 --- a/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.cpp +++ b/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.cpp @@ -89,7 +89,6 @@ ZKFileDBModel::cleanDeployedFilesToDownload( for (auto & host : allHosts) { Path hostPath = _hostsPath / host; try { - removeLegacyDeployFileNodes(hostPath); // If this host is NOT part of hosts to deploy to if (toPreserve.find(host) == toPreserve.end()) { removeDeployFileNodes(hostPath, appId); @@ -138,22 +137,6 @@ ZKFileDBModel::removeNonApplicationFiles(const Path & hostPath, const std::strin } } - -void -ZKFileDBModel::removeLegacyDeployFileNodes(const Path & hostPath) -{ - std::vector<std::string> deployNodes = _zk->getChildren(hostPath); - for (auto & deployNode : deployNodes) { - Path deployNodePath = hostPath / deployNode; - std::string applicationId(readApplicationId(*_zk, deployNodePath)); - size_t numParts = std::count(applicationId.begin(), applicationId.end(), ':'); - // If we have an id with 3 colons, it is a legacy id and can be deleted. - if (numParts == 3) { - _zk->remove(deployNodePath); - } - } -} - void ZKFileDBModel::removeDeployFileNodes(const Path & hostPath, const std::string& appId) { std::vector<std::string> deployNodes = _zk->getChildren(hostPath); diff --git a/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.h b/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.h index 6216f6a54c2..4a89a9547a9 100644 --- a/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.h +++ b/filedistribution/src/vespa/filedistribution/model/zkfiledbmodel.h @@ -11,7 +11,6 @@ private: const std::shared_ptr<ZKFacade> _zk; char getProgress(const Path& path); void removeDeployFileNodes(const Path& hostPath, const std::string& appId); - void removeLegacyDeployFileNodes(const Path& hostPath); bool canRemoveHost(const Path& hostPath, const std::string& appId); public: const static Path _root; diff --git a/logserver/README b/logserver/README new file mode 100644 index 00000000000..24608e4479e --- /dev/null +++ b/logserver/README @@ -0,0 +1 @@ +Server that receives vespa-format log lines and archives them. diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/ContainerNodeSpec.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/ContainerNodeSpec.java index 670efe9d309..c5e1da5cd85 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/ContainerNodeSpec.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/ContainerNodeSpec.java @@ -18,6 +18,7 @@ public class ContainerNodeSpec { public final Node.State nodeState; public final String nodeType; public final String nodeFlavor; + public final String nodeCanonicalFlavor; public final Optional<String> wantedVespaVersion; public final Optional<String> vespaVersion; public final Optional<Owner> owner; @@ -37,6 +38,7 @@ public class ContainerNodeSpec { final Node.State nodeState, final String nodeType, final String nodeFlavor, + final String nodeCanonicalFlavor, final Optional<String> wantedVespaVersion, final Optional<String> vespaVersion, final Optional<Owner> owner, @@ -59,6 +61,7 @@ public class ContainerNodeSpec { this.nodeState = nodeState; this.nodeType = nodeType; this.nodeFlavor = nodeFlavor; + this.nodeCanonicalFlavor = nodeCanonicalFlavor; this.wantedVespaVersion = wantedVespaVersion; this.vespaVersion = vespaVersion; this.owner = owner; @@ -85,6 +88,7 @@ public class ContainerNodeSpec { Objects.equals(nodeState, that.nodeState) && Objects.equals(nodeType, that.nodeType) && Objects.equals(nodeFlavor, that.nodeFlavor) && + Objects.equals(nodeCanonicalFlavor, that.nodeCanonicalFlavor) && Objects.equals(wantedVespaVersion, that.wantedVespaVersion) && Objects.equals(vespaVersion, that.vespaVersion) && Objects.equals(owner, that.owner) && @@ -107,6 +111,7 @@ public class ContainerNodeSpec { nodeState, nodeType, nodeFlavor, + nodeCanonicalFlavor, wantedVespaVersion, vespaVersion, owner, @@ -129,6 +134,7 @@ public class ContainerNodeSpec { + " nodeState=" + nodeState + " nodeType = " + nodeType + " nodeFlavor = " + nodeFlavor + + " nodeCanonicalFlavor = " + nodeCanonicalFlavor + " wantedVespaVersion = " + wantedVespaVersion + " vespaVersion = " + vespaVersion + " owner = " + owner @@ -243,6 +249,7 @@ public class ContainerNodeSpec { private Node.State nodeState; private String nodeType; private String nodeFlavor; + private String nodeCanonicalFlavor; private Optional<String> wantedVespaVersion = Optional.empty(); private Optional<String> vespaVersion = Optional.empty(); private Optional<Owner> owner = Optional.empty(); @@ -262,6 +269,7 @@ public class ContainerNodeSpec { nodeState(nodeSpec.nodeState); nodeType(nodeSpec.nodeType); nodeFlavor(nodeSpec.nodeFlavor); + nodeCanonicalFlavor(nodeSpec.nodeCanonicalFlavor); nodeSpec.wantedDockerImage.ifPresent(this::wantedDockerImage); nodeSpec.currentDockerImage.ifPresent(this::currentDockerImage); @@ -307,6 +315,11 @@ public class ContainerNodeSpec { return this; } + public Builder nodeCanonicalFlavor(String nodeCanonicalFlavor) { + this.nodeCanonicalFlavor = nodeCanonicalFlavor; + return this; + } + public Builder wantedVespaVersion(String wantedVespaVersion) { this.wantedVespaVersion = Optional.of(wantedVespaVersion); return this; @@ -363,7 +376,8 @@ public class ContainerNodeSpec { } public ContainerNodeSpec build() { - return new ContainerNodeSpec(hostname, wantedDockerImage, currentDockerImage, nodeState, nodeType, nodeFlavor, + return new ContainerNodeSpec(hostname, wantedDockerImage, currentDockerImage, nodeState, nodeType, + nodeFlavor, nodeCanonicalFlavor, wantedVespaVersion, vespaVersion, owner, membership, wantedRestartGeneration, currentRestartGeneration, wantedRebootGeneration, currentRebootGeneration, diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index 0b0d141e46d..fd5029162bc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -63,10 +63,7 @@ public class StorageMaintainer { this.environment = environment; this.clock = clock; - Dimensions dimensions = new Dimensions.Builder() - .add("host", HostName.getLocalhost()) - .add("role", "docker").build(); - + Dimensions dimensions = new Dimensions.Builder().add("role", "docker").build(); numberOfNodeAdminMaintenanceFails = metricReceiver.declareCounter(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.maintenance.fails"); } @@ -74,28 +71,31 @@ public class StorageMaintainer { final Path yamasAgentFolder = environment.pathInNodeAdminFromPathInNode(containerName, "/etc/yamas-agent/"); Path vespaCheckPath = Paths.get(getDefaults().underVespaHome("libexec/yms/yms_check_vespa")); - SecretAgentScheduleMaker scheduleMaker = new SecretAgentScheduleMaker("vespa", 60, vespaCheckPath, "all") + SecretAgentScheduleMaker vespaSchedule = new SecretAgentScheduleMaker("vespa", 60, vespaCheckPath, "all") + .withTag("parentHostname", environment.getParentHostHostname()); + + Path hostLifeCheckPath = Paths.get("/home/y/libexec/yms/yms_check_host_life"); + SecretAgentScheduleMaker hostLifeSchedule = new SecretAgentScheduleMaker("host-life", 60, hostLifeCheckPath) .withTag("namespace", "Vespa") .withTag("role", "tenants") .withTag("flavor", nodeSpec.nodeFlavor) .withTag("state", nodeSpec.nodeState.toString()) .withTag("zone", environment.getZone()) .withTag("parentHostname", environment.getParentHostHostname()); - - nodeSpec.owner.ifPresent(owner -> - scheduleMaker - .withTag("tenantName", owner.tenant) - .withTag("app", owner.application + "." + owner.instance)); - - nodeSpec.membership.ifPresent(membership -> - scheduleMaker - .withTag("clustertype", membership.clusterType) - .withTag("clusterid", membership.clusterId)); - - nodeSpec.vespaVersion.ifPresent(version -> scheduleMaker.withTag("vespaVersion", version)); + nodeSpec.owner.ifPresent(owner -> hostLifeSchedule + .withTag("tenantName", owner.tenant) + .withTag("app", owner.application + "." + owner.instance) + .withTag("applicationName", owner.application) + .withTag("instanceName", owner.instance) + .withTag("applicationId", owner.tenant + "." + owner.application + "." + owner.instance)); + nodeSpec.membership.ifPresent(membership -> hostLifeSchedule + .withTag("clustertype", membership.clusterType) + .withTag("clusterid", membership.clusterId)); + nodeSpec.vespaVersion.ifPresent(version -> hostLifeSchedule.withTag("vespaVersion", version)); try { - scheduleMaker.writeTo(yamasAgentFolder); + vespaSchedule.writeTo(yamasAgentFolder); + hostLifeSchedule.writeTo(yamasAgentFolder); final String[] restartYamasAgent = new String[]{"service", "yamas-agent", "restart"}; docker.executeInContainerAsRoot(containerName, restartYamasAgent); } catch (IOException e) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index 79f0c23fd24..eaae5030b50 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.node.admin.nodeadmin; import com.yahoo.collections.Pair; import com.yahoo.concurrent.ThreadFactoryFactory; -import com.yahoo.net.HostName; import com.yahoo.vespa.hosted.dockerapi.ContainerName; import com.yahoo.vespa.hosted.dockerapi.metrics.CounterWrapper; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; @@ -78,20 +77,17 @@ public class NodeAdminImpl implements NodeAdmin { this.isFrozen = true; this.startOfFreezeConvergence = clock.instant(); - Dimensions dimensions = new Dimensions.Builder() - .add("host", HostName.getLocalhost()) - .add("role", "docker").build(); - + Dimensions dimensions = new Dimensions.Builder().add("role", "docker").build(); this.numberOfContainersInLoadImageState = metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.image.loading"); this.numberOfUnhandledExceptionsInNodeAgent = metricReceiver.declareCounter(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.unhandled_exceptions"); - metricsScheduler.scheduleWithFixedDelay(() -> { + metricsScheduler.scheduleAtFixedRate(() -> { try { nodeAgents.values().forEach(nodeAgent -> nodeAgent.updateContainerNodeMetrics(nodeAgents.size())); } catch (Throwable e) { logger.warning("Metric fetcher scheduler failed", e); } - }, 0, 30, TimeUnit.SECONDS); + }, 0, 55, TimeUnit.SECONDS); aclMaintainer.ifPresent(maintainer -> aclScheduler.scheduleWithFixedDelay(() -> { if (!isFrozen()) maintainer.run(); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index baa924811c3..08fae2b707a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.dockerapi.Docker; import com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException; import com.yahoo.vespa.hosted.dockerapi.DockerImage; import com.yahoo.vespa.hosted.dockerapi.ProcessResult; +import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics; import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; @@ -26,9 +27,11 @@ import java.text.SimpleDateFormat; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; import java.util.Date; import java.util.LinkedHashMap; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.Executors; @@ -93,7 +96,7 @@ public class NodeAgentImpl implements NodeAgent { // The attributes of the last successful node repo attribute update for this node. Used to avoid redundant calls. private NodeAttributes lastAttributesSet = null; private ContainerNodeSpec lastNodeSpec = null; - private CpuUsageReporter lastCpuMetric; + private CpuUsageReporter lastCpuMetric = new CpuUsageReporter(); public NodeAgentImpl( final String hostName, @@ -131,12 +134,8 @@ public class NodeAgentImpl implements NodeAgent { }; // If the container is already running, initialize vespaVersion and lastCpuMetric - lastCpuMetric = new CpuUsageReporter(clock.instant()); dockerOperations.getContainer(containerName) .ifPresent(container -> { - if (container.state.isRunning()) { - lastCpuMetric = new CpuUsageReporter(container.created); - } containerState = RUNNING_HOWEVER_RESUME_SCRIPT_NOT_RUN; logger.info("Container is already running, setting containerState to " + containerState); }); @@ -257,7 +256,7 @@ public class NodeAgentImpl implements NodeAgent { private void startContainer(ContainerNodeSpec nodeSpec) { aclMaintainer.ifPresent(AclMaintainer::run); dockerOperations.startContainer(containerName, nodeSpec); - lastCpuMetric = new CpuUsageReporter(clock.instant()); + lastCpuMetric = new CpuUsageReporter(); currentFilebeatRestarter = filebeatRestarter.scheduleWithFixedDelay(() -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS); storageMaintainer.ifPresent(maintainer -> { @@ -345,7 +344,6 @@ public class NodeAgentImpl implements NodeAgent { } if (currentFilebeatRestarter != null) currentFilebeatRestarter.cancel(true); dockerOperations.removeContainer(existingContainer); - metricReceiver.unsetMetricsForContainer(hostname); containerState = ABSENT; logger.info("Container successfully removed, new containerState is " + containerState); return Optional.empty(); @@ -436,7 +434,6 @@ public class NodeAgentImpl implements NodeAgent { // Every time the node spec changes, we should clear the metrics for this container as the dimensions // will change and we will be reporting duplicate metrics. // TODO: Should be retried if writing fails - metricReceiver.unsetMetricsForContainer(hostname); if (container.isPresent()) { storageMaintainer.ifPresent(maintainer -> { maintainer.writeMetricsConfig(containerName, nodeSpec); @@ -506,38 +503,17 @@ public class NodeAgentImpl implements NodeAgent { @SuppressWarnings("unchecked") public void updateContainerNodeMetrics(int numAllocatedContainersOnHost) { final ContainerNodeSpec nodeSpec = lastNodeSpec; - if (nodeSpec == null) return; + if (nodeSpec == null || containerState == ABSENT) return; + + Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); + if (!containerStats.isPresent()) return; Dimensions.Builder dimensionsBuilder = new Dimensions.Builder() .add("host", hostname) .add("role", "tenants") - .add("flavor", nodeSpec.nodeFlavor) .add("state", nodeSpec.nodeState.toString()) - .add("zone", environment.getZone()) .add("parentHostname", environment.getParentHostHostname()); - nodeSpec.vespaVersion.ifPresent(version -> dimensionsBuilder.add("vespaVersion", version)); - - nodeSpec.owner.ifPresent(owner -> - dimensionsBuilder - .add("tenantName", owner.tenant) - .add("applicationName", owner.application) - .add("instanceName", owner.instance) - .add("applicationId", owner.tenant + "." + owner.application + "." + owner.instance) - .add("app", owner.application + "." + owner.instance)); - - nodeSpec.membership.ifPresent(membership -> - dimensionsBuilder - .add("clustertype", membership.clusterType) - .add("clusterid", membership.clusterId)); Dimensions dimensions = dimensionsBuilder.build(); - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_NODE, dimensions, "alive").sample(1); - // TODO: REMOVE - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.alive").sample(1); - - // The remaining metrics require container to exists and be running - if (containerState == ABSENT) return; - Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); - if (!containerStats.isPresent()) return; Docker.ContainerStats stats = containerStats.get(); final String APP = MetricReceiverWrapper.APPLICATION_NODE; @@ -560,83 +536,48 @@ public class NodeAgentImpl implements NodeAgent { double memoryPercentUsed = 100.0 * memoryTotalBytesUsed / memoryTotalBytes; Optional<Double> diskPercentUsed = diskTotalBytes.flatMap(total -> diskTotalBytesUsed.map(used -> 100.0 * used / total)); - metricReceiver.declareGauge(APP, dimensions, "cpu.util").sample(cpuPercentageOfAllocated); - metricReceiver.declareGauge(APP, dimensions, "mem.limit").sample(memoryTotalBytes); - metricReceiver.declareGauge(APP, dimensions, "mem.used").sample(memoryTotalBytesUsed); - metricReceiver.declareGauge(APP, dimensions, "mem.util").sample(memoryPercentUsed); - diskTotalBytes.ifPresent(diskLimit -> metricReceiver.declareGauge(APP, dimensions, "disk.limit").sample(diskLimit)); - diskTotalBytesUsed.ifPresent(diskUsed -> metricReceiver.declareGauge(APP, dimensions, "disk.used").sample(diskUsed)); - diskPercentUsed.ifPresent(diskUtil -> metricReceiver.declareGauge(APP, dimensions, "disk.util").sample(diskUtil)); - - stats.getNetworks().forEach((interfaceName, interfaceStats) -> { - Dimensions netDims = dimensionsBuilder.add("interface", interfaceName).build(); - Map<String, Number> infStats = (Map<String, Number>) interfaceStats; - - metricReceiver.declareGauge(APP, netDims, "net.in.bytes").sample(infStats.get("rx_bytes").longValue()); - metricReceiver.declareGauge(APP, netDims, "net.in.errors").sample(infStats.get("rx_errors").longValue()); - metricReceiver.declareGauge(APP, netDims, "net.in.dropped").sample(infStats.get("rx_dropped").longValue()); - metricReceiver.declareGauge(APP, netDims, "net.out.bytes").sample(infStats.get("tx_bytes").longValue()); - metricReceiver.declareGauge(APP, netDims, "net.out.errors").sample(infStats.get("tx_errors").longValue()); - metricReceiver.declareGauge(APP, netDims, "net.out.dropped").sample(infStats.get("tx_dropped").longValue()); - }); + List<DimensionMetrics> metrics = new ArrayList<>(); + DimensionMetrics.Builder systemMetricsBuilder = new DimensionMetrics.Builder(APP, dimensions) + .withMetric("cpu.util", cpuPercentageOfAllocated) + .withMetric("mem.limit", memoryTotalBytes) + .withMetric("mem.used", memoryTotalBytesUsed) + .withMetric("mem.util", memoryPercentUsed); - - // TODO: Remove when all alerts and dashboards have been updated to use new metric names - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.cpu.busy.pct").sample(cpuPercentageOfAllocated); - - addIfNotNull(dimensions, "node.cpu.throttled_time", stats.getCpuStats().get("throttling_data"), "throttled_time"); - addIfNotNull(dimensions, "node.memory.limit", stats.getMemoryStats(), "limit"); - - long memoryUsageTotal = ((Number) stats.getMemoryStats().get("usage")).longValue(); - long memoryUsageCache = ((Number) ((Map) stats.getMemoryStats().get("stats")).get("cache")).longValue(); - long memoryUsage = memoryUsageTotal - memoryUsageCache; - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.memory.usage").sample(memoryUsage); + diskTotalBytes.ifPresent(diskLimit -> systemMetricsBuilder.withMetric("disk.limit", diskLimit)); + diskTotalBytesUsed.ifPresent(diskUsed -> systemMetricsBuilder.withMetric("disk.used", diskUsed)); + diskPercentUsed.ifPresent(diskUtil -> systemMetricsBuilder.withMetric("disk.util", diskUtil)); + metrics.add(systemMetricsBuilder.build()); stats.getNetworks().forEach((interfaceName, interfaceStats) -> { Dimensions netDims = dimensionsBuilder.add("interface", interfaceName).build(); - - addIfNotNull(netDims, "node.net.in.bytes", interfaceStats, "rx_bytes"); - addIfNotNull(netDims, "node.net.in.errors", interfaceStats, "rx_errors"); - addIfNotNull(netDims, "node.net.in.dropped", interfaceStats, "rx_dropped"); - addIfNotNull(netDims, "node.net.out.bytes", interfaceStats, "tx_bytes"); - addIfNotNull(netDims, "node.net.out.errors", interfaceStats, "tx_errors"); - addIfNotNull(netDims, "node.net.out.dropped", interfaceStats, "tx_dropped"); + Map<String, Number> infStats = (Map<String, Number>) interfaceStats; + DimensionMetrics networkMetrics = new DimensionMetrics.Builder(APP, netDims) + .withMetric("net.in.bytes", infStats.get("rx_bytes").longValue()) + .withMetric("net.in.errors", infStats.get("rx_errors").longValue()) + .withMetric("net.in.dropped", infStats.get("rx_dropped").longValue()) + .withMetric("net.out.bytes", infStats.get("tx_bytes").longValue()) + .withMetric("net.out.errors", infStats.get("tx_errors").longValue()) + .withMetric("net.out.dropped", infStats.get("tx_dropped").longValue()) + .build(); + metrics.add(networkMetrics); }); - diskTotalBytes.ifPresent(diskLimit -> - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.disk.limit").sample(diskLimit)); - diskTotalBytesUsed.ifPresent(diskUsed -> - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.disk.used").sample(diskUsed)); - // TODO END REMOVE - - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "uptime").sample(lastCpuMetric.getUptime()); - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_HOST_LIFE, dimensions, "alive").sample(1); - - // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete - try { - dockerOperations.executeCommandInContainerAsRoot(containerName, 1L, "rpc_invoke", "-t 1", "tcp/localhost:19091", "setExtraMetrics", buildRPCArgumentFromMetrics()); - } catch (DockerExecTimeoutException|JsonProcessingException e) { - logger.warning("Unable to push metrics to container: " + containerName, e); - } + pushMetricsToContainer(metrics); } - protected String buildRPCArgumentFromMetrics() throws JsonProcessingException { + private void pushMetricsToContainer(List<DimensionMetrics> metrics) { StringBuilder params = new StringBuilder(); - for (MetricReceiverWrapper.DimensionMetrics dimensionMetrics : metricReceiver.getAllMetrics()) { - params.append(dimensionMetrics.toSecretAgentReport()); - } - return "s:'" + params.toString() + "'"; - } - - @SuppressWarnings("unchecked") - private void addIfNotNull(Dimensions dimensions, String yamasName, Object metrics, String metricName) { - Map<String, Object> metricsMap = (Map<String, Object>) metrics; - if (metricsMap == null || !metricsMap.containsKey(metricName)) return; try { - metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, yamasName) - .sample(((Number) metricsMap.get(metricName)).doubleValue()); - } catch (Throwable e) { - logger.warning("Failed to update " + yamasName + " metric with value " + metricsMap.get(metricName), e); + for (DimensionMetrics dimensionMetrics : metrics) { + params.append(dimensionMetrics.toSecretAgentReport()); + } + String wrappedMetrics = "s:" + params.toString(); + + // Push metrics to the metrics proxy in each container - give it maximum 1 seconds to complete + String[] command = {"rpc_invoke", "-t", "1", "tcp/localhost:19091", "setExtraMetrics", wrappedMetrics}; + dockerOperations.executeCommandInContainerAsRoot(containerName, 5L, command); + } catch (DockerExecTimeoutException | JsonProcessingException e) { + logger.warning("Unable to push metrics to container: " + containerName, e); } } @@ -665,11 +606,6 @@ public class NodeAgentImpl implements NodeAgent { class CpuUsageReporter { private long totalContainerUsage = 0; private long totalSystemUsage = 0; - private final Instant created; - - CpuUsageReporter(Instant created) { - this.created = created; - } double getCpuUsagePercentage(long currentContainerUsage, long currentSystemUsage) { long deltaSystemUsage = currentSystemUsage - totalSystemUsage; @@ -680,10 +616,6 @@ public class NodeAgentImpl implements NodeAgent { totalSystemUsage = currentSystemUsage; return cpuUsagePct; } - - long getUptime() { - return Duration.between(created, clock.instant()).getSeconds(); - } } // TODO: Also skip orchestration if we're downgrading in test/staging diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java index 03a8ed3d8c5..a83b7dd61b6 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/NodeRepositoryImpl.java @@ -127,6 +127,7 @@ public class NodeRepositoryImpl implements NodeRepository { nodeState, node.nodeType, node.nodeFlavor, + node.nodeCanonicalFlavor, Optional.ofNullable(node.wantedVespaVersion), Optional.ofNullable(node.vespaVersion), Optional.ofNullable(owner), diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/bindings/GetNodesResponse.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/bindings/GetNodesResponse.java index 0a0c61119b4..0dd0f410c07 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/bindings/GetNodesResponse.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/noderepository/bindings/GetNodesResponse.java @@ -31,6 +31,7 @@ public class GetNodesResponse { public final String nodeState; public final String nodeType; public final String nodeFlavor; + public final String nodeCanonicalFlavor; public final String wantedVespaVersion; public final String vespaVersion; public final Owner owner; @@ -50,6 +51,7 @@ public class GetNodesResponse { @JsonProperty("state") String nodeState, @JsonProperty("type") String nodeType, @JsonProperty("flavor") String nodeFlavor, + @JsonProperty("canonicalFlavor") String nodeCanonicalFlavor, @JsonProperty("wantedVespaVersion") String wantedVespaVersion, @JsonProperty("vespaVersion") String vespaVersion, @JsonProperty("owner") Owner owner, @@ -67,6 +69,7 @@ public class GetNodesResponse { this.nodeState = nodeState; this.nodeType = nodeType; this.nodeFlavor = nodeFlavor; + this.nodeCanonicalFlavor = nodeCanonicalFlavor; this.wantedVespaVersion = wantedVespaVersion; this.vespaVersion = vespaVersion; this.owner = owner; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java index 07ee9620102..ff6ac9ce1e7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/restapi/RestApiHandler.java @@ -7,6 +7,7 @@ import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; import com.yahoo.container.logging.AccessLog; +import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics; import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper; import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater; import com.yahoo.vespa.hosted.node.admin.provider.ComponentsProvider; @@ -68,7 +69,7 @@ public class RestApiHandler extends LoggingRequestHandler{ @Override public void render(OutputStream outputStream) throws IOException { try (PrintStream printStream = new PrintStream(outputStream)) { - for (MetricReceiverWrapper.DimensionMetrics dimensionMetrics : metricReceiverWrapper.getAllMetrics()) { + for (DimensionMetrics dimensionMetrics : metricReceiverWrapper.getAllMetrics()) { String secretAgentJsonReport = dimensionMetrics.toSecretAgentReport() + "\n"; printStream.write(secretAgentJsonReport.getBytes(StandardCharsets.UTF_8.name())); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 6f322165054..91d9b382b7c 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -32,12 +32,14 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.inOrder; @@ -529,25 +531,23 @@ public class NodeAgentImplTest { clock.advance(Duration.ofSeconds(1234)); nodeAgent.updateContainerNodeMetrics(5); - - File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.active.expected.json").getFile()); - Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class); - Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw(); - - String arg = nodeAgent.buildRPCArgumentFromMetrics(); - arg = arg.replaceAll("\"timestamp\":\\d+", "\"timestamp\":0"); - - assertEquals("s:'{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"mem.limit\":4.294967296E9,\"mem.used\":1.073741824E9,\"alive\":1.0,\"disk.used\":4.2547019776E10,\"disk.util\":15.85,\"cpu.util\":6.75,\"disk.limit\":2.68435456E11,\"mem.util\":25.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"net.out.bytes\":2.0303455E7,\"net.out.dropped\":13.0,\"net.in.dropped\":4.0,\"net.in.bytes\":1.949927E7,\"net.out.errors\":3.0,\"net.in.errors\":55.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth0\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"net.out.bytes\":5.4246745E7,\"net.out.dropped\":0.0,\"net.in.dropped\":0.0,\"net.in.bytes\":3245766.0,\"net.out.errors\":0.0,\"net.in.errors\":0.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth1\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"host_life\",\"metrics\":{\"alive\":1.0,\"uptime\":1234.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"docker\",\"metrics\":{\"node.disk.limit\":2.68435456E11,\"node.disk.used\":4.2547019776E10,\"node.memory.usage\":1.073741824E9,\"node.cpu.busy.pct\":6.75,\"node.cpu.throttled_time\":4523.0,\"node.memory.limit\":4.294967296E9,\"node.alive\":1.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"docker\",\"metrics\":{\"node.net.in.dropped\":4.0,\"node.net.out.errors\":3.0,\"node.net.out.bytes\":2.0303455E7,\"node.net.in.bytes\":1.949927E7,\"node.net.out.dropped\":13.0,\"node.net.in.errors\":55.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth0\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"docker\",\"metrics\":{\"node.net.in.dropped\":0.0,\"node.net.out.errors\":0.0,\"node.net.out.bytes\":5.4246745E7,\"node.net.in.bytes\":3245766.0,\"node.net.out.dropped\":0.0,\"node.net.in.errors\":0.0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth1\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}'", arg); - - assertEquals(expectedMetrics, actualMetrics); + String[] expectedCommand = {"rpc_invoke", "-t", "1", "tcp/localhost:19091", "setExtraMetrics", + "s:{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"mem.limit\":4294967296,\"mem.used\":1073741824,\"disk.used\":42547019776,\"disk.util\":15.85,\"cpu.util\":0.0,\"mem.util\":25.0,\"disk.limit\":268435456000},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"net.out.bytes\":20303455,\"net.in.dropped\":4,\"net.out.dropped\":13,\"net.in.bytes\":19499270,\"net.out.errors\":3,\"net.in.errors\":55},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth0\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}{\"routing\":{\"yamas\":{\"namespaces\":[\"Vespa\"]}},\"application\":\"vespa.node\",\"metrics\":{\"net.out.bytes\":54246745,\"net.in.dropped\":0,\"net.out.dropped\":0,\"net.in.bytes\":3245766,\"net.out.errors\":0,\"net.in.errors\":0},\"dimensions\":{\"app\":\"testapp.testinstance\",\"role\":\"tenants\",\"instanceName\":\"testinstance\",\"vespaVersion\":\"1.2.3\",\"clusterid\":\"clustId\",\"interface\":\"eth1\",\"parentHostname\":\"parent.host.name.yahoo.com\",\"flavor\":\"docker\",\"clustertype\":\"clustType\",\"tenantName\":\"tester\",\"zone\":\"dev.us-east-1\",\"host\":\"host1.test.yahoo.com\",\"state\":\"active\",\"applicationId\":\"tester.testapp.testinstance\",\"applicationName\":\"testapp\"},\"timestamp\":0}"}; + doAnswer(invocation -> { + ContainerName calledContainerName = (ContainerName) invocation.getArguments()[0]; + long calledTimeout = (long) invocation.getArguments()[1]; + String[] calledCommand = (String[]) invocation.getArguments()[2]; + calledCommand[calledCommand.length - 1] = calledCommand[calledCommand.length - 1].replaceAll("\"timestamp\":\\d+", "\"timestamp\":0"); + + assertEquals(containerName, calledContainerName); + assertEquals(5L, calledTimeout); + assertArrayEquals(expectedCommand, calledCommand); + return null; + }).when(dockerOperations).executeCommandInContainerAsRoot(any(), any(), any()); } @Test - @SuppressWarnings("unchecked") public void testGetRelevantMetricsForReadyNode() throws Exception { - final ObjectMapper objectMapper = new ObjectMapper(); - ClassLoader classLoader = getClass().getClassLoader(); - final ContainerNodeSpec nodeSpec = nodeSpecBuilder .nodeState(Node.State.ready) .build(); @@ -561,11 +561,8 @@ public class NodeAgentImplTest { nodeAgent.updateContainerNodeMetrics(5); - File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.ready.expected.json").getFile()); - Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class); Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw(); - - assertEquals(expectedMetrics, actualMetrics); + assertEquals(Collections.emptySet(), actualMetrics); } @@ -576,8 +573,7 @@ public class NodeAgentImplTest { dockerImage, containerName, isRunning ? Container.State.RUNNING : Container.State.EXITED, - isRunning ? 1 : 0, - clock.instant().toString())) : + isRunning ? 1 : 0)) : Optional.empty(); when(dockerOperations.getContainerStats(any())).thenReturn(Optional.of(emptyContainerStats)); diff --git a/node-admin/src/test/resources/docker.stats.metrics.active.expected.json b/node-admin/src/test/resources/docker.stats.metrics.active.expected.json deleted file mode 100644 index 350a054ebac..00000000000 --- a/node-admin/src/test/resources/docker.stats.metrics.active.expected.json +++ /dev/null @@ -1,231 +0,0 @@ -[ - { - "application": "host_life", - "dimensions": { - "flavor": "docker", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "applicationName": "testapp", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "zone": "dev.us-east-1", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com" - }, - "metrics": { - "alive": 1.0, - "uptime": 1234.0 - }, - "routing": { - "yamas": { - "namespaces": [ - "Vespa" - ] - } - } - }, - { - "application": "vespa.node", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1" - }, - "metrics": { - "alive": 1.0, - "cpu.util": 6.75, - "mem.limit": 4.294967296E9, - "mem.used": 1.073741824E9, - "mem.util": 25.0, - "disk.limit": 2.68435456E11, - "disk.used": 4.2547019776E10, - "disk.util": 15.85 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "vespa.node", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1", - "interface": "eth1" - }, - "metrics": { - "net.out.bytes": 5.4246745E7, - "net.out.errors": 0.0, - "net.out.dropped": 0.0, - "net.in.bytes": 3245766.0, - "net.in.errors": 0.0, - "net.in.dropped": 0.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "vespa.node", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1", - "interface": "eth0" - }, - "metrics": { - "net.out.bytes": 2.0303455E7, - "net.out.errors": 3.0, - "net.out.dropped": 13.0, - "net.in.bytes": 1.949927E7, - "net.in.errors": 55.0, - "net.in.dropped": 4.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "docker", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1", - "interface": "eth1" - }, - "metrics": { - "node.net.out.bytes": 5.4246745E7, - "node.net.out.errors": 0.0, - "node.net.out.dropped": 0.0, - "node.net.in.bytes": 3245766.0, - "node.net.in.errors": 0.0, - "node.net.in.dropped": 0.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "docker", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1" - }, - "metrics": { - "node.alive": 1.0, - "node.cpu.busy.pct": 6.75, - "node.cpu.throttled_time": 4523.0, - "node.memory.usage": 1.073741824E9, - "node.memory.limit": 4.294967296E9, - "node.disk.used": 4.2547019776E10, - "node.disk.limit": 2.68435456E11 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "docker", - "dimensions": { - "flavor": "docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1", - "interface": "eth0" - }, - "metrics": { - "node.net.out.bytes": 2.0303455E7, - "node.net.out.errors": 3.0, - "node.net.out.dropped": 13.0, - "node.net.in.bytes": 1.949927E7, - "node.net.in.errors": 55.0, - "node.net.in.dropped": 4.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - } -] diff --git a/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json b/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json deleted file mode 100644 index ed2d5e04446..00000000000 --- a/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json +++ /dev/null @@ -1,40 +0,0 @@ -[ - { - "application": "vespa.node", - "dimensions": { - "flavor": "docker", - "role": "tenants", - "host": "host1.test.yahoo.com", - "state": "ready", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1" - }, - "metrics": { - "alive": 1.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - }, - { - "application": "docker", - "dimensions": { - "flavor": "docker", - "role": "tenants", - "host": "host1.test.yahoo.com", - "state": "ready", - "parentHostname": "parent.host.name.yahoo.com", - "zone": "dev.us-east-1" - }, - "metrics": { - "node.alive": 1.0 - }, - "routing": { - "yamas": { - "namespaces": ["Vespa"] - } - } - } -] diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java index 6dc85118ad2..f139f2bc156 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java @@ -357,7 +357,8 @@ public class RestApiTest { "{\"message\":\"Moved host1.yahoo.com to failed\"}"); assertResponse(new Request("http://localhost:8080/nodes/v2/state/ready/host1.yahoo.com", new byte[0], Request.Method.PUT), - 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Can not set failed node host1.yahoo.com allocated to tenant 'tenant2', application 'application2', instance 'instance2' as 'content/id2/0/0' ready. It is not dirty.\"}"); + 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Can not set failed node host1.yahoo.com allocated to tenant2.application2.instance2 as 'content/id2/0/0' ready. It is not dirty.\"}"); + // (... while dirty then ready works (the ready move will be initiated by node maintenance)) assertResponse(new Request("http://localhost:8080/nodes/v2/state/dirty/host1.yahoo.com", new byte[0], Request.Method.PUT), @@ -372,7 +373,7 @@ public class RestApiTest { "{\"message\":\"Moved host2.yahoo.com to parked\"}"); assertResponse(new Request("http://localhost:8080/nodes/v2/state/ready/host2.yahoo.com", new byte[0], Request.Method.PUT), - 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Can not set parked node host2.yahoo.com allocated to tenant 'tenant2', application 'application2', instance 'instance2' as 'content/id2/0/1' ready. It is not dirty.\"}"); + 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Can not set parked node host2.yahoo.com allocated to tenant2.application2.instance2 as 'content/id2/0/1' ready. It is not dirty.\"}"); // (... while dirty then ready works (the ready move will be initiated by node maintenance)) assertResponse(new Request("http://localhost:8080/nodes/v2/state/dirty/host2.yahoo.com", new byte[0], Request.Method.PUT), @@ -137,7 +137,7 @@ <configuration> <additionalparam>-Xdoclint:${doclint} -Xdoclint:-missing</additionalparam> </configuration> - <version>2.9</version> + <version>2.10.4</version> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -210,7 +210,7 @@ <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>exec-maven-plugin</artifactId> - <version>1.2.1</version> + <version>1.6.0</version> </plugin> <plugin> <groupId>org.codehaus.mojo</groupId> @@ -535,7 +535,7 @@ <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-compress</artifactId> - <version>1.8</version> + <version>1.11</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> @@ -806,17 +806,17 @@ <dependency> <groupId>org.apache.maven</groupId> <artifactId>maven-artifact</artifactId> - <version>3.1.1</version> + <version>3.5.0</version> </dependency> <dependency> <groupId>org.apache.maven</groupId> <artifactId>maven-core</artifactId> - <version>3.1.1</version> + <version>3.5.0</version> </dependency> <dependency> <groupId>org.apache.maven</groupId> <artifactId>maven-model</artifactId> - <version>3.1.1</version> + <version>3.5.0</version> </dependency> <dependency> <groupId>org.apache.maven.plugin-tools</groupId> @@ -834,14 +834,9 @@ <version>2.2.1</version> </dependency> <dependency> - <groupId>org.codehaus.plexus</groupId> - <artifactId>plexus-interactivity-api</artifactId> - <version>1.0-alpha-5</version> - </dependency> - <dependency> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> - <version>2.2</version> + <version>3.0.2</version> </dependency> <dependency> <groupId>org.apache.maven.surefire</groupId> @@ -1013,7 +1008,7 @@ <dependency> <groupId>org.twdata.maven</groupId> <artifactId>mojo-executor</artifactId> - <version>2.2.0</version> + <version>2.3.0</version> </dependency> <dependency> <groupId>net.jcip</groupId> diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java deleted file mode 100644 index 13343029ebc..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import java.util.List; - -/** - * A producer of a list of cases for function training. - * - * @author bratseth - */ -public interface CaseList { - - public List<TrainingSet.Case> cases(); - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java deleted file mode 100644 index bbd3844d036..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; - -import java.util.List; - -/** - * An entity which may evolve over time - * - * @author bratseth - */ -public abstract class Evolvable implements Comparable<Evolvable> { - - public abstract Evolvable makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment); - - public abstract RankingExpression getGenepool(); - - @Override - public int compareTo(Evolvable other) { - return -Double.compare(getFitness(), other.getFitness()); - } - - public abstract double getFitness(); - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java deleted file mode 100644 index e42636c00b2..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; - -import java.util.Collections; -import java.util.List; - -/** - * An individual in an evolving population - a genome with a fitness score. - * Individuals are comparable by decreasing fitness. - * <p> - * As we are training ranking expressions, the genome, here, is the ranking expression. - * - * @author bratseth - */ -public class Individual extends Evolvable { - - private final RankingExpression genome; - private final TrainingSet trainingSet; - private final double fitness; - - public Individual(RankingExpression genome, TrainingSet trainingSet) { - this.genome = genome; - this.trainingSet = trainingSet; - this.fitness = trainingSet.evaluate(genome); - } - - public RankingExpression getGenome() { return genome; } - - public double calculateAverageError() { - return trainingSet.calculateAverageError(genome); - } - - public double calculateAverageErrorPercentage() { - return trainingSet.calculateAverageErrorPercentage(genome); - } - - @Override - public double getFitness() { return fitness; } - - @Override - public Individual makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment) { - return new Individual(environment.recombiner().recombine(genome, genepool), trainingSet); - } - - @Override - public RankingExpression getGenepool() { - return genome; - } - - @Override - public String toString() { - return toSomewhatShortString() + ", expression: " + genome; - } - - /** Returns a shorter string describing this (not including the expression */ - public String toSomewhatShortString() { - return "Error % " + calculateAverageErrorPercentage() + - " average error " + calculateAverageError() + - " fitness " + getFitness(); - } - - /** Returns a shorter string describing this (not including the expression */ - public String toShortString() { - return "Error: " + calculateAverageErrorPercentage() + " %"; - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java deleted file mode 100644 index 58e569bef33..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import java.awt.KeyEventDispatcher; -import java.awt.KeyboardFocusManager; -import java.awt.event.KeyEvent; - -/** - * TODO - * - * @author bratseth - */ -public class KeyboardChecker { - - private static boolean qPressed = false; - - private final Object lock = new Object(); - - public KeyboardChecker() { - KeyboardFocusManager.getCurrentKeyboardFocusManager().addKeyEventDispatcher(new KeyEventDispatcher() { - - @Override - public boolean dispatchKeyEvent(KeyEvent ke) { - synchronized (lock) { - switch (ke.getID()) { - case KeyEvent.KEY_PRESSED: - if (ke.getKeyCode() == KeyEvent.VK_Q) { - qPressed = true; - } - break; - - case KeyEvent.KEY_RELEASED: - if (ke.getKeyCode() == KeyEvent.VK_Q) { - qPressed = false; - } - break; - } - return false; - } - } - }); - } - - public boolean isQPressed() { - synchronized (lock) { - return qPressed; - } - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java deleted file mode 100644 index 204c03b92b6..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.io.IOUtils; -import com.yahoo.searchlib.mlr.ga.caselist.FileCaseList; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.parser.ParseException; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; - -/** - * Command line runner for training sessions - * - * @author bratseth - */ -/* -TODO: Switch order of generation and sequence in names -TODO: Output fitness improvement on each step (esp useful for species evolution) -TODO: Detect local optima (no improvement for n rounds) and stop early -TODO: Split into training and validation sets - */ -public class Main { - - public Main(String[] args, Tracker tracker) { - if (args.length < 1 || args[0].trim().equals("help")) { - System.out.println( - "Finds a ranking expression matching a training set given as a case file.\n" + - "Run until the expression seems good enough.\n" + - "Usage: ga <case-file> - \n" + - " where case-file is a file containing case lines on the form \n" + - " targetValue, argument1:value1, ...\n" + - " (comment lines starting by # are also permitted)\n"); - return; - } - - TrainingParameters parameters = new TrainingParameters(); - //parameters.setAllowConditions(false); - parameters.setErrorIsRelative(false); - parameters.setInitialSpeciesSize(40); - parameters.setSpeciesLifespan(100); - parameters.setExcludeFeatures("F7,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F21,F23,F24,F25,F26,F27,F29,F30,F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F44,F46,F47,F48,F49,F50,F52,F53,F55,F56,F57,F58,F59,F60,F61,F62,F63,F64,F65,F67,F69,F70,F71,F72,F73,F75,F76,F78,F79,F80,F81,F82,F83,F84,F85,F86,F87,F88,F90,F92,F93,F94,F95,F96,F98,F99,F100,F101,F102,F103,F104,F105,F106,F107,F108,F109,F66,F89,F110"); - //parameters.setInitialSpeciesSize(20); - - String caseFile = args[0]; - TrainingSet trainingSet = new TrainingSet(FileCaseList.create(caseFile, parameters), parameters); - Trainer trainer = new Trainer(trainingSet); - - if (args.length > 1) { // Evaluate given expression - try { - Individual given = new Individual(new RankingExpression(new BufferedReader(new FileReader(args[1]))), trainingSet); - System.out.println("Error in '" + args[1] + "': error % " + given.calculateAverageErrorPercentage() + - " average error " + given.calculateAverageError() + - " fitness " + given.getFitness()); - } - catch (IOException | ParseException e) { - throw new IllegalArgumentException("Could not evaluate expression in argument 2", e); - } - } - else { // Train expression - // TODO: Move system outs to tracker - System.out.println("Learning ..."); - RankingExpression learntExpression = trainer.train(parameters, tracker); - System.out.println("Learnt expression: " + learntExpression); - } - } - - public static void main(String[] args) { - new Main(args, new PrintingTracker(10, 0, 1)); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java deleted file mode 100644 index 8aa47db6d09..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * A collection of evolvables - * - * @author bratseth - */ -public class Population { - - /** The current members of this population, always sorted by decreasing fitness */ - private List<Evolvable> members; - - public Population(List<Evolvable> initialMembers) { - members = new ArrayList<>(initialMembers); - Collections.sort(members); - } - - /** Returns the most fit member of this population (never null) */ - public Evolvable best() { - return members.get(0); - } - - /** Returns the members of this population as an unmodifiable list sorted by decreasing fitness*/ - public List<Evolvable> members() { return Collections.unmodifiableList(members); } - - public void evolve(int generation, TrainingEnvironment environment) { - TrainingParameters p = environment.parameters(); - int generationSize = p.getInitialSpeciesSize() - - (int)Math.round((p.getInitialSpeciesSize() - p.getFinalSpeciesSize()) * generation/p.getSpeciesLifespan()); - members = breed(members, generationSize * p.getGenerationCandidatesFactor(), environment); - Collections.sort(members); - members = members.subList(0, Math.min(generationSize, members.size())); - } - - private List<Evolvable> breed(List<Evolvable> members, int offspringCount, TrainingEnvironment environment) { - List<Evolvable> offspring = new ArrayList<>(offspringCount); // TODO: Can we do this inline and keep the list forever (and then also the immutable view) - offspring.add(members.get(0)); // keep the best as-is - List<RankingExpression> genePool = collectGenepool(members); - for (int i = 0; i < offspringCount - 1; i++) { - Evolvable child = members.get(i % members.size()).makeSuccessor(i, genePool, environment); - offspring.add(child); - } - return offspring; - } - - private List<RankingExpression> collectGenepool(List<Evolvable> members) { - List<RankingExpression> genepool = new ArrayList<>(); - for (Evolvable member : members) - genepool.add(member.getGenepool()); - return genepool; - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java deleted file mode 100644 index 1bd7980bc3f..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.yolean.Exceptions; - -import java.util.List; - -/** - * A tracker which prints a summary of training events to standard out - * - * @author bratseth - */ -public class PrintingTracker implements Tracker { - - private final int iterationEvery; - private final int survivorsEvery; - private final int printSpeciesCreationLevel; - private final int printSpeciesCompletionLevel; - - public PrintingTracker() { - this(0, 1); - } - - public PrintingTracker(int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { - this(Integer.MAX_VALUE, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel); - } - - public PrintingTracker(int iterationEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { - this(iterationEvery, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel); - } - - public PrintingTracker(int iterationEvery, int survivorsEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { - this.iterationEvery = iterationEvery; - this.survivorsEvery = survivorsEvery; - this.printSpeciesCreationLevel = printSpeciesCreationLevel; - this.printSpeciesCompletionLevel = printSpeciesCompletionLevel; - } - - @Override - public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool) { - if (predecessor.name().level() > printSpeciesCreationLevel) return; - System.out.println(spaces(predecessor.name().level()*2) + "Creating new species of size " + initialSize + " and a gene pool of size " + genePool.size() + " from predecessor " + predecessor); - } - - @Override - public void newSpeciesCreated(Species species) { - if (species.name().level() > printSpeciesCreationLevel) return; - System.out.println(spaces(species.name().level()*2) + "Created and will now evolve " + species); - } - - @Override - public void speciesCompleted(Species species) { - if (species.name().level() > printSpeciesCompletionLevel) return; - System.out.println(spaces(species.name().level()*2) + "--> Evolution completed for " + species); - } - - /** Called each time a species (or super-species) have completed one generation */ - @Override - public void iteration(Species species, int generation) { - try { - new RankingExpression(species.bestIndividual().getGenome().toString()); - } - catch (Exception e) { - System.err.println("ERROR: " + Exceptions.toMessageString(e) + ": " + species.bestIndividual().getGenome()); - } - - if ( (generation % iterationEvery) == 0) - System.out.println(spaces(species.name().level()*2) + "Gen " + generation + " of " + species); - - if ( (generation % survivorsEvery) == 0) - printPopulation(species.name().level(), species.population().members()); - } - - @Override - public void result(Evolvable winner) { - System.out.println("Learnt expression: " + winner); - } - - private String spaces(int spaces) { - return " ".substring(0,spaces); - } - - private void printPopulation(int level, List<Evolvable> survivors) { - if (survivors.size()<=1) return; - System.out.println(" Population:"); - for (Evolvable individual : survivors) - System.out.println(spaces(level*2) + " " + individual); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java deleted file mode 100644 index 596db4cfd42..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.mlr.ga.CaseList; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.mlr.ga.TrainingSet; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Produces a list of training cases (argument and target value pairs) - * from a Ranking Expression. - * Useful for testing. - * - * @author bratseth - */ -public class RankingExpressionCaseList implements CaseList { - - private final List<TrainingSet.Case> cases = new ArrayList<TrainingSet.Case>(); - - public RankingExpressionCaseList(List<Context> arguments, RankingExpression targetFunction) { - for (Context argument : arguments) - cases.add(new TrainingSet.Case(argument,targetFunction.evaluate(argument).asDouble())); - } - - /** Returns the list of cases generated from the ranking expression */ - @Override - public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java deleted file mode 100644 index 8fd40ec793f..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; -import com.yahoo.searchlib.rankingexpression.rule.*; - -import java.util.*; -import java.util.logging.Logger; - -import static java.lang.Math.abs; -import static java.lang.Math.max; -import static java.lang.Math.min; - -/** - * A class which returns a mutated, recombined genome from a list of parent genomes. - * - * @author bratseth - */ -public class Recombiner { - - // TODO: Either make ranking expressions immutable and get rid of parent pointer, or do clone everywhere below - - private static final Logger log = Logger.getLogger(Trainer.class.getName()); - - private final Random random = new Random(); - - private final List<String> features; - - private final TrainingParameters parameters; - - /** - * Creates a recombiner - * - * @param features the list of feature names which are possible within the space we are training, - * such that these may be spontaneously added to expressions. - */ - public Recombiner(Collection<String> features, TrainingParameters trainingParameters) { - this.features = Collections.unmodifiableList(new ArrayList<>(features)); - this.parameters = trainingParameters; - } - - public RankingExpression recombine(RankingExpression genome, List<RankingExpression> genePool) { - List<ExpressionNode> genePoolRoots = new ArrayList<>(); - for (RankingExpression genePoolGenome : genePool) - genePoolRoots.add(genePoolGenome.getRoot()); - return new RankingExpression(mutate(genome.getRoot(), genePoolRoots, 0)); - } - - private ExpressionNode mutate(ExpressionNode gene, List<ExpressionNode> genePool, int depth) { - // TODO: Extract insert level - if (gene instanceof BooleanNode) - return simplifyCondition(mutateChildren((CompositeNode)gene,genePool,depth+1)); - if (gene instanceof CompositeNode) - return insertNodeLevel(simplify(removeNodeLevel(mutateChildren((CompositeNode)gene,genePool,depth+1))), genePool, depth+1); - else - return insertNodeLevel(mutateLeaf(gene), genePool, depth+1); - } - - private BooleanNode simplifyCondition(ExpressionNode node) { - // Nothing yet - return (BooleanNode)node; - } - - /** Very basic algorithmic simplification */ - private ExpressionNode simplify(ExpressionNode node) { - if (! (node instanceof CompositeNode)) return node; - CompositeNode composite = (CompositeNode)node; - if (maxDepth(composite)>2) return composite; - List<ExpressionNode> children = composite.children(); - if (children.size()!=2) return composite; - if ( ! (children.get(0) instanceof ConstantNode)) return composite; - if ( ! (children.get(1) instanceof ConstantNode)) return composite; - return new ConstantNode(composite.evaluate(null)); - } - - private CompositeNode mutateChildren(CompositeNode gene, List<ExpressionNode> genePool, int depth) { - if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make this a non-composite - - List<ExpressionNode> mutatedChildren = new ArrayList<>(); - for (ExpressionNode child : gene.children()) - mutatedChildren.add(mutate(child, genePool, depth)); - return gene.setChildren(mutatedChildren); - } - - private ExpressionNode insertNodeLevel(ExpressionNode gene, List<ExpressionNode> genePool, int depth) { - if (probability() < 0.9) return gene; - if (depth + maxDepth(gene) >= parameters.getMaxExpressionDepth()) return gene; - ExpressionNode newChild = generateChild(genePool, depth); - if (probability() < 0.5) - return generateComposite(gene, newChild, genePool, depth); - else - return generateComposite(newChild, gene, genePool, depth); - } - - private ExpressionNode removeNodeLevel(CompositeNode gene) { - if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make featurenode a non-composite - if (probability() < 0.9) return gene; - return randomFrom(gene.children()); - } - - private ExpressionNode generateComposite(ExpressionNode left, ExpressionNode right, List<ExpressionNode> genePool, int depth) { - int type = random.nextInt(2 + ( parameters.getAllowConditions() ? 1:0 ) ); // pick equally between 2 or 3 types - if (type == 0) { - return new ArithmeticNode(left, pickArithmeticOperator(), right); - } - else if (type == 1) { - Function function = pickFunction(); - if (function.arity() == 1) - return new FunctionNode(function, left); - else // arity==2 - return new FunctionNode(function, left, right); - } - else { - return new IfNode(generateCondition(genePool, depth + 1), left, right); - } - } - - private BooleanNode generateCondition(List<ExpressionNode> genePool, int depth) { - // TODO: Add set membership nodes - return new ComparisonNode(generateChild(genePool, depth), TruthOperator.SMALLER, generateChild(genePool, depth)); - } - - private ExpressionNode generateChild(List<ExpressionNode> genePool, int depth) { - if (genePool.isEmpty() || probability() < 0.1) { // entirely new child - return generateLeaf(); - } - else { // pick from gene pool - ExpressionNode picked = randomFrom(genePool); - int pickedDepth = 0; - // descend until we are at at least the same depth as this depth - // to make sure branches spliced in are shallow enough that we avoid growing - // larger than maxDepth - while (picked instanceof CompositeNode && (pickedDepth++ < depth || probability() < 0.5)) { - if (picked instanceof ReferenceNode) continue; // TODO: Remove if we make referencenode a noncomposite - picked = randomFrom(((CompositeNode)picked).children()); - } - return picked; - } - } - - public ExpressionNode mutateLeaf(ExpressionNode leaf) { - if (probability() < 0.5) return leaf; // TODO: For performance. Drop? - // TODO: Other leaves - ConstantNode constant = (ConstantNode)leaf; - return new ConstantNode(DoubleValue.frozen(constant.getValue().asDouble()*aboutOne())); - } - - public ExpressionNode generateLeaf() { - if (probability()<0.5 || features.size() == 0) - return new ConstantNode(DoubleValue.frozen(random.nextDouble() * 2000 - 1000)); // TODO: Use some non-uniform distribution - else - return new ReferenceNode(randomFrom(features)); - } - - private double aboutOne() { - return 1 + Math.pow(-0.1, random.nextInt(4) + 1); - } - - private double probability() { - return random.nextDouble(); - } - - private <T> T randomFrom(List<T> expressionList) { - return expressionList.get(random.nextInt(expressionList.size())); - } - - private ArithmeticOperator pickArithmeticOperator() { - switch (random.nextInt(4)) { - case 0: return ArithmeticOperator.PLUS; - case 1: return ArithmeticOperator.MINUS; - case 2: return ArithmeticOperator.MULTIPLY; - case 3: return ArithmeticOperator.DIVIDE; - } - throw new RuntimeException("This cannot happen"); - } - - /** Pick among the subset of functions which are probably useful */ - private Function pickFunction() { - switch (random.nextInt(5)) { - case 0: return Function.tanh; - case 1: return Function.exp; - case 2: return Function.log; - case 3: return Function.pow; - case 4: return Function.sqrt; - } - throw new RuntimeException("This cannot happen"); - } - - // TODO: Make ranking expressions immutable and compute this on creation? - private int maxDepth(ExpressionNode node) { - if ( ! (node instanceof CompositeNode)) return 1; - - int maxChildDepth = 0; - for (ExpressionNode child : ((CompositeNode)node).children()) - maxChildDepth = Math.max(maxDepth(child), maxChildDepth); - return maxChildDepth + 1; - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java deleted file mode 100644 index 1870f9c0afc..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; - -import java.util.ArrayList; -import java.util.List; - -/** - * A species is a population of evolvables. - * Contrary to a real species, a species population may contain (sub)species - * rather than individuals - at all levels but the lowest. - * - * @author bratseth - */ -public class Species extends Evolvable { - - private SpeciesName name; - private final Population population; - - /** Create a species having a given initial population */ - public Species(SpeciesName name, Population population) { - this.name = name; - this.population = population; - } - - /** Create a species evolved from a predecessor species, using the given gene pool for mutating it */ - private Species(SpeciesName name, Species predecessor, List<RankingExpression> genepool, TrainingEnvironment environment) { - this.name = name; - environment.tracker().newSpecies(predecessor, environment.parameters().getInitialSpeciesSize(), genepool); - - // Initialize new species with members generated from the predecessor species - List<Evolvable> initialMembers = new ArrayList<>(); - for (int i = 0; i < environment.parameters().getInitialSpeciesSize(); i++) - initialMembers.add(drawFrom(predecessor.population, i).makeSuccessor(i, genepool, environment)); - population = new Population(initialMembers); - - // Evolve the population of this species for the configured number of generations - environment.tracker().newSpeciesCreated(this); - for (int generation = 0; generation < environment.parameters().getSpeciesLifespan(); generation++) { - environment.tracker().iteration(this, generation+1); - population.evolve(generation, environment); - if (Double.isInfinite(bestIndividual().getFitness())) break; // jackpot - // if (keyboardChecker.isQPressed()) break; // user quit TODO: Make work - } - environment.tracker().speciesCompleted(this); - } - - /** - * Draws a member from the given population, where the probability of being drawn is proportional to the - * fitness of the member - */ - private Evolvable drawFrom(Population population, int succession) { - return population.members().get(Math.min(succession % 3, population.members().size() - 1)); // TODO: Probabilistic by fitness? - } - - public SpeciesName name() { return name; } - - /** The fitness of the fittest individual in the population */ - @Override - public double getFitness() { - return population.best().getFitness(); - } - - /** Creates the successor of this, using its genes, mutated drawing from the given gene pool */ - @Override - public Evolvable makeSuccessor(int memberNumber, List<RankingExpression> genepool, TrainingEnvironment environment) { - return new Species(name.successor(memberNumber), this, genepool, environment); - } - - /** Returns the members of this species */ - public Population population() { return population; } - - /** The genes of the fittest individual in the population of this */ - @Override - public RankingExpression getGenepool() { // TODO: Less sharp? - return population.best().getGenepool(); - } - - /** Returns the best individual below this in the species hierarchy (e.g recursively the best leaf) */ - public Individual bestIndividual() { - Evolvable child = this; - while (child instanceof Species) - child = ((Species)child).population.best(); - return (Individual)child; // it is when it is not instanceof Species - } - - @Override - public String toString() { - return "species " + name + ", best member: " + population.best(); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java deleted file mode 100644 index 862d8e8899d..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -/** - * The name of a species. For tracking purposes. - * A name has the form superSpeciesName + "/" + serialNumber.generationNumber. - * - * @author bratseth - */ -public class SpeciesName { - - private final int level, serial, generation; - - private final String name, prefixName; - - private SpeciesName(int level, int serial, int generation, String prefixName) { - this.level = level; - this.serial = serial; - this.generation = generation; - this.prefixName = prefixName; - if (level == 0) - this.name = ""; - else - this.name = prefixName + (prefixName.isEmpty() ? "" : "/") + serial + "." + generation; - } - - /** - * The level in the species hierarchy of the species having this name. - * The root species has level 0. - */ - public int level() { return level; } - - /** Returns the name of the root species: The empty string at level 0 */ - public static SpeciesName createRoot() { - return new SpeciesName(0 ,0 ,0, ""); - } - - @Override - public String toString() { - if (level == 0) return "(root)"; - return name; - } - - /** Returns the name of a new subspecies */ - public SpeciesName subspecies(int serial) { - return new SpeciesName(level+1, serial, 0, name); - } - - /** Returns the name of the successor of this species */ - public SpeciesName successor(int serial) { - return new SpeciesName(level, serial, generation+1, prefixName); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java deleted file mode 100644 index 0a18820560b..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; - -import java.util.List; - -/** - * A tracker receives callbacks about events happening during a training session. - * - * @author bratseth - */ -public interface Tracker { - - public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool); - - public void newSpeciesCreated(Species species); - - public void speciesCompleted(Species species); - - public void iteration(Species species, int generation); - - public void result(Evolvable winner); - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java deleted file mode 100644 index b5268f1bb98..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; -import com.yahoo.searchlib.rankingexpression.parser.ParseException; -import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * Learns a ranking expression from some seed expressions and a training set. - * - * @author bratseth - */ -public class Trainer { - - // TODO: Simplify this to constructor only ... or maybe remove ... or combine with TrainingEnvironment - // TODO: Also: Rename to Training? - - private final TrainingSet trainingSet; - private final Set<String> argumentNames; - - /** - * Creates a new trainer. - */ - public Trainer(TrainingSet trainingSet) { - this(trainingSet, trainingSet.argumentNames()); - } - - /** - * Creates a new trainer which uses a specified list of expression argument names - * rather than the argument names given by the training set. - */ - public Trainer(TrainingSet trainingSet, Set<String> argumentNames) { - this.trainingSet = trainingSet; - this.argumentNames = new HashSet<>(argumentNames); - } - - public RankingExpression train(TrainingParameters parameters, Tracker tracker) { - TrainingEnvironment environment = new TrainingEnvironment(new Recombiner(argumentNames, parameters), tracker, trainingSet, parameters); - SpeciesName rootName = SpeciesName.createRoot(); - Species genesisSubSpecies = new Species(rootName.subspecies(0), new Population(Collections.<Evolvable>singletonList(new Individual(new RankingExpression(new ConstantNode(new DoubleValue(1))), trainingSet)))); - Species rootSpecies = (Species) new Species(rootName, new Population(Collections.<Evolvable>singletonList(genesisSubSpecies))) - .makeSuccessor(0, Collections.<RankingExpression>emptyList(), environment); - Individual winner = rootSpecies.bestIndividual(); - tracker.result(winner); - return winner.getGenome(); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java deleted file mode 100644 index e874267970c..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -/** - * The static environment of a training session - * - * @author bratseth - */ -public class TrainingEnvironment { - - // TODO: Not sure if this belongs ... or should even be an instance - // TODO: maybe collapse Trainer into this and call it TrainingSession - private final Recombiner recombiner; - private final Tracker tracker; - private final TrainingSet trainingSet; - private final TrainingParameters parameters; - - public TrainingEnvironment(Recombiner recombiner, Tracker tracker, - TrainingSet trainingSet, TrainingParameters parameters) { - this.recombiner = recombiner; - this.tracker = tracker; - this.trainingSet = trainingSet; - this.parameters = parameters; - } - - public Recombiner recombiner() { return recombiner; } - public Tracker tracker() { return tracker; } - public TrainingSet trainingSet() { return trainingSet; } - public TrainingParameters parameters() { return parameters; } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java deleted file mode 100644 index 71ff8bfe259..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import java.util.HashSet; -import java.util.Set; - -/** - * @author bratseth - */ -public class TrainingParameters { - - // A note: - // The total number of species generated and evaluated is - // (generationCandidatesFactor * speciesLifespan * (initialSpeciesSize-finalSpeciesSize)/2 ) ^ speciesLevels - // (speciesLevel is hardcoded to 2 atm) - - private int speciesLifespan = 1000; - private int initialSpeciesSize = 10; - private double finalSpeciesSize = 1; - private int generationCandidatesFactor = 3; - private int maxExpressionDepth = 6; - private boolean allowConditions = true; - private boolean errorIsRelative = true; - private Set<String> excludeFeatures = new HashSet<>(); - private String trainingSetFormat = null; - private double validationFraction = 0.2; - - /** The number of generation which a given species (or super-species at any level) lives. Default:1000 */ - public int getSpeciesLifespan() { return speciesLifespan; } - public void setSpeciesLifespan(int generations) { this.speciesLifespan = generations; } - - /** The number of members in a species (or super-species at any level) as it is created. Default: 10 */ - public int getInitialSpeciesSize() { return initialSpeciesSize; } - public void setInitialSpeciesSize(int initialSpeciesSize) { this.initialSpeciesSize = initialSpeciesSize; } - - /** - * The number of members in a species in its final generation. - * The size of the species will be reduced linearly in each generation to go from initial size to final size. - * Default: 1 - */ - public double getFinalSpeciesSize() { return finalSpeciesSize; } - public void setFinalSpeciesSize(int finalSpeciesSize) { this.finalSpeciesSize = finalSpeciesSize; } - - /* - * The factor determining how many more members are generated than are allowed to survive in each generation of a species. - * Default: 3 - */ - public int getGenerationCandidatesFactor() { return generationCandidatesFactor; } - public void setGenerationCandidatesFactor(int generationCandidatesFactor) { this.generationCandidatesFactor = generationCandidatesFactor; } - - /** - * The max depth of expressions this is allowed to generate. - * Default: 6 - */ - public int getMaxExpressionDepth() { return maxExpressionDepth; } - public void setMaxExpressionDepth(int maxExpressionDepth) { this.maxExpressionDepth = maxExpressionDepth; } - - /** - * Whether mutation should allow creation of condition (if) expressions. - * Default: true - */ - public boolean getAllowConditions() { return allowConditions; } - public void setAllowConditions(boolean allowConditions) { this.allowConditions = allowConditions; } - - /** - * Whether errors are relative to the absolute value of the function at that point or not. - * If true, training will assign equal weight to the error of 1.1 for 1 and 110 for 100. - * If false, training will instead assign a 10x weight to the latter. - * Default: True. - */ - public boolean getErrorIsRelative() { return errorIsRelative; } - public void setErrorIsRelative(boolean errorIsRelative) { this.errorIsRelative = errorIsRelative; } - - /** - * Returns the set of features to exclude during training. - * Returned as an immutable set, never null. - */ - public Set<String> getExcludeFeatures() { return excludeFeatures; } - /** Sets the features to exclude from a comma-separated string */ - public void setExcludeFeatures(String excludeFeatureString) { - for (String featureName : excludeFeatureString.split(",")) - excludeFeatures.add(featureName.trim()); - } - - /** - * Returns the format of the training set to read. "fv" or "cvs" is supported. - * If this is null the format name is taken from the last name of the file instead. - * Default: null. - */ - public String getTrainingSetFormat() { return trainingSetFormat; } - public void setTrainingSetFormat(String trainingSetFormat) { this.trainingSetFormat = trainingSetFormat; } - - /** - * Returns the fraction of the result set to hold out of training and use for validation. - * Default 0.2 - */ - public double getValidationFraction() { return validationFraction; } - public void setValidationFraction(double validationFraction) { this.validationFraction = validationFraction; } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java deleted file mode 100644 index f7917987f91..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga; - -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; - -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * A training set: a set of <i>cases</i>: Input data to output value pairs - * - * @author bratseth - */ -public class TrainingSet { - - private final TrainingParameters parameters; - private final List<Case> trainingCases; - private final List<Case> validationCases; - private final Set<String> argumentNames = new HashSet<>(); - - /** - * Creates a training set from a list of cases. - * The ownership of the argument list and all the cases are transferred to this by this call. - */ - public TrainingSet(CaseList caseList, TrainingParameters parameters) { - List<Case> cases = caseList.cases(); - - this.parameters = parameters; - for (Case aCase : cases) - argumentNames.addAll(aCase.arguments().names()); - argumentNames.removeAll(parameters.getExcludeFeatures()); - - int validationCaseCount = (int)Math.round((cases.size() * parameters.getValidationFraction())); - this.validationCases = cases.subList(0, validationCaseCount); - this.trainingCases = cases.subList(validationCaseCount, cases.size()); - } - - public Set<String> argumentNames() { - return Collections.unmodifiableSet(argumentNames); - } - - /** - * Returns the fitness of a genome (ranking expression) according to this training set. - * The fitness to be returned by this is the inverse of the average squared difference between the - * target function result and the function result returned by the genome function. - */ - // TODO: Take expression length into account. - public double evaluate(RankingExpression genome) { - boolean constantExpressionGenome = true; - double squaredErrorSum = 0; - Double previousValue = null; - for (Case trainingCase : trainingCases) { - double value = genome.evaluate(trainingCase.arguments()).asDouble(); - double error = saneAbs(effectiveError(trainingCase.targetValue(), value)); - squaredErrorSum += Math.pow(error, 2); - - if (previousValue != null && previousValue != value) - constantExpressionGenome = false; - previousValue = value; - } - if (constantExpressionGenome) return 0; // Disqualify constant expressions as we know we're not looking for them - return 1 / (squaredErrorSum / trainingCases.size()); - } - - private double effectiveError(double a, double b) { - return parameters.getErrorIsRelative() ? errorFraction(a, b) : a - b; - } - - /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */ - public double calculateAverageError(RankingExpression genome) { - double errorSum=0; - for (Case trainingCase : trainingCases) - errorSum += saneAbs(trainingCase.targetValue() - genome.evaluate(trainingCase.arguments()).asDouble()); - return errorSum/(double) trainingCases.size(); - } - - /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */ - public double calculateAverageErrorPercentage(RankingExpression genome) { - double errorFractionSum = 0; - for (Case trainingCase : trainingCases) { - double errorFraction = saneAbs(errorFraction(trainingCase.targetValue(), genome.evaluate(trainingCase.arguments()).asDouble())); - // System.out.println("Error %: " + (100 * errorFraction + " Target: " + trainingCase.targetValue() + " Learned: " + genome.evaluate(trainingCase.arguments()).asDouble())); - errorFractionSum += errorFraction; - } - return ( errorFractionSum/(double) trainingCases.size() ) *100; - } - - private double errorFraction(double a, double b) { - double error = a - b; - if (error == 0 ) return 0; // otherwise a or b is different from 0 - if (a != 0) - return error / a; - else - return error / b; - } - - private double saneAbs(double d) { - if (Double.isInfinite(d) || Double.isNaN(d)) return Double.MAX_VALUE; - return Math.abs(d); - } - - public static class Case { - - private Context arguments; - - private double targetValue; - - public Case(Context arguments, double targetValue) { - this.arguments = arguments; - this.targetValue = targetValue; - } - - public double targetValue() { return targetValue; } - - public Context arguments() { return arguments; } - - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java deleted file mode 100644 index c7f4f848b71..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.caselist; - -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; - -import java.util.Optional; - -/** - * <p>A list of training set cases created by reading a file containing lines specifying a case - * per line using the following syntax - * <code>targetValue, argument1:value, argument2:value2, ...</code> - * where arguments are identifiers and values are doubles.</p> - * - * <p>Comment lines starting with "#" are ignored.</p> - * - * @author bratseth - */ -public class CsvFileCaseList extends FileCaseList { - - public CsvFileCaseList(String fileName) { - super(fileName); - } - - protected Optional<TrainingSet.Case> lineToCase(String line, int lineNumber) { - String[] elements = line.split(","); - if (elements.length<2) - throw new IllegalArgumentException("At line " + lineNumber + ": Expected a comma-separated case on the " + - "form 'targetValue, argument1:value1, ...', but got '" + line ); - - double target; - try { - target = Double.parseDouble(elements[0].trim()); - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("At line " + lineNumber + ": Expected a target value double " + - "at the start of the line, got '" + elements[0] + "'"); - } - - Context context = new MapContext(); - for (int i=1; i<elements.length; i++) { - String[] argumentPair = elements[i].split(":"); - try { - if (argumentPair.length != 2) throw new IllegalArgumentException(); - context.put(argumentPair[0].trim(),Double.parseDouble(argumentPair[1].trim())); - } - catch (IllegalArgumentException e) { - throw new IllegalArgumentException("At line " + lineNumber + ", element " + (i+1) + - ": Expected argument on the form 'identifier:double', got '" + elements[i] + "'"); - } - } - return Optional.of(new TrainingSet.Case(context, target)); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java deleted file mode 100644 index 35a4d58d16c..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.caselist; - -import com.yahoo.searchlib.mlr.ga.CaseList; -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.mlr.ga.TrainingSet; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Optional; - -/** - * @author bratseth - */ -public abstract class FileCaseList implements CaseList { - - private List<TrainingSet.Case> cases = new ArrayList<>(); - - /** - * Reads a case list from file. - * - * @throws IllegalArgumentException if the file could not be found or opened - */ - public FileCaseList(String fileName) { - try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) { - String line; - int lineNumber=0; - while (null != (line=reader.readLine())) { - lineNumber++; - line = line.trim(); - if (line.startsWith("#")) continue; - if (line.isEmpty()) continue; - Optional<TrainingSet.Case> newCase = lineToCase(line, lineNumber); - if (newCase.isPresent()) - cases.add(newCase.get()); - - } - } - catch (IOException | IllegalArgumentException e) { - throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e); - } - } - - /** Returns the case constructed from reading a line, if any */ - protected abstract Optional<TrainingSet.Case> lineToCase(String line, int lineNumber); - - @Override - public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); } - - /** Creates a file case list of the type specified in the parameters */ - public static FileCaseList create(String fileName, TrainingParameters parameters) { - String format = parameters.getTrainingSetFormat(); - if (format == null) - format = ending(fileName); - - switch (format) { - case "csv" : return new CsvFileCaseList(fileName); - case "fv" : return new FvFileCaseList(fileName); - default : throw new IllegalArgumentException("Unknown file format '" + format + "'"); - } - } - - private static String ending(String fileName) { - int lastDot = fileName.lastIndexOf("."); - if (lastDot <= 0) return null; - return fileName.substring(lastDot + 1, fileName.length()); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java deleted file mode 100644 index 0e5b2aac729..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.caselist; - -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; - -import java.util.Optional; - -/** - * A list of training set cases created by reading a file containing lines specifying a case - * per line using the following syntax - * <code>feature1\tfeature2\tfeature3\t...\ttarget1</code> - * <p> - * The first line contains the name of each feature in the same order. - * - * <p>Comment lines starting with "#" are ignored.</p> - * - * @author bratseth - */ -// NOTE: If we get another type of case list it is time to abstract into a common CaseList base class -public class FvFileCaseList extends FileCaseList { - - private String[] argumentNames; - - public FvFileCaseList(String fileName) { - super(fileName); - } - - protected Optional<TrainingSet.Case> lineToCase(String line, int lineNumber) { - String[] values = line.split("\t"); - - if (argumentNames == null) { // first line - argumentNames = values; - return Optional.empty(); - } - - if (argumentNames.length != values.length) - throw new IllegalArgumentException("Wrong number of values at line " + lineNumber); - - - Context context = new MapContext(); - for (int i = 0; i < values.length-1; i++) - context.put(argumentNames[i], toDouble(values[i], lineNumber)); - - double target = toDouble(values[values.length-1], lineNumber); - return Optional.of(new TrainingSet.Case(context, target)); - } - - private double toDouble(String s, int lineNumber) { - try { - return Double.parseDouble(s.trim()); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("At line " + lineNumber + ": Expected only double values, " + - "got '" + s + "'"); - } - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java deleted file mode 100644 index 16a4f6f931b..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java +++ /dev/null @@ -1,425 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.gbdt; - -import com.yahoo.searchlib.rankingexpression.rule.SetMembershipNode; -import com.yahoo.yolean.Exceptions; -import com.yahoo.searchlib.mlr.ga.Individual; -import com.yahoo.searchlib.mlr.ga.PrintingTracker; -import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; -import com.yahoo.searchlib.mlr.ga.Trainer; -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; -import com.yahoo.searchlib.rankingexpression.evaluation.Value; -import com.yahoo.searchlib.rankingexpression.parser.ParseException; -import com.yahoo.searchlib.rankingexpression.rule.Arguments; -import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode; -import com.yahoo.searchlib.rankingexpression.rule.ComparisonNode; -import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; -import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; -import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; -import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; -import com.yahoo.searchlib.rankingexpression.rule.IfNode; -import com.yahoo.searchlib.rankingexpression.rule.NegativeNode; -import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Random; - -/** - * A standalone tool which analyzes a GBDT form ranking expression - * - * @author bratseth - */ -public class ExpressionAnalysis { - - private final Map<String, Feature> features = new HashMap<>(); - - private int currentTree; - - private final RankingExpression expression; - - public ExpressionAnalysis(RankingExpression expression) { - this.expression = expression; - if ( ! instanceOf(expression.getRoot(), ArithmeticNode.class)) return; - analyzeSum((ArithmeticNode)expression.getRoot()); - } - - /** Returns the expression analyzed by this */ - public RankingExpression expression() { return expression; } - - /** Returns the analysis of each feature in this expression as a read-only map indexed by feature name */ - private Map<String, Feature> featureMap() { - return Collections.unmodifiableMap(features); - } - - /** Returns list containing the analysis of each feature, sorted by decreasing usage */ - private List<Feature> features() { - List<Feature> featureList = new ArrayList<>(features.values()); - Collections.sort(featureList); - return featureList; - } - - /** Returns the name of each feature, sorted by decreasing usage */ - private List<String> featureNames() { - List<String> featureNameList = new ArrayList<>(features.values().size()); - for (Feature feature : features()) - featureNameList.add(feature.name()); - return featureNameList; - } - - private void analyzeSum(ArithmeticNode node) { - for (ExpressionNode child : node.children()) { - currentTree++; - analyze(child); - } - } - - private void analyze(ExpressionNode node) { - if (node instanceof IfNode) { - analyzeIf((IfNode)node); - } - - if (node instanceof CompositeNode) { - for (ExpressionNode child : ((CompositeNode)node).children()) - analyze(child); - } - } - - private void analyzeIf(IfNode node) { - if (node.getCondition() instanceof ComparisonNode) - analyzeComparisonIf(node); - else if (node.getCondition() instanceof SetMembershipNode) - analyzeSetMembershipIf(node); - else - System.err.println("Warning: Expected a comparison or set membership test, got " + node.getCondition().getClass()); - } - - private void analyzeComparisonIf(IfNode node) { - ComparisonNode comparison = (ComparisonNode)node.getCondition(); - - if (comparison.getOperator() != TruthOperator.SMALLER) { - System.err.println("Warning: This expression has " + comparison.getOperator() + " where we expect < :" + - comparison); - return; - } - - if ( ! instanceOf(comparison.getLeftCondition(), ReferenceNode.class)) return; - String featureName = ((ReferenceNode)comparison.getLeftCondition()).getName(); - - Double value = nodeValue(comparison.getRightCondition()); - if (value == null) return; - - ComparisonFeature feature = (ComparisonFeature)features.get(featureName); - if (feature == null) { - feature = new ComparisonFeature(featureName); - features.put(featureName, feature); - } - feature.isComparedTo(value, currentTree, average(node.getTrueExpression()), average(node.getFalseExpression())); - } - - private void analyzeSetMembershipIf(IfNode node) { - SetMembershipNode membershipTest = (SetMembershipNode)node.getCondition(); - - if ( ! instanceOf(membershipTest.getTestValue(), ReferenceNode.class)) return; - String featureName = ((ReferenceNode)membershipTest.getTestValue()).getName(); - - SetMembershipFeature feature = (SetMembershipFeature)features.get(featureName); - if (feature == null) { - feature = new SetMembershipFeature(featureName); - features.put(featureName, feature); - } - } - - /** - * Returns the value of a constant node, or a negative wrapping a constant. - * Warns and returns null if it is neither. - */ - private Double nodeValue(ExpressionNode node) { - if (node instanceof NegativeNode) { - NegativeNode negativeNode = (NegativeNode)node; - if ( ! instanceOf(negativeNode.getValue(), ConstantNode.class)) return null; - return - ((ConstantNode)negativeNode.getValue()).getValue().asDouble(); - } - else { - if ( ! instanceOf(node, ConstantNode.class)) return null; - return ((ConstantNode)node).getValue().asDouble(); - } - } - - - /** Returns the average value of all the leaf constants below this */ - private double average(ExpressionNode node) { - Sum sum = new Sum(); - average(node, sum); - return sum.average(); - } - - private void average(ExpressionNode node, Sum sum) { - if (node instanceof CompositeNode) { - for (ExpressionNode child : ((CompositeNode)node).children()) - average(child, sum); - } - else { - Double value = nodeValue(node); - if (value == null) return; - sum.add(value); - } - } - - private boolean instanceOf(Object object, Class<?> clazz) { - if (clazz.isAssignableFrom(object.getClass())) return true; - System.err.println("Warning: This expression has " + object.getClass() + " where we expect " + clazz + - ": Instance " + object); - return false; - } - - private List<Context> generateArgumentSets(int count) { - List<Context> argumentSets = new ArrayList<>(count); - for (int i=0; i<count; i++) { - ArgumentIgnoringMapContext context = new ArgumentIgnoringMapContext(); - for (Feature feature : features()) { - if (feature instanceof ComparisonFeature) { - ComparisonFeature comparison = (ComparisonFeature)feature; - context.put(comparison.name(),randomBetween(comparison.lowerBound(), comparison.upperBound())); - } - // TODO: else if (feature instanceof SetMembershipFeature) - } - argumentSets.add(context); - } - return argumentSets; - } - - private Random random = new Random(); - /** Returns a random value in [lowerBound, upperBound> */ - private double randomBetween(double lowerBound, double upperBound) { - return random.nextDouble()*(upperBound-lowerBound)+lowerBound; - } - - private static class ArgumentIgnoringMapContext extends MapContext { - - @Override - public Value get(String name, Arguments arguments,String output) { - return super.get(name, null, output); - } - - } - - /** Generates a textual report from analyzing this expression */ - public String report() { - StringBuilder b = new StringBuilder(); - b.append("Trees: " + currentTree).append("\n"); - b.append("Features:\n"); - for (Feature feature : features()) - b.append(" " + feature).append("\n"); - return b.toString(); - } - - private static final String usage = "\nUsage: ExpressionAnalysis [myExpressionFile.expression]"; - - public static void main(String[] args) { - if (args.length == 0) error("No arguments." + usage); - - ExpressionAnalysis analysis = analysisFromFile(args[0]); - - if (1==1) return; // Turn off ga training - if (args.length == 1) { - new GATraining(analysis); - } - else if (args.length == 2) { - try { - new LearntExpressionAnalysis(analysis, new RankingExpression(args[1])); - } - catch (ParseException e) { - error("Syntax error in argument expression: " + Exceptions.toMessageString(e)); - } - } - else { - error("Unexpectedly got more than 2 arguments." + usage); - } - - } - - private static ExpressionAnalysis analysisFromFile(String fileName) { - try (Reader fileReader = new BufferedReader(new FileReader(fileName))) { - System.out.println("Analyzing " + fileName + "..."); - ExpressionAnalysis analysis = new ExpressionAnalysis(new RankingExpression(fileReader)); - System.out.println(analysis.report()); - return analysis; - } - catch (FileNotFoundException e) { - error("Could not find '" + fileName + "'"); - } - catch (IOException e) { - error("Failed reading '" + fileName + "': " + Exceptions.toMessageString(e)); - } - catch (ParseException e) { - error("Syntax error in '" + fileName + "': " + Exceptions.toMessageString(e)); - } - return null; - } - - private static class LearntExpressionAnalysis { - - public LearntExpressionAnalysis(ExpressionAnalysis analysis, RankingExpression learntExpression) { - int cases = 1000; - TrainingSet newTrainingSet = new TrainingSet(new RankingExpressionCaseList(analysis.generateArgumentSets(cases), - analysis.expression()), new TrainingParameters()); - Individual winner = new Individual(learntExpression, newTrainingSet); - System.out.println("With separate training set: " + winner.toShortString() + " (" + winner.calculateAverageError() + ")"); - } - - } - - private static class GATraining { - - public GATraining(ExpressionAnalysis analysis) { - int skipFeatures = 0; - int featureCount = analysis.featureNames().size(); - int cases = 1000; - TrainingParameters parameters = new TrainingParameters(); - parameters.setInitialSpeciesSize(50); - parameters.setSpeciesLifespan(50); - //parameters.setAllowConditions(false); // disallow non-smooth functions - parameters.setMaxExpressionDepth(8); - TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(analysis.generateArgumentSets(cases), - analysis.expression()), parameters); - Trainer trainer = new Trainer(trainingSet, new HashSet<>(analysis.featureNames().subList(skipFeatures, featureCount))); - - System.out.println("Learning ..."); - RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker(100, 0, 1)); - System.out.println("Learnt expression: " + learntExpression); - - // Check for overtraining - new LearntExpressionAnalysis(analysis, learntExpression); - } - - } - - private static void error(String message) { - System.err.println(message); - System.exit(1); - } - - public abstract static class Feature implements Comparable<Feature> { - - private final String name; - - protected Feature(String name) { - this.name = name; - } - - public String name() { return name; } - - /** Primary sort by type, secondary by name */ - @Override - public int compareTo(Feature other) { - int typeComparison = this.getClass().getName().compareTo(other.getClass().getName()); - if (typeComparison != 0) return typeComparison; - return this.name.compareTo(other.name); - } - - } - - /** A feature used in comparisons. These are the ones on which our serious analysis is focused */ - public static class ComparisonFeature extends Feature { - - private double lowerBound = Double.MAX_VALUE; - private double upperBound = Double.MIN_VALUE; - - /** The number of usages of this feature */ - private int usages = 0; - - /** The sum of the tree numbers where this is accessed */ - private int treeNumberSum = 0; - - /** - * The net times where the left values are smaller than the right values for this - * (which is a measure of correlation between input and output because the comparison is <) - */ - private int correlationCount = 0; - - /** - * The sum difference in returned value between choosing the right and left branch due to this feature - */ - private double netSum = 0; - - public ComparisonFeature(String name) { - super(name); - } - - public double lowerBound() { return lowerBound; } - public double upperBound() { return upperBound; } - - public void isComparedTo(double value, int inTreeNumber, double leftAverage, double rightAverage) { - lowerBound = Math.min(lowerBound, value); - upperBound = Math.max(upperBound, value); - usages++; - treeNumberSum += inTreeNumber; - correlationCount += leftAverage < rightAverage ? 1 : -1; - netSum += rightAverage - leftAverage; - } - - /** Override to do secondary sort by usages */ - public int compareTo(Feature o) { - if ( ! (o instanceof ComparisonFeature)) return super.compareTo(o); - ComparisonFeature other = (ComparisonFeature)o; - return - Integer.compare(this.usages, other.usages); - } - - @Override - public String toString() { - return "Numeric feature: " + name() + - ": range [" + lowerBound + ", " + upperBound + "]" + - ", usages " + usages + - ", average tree occurrence " + (treeNumberSum / usages) + - ", correlation: " + (correlationCount / (double)usages) + - ", net contribution: " + netSum; - } - - } - - /** A feature used in set membership tests */ - public static class SetMembershipFeature extends Feature { - - public SetMembershipFeature(String name) { - super(name); - } - - @Override - public String toString() { - return "Categorical feature: " + name(); - } - - } - - /** A sum which can returns its average */ - private static class Sum { - - private double sum; - private int count; - - public void add(double value) { - sum+=value; - count++; - } - - public double average() { - return sum / count; - } - - } - -} diff --git a/searchlib/src/main/sh/ga b/searchlib/src/main/sh/ga deleted file mode 100644 index 009b9684160..00000000000 --- a/searchlib/src/main/sh/ga +++ /dev/null @@ -1,67 +0,0 @@ -#! /bin/sh -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -# BEGIN environment bootstrap section -# Do not edit between here and END as this section should stay identical in all scripts - -findpath () { - myname=${0} - mypath=${myname%/*} - myname=${myname##*/} - if [ "$mypath" ] && [ -d "$mypath" ]; then - return - fi - mypath=$(pwd) - if [ -f "${mypath}/${myname}" ]; then - return - fi - echo "FATAL: Could not figure out the path where $myname lives from $0" - exit 1 -} - -COMMON_ENV=libexec/vespa/common-env.sh - -source_common_env () { - if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then - export VESPA_HOME - common_env=$VESPA_HOME/$COMMON_ENV - if [ -f "$common_env" ]; then - . $common_env - return - fi - fi - return 1 -} - -findroot () { - source_common_env && return - if [ "$VESPA_HOME" ]; then - echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'" - exit 1 - fi - if [ "$ROOT" ] && [ -d "$ROOT" ]; then - VESPA_HOME="$ROOT" - source_common_env && return - fi - findpath - while [ "$mypath" ]; do - VESPA_HOME=${mypath} - source_common_env && return - mypath=${mypath%/*} - done - echo "FATAL: missing VESPA_HOME environment variable" - echo "Could not locate $COMMON_ENV anywhere" - exit 1 -} - -findroot - -# END environment bootstrap section - -JAR=$VESPA_HOME/lib/jars/searchlib-deploy.jar -if [[ "$1" == *.jar ]]; then - JAR=$1 -fi -shift - -exec java -cp $JAR com.yahoo.searchlib.mlr.ga.Main "$@" diff --git a/searchlib/src/main/sh/gbdt-analysis b/searchlib/src/main/sh/gbdt-analysis deleted file mode 100755 index eae755689b0..00000000000 --- a/searchlib/src/main/sh/gbdt-analysis +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -java -cp target/searchlib.jar com.yahoo.searchlib.mlr.gbdt.ExpressionAnalysis $@ diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java deleted file mode 100644 index 68f705315ad..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.test; - -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.mlr.ga.caselist.CsvFileCaseList; -import com.yahoo.yolean.Exceptions; -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import org.junit.Test; -import static org.junit.Assert.*; - -/** - * @author bratseth - */ -public class CsvFileCaseListTestCase { - - private static final double delta = 0.000001; - - @Test - public void testLegalFile() { - CsvFileCaseList list = new CsvFileCaseList("src/test/files/mlr/cases.csv"); - - assertEquals(3,list.cases().size()); - { - TrainingSet.Case case1 = list.cases().get(0); - assertEquals(1.0, case1.targetValue(), delta); - assertEquals(2, case1.arguments().names().size()); - assertEquals(2.0, case1.arguments().get("arg1").asDouble(),delta); - assertEquals(-1.3, case1.arguments().get("arg2").asDouble(),delta); - } - - { - TrainingSet.Case case2 = list.cases().get(1); - assertEquals(-1.003, case2.targetValue(), delta); - assertEquals(1, case2.arguments().names().size()); - assertEquals(500007, case2.arguments().get("arg1").asDouble(),delta); - } - - { - TrainingSet.Case case3 = list.cases().get(2); - assertEquals(0, case3.targetValue(), delta); - assertEquals(1, case3.arguments().names().size()); - assertEquals(1.0, case3.arguments().get("arg2").asDouble(),delta); - } - - TrainingSet trainingSet = new TrainingSet(list, new TrainingParameters()); - assertEquals(2, trainingSet.argumentNames().size()); - assertTrue(trainingSet.argumentNames().contains("arg1")); - assertTrue(trainingSet.argumentNames().contains("arg2")); - } - - @Test - public void testNonExistingFile() { - try { - new CsvFileCaseList("nosuchfile"); - } - catch (IllegalArgumentException e) { - assertEquals("Could not create a case list from file 'nosuchfile': nosuchfile (No such file or directory)", Exceptions.toMessageString(e)); - } - } - - @Test - public void testInvalidFile1() { - try { - new CsvFileCaseList("src/test/files/mlr/cases-illegal1.csv"); - } - catch (IllegalArgumentException e) { - assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal1.csv': At line 5, element 3: Expected argument on the form 'identifier:double', got ' arg2:'", Exceptions.toMessageString(e)); - } - } - - @Test - public void testInvalidFile2() { - try { - new CsvFileCaseList("src/test/files/mlr/cases-illegal2.csv"); - } - catch (IllegalArgumentException e) { - assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal2.csv': At line 2: Expected a target value double at the start of the line, got '5db'", Exceptions.toMessageString(e)); - } - } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java deleted file mode 100644 index 4de83d16300..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.test; - -import com.yahoo.searchlib.mlr.ga.PrintingTracker; -import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; -import com.yahoo.searchlib.mlr.ga.Trainer; -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; -import com.yahoo.searchlib.rankingexpression.parser.ParseException; - -import java.util.ArrayList; -import java.util.List; - -/** - * Main class - drives a learning session from the command line. - * - * @author bratseth - */ -public class ExampleLearningSessions { - - public static void main(String[] args) throws ParseException { - test3(); - } - - // Always learnt precisely in less than a second - private static void test1() throws ParseException { - TrainingParameters parameters = new TrainingParameters(); - - RankingExpression target = new RankingExpression("2*x"); - List<Context> arguments = new ArrayList<>(); - arguments.add(MapContext.fromString("x:0").freeze()); - arguments.add(MapContext.fromString("x:1").freeze()); - arguments.add(MapContext.fromString("x:2").freeze()); - TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); - - Trainer trainer = new Trainer(trainingSet); - - System.out.println("Learning ..."); - RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); - } - - // Solved well in a few seconds at most. Slow going thereafter. - private static void test2() throws ParseException { - TrainingParameters parameters = new TrainingParameters(); - parameters.setSpeciesLifespan(100); // Shorter lifespan is faster? - - RankingExpression target = new RankingExpression("5*x*x + 2*x + 13"); - List<Context> arguments = new ArrayList<>(); - arguments.add(MapContext.fromString("x:0").freeze()); - arguments.add(MapContext.fromString("x:1").freeze()); - arguments.add(MapContext.fromString("x:2").freeze()); - arguments.add(MapContext.fromString("x:3").freeze()); - arguments.add(MapContext.fromString("x:4").freeze()); - arguments.add(MapContext.fromString("x:5").freeze()); - arguments.add(MapContext.fromString("x:6").freeze()); - arguments.add(MapContext.fromString("x:7").freeze()); - arguments.add(MapContext.fromString("x:8").freeze()); - arguments.add(MapContext.fromString("x:9").freeze()); - arguments.add(MapContext.fromString("x:10").freeze()); - arguments.add(MapContext.fromString("x:50").freeze()); - arguments.add(MapContext.fromString("x:500").freeze()); - arguments.add(MapContext.fromString("x:5000").freeze()); - arguments.add(MapContext.fromString("x:50000").freeze()); - TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); - - Trainer trainer = new Trainer(trainingSet); - - System.out.println("Learning ..."); - RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); - } - - // Solved well in at most a few minutes - private static void test3() throws ParseException { - TrainingParameters parameters = new TrainingParameters(); - parameters.setAllowConditions(false); // disallow non-smooth functions: Speeds up learning of smooth ones greatly - - RankingExpression target = new RankingExpression("-2.7*x*x*x + 5*x*x + 2*x + 13"); - List<Context> arguments = new ArrayList<>(); - arguments.add(MapContext.fromString("x:-50000").freeze()); - arguments.add(MapContext.fromString("x:-5000").freeze()); - arguments.add(MapContext.fromString("x:-500").freeze()); - arguments.add(MapContext.fromString("x:-50").freeze()); - arguments.add(MapContext.fromString("x:-10").freeze()); - arguments.add(MapContext.fromString("x:0").freeze()); - arguments.add(MapContext.fromString("x:1").freeze()); - arguments.add(MapContext.fromString("x:2").freeze()); - arguments.add(MapContext.fromString("x:3").freeze()); - arguments.add(MapContext.fromString("x:4").freeze()); - arguments.add(MapContext.fromString("x:5").freeze()); - arguments.add(MapContext.fromString("x:6").freeze()); - arguments.add(MapContext.fromString("x:7").freeze()); - arguments.add(MapContext.fromString("x:8").freeze()); - arguments.add(MapContext.fromString("x:9").freeze()); - arguments.add(MapContext.fromString("x:10").freeze()); - arguments.add(MapContext.fromString("x:50").freeze()); - arguments.add(MapContext.fromString("x:500").freeze()); - arguments.add(MapContext.fromString("x:5000").freeze()); - arguments.add(MapContext.fromString("x:50000").freeze()); - TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); - - Trainer trainer = new Trainer(trainingSet); - - System.out.println("Learning ..."); - RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); - } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java deleted file mode 100644 index f5febe2ab68..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.test; - -import com.yahoo.searchlib.mlr.ga.Evolvable; -import com.yahoo.searchlib.mlr.ga.Main; -import com.yahoo.searchlib.mlr.ga.PrintingTracker; -import com.yahoo.searchlib.mlr.ga.Species; -import com.yahoo.searchlib.mlr.ga.Tracker; -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import org.junit.Test; -import static org.junit.Assert.*; - -import java.util.List; - -/** - * Tests the main class used from the command line - * - * @author bratseth - */ -public class MainTestCase { - - /** Tests that an extremely simple function expressed as cases in a file is learnt perfectly. */ - @Test - public void testMain() { - SilentTestTracker tracker = new SilentTestTracker(); - new Main(new String[] { "src/test/files/mlr/cases-linear.csv"}, tracker); - assertTrue(Double.isInfinite(tracker.winner.getFitness())); - } - - private static class SilentTestTracker implements Tracker { - - public Evolvable winner; - - @Override - public void newSpecies(Species predecessor, int initialSize, List<RankingExpression> genePool) { - } - - @Override - public void newSpeciesCreated(Species predecessor) { - } - - @Override - public void speciesCompleted(Species predecessor) { - } - - @Override - public void iteration(Species species, int generation) { - } - - @Override - public void result(Evolvable winner) { - this.winner = winner; - } - } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java deleted file mode 100644 index 2fc6e6cab3d..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.test; - -import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; -import com.yahoo.searchlib.mlr.ga.TrainingParameters; -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.RankingExpression; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; -import com.yahoo.searchlib.rankingexpression.parser.ParseException; -import org.junit.Test; -import static org.junit.Assert.*; - -import java.util.ArrayList; -import java.util.List; - -/** - * @author bratseth - */ -public class MockTrainingSetTestCase { - - @Test - public void testMockTrainingSet() throws ParseException { - RankingExpression target = new RankingExpression("2*x"); - List<Context> arguments = new ArrayList<>(); - arguments.add(MapContext.fromString("x:0")); - arguments.add(MapContext.fromString("x:1")); - arguments.add(MapContext.fromString("x:2")); - TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), new TrainingParameters()); - assertTrue(Double.isInfinite(trainingSet.evaluate(new RankingExpression("2*x")))); - assertEquals(4.0, trainingSet.evaluate(new RankingExpression("x")), 0.001); - assertEquals(0.0, trainingSet.evaluate(new RankingExpression("x/x")), 0.001); - } - - @Test - public void testEvaluation() throws ParseException { - // with freezing - assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001); - assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001); - - // without freezing - assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001); - assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001); - } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java deleted file mode 100644 index 7945e2605b0..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.ga.test; - -import com.yahoo.searchlib.mlr.ga.CaseList; -import com.yahoo.searchlib.mlr.ga.TrainingSet; -import com.yahoo.searchlib.rankingexpression.evaluation.Context; -import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.util.*; - -/** - * Reads a tripadvisor Kaggle challenge training set - * - * @author bratseth - */ -public class TripAdvisorFileCaseList implements CaseList { - - private List<TrainingSet.Case> cases = new ArrayList<>(); - private Map<Integer,String> columnNames = new HashMap<>(); - - /** - * Reads a case list from file. - * - * @throws IllegalArgumentException if the file could not be found or opened - */ - public TripAdvisorFileCaseList(String fileName) throws IllegalArgumentException { - System.out.print("Reading training data "); - try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) { - String line; - readColumnNames(reader.readLine()); - int lineNumber=1; - while (null != (line=reader.readLine())) { - lineNumber++; - line = line.trim(); - if (line.startsWith("#")) continue; - if (line.isEmpty()) continue; - cases.add(lineToCase(line, lineNumber)); - } - } - catch (IOException | IllegalArgumentException e) { - throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e); - } - System.out.println("done"); - } - - private void readColumnNames(String line) { - int columnNumber = 0; - for (String columnName : line.split(",")) - columnNames.put(columnNumber++, columnName); - } - - protected TrainingSet.Case lineToCase(String line, int lineNumber) { - if ((lineNumber % 10000) ==0) - System.out.print("."); - - Map<String,Double> columnValues = readColumns(line); - - double targetValue = columnValues.get("click_bool") + columnValues.get("booking_bool")*5; - - Context context = new MapContext(); - for (Map.Entry<String,Double> value : columnValues.entrySet()) { - if (value.getKey().equals("click_bool")) continue; - if (value.getKey().equals("gross_bookings_usd")) continue; - if (value.getKey().equals("booking_bool")) continue; - context.put(value.getKey(),value.getValue()); - } - return new TrainingSet.Case(context, targetValue); - } - - private Map<String, Double> readColumns(String line) { - Map<String,Double> columnValues = new LinkedHashMap<>(); - int columnNumber = 0; - for (String valueString : line.split(",")) { - String columnName = columnNames.get(columnNumber++); - if (columnName.equals("date_time")) continue; - Double columnValue; - if (valueString.equals("NULL")) { - columnValue = 0.0; - } - else { - try { - columnValue = Double.parseDouble(valueString); - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("Could not parse column '" + columnName + "'",e); - } - } - columnValues.put(columnName, columnValue); - } - return columnValues; - } - - @Override - public List<TrainingSet.Case> cases() { return Collections.unmodifiableList(cases); } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java deleted file mode 100644 index 28f90ebb0fc..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.mlr.gbdt; - -import org.junit.Ignore; -import org.junit.Test; - -/** - * Run an expression analyser without having to muck with classpath. - * - * @author bratseth - */ -public class ExpressionAnalysisRunner { - - @Test @Ignore - public void runAnalysis() { - ExpressionAnalysis.main(new String[] { "/Users/bratseth/Downloads/getty_mlr_001.expression"}); - } - -} diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp index e07de5cd9d5..50ef7f21c4c 100644 --- a/searchlib/src/tests/btree/btreeaggregation_test.cpp +++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp @@ -588,11 +588,11 @@ Test::requireThatNodeStealWorks() "[60:160,70:170,80:180[min=160,max=180]]" "[min=110,max=180]]", t)); t.remove(60); - EXPECT_TRUE(assertTree("[40:" - "[10:110,20:120,30:130,40:140" - "[min=110,max=140]]" + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130" + "[min=110,max=130]]" ",80:" - "[50:150,70:170,80:180[min=150,max=180]]" + "[40:140,50:150,70:170,80:180[min=140,max=180]]" "[min=110,max=180]]", t)); } { // steal some from right @@ -615,12 +615,12 @@ Test::requireThatNodeStealWorks() "[min=150,max=190]]" "[min=110,max=190]]", t)); t.remove(20); - EXPECT_TRUE(assertTree("[50:" - "[10:110,30:130,50:150" - "[min=110,max=150]]" + EXPECT_TRUE(assertTree("[60:" + "[10:110,30:130,50:150,60:160" + "[min=110,max=160]]" ",90:" - "[60:160,70:170,80:180,90:190" - "[min=160,max=190]]" + "[70:170,80:180,90:190" + "[min=170,max=190]]" "[min=110,max=190]]", t)); } } diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.h b/searchlib/src/vespa/searchlib/btree/btreenode.h index 13fccf72623..94b0016e1b5 100644 --- a/searchlib/src/vespa/searchlib/btree/btreenode.h +++ b/searchlib/src/vespa/searchlib/btree/btreenode.h @@ -398,6 +398,9 @@ private: return *this; } + template <typename NodeAllocatorType> + uint32_t countValidLeaves(uint32_t start, uint32_t end, NodeAllocatorType &allocator); + public: BTreeNode::Ref getChild(uint32_t idx) const { return getData(idx); } void setChild(uint32_t idx, BTreeNode::Ref child) { setData(idx, child); } diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.hpp b/searchlib/src/vespa/searchlib/btree/btreenode.hpp index f307623a8e7..3523641705b 100644 --- a/searchlib/src/vespa/searchlib/btree/btreenode.hpp +++ b/searchlib/src/vespa/searchlib/btree/btreenode.hpp @@ -170,7 +170,7 @@ stealSomeFromLeftNode(NodeType *victim) assert(validSlots() + victim->validSlots() >= NodeType::minSlots()); assert(!getFrozen()); assert(!victim->getFrozen()); - uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t median = (validSlots() + victim->validSlots() + 1) / 2; uint32_t steal = median - validSlots(); _validSlots += steal; for (int32_t i = validSlots() - 1; i >= static_cast<int32_t>(steal); --i) { @@ -193,7 +193,7 @@ stealSomeFromRightNode(NodeType *victim) assert(validSlots() + victim->validSlots() >= NodeType::minSlots()); assert(!getFrozen()); assert(!victim->getFrozen()); - uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t median = (validSlots() + victim->validSlots() + 1) / 2; uint32_t steal = median - validSlots(); for (uint32_t i = 0; i < steal; ++i) { _keys[validSlots() + i] = victim->_keys[i]; @@ -307,6 +307,19 @@ stealAllFromRightNode(const BTreeInternalNode *victim) _validLeaves += victim->_validLeaves; } +template <typename KeyT, typename AggrT, uint32_t NumSlots> +template <typename NodeAllocatorType> +uint32_t +BTreeInternalNode<KeyT, AggrT, NumSlots>::countValidLeaves(uint32_t start, uint32_t end, NodeAllocatorType &allocator) +{ + assert(start <= end); + assert(end <= validSlots()); + uint32_t leaves = 0; + for (uint32_t i = start; i < end; ++i) { + leaves += allocator.validLeaves(getData(i)); + } + return leaves; +} template <typename KeyT, typename AggrT, uint32_t NumSlots> template <typename NodeAllocatorType> @@ -314,26 +327,11 @@ void BTreeInternalNode<KeyT, AggrT, NumSlots>:: stealSomeFromLeftNode(BTreeInternalNode *victim, NodeAllocatorType &allocator) { - assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots()); - assert(!getFrozen()); - assert(!victim->getFrozen()); - uint32_t median = (validSlots() + victim->validSlots()) / 2; - uint32_t steal = median - validSlots(); - _validSlots += steal; - for (int32_t i = validSlots() - 1; i >= static_cast<int32_t>(steal); --i) { - _keys[i] = _keys[i - steal]; - setData(i, getData(i - steal)); - } - uint32_t stolenLeaves = 0; - for (uint32_t i = 0; i < steal; ++i) { - _keys[i] = victim->_keys[victim->validSlots() - steal + i]; - setData(i, victim->getData(victim->validSlots() - steal + i)); - stolenLeaves += allocator.validLeaves(getData(i)); - } - _validLeaves += stolenLeaves; - victim->_validLeaves -= stolenLeaves; - victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); - victim->_validSlots -= steal; + uint16_t oldValidSlots = validSlots(); + ParentType::stealSomeFromLeftNode(victim); + uint32_t stolenLeaves = countValidLeaves(0, validSlots() - oldValidSlots, allocator); + incValidLeaves(stolenLeaves); + victim->decValidLeaves(stolenLeaves); } @@ -343,26 +341,11 @@ void BTreeInternalNode<KeyT, AggrT, NumSlots>:: stealSomeFromRightNode(BTreeInternalNode *victim, NodeAllocatorType &allocator) { - assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots()); - assert(!getFrozen()); - assert(!victim->getFrozen()); - uint32_t median = (validSlots() + victim->validSlots()) / 2; - uint32_t steal = median - validSlots(); - uint32_t stolenLeaves = 0; - for (uint32_t i = 0; i < steal; ++i) { - _keys[validSlots() + i] = victim->_keys[i]; - setData(validSlots() + i, victim->getData(i)); - stolenLeaves += allocator.validLeaves(victim->getData(i)); - } - _validSlots += steal; - _validLeaves += stolenLeaves; - victim->_validLeaves -= stolenLeaves; - for (uint32_t i = steal; i < victim->validSlots(); ++i) { - victim->_keys[i - steal] = victim->_keys[i]; - victim->setData(i - steal, victim->getData(i)); - } - victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); - victim->_validSlots -= steal; + uint16_t oldValidSlots = validSlots(); + ParentType::stealSomeFromRightNode(victim); + uint32_t stolenLeaves = countValidLeaves(oldValidSlots, validSlots(), allocator); + incValidLeaves(stolenLeaves); + victim->decValidLeaves(stolenLeaves); } diff --git a/simplemetrics/README b/simplemetrics/README new file mode 100644 index 00000000000..1ea13ac6221 --- /dev/null +++ b/simplemetrics/README @@ -0,0 +1 @@ +A simple implementation of metrics for Java diff --git a/statistics/README b/statistics/README new file mode 100644 index 00000000000..0b90b092125 --- /dev/null +++ b/statistics/README @@ -0,0 +1,2 @@ +Old metrics gathering and aggregation. +To be replaced with simplemetrics where possible. diff --git a/vespa-application-maven-plugin/pom.xml b/vespa-application-maven-plugin/pom.xml index 57ef584ece0..2464c8208ad 100644 --- a/vespa-application-maven-plugin/pom.xml +++ b/vespa-application-maven-plugin/pom.xml @@ -40,11 +40,6 @@ <dependency> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> - <version>3.0.2</version> - </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> </dependency> <dependency> <groupId>commons-io</groupId> @@ -54,23 +49,16 @@ <build> <plugins> <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <executions> - <execution> - <goals> - <goal>add-source</goal> - <goal>compile</goal> - <goal>testCompile</goal> - </goals> - </execution> - </executions> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> <configuration> - <args> - <arg>-unchecked</arg> - <arg>-deprecation</arg> - <arg>-explaintypes</arg> - </args> + <source>1.8</source> + <target>1.8</target> + <compilerArgs> + <arg>-Xlint:all</arg> + <arg>-Xlint:-processing</arg> + <arg>-Werror</arg> + </compilerArgs> </configuration> </plugin> <plugin> diff --git a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java index 8aa5c1bb761..9002340478b 100644 --- a/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java +++ b/vespa-application-maven-plugin/src/main/java/com/yahoo/container/plugin/mojo/ApplicationMojo.java @@ -5,7 +5,6 @@ import org.apache.commons.io.FileUtils; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.plugin.MojoFailureException; -import org.apache.maven.plugins.annotations.Component; import org.apache.maven.plugins.annotations.LifecyclePhase; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; @@ -22,10 +21,10 @@ import java.util.List; /** * @author tonytv */ -@Mojo(name = "packageApplication", defaultPhase = LifecyclePhase.PACKAGE) +@Mojo(name = "packageApplication", defaultPhase = LifecyclePhase.PACKAGE, threadSafe = true) public class ApplicationMojo extends AbstractMojo { - @Component + @Parameter( defaultValue = "${project}", readonly = true ) protected MavenProject project; @Parameter(defaultValue = "src/main/application") diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 3ac5000fe79..52ae68051af 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -47,6 +47,7 @@ vespa_define_module( src/tests/linkedptr src/tests/make_fixture_macros src/tests/memory + src/tests/net/lazy_resolver src/tests/net/selector src/tests/net/socket src/tests/net/socket_spec diff --git a/vespalib/src/tests/net/lazy_resolver/CMakeLists.txt b/vespalib/src/tests/net/lazy_resolver/CMakeLists.txt new file mode 100644 index 00000000000..440ee7ab873 --- /dev/null +++ b/vespalib/src/tests/net/lazy_resolver/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_lazy_resolver_test_app TEST + SOURCES + lazy_resolver_test.cpp + DEPENDS + vespalib +) +vespa_add_test(NAME vespalib_lazy_resolver_test_app COMMAND vespalib_lazy_resolver_test_app) diff --git a/vespalib/src/tests/net/lazy_resolver/lazy_resolver_test.cpp b/vespalib/src/tests/net/lazy_resolver/lazy_resolver_test.cpp new file mode 100644 index 00000000000..ba448c32cb6 --- /dev/null +++ b/vespalib/src/tests/net/lazy_resolver/lazy_resolver_test.cpp @@ -0,0 +1,174 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/net/lazy_resolver.h> +#include <vespa/vespalib/net/socket_spec.h> + +using namespace vespalib; + +TEST("require that lazy resolver internal duration type is appropriate") { + LazyResolver::seconds my_secs = std::chrono::milliseconds(500); + EXPECT_EQUAL(my_secs.count(), 0.5); +} + +TEST("require that lazy resolver can be used to resolve connect spec") { + vespalib::string spec("tcp/localhost:123"); + auto resolver = LazyResolver::create(); + auto address = resolver->make_address(spec); + auto resolved = address->resolve(); + fprintf(stderr, "resolver(spec:%s) -> '%s'\n", spec.c_str(), resolved.c_str()); + EXPECT_EQUAL(spec, address->spec()); + EXPECT_NOT_EQUAL(resolved, address->spec()); + EXPECT_EQUAL(resolved, SocketSpec(spec).client_address().spec()); + EXPECT_EQUAL(resolved, SocketAddress::select_remote(123, "localhost").spec()); +} + +TEST("require that lazy resolver can be used to resolve host name") { + vespalib::string host_name("localhost"); + auto resolver = LazyResolver::create(); + auto host = resolver->make_host(host_name); + auto resolved = host->resolve(); + fprintf(stderr, "resolver(host_name:%s) -> '%s'\n", host_name.c_str(), resolved.c_str()); + EXPECT_EQUAL(host_name, host->host_name()); + EXPECT_NOT_EQUAL(resolved, host->host_name()); + EXPECT_EQUAL(resolved, SocketSpec("tcp/localhost:123").client_address().ip_address()); + EXPECT_EQUAL(resolved, SocketAddress::select_remote(123, "localhost").ip_address()); + EXPECT_EQUAL(resolved, LazyResolver::default_resolve_host(host_name)); +} + +vespalib::string dummy_resolve_host(const vespalib::string &) { return "ip.addr"; } + +TEST("require that host name resolve function can be overridden (bonus: slow resolve warning)") { + LazyResolver::Params params; + params.resolve_host = dummy_resolve_host; + params.max_resolve_time = LazyResolver::seconds(0); + auto resolver = LazyResolver::create(params); + EXPECT_EQUAL(resolver->make_address("tcp/host_name:123")->resolve(), "tcp/ip.addr:123"); +} + +struct ResolveFixture { + std::mutex ip_lock; + std::map<vespalib::string,vespalib::string> ip_map; + std::map<vespalib::string, size_t> ip_cnt; + LazyResolver::SP resolver; + void set_ip_addr(const vespalib::string &host, const vespalib::string &ip_addr) { + std::lock_guard<std::mutex> guard(ip_lock); + ip_map[host] = ip_addr; + } + vespalib::string get_ip_addr(const vespalib::string &host) { + std::lock_guard<std::mutex> guard(ip_lock); + ++ip_cnt[host]; + return ip_map[host]; + } + size_t get_cnt(const vespalib::string &host) { + std::lock_guard<std::mutex> guard(ip_lock); + return ip_cnt[host]; + } + size_t get_total_cnt() { + size_t total = 0; + std::lock_guard<std::mutex> guard(ip_lock); + for (const auto &entry: ip_cnt) { + total += entry.second; + } + return total; + } + ResolveFixture(double max_age) : ip_lock(), ip_map(), ip_cnt(), resolver() { + LazyResolver::Params params; + params.resolve_host = [this](const vespalib::string &host_name){ return get_ip_addr(host_name); }; + params.max_result_age = LazyResolver::seconds(max_age); + resolver = LazyResolver::create(std::move(params)); + set_ip_addr("localhost", "127.0.0.1"); + set_ip_addr("127.0.0.1", "127.0.0.1"); + } + LazyResolver::Address::SP make(const vespalib::string &spec) { return resolver->make_address(spec); } +}; + +TEST_F("require that lazy resolver can be used to resolve connect specs without host names", ResolveFixture(300)) { + EXPECT_EQUAL(f1.make("this is bogus")->resolve(), "this is bogus"); + EXPECT_EQUAL(f1.make("tcp/123")->resolve(), "tcp/123"); + EXPECT_EQUAL(f1.make("ipc/file:my_socket")->resolve(), "ipc/file:my_socket"); + EXPECT_EQUAL(f1.make("ipc/name:my_socket")->resolve(), "ipc/name:my_socket"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_total_cnt(), 0u); +} + +TEST_F("require that resolved hosts can be shared between addresses", ResolveFixture(300)) { + auto addr1 = f1.make("tcp/localhost:123"); + auto addr2 = f1.make("tcp/localhost:456"); + EXPECT_EQUAL(addr1->resolve(), "tcp/127.0.0.1:123"); + EXPECT_EQUAL(addr2->resolve(), "tcp/127.0.0.1:456"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 1u); + EXPECT_EQUAL(f1.get_total_cnt(), 1u); +} + +TEST_F("require that resolved hosts are discarded when not used", ResolveFixture(300)) { + EXPECT_EQUAL(f1.make("tcp/localhost:123")->resolve(), "tcp/127.0.0.1:123"); + EXPECT_EQUAL(f1.make("tcp/localhost:456")->resolve(), "tcp/127.0.0.1:456"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 2u); + EXPECT_EQUAL(f1.get_total_cnt(), 2u); +} + +TEST_F("require that host names resolving to themselves (ip addresses) are not shared", ResolveFixture(300)) { + auto addr1 = f1.make("tcp/127.0.0.1:123"); + auto addr2 = f1.make("tcp/127.0.0.1:456"); + EXPECT_EQUAL(addr1->resolve(), "tcp/127.0.0.1:123"); + EXPECT_EQUAL(addr2->resolve(), "tcp/127.0.0.1:456"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("127.0.0.1"), 2u); + EXPECT_EQUAL(f1.get_total_cnt(), 2u); +} + +TEST_F("require that resolve changes can be detected", ResolveFixture(0)) { + auto addr = f1.make("tcp/localhost:123"); + f1.set_ip_addr("localhost", "127.0.0.2"); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.1:123"); + f1.resolver->wait_for_pending_updates(); + f1.set_ip_addr("localhost", "127.0.0.3"); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.2:123"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.3:123"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 4u); + EXPECT_EQUAL(f1.get_total_cnt(), 4u); +} + +TEST_F("require that resolve changes are not detected when old results are still fresh", ResolveFixture(300)) { + auto addr = f1.make("tcp/localhost:123"); + f1.set_ip_addr("localhost", "127.0.0.2"); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.1:123"); + f1.resolver->wait_for_pending_updates(); + f1.set_ip_addr("localhost", "127.0.0.3"); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.1:123"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.1:123"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 1u); + EXPECT_EQUAL(f1.get_total_cnt(), 1u); +} + +TEST_F("require that missing ip address gives invalid spec", ResolveFixture(300)) { + f1.set_ip_addr("localhost", ""); + auto addr = f1.make("tcp/localhost:123"); + EXPECT_EQUAL(addr->resolve(), "invalid"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 1u); + EXPECT_EQUAL(f1.get_total_cnt(), 1u); +} + +TEST_F("require that all ip address results are treated equally (including empty ones)", ResolveFixture(0)) { + auto addr = f1.make("tcp/localhost:123"); + f1.set_ip_addr("localhost", ""); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.1:123"); + f1.resolver->wait_for_pending_updates(); + f1.set_ip_addr("localhost", "127.0.0.2"); + EXPECT_EQUAL(addr->resolve(), "invalid"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(addr->resolve(), "tcp/127.0.0.2:123"); + f1.resolver->wait_for_pending_updates(); + EXPECT_EQUAL(f1.get_cnt("localhost"), 4u); + EXPECT_EQUAL(f1.get_total_cnt(), 4u); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/tests/net/socket_spec/socket_spec_test.cpp b/vespalib/src/tests/net/socket_spec/socket_spec_test.cpp index 8e2e2f0e8b7..0c2b92bbb53 100644 --- a/vespalib/src/tests/net/socket_spec/socket_spec_test.cpp +++ b/vespalib/src/tests/net/socket_spec/socket_spec_test.cpp @@ -107,4 +107,20 @@ TEST("require that port-only spec resolves to non-wildcard client address") { EXPECT_TRUE(!SocketSpec("tcp/123").client_address().is_wildcard()); } +TEST("require that replace_host makes new spec with replaced host") { + SocketSpec old_spec("tcp/host:123"); + const SocketSpec &const_spec = old_spec; + SocketSpec new_spec = const_spec.replace_host("foo"); + TEST_DO(verify_host_port(old_spec, "host", 123)); + TEST_DO(verify_host_port(new_spec, "foo", 123)); +} + +TEST("require that replace_host gives invalid spec when used with less than 2 host names") { + TEST_DO(verify_invalid(SocketSpec("bogus").replace_host("foo"))); + TEST_DO(verify_invalid(SocketSpec("tcp/123").replace_host("foo"))); + TEST_DO(verify_invalid(SocketSpec("tcp/host:123").replace_host(""))); + TEST_DO(verify_invalid(SocketSpec("ipc/file:my_socket").replace_host("foo"))); + TEST_DO(verify_invalid(SocketSpec("ipc/name:my_socket").replace_host("foo"))); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/vespa/vespalib/net/CMakeLists.txt b/vespalib/src/vespa/vespalib/net/CMakeLists.txt index ce9a4c87661..26c5c867631 100644 --- a/vespalib/src/vespa/vespalib/net/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/net/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(vespalib_vespalib_net OBJECT SOURCES + lazy_resolver.cpp selector.cpp server_socket.cpp socket.cpp diff --git a/vespalib/src/vespa/vespalib/net/lazy_resolver.cpp b/vespalib/src/vespa/vespalib/net/lazy_resolver.cpp new file mode 100644 index 00000000000..ef2b44958c9 --- /dev/null +++ b/vespalib/src/vespa/vespalib/net/lazy_resolver.cpp @@ -0,0 +1,205 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "lazy_resolver.h" +#include "socket_spec.h" + +#include <vespa/log/log.h> +LOG_SETUP(".vespalib.net.lazy_resolver"); + +namespace vespalib { + +VESPA_THREAD_STACK_TAG(lazy_resolver_executor_thread); + +LazyResolver::Params::Params() + : resolve_host(default_resolve_host), + max_result_age(seconds(300.0)), + max_resolve_time(seconds(1.0)) +{ +} + +//----------------------------------------------------------------------------- + +LazyResolver::Host::Host(const vespalib::string &host_name, LazyResolver::SP resolver, + const vespalib::string &ip_address) + : _host_name(host_name), + _resolver(std::move(resolver)), + _ip_lock(), + _ip_pending(false), + _ip_address(ip_address), + _ip_updated(clock::now()) +{ +} + +void +LazyResolver::Host::update_ip_address(const vespalib::string &ip_address) +{ + std::lock_guard<std::mutex> guard(_ip_lock); + _ip_pending = false; + _ip_address = ip_address; + _ip_updated = clock::now(); +} + +LazyResolver::Host::~Host() +{ + // clean up weak_ptr to this + _resolver->try_lookup_host(_host_name); +} + +vespalib::string +LazyResolver::Host::resolve() +{ + std::lock_guard<std::mutex> guard(_ip_lock); + if (!_ip_pending && _resolver->should_request_update(_ip_updated)) { + // TODO(havardpe): switch to weak_from_this() when available + _ip_pending = _resolver->try_request_update(shared_from_this()); + } + return _ip_address; +} + +//----------------------------------------------------------------------------- + +vespalib::string +LazyResolver::Address::resolve() +{ + if (_host) { + return SocketSpec(_spec).replace_host(_host->resolve()).spec(); + } + return _spec; +} + +//----------------------------------------------------------------------------- + +void +LazyResolver::UpdateTask::run() +{ + if (Host::SP host = weak_host.lock()) { + host->update_ip_address(resolver.resolve_host_now(host->host_name())); + } +} + +//----------------------------------------------------------------------------- + +LazyResolver::LazyResolver(Params params) + : _host_lock(), + _host_map(), + _params(std::move(params)), + _executor(1, 128*1024, lazy_resolver_executor_thread, 4096) +{ +} + +LazyResolver::Host::SP +LazyResolver::try_lookup_host(const vespalib::string &host_name, + const std::lock_guard<std::mutex> &guard) +{ + (void) guard; + auto pos = _host_map.find(host_name); + if (pos != _host_map.end()) { + Host::SP host = pos->second.lock(); + if (host) { + return host; + } else { + _host_map.erase(pos); + } + } + return Host::SP(nullptr); +} + +LazyResolver::Host::SP +LazyResolver::try_lookup_host(const vespalib::string &host_name) +{ + std::lock_guard<std::mutex> guard(_host_lock); + return try_lookup_host(host_name, guard); +} + +LazyResolver::Host::SP +LazyResolver::insert_host(const vespalib::string &host_name, const vespalib::string &ip_address) +{ + std::lock_guard<std::mutex> guard(_host_lock); + Host::SP host = try_lookup_host(host_name, guard); + if (!host) { + host.reset(new Host(host_name, shared_from_this(), ip_address)); + _host_map.emplace(host_name, host); + } + return host; +} + +vespalib::string +LazyResolver::resolve_host_now(const vespalib::string &host_name) +{ + auto before = clock::now(); + vespalib::string ip_address = _params.resolve_host(host_name); + seconds resolve_time = (clock::now() - before); + if (resolve_time >= _params.max_resolve_time) { + LOG(warning, "slow resolve time: '%s' -> '%s' (%g s)", + host_name.c_str(), ip_address.c_str(), resolve_time.count()); + } + if (ip_address.empty()) { + LOG(warning, "could not resolve host name: '%s'", host_name.c_str()); + } + return ip_address; +} + +bool +LazyResolver::should_request_update(clock::time_point ip_updated) +{ + seconds result_age = (clock::now() - ip_updated); + return (result_age >= _params.max_result_age); +} + +bool +LazyResolver::try_request_update(std::weak_ptr<Host> self) +{ + Executor::Task::UP task(new UpdateTask(*this, std::move(self))); + auto rejected = _executor.execute(std::move(task)); + return !rejected; +} + +//----------------------------------------------------------------------------- + +LazyResolver::~LazyResolver() +{ + _executor.shutdown().sync(); +} + +LazyResolver::Host::SP +LazyResolver::make_host(const vespalib::string &host_name) +{ + if (host_name.empty()) { + return Host::SP(nullptr); + } + Host::SP host = try_lookup_host(host_name); + if (host) { + return host; + } + vespalib::string ip_address = resolve_host_now(host_name); + if (ip_address == host_name) { + return Host::SP(nullptr); + } + return insert_host(host_name, ip_address); +} + +LazyResolver::Address::SP +LazyResolver::make_address(const vespalib::string &spec_str) +{ + SocketSpec spec(spec_str); + if (!spec.valid()) { + LOG(warning, "invalid socket spec: '%s'\n", spec_str.c_str()); + } + return Address::SP(new Address(spec_str, make_host(spec.host()))); +} + +//----------------------------------------------------------------------------- + +vespalib::string +LazyResolver::default_resolve_host(const vespalib::string &host_name) +{ + return SocketAddress::select_remote(80, host_name.c_str()).ip_address(); +} + +std::shared_ptr<LazyResolver> +LazyResolver::create(Params params) +{ + return std::shared_ptr<LazyResolver>(new LazyResolver(std::move(params))); +} + +} // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/net/lazy_resolver.h b/vespalib/src/vespa/vespalib/net/lazy_resolver.h new file mode 100644 index 00000000000..e740f3de463 --- /dev/null +++ b/vespalib/src/vespa/vespalib/net/lazy_resolver.h @@ -0,0 +1,108 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "socket_address.h" +#include "socket_spec.h" +#include <vespa/vespalib/util/threadstackexecutor.h> +#include <chrono> +#include <memory> +#include <mutex> +#include <map> + +namespace vespalib { + +/** + * Component used to perform lazy re-resolving of host names. The goal + * of this class is to allow applications to (re-)connect from within + * a network thread without stalling everything due to slow dns + * responses while still being able to pick up on dns changes + * (eventually). The idea is that the make_address function is called + * up front during configuration from a non-critical thread. It will + * (potentially) perform an initial synchronous resolve and return an + * Address object that can later be used to obtain a string-based + * connect spec where any host names have been replaced by ip + * addresses without blocking. The host names are resolved under the + * assumption that they will be used to connect to a remote server. + **/ +class LazyResolver : public std::enable_shared_from_this<LazyResolver> +{ +public: + using resolve_host_t = std::function<vespalib::string(const vespalib::string &)>; + using clock = std::chrono::steady_clock; + using seconds = std::chrono::duration<double>; + using SP = std::shared_ptr<LazyResolver>; + + struct Params + { + resolve_host_t resolve_host; + seconds max_result_age; + seconds max_resolve_time; + Params(); + }; + + class Host : public std::enable_shared_from_this<Host> + { + private: + friend class LazyResolver; + vespalib::string _host_name; + LazyResolver::SP _resolver; + std::mutex _ip_lock; + bool _ip_pending; + vespalib::string _ip_address; + clock::time_point _ip_updated; + Host(const vespalib::string &host_name, LazyResolver::SP resolver, + const vespalib::string &ip_address); + void update_ip_address(const vespalib::string &ip_address); + public: + ~Host(); + using SP = std::shared_ptr<Host>; + const vespalib::string &host_name() const { return _host_name; } + vespalib::string resolve(); + }; + + class Address + { + private: + friend class LazyResolver; + vespalib::string _spec; + Host::SP _host; + Address(const vespalib::string &spec, Host::SP host) + : _spec(spec), _host(std::move(host)) {} + public: + using SP = std::shared_ptr<Address>; + const vespalib::string &spec() const { return _spec; } + vespalib::string resolve(); + }; + +private: + struct UpdateTask : Executor::Task { + LazyResolver &resolver; + std::weak_ptr<Host> weak_host; + UpdateTask(LazyResolver &resolver_in, std::weak_ptr<Host> weak_host_in) + : resolver(resolver_in), weak_host(std::move(weak_host_in)) {} + void run() override; + }; + + std::mutex _host_lock; + std::map<vespalib::string, std::weak_ptr<Host> > _host_map; + Params _params; + ThreadStackExecutor _executor; + LazyResolver(Params params); + Host::SP try_lookup_host(const vespalib::string &host_name, + const std::lock_guard<std::mutex> &guard); + Host::SP try_lookup_host(const vespalib::string &host_name); + Host::SP insert_host(const vespalib::string &host_name, const vespalib::string &ip_address); + vespalib::string resolve_host_now(const vespalib::string &host_name); + bool should_request_update(clock::time_point ip_updated); + bool try_request_update(std::weak_ptr<Host> self); +public: + ~LazyResolver(); + void wait_for_pending_updates() { _executor.sync(); } + Host::SP make_host(const vespalib::string &host_name); + Address::SP make_address(const vespalib::string &spec); + static vespalib::string default_resolve_host(const vespalib::string &host_name); + static SP create(Params params = Params()); +}; + +} // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/net/socket_spec.cpp b/vespalib/src/vespa/vespalib/net/socket_spec.cpp index 7870dd435f8..d1376ce1dd7 100644 --- a/vespalib/src/vespa/vespalib/net/socket_spec.cpp +++ b/vespalib/src/vespa/vespalib/net/socket_spec.cpp @@ -99,4 +99,13 @@ SocketSpec::spec() const return "invalid"; } +SocketSpec +SocketSpec::replace_host(const vespalib::string &new_host) const +{ + if ((_type == Type::HOST_PORT) && !new_host.empty()) { + return from_host_port(new_host, _port); + } + return SocketSpec(); +} + } // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/net/socket_spec.h b/vespalib/src/vespa/vespalib/net/socket_spec.h index 13761387450..f28b14573ac 100644 --- a/vespalib/src/vespa/vespalib/net/socket_spec.h +++ b/vespalib/src/vespa/vespalib/net/socket_spec.h @@ -26,6 +26,7 @@ private: public: explicit SocketSpec(const vespalib::string &spec); vespalib::string spec() const; + SocketSpec replace_host(const vespalib::string &new_host) const; static SocketSpec from_path(const vespalib::string &path) { return SocketSpec(Type::PATH, path, -1); } |