diff options
48 files changed, 1693 insertions, 322 deletions
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index ef2767249a5..b9909214dfd 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -44,6 +44,7 @@ import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.OptionalInt; @@ -1525,7 +1526,7 @@ public class ContentClusterTest extends ContentBaseTest { i, i, i); } return services + - String.format(" </group>" + + String.format(Locale.US, " </group>" + " <tuning>" + " <cluster-controller>" + " <groups-allowed-down-ratio>%f</groups-allowed-down-ratio>" + diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/zone/ZoneId.java b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/ZoneId.java index 7c5c15e23e6..b95c0cce149 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/zone/ZoneId.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/zone/ZoneId.java @@ -15,6 +15,17 @@ import java.util.Objects; */ public class ZoneId { + private static final ZoneId CONTROLLER = from(Environment.prod, RegionName.from("controller")); + + /** + * The ZoneId associated with the controller, distinct from all other zones in the system, but a constant across systems. + * + * <p>The controller may also be associated with a real zone, i.e. with a region defining the location like + * aws-us-east-1a. 
Because such a zone ID is different for different systems, and may clash with a prod zone in the + * same region and system, the virtual zone ID is often used.</p> + */ + public static ZoneId ofVirtualControllerZone() { return CONTROLLER; } + private final Environment environment; private final RegionName region; diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ConfigServerFlagsTarget.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ConfigServerFlagsTarget.java index 5842ee3c3c0..585000cf22c 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ConfigServerFlagsTarget.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ConfigServerFlagsTarget.java @@ -1,9 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.systemflags.v1; +import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.athenz.api.AthenzIdentity; +import com.yahoo.vespa.flags.json.FlagData; import java.net.URI; import java.util.List; @@ -20,12 +22,14 @@ import static com.yahoo.vespa.hosted.controller.api.systemflags.v1.FlagsTarget.z */ class ConfigServerFlagsTarget implements FlagsTarget { private final SystemName system; + private final CloudName cloud; private final ZoneId zone; private final URI endpoint; private final AthenzIdentity identity; - ConfigServerFlagsTarget(SystemName system, ZoneId zone, URI endpoint, AthenzIdentity identity) { + ConfigServerFlagsTarget(SystemName system, CloudName cloud, ZoneId zone, URI endpoint, AthenzIdentity identity) { this.system = Objects.requireNonNull(system); + this.cloud = Objects.requireNonNull(cloud); this.zone = Objects.requireNonNull(zone); this.endpoint = 
Objects.requireNonNull(endpoint); this.identity = Objects.requireNonNull(identity); @@ -36,16 +40,32 @@ class ConfigServerFlagsTarget implements FlagsTarget { @Override public Optional<AthenzIdentity> athenzHttpsIdentity() { return Optional.of(identity); } @Override public String asString() { return String.format("%s.%s", system.value(), zone.value()); } - @Override public boolean equals(Object o) { + @Override + public FlagData partiallyResolveFlagData(FlagData data) { + return FlagsTarget.partialResolve(data, system, cloud, zone); + } + + @Override + public String toString() { + return "ConfigServerFlagsTarget{" + + "system=" + system + + ", cloud=" + cloud + + ", zone=" + zone + + ", endpoint=" + endpoint + + ", identity=" + identity + + '}'; + } + + @Override + public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ConfigServerFlagsTarget that = (ConfigServerFlagsTarget) o; - return system == that.system && - Objects.equals(zone, that.zone) && - Objects.equals(endpoint, that.endpoint) && - Objects.equals(identity, that.identity); + return system == that.system && cloud.equals(that.cloud) && zone.equals(that.zone) && endpoint.equals(that.endpoint) && identity.equals(that.identity); } - @Override public int hashCode() { return Objects.hash(system, zone, endpoint, identity); } + @Override + public int hashCode() { + return Objects.hash(system, cloud, zone, endpoint, identity); + } } - diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ControllerFlagsTarget.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ControllerFlagsTarget.java index efeaf12de1c..043c6ea5963 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ControllerFlagsTarget.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/ControllerFlagsTarget.java @@ -1,8 +1,11 @@ // 
Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.systemflags.v1; +import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.athenz.api.AthenzIdentity; +import com.yahoo.vespa.flags.json.FlagData; import java.net.URI; import java.util.List; @@ -18,20 +21,44 @@ import static com.yahoo.vespa.hosted.controller.api.systemflags.v1.FlagsTarget.s */ class ControllerFlagsTarget implements FlagsTarget { private final SystemName system; + private final CloudName cloud; + private final ZoneId zone; - ControllerFlagsTarget(SystemName system) { this.system = Objects.requireNonNull(system); } + ControllerFlagsTarget(SystemName system, CloudName cloud, ZoneId zone) { + this.system = Objects.requireNonNull(system); + this.cloud = Objects.requireNonNull(cloud); + this.zone = Objects.requireNonNull(zone); + } @Override public List<String> flagDataFilesPrioritized() { return List.of(controllerFile(system), systemFile(system), defaultFile()); } @Override public URI endpoint() { return URI.create("https://localhost:4443/"); } // Note: Cannot use VIPs for controllers due to network configuration on AWS @Override public Optional<AthenzIdentity> athenzHttpsIdentity() { return Optional.empty(); } @Override public String asString() { return String.format("%s.controller", system.value()); } - @Override public boolean equals(Object o) { + @Override + public FlagData partiallyResolveFlagData(FlagData data) { + return FlagsTarget.partialResolve(data, system, cloud, zone); + } + + @Override + public String toString() { + return "ControllerFlagsTarget{" + + "system=" + system + + ", cloud=" + cloud + + ", zone=" + zone + + '}'; + } + + @Override + public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ControllerFlagsTarget that = 
(ControllerFlagsTarget) o; - return system == that.system; + return system == that.system && cloud.equals(that.cloud) && zone.equals(that.zone); } - @Override public int hashCode() { return Objects.hash(system); } + @Override + public int hashCode() { + return Objects.hash(system, cloud, zone); + } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/FlagsTarget.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/FlagsTarget.java index 1c8e68ff378..bad53620c81 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/FlagsTarget.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/FlagsTarget.java @@ -1,20 +1,32 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.systemflags.v1; +import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.config.provision.zone.ZoneList; import com.yahoo.vespa.athenz.api.AthenzIdentity; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagDefinition; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.json.FlagData; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneRegistry; import java.net.URI; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import static com.yahoo.vespa.flags.FetchVector.Dimension.CLOUD; +import static com.yahoo.vespa.flags.FetchVector.Dimension.ENVIRONMENT; +import static com.yahoo.vespa.flags.FetchVector.Dimension.SYSTEM; +import static com.yahoo.vespa.flags.FetchVector.Dimension.ZONE_ID; + /** 
* Represents either configservers in a zone or controllers in a system. * @@ -38,24 +50,28 @@ public interface FlagsTarget { Optional<AthenzIdentity> athenzHttpsIdentity(); String asString(); + FlagData partiallyResolveFlagData(FlagData data); + static Set<FlagsTarget> getAllTargetsInSystem(ZoneRegistry registry, boolean reachableOnly) { - SystemName system = registry.system(); Set<FlagsTarget> targets = new HashSet<>(); ZoneList filteredZones = reachableOnly ? registry.zones().reachable() : registry.zones().all(); for (ZoneApi zone : filteredZones.zones()) { - targets.add(forConfigServer(registry, zone.getId())); + targets.add(forConfigServer(registry, zone)); } - targets.add(forController(system)); + targets.add(forController(registry.systemZone())); return targets; } - static FlagsTarget forController(SystemName systemName) { - return new ControllerFlagsTarget(systemName); + static FlagsTarget forController(ZoneApi controllerZone) { + return new ControllerFlagsTarget(controllerZone.getSystemName(), controllerZone.getCloudName(), controllerZone.getVirtualId()); } - static FlagsTarget forConfigServer(ZoneRegistry registry, ZoneId zoneId) { - return new ConfigServerFlagsTarget( - registry.system(), zoneId, registry.getConfigServerVipUri(zoneId), registry.getConfigServerHttpsIdentity(zoneId)); + static FlagsTarget forConfigServer(ZoneRegistry registry, ZoneApi zone) { + return new ConfigServerFlagsTarget(registry.system(), + zone.getCloudName(), + zone.getVirtualId(), + registry.getConfigServerVipUri(zone.getVirtualId()), + registry.getConfigServerHttpsIdentity(zone.getVirtualId())); } static String defaultFile() { return jsonFile("default"); } @@ -64,6 +80,25 @@ public interface FlagsTarget { static String zoneFile(SystemName system, ZoneId zone) { return jsonFile(system.value() + "." + zone.environment().value() + "." 
+ zone.region().value()); } static String controllerFile(SystemName system) { return jsonFile(system.value() + ".controller"); } + /** Partially resolve inter-zone dimensions, except those dimensions defined by the flag for a controller zone. */ + static FlagData partialResolve(FlagData data, SystemName system, CloudName cloud, ZoneId virtualZoneId) { + Set<FetchVector.Dimension> flagDimensions = + virtualZoneId.equals(ZoneId.ofVirtualControllerZone()) ? + Flags.getFlag(data.id()) + .map(FlagDefinition::getDimensions) + .map(Set::copyOf) + // E.g. testing: Assume unknown flag should resolve any and all dimensions below + .orElse(EnumSet.noneOf(FetchVector.Dimension.class)) : + EnumSet.noneOf(FetchVector.Dimension.class); + + var fetchVector = new FetchVector(); + if (!flagDimensions.contains(CLOUD)) fetchVector = fetchVector.with(CLOUD, cloud.value()); + if (!flagDimensions.contains(ENVIRONMENT)) fetchVector = fetchVector.with(ENVIRONMENT, virtualZoneId.environment().value()); + if (!flagDimensions.contains(SYSTEM)) fetchVector = fetchVector.with(SYSTEM, system.value()); + if (!flagDimensions.contains(ZONE_ID)) fetchVector = fetchVector.with(ZONE_ID, virtualZoneId.value()); + return fetchVector.isEmpty() ? 
data : data.partialResolve(fetchVector); + } + private static String jsonFile(String nameWithoutExtension) { return nameWithoutExtension + ".json"; } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchive.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchive.java index 60950341a42..1c547fea8ba 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchive.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchive.java @@ -4,9 +4,17 @@ package com.yahoo.vespa.hosted.controller.api.systemflags.v1; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.config.provision.zone.ZoneApi; +import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.text.JSON; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagId; @@ -38,6 +46,9 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import java.util.zip.ZipOutputStream; +import static com.yahoo.config.provision.CloudName.AWS; +import static com.yahoo.config.provision.CloudName.GCP; +import static com.yahoo.config.provision.CloudName.YAHOO; import static com.yahoo.yolean.Exceptions.uncheck; /** @@ -189,7 +200,10 @@ public class SystemFlagsDataArchive { if (rawData.isBlank()) { flagData = new FlagData(directoryDeducedFlagId); } else { 
- String normalizedRawData = normalizeJson(rawData); + Set<ZoneId> zones = systemDefinition == null ? + Set.of() : + systemDefinition.zones().all().zones().stream().map(ZoneApi::getVirtualId).collect(Collectors.toSet()); + String normalizedRawData = normalizeJson(rawData, zones); flagData = FlagData.deserialize(normalizedRawData); if (!directoryDeducedFlagId.equals(flagData.id())) { throw new IllegalArgumentException( @@ -217,41 +231,63 @@ public class SystemFlagsDataArchive { builder.addFile(filename, flagData); } - static String normalizeJson(String json) { + static String normalizeJson(String json, Set<ZoneId> zones) { JsonNode root = uncheck(() -> mapper.readTree(json)); removeCommentsRecursively(root); - verifyValues(root); + verifyValues(root, zones); return root.toString(); } - private static void verifyValues(JsonNode root) { + private static void verifyValues(JsonNode root, Set<ZoneId> zones) { var cursor = new JsonAccessor(root); cursor.get("rules").forEachArrayElement(rule -> rule.get("conditions").forEachArrayElement(condition -> { - var dimension = condition.get("dimension"); - if (dimension.isEqualTo(DimensionHelper.toWire(FetchVector.Dimension.APPLICATION_ID))) { - condition.get("values").forEachArrayElement(conditionValue -> { - String applicationIdString = conditionValue.asString() - .orElseThrow(() -> new IllegalArgumentException("Non-string application ID: " + conditionValue)); - // Throws exception if not recognized - ApplicationId.fromSerializedForm(applicationIdString); + FetchVector.Dimension dimension = DimensionHelper + .fromWire(condition.get("dimension") + .asString() + .orElseThrow(() -> new IllegalArgumentException("Invalid dimension in condition: " + condition))); + switch (dimension) { + case APPLICATION_ID -> validateStringValues(condition, ApplicationId::fromSerializedForm); + case CONSOLE_USER_EMAIL -> validateStringValues(condition, email -> {}); + case CLOUD -> validateStringValues(condition, cloud -> { + if (!Set.of(YAHOO, AWS, 
GCP).contains(CloudName.from(cloud))) + throw new IllegalArgumentException("Unknown cloud: " + cloud); }); - } else if (dimension.isEqualTo(DimensionHelper.toWire(FetchVector.Dimension.NODE_TYPE))) { - condition.get("values").forEachArrayElement(conditionValue -> { - String nodeTypeString = conditionValue.asString() - .orElseThrow(() -> new IllegalArgumentException("Non-string node type: " + conditionValue)); - // Throws exception if not recognized - NodeType.valueOf(nodeTypeString); + case CLUSTER_ID -> validateStringValues(condition, ClusterSpec.Id::from); + case CLUSTER_TYPE -> validateStringValues(condition, ClusterSpec.Type::from); + case ENVIRONMENT -> validateStringValues(condition, Environment::from); + case HOSTNAME -> validateStringValues(condition, HostName::of); + case NODE_TYPE -> validateStringValues(condition, NodeType::valueOf); + case SYSTEM -> validateStringValues(condition, system -> { + if (!Set.of(SystemName.cd, SystemName.main, SystemName.PublicCd, SystemName.Public).contains(SystemName.from(system))) + throw new IllegalArgumentException("Unknown system: " + system); + }); + case TENANT_ID -> validateStringValues(condition, TenantName::from); + case VESPA_VERSION -> validateStringValues(condition, versionString -> { + Version vespaVersion = Version.fromString(versionString); + if (vespaVersion.getMajor() < 8) + throw new IllegalArgumentException("Major Vespa version must be at least 8: " + versionString); + }); + case ZONE_ID -> validateStringValues(condition, zoneId -> { + if (!zones.contains(ZoneId.from(zoneId))) + throw new IllegalArgumentException("Unknown zone: " + zoneId); }); - } else if (dimension.isEqualTo(DimensionHelper.toWire(FetchVector.Dimension.CONSOLE_USER_EMAIL))) { - condition.get("values").forEachArrayElement(conditionValue -> conditionValue.asString() - .orElseThrow(() -> new IllegalArgumentException("Non-string email address: " + conditionValue))); - } else if 
(dimension.isEqualTo(DimensionHelper.toWire(FetchVector.Dimension.TENANT_ID))) { - condition.get("values").forEachArrayElement(conditionValue -> conditionValue.asString() - .orElseThrow(() -> new IllegalArgumentException("Non-string tenant ID: " + conditionValue))); } })); } + private static void validateStringValues(JsonAccessor condition, Consumer<String> valueValidator) { + condition.get("values").forEachArrayElement(conditionValue -> { + String value = conditionValue.asString() + .orElseThrow(() -> { + String dimension = condition.get("dimension").asString().orElseThrow(); + String type = condition.get("type").asString().orElseThrow(); + return new IllegalArgumentException("Non-string value in %s %s condition: %s".formatted( + dimension, type, conditionValue)); + }); + valueValidator.accept(value); + }); + } + private static void removeCommentsRecursively(JsonNode node) { if (node instanceof ObjectNode) { ObjectNode objectNode = (ObjectNode) node; diff --git a/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchiveTest.java b/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchiveTest.java index d010893f1d4..a24bed54a8a 100644 --- a/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchiveTest.java +++ b/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/systemflags/v1/SystemFlagsDataArchiveTest.java @@ -54,15 +54,20 @@ public class SystemFlagsDataArchiveTest { @TempDir public File temporaryFolder; - private static final FlagsTarget mainControllerTarget = FlagsTarget.forController(SYSTEM); - private static final FlagsTarget cdControllerTarget = FlagsTarget.forController(SystemName.cd); + private static final FlagsTarget mainControllerTarget = createControllerTarget(SYSTEM); + private static final FlagsTarget cdControllerTarget = createControllerTarget(SystemName.cd); private static final FlagsTarget 
prodUsWestCfgTarget = createConfigserverTarget(Environment.prod, "us-west-1"); private static final FlagsTarget prodUsEast3CfgTarget = createConfigserverTarget(Environment.prod, "us-east-3"); private static final FlagsTarget devUsEast1CfgTarget = createConfigserverTarget(Environment.dev, "us-east-1"); + private static FlagsTarget createControllerTarget(SystemName system) { + return new ControllerFlagsTarget(system, CloudName.YAHOO, ZoneId.from(Environment.prod, RegionName.from("us-east-1"))); + } + private static FlagsTarget createConfigserverTarget(Environment environment, String region) { return new ConfigServerFlagsTarget( SYSTEM, + CloudName.YAHOO, ZoneId.from(environment, RegionName.from(region)), URI.create("https://cfg-" + region), new AthenzService("vespa.cfg-" + region)); @@ -177,102 +182,85 @@ public class SystemFlagsDataArchiveTest { " \"comment\": \"comment d\"\n" + " }\n" + " ]\n" + - "}"))); + "}", Set.of()))); } @Test - void normalize_json_fail_on_invalid_application() { - try { - SystemFlagsDataArchive.normalizeJson("{\n" + - " \"id\": \"foo\",\n" + - " \"rules\": [\n" + - " {\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"application\",\n" + - " \"values\": [ \"a.b.c\" ]\n" + - " }\n" + - " ],\n" + - " \"value\": true\n" + - " }\n" + - " ]\n" + - "}\n"); - fail(); - } catch (IllegalArgumentException e) { - assertEquals("Application ids must be on the form tenant:application:instance, but was a.b.c", e.getMessage()); - } + void normalize_json_succeed_on_valid_values() { + normalizeJson("application", "\"a:b:c\""); + normalizeJson("cloud", "\"yahoo\""); + normalizeJson("cloud", "\"aws\""); + normalizeJson("cloud", "\"gcp\""); + normalizeJson("cluster-id", "\"some-id\""); + normalizeJson("cluster-type", "\"admin\""); + normalizeJson("cluster-type", "\"container\""); + normalizeJson("cluster-type", "\"content\""); + normalizeJson("console-user-email", "\"name@domain.com\""); + 
normalizeJson("environment", "\"prod\""); + normalizeJson("environment", "\"staging\""); + normalizeJson("environment", "\"test\""); + normalizeJson("hostname", "\"2080046-v6-11.ostk.bm2.prod.gq1.yahoo.com\""); + normalizeJson("node-type", "\"tenant\""); + normalizeJson("node-type", "\"host\""); + normalizeJson("node-type", "\"config\""); + normalizeJson("node-type", "\"host\""); + normalizeJson("system", "\"main\""); + normalizeJson("system", "\"public\""); + normalizeJson("tenant", "\"vespa\""); + normalizeJson("vespa-version", "\"8.201.13\""); + normalizeJson("zone", "\"prod.us-west-1\"", Set.of(ZoneId.from("prod.us-west-1"))); } - @Test - void normalize_json_fail_on_invalid_node_type() { - try { - SystemFlagsDataArchive.normalizeJson("{\n" + - " \"id\": \"foo\",\n" + - " \"rules\": [\n" + - " {\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"node-type\",\n" + - " \"values\": [ \"footype\" ]\n" + - " }\n" + - " ],\n" + - " \"value\": true\n" + - " }\n" + - " ]\n" + - "}\n"); - fail(); - } catch (IllegalArgumentException e) { - assertEquals("No enum constant com.yahoo.config.provision.NodeType.footype", e.getMessage()); - } + private void normalizeJson(String dimension, String jsonValue) { + normalizeJson(dimension, jsonValue, Set.of()); } - @Test - void normalize_json_fail_on_invalid_email() { - try { - SystemFlagsDataArchive.normalizeJson("{\n" + - " \"id\": \"foo\",\n" + - " \"rules\": [\n" + - " {\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"console-user-email\",\n" + - " \"values\": [ 123 ]\n" + - " }\n" + - " ],\n" + - " \"value\": true\n" + - " }\n" + - " ]\n" + - "}\n"); - fail(); - } catch (IllegalArgumentException e) { - assertEquals("Non-string email address: 123", e.getMessage()); - } + private void normalizeJson(String dimension, String jsonValue, Set<ZoneId> zones) { + SystemFlagsDataArchive.normalizeJson(""" + { + "id": "foo", + "rules": [ + { + 
"conditions": [ + { + "type": "whitelist", + "dimension": "%s", + "values": [ %s ] + } + ], + "value": true + } + ] + } + """.formatted(dimension, jsonValue), zones); } @Test - void normalize_json_fail_on_invalid_tenant_id() { + void normalize_json_fail_on_invalid_values() { + failNormalizeJson("application", "\"a.b.c\"", "Application ids must be on the form tenant:application:instance, but was a.b.c"); + failNormalizeJson("cloud", "\"foo\"", "Unknown cloud: foo"); + // failNormalizeJson("cluster-id", ... any String is valid + failNormalizeJson("cluster-type", "\"foo\"", "Illegal cluster type 'foo'"); + failNormalizeJson("console-user-email", "123", "Non-string value in console-user-email whitelist condition: 123"); + failNormalizeJson("environment", "\"foo\"", "'foo' is not a valid environment identifier"); + failNormalizeJson("hostname", "\"not:a:hostname\"", "hostname must match '(([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])\\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])\\.?', but got: 'not:a:hostname'"); + failNormalizeJson("node-type", "\"footype\"", "No enum constant com.yahoo.config.provision.NodeType.footype"); + failNormalizeJson("system", "\"bar\"", "'bar' is not a valid system"); + failNormalizeJson("tenant", "123", "Non-string value in tenant whitelist condition: 123"); + failNormalizeJson("vespa-version", "\"not-a-version\"", "Invalid version component in 'not-a-version'"); + failNormalizeJson("zone", "\"dev.non-existing-zone\"", Set.of(ZoneId.from("prod.example-region")), "Unknown zone: dev.non-existing-zone"); + } + + private void failNormalizeJson(String dimension, String jsonValue, String expectedExceptionMessage) { + failNormalizeJson(dimension, jsonValue, Set.of(), expectedExceptionMessage); + } + + private void failNormalizeJson(String dimension, String jsonValue, Set<ZoneId> zones, String expectedExceptionMessage) { try { - SystemFlagsDataArchive.normalizeJson("{\n" + - " \"id\": \"foo\",\n" + - " \"rules\": [\n" + - " 
{\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"tenant\",\n" + - " \"values\": [ 123 ]\n" + - " }\n" + - " ],\n" + - " \"value\": true\n" + - " }\n" + - " ]\n" + - "}\n"); + normalizeJson(dimension, jsonValue, zones); fail(); - } catch (IllegalArgumentException e) { - assertEquals("Non-string tenant ID: 123", e.getMessage()); + } catch (RuntimeException e) { + assertEquals(expectedExceptionMessage, e.getMessage()); } } @@ -291,6 +279,11 @@ public class SystemFlagsDataArchiveTest { // Cannot use the standard registry mock as it's located in controller-server module ZoneRegistry registryMock = mock(ZoneRegistry.class); when(registryMock.system()).thenReturn(SystemName.main); + ZoneApi zoneApi = mock(ZoneApi.class); + when(zoneApi.getSystemName()).thenReturn(SystemName.main); + when(zoneApi.getCloudName()).thenReturn(CloudName.YAHOO); + when(zoneApi.getVirtualId()).thenReturn(ZoneId.ofVirtualControllerZone()); + when(registryMock.systemZone()).thenReturn(zoneApi); when(registryMock.getConfigServerVipUri(any())).thenReturn(URI.create("http://localhost:8080/")); when(registryMock.getConfigServerHttpsIdentity(any())).thenReturn(new AthenzService("domain", "servicename")); ZoneList zoneListMock = mock(ZoneList.class); @@ -333,7 +326,7 @@ public class SystemFlagsDataArchiveTest { @Override public SystemName getSystemName() { return SystemName.main; } @Override public ZoneId getId() { return zoneId; } - @Override public CloudName getCloudName() { throw new UnsupportedOperationException(); } + @Override public CloudName getCloudName() { return CloudName.YAHOO; } @Override public String getCloudNativeRegionName() { throw new UnsupportedOperationException(); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployer.java index abc888abccb..355f06fc753 
100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployer.java @@ -78,8 +78,12 @@ class SystemFlagsDeployer { return SystemFlagsDeployResult.merge(results); } - private SystemFlagsDeployResult deployFlags(FlagsTarget target, List<FlagData> flagData, boolean dryRun) { - Map<FlagId, FlagData> wantedFlagData = lookupTable(flagData); + private SystemFlagsDeployResult deployFlags(FlagsTarget target, List<FlagData> flagDataList, boolean dryRun) { + flagDataList = flagDataList.stream() + .map(target::partiallyResolveFlagData) + .filter(flagData -> !flagData.isEmpty()) + .toList(); + Map<FlagId, FlagData> wantedFlagData = lookupTable(flagDataList); Map<FlagId, FlagData> currentFlagData; List<FlagId> definedFlags; try { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java index 6fd44e09d8d..21fe1f66bc5 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneApiMock.java @@ -36,8 +36,8 @@ public class ZoneApiMock implements ZoneApi { } } - public static ZoneApiMock fromId(String id) { - return from(ZoneId.from(id)); + public static ZoneApiMock fromId(String zoneId) { + return from(ZoneId.from(zoneId)); } public static ZoneApiMock from(Environment environment, RegionName region) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java index e6a9014df94..63d479d4c6c 100644 --- 
a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java @@ -161,7 +161,7 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry @Override public ZoneApi systemZone() { - return ZoneApiMock.fromId("prod.controller"); + return ZoneApiMock.newBuilder().withSystem(system).withVirtualId(ZoneId.ofVirtualControllerZone()).build(); } @Override @@ -180,7 +180,7 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry private ZoneApiMock systemAsZone() { return ZoneApiMock.newBuilder() .with(ZoneId.from("prod.us-east-1")) - .withVirtualId(ZoneId.from("prod.controller")) + .withVirtualId(ZoneId.ofVirtualControllerZone()) .build(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployResultTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployResultTest.java index 36679e0dd91..d0d362abcfc 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployResultTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployResultTest.java @@ -20,9 +20,12 @@ import static org.assertj.core.api.Assertions.assertThat; * @author bjorncs */ public class SystemFlagsDeployResultTest { + private final ZoneApiMock prodUsWest1Zone = ZoneApiMock.fromId("prod.us-west-1"); + private final ZoneRegistryMock registry = new ZoneRegistryMock(SystemName.cd).setZones(prodUsWest1Zone); + @Test void changes_and_errors_are_present_in_wire_format() { - FlagsTarget controllerTarget = FlagsTarget.forController(SystemName.cd); + FlagsTarget controllerTarget = FlagsTarget.forController(registry.systemZone()); FlagId flagOne = new FlagId("flagone"); FlagId flagTwo = new 
FlagId("flagtwo"); SystemFlagsDeployResult result = new SystemFlagsDeployResult( @@ -41,10 +44,8 @@ public class SystemFlagsDeployResultTest { @Test void identical_errors_and_changes_from_multiple_targets_are_merged() { - ZoneApiMock prodUsWest1Zone = ZoneApiMock.fromId("prod.us-west-1"); - ZoneRegistryMock registry = new ZoneRegistryMock(SystemName.cd).setZones(prodUsWest1Zone); - FlagsTarget prodUsWest1Target = FlagsTarget.forConfigServer(registry, prodUsWest1Zone.getId()); - FlagsTarget controllerTarget = FlagsTarget.forController(SystemName.cd); + FlagsTarget prodUsWest1Target = FlagsTarget.forConfigServer(registry, prodUsWest1Zone); + FlagsTarget controllerTarget = FlagsTarget.forController(registry.systemZone()); FlagId flagOne = new FlagId("flagone"); FlagId flagTwo = new FlagId("flagtwo"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployerTest.java index 50354639f6f..8ad64a08244 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/systemflags/SystemFlagsDeployerTest.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.hosted.controller.restapi.systemflags; import com.yahoo.config.provision.SystemName; +import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagId; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.json.FlagData; import com.yahoo.vespa.hosted.controller.api.systemflags.v1.FlagsTarget; import com.yahoo.vespa.hosted.controller.api.systemflags.v1.SystemFlagsDataArchive; @@ -15,11 +17,16 @@ import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; +import java.util.Optional; import java.util.Set; import static 
com.yahoo.vespa.hosted.controller.restapi.systemflags.SystemFlagsDeployResult.FlagDataChange; import static com.yahoo.vespa.hosted.controller.restapi.systemflags.SystemFlagsDeployResult.OperationError; +import static com.yahoo.yolean.Exceptions.uncheck; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -38,12 +45,12 @@ public class SystemFlagsDeployerTest { private final ZoneApiMock prodUsEast3Zone = ZoneApiMock.fromId("prod.us-east-3"); private final ZoneRegistryMock registry = new ZoneRegistryMock(SYSTEM).setZones(prodUsWest1Zone, prodUsEast3Zone); - private final FlagsTarget controllerTarget = FlagsTarget.forController(SYSTEM); - private final FlagsTarget prodUsWest1Target = FlagsTarget.forConfigServer(registry, prodUsWest1Zone.getId()); - private final FlagsTarget prodUsEast3Target = FlagsTarget.forConfigServer(registry, prodUsEast3Zone.getId()); + private final FlagsTarget controllerTarget = FlagsTarget.forController(registry.systemZone()); + private final FlagsTarget prodUsWest1Target = FlagsTarget.forConfigServer(registry, prodUsWest1Zone); + private final FlagsTarget prodUsEast3Target = FlagsTarget.forConfigServer(registry, prodUsEast3Zone); @Test - void deploys_flag_data_to_targets() throws IOException { + void deploys_flag_data_to_targets() { FlagsClient flagsClient = mock(FlagsClient.class); when(flagsClient.listFlagData(controllerTarget)).thenReturn(List.of()); when(flagsClient.listFlagData(prodUsWest1Target)).thenReturn(List.of(flagData("existing-prod.us-west-1.json"))); @@ -74,7 +81,81 @@ public class SystemFlagsDeployerTest { } @Test - void dryrun_should_not_change_flags() throws IOException { + void deploys_partial_flag_data_to_targets() { + // default.json 
contains one rule with 2 conditions, one of which is a condition on the aws cloud. + // This condition IS resolved for a config server target, but NOT for a controller target, because FLAG_ID + // has the CLOUD dimension set. + deployFlags(Optional.empty(), "partial/default.json", Optional.of("partial/put-controller.json"), true, PutType.CREATE, FetchVector.Dimension.CLOUD); + deployFlags(Optional.empty(), "partial/default.json", Optional.empty(), false, PutType.NONE, FetchVector.Dimension.CLOUD); + deployFlags(Optional.of("partial/initial.json"), "partial/default.json", Optional.of("partial/put-controller.json"), true, PutType.UPDATE, FetchVector.Dimension.CLOUD); + deployFlags(Optional.of("partial/initial.json"), "partial/default.json", Optional.empty(), false, PutType.DELETE, FetchVector.Dimension.CLOUD); + + // When the CLOUD dimension is NOT declared on the flag, the controller target will also resolve that dimension, and + // the result should be identical to the config server target. Let's also verify the config server target is unchanged. 
+ deployFlags(Optional.empty(), "partial/default.json", Optional.empty(), true, PutType.NONE); + deployFlags(Optional.empty(), "partial/default.json", Optional.empty(), false, PutType.NONE); + deployFlags(Optional.of("partial/initial.json"), "partial/default.json", Optional.empty(), true, PutType.DELETE); + deployFlags(Optional.of("partial/initial.json"), "partial/default.json", Optional.empty(), false, PutType.DELETE); + } + + private enum PutType { + CREATE, + UPDATE, + DELETE, + NONE + } + + /** + * @param existingFlagDataPath path to flag data the target already has + * @param defaultFlagDataPath path to default json file + * @param putFlagDataPath path to flag data pushed to target, or empty if nothing should be pushed + * @param controller whether to target the controller, or config server + */ + private void deployFlags(Optional<String> existingFlagDataPath, + String defaultFlagDataPath, + Optional<String> putFlagDataPath, + boolean controller, + PutType putType, + FetchVector.Dimension... flagDimensions) { + List<FlagData> existingFlagData = existingFlagDataPath.map(SystemFlagsDeployerTest::flagData).map(List::of).orElse(List.of()); + FlagData defaultFlagData = flagData(defaultFlagDataPath); + FlagsTarget target = controller ? 
controllerTarget : prodUsWest1Target; + Optional<FlagData> putFlagData = putFlagDataPath.map(SystemFlagsDeployerTest::flagData); + + try (var replacer = Flags.clearFlagsForTesting()) { + Flags.defineStringFlag(FLAG_ID.toString(), "default", List.of("hakonhall"), "2023-07-27", "2123-07-27", "", "", flagDimensions); + + FlagsClient flagsClient = mock(FlagsClient.class); + when(flagsClient.listFlagData(target)).thenReturn(existingFlagData); + + SystemFlagsDataArchive archive = new SystemFlagsDataArchive.Builder() + .addFile("default.json", defaultFlagData) + .build(); + + SystemFlagsDeployer deployer = new SystemFlagsDeployer(flagsClient, SYSTEM, Set.of(target)); + + List<FlagDataChange> changes = deployer.deployFlags(archive, false).flagChanges(); + + putFlagData.ifPresentOrElse(flagData -> { + verify(flagsClient).putFlagData(target, flagData); + switch (putType) { + case CREATE -> assertThat(changes).containsOnly(FlagDataChange.created(FLAG_ID, target, flagData)); + case UPDATE -> assertThat(changes).containsOnly(FlagDataChange.updated(FLAG_ID, target, flagData, existingFlagData.get(0))); + case DELETE, NONE -> throw new IllegalStateException("Flag data put to the target, but change type is " + putType); + } + }, () -> { + verify(flagsClient, never()).putFlagData(eq(target), any()); + switch (putType) { + case DELETE -> assertThat(changes).containsOnly(FlagDataChange.deleted(FLAG_ID, target)); + case NONE -> assertEquals(changes, List.of()); + default -> throw new IllegalStateException("No flag data is expected to be put to the target but change type is " + putType); + } + }); + } + } + + @Test + void dryrun_should_not_change_flags() { FlagsClient flagsClient = mock(FlagsClient.class); when(flagsClient.listFlagData(controllerTarget)).thenReturn(List.of()); when(flagsClient.listDefinedFlags(controllerTarget)).thenReturn(List.of(new FlagId("my-flag"))); @@ -97,7 +178,7 @@ public class SystemFlagsDeployerTest { } @Test - void 
creates_error_entries_in_result_if_flag_data_operations_fail() throws IOException { + void creates_error_entries_in_result_if_flag_data_operations_fail() { FlagsClient flagsClient = mock(FlagsClient.class); UncheckedIOException exception = new UncheckedIOException(new IOException("I/O error message")); when(flagsClient.listFlagData(prodUsWest1Target)).thenThrow(exception); @@ -120,7 +201,7 @@ public class SystemFlagsDeployerTest { } @Test - void creates_error_entry_for_invalid_flag_archive() throws IOException { + void creates_error_entry_for_invalid_flag_archive() { FlagsClient flagsClient = mock(FlagsClient.class); FlagData defaultData = flagData("flags/my-flag/main.json"); SystemFlagsDataArchive archive = new SystemFlagsDataArchive.Builder() @@ -135,7 +216,7 @@ public class SystemFlagsDeployerTest { } @Test - void creates_error_entry_for_flag_data_of_undefined_flag() throws IOException { + void creates_error_entry_for_flag_data_of_undefined_flag() { FlagData prodUsEast3Data = flagData("flags/my-flag/main.prod.us-east-3.json"); FlagsClient flagsClient = mock(FlagsClient.class); when(flagsClient.listFlagData(prodUsEast3Target)) @@ -154,7 +235,7 @@ public class SystemFlagsDeployerTest { } @Test - void creates_warning_entry_for_existing_flag_data_for_undefined_flag() throws IOException { + void creates_warning_entry_for_existing_flag_data_for_undefined_flag() { FlagData prodUsEast3Data = flagData("flags/my-flag/main.prod.us-east-3.json"); FlagsClient flagsClient = mock(FlagsClient.class); when(flagsClient.listFlagData(prodUsEast3Target)) @@ -170,8 +251,8 @@ public class SystemFlagsDeployerTest { .containsOnly(OperationError.dataForUndefinedFlag(prodUsEast3Target, new FlagId("my-flag"))); } - private static FlagData flagData(String filename) throws IOException { - return FlagData.deserializeUtf8Json(Files.readAllBytes(Paths.get("src/test/resources/system-flags/" + filename))); + private static FlagData flagData(String filename) { + return 
FlagData.deserializeUtf8Json(uncheck(() -> Files.readAllBytes(Paths.get("src/test/resources/system-flags/" + filename)))); } } diff --git a/controller-server/src/test/resources/system-flags/partial/default.json b/controller-server/src/test/resources/system-flags/partial/default.json new file mode 100644 index 00000000000..881d4170c3b --- /dev/null +++ b/controller-server/src/test/resources/system-flags/partial/default.json @@ -0,0 +1,20 @@ +{ + "id" : "my-flag", + "rules" : [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "system", + "values": [ "main" ] + }, + { + "type": "whitelist", + "dimension": "cloud", + "values": [ "aws" ] + } + ], + "value" : "foo-value" + } + ] +}
\ No newline at end of file diff --git a/controller-server/src/test/resources/system-flags/partial/initial.json b/controller-server/src/test/resources/system-flags/partial/initial.json new file mode 100644 index 00000000000..a16ea583005 --- /dev/null +++ b/controller-server/src/test/resources/system-flags/partial/initial.json @@ -0,0 +1,15 @@ +{ + "id" : "my-flag", + "rules" : [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "application", + "values": [ "a:b:c" ] + } + ], + "value" : "bar-value" + } + ] +}
\ No newline at end of file diff --git a/controller-server/src/test/resources/system-flags/partial/put-controller.json b/controller-server/src/test/resources/system-flags/partial/put-controller.json new file mode 100644 index 00000000000..47aa0af47ce --- /dev/null +++ b/controller-server/src/test/resources/system-flags/partial/put-controller.json @@ -0,0 +1,15 @@ +{ + "id" : "my-flag", + "rules" : [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "cloud", + "values": [ "aws" ] + } + ], + "value" : "foo-value" + } + ] +}
\ No newline at end of file diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index f91ebab5c5d..5f75b042722 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -97,6 +97,7 @@ <junit.vespa.version>5.8.1</junit.vespa.version> <junit.platform.vespa.version>1.8.1</junit.platform.vespa.version> <junit4.vespa.version>4.13.2</junit4.vespa.version> + <lucene.vespa.version>9.7.0</lucene.vespa.version> <maven-archiver.vespa.version>3.6.0</maven-archiver.vespa.version> <maven-wagon.vespa.version>2.10</maven-wagon.vespa.version> <mimepull.vespa.version>1.9.6</mimepull.vespa.version> diff --git a/flags/src/main/java/com/yahoo/vespa/flags/FetchVector.java b/flags/src/main/java/com/yahoo/vespa/flags/FetchVector.java index c1877373ce2..5bcc1e67547 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/FetchVector.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/FetchVector.java @@ -3,10 +3,13 @@ package com.yahoo.vespa.flags; import com.yahoo.vespa.flags.json.DimensionHelper; +import java.util.Collection; import java.util.EnumMap; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; +import java.util.function.BiConsumer; import java.util.function.Consumer; /** @@ -20,20 +23,28 @@ public class FetchVector { * Note: If this enum is changed, you must also change {@link DimensionHelper}. */ public enum Dimension { - /** Value from TenantName::value, e.g. vespa-team */ - TENANT_ID, - /** Value from ApplicationId::serializedForm of the form tenant:applicationName:instance. */ APPLICATION_ID, - /** Node type from com.yahoo.config.provision.NodeType::name, e.g. tenant, host, confighost, controller, etc. */ - NODE_TYPE, + /** + * Cloud from com.yahoo.config.provision.CloudName::value, e.g. yahoo, aws, gcp. 
+ * + * <p><em>Eager resolution</em>: This dimension is resolved before putting the flag data to the config server + * or controller, unless the target is the controller and the flag has declared this dimension. + */ + CLOUD, + + /** Cluster ID from com.yahoo.config.provision.ClusterSpec.Id::value, e.g. cluster-controllers, logserver. */ + CLUSTER_ID, /** Cluster type from com.yahoo.config.provision.ClusterSpec.Type::name, e.g. content, container, admin */ CLUSTER_TYPE, - /** Cluster ID from com.yahoo.config.provision.ClusterSpec.Id::value, e.g. cluster-controllers, logserver. */ - CLUSTER_ID, + /** Email address of user - provided by auth0 in console. */ + CONSOLE_USER_EMAIL, + + /** Hosted Vespa environment from com.yahoo.config.provision.Environment::value, e.g. prod, staging, test. */ + ENVIRONMENT, /** * Fully qualified hostname. @@ -44,6 +55,18 @@ public class FetchVector { */ HOSTNAME, + /** Node type from com.yahoo.config.provision.NodeType::name, e.g. tenant, host, confighost, controller, etc. */ + NODE_TYPE, + + /** + * Hosted Vespa system from com.yahoo.config.provision.SystemName::value, e.g. main, cd, public, publiccd. + * <em>Eager resolution</em>, see {@link #CLOUD}. + */ + SYSTEM, + + /** Value from TenantName::value, e.g. vespa-team */ + TENANT_ID, + /** * Vespa version from Version::toFullString of the form Major.Minor.Micro. * @@ -53,14 +76,9 @@ public class FetchVector { */ VESPA_VERSION, - /** Email address of user - provided by auth0 in console. */ - CONSOLE_USER_EMAIL, - /** - * Zone from ZoneId::value of the form environment.region. - * - * <p>NOTE: There is seldom any need to set ZONE_ID, as all flags are set per zone anyways. The controller - * could PERHAPS use this where it handles multiple zones. + * Virtual zone ID from com.yahoo.config.provision.zone.ZoneId::value of the form environment.region, + * see com.yahoo.config.provision.zone.ZoneApi::getVirtualId. <em>Eager resolution</em>, see {@link #CLOUD}. 
*/ ZONE_ID } @@ -83,15 +101,13 @@ public class FetchVector { return Optional.ofNullable(map.get(dimension)); } - public Map<Dimension, String> toMap() { - return map; - } + public Map<Dimension, String> toMap() { return map; } public boolean isEmpty() { return map.isEmpty(); } - public boolean hasDimension(FetchVector.Dimension dimension) { - return map.containsKey(dimension); - } + public boolean hasDimension(FetchVector.Dimension dimension) { return map.containsKey(dimension);} + + public Set<Dimension> dimensions() { return map.keySet(); } /** * Returns a new FetchVector, identical to {@code this} except for its value in {@code dimension}. @@ -107,13 +123,28 @@ public class FetchVector { return makeFetchVector(vector -> vector.putAll(override.map)); } - private FetchVector makeFetchVector(Consumer<EnumMap<Dimension, String>> mapModifier) { - EnumMap<Dimension, String> mergedMap = new EnumMap<>(Dimension.class); + private FetchVector makeFetchVector(Consumer<Map<Dimension, String>> mapModifier) { + Map<Dimension, String> mergedMap = new EnumMap<>(Dimension.class); mergedMap.putAll(map); mapModifier.accept(mergedMap); return new FetchVector(mergedMap); } + public FetchVector without(Dimension dimension) { + return makeFetchVector(merged -> merged.remove(dimension)); + } + + public FetchVector without(Collection<Dimension> dimensions) { + return makeFetchVector(merged -> merged.keySet().removeAll(dimensions)); + } + + @Override + public String toString() { + return "FetchVector{" + + "map=" + map + + '}'; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 326e8f2dcae..e8f204256c8 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -21,7 +21,6 @@ import static com.yahoo.vespa.flags.FetchVector.Dimension.HOSTNAME; import static 
com.yahoo.vespa.flags.FetchVector.Dimension.NODE_TYPE; import static com.yahoo.vespa.flags.FetchVector.Dimension.TENANT_ID; import static com.yahoo.vespa.flags.FetchVector.Dimension.VESPA_VERSION; -import static com.yahoo.vespa.flags.FetchVector.Dimension.ZONE_ID; /** * Definitions of feature flags. @@ -53,23 +52,16 @@ public class Flags { List.of("hakonhall", "baldersheim"), "2023-03-06", "2023-08-05", "Drop caches on tenant hosts", "Takes effect on next tick", - ZONE_ID, // The application ID is the exclusive application ID associated with the host, // if any, or otherwise hosted-vespa:tenant-host:default. APPLICATION_ID, TENANT_ID, CLUSTER_ID, CLUSTER_TYPE); - public static final UnboundBooleanFlag SIMPLER_ACL = defineFeatureFlag( - "simpler-acl", true, - List.of("hakonhall"), "2023-07-04", "2023-08-04", - "Simplify ACL in hosted Vespa", - "Takes effect on the next fetch of ACL rules"); - public static final UnboundDoubleFlag DEFAULT_TERM_WISE_LIMIT = defineDoubleFlag( "default-term-wise-limit", 1.0, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Default limit for when to apply termwise query evaluation", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundStringFlag QUERY_DISPATCH_POLICY = defineStringFlag( "query-dispatch-policy", "adaptive", @@ -77,83 +69,83 @@ public class Flags { "Select query dispatch policy, valid values are adaptive, round-robin, best-of-random-2," + " latency-amortized-over-requests, latency-amortized-over-time", "Takes effect at redeployment (requires restart)", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundStringFlag SUMMARY_DECODE_POLICY = defineStringFlag( "summary-decode-policy", "eager", List.of("baldersheim"), "2023-03-30", "2023-12-31", "Select summary decoding policy, valid values are eager and on-demand/ondemand.", "Takes effect at redeployment (requires restart)", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final 
UnboundStringFlag FEED_SEQUENCER_TYPE = defineStringFlag( "feed-sequencer-type", "THROUGHPUT", List.of("baldersheim"), "2020-12-02", "2023-12-31", "Selects type of sequenced executor used for feeding in proton, valid values are LATENCY, ADAPTIVE, THROUGHPUT", "Takes effect at redeployment (requires restart)", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MAX_UNCOMMITTED_MEMORY = defineIntFlag( "max-uncommitted-memory", 130000, List.of("geirst, baldersheim"), "2021-10-21", "2023-12-31", "Max amount of memory holding updates to an attribute before we do a commit.", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundStringFlag RESPONSE_SEQUENCER_TYPE = defineStringFlag( "response-sequencer-type", "ADAPTIVE", List.of("baldersheim"), "2020-12-02", "2023-12-31", "Selects type of sequenced executor used for mbus responses, valid values are LATENCY, ADAPTIVE, THROUGHPUT", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag RESPONSE_NUM_THREADS = defineIntFlag( "response-num-threads", 2, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Number of threads used for mbus responses, default is 2, negative number = numcores/4", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag SKIP_COMMUNICATIONMANAGER_THREAD = defineFeatureFlag( "skip-communicationmanager-thread", false, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Should we skip the communicationmanager thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag SKIP_MBUS_REQUEST_THREAD = defineFeatureFlag( "skip-mbus-request-thread", false, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Should we skip the mbus request thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final 
UnboundBooleanFlag SKIP_MBUS_REPLY_THREAD = defineFeatureFlag( "skip-mbus-reply-thread", false, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Should we skip the mbus reply thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag USE_ASYNC_MESSAGE_HANDLING_ON_SCHEDULE = defineFeatureFlag( "async-message-handling-on-schedule", false, List.of("baldersheim"), "2020-12-02", "2023-12-31", "Optionally deliver async messages in own thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundDoubleFlag FEED_CONCURRENCY = defineDoubleFlag( "feed-concurrency", 0.5, List.of("baldersheim"), "2020-12-02", "2023-12-31", "How much concurrency should be allowed for feed", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundDoubleFlag FEED_NICENESS = defineDoubleFlag( "feed-niceness", 0.0, List.of("baldersheim"), "2022-06-24", "2023-12-31", "How nice feeding shall be", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MBUS_JAVA_NUM_TARGETS = defineIntFlag( @@ -161,71 +153,71 @@ public class Flags { List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number of rpc targets per service", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MBUS_CPP_NUM_TARGETS = defineIntFlag( "mbus-cpp-num-targets", 2, List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number of rpc targets per service", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag RPC_NUM_TARGETS = defineIntFlag( "rpc-num-targets", 2, List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number of rpc targets per content node", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag 
MBUS_JAVA_EVENTS_BEFORE_WAKEUP = defineIntFlag( "mbus-java-events-before-wakeup", 1, List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number write events before waking up transport thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MBUS_CPP_EVENTS_BEFORE_WAKEUP = defineIntFlag( "mbus-cpp-events-before-wakeup", 1, List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number write events before waking up transport thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag RPC_EVENTS_BEFORE_WAKEUP = defineIntFlag( "rpc-events-before-wakeup", 1, List.of("baldersheim"), "2022-07-05", "2023-12-31", "Number write events before waking up transport thread", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MBUS_NUM_NETWORK_THREADS = defineIntFlag( "mbus-num-network-threads", 1, List.of("baldersheim"), "2022-07-01", "2023-12-31", "Number of threads used for mbus network", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag SHARED_STRING_REPO_NO_RECLAIM = defineFeatureFlag( "shared-string-repo-no-reclaim", false, List.of("baldersheim"), "2022-06-14", "2023-12-31", "Controls whether we do track usage and reclaim unused enum values in shared string repo", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag CONTAINER_DUMP_HEAP_ON_SHUTDOWN_TIMEOUT = defineFeatureFlag( "container-dump-heap-on-shutdown-timeout", false, List.of("baldersheim"), "2021-09-25", "2023-12-31", "Will trigger a heap dump during if container shutdown times out", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag LOAD_CODE_AS_HUGEPAGES = defineFeatureFlag( "load-code-as-hugepages", false, List.of("baldersheim"), 
"2022-05-13", "2023-12-31", "Will try to map the code segment with huge (2M) pages", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundDoubleFlag CONTAINER_SHUTDOWN_TIMEOUT = defineDoubleFlag( "container-shutdown-timeout", 50.0, List.of("baldersheim"), "2021-09-25", "2023-12-31", "Timeout for shutdown of a jdisc container", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); // TODO: Move to a permanent flag public static final UnboundListFlag<String> ALLOWED_ATHENZ_PROXY_IDENTITIES = defineListFlag( @@ -240,28 +232,28 @@ public class Flags { "Allows replicas in up to N content groups to not be activated " + "for query visibility if they are out of sync with a majority of other replicas", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundDoubleFlag MIN_NODE_RATIO_PER_GROUP = defineDoubleFlag( "min-node-ratio-per-group", 0.0, List.of("geirst", "vekterli"), "2021-07-16", "2023-09-01", "Minimum ratio of nodes that have to be available (i.e. 
not Down) in any hierarchic content cluster group for the group to be Up", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundStringFlag SYSTEM_MEMORY_HIGH = defineStringFlag( "system-memory-high", "", List.of("baldersheim"), "2023-02-14", "2023-12-31", "The value to write to /sys/fs/cgroup/system.slice/memory.high, if non-empty.", "Takes effect on next tick.", - ZONE_ID, NODE_TYPE); + NODE_TYPE); public static final UnboundStringFlag SYSTEM_MEMORY_MAX = defineStringFlag( "system-memory-max", "", List.of("baldersheim"), "2023-02-14", "2023-12-31", "The value to write to /sys/fs/cgroup/system.slice/memory.max, if non-empty.", "Takes effect on next tick.", - ZONE_ID, NODE_TYPE); + NODE_TYPE); public static final UnboundBooleanFlag ENABLED_HORIZON_DASHBOARD = defineFeatureFlag( "enabled-horizon-dashboard", false, @@ -276,35 +268,35 @@ public class Flags { List.of("arnej"), "2021-11-12", "2023-12-31", "Whether C++ thread creation should ignore any requested stack size", "Triggers restart, takes effect immediately", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag USE_V8_GEO_POSITIONS = defineFeatureFlag( "use-v8-geo-positions", true, List.of("arnej"), "2021-11-15", "2023-12-31", "Use Vespa 8 types and formats for geographical positions", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag MAX_COMPACT_BUFFERS = defineIntFlag( "max-compact-buffers", 1, List.of("baldersheim", "geirst", "toregge"), "2021-12-15", "2023-12-31", "Upper limit of buffers to compact in a data store at the same time for each reason (memory usage, address space usage)", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag USE_QRSERVER_SERVICE_NAME = defineFeatureFlag( "use-qrserver-service-name", false, List.of("arnej"), "2022-01-18", "2023-12-31", "Use backwards-compatible 
'qrserver' service name for containers with only 'search' API", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag AVOID_RENAMING_SUMMARY_FEATURES = defineFeatureFlag( "avoid-renaming-summary-features", true, List.of("arnej"), "2022-01-15", "2023-12-31", "Tell backend about the original name of summary-features that were wrapped in a rankingExpression feature", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag ENABLE_PROXY_PROTOCOL_MIXED_MODE = defineFeatureFlag( "enable-proxy-protocol-mixed-mode", true, @@ -318,7 +310,7 @@ public class Flags { List.of("arnej"), "2022-06-14", "2024-12-31", "Which algorithm to use for compressing log files. Valid values: empty string (default), gzip, zstd", "Takes effect immediately", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag SEPARATE_METRIC_CHECK_CONFIG = defineFeatureFlag( "separate-metric-check-config", false, @@ -354,7 +346,7 @@ public class Flags { List.of("vekterli"), "2022-11-03", "2023-10-01", "Specifies which public key to use for core dump encryption.", "Takes effect on the next tick.", - ZONE_ID, NODE_TYPE, HOSTNAME); + NODE_TYPE, HOSTNAME); public static final UnboundBooleanFlag ENABLE_GLOBAL_PHASE = defineFeatureFlag( "enable-global-phase", true, @@ -368,7 +360,7 @@ public class Flags { List.of("olaa"), "2023-04-12", "2023-08-01", "Whether AthenzCredentialsMaintainer in node-admin should create tenant service identity certificate", "Takes effect on next tick", - ZONE_ID, HOSTNAME, VESPA_VERSION, APPLICATION_ID + HOSTNAME, VESPA_VERSION, APPLICATION_ID ); public static final UnboundBooleanFlag ENABLE_CROWDSTRIKE = defineFeatureFlag( @@ -380,7 +372,7 @@ public class Flags { "allow-more-than-one-content-group-down", false, List.of("hmusum"), "2023-04-14", "2023-08-15", "Whether to enable possible configuration of letting more than one content 
group down", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag RANDOMIZED_ENDPOINT_NAMES = defineFeatureFlag( "randomized-endpoint-names", false, List.of("andreer"), "2023-04-26", "2023-08-30", @@ -397,8 +389,7 @@ public class Flags { public static final UnboundBooleanFlag ENABLE_THE_ONE_THAT_SHOULD_NOT_BE_NAMED = defineFeatureFlag( "enable-the-one-that-should-not-be-named", false, List.of("hmusum"), "2023-05-08", "2023-08-15", "Whether to enable the one program that should not be named", - "Takes effect at next host-admin tick", - ZONE_ID); + "Takes effect at next host-admin tick"); public static final UnboundListFlag<String> WEIGHTED_ENDPOINT_RECORD_TTL = defineListFlag( "weighted-endpoint-record-ttl", List.of(), String.class, List.of("jonmv"), "2023-05-16", "2023-09-01", @@ -413,42 +404,40 @@ public class Flags { "will initiate a write-repair that evaluates the condition across all mutually inconsistent " + "replicas, with the newest document version (if any) being authoritative", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag ENABLE_DATAPLANE_PROXY = defineFeatureFlag( "enable-dataplane-proxy", false, List.of("mortent", "olaa"), "2023-05-15", "2023-08-01", "Whether to enable dataplane proxy", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID + APPLICATION_ID ); public static final UnboundBooleanFlag ENABLE_NESTED_MULTIVALUE_GROUPING = defineFeatureFlag( "enable-nested-multivalue-grouping", false, List.of("baldersheim"), "2023-06-29", "2023-12-31", "Should we enable proper nested multivalue grouping", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag USE_RECONFIGURABLE_DISPATCHER = defineFeatureFlag( "use-reconfigurable-dispatcher", false, List.of("jonmv"), "2023-07-14", "2023-10-01", "Whether to set up a ReconfigurableDispatcher with config self-sub 
for backend nodes", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag WRITE_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag( "write-config-server-session-data-as-blob", false, List.of("hmuusm"), "2023-07-19", "2023-09-01", "Whether to write config server session data in one blob or as individual paths", - "Takes effect immediately", - ZONE_ID); + "Takes effect immediately"); public static final UnboundBooleanFlag READ_CONFIG_SERVER_SESSION_DATA_AS_ONE_BLOB = defineFeatureFlag( "read-config-server-session-data-as-blob", false, List.of("hmuusm"), "2023-07-19", "2023-09-01", "Whether to read config server session data from sesion data blob or from individual paths", - "Takes effect immediately", - ZONE_ID); + "Takes effect immediately"); public static final UnboundBooleanFlag USE_VESPA_USER_EVERYWHERE = defineFeatureFlag( "use-vespa-user-everywhere", false, @@ -538,6 +527,15 @@ public class Flags { * For instance, if APPLICATION is one of the dimensions here, you should make sure * APPLICATION is set to the ApplicationId in the fetch vector when fetching the RawFlag * from the FlagSource. + * SYSTEM, CLOUD, ENVIRONMENT, and ZONE_ID are special: These dimensions are resolved just + * before the flag data is published to a zone. This means there is never any need to set + * these dimensions when resolving a flag, and setting these dimensions just before resolving + * the flag will have no effect. + * There is one exception. If any of these dimensions are declared when defining a flag, + * then those dimensions are NOT resolved when published to the controllers. This allows + * the controller to resolve the flag to different values based on which cloud or zone + * it is operating on. Flags should NOT declare these dimensions unless they intend to + * use them in the controller in this way. * @param <T> The boxed type of the flag value, e.g. Boolean for flags guarding features. 
* @param <U> The type of the unbound flag, e.g. UnboundBooleanFlag. * @return An unbound flag with {@link FetchVector.Dimension#HOSTNAME HOSTNAME} and diff --git a/flags/src/main/java/com/yahoo/vespa/flags/JsonNodeRawFlag.java b/flags/src/main/java/com/yahoo/vespa/flags/JsonNodeRawFlag.java index 753f19a44f6..27852790186 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/JsonNodeRawFlag.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/JsonNodeRawFlag.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import java.util.Collection; +import java.util.Objects; import java.util.concurrent.atomic.AtomicReference; import static com.yahoo.yolean.Exceptions.uncheck; @@ -60,6 +61,26 @@ public class JsonNodeRawFlag implements RawFlag { return jsonNode.toString(); } + @Override + public String toString() { + return "JsonNodeRawFlag{" + + "jsonNode=" + jsonNode + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + JsonNodeRawFlag that = (JsonNodeRawFlag) o; + return jsonNode.equals(that.jsonNode); + } + + @Override + public int hashCode() { + return Objects.hash(jsonNode); + } + /** Initialize object mapper lazily */ private static ObjectMapper objectMapper() { // ObjectMapper is a heavy-weight object so we construct it only when we need it diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index b7e81f56599..18f5f5f860d 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -21,7 +21,6 @@ import static com.yahoo.vespa.flags.FetchVector.Dimension.HOSTNAME; import static com.yahoo.vespa.flags.FetchVector.Dimension.NODE_TYPE; import static com.yahoo.vespa.flags.FetchVector.Dimension.TENANT_ID; import static 
com.yahoo.vespa.flags.FetchVector.Dimension.VESPA_VERSION; -import static com.yahoo.vespa.flags.FetchVector.Dimension.ZONE_ID; /** * Definition for permanent feature flags @@ -43,19 +42,19 @@ public class PermanentFlags { "jvm-gc-options", "", "Sets default jvm gc options", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundIntFlag HEAP_SIZE_PERCENTAGE = defineIntFlag( "heap-size-percentage", 70, "Sets default jvm heap size percentage", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundDoubleFlag QUERY_DISPATCH_WARMUP = defineDoubleFlag( "query-dispatch-warmup", 5, "Warmup duration for query dispatcher", "Takes effect at redeployment (requires restart)", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag FLEET_CANARY = defineFeatureFlag( "fleet-canary", false, @@ -126,13 +125,13 @@ public class PermanentFlags { "min-disk-throughput-mb-s", 0, "Minimum required disk throughput performance, 0 = default, Only when using remote disk", "Takes effect when node is provisioned", - ZONE_ID, APPLICATION_ID, TENANT_ID, CLUSTER_ID, CLUSTER_TYPE); + APPLICATION_ID, TENANT_ID, CLUSTER_ID, CLUSTER_TYPE); public static final UnboundIntFlag MIN_DISK_IOPS_K = defineIntFlag( "min-disk-iops-k", 0, "Minimum required disk I/O operations per second, unit is kilo, 0 = default, Only when using remote disk", "Takes effect when node is provisioned", - ZONE_ID, APPLICATION_ID, TENANT_ID, CLUSTER_ID, CLUSTER_TYPE); + APPLICATION_ID, TENANT_ID, CLUSTER_ID, CLUSTER_TYPE); public static final UnboundListFlag<String> DISABLED_HOST_ADMIN_TASKS = defineListFlag( "disabled-host-admin-tasks", List.of(), String.class, @@ -145,7 +144,7 @@ public class PermanentFlags { "docker-image-repo", "", "Override default docker image repo. 
Docker image version will be Vespa version.", "Takes effect on next deployment from controller", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); private static final String VERSION_QUALIFIER_REGEX = "[a-zA-Z0-9_-]+"; private static final Pattern QUALIFIER_PATTERN = Pattern.compile("^" + VERSION_QUALIFIER_REGEX + "$"); @@ -250,29 +249,28 @@ public class PermanentFlags { "A list of environment variables set for all services. " + "Each item should be on the form <ENV_VAR>=<VALUE>", "Takes effect on service restart", - ZONE_ID, APPLICATION_ID + APPLICATION_ID ); public static final UnboundStringFlag CONFIG_PROXY_JVM_ARGS = defineStringFlag( "config-proxy-jvm-args", "", "Sets jvm args for config proxy (added at the end of startup command, will override existing ones)", "Takes effect on restart of Docker container", - ZONE_ID, APPLICATION_ID + APPLICATION_ID ); // This must be set in a feature flag to avoid flickering between the new and old value during config server upgrade public static final UnboundDoubleFlag HOST_MEMORY = defineDoubleFlag( "host-memory", 0.6, "The memory in GB required by a host's management processes.", - "Takes effect immediately", - ZONE_ID + "Takes effect immediately" ); public static final UnboundBooleanFlag FORWARD_ISSUES_AS_ERRORS = defineFeatureFlag( "forward-issues-as-errors", true, "When the backend detects a problematic issue with a query, it will by default send it as an error message to the QRS, which adds it in an ErrorHit in the result. 
May be disabled using this flag.", "Takes effect immediately", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundBooleanFlag DEACTIVATE_ROUTING = defineFeatureFlag( "deactivate-routing", false, @@ -285,7 +283,7 @@ public class PermanentFlags { "ignored-http-user-agents", List.of(), String.class, "List of user agents to ignore (crawlers etc)", "Takes effect immediately.", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundListFlag<String> INCOMPATIBLE_VERSIONS = defineListFlag( "incompatible-versions", List.of("8"), String.class, @@ -308,7 +306,7 @@ public class PermanentFlags { "(logserver and clustercontroller clusters).", "Takes effect on next redeployment", value -> Set.of("any", "arm64", "x86_64").contains(value), - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundListFlag<String> CLOUD_ACCOUNTS = defineListFlag( "cloud-accounts", List.of(), String.class, @@ -320,7 +318,7 @@ public class PermanentFlags { "fail-deployment-for-files-with-unknown-extension", "FAIL", "Whether to log or fail for deployments when app has a file with unknown extension (valid values: LOG, FAIL)", "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); + APPLICATION_ID); public static final UnboundListFlag<String> DISABLED_DEPLOYMENT_ZONES = defineListFlag( "disabled-deployment-zones", List.of(), String.class, @@ -339,8 +337,7 @@ public class PermanentFlags { "config-server-session-expiry-time", 3600, "Expiry time in seconds for remote sessions (session in ZooKeeper). 
Default should be equal to session lifetime, " + "but can be lowered if there are incidents/bugs where one needs to delete sessions", - "Takes effect immediately", - ZONE_ID + "Takes effect immediately" ); public static final UnboundBooleanFlag NOTIFICATION_DISPATCH_FLAG = defineFeatureFlag( @@ -353,7 +350,7 @@ public class PermanentFlags { "keep-file-references-on-tenant-nodes", 30, "How many days to keep file references on tenant nodes (based on last modification time)", "Takes effect on restart of Docker container", - ZONE_ID, APPLICATION_ID + APPLICATION_ID ); public static final UnboundIntFlag ENDPOINT_CONNECTION_TTL = defineIntFlag( diff --git a/flags/src/main/java/com/yahoo/vespa/flags/json/DimensionHelper.java b/flags/src/main/java/com/yahoo/vespa/flags/json/DimensionHelper.java index ad1242aa7e9..5e5506b616b 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/json/DimensionHelper.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/json/DimensionHelper.java @@ -15,15 +15,18 @@ public class DimensionHelper { private static final Map<FetchVector.Dimension, String> serializedDimensions = new HashMap<>(); static { - serializedDimensions.put(FetchVector.Dimension.ZONE_ID, "zone"); - serializedDimensions.put(FetchVector.Dimension.HOSTNAME, "hostname"); serializedDimensions.put(FetchVector.Dimension.APPLICATION_ID, "application"); - serializedDimensions.put(FetchVector.Dimension.NODE_TYPE, "node-type"); + serializedDimensions.put(FetchVector.Dimension.CLOUD, "cloud"); serializedDimensions.put(FetchVector.Dimension.CLUSTER_ID, "cluster-id"); serializedDimensions.put(FetchVector.Dimension.CLUSTER_TYPE, "cluster-type"); - serializedDimensions.put(FetchVector.Dimension.VESPA_VERSION, "vespa-version"); serializedDimensions.put(FetchVector.Dimension.CONSOLE_USER_EMAIL, "console-user-email"); + serializedDimensions.put(FetchVector.Dimension.ENVIRONMENT, "environment"); + serializedDimensions.put(FetchVector.Dimension.HOSTNAME, "hostname"); + 
serializedDimensions.put(FetchVector.Dimension.NODE_TYPE, "node-type"); + serializedDimensions.put(FetchVector.Dimension.SYSTEM, "system"); serializedDimensions.put(FetchVector.Dimension.TENANT_ID, "tenant"); + serializedDimensions.put(FetchVector.Dimension.VESPA_VERSION, "vespa-version"); + serializedDimensions.put(FetchVector.Dimension.ZONE_ID, "zone"); if (serializedDimensions.size() != FetchVector.Dimension.values().length) { throw new IllegalStateException(FetchVectorHelper.class.getName() + " is not in sync with " + diff --git a/flags/src/main/java/com/yahoo/vespa/flags/json/FlagData.java b/flags/src/main/java/com/yahoo/vespa/flags/json/FlagData.java index 19837e7dbe1..acda3b9db42 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/json/FlagData.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/json/FlagData.java @@ -13,9 +13,10 @@ import com.yahoo.vespa.flags.json.wire.WireRule; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Optional; -import java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -53,6 +54,27 @@ public class FlagData { public boolean isEmpty() { return rules.isEmpty() && defaultFetchVector.isEmpty(); } + public FlagData partialResolve(FetchVector fetchVector) { + // Note: As a result of partialResolve, there could be e.g. two identical rules, and the latter will always be ignored by resolve(). + // Consider deduping. Deduping is actually not specific to partialResolve and could be done e.g. at construction time. + + List<Rule> newRules = new ArrayList<>(); + for (var rule : rules) { + Optional<Rule> partialRule = rule.partialResolve(fetchVector); + if (partialRule.isPresent()) { + newRules.add(partialRule.get()); + if (partialRule.get().conditions().isEmpty()) { + // Any following rule will always be ignored during resolution. 
+ break; + } + } + } + + FetchVector newDefaultFetchVector = defaultFetchVector.without(fetchVector.dimensions()); + + return new FlagData(id, newDefaultFetchVector, newRules); + } + public Optional<RawFlag> resolve(FetchVector fetchVector) { return rules.stream() .filter(rule -> rule.match(defaultFetchVector.with(fetchVector))) @@ -91,6 +113,36 @@ public class FlagData { return wireFlagData; } + /** E.g. verify all RawFlag can be deserialized. */ + public void validate(Deserializer<?> deserializer) { + rules.stream() + .flatMap(rule -> rule.getValueToApply().map(Stream::of).orElse(null)) + .forEach(deserializer::deserialize); + + } + + @Override + public String toString() { + return "FlagData{" + + "id=" + id + + ", rules=" + rules + + ", defaultFetchVector=" + defaultFetchVector + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FlagData flagData = (FlagData) o; + return id.equals(flagData.id) && rules.equals(flagData.rules) && defaultFetchVector.equals(flagData.defaultFetchVector); + } + + @Override + public int hashCode() { + return Objects.hash(id, rules, defaultFetchVector); + } + public static FlagData deserializeUtf8Json(byte[] bytes) { return fromWire(WireFlagData.deserialize(bytes)); } @@ -138,13 +190,5 @@ public class FlagData { if (wireRules == null) return List.of(); return wireRules.stream().map(Rule::fromWire).toList(); } - - /** E.g. verify all RawFlag can be deserialized. 
*/ - public void validate(Deserializer<?> deserializer) { - rules.stream() - .flatMap(rule -> rule.getValueToApply().map(Stream::of).orElse(null)) - .forEach(deserializer::deserialize); - - } } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/json/ListCondition.java b/flags/src/main/java/com/yahoo/vespa/flags/json/ListCondition.java index c4b2d9be117..483f6750a73 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/json/ListCondition.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/json/ListCondition.java @@ -5,6 +5,7 @@ import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.json.wire.WireCondition; import java.util.List; +import java.util.Objects; /** * @author hakonhall @@ -55,4 +56,27 @@ public abstract class ListCondition implements Condition { condition.values = values.isEmpty() ? null : values; return condition; } + + @Override + public String toString() { + return "ListCondition{" + + "type=" + type + + ", dimension=" + dimension + + ", values=" + values + + ", isWhitelist=" + isWhitelist + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ListCondition that = (ListCondition) o; + return isWhitelist == that.isWhitelist && type == that.type && dimension == that.dimension && values.equals(that.values); + } + + @Override + public int hashCode() { + return Objects.hash(type, dimension, values, isWhitelist); + } } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/json/RelationalCondition.java b/flags/src/main/java/com/yahoo/vespa/flags/json/RelationalCondition.java index 0efeb831f2c..749f6830870 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/json/RelationalCondition.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/json/RelationalCondition.java @@ -5,6 +5,7 @@ import com.yahoo.component.Version; import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.json.wire.WireCondition; +import java.util.Objects; 
import java.util.function.Predicate; /** @@ -75,4 +76,26 @@ public class RelationalCondition implements Condition { condition.predicate = relationalPredicate.toWire(); return condition; } + + @Override + public String toString() { + return "RelationalCondition{" + + "relationalPredicate=" + relationalPredicate + + ", predicate=" + predicate + + ", dimension=" + dimension + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RelationalCondition that = (RelationalCondition) o; + return relationalPredicate.equals(that.relationalPredicate) && predicate.equals(that.predicate) && dimension == that.dimension; + } + + @Override + public int hashCode() { + return Objects.hash(relationalPredicate, predicate, dimension); + } } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/json/Rule.java b/flags/src/main/java/com/yahoo/vespa/flags/json/Rule.java index bddaf8c9e0e..127c2b4f9da 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/json/Rule.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/json/Rule.java @@ -6,10 +6,11 @@ import com.yahoo.vespa.flags.JsonNodeRawFlag; import com.yahoo.vespa.flags.RawFlag; import com.yahoo.vespa.flags.json.wire.WireRule; +import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Optional; -import java.util.stream.Collectors; /** * @author hakonhall @@ -45,6 +46,25 @@ public class Rule { .allMatch(condition -> !fetchVector.hasDimension(condition.dimension()) || condition.test(fetchVector)); } + /** + * Returns a copy of this rule without those conditions that can be resolved by the fetch vector. Returns empty + * if any of those conditions are false. 
+ */ + public Optional<Rule> partialResolve(FetchVector fetchVector) { + List<Condition> newConditions = new ArrayList<>(); + for (var condition : andConditions) { + if (fetchVector.hasDimension(condition.dimension())) { + if (!condition.test(fetchVector)) { + return Optional.empty(); + } + } else { + newConditions.add(condition); + } + } + + return Optional.of(new Rule(valueToApply, newConditions)); + } + public Optional<RawFlag> getValueToApply() { return valueToApply; } @@ -68,4 +88,25 @@ public class Rule { Optional<RawFlag> value = wireRule.value == null ? Optional.empty() : Optional.of(JsonNodeRawFlag.fromJsonNode(wireRule.value)); return new Rule(value, conditions); } + + @Override + public String toString() { + return "Rule{" + + "andConditions=" + andConditions + + ", valueToApply=" + valueToApply + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Rule rule = (Rule) o; + return andConditions.equals(rule.andConditions) && valueToApply.equals(rule.valueToApply); + } + + @Override + public int hashCode() { + return Objects.hash(andConditions, valueToApply); + } } diff --git a/flags/src/test/java/com/yahoo/vespa/flags/json/FlagDataTest.java b/flags/src/test/java/com/yahoo/vespa/flags/json/FlagDataTest.java index c89b5883fd1..c7da1abe7e2 100644 --- a/flags/src/test/java/com/yahoo/vespa/flags/json/FlagDataTest.java +++ b/flags/src/test/java/com/yahoo/vespa/flags/json/FlagDataTest.java @@ -15,44 +15,45 @@ import static org.junit.jupiter.api.Assertions.assertTrue; * @author hakonhall */ public class FlagDataTest { - private final String json = "{\n" + - " \"id\": \"id1\",\n" + - " \"rules\": [\n" + - " {\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"hostname\",\n" + - " \"values\": [ \"host1\", \"host2\" ]\n" + - " },\n" + - " {\n" + - " \"type\": \"blacklist\",\n" + - " \"dimension\": \"application\",\n" + - 
" \"values\": [ \"app1\", \"app2\" ]\n" + - " }\n" + - " ],\n" + - " \"value\": true\n" + - " },\n" + - " {\n" + - " \"conditions\": [\n" + - " {\n" + - " \"type\": \"whitelist\",\n" + - " \"dimension\": \"zone\",\n" + - " \"values\": [ \"zone1\", \"zone2\" ]\n" + - " }\n" + - " ],\n" + - " \"value\": false\n" + - " }\n" + - " ],\n" + - " \"attributes\": {\n" + - " \"zone\": \"zone1\"\n" + - " }\n" + - "}"; + private final String json = """ + { + "id": "id1", + "rules": [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "hostname", + "values": [ "host1", "host2" ] + }, + { + "type": "blacklist", + "dimension": "application", + "values": [ "app1", "app2" ] + } + ], + "value": true + }, + { + "conditions": [ + { + "type": "whitelist", + "dimension": "zone", + "values": [ "zone1", "zone2" ] + } + ], + "value": false + } + ], + "attributes": { + "zone": "zone1" + } + }"""; private final FetchVector vector = new FetchVector(); @Test - void test() { + void testResolve() { // Second rule matches with the default zone matching verify(Optional.of("false"), vector); @@ -74,6 +75,143 @@ public class FlagDataTest { verify(Optional.empty(), vector.with(FetchVector.Dimension.ZONE_ID, "unknown zone")); } + @Test + void testPartialResolve() { + FlagData data = FlagData.deserialize(json); + assertEquals(data.partialResolve(vector), data); + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app1")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "zone", + "values": [ "zone1", "zone2" ] + } + ], + "value": false + } + ], + "attributes": { + "zone": "zone1" + } + }""")); + + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app1")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "zone", + "values": [ "zone1", "zone2" ] + } + ], + "value": false + } + ], 
+ "attributes": { + "zone": "zone1" + } + }""")); + + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app3")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "hostname", + "values": [ "host1", "host2" ] + } + ], + "value": true + }, + { + "conditions": [ + { + "type": "whitelist", + "dimension": "zone", + "values": [ "zone1", "zone2" ] + } + ], + "value": false + } + ], + "attributes": { + "zone": "zone1" + } + }""")); + + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app3") + .with(FetchVector.Dimension.HOSTNAME, "host1")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "value": true + } + ], + "attributes": { + "zone": "zone1" + } + }""")); + + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app3") + .with(FetchVector.Dimension.HOSTNAME, "host3")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "conditions": [ + { + "type": "whitelist", + "dimension": "zone", + "values": [ "zone1", "zone2" ] + } + ], + "value": false + } + ], + "attributes": { + "zone": "zone1" + } + }""")); + + assertEquals(data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app3") + .with(FetchVector.Dimension.HOSTNAME, "host3") + .with(FetchVector.Dimension.ZONE_ID, "zone2")), + FlagData.deserialize(""" + { + "id": "id1", + "rules": [ + { + "value": false + } + ] + }""")); + + FlagData fullyResolved = data.partialResolve(vector.with(FetchVector.Dimension.APPLICATION_ID, "app3") + .with(FetchVector.Dimension.HOSTNAME, "host3") + .with(FetchVector.Dimension.ZONE_ID, "zone3")); + assertEquals(fullyResolved, FlagData.deserialize(""" + { + "id": "id1" + }""")); + assertTrue(fullyResolved.isEmpty()); + } + private void verify(Optional<String> expectedValue, FetchVector vector) { FlagData data = FlagData.deserialize(json); assertEquals("id1", 
data.id().toString()); diff --git a/lucene-linguistics/README.md b/lucene-linguistics/README.md new file mode 100644 index 00000000000..6329811e458 --- /dev/null +++ b/lucene-linguistics/README.md @@ -0,0 +1,93 @@ +# Vespa Lucene Linguistics + +Linguistics implementation based on Apache Lucene. +Features: +- a list of default analyzers per language; +- building custom analyzers through the configuration of the linguistics component; +- building custom analyzers in Java code and declaring them as `components`. + +## Development + +Build: +```shell +mvn clean test -U package +``` + +To compile configuration classes so that IntelliJ doesn't complain: +- right click on `pom.xml` +- then `Maven` +- then `Generate Sources and Update Folders` + +## Usage + +Add `<component>` to `services.xml` of your application package, e.g.: +```xml +<component id="com.yahoo.language.lucene.LuceneLinguistics" bundle="lucene-linguistics"> + <config name="com.yahoo.language.lucene.lucene-analysis"> + <configDir>linguistics</configDir> + <analysis> + <item key="en"> + <tokenizer> + <name>standard</name> + </tokenizer> + <tokenFilters> + <item> + <name>reverseString</name> + </item> + </tokenFilters> + </item> + </analysis> + </config> +</component> +``` +into `container` clusters that have `<document-processing/>` and/or `<search>` specified. + +And then package and deploy, e.g.: +```shell +(mvn clean -DskipTests=true -U package && vespa deploy -w 100) +``` + +### Configuration of Lucene Analyzers + +Read the Lucene docs of subclasses of: +- [TokenizerFactory](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/analysis/TokenizerFactory.html), e.g. [StandardTokenizerFactory](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/analysis/standard/StandardTokenizerFactory.html) +- [CharFilterFactory](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/analysis/CharFilterFactory.html), e.g.
[PatternReplaceCharFilterFactory](https://lucene.apache.org/core/8_1_1/analyzers-common/org/apache/lucene/analysis/pattern/PatternReplaceCharFilterFactory.html) +- [TokenFilterFactory](https://lucene.apache.org/core/8_1_1/analyzers-common/org/apache/lucene/analysis/util/TokenFilterFactory.html), e.g. [ReverseStringFilterFactory](https://lucene.apache.org/core/8_1_1/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilterFactory.html) + +E.g. tokenizer `StandardTokenizerFactory` has this config [snippet](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/analysis/standard/StandardTokenizerFactory.html): +```xml + <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/> + </analyzer> + </fieldType> +``` + +Then go to the [source code](https://github.com/apache/lucene/blob/17c13a76c87c6246f32dd7a78a26db04401ddb6e/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java#L36) of the class on GitHub. +Copy the value of `public static final String NAME` into the `<name>` element and observe the names used for configuring the tokenizer (in this case only `maxTokenLength`). +```xml +<tokenizer> + <name>standard</name> + <config> + <item key="maxTokenLength">255</item> + </config> +</tokenizer> +``` + +The `AnalyzerFactory` constructor logs the available analysis components. + +The analysis components are discovered through the Java Service Provider Interface (SPI). +To add more analysis components, it should be enough to put a Lucene analyzer dependency into your application package `pom.xml` +or register services and create classes directly in the application package. + +### Resource files + +The resource files are relative to the component config `configDir`.
+ +## Inspiration + +These projects: +- [vespa-chinese-linguistics](https://github.com/vespa-engine/sample-apps/blob/master/examples/vespa-chinese-linguistics/src/main/java/com/qihoo/language/JiebaLinguistics.java). +- [OpenNlp Linguistics](https://github.com/vespa-engine/vespa/blob/50d7555bfe7bdaec86f8b31c4d316c9ba66bb976/opennlp-linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java) +- [vespa-kuromoji-linguistics](https://github.com/yahoojapan/vespa-kuromoji-linguistics/tree/main) +- [Clojure library](https://github.com/dainiusjocas/lucene-text-analysis) to work with Lucene analyzers diff --git a/lucene-linguistics/abi-spec.json b/lucene-linguistics/abi-spec.json new file mode 100644 index 00000000000..6f31cf5a2e6 --- /dev/null +++ b/lucene-linguistics/abi-spec.json @@ -0,0 +1 @@ +{ }
\ No newline at end of file diff --git a/lucene-linguistics/pom.xml b/lucene-linguistics/pom.xml new file mode 100644 index 00000000000..929d33a0736 --- /dev/null +++ b/lucene-linguistics/pom.xml @@ -0,0 +1,108 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>com.yahoo.vespa</groupId> + <artifactId>parent</artifactId> + <version>8-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> + </parent> + + <artifactId>lucene-linguistics</artifactId> + <packaging>container-plugin</packaging> + <version>8-SNAPSHOT</version> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-core</artifactId> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analysis-common</artifactId> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>component</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>config-bundle</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>configdefinitions</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>annotations</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>vespajlib</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + 
<groupId>com.yahoo.vespa</groupId> + <artifactId>linguistics</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>com.google.inject</groupId> + <artifactId>guice</artifactId> + <classifier>no_aop</classifier> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>bundle-plugin</artifactId> + <extensions>true</extensions> + <configuration> + <bundleType>CORE</bundleType> + <suppressWarningMissingImportPackages>true</suppressWarningMissingImportPackages> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + </plugin> + <plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>abi-check-plugin</artifactId> + </plugin> + <plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>config-class-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>config-gen</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java new file mode 100644 index 00000000000..b7d3a618954 --- /dev/null +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java @@ -0,0 +1,160 @@ +package com.yahoo.language.lucene; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.language.Language; +import com.yahoo.language.process.StemMode; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharFilterFactory; +import org.apache.lucene.analysis.TokenFilterFactory; +import org.apache.lucene.analysis.TokenizerFactory; +import org.apache.lucene.analysis.custom.CustomAnalyzer; +import 
org.apache.lucene.analysis.standard.StandardAnalyzer; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; + +public class AnalyzerFactory { + private static final Logger log = Logger.getLogger(AnalyzerFactory.class.getName()); + + private final LuceneAnalysisConfig config; + + // Root config directory for all analysis components + private final Path configDir; + + // Registry of analyzers per language + // The idea is to create analyzers ONLY WHEN they are needed + // Analyzers are thread safe so no need to recreate them for every document + private final Map<String, Analyzer> languageAnalyzers = new HashMap<>(); + + private final Analyzer defaultAnalyzer = new StandardAnalyzer(); + + private final static String STANDARD_TOKENIZER = "standard"; + + private final ComponentRegistry<Analyzer> analyzerComponents; + private final DefaultAnalyzers defaultAnalyzers; + + public AnalyzerFactory(LuceneAnalysisConfig config, ComponentRegistry<Analyzer> analyzers) { + this.config = config; + this.configDir = config.configDir(); + this.analyzerComponents = analyzers; + this.defaultAnalyzers = DefaultAnalyzers.getInstance(); + log.info("Available in classpath char filters: " + CharFilterFactory.availableCharFilters()); + log.info("Available in classpath tokenizers: " + TokenizerFactory.availableTokenizers()); + log.info("Available in classpath token filters: " + TokenFilterFactory.availableTokenFilters()); + } + + /** + * Retrieves an analyzer with a given params. + * Sets up the analyzer if config is provided. + * Default analyzer is the `StandardAnalyzer`. 
+ * @param language + * @param stemMode + * @param removeAccents + * @return + */ + public Analyzer getAnalyzer(Language language, StemMode stemMode, boolean removeAccents) { + String analyzerKey = generateKey(language, stemMode, removeAccents); + + // If analyzer for language is already known + if (null != languageAnalyzers.get(analyzerKey)) { + return languageAnalyzers.get(analyzerKey); + } + if (null != config.analysis(analyzerKey)) { + return setAndReturn(analyzerKey, setUpAnalyzer(analyzerKey)); + } + if (null != analyzerComponents.getComponent(analyzerKey)) { + log.info("Analyzer for language=" + analyzerKey + " is from components."); + return setAndReturn(analyzerKey, analyzerComponents.getComponent(analyzerKey)); + } + if (null != defaultAnalyzers.get(language)) { + log.info("Analyzer for language=" + analyzerKey + " is from a list of default language analyzers."); + return setAndReturn(analyzerKey, defaultAnalyzers.get(language)); + } + // set the default analyzer for the language + log.info("StandardAnalyzer is used for language=" + analyzerKey); + return setAndReturn(analyzerKey, defaultAnalyzer); + } + + private Analyzer setAndReturn(String analyzerKey, Analyzer analyzer) { + languageAnalyzers.put(analyzerKey, analyzer); + return analyzer; + } + + // TODO: Would it make sense to combine language + stemMode + removeAccents to make + // a composite key so we can have more variations possible? 
+ private String generateKey(Language language, StemMode stemMode, boolean removeAccents) { + return language.languageCode(); + } + + private Analyzer setUpAnalyzer(String analyzerKey) { + try { + LuceneAnalysisConfig.Analysis analysis = config.analysis(analyzerKey); + log.info("Creating analyzer for: '" + analyzerKey + "' with config: " + analysis); + CustomAnalyzer.Builder builder = CustomAnalyzer.builder(configDir); + builder = withTokenizer(builder, analysis); + builder = addCharFilters(builder, analysis); + builder = addTokenFilters(builder, analysis); + return builder.build(); + } catch (Exception e) { + // Failing to set up the Analyzer, should blow up during testing and VAP should not be deployed. + // Most likely cause for problems is that a specified resource is not available in VAP. + // Unit tests should catch such problems and prevent the VAP being deployed. + log.severe("Failed to build analyzer: '" + + analyzerKey + + "', with configuration: '" + + config.analysis(analyzerKey) + + "' with exception: '" + + e.getMessage() + "'" ); + throw new RuntimeException(e); + } + } + + private CustomAnalyzer.Builder withTokenizer(CustomAnalyzer.Builder builder, + LuceneAnalysisConfig.Analysis analysis) throws IOException { + if (null == analysis) { + // By default we use the "standard" tokenizer + return builder.withTokenizer(STANDARD_TOKENIZER, new HashMap<>()); + } + String tokenizerName = analysis.tokenizer().name(); + Map<String, String> conf = analysis.tokenizer().conf(); + return builder.withTokenizer(tokenizerName, toModifiable(conf)); + } + + private CustomAnalyzer.Builder addCharFilters(CustomAnalyzer.Builder builder, + LuceneAnalysisConfig.Analysis analysis) throws IOException { + if (null == analysis) { + // by default there are no char filters + return builder; + } + for (LuceneAnalysisConfig.Analysis.CharFilters charFilter : analysis.charFilters()) { + builder.addCharFilter(charFilter.name(), toModifiable(charFilter.conf())); + } + return builder; + 
} + + private CustomAnalyzer.Builder addTokenFilters(CustomAnalyzer.Builder builder, + LuceneAnalysisConfig.Analysis analysis) throws IOException { + if (null == analysis) { + // by default no token filters are added + return builder; + } + for (LuceneAnalysisConfig.Analysis.TokenFilters tokenFilter : analysis.tokenFilters()) { + builder.addTokenFilter(tokenFilter.name(), toModifiable(tokenFilter.conf())); + } + return builder; + } + + /** + * A config map coming from the Vespa ConfigInstance is immutable while CustomAnalyzer builders + * mutates the map to mark that a param was consumed. Immutable maps can't be mutated! + * To overcome this conflict we can wrap the ConfigInstance map in a new HashMap. + * @param map + * @return Mutable Map + */ + private Map<String, String> toModifiable(Map<String, String> map) { + return new HashMap<>(map); + } +} diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java new file mode 100644 index 00000000000..955e18474f7 --- /dev/null +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java @@ -0,0 +1,110 @@ +package com.yahoo.language.lucene; + +import com.yahoo.language.Language; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.bn.BengaliAnalyzer; +import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.ckb.SoraniAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.da.DanishAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.el.GreekAnalyzer; +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.et.EstonianAnalyzer; +import 
org.apache.lucene.analysis.eu.BasqueAnalyzer; +import org.apache.lucene.analysis.fa.PersianAnalyzer; +import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.ga.IrishAnalyzer; +import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; +import org.apache.lucene.analysis.hy.ArmenianAnalyzer; +import org.apache.lucene.analysis.id.IndonesianAnalyzer; +import org.apache.lucene.analysis.it.ItalianAnalyzer; +import org.apache.lucene.analysis.lt.LithuanianAnalyzer; +import org.apache.lucene.analysis.lv.LatvianAnalyzer; +import org.apache.lucene.analysis.ne.NepaliAnalyzer; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.no.NorwegianAnalyzer; +import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.ro.RomanianAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.sr.SerbianAnalyzer; +import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.ta.TamilAnalyzer; +import org.apache.lucene.analysis.te.TeluguAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.analysis.tr.TurkishAnalyzer; + +import java.util.Map; + +import static java.util.Map.entry; + +public class DefaultAnalyzers { + + private static DefaultAnalyzers INSTANCE; + private final Map<Language, Analyzer> analyzerClasses; + + private DefaultAnalyzers() { + analyzerClasses = Map.ofEntries( + entry(Language.ARABIC, new ArabicAnalyzer()), + entry(Language.BULGARIAN, new BulgarianAnalyzer()), + entry(Language.BENGALI, new BengaliAnalyzer()), + // analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer()) + entry(Language.CATALAN, new CatalanAnalyzer()), + // cjk analyzer? 
+ entry(Language.KURDISH, new SoraniAnalyzer()), + entry(Language.CZECH, new CzechAnalyzer()), + entry(Language.DANISH, new DanishAnalyzer()), + entry(Language.GERMAN, new GermanAnalyzer()), + entry(Language.GREEK, new GreekAnalyzer()), + entry(Language.ENGLISH, new EnglishAnalyzer()), + entry(Language.SPANISH, new SpanishAnalyzer()), + entry(Language.ESTONIAN, new EstonianAnalyzer()), + entry(Language.BASQUE, new BasqueAnalyzer()), + entry(Language.PERSIAN, new PersianAnalyzer()), + entry(Language.FINNISH, new FinnishAnalyzer()), + entry(Language.FRENCH, new FrenchAnalyzer()), + entry(Language.IRISH, new IrishAnalyzer()), + entry(Language.GALICIAN, new GalicianAnalyzer()), + entry(Language.HINDI, new HindiAnalyzer()), + entry(Language.HUNGARIAN, new HungarianAnalyzer()), + entry(Language.ARMENIAN, new ArmenianAnalyzer()), + entry(Language.INDONESIAN, new IndonesianAnalyzer()), + entry(Language.ITALIAN, new ItalianAnalyzer()), + entry(Language.LITHUANIAN, new LithuanianAnalyzer()), + entry(Language.LATVIAN, new LatvianAnalyzer()), + entry(Language.NEPALI, new NepaliAnalyzer()), + entry(Language.DUTCH, new DutchAnalyzer()), + entry(Language.NORWEGIAN_BOKMAL, new NorwegianAnalyzer()), + entry(Language.PORTUGUESE, new PortugueseAnalyzer()), + entry(Language.ROMANIAN, new RomanianAnalyzer()), + entry(Language.RUSSIAN, new RussianAnalyzer()), + entry(Language.SERBIAN, new SerbianAnalyzer()), + entry(Language.SWEDISH, new SwedishAnalyzer()), + entry(Language.TAMIL, new TamilAnalyzer()), + entry(Language.TELUGU, new TeluguAnalyzer()), + entry(Language.THAI, new ThaiAnalyzer()), + entry(Language.TURKISH, new TurkishAnalyzer()) + ); + } + + public static DefaultAnalyzers getInstance() { + if (INSTANCE == null) { + INSTANCE = new DefaultAnalyzers(); + } + return INSTANCE; + } + + public Analyzer get(Language language) { + return analyzerClasses.get(language); + } + + public Analyzer get(String languageCode) { + return 
analyzerClasses.get(Language.fromLanguageTag(languageCode)); + } +} diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java new file mode 100644 index 00000000000..b5c5ba47ab6 --- /dev/null +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java @@ -0,0 +1,82 @@ +package com.yahoo.language.lucene; + +import com.google.inject.Inject; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.*; +import com.yahoo.language.simple.SimpleLinguistics; +import org.apache.lucene.analysis.Analyzer; + +import java.util.ArrayList; +import java.util.logging.Logger; + +/** + * Factory of Lucene based linguistics processor. + * As described in the Linguistics docstring + * > the tokenizer should typically stem, transform and normalize + * The Stemmer, Transformer, Normalizer, and Segmenter implementations are mostly NOOP. + * + * TODO: docs for all available analysis components. + * TODO: some registry for available language Analyzers. 
+ */ +public class LuceneLinguistics extends SimpleLinguistics { + + private static final Logger log = Logger.getLogger(LuceneLinguistics.class.getName()); + private final Normalizer normalizer; + private final Transformer transformer; + private final Tokenizer tokenizer; + private final Stemmer stemmer; + private final Segmenter segmenter; + private final LuceneAnalysisConfig config; + + @Inject + public LuceneLinguistics(LuceneAnalysisConfig config, ComponentRegistry<Analyzer> analyzers) { + log.info("Creating LuceneLinguistics with: " + config); + this.config = config; + this.tokenizer = new LuceneTokenizer(config, analyzers); + // NOOP stemmer + this.stemmer = (word, stemMode, language) -> { + ArrayList<StemList> stemLists = new ArrayList<>(); + StemList stems = new StemList(); + stems.add(word); + stemLists.add(stems); + return stemLists; + }; + // Segmenter that just wraps a tokenizer + this.segmenter = (string, language) -> { + ArrayList<String> segments = new ArrayList<>(); + Iterable<Token> tokens = tokenizer.tokenize(string, language, StemMode.NONE, false); + tokens.forEach(token -> segments.add(token.getTokenString())); + return segments; + }; + // NOOP normalizer + this.normalizer = (string) -> string; + // NOOP transformer + this.transformer = (string, language) -> string; + } + + @Override + public Stemmer getStemmer() { return stemmer; } + + @Override + public Tokenizer getTokenizer() { return tokenizer; } + + @Override + public Normalizer getNormalizer() { return normalizer; } + + @Override + public Transformer getTransformer() { return transformer; } + + @Override + public Segmenter getSegmenter() { return segmenter; } + + public LuceneAnalysisConfig getConfig() { + return config; + } + + @Override + public boolean equals(Linguistics other) { + return (other instanceof LuceneLinguistics) + // Config actually determines if Linguistics are equal + && config.equals(((LuceneLinguistics) other).getConfig()); } +} diff --git 
a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java new file mode 100644 index 00000000000..0cde849fd6e --- /dev/null +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java @@ -0,0 +1,68 @@ +package com.yahoo.language.lucene; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.language.Language; +import com.yahoo.language.process.*; +import com.yahoo.language.simple.SimpleToken; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class LuceneTokenizer implements Tokenizer { + + private static final Logger log = Logger.getLogger(LuceneTokenizer.class.getName()); + + // Dummy value, just to stuff the Lucene interface. 
+ private final static String FIELD_NAME = "F"; + + private final AnalyzerFactory analyzerFactory; + + public LuceneTokenizer(LuceneAnalysisConfig config) { + this(config, new ComponentRegistry<>()); + } + public LuceneTokenizer(LuceneAnalysisConfig config, ComponentRegistry<Analyzer> analyzers) { + this.analyzerFactory = new AnalyzerFactory(config, analyzers); + } + + @Override + public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) { + if (input.isEmpty()) return List.of(); + + List<Token> tokens = textToTokens(input, analyzerFactory.getAnalyzer(language, stemMode, removeAccents)); + log.log(Level.FINEST, "Tokenized '" + language + "' text='" + input + "' into: n=" + tokens.size() + ", tokens=" + tokens); + return tokens; + } + + private List<Token> textToTokens(String text, Analyzer analyzer) { + List<Token> tokens = new ArrayList<>(); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); + + CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class); + try { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + // TODO: is SimpleToken good enough? Maybe a custom implementation. + // TODO: what to do with cases when multiple tokens are inserted into the position? 
+ String originalString = text.substring(offsetAttribute.startOffset(), offsetAttribute.endOffset()); + String tokenString = charTermAttribute.toString(); + tokens.add(new SimpleToken(originalString, tokenString) + .setType(TokenType.ALPHABETIC) + .setOffset(offsetAttribute.startOffset()) + .setScript(TokenScript.UNKNOWN)); + } + tokenStream.end(); + tokenStream.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to analyze: " + text, e); + } + return tokens; + } +} diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java new file mode 100644 index 00000000000..14330723224 --- /dev/null +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java @@ -0,0 +1,4 @@ +@ExportPackage +package com.yahoo.language.lucene; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def new file mode 100644 index 00000000000..e4b5037dcbe --- /dev/null +++ b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def @@ -0,0 +1,14 @@ +package=com.yahoo.language.lucene + +# The schema ("type") for an application specified config type +# See +# - https://docs.vespa.ai/en/reference/config-files.html + +configDir path +analysis{}.tokenizer.name string default=standard +analysis{}.tokenizer.conf{} string + +analysis{}.charFilters[].name string +analysis{}.charFilters[].conf{} string +analysis{}.tokenFilters[].name string +analysis{}.tokenFilters[].conf{} string diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java new file mode 100644 index 00000000000..568f295b39d --- /dev/null +++ 
b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java @@ -0,0 +1,139 @@ +package com.yahoo.language.lucene; + +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.config.FileReference; +import com.yahoo.language.Language; +import com.yahoo.language.process.StemMode; +import com.yahoo.language.process.Token; +import org.junit.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class LuceneTokenizerTest { + + @Test + public void testTokenizer() { + String text = "This is my Text"; + var tokenizer = new LuceneTokenizer(new LuceneAnalysisConfig + .Builder() + .configDir(FileReference.mockFileReferenceForUnitTesting(new File("."))) + .build()); + Iterable<Token> tokens = tokenizer + .tokenize(text, Language.ENGLISH, StemMode.ALL, true); + assertEquals(List.of("my", "text"), tokenStrings(tokens)); + } + + @Test + public void testLithuanianTokenizer() { + String text = "Žalgirio mūšio data yra 1410 metai"; + var tokenizer = new LuceneTokenizer(new LuceneAnalysisConfig + .Builder() + .configDir(FileReference.mockFileReferenceForUnitTesting(new File("."))) + .build()); + Iterable<Token> tokens = tokenizer + .tokenize(text, Language.LITHUANIAN, StemMode.ALL, true); + assertEquals(List.of("žalgir", "mūš", "dat", "1410", "met"), tokenStrings(tokens)); + } + + private void assertToken(String tokenString, Iterator<Token> tokens) { + Token t = tokens.next(); + assertEquals(tokenString, t.getTokenString()); + } + + private List<Token> iterableToList(Iterable<Token> tokens) { + List<Token> tokenList = new ArrayList<>(); + tokens.forEach(tokenList::add); + return tokenList; + } + + private List<String> tokenStrings(Iterable<Token> tokens) { + List<String> tokenList = new ArrayList<>(); + tokens.forEach(token -> { + tokenList.add(token.getTokenString()); + }); + return tokenList; + } + + @Test 
+ public void testAnalyzerConfiguration() { + String languageCode = Language.ENGLISH.languageCode(); + LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder() + .configDir(FileReference.mockFileReferenceForUnitTesting(new File("."))) + .analysis( + Map.of(languageCode, + new LuceneAnalysisConfig + .Analysis + .Builder() + .tokenFilters(List.of( + new LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("englishMinimalStem"), + new LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("uppercase")))) + ).build(); + LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>()); + Iterable<Token> tokens = linguistics + .getTokenizer() + .tokenize("Dogs and cats", Language.ENGLISH, StemMode.ALL, false); + assertEquals(List.of("DOG", "AND", "CAT"), tokenStrings(tokens)); + } + + @Test + public void testEnglishStemmerAnalyzerConfiguration() { + String languageCode = Language.ENGLISH.languageCode(); + LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder() + .configDir(FileReference.mockFileReferenceForUnitTesting(new File("."))) + .analysis( + Map.of(languageCode, + new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of( + new LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("englishMinimalStem")))) + ).build(); + LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>()); + Iterable<Token> tokens = linguistics + .getTokenizer() + .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false); + assertEquals(List.of("Dog", "and", "Cat"), tokenStrings(tokens)); + } + + @Test + public void testStemmerWithStopWords() { + String languageCode = Language.ENGLISH.languageCode(); + LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder() + .configDir(FileReference.mockFileReferenceForUnitTesting(new File("."))) + .analysis( + Map.of(languageCode, + new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of( + new 
LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("englishMinimalStem"), + new LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("stop") + .conf("words", "stopwords.txt")))) + ).build(); + LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>()); + Iterable<Token> tokens = linguistics + .getTokenizer() + .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false); + assertEquals(List.of("Dog", "Cat"), tokenStrings(tokens)); + } +} diff --git a/lucene-linguistics/src/test/resources/stopwords.txt b/lucene-linguistics/src/test/resources/stopwords.txt new file mode 100644 index 00000000000..e8c07838bf5 --- /dev/null +++ b/lucene-linguistics/src/test/resources/stopwords.txt @@ -0,0 +1 @@ +and diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index a80f07acba2..864566f119e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -616,8 +616,8 @@ public final class Node implements Nodelike { } /** Returns the ACL for the node (trusted nodes, networks and ports) */ - public NodeAcl acl(NodeList allNodes, LoadBalancers loadBalancers, Zone zone, boolean simplerAcl) { - return NodeAcl.from(this, allNodes, loadBalancers, zone, simplerAcl); + public NodeAcl acl(NodeList allNodes, LoadBalancers loadBalancers, Zone zone) { + return NodeAcl.from(this, allNodes, loadBalancers, zone); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 13a6c35e9a7..602314bed96 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 
@@ -220,11 +220,11 @@ public class NodeRepository extends AbstractComponent { * @param host node for which to generate ACLs * @return the list of node ACLs */ - public List<NodeAcl> getChildAcls(Node host, boolean simplerAcl) { + public List<NodeAcl> getChildAcls(Node host) { if ( ! host.type().isHost()) throw new IllegalArgumentException("Only hosts have children"); NodeList allNodes = nodes().list(); return allNodes.childrenOf(host) - .mapToList(childNode -> childNode.acl(allNodes, loadBalancers, zone, simplerAcl)); + .mapToList(childNode -> childNode.acl(allNodes, loadBalancers, zone)); } /** Removes this application: all nodes are set dirty. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index c207e3c7ecc..32b59319a88 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -22,7 +22,7 @@ public class Autoscaler { /** What resource difference is worth a reallocation? 
*/ private static final double resourceIncreaseWorthReallocation = 0.03; /** The load increase headroom (as a fraction) we should have before needing to scale up, to decide to scale down */ - static final double headroomRequiredToScaleDown = 0.1; + static final double headroomRequiredToScaleDown = 0.15; private final NodeRepository nodeRepository; private final AllocationOptimizer allocationOptimizer; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java index 7df19c97659..d0e72cea8fc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeAcl.java @@ -45,12 +45,12 @@ public record NodeAcl(Node node, this.trustedUdpPorts = ImmutableSet.copyOf(Objects.requireNonNull(trustedUdpPorts, "trustedUdpPorts must be non-null")); } - public static NodeAcl from(Node node, NodeList allNodes, LoadBalancers loadBalancers, Zone zone, boolean simplerAcl) { + public static NodeAcl from(Node node, NodeList allNodes, LoadBalancers loadBalancers, Zone zone) { Set<TrustedNode> trustedNodes = new TreeSet<>(Comparator.comparing(TrustedNode::hostname)); Set<Integer> trustedPorts = new LinkedHashSet<>(); Set<Integer> trustedUdpPorts = new LinkedHashSet<>(); Set<String> trustedNetworks = new LinkedHashSet<>(); - IP.Space ipSpace = simplerAcl ? 
IP.Space.of(zone, node.cloudAccount()) : (ip, account) -> true; + IP.Space ipSpace = IP.Space.of(zone, node.cloudAccount()); // For all cases below, trust: // - SSH: If the host has one container, and it is using the host's network namespace, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java index 784f8f82d14..6fe14715355 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.provision.restapi; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.restapi.SlimeJsonResponse; import com.yahoo.slime.Cursor; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.NodeAcl; @@ -34,9 +33,8 @@ public class NodeAclResponse extends SlimeJsonResponse { Node node = nodeRepository.nodes().node(hostname) .orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'")); - boolean simplerAcl = Flags.SIMPLER_ACL.bindTo(nodeRepository.flagSource()).value(); - List<NodeAcl> acls = aclsForChildren ? nodeRepository.getChildAcls(node, simplerAcl) : - List.of(node.acl(nodeRepository.nodes().list(), nodeRepository.loadBalancers(), nodeRepository.zone(), simplerAcl)); + List<NodeAcl> acls = aclsForChildren ? 
nodeRepository.getChildAcls(node) : + List.of(node.acl(nodeRepository.nodes().list(), nodeRepository.loadBalancers(), nodeRepository.zone())); Cursor trustedNodesArray = object.setArray("trustedNodes"); acls.forEach(nodeAcl -> toSlime(nodeAcl, trustedNodesArray)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 091060413a9..d33857d1a1e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -88,7 +88,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(7)); fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 6, 1, 1.1, 9.6, 381.5, + 6, 1, 1.1, 9.8, 390.2, fixture.autoscale()); } @@ -666,7 +666,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1)); fixture.loader().applyCpuLoad(0.02, 5); fixture.tester().assertResources("Scaling down since enough time has passed", - 3, 1, 1.0, 26, 111.5, + 5, 1, 1.0, 12.3, 50.7, fixture.autoscale()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 0a3e8024a97..379dbb27d87 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); 
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 42.2, 185.8, + 3, 3, 10.5, 43.2, 190.0, fixture.autoscale()); // Higher query rate @@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 42.2, 185.8, + 3, 3, 20.9, 43.2, 190.0, fixture.autoscale()); // Higher headroom @@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 42.2, 185.8, + 3, 3, 12.4, 43.2, 190.0, fixture.autoscale()); // Higher per query cost @@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 42.2, 185.8, + 3, 3, 15.7, 43.2, 190.0, fixture.autoscale()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java index 26925372b93..94014712930 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java @@ -58,7 +58,7 @@ public class AclProvisioningTest { // Get trusted nodes for the first active node Node node = activeNodes.get(0); List<Node> hostOfNode = node.parentHostname().flatMap(tester.nodeRepository().nodes()::node).map(List::of).orElseGet(List::of); - Supplier<NodeAcl> nodeAcls = () -> node.acl(tester.nodeRepository().nodes().list(), 
tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + Supplier<NodeAcl> nodeAcls = () -> node.acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); // Trusted nodes are active nodes in same application, proxy nodes and config servers assertAcls(trustedNodesOf(List.of(activeNodes, proxyNodes, configServers.asList(), hostOfNode), node.cloudAccount()), @@ -83,7 +83,7 @@ public class AclProvisioningTest { // Get trusted nodes for a parked tenant node Node node = tester.nodeRepository().nodes().list(Node.State.parked).nodeType(NodeType.tenant).first().get(); - NodeAcl nodeAcl = node.acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + NodeAcl nodeAcl = node.acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); // Trusted nodes are all config-nodes assertAcls(trustedNodesOf(List.of(proxyNodes, configServers.asList()), node.cloudAccount()), List.of(nodeAcl)); @@ -108,7 +108,7 @@ public class AclProvisioningTest { // Get trusted nodes for the first config server Node node = tester.nodeRepository().nodes().node("cfg1") .orElseThrow(() -> new RuntimeException("Failed to find cfg1")); - NodeAcl nodeAcl = node.acl(nodes, tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + NodeAcl nodeAcl = node.acl(nodes, tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); // Trusted nodes is all tenant nodes, all proxy nodes, all config servers and load balancer subnets // All tenant hosts because nodes are IPv6 and cfg are IPv4, so traffic is NATed. 
@@ -128,7 +128,7 @@ public class AclProvisioningTest { publicTester.makeConfigServers(3, "default", Version.fromString("6.123.456")); Node publicCfgNode = publicTester.nodeRepository().nodes().node("cfg1") .orElseThrow(() -> new RuntimeException("Failed to find cfg1")); - NodeAcl publicNodeAcl = publicCfgNode.acl(nodes, publicTester.nodeRepository().loadBalancers(), publicTester.nodeRepository().zone(), true); + NodeAcl publicNodeAcl = publicCfgNode.acl(nodes, publicTester.nodeRepository().loadBalancers(), publicTester.nodeRepository().zone()); assertEquals(Set.of(51820), publicNodeAcl.trustedUdpPorts()); } @@ -146,7 +146,7 @@ public class AclProvisioningTest { // Get trusted nodes for first proxy node NodeList proxyNodes = tester.nodeRepository().nodes().list().nodeType(NodeType.proxy); Node node = proxyNodes.first().get(); - NodeAcl nodeAcl = node.acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + NodeAcl nodeAcl = node.acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); // Trusted nodes is all config servers and all proxy nodes assertAcls(trustedNodesOf(List.of(proxyNodes.asList(), configServers.asList()), node.cloudAccount()), List.of(nodeAcl)); @@ -164,7 +164,7 @@ public class AclProvisioningTest { List<Node> nodes = tester.makeReadyChildren(5, new NodeResources(1, 4, 10, 1), host.hostname()); - List<NodeAcl> acls = tester.nodeRepository().getChildAcls(host, true); + List<NodeAcl> acls = tester.nodeRepository().getChildAcls(host); // ACLs for each container on the host assertFalse(nodes.isEmpty()); @@ -188,7 +188,7 @@ public class AclProvisioningTest { List<Node> controllers = tester.nodeRepository().nodes().list().nodeType(NodeType.controller).asList(); // Controllers and hosts all trust each other - NodeAcl controllerAcl = controllers.get(0).acl(tester.nodeRepository().nodes().list(), 
tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + NodeAcl controllerAcl = controllers.get(0).acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); assertAcls(trustedNodesOf(List.of(controllers), controllers.get(0).cloudAccount()), Set.of("10.2.3.0/24", "10.4.5.0/24"), List.of(controllerAcl)); assertEquals(Set.of(22, 4443, 443), controllerAcl.trustedPorts()); assertEquals(Set.of(), controllerAcl.trustedUdpPorts()); @@ -217,7 +217,7 @@ public class AclProvisioningTest { // ACL for nodes with allocation trust their respective load balancer networks, if any for (var host : hosts) { - List<NodeAcl> acls = tester.nodeRepository().getChildAcls(host, true); + List<NodeAcl> acls = tester.nodeRepository().getChildAcls(host); assertEquals(2, acls.size()); for (var acl : acls) { if (acl.node().allocation().isPresent()) { @@ -235,7 +235,7 @@ public class AclProvisioningTest { tester.makeConfigServers(3, "default", Version.fromString("6.123.456")); List<Node> readyNodes = tester.makeReadyNodes(1, "default", NodeType.proxy); - NodeAcl nodeAcl = readyNodes.get(0).acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone(), true); + NodeAcl nodeAcl = readyNodes.get(0).acl(tester.nodeRepository().nodes().list(), tester.nodeRepository().loadBalancers(), tester.nodeRepository().zone()); assertEquals(3, nodeAcl.trustedNodes().size()); assertEquals(List.of(Set.of("127.0.1.1"), Set.of("127.0.1.2"), Set.of("127.0.1.3")), diff --git a/parent/pom.xml b/parent/pom.xml index d624bb2cd8c..df20b94ec79 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -887,6 +887,16 @@ <version>${opennlp.vespa.version}</version> </dependency> <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-core</artifactId> + <version>${lucene.vespa.version}</version> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + 
<artifactId>lucene-analysis-common</artifactId> + <version>${lucene.vespa.version}</version> + </dependency> + <dependency> <groupId>org.apache.velocity</groupId> <artifactId>velocity-engine-core</artifactId> <version>2.3</version> @@ -95,6 +95,7 @@ <module>linguistics-components</module> <module>logd</module> <module>logserver</module> + <module>lucene-linguistics</module> <module>messagebus</module> <module>metrics</module> <module>metrics-proxy</module> diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt index 55b94307df6..7684e3ea2ae 100644 --- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt +++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt @@ -97,6 +97,8 @@ org.apache.httpcomponents:httpmime:4.5.14 org.apache.httpcomponents.client5:httpclient5:5.2.1 org.apache.httpcomponents.core5:httpcore5:5.2.2 org.apache.httpcomponents.core5:httpcore5-h2:5.2.2 +org.apache.lucene:lucene-analysis-common:9.7.0 +org.apache.lucene:lucene-core:9.7.0 org.apache.maven:maven-archiver:3.6.0 org.apache.maven:maven-artifact:3.8.7 org.apache.maven:maven-artifact-manager:2.2.1 |