diff options
81 files changed, 2013 insertions, 862 deletions
diff --git a/client/go/go.mod b/client/go/go.mod index 3e721fe2a06..8699f3e9245 100644 --- a/client/go/go.mod +++ b/client/go/go.mod @@ -8,7 +8,7 @@ require ( github.com/fatih/color v1.16.0 // This is the most recent version compatible with Go 1.20. Upgrade when we upgrade our Go version github.com/go-json-experiment/json v0.0.0-20230324203220-04923b7a9528 - github.com/klauspost/compress v1.17.4 + github.com/klauspost/compress v1.17.5 github.com/mattn/go-colorable v0.1.13 github.com/mattn/go-isatty v0.0.20 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c diff --git a/client/go/go.sum b/client/go/go.sum index e2b1c85442d..fc5730a071d 100644 --- a/client/go/go.sum +++ b/client/go/go.sum @@ -20,6 +20,8 @@ github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4s github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E= +github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/client/js/app/package.json b/client/js/app/package.json index e6273edbf75..1cc2432f88f 100644 --- a/client/js/app/package.json +++ b/client/js/app/package.json @@ -32,11 +32,11 @@ "eslint-plugin-react-hooks": "^4", "eslint-plugin-react-perf": "^3", "eslint-plugin-unused-imports": "^3", - "husky": "^8", + "husky": "^9.0.0", "jest": "^29", "lodash": "^4", "prettier": "3", - "pretty-quick": "^3", + "pretty-quick": "^4.0.0", "react-router-dom": "^6", "use-context-selector": "^1", "vite": 
"^5.0.5" diff --git a/client/js/app/yarn.lock b/client/js/app/yarn.lock index ebf1c99db13..10ba47a7b10 100644 --- a/client/js/app/yarn.lock +++ b/client/js/app/yarn.lock @@ -2151,7 +2151,7 @@ cross-spawn@^6.0.0: shebang-command "^1.2.0" which "^1.2.9" -cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3: +cross-spawn@^7.0.2, cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== @@ -2664,22 +2664,7 @@ execa@^1.0.0: signal-exit "^3.0.0" strip-eof "^1.0.0" -execa@^4.1.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a" - integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA== - dependencies: - cross-spawn "^7.0.0" - get-stream "^5.0.0" - human-signals "^1.1.1" - is-stream "^2.0.0" - merge-stream "^2.0.0" - npm-run-path "^4.0.0" - onetime "^5.1.0" - signal-exit "^3.0.2" - strip-final-newline "^2.0.0" - -execa@^5.0.0: +execa@^5.0.0, execa@^5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.1.tgz#f80ad9cbf4298f7bd1d4c9555c21e93741c411dd" integrity sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg== @@ -2931,13 +2916,6 @@ get-stream@^4.0.0: dependencies: pump "^3.0.0" -get-stream@^5.0.0: - version "5.2.0" - resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3" - integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA== - dependencies: - pump "^3.0.0" - get-stream@^6.0.0: version "6.0.1" resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7" @@ -3105,20 +3083,15 @@ html-escaper@^2.0.0: resolved 
"https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453" integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg== -human-signals@^1.1.1: - version "1.1.1" - resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3" - integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw== - human-signals@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== -husky@^8: - version "8.0.3" - resolved "https://registry.yarnpkg.com/husky/-/husky-8.0.3.tgz#4936d7212e46d1dea28fef29bb3a108872cd9184" - integrity sha512-+dQSyqPh4x1hlO1swXBiNb2HzTDN1I2IGLQx1GrBuiqFJfoMrnZWwVmatvSiO+Iz8fBUnf+lekwNo4c2LlXItg== +husky@^9.0.0: + version "9.0.6" + resolved "https://registry.yarnpkg.com/husky/-/husky-9.0.6.tgz#cee0245d60480b12279cf492ec6cfc1aeb7fa759" + integrity sha512-EEuw/rfTiMjOfuL7pGO/i9otg1u36TXxqjIA6D9qxVjd/UXoDOsLor/BSFf5hTK50shwzCU3aVVwdXDp/lp7RA== ignore@^5.2.0, ignore@^5.3.0: version "5.3.0" @@ -4301,7 +4274,7 @@ npm-run-path@^2.0.0: dependencies: path-key "^2.0.0" -npm-run-path@^4.0.0, npm-run-path@^4.0.1: +npm-run-path@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea" integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw== @@ -4417,7 +4390,7 @@ once@^1.3.0, once@^1.3.1, once@^1.4.0: dependencies: wrappy "1" -onetime@^5.1.0, onetime@^5.1.2: +onetime@^5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e" integrity 
sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg== @@ -4593,13 +4566,13 @@ pretty-format@^29.7.0: ansi-styles "^5.0.0" react-is "^18.0.0" -pretty-quick@^3: - version "3.3.1" - resolved "https://registry.yarnpkg.com/pretty-quick/-/pretty-quick-3.3.1.tgz#cfde97fec77a8d201a0e0c9c71d9990e12587ee2" - integrity sha512-3b36UXfYQ+IXXqex6mCca89jC8u0mYLqFAN5eTQKoXO6oCQYcIVYZEB/5AlBHI7JPYygReM2Vv6Vom/Gln7fBg== +pretty-quick@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/pretty-quick/-/pretty-quick-4.0.0.tgz#ea5cce85a5804bfbec7327b0e064509155d03f39" + integrity sha512-M+2MmeufXb/M7Xw3Afh1gxcYpj+sK0AxEfnfF958ktFeAyi5MsKY5brymVURQLgPLV1QaF5P4pb2oFJ54H3yzQ== dependencies: - execa "^4.1.0" - find-up "^4.1.0" + execa "^5.1.1" + find-up "^5.0.0" ignore "^5.3.0" mri "^1.2.0" picocolors "^1.0.0" diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java index 348b84367d5..cbeac3cc849 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java @@ -160,7 +160,10 @@ public class MetricsProxyContainerCluster extends ContainerCluster<MetricsProxyC builder.consumer.add(toConsumerBuilder(MetricsConsumer.defaultConsumer)); builder.consumer.add(toConsumerBuilder(newDefaultConsumer())); - if (isHostedVespa()) builder.consumer.add(toConsumerBuilder(MetricsConsumer.vespa9)); + if (isHostedVespa()) { + var amendedVespa9Consumer = addMetrics(MetricsConsumer.vespa9, getAdditionalDefaultMetrics().getMetrics()); + builder.consumer.add(toConsumerBuilder(amendedVespa9Consumer)); + } getAdmin() .map(Admin::getAmendedMetricsConsumers) .map(consumers -> 
consumers.stream().map(ConsumersConfigGenerator::toConsumerBuilder).toList()) diff --git a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsConsumersTest.java b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsConsumersTest.java index becb7235c64..88e1ba7a1a6 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsConsumersTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsConsumersTest.java @@ -23,6 +23,7 @@ import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.g import static com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyModelTester.servicesWithAdminOnly; import static java.util.Collections.singleton; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -69,15 +70,21 @@ public class MetricsConsumersTest { @Test void vespa_consumer_can_be_amended_via_admin_object() { - VespaModel model = getModel(servicesWithAdminOnly(), self_hosted); + VespaModel model = getModel(servicesWithAdminOnly(), hosted); var additionalMetric = new Metric("additional-metric"); model.getAdmin().setAdditionalDefaultMetrics(new MetricSet("amender-metrics", singleton(additionalMetric))); ConsumersConfig config = consumersConfigFromModel(model); assertEquals(numMetricsForVespaConsumer + 1, config.consumer(0).metric().size()); - ConsumersConfig.Consumer vespaConsumer = config.consumer(0); + ConsumersConfig.Consumer vespaConsumer = requireConsumer(config, MetricsConsumer.vespa); assertTrue(checkMetric(vespaConsumer, additionalMetric), "Did not contain additional metric"); + + ConsumersConfig.Consumer defaultConsumer = requireConsumer(config, MetricsConsumer.defaultConsumer); + assertFalse(checkMetric(defaultConsumer, additionalMetric), "Contained additional 
metric"); + + ConsumersConfig.Consumer vespa9Consumer = requireConsumer(config, MetricsConsumer.vespa9); + assertTrue(checkMetric(vespa9Consumer, additionalMetric), "Did not contain additional metric"); } @Test @@ -249,4 +256,11 @@ public class MetricsConsumersTest { assertTrue(checkMetric(consumer, customMetric), "Did not contain metric: " + customMetric); } + private ConsumersConfig.Consumer requireConsumer(ConsumersConfig config, MetricsConsumer consumer) { + return config.consumer() + .stream() + .filter(c -> c.name().equals(consumer.id())) + .findFirst().orElseThrow(); + } + } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationMapper.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationMapper.java index de86e9a9cdc..6b1a75f2f44 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationMapper.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationMapper.java @@ -80,7 +80,8 @@ public final class ApplicationMapper { } public List<Application> listApplications(ApplicationId applicationId) { - return requestHandlers.get(applicationId).applications(); + var applicationVersions = requestHandlers.get(applicationId); + return applicationVersions == null ? List.of() : applicationVersions.applications(); } } diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index fc4887e8a9c..95603d28ca5 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -66,7 +66,7 @@ <!-- Athenz dependencies. 
Make sure these dependencies match those in Vespa's internal repositories --> <athenz.vespa.version>1.11.50</athenz.vespa.version> - <aws-sdk.vespa.version>1.12.644</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.646</aws-sdk.vespa.version> <!-- Athenz END --> <!-- WARNING: If you change curator version, you also need to update @@ -90,7 +90,7 @@ <commons-compress.vespa.version>1.25.0</commons-compress.vespa.version> <commons-cli.vespa.version>1.6.0</commons-cli.vespa.version> <curator.vespa.version>5.6.0</curator.vespa.version> - <dropwizard.metrics.vespa.version>4.2.24</dropwizard.metrics.vespa.version> <!-- ZK 3.9.1 requires this --> + <dropwizard.metrics.vespa.version>4.2.25</dropwizard.metrics.vespa.version> <!-- ZK 3.9.1 requires this --> <eclipse-collections.vespa.version>11.1.0</eclipse-collections.vespa.version> <eclipse-sisu.vespa.version>0.9.0.M2</eclipse-sisu.vespa.version> <failureaccess.vespa.version>1.0.2</failureaccess.vespa.version> diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 3e1743b8d45..b6cf8c6e18b 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -6,8 +6,10 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.DocumentId; import com.yahoo.document.DocumentOperation; +import com.yahoo.document.DocumentPut; import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; import com.yahoo.document.TestAndSetCondition; import com.yahoo.document.json.document.DocumentParser; import com.yahoo.document.json.readers.DocumentParseInfo; @@ -18,6 +20,9 @@ import java.io.InputStream; import java.util.Optional; import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED; +import static 
com.yahoo.document.json.document.DocumentParser.CONDITION; +import static com.yahoo.document.json.document.DocumentParser.CREATE_IF_NON_EXISTENT; +import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart; /** @@ -60,7 +65,7 @@ public class JsonReader { * @param docIdString document ID * @return the parsed document operation */ - public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { + ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { DocumentId docId = new DocumentId(docIdString); DocumentParseInfo documentParseInfo; try { @@ -78,6 +83,79 @@ public class JsonReader { return operation; } + /** + * Reads a JSON which is expected to contain a single document operation, + * and where other parameters, like the document ID and operation type, are supplied by other means. + * + * @param operationType the type of operation (update or put) + * @param docIdString document ID + * @return the parsed document operation + */ + public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) { + try { + DocumentId docId = new DocumentId(docIdString); + DocumentParseInfo documentParseInfo = new DocumentParseInfo(); + documentParseInfo.documentId = docId; + documentParseInfo.operationType = operationType; + + if (JsonToken.START_OBJECT != parser.nextValue()) + throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken()); + + Boolean create = null; + String condition = null; + ParsedDocumentOperation operation = null; + while (JsonToken.END_OBJECT != parser.nextValue()) { + switch (parser.getCurrentName()) { + case FIELDS -> { + documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); + VespaJsonDocumentReader vespaJsonDocumentReader = new 
VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); + operation = vespaJsonDocumentReader.createDocumentOperation( + getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo); + + if ( ! documentParseInfo.fieldsBuffer.isEmpty()) + throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " + + documentParseInfo.fieldsBuffer.nesting() + " levels remain"); + + } + case CONDITION -> { + if ( ! JsonToken.VALUE_STRING.equals(parser.currentToken()) && ! JsonToken.VALUE_NULL.equals(parser.currentToken())) + throw new IllegalArgumentException("expected string value for condition, got " + parser.currentToken()); + + condition = parser.getValueAsString(); + } + case CREATE_IF_NON_EXISTENT -> { + create = parser.getBooleanValue(); // Throws if not boolean. + } + default -> { + // We ignore stray fields, but need to ensure structural balance in doing do. + if (parser.currentToken().isStructStart()) parser.skipChildren(); + } + } + } + + if (null != parser.nextToken()) + throw new IllegalArgumentException("expected end of input, got " + parser.currentToken()); + + if (null == operation) + throw new IllegalArgumentException("document is missing the required \"fields\" field"); + + if (null != create) { + switch (operationType) { + case PUT -> ((DocumentPut) operation.operation()).setCreateIfNonExistent(create); + case UPDATE -> ((DocumentUpdate) operation.operation()).setCreateIfNonExistent(create); + case REMOVE -> throw new IllegalArgumentException(CREATE_IF_NON_EXISTENT + " is not supported for remove operations"); + } + } + + operation.operation().setCondition(TestAndSetCondition.fromConditionString(Optional.ofNullable(condition))); + + return operation; + } + catch (IOException e) { + throw new IllegalArgumentException("failed parsing document", e); + } + } + /** Returns the next document operation, or null if we have reached the end */ public DocumentOperation next() { switch 
(state) { diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java new file mode 100644 index 00000000000..0fbdd0b28c7 --- /dev/null +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -0,0 +1,64 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import java.io.IOException; +import java.util.function.Supplier; + +/** + * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking. + * + * @author jonmv + */ +public class LazyTokenBuffer extends TokenBuffer { + + private final JsonParser parser; + + public LazyTokenBuffer(JsonParser parser) { + this.parser = parser; + try { addFromParser(parser); } + catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); } + if (JsonToken.START_OBJECT != current()) + throw new IllegalArgumentException("expected start of JSON object, but got " + current()); + updateNesting(current()); + } + + void advance() { + super.advance(); + if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible. 
+ } + + @Override + public Supplier<Token> lookahead() { + return new Supplier<>() { + int localNesting = nesting(); + Supplier<Token> buffered = LazyTokenBuffer.super.lookahead(); + @Override public Token get() { + if (localNesting == 0) + return null; + + Token token = buffered.get(); + if (token == null) { + token = nextToken(); + tokens.add(token); + } + localNesting += nestingOffset(token.token); + return token; + } + }; + } + + private Token nextToken() { + try { + JsonToken token = parser.nextValue(); + if (token == null) + throw new IllegalStateException("no more JSON tokens"); + return new Token(token, parser.getCurrentName(), parser.getText()); + } + catch (IOException e) { + throw new IllegalArgumentException("failed reading document JSON", e); + } + } + +} diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index dec84e46b77..3a48f71c4cd 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -1,15 +1,16 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.json; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; +import java.util.function.Supplier; + /** * Helper class to enable lookahead in the token stream. 
* @@ -17,101 +18,76 @@ import com.google.common.base.Preconditions; */ public class TokenBuffer { - private final List<Token> tokens; + final Deque<Token> tokens = new ArrayDeque<>(); - private int position = 0; private int nesting = 0; - public TokenBuffer() { - this(new ArrayList<>()); - } - - public TokenBuffer(List<Token> tokens) { - this.tokens = tokens; - if (tokens.size() > 0) - updateNesting(tokens.get(position).token); - } + public TokenBuffer() { } /** Returns whether any tokens are available in this */ - public boolean isEmpty() { return remaining() == 0; } - - public JsonToken previous() { - updateNestingGoingBackwards(current()); - position--; - return current(); - } - - /** Returns the current token without changing position, or null if none */ - public JsonToken current() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.token; - } + public boolean isEmpty() { return tokens.isEmpty(); } + /** Returns the next token, or null, and updates the nesting count of this. */ public JsonToken next() { - position++; + advance(); JsonToken token = current(); updateNesting(token); return token; } - /** Returns a given number of tokens ahead, or null if none */ - public JsonToken peek(int ahead) { - if (tokens.size() <= position + ahead) return null; - return tokens.get(position + ahead).token; + void advance() { + tokens.poll(); + } + + /** Returns the current token without changing position, or null if none */ + public JsonToken current() { + return isEmpty() ? null : tokens.peek().token; } /** Returns the current token name without changing position, or null if none */ public String currentName() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.name; + return isEmpty() ? 
null : tokens.peek().name; } /** Returns the current token text without changing position, or null if none */ public String currentText() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.text; + return isEmpty() ? null : tokens.peek().text; } - public int remaining() { - return tokens.size() - position; + /** + * Returns a sequence of remaining tokens in this, or nulls when none remain. + * This may fill the token buffer, but not otherwise modify it. + */ + public Supplier<Token> lookahead() { + Iterator<Token> iterator = tokens.iterator(); + if (iterator.hasNext()) iterator.next(); + return () -> iterator.hasNext() ? iterator.next() : null; } private void add(JsonToken token, String name, String text) { - tokens.add(tokens.size(), new Token(token, name, text)); + tokens.add(new Token(token, name, text)); } - public void bufferObject(JsonToken first, JsonParser tokens) { - bufferJsonStruct(first, tokens, JsonToken.START_OBJECT); + public void bufferObject(JsonParser parser) { + bufferJsonStruct(parser, JsonToken.START_OBJECT); } - private void bufferJsonStruct(JsonToken first, JsonParser tokens, JsonToken firstToken) { - int localNesting = 0; - JsonToken t = first; + private void bufferJsonStruct(JsonParser parser, JsonToken firstToken) { + JsonToken token = parser.currentToken(); + Preconditions.checkArgument(token == firstToken, + "Expected %s, got %s.", firstToken.name(), token); + updateNesting(token); - Preconditions.checkArgument(first == firstToken, - "Expected %s, got %s.", firstToken.name(), t); - if (remaining() == 0) { - updateNesting(t); + try { + for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser)) + parser.nextValue(); } - localNesting = storeAndPeekNesting(t, localNesting, tokens); - while (localNesting > 0) { - t = nextValue(tokens); - localNesting = storeAndPeekNesting(t, localNesting, tokens); + catch (IOException e) { + throw new 
IllegalArgumentException(e); } } - private int storeAndPeekNesting(JsonToken t, int nesting, JsonParser tokens) { - addFromParser(t, tokens); - return nesting + nestingOffset(t); - } - - private int nestingOffset(JsonToken token) { + int nestingOffset(JsonToken token) { if (token == null) return 0; if (token.isStructStart()) { return 1; @@ -122,43 +98,23 @@ public class TokenBuffer { } } - private void addFromParser(JsonToken t, JsonParser tokens) { - try { - add(t, tokens.getCurrentName(), tokens.getText()); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - - private JsonToken nextValue(JsonParser tokens) { - try { - return tokens.nextValue(); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } + int addFromParser(JsonParser tokens) throws IOException { + add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); + return nestingOffset(tokens.currentToken()); } - private void updateNesting(JsonToken token) { + void updateNesting(JsonToken token) { nesting += nestingOffset(token); } - private void updateNestingGoingBackwards(JsonToken token) { - nesting -= nestingOffset(token); - } - public int nesting() { return nesting; } public void skipToRelativeNesting(int relativeNesting) { int initialNesting = nesting(); - do { - next(); - } while ( nesting() > initialNesting + relativeNesting); - } - - public List<Token> rest() { - return tokens.subList(position, tokens.size()); + do next(); + while (nesting() > initialNesting + relativeNesting); } public static final class Token { diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 74656762fe1..77e11dcf2a8 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -20,7 +20,7 @@ public class DocumentParser { private static final 
String UPDATE = "update"; private static final String PUT = "put"; private static final String ID = "id"; - private static final String CONDITION = "condition"; + public static final String CONDITION = "condition"; public static final String CREATE_IF_NON_EXISTENT = "create"; public static final String FIELDS = "fields"; public static final String REMOVE = "remove"; @@ -86,16 +86,6 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { JsonToken currentToken = parser.getCurrentToken(); - if (currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) { - try { - if (CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { - documentParseInfo.create = Optional.ofNullable(parser.getBooleanValue()); - return; - } - } catch (IOException e) { - throw new RuntimeException("Got IO exception while parsing document", e); - } - } if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); @@ -111,12 +101,11 @@ public class DocumentParser { } } - private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { + private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { - JsonToken currentToken = parser.getCurrentToken(); // "fields" opens a dictionary and is therefore on level two which might be surprising. 
- if (currentToken == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { - documentParseInfo.fieldsBuffer.bufferObject(currentToken, parser); + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } } catch (IOException e) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java index 2dce07cdbe6..e859306f04d 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java +++ b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java @@ -8,6 +8,7 @@ import com.yahoo.document.json.TokenBuffer; import java.util.Optional; public class DocumentParseInfo { + public DocumentParseInfo() { } public DocumentId documentId; public Optional<Boolean> create = Optional.empty(); public Optional<String> condition = Optional.empty(); diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 0b7b1ae9996..1fd4029b1a5 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -4,13 +4,15 @@ package com.yahoo.document.json.readers; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; -import com.yahoo.slime.Inspector; -import com.yahoo.slime.Type; +import com.yahoo.document.json.TokenBuffer.Token; import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.TensorType.Dimension; + +import java.util.function.Supplier; import static 
com.yahoo.document.json.readers.JsonParserHelpers.*; import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString; @@ -37,36 +39,43 @@ public class TensorReader { Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType()); expectOneOf(buffer.current(), JsonToken.START_OBJECT, JsonToken.START_ARRAY); int initNesting = buffer.nesting(); - for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { - if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(buffer)) { + while (true) { + Supplier<Token> lookahead = buffer.lookahead(); + Token next = lookahead.get(); + if (TENSOR_CELLS.equals(next.name) && ! primitiveContent(next.token, lookahead.get().token)) { + buffer.next(); readTensorCells(buffer, builder); } - else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) { + else if (TENSOR_VALUES.equals(next.name) && builder.type().dimensions().stream().allMatch(Dimension::isIndexed)) { + buffer.next(); readTensorValues(buffer, builder); } - else if (TENSOR_BLOCKS.equals(buffer.currentName())) { + else if (TENSOR_BLOCKS.equals(next.name)) { + buffer.next(); readTensorBlocks(buffer, builder); } - else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) { + else if (TENSOR_TYPE.equals(next.name) && next.token == JsonToken.VALUE_STRING) { + buffer.next(); // Ignore input tensor type } + else if (buffer.nesting() == initNesting && JsonToken.END_OBJECT == next.token) { + buffer.next(); + break; + } else { - buffer.previous(); // Back up to the start of the enclosing block readDirectTensorValue(buffer, builder); - buffer.previous(); // ... 
and back up to the end of the enclosing block + break; } } expectOneOf(buffer.current(), JsonToken.END_OBJECT, JsonToken.END_ARRAY); tensorFieldValue.assign(builder.build()); } - static boolean primitiveContent(TokenBuffer buffer) { - JsonToken cellsValue = buffer.current(); - if (cellsValue.isScalarValue()) return true; - if (cellsValue == JsonToken.START_ARRAY) { - JsonToken firstArrayValue = buffer.peek(1); - if (firstArrayValue == JsonToken.END_ARRAY) return false; - if (firstArrayValue.isScalarValue()) return true; + static boolean primitiveContent(JsonToken current, JsonToken next) { + if (current.isScalarValue()) return true; + if (current == JsonToken.START_ARRAY) { + if (next == JsonToken.END_ARRAY) return false; + if (next.isScalarValue()) return true; } return false; } @@ -186,7 +195,7 @@ public class TensorReader { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if (isArrayOfObjects(buffer, 0)) + if (isArrayOfObjects(buffer)) readTensorCells(buffer, builder); else if ( ! 
hasMapped) readTensorValues(buffer, builder); @@ -196,10 +205,12 @@ public class TensorReader { readTensorCells(buffer, builder); } - private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) { - if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false; - if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array - return buffer.peek(ahead) == JsonToken.START_OBJECT; + private static boolean isArrayOfObjects(TokenBuffer buffer) { + if (buffer.current() != JsonToken.START_ARRAY) return false; + Supplier<Token> lookahead = buffer.lookahead(); + Token next; + while ((next = lookahead.get()).token == JsonToken.START_ARRAY) { } + return next.token == JsonToken.START_OBJECT; } private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java index 113b8732b23..067dabdbdab 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java @@ -230,7 +230,7 @@ public class VespaJsonDocumentReader { private static boolean isFieldPath(String field) { - return field.matches("^.*?[.\\[\\{].*$"); + return field.matches("^.*?[.\\[{].*$"); } private static void verifyEndState(TokenBuffer buffer, JsonToken expectedFinalToken) { @@ -238,7 +238,7 @@ public class VespaJsonDocumentReader { "Expected end of JSON struct (%s), got %s", expectedFinalToken, buffer.current()); Preconditions.checkState(buffer.nesting() == 0, "Nesting not zero at end of operation"); Preconditions.checkState(buffer.next() == null, "Dangling data at end of operation"); - Preconditions.checkState(buffer.remaining() == 0, "Dangling data at end of operation"); + Preconditions.checkState(buffer.isEmpty(), "Dangling data at end of 
operation"); } } diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index 5a9f02c790d..aa043a25d78 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -20,6 +20,7 @@ import com.yahoo.document.MapDataType; import com.yahoo.document.PositionDataType; import com.yahoo.document.StructDataType; import com.yahoo.document.TensorDataType; +import com.yahoo.document.TestAndSetCondition; import com.yahoo.document.WeightedSetDataType; import com.yahoo.document.datatypes.Array; import com.yahoo.document.datatypes.BoolFieldValue; @@ -221,6 +222,65 @@ public class JsonReaderTestCase { } @Test + public void readDocumentWithMissingFieldsField() { + assertEquals("document is missing the required \"fields\" field", + assertThrows(IllegalArgumentException.class, + () -> createReader("{ }").readSingleDocumentStreaming(DocumentOperationType.PUT, + "id:unittest:testnull::whee")) + .getMessage()); + } + + @Test + public void readSingleDocumentsPutStreaming() throws IOException { + String json = """ + { + "remove": "id:unittest:smoke::ignored", + "ignored-extra-array": [{ "foo": null }, { }], + "ignored-extra-object": { "foo": [null, { }], "bar": { } }, + "fields": { + "something": "smoketest", + "flag": true, + "nalle": "bamse" + }, + "id": "id:unittest:smoke::ignored", + "create": false, + "condition": "true" + } + """; + ParsedDocumentOperation operation = createReader(json).readSingleDocumentStreaming(DocumentOperationType.PUT,"id:unittest:smoke::doc1"); + DocumentPut put = ((DocumentPut) operation.operation()); + assertFalse(put.getCreateIfNonExistent()); + assertEquals("true", put.getCondition().getSelection()); + smokeTestDoc(put.getDocument()); + } + + @Test + public void readSingleDocumentsUpdateStreaming() throws IOException { + String json = """ + { + "remove": 
"id:unittest:smoke::ignored", + "ignored-extra-array": [{ "foo": null }, { }], + "ignored-extra-object": { "foo": [null, { }], "bar": { } }, + "fields": { + "something": { "assign": "smoketest" }, + "flag": { "assign": true }, + "nalle": { "assign": "bamse" } + }, + "id": "id:unittest:smoke::ignored", + "create": true, + "condition": "false" + } + """; + ParsedDocumentOperation operation = createReader(json).readSingleDocumentStreaming(DocumentOperationType.UPDATE,"id:unittest:smoke::doc1"); + Document doc = new Document(types.getDocumentType("smoke"), new DocumentId("id:unittest:smoke::doc1")); + DocumentUpdate update = ((DocumentUpdate) operation.operation()); + update.applyTo(doc); + smokeTestDoc(doc); + assertTrue(update.getCreateIfNonExistent()); + assertEquals("false", update.getCondition().getSelection()); + } + + @Test public void readSingleDocumentPut() throws IOException { Document doc = docFromJson(""" { @@ -2120,69 +2180,93 @@ public class JsonReaderTestCase { @Test public void tensor_modify_update_with_replace_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_add_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_multiply_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.MULTIPLY, "sparse_tensor", - inputJson("{", - " 'operation': 
'multiply',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "multiply", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_create_non_existing_cells_true() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, true, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'create': true,", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "create": true, + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_create_non_existing_cells_false() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, false, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'create': false,", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "create": false, + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_treats_the_input_tensor_as_sparse() { // Note that the type of the tensor in the modify update is sparse (it only has mapped dimensions). 
assertTensorModifyUpdate("tensor(x{},y{}):{{x:0,y:0}:2.0, {x:1,y:2}:3.0}", - TensorModifyUpdate.Operation.REPLACE, "dense_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '0' }, 'value': 2.0 },", - " { 'address': { 'x': '1', 'y': '2' }, 'value': 3.0 } ]}")); + TensorModifyUpdate.Operation.REPLACE, "dense_tensor", + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "0" }, "value": 2.0 }, + { "address": { "x": "1", "y": "2" }, "value": 3.0 } + ] + }"""); } @Test public void tensor_modify_update_on_non_tensor_field_throws() { try { - JsonReader reader = createReader(inputJson("{ 'update': 'id:unittest:smoke::doc1',", - " 'fields': {", - " 'something': {", - " 'modify': {} }}}")); + JsonReader reader = createReader(""" + { + "update": "id:unittest:smoke::doc1", + "fields": { + "something": { + "modify": {} + } + } + } + """); reader.readSingleDocument(DocumentOperationType.UPDATE, "id:unittest:smoke::doc1"); fail("Expected exception"); } @@ -2196,95 +2280,125 @@ public class JsonReaderTestCase { public void tensor_modify_update_on_dense_unbound_tensor_throws() { illegalTensorModifyUpdate("Error in 'dense_unbound_tensor': A modify update cannot be applied to tensor types with indexed unbound dimensions. 
Field 'dense_unbound_tensor' has unsupported tensor type 'tensor(x[],y[])'", "dense_unbound_tensor", - "{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '0' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_on_sparse_tensor_with_single_dimension_short_form() { - assertTensorModifyUpdate("{{x:a}:2.0, {x:c}: 3.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_single_dimension_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': {", - " 'a': 2.0,", - " 'c': 3.0 }}")); + assertTensorModifyUpdate("{{x:a}:2.0, {x:c}: 3.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_single_dimension_tensor", + """ + { + "operation": "replace", + "cells": { + "a": 2.0, + "c": 3.0 + } + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_array() { assertTensorModifyUpdate("{{x:a,y:0}:1,{x:a,y:1}:2,{x:a,y:2}:3}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'blocks': [", - " { 'address': { 'x': 'a' }, 'values': [1,2,3] } ]}")); + """ + { + "operation": "replace", + "blocks": [ + { "address": { "x": "a" }, "values": [1,2,3] } + ] + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_must_specify_full_subspace() { illegalTensorModifyUpdate("Error in 'mixed_tensor': At {x:a}: Expected 3 values, but got 2", - "mixed_tensor", - inputJson("{", - " 'operation': 
'replace',", - " 'blocks': {", - " 'a': [2,3] } }")); + "mixed_tensor", + """ + { + "operation": "replace", + "blocks": { + "a": [2,3] + } + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_map() { assertTensorModifyUpdate("{{x:a,y:0}:1,{x:a,y:1}:2,{x:a,y:2}:3}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'blocks': {", - " 'a': [1,2,3] } }")); + """ + { + "operation": "replace", + "blocks": { + "a": [1,2,3] + } + }"""); } @Test public void tensor_modify_update_with_add_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.ADD, "mixed_tensor", - inputJson("{", - " 'operation': 'add',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_multiply_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.MULTIPLY, "mixed_tensor", - inputJson("{", - " 'operation': 'multiply',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "multiply", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_out_of_bound_cells_throws() { illegalTensorModifyUpdate("Error in 'dense_tensor': Dimension 'y' has label '3' but type is tensor(x[2],y[3])", "dense_tensor", - "{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_out_of_bound_cells_throws_mixed() { illegalTensorModifyUpdate("Error in 'mixed_tensor': Dimension 'y' has label '3' but type is tensor(x{},y[3])", "mixed_tensor", - 
"{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @@ -2292,87 +2406,113 @@ public class JsonReaderTestCase { public void tensor_modify_update_with_unknown_operation_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Unknown operation 'unknown' in modify update for field 'sparse_tensor'", "sparse_tensor", - "{", - " 'operation': 'unknown',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}"); + """ + { + "operation": "unknown", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_without_operation_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Modify update for field 'sparse_tensor' does not contain an operation", "sparse_tensor", - "{", - " 'cells': [] }"); + """ + { + "cells": [] + }"""); } @Test public void tensor_modify_update_without_cells_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Modify update for field 'sparse_tensor' does not contain tensor cells", "sparse_tensor", - "{", - " 'operation': 'replace' }"); + """ + { + "operation": "replace" + }"""); } @Test public void tensor_modify_update_with_unknown_content_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Unknown JSON string 'unknown' in modify update for field 'sparse_tensor'", "sparse_tensor", - "{", - " 'unknown': 'here' }"); + """ + { + "unknown": "here" + }"""); } @Test public void tensor_add_update_on_sparse_tensor() { assertTensorAddUpdate("{{x:a,y:b}:2.0, {x:c,y:d}: 3.0}", "sparse_tensor", - inputJson("{", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 },", - " { 'address': { 'x': 'c', 'y': 'd' }, 'value': 3.0 } ]}")); + """ + { + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 }, + { "address": { "x": "c", "y": "d" }, 
"value": 3.0 } + ] + }"""); } @Test public void tensor_add_update_on_sparse_tensor_with_single_dimension_short_form() { assertTensorAddUpdate("{{x:a}:2.0, {x:c}: 3.0}", "sparse_single_dimension_tensor", - inputJson("{", - " 'cells': {", - " 'a': 2.0,", - " 'c': 3.0 }}")); + """ + { + "cells": { + "a": 2.0, + "c": 3.0 + } + }"""); } @Test public void tensor_add_update_on_mixed_tensor() { assertTensorAddUpdate("{{x:a,y:0}:2.0, {x:a,y:1}:3.0, {x:a,y:2}:0.0}", "mixed_tensor", - inputJson("{", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 },", - " { 'address': { 'x': 'a', 'y': '1' }, 'value': 3.0 } ]}")); + """ + { + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 }, + { "address": { "x": "a", "y": "1" }, "value": 3.0 } + ] + }"""); } @Test public void tensor_add_update_on_mixed_with_out_of_bound_dense_cells_throws() { illegalTensorAddUpdate("Error in 'mixed_tensor': Index 3 out of bounds for length 3", "mixed_tensor", - "{", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_add_update_on_dense_tensor_throws() { illegalTensorAddUpdate("Error in 'dense_tensor': An add update can only be applied to tensors with at least one sparse dimension. 
Field 'dense_tensor' has unsupported tensor type 'tensor(x[2],y[3])'", "dense_tensor", - "{", - " 'cells': [] }"); + """ + { + "cells": [ ] + }"""); } @Test public void tensor_add_update_on_not_fully_specified_cell_throws() { illegalTensorAddUpdate("Error in 'sparse_tensor': Missing a label for dimension 'y' for tensor(x{},y{})", "sparse_tensor", - "{", - " 'cells': [", - " { 'address': { 'x': 'a' }, 'value': 2.0 } ]}"); + """ + { + "cells": [ + { "address": { "x": "a" }, "value": 2.0 } + ] + }"""); } @Test @@ -2388,146 +2528,176 @@ public class JsonReaderTestCase { @Test public void tensor_remove_update_on_sparse_tensor() { assertTensorRemoveUpdate("{{x:a,y:b}:1.0,{x:c,y:d}:1.0}", "sparse_tensor", - inputJson("{", - " 'addresses': [", - " { 'x': 'a', 'y': 'b' },", - " { 'x': 'c', 'y': 'd' } ]}")); + """ + { + "addresses": [ + { "x": "a", "y": "b" }, + { "x": "c", "y": "d" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor() { assertTensorRemoveUpdate("{{x:1}:1.0,{x:2}:1.0}", "mixed_tensor", - inputJson("{", - " 'addresses': [", - " { 'x': '1' },", - " { 'x': '2' } ]}")); + """ + { + "addresses": [ + { "x": "1" }, + { "x": "2" } + ] + }"""); } @Test public void tensor_remove_update_on_sparse_tensor_with_not_fully_specified_address() { assertTensorRemoveUpdate("{{y:b}:1.0,{y:d}:1.0}", "sparse_tensor", - inputJson("{", - " 'addresses': [", - " { 'y': 'b' },", - " { 'y': 'd' } ]}")); + """ + { + "addresses": [ + { "y": "b" }, + { "y": "d" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor_with_not_fully_specified_address() { assertTensorRemoveUpdate("{{x:1,z:a}:1.0,{x:2,z:b}:1.0}", "mixed_tensor_adv", - inputJson("{", - " 'addresses': [", - " { 'x': '1', 'z': 'a' },", - " { 'x': '2', 'z': 'b' } ]}")); + """ + { + "addresses": [ + { "x": "1", "z": "a" }, + { "x": "2", "z": "b" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor_with_dense_addresses_throws() { illegalTensorRemoveUpdate("Error in 
'mixed_tensor': Indexed dimension address 'y' should not be specified in remove update", "mixed_tensor", - "{", - " 'addresses': [", - " { 'x': '1', 'y': '0' },", - " { 'x': '2', 'y': '0' } ]}"); + """ + { + "addresses": [ + { "x": "1", "y": "0" }, + { "x": "2", "y": "0" } + ] + }"""); } @Test public void tensor_remove_update_on_dense_tensor_throws() { illegalTensorRemoveUpdate("Error in 'dense_tensor': A remove update can only be applied to tensors with at least one sparse dimension. Field 'dense_tensor' has unsupported tensor type 'tensor(x[2],y[3])'", "dense_tensor", - "{", - " 'addresses': [] }"); + """ + { + "addresses": [] + }"""); } @Test public void tensor_remove_update_with_stray_dimension_throws() { illegalTensorRemoveUpdate("Error in 'sparse_tensor': tensor(x{},y{}) does not contain dimension 'foo'", - "sparse_tensor", - "{", - " 'addresses': [", - " { 'x': 'a', 'foo': 'b' } ]}"); + "sparse_tensor", + """ + { + "addresses": [ + { "x": "a", "foo": "b" } + ] + }"""); illegalTensorRemoveUpdate("Error in 'sparse_tensor': tensor(x{}) does not contain dimension 'foo'", - "sparse_tensor", - "{", - " 'addresses': [", - " { 'x': 'c' },", - " { 'x': 'a', 'foo': 'b' } ]}"); + "sparse_tensor", + """ + { + "addresses": [ + { "x": "c" }, + { "x": "a", "foo": "b" } + ] + }"""); } @Test public void tensor_remove_update_without_cells_throws() { illegalTensorRemoveUpdate("Error in 'sparse_tensor': Remove update for field 'sparse_tensor' does not contain tensor addresses", "sparse_tensor", - "{'addresses': [] }"); + """ + { + "addresses": [] + }"""); illegalTensorRemoveUpdate("Error in 'mixed_tensor': Remove update for field 'mixed_tensor' does not contain tensor addresses", "mixed_tensor", - "{'addresses': [] }"); + """ + { + "addresses": [] + }"""); } @Test public void require_that_parser_propagates_datatype_parser_errors_predicate() { assertParserErrorMatches( "Error in document 'id:unittest:testpredicate::0' - could not parse field 'boolean' of type 'predicate': " + - 
"line 1:10 no viable alternative at character '>'", - - "[", - " {", - " 'fields': {", - " 'boolean': 'timestamp > 9000'", - " },", - " 'put': 'id:unittest:testpredicate::0'", - " }", - "]" - ); + "line 1:10 no viable alternative at character '>'", + """ + [ + { + "fields": { + "boolean": "timestamp > 9000" + }, + "put": "id:unittest:testpredicate::0" + } + ] + """); } @Test public void require_that_parser_propagates_datatype_parser_errors_string_as_int() { assertParserErrorMatches( "Error in document 'id:unittest:testint::0' - could not parse field 'integerfield' of type 'int': " + - "For input string: \" 1\"", - - "[", - " {", - " 'fields': {", - " 'integerfield': ' 1'", - " },", - " 'put': 'id:unittest:testint::0'", - " }", - "]" - ); + "For input string: \" 1\"", + """ + [ + { + "fields": { + "integerfield": " 1" + }, + "put": "id:unittest:testint::0" + } + ] + """); } @Test public void require_that_parser_propagates_datatype_parser_errors_overflowing_int() { assertParserErrorMatches( "Error in document 'id:unittest:testint::0' - could not parse field 'integerfield' of type 'int': " + - "For input string: \"281474976710656\"", - - "[", - " {", - " 'fields': {", - " 'integerfield': 281474976710656", - " },", - " 'put': 'id:unittest:testint::0'", - " }", - "]" - ); + "For input string: \"281474976710656\"", + """ + [ + { + "fields": { + "integerfield": 281474976710656 + }, + "put": "id:unittest:testint::0" + } + ] + """); } @Test public void requireThatUnknownDocTypeThrowsIllegalArgumentException() { - final String jsonData = inputJson( - "[", - " {", - " 'put': 'id:ns:walrus::walrus1',", - " 'fields': {", - " 'aField': 42", - " }", - " }", - "]"); + String jsonData = """ + [ + { + "put": "id:ns:walrus::walrus1", + "fields": { + "aField": 42 + } + } + ] + """; try { new JsonReader(types, jsonToInputStream(jsonData), parserFactory).next(); fail(); @@ -2577,30 +2747,40 @@ public class JsonReaderTestCase { return createPutWithTensor(inputTensor, "sparse_tensor"); } 
private DocumentPut createPutWithTensor(String inputTensor, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'put': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': " + inputTensor + " }}]")); - return (DocumentPut) reader.next(); + JsonReader streaming = createReader(""" + { + "fields": { + "%s": %s + } + } + """.formatted(tensorFieldName, inputTensor)); + DocumentPut lazyParsed = (DocumentPut) streaming.readSingleDocumentStreaming(DocumentOperationType.PUT, TENSOR_DOC_ID).operation(); + JsonReader reader = createReader(""" + [ + { + "put": "%s", + "fields": { + "%s": %s + } + } + ]""".formatted(TENSOR_DOC_ID, tensorFieldName, inputTensor)); + DocumentPut bufferParsed = (DocumentPut) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private DocumentUpdate createAssignUpdateWithSparseTensor(String inputTensor) { return createAssignUpdateWithTensor(inputTensor, "sparse_tensor"); } private DocumentUpdate createAssignUpdateWithTensor(String inputTensor, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'update': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': {", - " 'assign': " + (inputTensor != null ? 
inputTensor : "null") + " } } } ]")); - return (DocumentUpdate) reader.next(); + return createTensorUpdate("assign", inputTensor, tensorFieldName); } private static Tensor assertSparseTensorField(String expectedTensor, DocumentPut put) { return assertTensorField(expectedTensor, put, "sparse_tensor"); } private Tensor assertTensorField(String expectedTensor, String fieldName, String inputJson) { - return assertTensorField(expectedTensor, createPutWithTensor(inputJson, fieldName), fieldName); + return assertTensorField(expectedTensor, createPutWithTensor(inputJson(inputJson), fieldName), fieldName); } private static Tensor assertTensorField(String expectedTensor, DocumentPut put, String tensorFieldName) { return assertTensorField(Tensor.from(expectedTensor), put, tensorFieldName); @@ -2673,12 +2853,29 @@ public class JsonReaderTestCase { } private DocumentUpdate createTensorUpdate(String operation, String tensorJson, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'update': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': {", - " '" + operation + "': " + tensorJson + " }}}]")); - return (DocumentUpdate) reader.next(); + JsonReader streaming = createReader(""" + { + "fields": { + "%s": { + "%s": %s + } + } + }""".formatted(tensorFieldName, operation, tensorJson)); + DocumentUpdate lazyParsed = (DocumentUpdate) streaming.readSingleDocumentStreaming(DocumentOperationType.UPDATE, TENSOR_DOC_ID).operation(); + JsonReader reader = createReader(""" + [ + { + "update": "%s", + "fields": { + "%s": { + "%s": %s + } + } + } + ]""".formatted(TENSOR_DOC_ID, tensorFieldName, operation, tensorJson)); + DocumentUpdate bufferParsed = (DocumentUpdate) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private void assertTensorAddUpdate(String expectedTensor, String tensorFieldName, String tensorJson) { diff --git a/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java 
b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java new file mode 100644 index 00000000000..3ed2ed531c3 --- /dev/null +++ b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java @@ -0,0 +1,132 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.yahoo.document.json.TokenBuffer.Token; +import org.junit.Test; + +import java.io.IOException; +import java.util.function.Supplier; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * @author jonmv + */ +public class LazyTokenBufferTest { + + @Test + public void testBuffer() throws IOException { + String json = """ + { + "fields": { + "foo": "bar", + "baz": [1, 2, 3], + "quu": { "qux": null } + } + }"""; + JsonParser parser = new JsonFactory().createParser(json); + parser.nextValue(); + parser.nextValue(); + assertEquals(JsonToken.START_OBJECT, parser.currentToken()); + assertEquals("fields", parser.currentName()); + + // Peeking through the buffer doesn't change nesting. 
+ LazyTokenBuffer buffer = new LazyTokenBuffer(parser); + assertEquals(JsonToken.START_OBJECT, buffer.current()); + assertEquals("fields", buffer.currentName()); + assertEquals(1, buffer.nesting()); + + Supplier<Token> lookahead = buffer.lookahead(); + Token peek = lookahead.get(); + assertEquals(JsonToken.VALUE_STRING, peek.token); + assertEquals("foo", peek.name); + assertEquals("bar", peek.text); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_ARRAY, peek.token); + assertEquals("baz", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("1", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("2", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("3", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.END_ARRAY, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_OBJECT, peek.token); + assertEquals("quu", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NULL, peek.token); + assertEquals("qux", peek.name); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertNull(peek); + + // Parser is now at the end. + assertEquals(JsonToken.END_OBJECT, parser.nextToken()); + assertNull(parser.nextToken()); + + // Repeat iterating through the buffer, this time advancing it, and see that nesting changes. 
+ assertEquals(JsonToken.VALUE_STRING, buffer.next()); + assertEquals("foo", buffer.currentName()); + assertEquals("bar", buffer.currentText()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_ARRAY, buffer.next()); + assertEquals("baz", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("1", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("2", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("3", buffer.currentText()); + + assertEquals(JsonToken.END_ARRAY, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_OBJECT, buffer.next()); + assertEquals("quu", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NULL, buffer.next()); + assertEquals("qux", buffer.currentName()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(0, buffer.nesting()); + + assertNull(buffer.next()); + } + +} diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 4aac29c5093..8c6126b0897 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -360,21 +360,21 @@ public class Flags { public static final UnboundBooleanFlag MORE_WIREGUARD = defineFeatureFlag( "more-wireguard", false, - List.of("andreer"), "2023-08-21", "2024-01-24", + List.of("andreer"), "2023-08-21", "2024-02-24", "Use wireguard in INternal enCLAVES", "Takes effect on next host-admin run", HOSTNAME, CLOUD_ACCOUNT); public static final UnboundBooleanFlag IPV6_AWS_TARGET_GROUPS = defineFeatureFlag( "ipv6-aws-target-groups", false, - List.of("andreer"), "2023-08-28", "2024-01-24", + List.of("andreer"), 
"2023-08-28", "2024-02-24", "Always use IPv6 target groups for load balancers in aws", "Takes effect on next load-balancer provisioning", HOSTNAME, CLOUD_ACCOUNT); public static final UnboundBooleanFlag PROVISION_IPV6_ONLY_AWS = defineFeatureFlag( "provision-ipv6-only", false, - List.of("andreer"), "2023-08-28", "2024-01-24", + List.of("andreer"), "2023-08-28", "2024-02-24", "Provision without private IPv4 addresses in INternal enCLAVES in AWS", "Takes effect on next host provisioning / run of host-admin", HOSTNAME, CLOUD_ACCOUNT); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 606605ed1e4..4134ea337ab 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -34,6 +34,7 @@ public class Cluster { private final IntRange groupSize; private final boolean required; private final Autoscaling suggested; + private final List<Autoscaling> suggestions; private final Autoscaling target; private final ClusterInfo clusterInfo; private final BcpGroupInfo bcpGroupInfo; @@ -48,6 +49,7 @@ public class Cluster { IntRange groupSize, boolean required, Autoscaling suggested, + List<Autoscaling> suggestions, Autoscaling target, ClusterInfo clusterInfo, BcpGroupInfo bcpGroupInfo, @@ -59,6 +61,7 @@ public class Cluster { this.groupSize = Objects.requireNonNull(groupSize); this.required = required; this.suggested = Objects.requireNonNull(suggested); + this.suggestions = Objects.requireNonNull(suggestions); Objects.requireNonNull(target); if (target.resources().isPresent() && ! 
target.resources().get().isWithin(minResources, maxResources)) this.target = target.withResources(Optional.empty()); // Delete illegal target @@ -102,12 +105,21 @@ public class Cluster { */ public Autoscaling suggested() { return suggested; } + /** + * The list of suggested resources, which may or may not be within the min and max limits, + * or empty if there is currently no recorded suggestion. + * List is sorted by preference + */ + public List<Autoscaling> suggestions() { return suggestions; } + /** Returns true if there is a current suggestion and we should actually make this suggestion to users. */ public boolean shouldSuggestResources(ClusterResources currentResources) { - if (suggested.resources().isEmpty()) return false; - if (suggested.resources().get().isWithin(min, max)) return false; - if ( ! Autoscaler.worthRescaling(currentResources, suggested.resources().get())) return false; - return true; + if (suggestions.isEmpty()) return false; + return suggestions.stream().noneMatch(suggestion -> + suggestion.resources().isEmpty() + || suggestion.resources().get().isWithin(min, max) + || ! 
Autoscaler.worthRescaling(currentResources, suggestion.resources().get()) + ); } public ClusterInfo clusterInfo() { return clusterInfo; } @@ -131,19 +143,23 @@ public class Cluster { public Cluster withConfiguration(boolean exclusive, Capacity capacity) { return new Cluster(id, exclusive, capacity.minResources(), capacity.maxResources(), capacity.groupSize(), capacity.isRequired(), - suggested, target, capacity.clusterInfo(), bcpGroupInfo, scalingEvents); + suggested, suggestions, target, capacity.clusterInfo(), bcpGroupInfo, scalingEvents); } public Cluster withSuggested(Autoscaling suggested) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); + } + + public Cluster withSuggestions(List<Autoscaling> suggestions) { + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } public Cluster withTarget(Autoscaling target) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } public Cluster with(BcpGroupInfo bcpGroupInfo) { - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } /** Add or update (based on "at" time) a scaling event */ @@ -157,7 +173,7 @@ public class Cluster { scalingEvents.add(scalingEvent); prune(scalingEvents); - return new Cluster(id, exclusive, min, max, groupSize, required, suggested, target, clusterInfo, 
bcpGroupInfo, scalingEvents); + return new Cluster(id, exclusive, min, max, groupSize, required, suggested, suggestions, target, clusterInfo, bcpGroupInfo, scalingEvents); } @Override @@ -189,7 +205,7 @@ public class Cluster { public static Cluster create(ClusterSpec.Id id, boolean exclusive, Capacity requested) { return new Cluster(id, exclusive, requested.minResources(), requested.maxResources(), requested.groupSize(), requested.isRequired(), - Autoscaling.empty(), Autoscaling.empty(), requested.clusterInfo(), BcpGroupInfo.empty(), List.of()); + Autoscaling.empty(), List.of(), Autoscaling.empty(), requested.clusterInfo(), BcpGroupInfo.empty(), List.of()); } /** The predicted time it will take to rescale this cluster. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index ff30f9d6163..ae12ca13318 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -6,7 +6,10 @@ import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeRepository; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.provision.autoscale.Autoscaler.headroomRequiredToScaleDown; @@ -37,13 +40,26 @@ public class AllocationOptimizer { public Optional<AllocatableResources> findBestAllocation(Load loadAdjustment, ClusterModel model, Limits limits) { + return findBestAllocations(loadAdjustment, model, limits).stream().findFirst(); + } + + /** + * Searches the space of possible allocations given a target relative load + * and (optionally) cluster limits and returns the best alternative. 
+ * + * @return the best allocations, if there are any possible legal allocations, fulfilling the target + * fully or partially, within the limits. The list contains the three best allocations, sorted from most to least preferred. + */ + public List<AllocatableResources> findBestAllocations(Load loadAdjustment, + ClusterModel model, + Limits limits) { if (limits.isEmpty()) limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()), new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()), IntRange.empty()); else limits = atLeast(minimumNodes, limits).fullySpecified(model.current().clusterSpec(), nodeRepository, model.application().id()); - Optional<AllocatableResources> bestAllocation = Optional.empty(); + List<AllocatableResources> bestAllocations = new ArrayList<>(); var availableRealHostResources = nodeRepository.zone().cloud().dynamicProvisioning() ? nodeRepository.flavors().getFlavors().stream().map(flavor -> flavor.resources()).toList() : nodeRepository.nodes().list().hosts().stream().map(host -> host.flavor().resources()) @@ -65,11 +81,20 @@ public class AllocationOptimizer { model, nodeRepository); if (allocatableResources.isEmpty()) continue; - if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get(), model)) - bestAllocation = allocatableResources; + bestAllocations.add(allocatableResources.get()); } } - return bestAllocation; + return bestAllocations.stream() + .sorted((one, other) -> { + if (one.preferableTo(other, model)) + return -1; + else if (other.preferableTo(one, model)) { + return 1; + } + return 0; + }) + .limit(3) + .toList(); } /** Returns the max resources of a host one node may allocate. 
*/ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 738abddc31a..40819e709de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling.Status; import java.time.Duration; +import java.util.List; /** * The autoscaler gives advice about what resources should be allocated to a cluster based on observed behavior. @@ -39,8 +40,14 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Autoscaling suggest(Application application, Cluster cluster, NodeList clusterNodes) { - return autoscale(application, cluster, clusterNodes, Limits.empty()); + public List<Autoscaling> suggest(Application application, Cluster cluster, NodeList clusterNodes) { + var model = model(application, cluster, clusterNodes); + if (model.isEmpty() || ! 
model.isStable(nodeRepository)) return List.of(); + + var targets = allocationOptimizer.findBestAllocations(model.loadAdjustment(), model, Limits.empty()); + return targets.stream() + .map(target -> toAutoscaling(target, model)) + .toList(); } /** @@ -50,18 +57,8 @@ public class Autoscaler { * @return scaling advice for this cluster */ public Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes) { - return autoscale(application, cluster, clusterNodes, Limits.of(cluster)); - } - - private Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { - var model = new ClusterModel(nodeRepository, - application, - clusterNodes.not().retired().clusterSpec(), - cluster, - clusterNodes, - new AllocatableResources(clusterNodes.not().retired(), nodeRepository), - nodeRepository.metricsDb(), - nodeRepository.clock()); + var limits = Limits.of(cluster); + var model = model(application, cluster, clusterNodes); if (model.isEmpty()) return Autoscaling.empty(); if (! limits.isEmpty() && cluster.minResources().equals(cluster.maxResources())) @@ -78,18 +75,33 @@ public class Autoscaler { if (target.isEmpty()) return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", model); - if (target.get().nodes() == 1) + return toAutoscaling(target.get(), model); + } + + private ClusterModel model(Application application, Cluster cluster, NodeList clusterNodes) { + return new ClusterModel(nodeRepository, + application, + clusterNodes.not().retired().clusterSpec(), + cluster, + clusterNodes, + new AllocatableResources(clusterNodes.not().retired(), nodeRepository), + nodeRepository.metricsDb(), + nodeRepository.clock()); + } + + private Autoscaling toAutoscaling(AllocatableResources target, ClusterModel model) { + if (target.nodes() == 1) return Autoscaling.dontScale(Status.unavailable, "Autoscaling is disabled in single node clusters", model); - if (! 
worthRescaling(model.current().realResources(), target.get().realResources())) { - if (target.get().fulfilment() < 0.9999999) + if (! worthRescaling(model.current().realResources(), target.realResources())) { + if (target.fulfilment() < 0.9999999) return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents ideal scaling of this cluster", model); else if ( ! model.safeToScaleDown() && model.idealLoad().any(v -> v < 1.0)) return Autoscaling.dontScale(Status.ideal, "Cooling off before considering to scale down", model); else return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled (within configured limits)", model); } - return Autoscaling.scaleTo(target.get().advertisedResources(), model); + return Autoscaling.scaleTo(target.advertisedResources(), model); } /** Returns true if it is worthwhile to make the given resource change, false if it is too insignificant */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index fd93d202795..fa1be83dbcf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import java.time.Duration; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -63,13 +64,13 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { Application application = applications().get(applicationId).orElse(Application.empty(applicationId)); Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return true; - var suggestion = autoscaler.suggest(application, 
cluster.get(), clusterNodes); - if (suggestion.status() == Autoscaling.Status.waiting) return true; - if ( ! shouldUpdateSuggestion(cluster.get().suggested(), suggestion)) return true; + var suggestions = autoscaler.suggest(application, cluster.get(), clusterNodes); + if ( ! shouldUpdateSuggestion(cluster.get().suggestions(), suggestions)) + return true; // Wait only a short time for the lock to avoid interfering with change deployments try (Mutex lock = nodeRepository().applications().lock(applicationId, Duration.ofSeconds(1))) { - applications().get(applicationId).ifPresent(a -> updateSuggestion(suggestion, clusterId, a, lock)); + applications().get(applicationId).ifPresent(a -> updateSuggestion(suggestions, clusterId, a, lock)); return true; } catch (ApplicationLockException e) { @@ -77,19 +78,28 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { } } - private boolean shouldUpdateSuggestion(Autoscaling currentSuggestion, Autoscaling newSuggestion) { - return currentSuggestion.resources().isEmpty() - || currentSuggestion.at().isBefore(nodeRepository().clock().instant().minus(Duration.ofDays(7))) - || (newSuggestion.resources().isPresent() && isHigher(newSuggestion.resources().get(), currentSuggestion.resources().get())); + private boolean shouldUpdateSuggestion(List<Autoscaling> currentSuggestions, List<Autoscaling> newSuggestions) { + // Only compare previous best suggestion with current best suggestion + var currentSuggestion = currentSuggestions.stream().findFirst(); + var newSuggestion = newSuggestions.stream().findFirst(); + + if (currentSuggestion.isEmpty()) return true; + if (newSuggestion.isEmpty()) return false; + + return newSuggestion.get().status() != Autoscaling.Status.waiting + && (currentSuggestion.get().resources().isEmpty() + || currentSuggestion.get().at().isBefore(nodeRepository().clock().instant().minus(Duration.ofDays(7))) + || (newSuggestion.get().resources().isPresent() && 
isHigher(newSuggestion.get().resources().get(), currentSuggestion.get().resources().get()))); } - private void updateSuggestion(Autoscaling autoscaling, + private void updateSuggestion(List<Autoscaling> suggestions, ClusterSpec.Id clusterId, Application application, Mutex lock) { Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return; - applications().put(application.with(cluster.get().withSuggested(autoscaling)), lock); + applications().put(application.with(cluster.get().withSuggestions(suggestions) + .withSuggested(suggestions.stream().findFirst().orElse(Autoscaling.empty()))), lock); } private boolean isHigher(ClusterResources r1, ClusterResources r2) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 6f325700401..2dea70825ee 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; @@ -56,6 +57,7 @@ public class ApplicationSerializer { private static final String groupSizeKey = "groupSize"; private static final String requiredKey = "required"; private static final String suggestedKey = "suggested"; + private static final String suggestionsKey = "suggestionsKey"; private static final String clusterInfoKey = "clusterInfo"; private static final String bcpDeadlineKey = "bcpDeadline"; private static final String hostTTLKey = "hostTTL"; @@ -139,7 
+141,9 @@ public class ApplicationSerializer { toSlime(cluster.maxResources(), clusterObject.setObject(maxResourcesKey)); toSlime(cluster.groupSize(), clusterObject.setObject(groupSizeKey)); clusterObject.setBool(requiredKey, cluster.required()); + // TODO(olaa): Remove 'suggested' once API clients migrate to suggestion list toSlime(cluster.suggested(), clusterObject.setObject(suggestedKey)); + toSlime(cluster.suggestions(), clusterObject.setArray(suggestionsKey)); toSlime(cluster.target(), clusterObject.setObject(targetKey)); if (! cluster.clusterInfo().isEmpty()) toSlime(cluster.clusterInfo(), clusterObject.setObject(clusterInfoKey)); @@ -156,12 +160,20 @@ public class ApplicationSerializer { intRangeFromSlime(clusterObject.field(groupSizeKey)), clusterObject.field(requiredKey).asBool(), autoscalingFromSlime(clusterObject.field(suggestedKey)), + suggestionsFromSlime(clusterObject.field(suggestionsKey)), autoscalingFromSlime(clusterObject.field(targetKey)), clusterInfoFromSlime(clusterObject.field(clusterInfoKey)), bcpGroupInfoFromSlime(clusterObject.field(bcpGroupInfoKey)), scalingEventsFromSlime(clusterObject.field(scalingEventsKey))); } + private static void toSlime(List<Autoscaling> suggestions, Cursor suggestionsArray) { + suggestions.forEach(suggestion -> { + var suggestionObject = suggestionsArray.addObject(); + toSlime(suggestion, suggestionObject); + }); + } + private static void toSlime(Autoscaling autoscaling, Cursor autoscalingObject) { autoscalingObject.setString(statusKey, toAutoscalingStatusCode(autoscaling.status())); autoscalingObject.setString(descriptionKey, autoscaling.description()); @@ -227,6 +239,13 @@ public class ApplicationSerializer { metricsObject.field(cpuCostPerQueryKey).asDouble()); } + private static List<Autoscaling> suggestionsFromSlime(Inspector suggestionsObject) { + var suggestions = new ArrayList<Autoscaling>(); + if (!suggestionsObject.valid()) return suggestions; + suggestionsObject.traverse((ArrayTraverser) (id, suggestion) 
-> suggestions.add(autoscalingFromSlime(suggestion))); + return suggestions; + } + private static Autoscaling autoscalingFromSlime(Inspector autoscalingObject) { if ( ! autoscalingObject.valid()) return Autoscaling.empty(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 5c788731386..0285e72a8a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -78,13 +78,23 @@ public class ApplicationSerializer { if ( ! cluster.groupSize().isEmpty()) toSlime(cluster.groupSize(), clusterObject.setObject("groupSize")); toSlime(currentResources, clusterObject.setObject("current")); - if (cluster.shouldSuggestResources(currentResources)) + if (cluster.shouldSuggestResources(currentResources)) { toSlime(cluster.suggested(), clusterObject.setObject("suggested")); + toSlime(cluster.suggestions(), clusterObject.setArray("suggestions")); + + } toSlime(cluster.target(), clusterObject.setObject("target")); scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray("scalingEvents")); clusterObject.setLong("scalingDuration", cluster.scalingDuration(nodes.clusterSpec()).toMillis()); } + private static void toSlime(List<Autoscaling> suggestions, Cursor autoscalingArray) { + suggestions.forEach(suggestion -> { + var autoscalingObject = autoscalingArray.addObject(); + toSlime(suggestion, autoscalingObject); + }); + } + private static void toSlime(Autoscaling autoscaling, Cursor autoscalingObject) { autoscalingObject.setString("status", autoscaling.status().name()); autoscalingObject.setString("description", autoscaling.description()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index d3b88997059..e7c9d1079fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -233,6 +233,14 @@ public class MockNodeRepository extends NodeRepository { Load.zero(), Load.zero(), Autoscaling.Metrics.zero())); + cluster1 = cluster1.withSuggestions(List.of(new Autoscaling(Autoscaling.Status.unavailable, + "", + Optional.of(new ClusterResources(6, 2, + new NodeResources(3, 20, 100, 1))), + clock().instant(), + Load.zero(), + Load.zero(), + Autoscaling.Metrics.zero()))); cluster1 = cluster1.withTarget(new Autoscaling(Autoscaling.Status.unavailable, "", Optional.of(new ClusterResources(4, 1, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 4236f7ac968..830ff170a90 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -462,12 +462,12 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(0.01, 0.01, 0.01, 0, 0), 120); - Autoscaling suggestion = fixture.suggest(); + List<Autoscaling> suggestions = fixture.suggest(); fixture.tester().assertResources("Choosing the remote disk flavor as it has less disk", 2, 1, 3.0, 100.0, 10.0, - suggestion); + suggestions); assertEquals("Choosing the remote disk flavor as it has less disk", - StorageType.remote, suggestion.resources().get().nodeResources().storageType()); + StorageType.remote, suggestions.stream().findFirst().flatMap(Autoscaling::resources).get().nodeResources().storageType()); } 
@Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java index df85ca4865f..4ce909fece3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java @@ -108,7 +108,7 @@ public class Fixture { } /** Compute an autoscaling suggestion for this. */ - public Autoscaling suggest() { + public List<Autoscaling> suggest() { return tester().suggest(applicationId, clusterSpec.id(), capacity.minResources(), capacity.maxResources()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index f8be27300fe..51297a88cad 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -78,6 +78,12 @@ public class ScalingSuggestionsMaintainerTest { assertEquals("7 nodes with [vcpu: 4.1, memory: 5.3 Gb, disk: 16.5 Gb, bandwidth: 0.1 Gbps, architecture: any]", suggestionOf(app2, cluster2, tester).resources().get().toString()); + // Secondary suggestions + assertEquals("7 nodes with [vcpu: 3.7, memory: 4.5 Gb, disk: 10.0 Gb, bandwidth: 0.1 Gbps, architecture: any]", + suggestionsOf(app1, cluster1, tester).get(1).resources().get().toString()); + assertEquals("8 nodes with [vcpu: 3.6, memory: 4.7 Gb, disk: 14.2 Gb, bandwidth: 0.1 Gbps, architecture: any]", + suggestionsOf(app2, cluster2, tester).get(1).resources().get().toString()); + // Utilization goes way down tester.clock().advance(Duration.ofHours(13)); addMeasurements(0.10f, 0.10f, 0.10f, 0, 500, app1, 
tester.nodeRepository()); @@ -97,7 +103,7 @@ public class ScalingSuggestionsMaintainerTest { tester.clock().advance(Duration.ofDays(3)); addMeasurements(0.7f, 0.7f, 0.7f, 0, 500, app1, tester.nodeRepository()); maintainer.maintain(); - var suggested = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().suggested().resources().get(); + var suggested = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().suggestions().stream().findFirst().flatMap(Autoscaling::resources).get(); tester.deploy(app1, cluster1, Capacity.from(suggested, suggested, IntRange.empty(), false, true, Optional.empty(), ClusterInfo.empty())); tester.clock().advance(Duration.ofDays(2)); @@ -121,7 +127,11 @@ public class ScalingSuggestionsMaintainerTest { } private Autoscaling suggestionOf(ApplicationId app, ClusterSpec cluster, ProvisioningTester tester) { - return tester.nodeRepository().applications().get(app).get().cluster(cluster.id()).get().suggested(); + return suggestionsOf(app, cluster, tester).get(0); + } + + private List<Autoscaling> suggestionsOf(ApplicationId app, ClusterSpec cluster, ProvisioningTester tester) { + return tester.nodeRepository().applications().get(app).get().cluster(cluster.id()).get().suggestions(); } private boolean shouldSuggest(ApplicationId app, ClusterSpec cluster, ProvisioningTester tester) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index 918a9043c93..90af6dca090 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -41,6 +41,7 @@ public class ApplicationSerializerTest { IntRange.empty(), true, Autoscaling.empty(), + List.of(), 
Autoscaling.empty(), ClusterInfo.empty(), BcpGroupInfo.empty(), @@ -60,6 +61,14 @@ public class ApplicationSerializerTest { new Load(0.1, 0.2, 0.3, 0.4, 0.5), new Load(0.4, 0.5, 0.6, 0.7, 0.8), new Autoscaling.Metrics(0.7, 0.8, 0.9)), + List.of(new Autoscaling(Autoscaling.Status.unavailable, + "", + Optional.of(new ClusterResources(20, 10, + new NodeResources(0.5, 4, 14, 16))), + Instant.ofEpochMilli(1234L), + new Load(0.1, 0.2, 0.3, 0.4, 0.5), + new Load(0.4, 0.5, 0.6, 0.7, 0.8), + new Autoscaling.Metrics(0.7, 0.8, 0.9))), new Autoscaling(Autoscaling.Status.insufficient, "Autoscaling status", Optional.of(new ClusterResources(10, 5, @@ -98,6 +107,7 @@ public class ApplicationSerializerTest { assertEquals(originalCluster.groupSize(), serializedCluster.groupSize()); assertEquals(originalCluster.required(), serializedCluster.required()); assertEquals(originalCluster.suggested(), serializedCluster.suggested()); + assertEquals(originalCluster.suggestions(), serializedCluster.suggestions()); assertEquals(originalCluster.target(), serializedCluster.target()); assertEquals(originalCluster.clusterInfo(), serializedCluster.clusterInfo()); assertEquals(originalCluster.bcpGroupInfo(), serializedCluster.bcpGroupInfo()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java index be2b2ca896a..6b6ef49fa5d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Optional; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; /** @@ -143,6 +144,7 @@ public class DynamicProvisioningTester { 
cluster.groupSize(), cluster.required(), cluster.suggested(), + cluster.suggestions(), cluster.target(), cluster.clusterInfo(), cluster.bcpGroupInfo(), @@ -165,7 +167,7 @@ public class DynamicProvisioningTester { nodeRepository().nodes().list(Node.State.active).owner(applicationId)); } - public Autoscaling suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, + public List<Autoscaling> suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, ClusterResources min, ClusterResources max) { Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)) .withCluster(clusterId, false, Capacity.from(min, max)); @@ -199,6 +201,14 @@ public class DynamicProvisioningTester { public ClusterResources assertResources(String message, int nodeCount, int groupCount, double approxCpu, double approxMemory, double approxDisk, + List<Autoscaling> autoscaling) { + assertFalse(autoscaling.isEmpty()); + return assertResources(message, nodeCount, groupCount, approxCpu, approxMemory, approxDisk, autoscaling.get(0)); + } + + public ClusterResources assertResources(String message, + int nodeCount, int groupCount, + double approxCpu, double approxMemory, double approxDisk, Autoscaling autoscaling) { assertTrue("Resources should be present: " + message + " (" + autoscaling + ": " + autoscaling.status() + ")", autoscaling.resources().isPresent()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index d5bbc648ed8..e74e705e1aa 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -82,6 +82,45 @@ "cpuCostPerQuery" : 0.0 } }, + "suggestions": [ + { + "at": 123, + "description": "", + "ideal": { + "cpu": 
0.0, + "disk": 0.0, + "gpu": 0.0, + "gpuMemory": 0.0, + "memory": 0.0 + }, + "metrics": { + "cpuCostPerQuery": 0.0, + "growthRateHeadroom": 0.0, + "queryRate": 0.0 + }, + "peak": { + "cpu": 0.0, + "disk": 0.0, + "gpu": 0.0, + "gpuMemory": 0.0, + "memory": 0.0 + }, + "resources": { + "groups": 2, + "nodes": 6, + "resources": { + "architecture": "any", + "bandwidthGbps": 1.0, + "diskGb": 100.0, + "diskSpeed": "fast", + "memoryGb": 20.0, + "storageType": "any", + "vcpu": 3.0 + } + }, + "status": "unavailable" + } + ], "target" : { "status" : "unavailable", "description" : "", diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorFunctionNode.java index 55863f51ea7..b3f2f265900 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorFunctionNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorFunctionNode.java @@ -17,7 +17,6 @@ import com.yahoo.tensor.functions.PrimitiveTensorFunction; import com.yahoo.tensor.functions.ScalarFunction; import com.yahoo.tensor.functions.TensorFunction; import com.yahoo.tensor.functions.ToStringContext; -import com.yahoo.tensor.impl.StringTensorAddress; import java.util.ArrayList; import java.util.Collections; @@ -141,7 +140,7 @@ public class TensorFunctionNode extends CompositeNode { labels[allDimensionsIndex++] = mappedDimensionLabel; } - receivingMap.put(StringTensorAddress.unsafeOf(labels), wrapScalar(node)); + receivingMap.put(TensorAddress.of(labels), wrapScalar(node)); } } diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.h b/searchlib/src/vespa/searchlib/query/streaming/hit.h index cd72555ea66..168c09a91ec 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/hit.h +++ b/searchlib/src/vespa/searchlib/query/streaming/hit.h @@ -11,18 +11,22 @@ class Hit uint32_t _field_id; uint32_t _element_id; int32_t _element_weight; + 
uint32_t _element_length; uint32_t _position; public: Hit(uint32_t field_id_, uint32_t element_id_, int32_t element_weight_, uint32_t position_) noexcept : _field_id(field_id_), _element_id(element_id_), _element_weight(element_weight_), + _element_length(0), _position(position_) { } uint32_t field_id() const noexcept { return _field_id; } uint32_t element_id() const { return _element_id; } int32_t element_weight() const { return _element_weight; } + uint32_t element_length() const { return _element_length; } uint32_t position() const { return _position; } + void set_element_length(uint32_t value) { _element_length = value; } }; using HitList = std::vector<Hit>; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index e0b78633af3..b7e619cfe4c 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -162,9 +162,18 @@ void QueryTerm::resizeFieldId(size_t fieldNo) } } -void QueryTerm::add(uint32_t field_id, uint32_t element_id, int32_t element_weight, uint32_t position) +uint32_t +QueryTerm::add(uint32_t field_id, uint32_t element_id, int32_t element_weight, uint32_t position) { + uint32_t idx = _hitList.size(); _hitList.emplace_back(field_id, element_id, element_weight, position); + return idx; +} + +void +QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length) +{ + _hitList[hitlist_idx].set_element_length(element_length); } NearestNeighborQueryNode* diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 627fae0532d..504b94de747 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -74,7 +74,8 @@ public: /// Gives you all phrases of this tree. Indicating that they are all const. 
void getPhrases(ConstQueryNodeRefList & tl) const override; - void add(uint32_t field_id, uint32_t element_id, int32_t element_weight, uint32_t position); + uint32_t add(uint32_t field_id, uint32_t element_id, int32_t element_weight, uint32_t position); + void set_element_length(uint32_t hitlist_idx, uint32_t element_length); EncodingBitMap encoding() const { return _encoding; } size_t termLen() const { return getTermLen(); } const string & index() const { return _index; } diff --git a/storage/src/tests/bucketdb/bucketmanagertest.cpp b/storage/src/tests/bucketdb/bucketmanagertest.cpp index 91e901c7254..45d8fab7061 100644 --- a/storage/src/tests/bucketdb/bucketmanagertest.cpp +++ b/storage/src/tests/bucketdb/bucketmanagertest.cpp @@ -453,7 +453,8 @@ TEST_F(BucketManagerTest, metrics_are_tracked_per_bucket_space) { auto& repo = _node->getComponentRegister().getBucketSpaceRepo(); { bucketdb::StorageBucketInfo entry; - api::BucketInfo info(50, 100, 200); + // checksum, doc count, doc size, meta count, total bucket size (incl meta) + api::BucketInfo info(50, 100, 200, 101, 211); info.setReady(true); entry.setBucketInfo(info); repo.get(document::FixedBucketSpaces::default_space()).bucketDatabase() @@ -461,7 +462,7 @@ TEST_F(BucketManagerTest, metrics_are_tracked_per_bucket_space) { } { bucketdb::StorageBucketInfo entry; - api::BucketInfo info(60, 150, 300); + api::BucketInfo info(60, 150, 300, 153, 307); info.setActive(true); entry.setBucketInfo(info); repo.get(document::FixedBucketSpaces::global_space()).bucketDatabase() @@ -475,6 +476,7 @@ TEST_F(BucketManagerTest, metrics_are_tracked_per_bucket_space) { auto default_m = spaces.find(document::FixedBucketSpaces::default_space()); ASSERT_TRUE(default_m != spaces.end()); EXPECT_EQ(1, default_m->second->buckets_total.getLast()); + EXPECT_EQ(101, default_m->second->entries.getLast()); EXPECT_EQ(100, default_m->second->docs.getLast()); EXPECT_EQ(200, default_m->second->bytes.getLast()); EXPECT_EQ(0, 
default_m->second->active_buckets.getLast()); @@ -485,6 +487,7 @@ TEST_F(BucketManagerTest, metrics_are_tracked_per_bucket_space) { auto global_m = spaces.find(document::FixedBucketSpaces::global_space()); ASSERT_TRUE(global_m != spaces.end()); EXPECT_EQ(1, global_m->second->buckets_total.getLast()); + EXPECT_EQ(153, global_m->second->entries.getLast()); EXPECT_EQ(150, global_m->second->docs.getLast()); EXPECT_EQ(300, global_m->second->bytes.getLast()); EXPECT_EQ(1, global_m->second->active_buckets.getLast()); @@ -499,7 +502,11 @@ TEST_F(BucketManagerTest, metrics_are_tracked_per_bucket_space) { jsonStream << End(); EXPECT_EQ(std::string("{\"values\":[" "{\"name\":\"vds.datastored.bucket_space.buckets_total\",\"values\":{\"last\":1},\"dimensions\":{\"bucketSpace\":\"global\"}}," + "{\"name\":\"vds.datastored.bucket_space.entries\",\"values\":{\"last\":153},\"dimensions\":{\"bucketSpace\":\"global\"}}," + "{\"name\":\"vds.datastored.bucket_space.docs\",\"values\":{\"last\":150},\"dimensions\":{\"bucketSpace\":\"global\"}}," "{\"name\":\"vds.datastored.bucket_space.buckets_total\",\"values\":{\"last\":1},\"dimensions\":{\"bucketSpace\":\"default\"}}," + "{\"name\":\"vds.datastored.bucket_space.entries\",\"values\":{\"last\":101},\"dimensions\":{\"bucketSpace\":\"default\"}}," + "{\"name\":\"vds.datastored.bucket_space.docs\",\"values\":{\"last\":100},\"dimensions\":{\"bucketSpace\":\"default\"}}," "{\"name\":\"vds.datastored.alldisks.docs\",\"values\":{\"last\":250}}," "{\"name\":\"vds.datastored.alldisks.bytes\",\"values\":{\"last\":500}}," "{\"name\":\"vds.datastored.alldisks.buckets\",\"values\":{\"last\":2}}" diff --git a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp index d12a9f72ac1..5337be6d79f 100644 --- a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp +++ b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp @@ -148,12 +148,13 @@ DistributorInfoGatherer::operator()(uint64_t bucketId, const 
StorBucketDatabase: struct MetricsUpdater { struct Count { uint64_t docs; + uint64_t entries; // docs + tombstones uint64_t bytes; uint64_t buckets; uint64_t active; uint64_t ready; - constexpr Count() noexcept : docs(0), bytes(0), buckets(0), active(0), ready(0) {} + constexpr Count() noexcept : docs(0), entries(0), bytes(0), buckets(0), active(0), ready(0) {} }; Count count; uint32_t lowestUsedBit; @@ -174,8 +175,9 @@ struct MetricsUpdater { if (data.getBucketInfo().isReady()) { ++count.ready; } - count.docs += data.getBucketInfo().getDocumentCount(); - count.bytes += data.getBucketInfo().getTotalDocumentSize(); + count.docs += data.getBucketInfo().getDocumentCount(); + count.entries += data.getBucketInfo().getMetaCount(); + count.bytes += data.getBucketInfo().getTotalDocumentSize(); if (bucket.getUsedBits() < lowestUsedBit) { lowestUsedBit = bucket.getUsedBits(); @@ -188,6 +190,7 @@ struct MetricsUpdater { const auto& s = rhs.count; d.buckets += s.buckets; d.docs += s.docs; + d.entries += s.entries; d.bytes += s.bytes; d.ready += s.ready; d.active += s.active; @@ -234,11 +237,15 @@ BucketManager::report(vespalib::JsonStream & json) const { MetricsUpdater m = getMetrics(space.second->bucketDatabase()); output(json, "vds.datastored.bucket_space.buckets_total", m.count.buckets, document::FixedBucketSpaces::to_string(space.first)); + output(json, "vds.datastored.bucket_space.entries", m.count.entries, + document::FixedBucketSpaces::to_string(space.first)); + output(json, "vds.datastored.bucket_space.docs", m.count.docs, + document::FixedBucketSpaces::to_string(space.first)); total.add(m); } const auto & src = total.count; - output(json, "vds.datastored.alldisks.docs", src.docs); - output(json, "vds.datastored.alldisks.bytes", src.bytes); + output(json, "vds.datastored.alldisks.docs", src.docs); + output(json, "vds.datastored.alldisks.bytes", src.bytes); output(json, "vds.datastored.alldisks.buckets", src.buckets); } @@ -258,6 +265,7 @@ BucketManager::updateMetrics() 
const auto bm = _metrics->bucket_spaces.find(space.first); assert(bm != _metrics->bucket_spaces.end()); bm->second->buckets_total.set(m.count.buckets); + bm->second->entries.set(m.count.entries); bm->second->docs.set(m.count.docs); bm->second->bytes.set(m.count.bytes); bm->second->active_buckets.set(m.count.active); diff --git a/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.cpp b/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.cpp index ca9e556f83c..d2b019cc50d 100644 --- a/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.cpp +++ b/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.cpp @@ -31,6 +31,7 @@ ContentBucketDbMetrics::~ContentBucketDbMetrics() = default; BucketSpaceMetrics::BucketSpaceMetrics(const vespalib::string& space_name, metrics::MetricSet* owner) : metrics::MetricSet("bucket_space", {{"bucketSpace", space_name}}, "", owner), buckets_total("buckets_total", {}, "Total number buckets present in the bucket space (ready + not ready)", this), + entries("entries", {}, "Number of entries (documents + tombstones) stored in the bucket space", this), docs("docs", {}, "Documents stored in the bucket space", this), bytes("bytes", {}, "Bytes stored across all documents in the bucket space", this), active_buckets("active_buckets", {}, "Number of active buckets in the bucket space", this), diff --git a/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.h b/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.h index a73bb676526..cab3a397c54 100644 --- a/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.h +++ b/storage/src/vespa/storage/bucketdb/bucketmanagermetrics.h @@ -34,6 +34,7 @@ struct ContentBucketDbMetrics : metrics::MetricSet { struct BucketSpaceMetrics : metrics::MetricSet { // Superficially very similar to DataStoredMetrics, but metric naming and dimensions differ metrics::LongValueMetric buckets_total; + metrics::LongValueMetric entries; metrics::LongValueMetric docs; metrics::LongValueMetric bytes; 
metrics::LongValueMetric active_buckets; diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp index eb233db9632..daa26b855e8 100644 --- a/streamingvisitors/src/tests/searcher/searcher_test.cpp +++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp @@ -22,6 +22,7 @@ #include <concepts> #include <charconv> #include <stdexcept> +#include <utility> using namespace document; using search::streaming::HitList; @@ -43,7 +44,7 @@ public: Vector<T> & add(T v) { this->push_back(v); return *this; } }; -using Hits = Vector<size_t>; +using Hits = Vector<std::pair<uint32_t, uint32_t>>; using StringList = Vector<std::string> ; using HitsList = Vector<Hits>; using BoolList = Vector<bool>; @@ -365,7 +366,7 @@ assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & f { HitsList hl; for (bool v : exp) { - hl.push_back(v ? Hits().add(0) : Hits()); + hl.push_back(v ? Hits().add({0, 0}) : Hits()); } assertSearch(fs, query, fv, hl); } @@ -401,7 +402,8 @@ assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv ASSERT_TRUE(hl.size() == exp[i].size()); for (size_t j = 0; j < hl.size(); ++j) { EXPECT_EQUAL(0u, hl[j].field_id()); - EXPECT_EQUAL((size_t)hl[j].position(), exp[i][j]); + EXPECT_EQUAL((size_t)hl[j].element_id(), exp[i][j].first); + EXPECT_EQUAL((size_t)hl[j].position(), exp[i][j].second); } } } @@ -466,9 +468,9 @@ bool assertCountWords(size_t numWords, const std::string & field) bool testStringFieldInfo(StrChrFieldSearcher & fs) { - assertString(fs, "foo", StringList().add("foo bar baz").add("foo bar").add("baz foo"), Hits().add(0).add(3).add(6)); + assertString(fs, "foo", StringList().add("foo bar baz").add("foo bar").add("baz foo"), Hits().add({0, 0}).add({1, 0}).add({2, 1})); assertString(fs, StringList().add("foo").add("bar"), StringList().add("foo bar baz").add("foo bar").add("baz foo"), - 
HitsList().add(Hits().add(0).add(3).add(6)).add(Hits().add(1).add(4))); + HitsList().add(Hits().add({0, 0}).add({1, 0}).add({2, 1})).add(Hits().add({0, 1}).add({1, 1}))); bool retval = true; if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo", QTFieldInfo(0, 1, 1)))) retval = false; @@ -497,22 +499,22 @@ testStrChrFieldSearcher(StrChrFieldSearcher & fs) std::string field = "operators and operator overloading with utf8 char oe = \xc3\x98"; assertString(fs, "oper", field, Hits()); assertString(fs, "tor", field, Hits()); - assertString(fs, "oper*", field, Hits().add(0).add(2)); - assertString(fs, "and", field, Hits().add(1)); + assertString(fs, "oper*", field, Hits().add({0, 0}).add({0, 2})); + assertString(fs, "and", field, Hits().add({0, 1})); assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits()).add(Hits())); - assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))); + assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add({0, 1})).add(Hits().add({0, 3}))); fs.match_type(FieldSearcher::PREFIX); - assertString(fs, "oper", field, Hits().add(0).add(2)); - assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add(0).add(2)).add(Hits())); + assertString(fs, "oper", field, Hits().add({0, 0}).add({0, 2})); + assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add({0, 0}).add({0, 2})).add(Hits())); fs.match_type(FieldSearcher::REGULAR); if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false; { // test handling of several underscores StringList query = StringList().add("foo").add("bar"); - HitsList exp = HitsList().add(Hits().add(0)).add(Hits().add(1)); + HitsList exp = HitsList().add(Hits().add({0, 0})).add(Hits().add({0, 1})); assertString(fs, query, "foo_bar", exp); assertString(fs, query, "foo__bar", exp); assertString(fs, query, "foo___bar", exp); @@ -522,9 +524,9 @@ 
testStrChrFieldSearcher(StrChrFieldSearcher & fs) query = StringList().add("foo").add("thisisaveryveryverylongword"); assertString(fs, query, "foo____________________thisisaveryveryverylongword", exp); - assertString(fs, "bar", "foo bar", Hits().add(1)); - assertString(fs, "bar", "foo____________________bar", Hits().add(1)); - assertString(fs, "bar", "foo____________________thisisaveryveryverylongword____________________bar", Hits().add(2)); + assertString(fs, "bar", "foo bar", Hits().add({0, 1})); + assertString(fs, "bar", "foo____________________bar", Hits().add({0, 1})); + assertString(fs, "bar", "foo____________________thisisaveryveryverylongword____________________bar", Hits().add({0, 2})); } return true; } @@ -594,16 +596,16 @@ testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs) { std::string field = "operators and operator overloading"; assertString(fs, "rsand", field, Hits()); - assertString(fs, "ove", field, Hits().add(3)); - assertString(fs, "ing", field, Hits().add(3)); - assertString(fs, "era", field, Hits().add(0).add(2)); - assertString(fs, "a", field, Hits().add(0).add(1).add(2).add(3)); + assertString(fs, "ove", field, Hits().add({0, 3})); + assertString(fs, "ing", field, Hits().add({0, 3})); + assertString(fs, "era", field, Hits().add({0, 0}).add({0, 2})); + assertString(fs, "a", field, Hits().add({0, 0}).add({0, 1}).add({0, 2}).add({0, 3})); assertString(fs, StringList().add("dn").add("gn"), field, HitsList().add(Hits()).add(Hits())); - assertString(fs, StringList().add("ato").add("load"), field, HitsList().add(Hits().add(0).add(2)).add(Hits().add(3))); + assertString(fs, StringList().add("ato").add("load"), field, HitsList().add(Hits().add({0, 0}).add({0, 2})).add(Hits().add({0, 3}))); assertString(fs, StringList().add("aa").add("ab"), "aaaab", - HitsList().add(Hits().add(0).add(0).add(0)).add(Hits().add(0))); + HitsList().add(Hits().add({0, 0}).add({0, 0}).add({0, 0})).add(Hits().add({0, 0}))); if (!EXPECT_TRUE(testStringFieldInfo(fs))) 
return false; return true; @@ -613,20 +615,20 @@ TEST("utf8 substring search") { { UTF8SubStringFieldSearcher fs(0); EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - assertString(fs, "aa", "aaaa", Hits().add(0).add(0)); + assertString(fs, "aa", "aaaa", Hits().add({0, 0}).add({0, 0})); } { UTF8SubStringFieldSearcher fs(0); EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - assertString(fs, "abc", "abc bcd abc", Hits().add(0).add(2)); + assertString(fs, "abc", "abc bcd abc", Hits().add({0, 0}).add({0, 2})); fs.maxFieldLength(4); - assertString(fs, "abc", "abc bcd abc", Hits().add(0)); + assertString(fs, "abc", "abc bcd abc", Hits().add({0, 0})); } { UTF8SubstringSnippetModifier fs(0); EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); // we don't have 1 term optimization - assertString(fs, "aa", "aaaa", Hits().add(0).add(0).add(0)); + assertString(fs, "aa", "aaaa", Hits().add({0, 0}).add({0, 0}).add({0, 0})); } } @@ -642,11 +644,11 @@ TEST("utf8 suffix search") { UTF8SuffixStringFieldSearcher fs(0); std::string field = "operators and operator overloading"; TEST_DO(assertString(fs, "rsand", field, Hits())); - TEST_DO(assertString(fs, "tor", field, Hits().add(2))); - TEST_DO(assertString(fs, "tors", field, Hits().add(0))); + TEST_DO(assertString(fs, "tor", field, Hits().add({0, 2}))); + TEST_DO(assertString(fs, "tors", field, Hits().add({0, 0}))); TEST_DO(assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits()))); - TEST_DO(assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3)))); + TEST_DO(assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add({0, 1})).add(Hits().add({0, 3})))); EXPECT_TRUE(testStringFieldInfo(fs)); } @@ -654,14 +656,14 @@ TEST("utf8 suffix search") { TEST("utf8 exact match") { UTF8ExactStringFieldSearcher fs(0); // regular - TEST_DO(assertString(fs, "vespa", "vespa", Hits().add(0))); + TEST_DO(assertString(fs, "vespa", "vespa", 
Hits().add({0, 0}))); TEST_DO(assertString(fs, "vespar", "vespa", Hits())); TEST_DO(assertString(fs, "vespa", "vespar", Hits())); TEST_DO(assertString(fs, "vespa", "vespa vespa", Hits())); TEST_DO(assertString(fs, "vesp", "vespa", Hits())); - TEST_DO(assertString(fs, "vesp*", "vespa", Hits().add(0))); - TEST_DO(assertString(fs, "hutte", "hutte", Hits().add(0))); - TEST_DO(assertString(fs, "hütte", "hütte", Hits().add(0))); + TEST_DO(assertString(fs, "vesp*", "vespa", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "hutte", "hutte", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "hütte", "hütte", Hits().add({0, 0}))); TEST_DO(assertString(fs, "hutte", "hütte", Hits())); TEST_DO(assertString(fs, "hütte", "hutte", Hits())); TEST_DO(assertString(fs, "hütter", "hütte", Hits())); @@ -671,27 +673,27 @@ TEST("utf8 exact match") { TEST("utf8 flexible searcher (except regex)"){ UTF8FlexibleStringFieldSearcher fs(0); // regular - assertString(fs, "vespa", "vespa", Hits().add(0)); + assertString(fs, "vespa", "vespa", Hits().add({0, 0})); assertString(fs, "vesp", "vespa", Hits()); assertString(fs, "esp", "vespa", Hits()); assertString(fs, "espa", "vespa", Hits()); // prefix - assertString(fs, "vesp*", "vespa", Hits().add(0)); + assertString(fs, "vesp*", "vespa", Hits().add({0, 0})); fs.match_type(FieldSearcher::PREFIX); - assertString(fs, "vesp", "vespa", Hits().add(0)); + assertString(fs, "vesp", "vespa", Hits().add({0, 0})); // substring fs.match_type(FieldSearcher::REGULAR); - assertString(fs, "*esp*", "vespa", Hits().add(0)); + assertString(fs, "*esp*", "vespa", Hits().add({0, 0})); fs.match_type(FieldSearcher::SUBSTRING); - assertString(fs, "esp", "vespa", Hits().add(0)); + assertString(fs, "esp", "vespa", Hits().add({0, 0})); // suffix fs.match_type(FieldSearcher::REGULAR); - assertString(fs, "*espa", "vespa", Hits().add(0)); + assertString(fs, "*espa", "vespa", Hits().add({0, 0})); fs.match_type(FieldSearcher::SUFFIX); - assertString(fs, "espa", "vespa", 
Hits().add(0)); + assertString(fs, "espa", "vespa", Hits().add({0, 0})); fs.match_type(FieldSearcher::REGULAR); EXPECT_TRUE(testStringFieldInfo(fs)); @@ -700,11 +702,11 @@ TEST("utf8 flexible searcher (except regex)"){ TEST("utf8 flexible searcher handles regex and by default has case-insensitive partial match semantics") { UTF8FlexibleStringFieldSearcher fs(0); // Note: the # term prefix is a magic term-as-regex symbol used only for tests in this file - TEST_DO(assertString(fs, "#abc", "ABC", Hits().add(0))); - TEST_DO(assertString(fs, "#bc", "ABC", Hits().add(0))); - TEST_DO(assertString(fs, "#ab", "ABC", Hits().add(0))); - TEST_DO(assertString(fs, "#[a-z]", "ABC", Hits().add(0))); - TEST_DO(assertString(fs, "#(zoid)(berg)", "why not zoidberg?", Hits().add(0))); + TEST_DO(assertString(fs, "#abc", "ABC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#bc", "ABC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#ab", "ABC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#[a-z]", "ABC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#(zoid)(berg)", "why not zoidberg?", Hits().add({0, 0}))); TEST_DO(assertString(fs, "#[a-z]", "123", Hits())); } @@ -712,19 +714,19 @@ TEST("utf8 flexible searcher handles case-sensitive regex matching") { UTF8FlexibleStringFieldSearcher fs(0); fs.normalize_mode(Normalizing::NONE); TEST_DO(assertString(fs, "#abc", "ABC", Hits())); - TEST_DO(assertString(fs, "#abc", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "#[A-Z]", "A", Hits().add(0))); - TEST_DO(assertString(fs, "#[A-Z]", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "#abc", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#[A-Z]", "A", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "#[A-Z]", "ABC", Hits().add({0, 0}))); TEST_DO(assertString(fs, "#[A-Z]", "abc", Hits())); } TEST("utf8 flexible searcher handles regexes with explicit anchoring") { UTF8FlexibleStringFieldSearcher fs(0); - TEST_DO(assertString(fs, "#^foo", "food", Hits().add(0))); + 
TEST_DO(assertString(fs, "#^foo", "food", Hits().add({0, 0}))); TEST_DO(assertString(fs, "#^foo", "afoo", Hits())); - TEST_DO(assertString(fs, "#foo$", "afoo", Hits().add(0))); + TEST_DO(assertString(fs, "#foo$", "afoo", Hits().add({0, 0}))); TEST_DO(assertString(fs, "#foo$", "food", Hits())); - TEST_DO(assertString(fs, "#^foo$", "foo", Hits().add(0))); + TEST_DO(assertString(fs, "#^foo$", "foo", Hits().add({0, 0}))); TEST_DO(assertString(fs, "#^foo$", "food", Hits())); TEST_DO(assertString(fs, "#^foo$", "oo", Hits())); } @@ -744,29 +746,29 @@ TEST("utf8 flexible searcher handles fuzzy search in uncased mode") { // %{k,p}term => fuzzy match "term" with max edits k, prefix lock length p // DFA is used for k in {1, 2} - TEST_DO(assertString(fs, "%{1}abc", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "%{1}ABC", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "%{1}abc", "ABC", Hits().add(0))); - TEST_DO(assertString(fs, "%{1}Abc", "abd", Hits().add(0))); - TEST_DO(assertString(fs, "%{1}abc", "ABCD", Hits().add(0))); + TEST_DO(assertString(fs, "%{1}abc", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1}ABC", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1}abc", "ABC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1}Abc", "abd", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1}abc", "ABCD", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{1}abc", "abcde", Hits())); - TEST_DO(assertString(fs, "%{2}abc", "abcde", Hits().add(0))); + TEST_DO(assertString(fs, "%{2}abc", "abcde", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{2}abc", "xabcde", Hits())); // Fallback to non-DFA matcher when k not in {1, 2} - TEST_DO(assertString(fs, "%{3}abc", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "%{3}abc", "XYZ", Hits().add(0))); + TEST_DO(assertString(fs, "%{3}abc", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{3}abc", "XYZ", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{3}abc", "XYZ!", Hits())); } TEST("utf8 
flexible searcher handles fuzzy search in cased mode") { UTF8FlexibleStringFieldSearcher fs(0); fs.normalize_mode(Normalizing::NONE); - TEST_DO(assertString(fs, "%{1}abc", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "%{1}abc", "Abc", Hits().add(0))); + TEST_DO(assertString(fs, "%{1}abc", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1}abc", "Abc", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{1}ABC", "abc", Hits())); - TEST_DO(assertString(fs, "%{2}Abc", "abc", Hits().add(0))); - TEST_DO(assertString(fs, "%{2}abc", "AbC", Hits().add(0))); - TEST_DO(assertString(fs, "%{3}abc", "ABC", Hits().add(0))); + TEST_DO(assertString(fs, "%{2}Abc", "abc", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{2}abc", "AbC", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{3}abc", "ABC", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{3}abc", "ABCD", Hits())); } @@ -774,56 +776,56 @@ TEST("utf8 flexible searcher handles fuzzy search with prefix locking") { UTF8FlexibleStringFieldSearcher fs(0); // DFA TEST_DO(assertString(fs, "%{1,4}zoid", "zoi", Hits())); - TEST_DO(assertString(fs, "%{1,4}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{1,4}zoid", "ZOID", Hits().add(0))); + TEST_DO(assertString(fs, "%{1,4}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1,4}zoid", "ZOID", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoid", Hits())); - TEST_DO(assertString(fs, "%{1,4}zoidberg", "ZoidBerg", Hits().add(0))); - TEST_DO(assertString(fs, "%{1,4}zoidberg", "ZoidBergg", Hits().add(0))); - TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidborg", Hits().add(0))); + TEST_DO(assertString(fs, "%{1,4}zoidberg", "ZoidBerg", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1,4}zoidberg", "ZoidBergg", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidborg", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidblergh", Hits())); - TEST_DO(assertString(fs, "%{2,4}zoidberg", 
"zoidblergh", Hits().add(0))); + TEST_DO(assertString(fs, "%{2,4}zoidberg", "zoidblergh", Hits().add({0, 0}))); // Fallback - TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidblergh", Hits().add(0))); - TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidbooorg", Hits().add(0))); + TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidblergh", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidbooorg", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidzooorg", Hits())); fs.normalize_mode(Normalizing::NONE); // DFA TEST_DO(assertString(fs, "%{1,4}zoid", "ZOID", Hits())); TEST_DO(assertString(fs, "%{1,4}ZOID", "zoid", Hits())); - TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidBerg", Hits().add(0))); // 1 edit + TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidBerg", Hits().add({0, 0}))); // 1 edit TEST_DO(assertString(fs, "%{1,4}zoidberg", "zoidBblerg", Hits())); // 2 edits, 1 max - TEST_DO(assertString(fs, "%{2,4}zoidberg", "zoidBblerg", Hits().add(0))); // 2 edits, 2 max + TEST_DO(assertString(fs, "%{2,4}zoidberg", "zoidBblerg", Hits().add({0, 0}))); // 2 edits, 2 max // Fallback TEST_DO(assertString(fs, "%{3,4}zoidberg", "zoidBERG", Hits())); // 4 edits, 3 max - TEST_DO(assertString(fs, "%{4,4}zoidberg", "zoidBERG", Hits().add(0))); // 4 edits, 4 max + TEST_DO(assertString(fs, "%{4,4}zoidberg", "zoidBERG", Hits().add({0, 0}))); // 4 edits, 4 max } TEST("utf8 flexible searcher fuzzy match with max_edits=0 implies exact match") { UTF8FlexibleStringFieldSearcher fs(0); TEST_DO(assertString(fs, "%{0}zoid", "zoi", Hits())); TEST_DO(assertString(fs, "%{0,4}zoid", "zoi", Hits())); - TEST_DO(assertString(fs, "%{0}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{0}zoid", "ZOID", Hits().add(0))); - TEST_DO(assertString(fs, "%{0,4}zoid", "ZOID", Hits().add(0))); + TEST_DO(assertString(fs, "%{0}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{0}zoid", "ZOID", Hits().add({0, 0}))); + TEST_DO(assertString(fs, 
"%{0,4}zoid", "ZOID", Hits().add({0, 0}))); fs.normalize_mode(Normalizing::NONE); TEST_DO(assertString(fs, "%{0}zoid", "ZOID", Hits())); TEST_DO(assertString(fs, "%{0,4}zoid", "ZOID", Hits())); - TEST_DO(assertString(fs, "%{0}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{0,4}zoid", "zoid", Hits().add(0))); + TEST_DO(assertString(fs, "%{0}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{0,4}zoid", "zoid", Hits().add({0, 0}))); } TEST("utf8 flexible searcher caps oversized fuzzy prefix length to term length") { UTF8FlexibleStringFieldSearcher fs(0); // DFA - TEST_DO(assertString(fs, "%{1,5}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{1,9001}zoid", "zoid", Hits().add(0))); + TEST_DO(assertString(fs, "%{1,5}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{1,9001}zoid", "zoid", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{1,9001}zoid", "boid", Hits())); // Fallback - TEST_DO(assertString(fs, "%{0,5}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{5,5}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{0,9001}zoid", "zoid", Hits().add(0))); - TEST_DO(assertString(fs, "%{5,9001}zoid", "zoid", Hits().add(0))); + TEST_DO(assertString(fs, "%{0,5}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{5,5}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{0,9001}zoid", "zoid", Hits().add({0, 0}))); + TEST_DO(assertString(fs, "%{5,9001}zoid", "zoid", Hits().add({0, 0}))); TEST_DO(assertString(fs, "%{5,9001}zoid", "boid", Hits())); } @@ -873,9 +875,9 @@ TEST("integer search") TEST_DO(assertInt(fs, StringList().add("9").add("10"), 10, BoolList().add(false).add(true))); TEST_DO(assertInt(fs, StringList().add("10").add(">9"), 10, BoolList().add(true).add(true))); - TEST_DO(assertInt(fs, "10", LongList().add(10).add(20).add(10).add(30), Hits().add(0).add(2))); + TEST_DO(assertInt(fs, "10", LongList().add(10).add(20).add(10).add(30), Hits().add({0, 0}).add({2, 
0}))); TEST_DO(assertInt(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30), - HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)))); + HitsList().add(Hits().add({0, 0}).add({2, 0})).add(Hits().add({1, 0})))); TEST_DO(assertFieldInfo(fs, "10", 10, QTFieldInfo(0, 1, 1))); TEST_DO(assertFieldInfo(fs, "10", LongList().add(10).add(20).add(10).add(30), QTFieldInfo(0, 2, 4))); @@ -908,9 +910,9 @@ TEST("floating point search") TEST_DO(assertFloat(fs, StringList().add("10").add("10.5"), 10.5, BoolList().add(false).add(true))); TEST_DO(assertFloat(fs, StringList().add(">10.4").add("10.5"), 10.5, BoolList().add(true).add(true))); - TEST_DO(assertFloat(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), Hits().add(0).add(2))); + TEST_DO(assertFloat(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), Hits().add({0, 0}).add({2, 0}))); TEST_DO(assertFloat(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5), - HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)))); + HitsList().add(Hits().add({0, 0}).add({2, 0})).add(Hits().add({1, 0})))); TEST_DO(assertFieldInfo(fs, "10.5", 10.5, QTFieldInfo(0, 1, 1))); TEST_DO(assertFieldInfo(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), QTFieldInfo(0, 2, 4))); @@ -1106,8 +1108,23 @@ TEST("counting of words") { // check that 'a' is counted as 1 word UTF8StrChrFieldSearcher fs(0); StringList field = StringList().add("a").add("aa bb cc"); - assertString(fs, "bb", field, Hits().add(2)); - assertString(fs, StringList().add("bb").add("not"), field, HitsList().add(Hits().add(2)).add(Hits())); + assertString(fs, "bb", field, Hits().add({1, 1})); + assertString(fs, StringList().add("bb").add("not"), field, HitsList().add(Hits().add({1, 1})).add(Hits())); +} + +TEST("element lengths") +{ + UTF8StrChrFieldSearcher fs(0); + auto field = StringList().add("a").add("b a c").add("d a"); + auto query = StringList().add("a"); + auto qtv = 
performSearch(fs, query, getFieldValue(field)); + EXPECT_EQUAL(1u, qtv.size()); + auto& qt = *qtv[0]; + auto& hl = qt.getHitList(); + EXPECT_EQUAL(3u, hl.size()); + EXPECT_EQUAL(1u, hl[0].element_length()); + EXPECT_EQUAL(3u, hl[1].element_length()); + EXPECT_EQUAL(2u, hl[2].element_length()); } vespalib::string NormalizationInput = "test That Somehing happens with during NårmØlization"; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index cdaf14eef9b..eebd9a79c07 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -45,10 +45,15 @@ RankManager::Snapshot::addProperties(const vespa::config::search::RankProfilesCo FieldInfo::DataType to_data_type(VsmfieldsConfig::Fieldspec::Searchmethod search_method) { - if (search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR) { + // detecting DataType from Searchmethod will not give correct results, + // we should probably use the document type + if (search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR || + search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NONE) + { return FieldInfo::DataType::TENSOR; } // This is the default FieldInfo data type if not specified. + // Wrong in most cases. 
return FieldInfo::DataType::DOUBLE; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 0a64ee7c093..070563859a5 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -335,7 +335,7 @@ RankProcessor::unpack_match_data(uint32_t docid, MatchData &matchData, QueryWrap if (tmd != nullptr) { // adjust so that the position for phrase terms equals the match for the first term TermFieldMatchDataPosition pos(hit.element_id(), hit.position() - term.getPosAdjust(), - hit.element_weight(), fieldLen); + hit.element_weight(), hit.element_length()); tmd->appendPosition(pos); LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); diff --git a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp index d0cfa4d9956..aa25b0e75d3 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp @@ -53,7 +53,7 @@ void BoolFieldSearcher::onValue(const document::FieldValue & fv) addHit(*_qtl[j], 0); } } - ++_words; + set_element_length(1); } } diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp index 5e06ae41a03..c75ab7fccd3 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp @@ -5,6 +5,7 @@ #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> #include <vespa/searchlib/query/streaming/multi_term.h> #include <vespa/vespalib/stllike/hash_set.h> +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".vsm.searcher.fieldsearcher"); @@ -55,6 +56,7 @@ FieldSearcher::FieldSearcher(FieldIdT fId, 
bool defaultPrefix) noexcept _maxFieldLength(0x100000), _currentElementId(0), _currentElementWeight(1), + _element_length_fixups(), _words(0), _badUtf8Count(0) { @@ -70,6 +72,7 @@ FieldSearcher::search(const StorageDocument & doc) fInfo.setHitOffset(qt->getHitList().size()); } onSearch(doc); + assert(_element_length_fixups.empty()); for (auto qt : _qtl) { QueryTerm::FieldInfo & fInfo = qt->getFieldInfo(field()); fInfo.setHitCount(qt->getHitList().size() - fInfo.getHitOffset()); @@ -276,4 +279,16 @@ FieldSearcher::IteratorHandler::onStructStart(const Content & c) _searcher.onStructValue(static_cast<const document::StructFieldValue &>(c.getValue())); } +void +FieldSearcher::set_element_length(uint32_t element_length) +{ + _words += element_length; + if (!_element_length_fixups.empty()) { + for (auto& fixup : _element_length_fixups) { + fixup.first->set_element_length(fixup.second, element_length); + } + _element_length_fixups.clear(); + } +} + } diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index e339e4bdf5a..2af68c553b8 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -6,6 +6,7 @@ #include <vespa/vsm/common/document.h> #include <vespa/vsm/common/storagedocument.h> #include <vespa/vespalib/util/array.h> +#include <utility> namespace search::fef { class IQueryEnvironment; } @@ -96,6 +97,7 @@ private: unsigned _maxFieldLength; uint32_t _currentElementId; int32_t _currentElementWeight; // Contains the weight of the current item being evaluated. + std::vector<std::pair<search::streaming::QueryTerm*, uint32_t>> _element_length_fixups; protected: /// Number of terms searched. unsigned _words; @@ -105,9 +107,10 @@ protected: * Adds a hit to the given query term. * For each call to onValue() a batch of words are processed, and the position is local to this batch. 
**/ - void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const { - qt.add(field(), _currentElementId, _currentElementWeight, _words + pos); + void addHit(search::streaming::QueryTerm & qt, uint32_t pos) { + _element_length_fixups.emplace_back(&qt, qt.add(field(), _currentElementId, _currentElementWeight, pos)); } + void set_element_length(uint32_t element_length); public: static search::byte _foldLowCase[256]; static search::byte _wordChar[256]; diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp index 8558522003f..70e5bb4b82c 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp @@ -55,7 +55,7 @@ void FloatFieldSearcherT<T>::onValue(const document::FieldValue & fv) addHit(*_qtl[j], 0); } } - ++_words; + set_element_length(1); } template<typename T> diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp index 5ecc9a5a06e..bbeb3be986f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp @@ -58,7 +58,7 @@ void GeoPosFieldSearcher::onStructValue(const document::StructFieldValue & fv) { addHit(*_qtl[j], 0); } } - ++_words; + set_element_length(1); } bool GeoPosFieldSearcher::GeoPosInfo::cmp(const document::StructFieldValue & sfv) const { diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp index e73c7f5c1a7..3984254274f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp @@ -43,7 +43,7 @@ void IntFieldSearcher::onValue(const document::FieldValue & fv) addHit(*_qtl[j], 0); } } - ++_words; + 
set_element_length(1); } } diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp index ba52444101d..673cf11b2cf 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp @@ -25,22 +25,28 @@ void StrChrFieldSearcher::onValue(const document::FieldValue & fv) bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) { + size_t element_length = 0; + bool need_count_words = false; if (_qtl.size() > 1) { size_t mintsz = shortestTerm(); if (fieldRef.size() >= mintsz) { - _words += matchTerms(fieldRef, mintsz); + element_length = matchTerms(fieldRef, mintsz); } else { - _words += countWords(fieldRef); + need_count_words = true; } } else { for (auto qt : _qtl) { if (fieldRef.size() >= qt->termLen() || qt->isRegex() || qt->isFuzzy()) { - _words += matchTerm(fieldRef, *qt); + element_length = std::max(element_length, matchTerm(fieldRef, *qt)); } else { - _words += countWords(fieldRef); + need_count_words = true; } } } + if (need_count_words) { + element_length = std::max(element_length, countWords(fieldRef)); + } + set_element_length(element_length); return true; } diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java index 19e0c0dc77d..5ff7b4592a1 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java @@ -435,7 +435,8 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { return ignoredContent; } - private ContentChannel getDocument(HttpRequest request, DocumentPath path, ResponseHandler handler) { + private ContentChannel 
getDocument(HttpRequest request, DocumentPath path, ResponseHandler rawHandler) { + ResponseHandler handler = new MeasuringResponseHandler(request, rawHandler, com.yahoo.documentapi.metrics.DocumentOperationType.GET, clock.instant()); disallow(request, DRY_RUN); enqueueAndDispatch(request, handler, () -> { DocumentOperationParameters rawParameters = parametersFromRequest(request, CLUSTER, FIELD_SET); @@ -1057,7 +1058,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { private ParsedDocumentOperation parse(InputStream inputStream, String docId, DocumentOperationType operation) { try { - return new JsonReader(manager, inputStream, jsonFactory).readSingleDocument(operation, docId); + return new JsonReader(manager, inputStream, jsonFactory).readSingleDocumentStreaming(operation, docId); } catch (IllegalArgumentException e) { incrementMetricParseError(); throw e; diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationType.java b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationType.java index 1c0b8c560ac..63bf520f4d3 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationType.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/documentapi/metrics/DocumentOperationType.java @@ -11,7 +11,7 @@ import com.yahoo.messagebus.Message; */ public enum DocumentOperationType { - PUT, REMOVE, UPDATE, ERROR; + GET, PUT, REMOVE, UPDATE, ERROR; public static DocumentOperationType fromMessage(Message msg) { if (msg instanceof PutDocumentMessage) { diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/vespa/http/server/MetricNames.java b/vespaclient-container-plugin/src/main/java/com/yahoo/vespa/http/server/MetricNames.java index bf740014edd..efcffb16a2b 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/vespa/http/server/MetricNames.java +++ 
b/vespaclient-container-plugin/src/main/java/com/yahoo/vespa/http/server/MetricNames.java @@ -7,9 +7,10 @@ import ai.vespa.metrics.ContainerMetrics; * Place to store the metric names so where the metrics are logged can be found * more easily in an IDE. * - * @author steinar + * @author Steinar Knutsen */ public final class MetricNames { + public static final String NUM_OPERATIONS = ContainerMetrics.HTTPAPI_NUM_OPERATIONS.baseName(); public static final String NUM_PUTS = ContainerMetrics.HTTPAPI_NUM_PUTS.baseName(); public static final String NUM_REMOVES = ContainerMetrics.HTTPAPI_NUM_REMOVES.baseName(); @@ -26,7 +27,6 @@ public final class MetricNames { public static final String FAILED_TIMEOUT = ContainerMetrics.HTTPAPI_FAILED_TIMEOUT.baseName(); public static final String FAILED_INSUFFICIENT_STORAGE = ContainerMetrics.HTTPAPI_FAILED_INSUFFICIENT_STORAGE.baseName(); - private MetricNames() { - } + private MetricNames() { } } diff --git a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java index c8fcb4c4635..04639db4dac 100644 --- a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java +++ b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java @@ -411,6 +411,7 @@ public class DocumentV1ApiTest { DocumentUpdate expectedUpdate = new DocumentUpdate(doc3.getDataType(), doc3.getId()); expectedUpdate.addFieldUpdate(FieldUpdate.createAssign(doc3.getField("artist"), new StringFieldValue("Lisa Ekdahl"))); expectedUpdate.setCondition(new TestAndSetCondition("true")); + expectedUpdate.setCreateIfNonExistent(true); assertEquals(expectedUpdate, update); parameters.responseHandler().get().handleResponse(new UpdateResponse(0, false)); assertEquals(parameters().withRoute("content"), parameters); @@ -419,10 +420,16 @@ public class 
DocumentV1ApiTest { response = driver.sendRequest("http://localhost/document/v1/space/music/docid?selection=true&cluster=content&timeChunk=10", PUT, """ { + "extra-ignored-field": { "foo": [{ }], "bar": null }, + "another-ignored-field": [{ "foo": [{ }] }], + "remove": "id:ns:type::ignored", + "put": "id:ns:type::ignored", "fields": { "artist": { "assign": "Lisa Ekdahl" }, "nonexisting": { "assign": "Ignored" } - } + }, + "post": "id:ns:type::ignored", + "create": true }"""); assertSameJson(""" { @@ -778,7 +785,7 @@ public class DocumentV1ApiTest { response = driver.sendRequest("http://localhost/document/v1/space/music/number/1/two?condition=test%20it", POST, ""); assertSameJson("{" + " \"pathId\": \"/document/v1/space/music/number/1/two\"," + - " \"message\": \"Could not read document, no document?\"" + + " \"message\": \"expected start of root object, got null\"" + "}", response.readAll()); assertEquals(400, response.getStatus()); @@ -791,7 +798,8 @@ public class DocumentV1ApiTest { "}"); Inspector responseRoot = SlimeUtils.jsonToSlime(response.readAll()).get(); assertEquals("/document/v1/space/music/number/1/two", responseRoot.field("pathId").asString()); - assertTrue(responseRoot.field("message").asString().startsWith("Unexpected character ('┻' (code 9531 / 0x253b)): was expecting double-quote to start field name")); + assertTrue(responseRoot.field("message").asString(), + responseRoot.field("message").asString().startsWith("failed parsing document: Unexpected character ('┻' (code 9531 / 0x253b)): was expecting double-quote to start field name")); assertEquals(400, response.getStatus()); // PUT on a unknown document type is a 400 diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index df75a6f6d1f..1f44d90f924 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -1265,7 +1265,9 @@ "public static com.yahoo.tensor.Tensor from(java.lang.String)", "public static com.yahoo.tensor.Tensor from(double)" ], - "fields" : [ ] + "fields" : [ + 
"public static final int INVALID_INDEX" + ] }, "com.yahoo.tensor.TensorAddress$Builder" : { "superClass" : "java.lang.Object", @@ -1277,6 +1279,7 @@ "public void <init>(com.yahoo.tensor.TensorType)", "public com.yahoo.tensor.TensorAddress$Builder add(java.lang.String)", "public com.yahoo.tensor.TensorAddress$Builder add(java.lang.String, java.lang.String)", + "public com.yahoo.tensor.TensorAddress$Builder add(java.lang.String, int)", "public com.yahoo.tensor.TensorAddress$Builder copy()", "public com.yahoo.tensor.TensorType type()", "public com.yahoo.tensor.TensorAddress build()" @@ -1309,16 +1312,19 @@ "public static com.yahoo.tensor.TensorAddress of(java.lang.String[])", "public static varargs com.yahoo.tensor.TensorAddress ofLabels(java.lang.String[])", "public static varargs com.yahoo.tensor.TensorAddress of(long[])", + "public static varargs com.yahoo.tensor.TensorAddress of(int[])", "public abstract int size()", "public abstract java.lang.String label(int)", "public abstract long numericLabel(int)", "public abstract com.yahoo.tensor.TensorAddress withLabel(int, long)", "public final boolean isEmpty()", "public int compareTo(com.yahoo.tensor.TensorAddress)", - "public int hashCode()", - "public boolean equals(java.lang.Object)", + "public java.lang.String toString()", "public final java.lang.String toString(com.yahoo.tensor.TensorType)", "public static java.lang.String labelToString(java.lang.String)", + "public com.yahoo.tensor.TensorAddress partialCopy(int[])", + "public com.yahoo.tensor.TensorAddress fullAddressOf(java.util.List, int[])", + "public com.yahoo.tensor.TensorAddress sparsePartialAddress(com.yahoo.tensor.TensorType, java.util.List)", "public bridge synthetic int compareTo(java.lang.Object)" ], "fields" : [ ] diff --git a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java index 92a9ed26085..7a3d34eca7b 100644 --- a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java +++ 
b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java @@ -8,8 +8,25 @@ import net.openhft.hashing.LongHashFunction; * @author baldersheim */ public class Hasher { + private final LongHashFunction hasher; /** Uses net.openhft.hashing.LongHashFunction.xx3() */ public static long xxh3(byte [] data) { return LongHashFunction.xx3().hashBytes(data); } + public static long xxh3(byte [] data, long seed) { + return LongHashFunction.xx3(seed).hashBytes(data); + } + + private Hasher(LongHashFunction hasher) { + this.hasher = hasher; + } + public static Hasher withSeed(long seed) { + return new Hasher(LongHashFunction.xx3(seed)); + } + public long hash(long v) { + return hasher.hashLong(v); + } + public long hash(String s) { + return hasher.hashChars(s); + } } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java index 53f50fc4d02..085f9172095 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java @@ -78,6 +78,9 @@ class IndexedDoubleTensor extends IndexedTensor { @Override public Builder cell(TensorAddress address, double value) { + if (address == null) { + return null; + } values[(int)toValueIndex(address, sizes(), type)] = value; return this; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java index f26174d9576..a428524612b 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java @@ -171,10 +171,8 @@ public abstract class IndexedTensor implements Tensor { } static long toValueIndex(TensorAddress address, DimensionSizes sizes, TensorType type) { - if (address.isEmpty()) return 0; - long valueIndex = 0; - for (int i = 0; i < address.size(); i++) { + for (int i = 0, sz = address.size(); i < sz; i++) { long label = 
address.numericLabel(i); if (label >= sizes.size(i)) throw new IllegalArgumentException(address + " is not within the bounds of " + type); @@ -893,8 +891,8 @@ public abstract class IndexedTensor implements Tensor { private static long computeSize(DimensionSizes sizes, List<Integer> iterateDimensions) { long size = 1; - for (int iterateDimension : iterateDimensions) - size *= sizes.size(iterateDimension); + for (int i = 0; i < iterateDimensions.size(); i++) + size *= sizes.size(iterateDimensions.get(i)); return size; } @@ -1060,7 +1058,7 @@ public abstract class IndexedTensor implements Tensor { /** In this case we can reuse the source index computation for the iteration index */ private final static class EqualSizeMultiDimensionIndexes extends MultiDimensionIndexes { - private long lastComputedSourceValueIndex = -1; + private long lastComputedSourceValueIndex = Tensor.INVALID_INDEX; private EqualSizeMultiDimensionIndexes(DimensionSizes sizes, List<Integer> iterateDimensions, long[] initialIndexes, long size) { super(sizes, sizes, iterateDimensions, initialIndexes, size); diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index 95d1d70118a..d4469f447cb 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -4,8 +4,6 @@ package com.yahoo.tensor; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.yahoo.tensor.impl.NumericTensorAddress; -import com.yahoo.tensor.impl.StringTensorAddress; import java.util.ArrayList; import java.util.Arrays; @@ -111,7 +109,7 @@ public class MixedTensor implements Tensor { return new Iterator<>() { final Iterator<DenseSubspace> blockIterator = index.denseSubspaces.iterator(); DenseSubspace currBlock = null; - final long[] labels = new long[index.indexedDimensions.size()]; + final int[] labels = new 
int[index.indexedDimensions.size()]; int currOffset = index.denseSubspaceSize; int prevOffset = -1; @Override @@ -127,7 +125,7 @@ public class MixedTensor implements Tensor { if (currOffset != prevOffset) { // Optimization for index.denseSubspaceSize == 1 index.denseOffsetToAddress(currOffset, labels); } - TensorAddress fullAddr = index.fullAddressOf(currBlock.sparseAddress, labels); + TensorAddress fullAddr = currBlock.sparseAddress.fullAddressOf(index.type.dimensions(), labels); prevOffset = currOffset; double value = currBlock.cells[currOffset++]; return new Cell(fullAddr, value); @@ -321,7 +319,7 @@ public class MixedTensor implements Tensor { @Override public Tensor.Builder cell(TensorAddress address, double value) { - TensorAddress sparsePart = index.sparsePartialAddress(address); + TensorAddress sparsePart = address.sparsePartialAddress(index.sparseType, index.type.dimensions()); int denseOffset = index.denseOffsetOf(address); double[] denseSubspace = denseSubspace(sparsePart); denseSubspace[denseOffset] = value; @@ -475,7 +473,7 @@ public class MixedTensor implements Tensor { } private DenseSubspace blockOf(TensorAddress address) { - TensorAddress sparsePart = sparsePartialAddress(address); + TensorAddress sparsePart = address.sparsePartialAddress(sparseType, type.dimensions()); Integer blockNum = sparseMap.get(sparsePart); if (blockNum == null || blockNum >= denseSubspaces.size()) { return null; @@ -502,19 +500,7 @@ public class MixedTensor implements Tensor { return denseSubspaceSize; } - private TensorAddress sparsePartialAddress(TensorAddress address) { - if (type.dimensions().size() != address.size()) - throw new IllegalArgumentException("Tensor type of " + this + " is not the same size as " + address); - TensorAddress.Builder builder = new TensorAddress.Builder(sparseType); - for (int i = 0; i < type.dimensions().size(); ++i) { - TensorType.Dimension dimension = type.dimensions().get(i); - if ( ! 
dimension.isIndexed()) - builder.add(dimension.name(), address.label(i)); - } - return builder.build(); - } - - private void denseOffsetToAddress(long denseOffset, long [] labels) { + private void denseOffsetToAddress(long denseOffset, int [] labels) { if (denseOffset < 0 || denseOffset > denseSubspaceSize) { throw new IllegalArgumentException("Offset out of bounds"); } @@ -524,28 +510,11 @@ public class MixedTensor implements Tensor { for (int i = 0; i < labels.length; ++i) { innerSize /= indexedDimensionsSize[i]; - labels[i] = restSize / innerSize; + labels[i] = (int) (restSize / innerSize); restSize %= innerSize; } } - private TensorAddress fullAddressOf(TensorAddress sparsePart, long [] densePart) { - String[] labels = new String[type.dimensions().size()]; - int mappedIndex = 0; - int indexedIndex = 0; - for (int i = 0; i < type.dimensions().size(); i++) { - TensorType.Dimension d = type.dimensions().get(i); - if (d.isIndexed()) { - labels[i] = NumericTensorAddress.asString(densePart[indexedIndex]); - indexedIndex++; - } else { - labels[i] = sparsePart.label(mappedIndex); - mappedIndex++; - } - } - return StringTensorAddress.unsafeOf(labels); - } - @Override public String toString() { return "index into " + type; diff --git a/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java index 3e41e6d94eb..da643d8c173 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java @@ -1,9 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.tensor; -import com.yahoo.tensor.impl.StringTensorAddress; - -import java.util.Arrays; +import com.yahoo.tensor.impl.Label; /** * An address to a subset of a tensors' cells, specifying a label for some but not necessarily all of the tensors @@ -20,7 +18,7 @@ public class PartialAddress { // Two arrays which contains corresponding dimension:label pairs. // The sizes of these are always equal. private final String[] dimensionNames; - private final Object[] labels; + private final long[] labels; private PartialAddress(Builder builder) { this.dimensionNames = builder.dimensionNames; @@ -37,15 +35,15 @@ public class PartialAddress { public long numericLabel(String dimensionName) { for (int i = 0; i < dimensionNames.length; i++) if (dimensionNames[i].equals(dimensionName)) - return asLong(labels[i]); - return -1; + return labels[i]; + return Tensor.INVALID_INDEX; } /** Returns the label of this dimension, or null if no label is specified for it */ public String label(String dimensionName) { for (int i = 0; i < dimensionNames.length; i++) if (dimensionNames[i].equals(dimensionName)) - return labels[i].toString(); + return Label.fromNumber(labels[i]); return null; } @@ -57,7 +55,7 @@ public class PartialAddress { public String label(int i) { if (i >= size()) throw new IllegalArgumentException("No label at position " + i + " in " + this); - return labels[i].toString(); + return Label.fromNumber(labels[i]); } public int size() { return dimensionNames.length; } @@ -67,40 +65,14 @@ public class PartialAddress { public TensorAddress asAddress(TensorType type) { if (type.rank() != size()) throw new IllegalArgumentException(type + " has a different rank than " + this); - if (Arrays.stream(labels).allMatch(l -> l instanceof Long)) { - long[] numericLabels = new long[labels.length]; - for (int i = 0; i < type.dimensions().size(); i++) { - long label = numericLabel(type.dimensions().get(i).name()); - if (label < 0) - throw new IllegalArgumentException(type + " 
dimension names does not match " + this); - numericLabels[i] = label; - } - return TensorAddress.of(numericLabels); - } - else { - String[] stringLabels = new String[labels.length]; - for (int i = 0; i < type.dimensions().size(); i++) { - String label = label(type.dimensions().get(i).name()); - if (label == null) - throw new IllegalArgumentException(type + " dimension names does not match " + this); - stringLabels[i] = label; - } - return StringTensorAddress.unsafeOf(stringLabels); - } - } - - private long asLong(Object label) { - if (label instanceof Long) { - return (Long) label; - } - else { - try { - return Long.parseLong(label.toString()); - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("Label '" + label + "' is not numeric"); - } + long[] numericLabels = new long[labels.length]; + for (int i = 0; i < type.dimensions().size(); i++) { + long label = numericLabel(type.dimensions().get(i).name()); + if (label == Tensor.INVALID_INDEX) + throw new IllegalArgumentException(type + " dimension names does not match " + this); + numericLabels[i] = label; } + return TensorAddress.of(numericLabels); } @Override @@ -116,12 +88,12 @@ public class PartialAddress { public static class Builder { private String[] dimensionNames; - private Object[] labels; + private long[] labels; private int index = 0; public Builder(int size) { dimensionNames = new String[size]; - labels = new Object[size]; + labels = new long[size]; } public Builder add(String dimensionName, long label) { @@ -133,7 +105,7 @@ public class PartialAddress { public Builder add(String dimensionName, String label) { dimensionNames[index] = dimensionName; - labels[index] = label; + labels[index] = Label.toNumber(label); index++; return this; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java index d034ac551f8..d650b88f202 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java +++ 
b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java @@ -20,7 +20,7 @@ import com.yahoo.tensor.functions.Rename; import com.yahoo.tensor.functions.Softmax; import com.yahoo.tensor.functions.XwPlusB; import com.yahoo.tensor.functions.Expand; -import com.yahoo.tensor.impl.NumericTensorAddress; +import com.yahoo.tensor.impl.Label; import java.util.ArrayList; import java.util.Arrays; @@ -55,6 +55,7 @@ import static com.yahoo.tensor.functions.ScalarFunctions.Hamming; * @author bratseth */ public interface Tensor { + int INVALID_INDEX = -1; // ----------------- Accessors @@ -506,7 +507,7 @@ public interface Tensor { * This is for optimizations mapping between tensors where this is possible without creating a * TensorAddress. */ - long getDirectIndex() { return -1; } + long getDirectIndex() { return INVALID_INDEX; } /** Returns the value as a double */ @Override @@ -626,7 +627,7 @@ public interface Tensor { public TensorType type() { return tensorBuilder.type(); } public CellBuilder label(String dimension, long label) { - return label(dimension, NumericTensorAddress.asString(label)); + return label(dimension, Label.fromNumber(label)); } public Builder value(double cellValue) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index 1b88a5d1b2f..59a5e2a49b1 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -1,13 +1,11 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.tensor; -import com.yahoo.tensor.impl.NumericTensorAddress; -import com.yahoo.tensor.impl.StringTensorAddress; -import net.jpountz.xxhash.XXHash32; -import net.jpountz.xxhash.XXHashFactory; +import com.yahoo.tensor.impl.Label; +import com.yahoo.tensor.impl.TensorAddressAny; -import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.List; import java.util.Objects; /** @@ -18,23 +16,25 @@ import java.util.Objects; */ public abstract class TensorAddress implements Comparable<TensorAddress> { - private static final XXHash32 hasher = XXHashFactory.fastestJavaInstance().hash32(); - public static TensorAddress of(String[] labels) { - return StringTensorAddress.of(labels); + return TensorAddressAny.of(labels); } - public static TensorAddress ofLabels(String ... labels) { - return StringTensorAddress.of(labels); + public static TensorAddress ofLabels(String... labels) { + return TensorAddressAny.of(labels); } - public static TensorAddress of(long ... labels) { - return NumericTensorAddress.of(labels); + public static TensorAddress of(long... labels) { + return TensorAddressAny.of(labels); } - private int cached_hash = 0; + public static TensorAddress of(int... 
labels) { + return TensorAddressAny.of(labels); + } - /** Returns the number of labels in this */ + /** + * Returns the number of labels in this + */ public abstract int size(); /** @@ -67,32 +67,22 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { } @Override - public int hashCode() { - if (cached_hash != 0) return cached_hash; - - int hash = 0; - for (int i = 0; i < size(); i++) { - String label = label(i); - if (label != null) { - byte [] buf = label.getBytes(StandardCharsets.UTF_8); - hash = hasher.hash(buf, 0, buf.length, hash); + public String toString() { + StringBuilder sb = new StringBuilder("cell address ("); + int sz = size(); + if (sz > 0) { + sb.append(label(0)); + for (int i = 1; i < sz; i++) { + sb.append(',').append(label(i)); } } - return cached_hash = hash; - } - @Override - public boolean equals(Object o) { - if (o == this) return true; - if ( ! (o instanceof TensorAddress other)) return false; - if (other.size() != this.size()) return false; - for (int i = 0; i < this.size(); i++) - if ( ! 
Objects.equals(this.label(i), other.label(i))) - return false; - return true; + return sb.append(')').toString(); } - /** Returns this as a string on the appropriate form given the type */ + /** + * Returns this as a string on the appropriate form given the type + */ public final String toString(TensorType type) { StringBuilder b = new StringBuilder("{"); for (int i = 0; i < size(); i++) { @@ -105,24 +95,72 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return b.toString(); } - /** Returns a label as a string with appropriate quoting/escaping when necessary */ + /** + * Returns a label as a string with appropriate quoting/escaping when necessary + */ public static String labelToString(String label) { if (TensorType.labelMatcher.matches(label)) return label; // no quoting if (label.contains("'")) return "\"" + label + "\""; return "'" + label + "'"; } + /** Returns an address with only some of the dimension */ + public TensorAddress partialCopy(int[] indexMap) { + int[] labels = new int[indexMap.length]; + for (int i = 0; i < labels.length; ++i) { + labels[i] = (int)numericLabel(indexMap[i]); + } + return TensorAddressAny.ofUnsafe(labels); + } + + /** Creates a complete address by taking the sparse dimmensions from this and the indexed from the densePart */ + public TensorAddress fullAddressOf(List<TensorType.Dimension> dimensions, int [] densePart) { + int [] labels = new int[dimensions.size()]; + int mappedIndex = 0; + int indexedIndex = 0; + for (int i = 0; i < labels.length; i++) { + TensorType.Dimension d = dimensions.get(i); + if (d.isIndexed()) { + labels[i] = densePart[indexedIndex]; + indexedIndex++; + } else { + labels[i] = (int)numericLabel(mappedIndex); + mappedIndex++; + } + } + return TensorAddressAny.ofUnsafe(labels); + } + + /** Extracts the sparse(non-indexed) dimensions of the address */ + public TensorAddress sparsePartialAddress(TensorType sparseType, List<TensorType.Dimension> dimensions) { + if (dimensions.size() 
!= size()) + throw new IllegalArgumentException("Tensor type of " + this + " is not the same size as " + this); + TensorAddress.Builder builder = new TensorAddress.Builder(sparseType); + for (int i = 0; i < dimensions.size(); ++i) { + TensorType.Dimension dimension = dimensions.get(i); + if ( ! dimension.isIndexed()) + builder.add(dimension.name(), (int)numericLabel(i)); + } + return builder.build(); + } + /** Builder of a tensor address */ public static class Builder { final TensorType type; - final String[] labels; + final int[] labels; + + private static int [] createEmptyLabels(int size) { + int [] labels = new int[size]; + Arrays.fill(labels, Tensor.INVALID_INDEX); + return labels; + } public Builder(TensorType type) { - this(type, new String[type.dimensions().size()]); + this(type, createEmptyLabels(type.dimensions().size())); } - private Builder(TensorType type, String[] labels) { + private Builder(TensorType type, int[] labels) { this.type = type; this.labels = labels; } @@ -152,6 +190,14 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { int labelIndex = type.indexOfDimensionAsInt(dimension); if ( labelIndex < 0) throw new IllegalArgumentException(type + " does not contain dimension '" + dimension + "'"); + labels[labelIndex] = Label.toNumber(label); + return this; + } + public Builder add(String dimension, int label) { + Objects.requireNonNull(dimension, "dimension cannot be null"); + int labelIndex = type.indexOfDimensionAsInt(dimension); + if ( labelIndex < 0) + throw new IllegalArgumentException(type + " does not contain dimension '" + dimension + "'"); labels[labelIndex] = label; return this; } @@ -166,14 +212,14 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { void validate() { for (int i = 0; i < labels.length; i++) - if (labels[i] == null) + if (labels[i] == Tensor.INVALID_INDEX) throw new IllegalArgumentException("Missing a label for dimension '" + type.dimensions().get(i).name() + "' for " + 
type); } public TensorAddress build() { validate(); - return TensorAddress.of(labels); + return TensorAddressAny.ofUnsafe(labels); } } @@ -185,7 +231,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { super(type); } - private PartialBuilder(TensorType type, String[] labels) { + private PartialBuilder(TensorType type, int[] labels) { super(type, labels); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java index dcfee88d599..62ed4ad683c 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java @@ -204,7 +204,7 @@ public class TensorType { for (int i = 0; i < dimensions.size(); i++) if (dimensions.get(i).name().equals(dimension)) return i; - return -1; + return Tensor.INVALID_INDEX; } /* Returns the bound of this dimension if it is present and bound in this, empty otherwise */ diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java index 866b710b72e..37ca7f979a1 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java @@ -10,7 +10,6 @@ import com.yahoo.tensor.TypeResolver; import com.yahoo.tensor.evaluation.EvaluationContext; import com.yahoo.tensor.evaluation.Name; import com.yahoo.tensor.evaluation.TypeContext; -import com.yahoo.tensor.impl.StringTensorAddress; import java.util.Arrays; import java.util.HashMap; @@ -173,7 +172,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET private TensorAddress combineAddresses(TensorAddress a, int[] aToIndexes, TensorAddress b, int[] bToIndexes, TensorType concatType, long concatOffset, String concatDimension) { long[] combinedLabels = new long[concatType.dimensions().size()]; - Arrays.fill(combinedLabels, -1); + Arrays.fill(combinedLabels, 
Tensor.INVALID_INDEX); int concatDimensionIndex = concatType.indexOfDimension(concatDimension).get(); mapContent(a, combinedLabels, aToIndexes, concatDimensionIndex, concatOffset); // note: This sets a nonsensical value in the concat dimension boolean compatible = mapContent(b, combinedLabels, bToIndexes, concatDimensionIndex, concatOffset); // ... which is overwritten by the right value here @@ -192,7 +191,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET private int[] mapIndexes(TensorType fromType, TensorType toType) { int[] toIndexes = new int[fromType.dimensions().size()]; for (int i = 0; i < fromType.dimensions().size(); i++) - toIndexes[i] = toType.indexOfDimension(fromType.dimensions().get(i).name()).orElse(-1); + toIndexes[i] = toType.indexOfDimension(fromType.dimensions().get(i).name()).orElse(Tensor.INVALID_INDEX); return toIndexes; } @@ -209,7 +208,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET to[toIndex] = from.numericLabel(i) + concatOffset; } else { - if (to[toIndex] != -1 && to[toIndex] != from.numericLabel(i)) return false; + if (to[toIndex] != Tensor.INVALID_INDEX && to[toIndex] != from.numericLabel(i)) return false; to[toIndex] = from.numericLabel(i); } } @@ -369,7 +368,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET default -> throw new IllegalArgumentException("cannot handle: " + how); } } - return StringTensorAddress.unsafeOf(labels); + return TensorAddress.of(labels); } Tensor merge(CellVectorMapMap a, CellVectorMapMap b) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java index e0ac549651c..047d8ee6ef0 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java @@ -12,9 +12,11 @@ import com.yahoo.tensor.TypeResolver; import 
com.yahoo.tensor.evaluation.EvaluationContext; import com.yahoo.tensor.evaluation.Name; import com.yahoo.tensor.evaluation.TypeContext; -import com.yahoo.tensor.impl.StringTensorAddress; +import com.yahoo.tensor.impl.Convert; +import com.yahoo.tensor.impl.TensorAddressAny; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -206,7 +208,7 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP Tensor.Builder builder = Tensor.Builder.of(joinedType); for (Iterator<Tensor.Cell> i = superspace.cellIterator(); i.hasNext(); ) { Map.Entry<TensorAddress, Double> supercell = i.next(); - TensorAddress subaddress = mapAddressToSubspace(supercell.getKey(), subspaceIndexes); + TensorAddress subaddress = supercell.getKey().partialCopy(subspaceIndexes); Double subspaceValue = subspace.getAsDouble(subaddress); if (subspaceValue != null) { builder.cell(supercell.getKey(), @@ -226,13 +228,6 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP return subspaceIndexes; } - private static TensorAddress mapAddressToSubspace(TensorAddress superAddress, int[] subspaceIndexes) { - String[] subspaceLabels = new String[subspaceIndexes.length]; - for (int i = 0; i < subspaceIndexes.length; i++) - subspaceLabels[i] = superAddress.label(subspaceIndexes[i]); - return StringTensorAddress.unsafeOf(subspaceLabels); - } - /** Slow join which works for any two tensors */ private static Tensor generalJoin(Tensor a, Tensor b, TensorType joinedType, DoubleBinaryOperator combinator) { if (a instanceof IndexedTensor && b instanceof IndexedTensor) @@ -253,9 +248,9 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP private static void joinTo(IndexedTensor a, IndexedTensor b, TensorType joinedType, DimensionSizes joinedSize, int[] aToIndexes, int[] bToIndexes, Tensor.Builder builder, DoubleBinaryOperator combinator) { - Set<String> 
sharedDimensions = Sets.intersection(a.type().dimensionNames(), b.type().dimensionNames()); + Set<String> sharedDimensions = Set.copyOf(Sets.intersection(a.type().dimensionNames(), b.type().dimensionNames())); int sharedDimensionSize = sharedDimensions.size(); // Expensive to compute size after intersection - Set<String> dimensionsOnlyInA = Sets.difference(a.type().dimensionNames(), b.type().dimensionNames()); + Set<String> dimensionsOnlyInA = Set.copyOf(Sets.difference(a.type().dimensionNames(), b.type().dimensionNames())); DimensionSizes aIterateSize = joinedSizeOf(a.type(), joinedType, joinedSize); DimensionSizes bIterateSize = joinedSizeOf(b.type(), joinedType, joinedSize); @@ -266,7 +261,9 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP // for each combination of dimensions in a which is also in b while (aSubspace.hasNext()) { Tensor.Cell aCell = aSubspace.next(); - PartialAddress matchingBCells = partialAddress(a.type(), aSubspace.address(), sharedDimensions, sharedDimensionSize); + PartialAddress matchingBCells = sharedDimensionSize > 0 + ? 
partialAddress(a.type(), aSubspace.address(), sharedDimensions, sharedDimensionSize) + : empty; // for each matching combination of dimensions ony in b for (IndexedTensor.SubspaceIterator bSubspace = b.cellIterator(matchingBCells, bIterateSize); bSubspace.hasNext(); ) { Tensor.Cell bCell = bSubspace.next(); @@ -278,12 +275,15 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP } } + private static PartialAddress empty = new PartialAddress.Builder(0).build(); private static PartialAddress partialAddress(TensorType addressType, TensorAddress address, Set<String> retainDimensions, int sharedDimensionSize) { PartialAddress.Builder builder = new PartialAddress.Builder(sharedDimensionSize); - for (int i = 0; i < addressType.dimensions().size(); i++) - if (retainDimensions.contains(addressType.dimensions().get(i).name())) - builder.add(addressType.dimensions().get(i).name(), address.numericLabel(i)); + for (int i = 0; i < addressType.dimensions().size(); i++) { + String dimension = addressType.dimensions().get(i).name(); + if (retainDimensions.contains(dimension)) + builder.add(dimension, address.numericLabel(i)); + } return builder.build(); } @@ -338,7 +338,7 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP Map<TensorAddress, List<Tensor.Cell>> aCellsByCommonAddress = new HashMap<>(a.sizeAsInt()); for (Iterator<Tensor.Cell> cellIterator = a.cellIterator(); cellIterator.hasNext(); ) { Tensor.Cell aCell = cellIterator.next(); - TensorAddress partialCommonAddress = partialCommonAddress(aCell, aIndexesInCommon); + TensorAddress partialCommonAddress = aCell.getKey().partialCopy(aIndexesInCommon); aCellsByCommonAddress.computeIfAbsent(partialCommonAddress, (key) -> new ArrayList<>()).add(aCell); } @@ -346,7 +346,7 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP Tensor.Builder builder = Tensor.Builder.of(joinedType); for (Iterator<Tensor.Cell> cellIterator = b.cellIterator(); 
cellIterator.hasNext(); ) { Tensor.Cell bCell = cellIterator.next(); - TensorAddress partialCommonAddress = partialCommonAddress(bCell, bIndexesInCommon); + TensorAddress partialCommonAddress = bCell.getKey().partialCopy(bIndexesInCommon); for (Tensor.Cell aCell : aCellsByCommonAddress.getOrDefault(partialCommonAddress, List.of())) { TensorAddress combinedAddress = joinAddresses(aCell.getKey(), aIndexesInJoined, bCell.getKey(), bIndexesInJoined, joinedType); @@ -377,11 +377,12 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP private static TensorAddress joinAddresses(TensorAddress a, int[] aToIndexes, TensorAddress b, int[] bToIndexes, TensorType joinedType) { - String[] joinedLabels = new String[joinedType.dimensions().size()]; + int[] joinedLabels = new int[joinedType.dimensions().size()]; + Arrays.fill(joinedLabels, Tensor.INVALID_INDEX); mapContent(a, joinedLabels, aToIndexes); boolean compatible = mapContent(b, joinedLabels, bToIndexes); if ( ! compatible) return null; - return StringTensorAddress.unsafeOf(joinedLabels); + return TensorAddressAny.ofUnsafe(joinedLabels); } /** @@ -390,11 +391,12 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP * @return true if the mapping was successful, false if one of the destination positions was * occupied by a different value */ - private static boolean mapContent(TensorAddress from, String[] to, int[] indexMap) { - for (int i = 0; i < from.size(); i++) { + private static boolean mapContent(TensorAddress from, int[] to, int[] indexMap) { + for (int i = 0, sz = from.size(); i < sz; i++) { int toIndex = indexMap[i]; - String label = from.label(i); - if (to[toIndex] != null && ! 
to[toIndex].equals(label)) return false; + int label = Convert.safe2Int(from.numericLabel(i)); + if (to[toIndex] != Tensor.INVALID_INDEX && to[toIndex] != label) + return false; to[toIndex] = label; } return true; @@ -417,14 +419,5 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP return typeBuilder.build(); } - private static TensorAddress partialCommonAddress(Tensor.Cell cell, int[] indexMap) { - TensorAddress address = cell.getKey(); - String[] labels = new String[indexMap.length]; - for (int i = 0; i < labels.length; ++i) { - labels[i] = address.label(indexMap[i]); - } - return StringTensorAddress.unsafeOf(labels); - } - } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Reduce.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Reduce.java index 77e82b818a7..0985e48c4e4 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Reduce.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Reduce.java @@ -10,7 +10,6 @@ import com.yahoo.tensor.evaluation.EvaluationContext; import com.yahoo.tensor.evaluation.Name; import com.yahoo.tensor.evaluation.TypeContext; import com.yahoo.tensor.impl.Convert; -import com.yahoo.tensor.impl.StringTensorAddress; import java.util.ArrayList; import java.util.Collections; @@ -164,7 +163,7 @@ public class Reduce<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET int reducedLabelIndex = 0; for (int toKeep : indexesToKeep) reducedLabels[reducedLabelIndex++] = address.label(toKeep); - return StringTensorAddress.unsafeOf(reducedLabels); + return TensorAddress.of(reducedLabels); } private static Tensor reduceAllGeneral(Tensor argument, Aggregator aggregator) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Rename.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Rename.java index ecd302db361..910c5900495 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Rename.java +++ 
b/vespajlib/src/main/java/com/yahoo/tensor/functions/Rename.java @@ -8,7 +8,6 @@ import com.yahoo.tensor.TypeResolver; import com.yahoo.tensor.evaluation.EvaluationContext; import com.yahoo.tensor.evaluation.Name; import com.yahoo.tensor.evaluation.TypeContext; -import com.yahoo.tensor.impl.StringTensorAddress; import java.util.HashMap; import java.util.Iterator; @@ -123,7 +122,7 @@ public class Rename<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET String[] reorderedLabels = new String[toIndexes.length]; for (int i = 0; i < toIndexes.length; i++) reorderedLabels[toIndexes[i]] = address.label(i); - return StringTensorAddress.unsafeOf(reorderedLabels); + return TensorAddress.of(reorderedLabels); } private String toVectorString(List<String> elements) { diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java new file mode 100644 index 00000000000..0ab1454eb58 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java @@ -0,0 +1,70 @@ +package com.yahoo.tensor.impl; + + +import com.yahoo.tensor.Tensor; + +import java.util.Arrays; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class Label { + private static final String [] SMALL_INDEXES = createSmallIndexesAsStrings(1000); + private final static Map<String, Integer> string2Enum = new ConcurrentHashMap<>(); + // Index 0 is unused, that is a valid positive number + // 1(-1) is reserved for the Tensor.INVALID_INDEX + private static volatile String [] uniqueStrings = {"UNIQUE_UNUSED_MAGIC", "Tensor.INVALID_INDEX"}; + private static int numUniqeStrings = 2; + + private static String[] createSmallIndexesAsStrings(int count) { + String [] asStrings = new String[count]; + for (int i = 0; i < count; i++) { + asStrings[i] = String.valueOf(i); + } + return asStrings; + } + + private static int addNewUniqueString(String s) { + synchronized (string2Enum) { + if (numUniqeStrings >= 
uniqueStrings.length) { + uniqueStrings = Arrays.copyOf(uniqueStrings, uniqueStrings.length*2); + } + uniqueStrings[numUniqeStrings] = s; + return -numUniqeStrings++; + } + } + + private static String asNumericString(long index) { + return ((index >= 0) && (index < SMALL_INDEXES.length)) ? SMALL_INDEXES[(int)index] : String.valueOf(index); + } + + private static boolean validNumericIndex(String s) { + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if ((c < '0') || (c > '9')) return false; + } + return true; + } + + public static int toNumber(String s) { + if (s == null) { return Tensor.INVALID_INDEX; } + try { + if (validNumericIndex(s)) { + return Integer.parseInt(s); + } + } catch (NumberFormatException e) { + } + return string2Enum.computeIfAbsent(s, Label::addNewUniqueString); + } + public static String fromNumber(int v) { + if (v >= 0) { + return asNumericString(v); + } else { + if (v == Tensor.INVALID_INDEX) { return null; } + return uniqueStrings[-v]; + } + } + public static String fromNumber(long v) { + return fromNumber(Convert.safe2Int(v)); + } + +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/NumericTensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/NumericTensorAddress.java deleted file mode 100644 index 983074c9c90..00000000000 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/NumericTensorAddress.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.yahoo.tensor.impl; - -import com.yahoo.tensor.TensorAddress; - -import java.util.Arrays; -import java.util.stream.Collectors; - -public final class NumericTensorAddress extends TensorAddress { - private static final String [] SMALL_INDEXES = createSmallIndexesAsStrings(1000); - - private final long[] labels; - - private static String[] createSmallIndexesAsStrings(int count) { - String [] asStrings = new String[count]; - for (int i = 0; i < count; i++) { - asStrings[i] = String.valueOf(i); - } - return asStrings; - } - - private NumericTensorAddress(long[] 
labels) { - this.labels = labels; - } - - public static NumericTensorAddress of(long ... labels) { - return new NumericTensorAddress(Arrays.copyOf(labels, labels.length)); - } - - public static NumericTensorAddress unsafeOf(long ... labels) { - return new NumericTensorAddress(labels); - } - - @Override - public int size() { return labels.length; } - - @Override - public String label(int i) { return asString(labels[i]); } - - @Override - public long numericLabel(int i) { return labels[i]; } - - @Override - public TensorAddress withLabel(int index, long label) { - long[] labels = Arrays.copyOf(this.labels, this.labels.length); - labels[index] = label; - return new NumericTensorAddress(labels); - } - - @Override - public String toString() { - return "cell address (" + Arrays.stream(labels).mapToObj(NumericTensorAddress::asString).collect(Collectors.joining(",")) + ")"; - } - - public static String asString(long index) { - return ((index >= 0) && (index < SMALL_INDEXES.length)) ? SMALL_INDEXES[(int)index] : String.valueOf(index); - } - -} - diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/StringTensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/StringTensorAddress.java deleted file mode 100644 index ca54494a19c..00000000000 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/StringTensorAddress.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.yahoo.tensor.impl; - -import com.yahoo.tensor.TensorAddress; - -import java.util.Arrays; - -public final class StringTensorAddress extends TensorAddress { - - private final String[] labels; - - private StringTensorAddress(String [] labels) { - this.labels = labels; - } - - public static StringTensorAddress of(String[] labels) { - return new StringTensorAddress(Arrays.copyOf(labels, labels.length)); - } - - public static StringTensorAddress unsafeOf(String[] labels) { - return new StringTensorAddress(labels); - } - - @Override - public int size() { return labels.length; } - - @Override - public String 
label(int i) { return labels[i]; } - - @Override - public long numericLabel(int i) { - try { - return Long.parseLong(labels[i]); - } - catch (NumberFormatException e) { - throw new IllegalArgumentException("Expected an integer label in " + this + " at position " + i + " but got '" + labels[i] + "'"); - } - } - - @Override - public TensorAddress withLabel(int index, long label) { - String[] labels = Arrays.copyOf(this.labels, this.labels.length); - labels[index] = NumericTensorAddress.asString(label); - return new StringTensorAddress(labels); - } - - - @Override - public String toString() { - return "cell address (" + String.join(",", labels) + ")"; - } - -} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java new file mode 100644 index 00000000000..31863c99a74 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java @@ -0,0 +1,136 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.tensor.impl; + +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorAddress; + +import static com.yahoo.tensor.impl.Convert.safe2Int; +import static com.yahoo.tensor.impl.Label.toNumber; +import static com.yahoo.tensor.impl.Label.fromNumber; + +/** + * Parent of tensor address family centered around each dimension as int. + * A positive number represents a numeric index usable as a direect addressing. 
+ * - 1 is representing an invalid/null address + * Other negative numbers are an enumeration maintained in {@link Label} + * + * @author baldersheim + */ +abstract public class TensorAddressAny extends TensorAddress { + @Override + public String label(int i) { + return fromNumber((int)numericLabel(i)); + } + + public static TensorAddress of() { + return TensorAddressEmpty.empty; + } + public static TensorAddress of(String label) { + return new TensorAddressAny1(toNumber(label)); + } + public static TensorAddress of(String label0, String label1) { + return new TensorAddressAny2(toNumber(label0), toNumber(label1)); + } + public static TensorAddress of(String label0, String label1, String label2) { + return new TensorAddressAny3(toNumber(label0), toNumber(label1), toNumber(label2)); + } + public static TensorAddress of(String label0, String label1, String label2, String label3) { + return new TensorAddressAny4(toNumber(label0), toNumber(label1), toNumber(label2), toNumber(label3)); + } + public static TensorAddress of(String [] labels) { + int [] labelsAsInt = new int[labels.length]; + for (int i = 0; i < labels.length; i++) { + labelsAsInt[i] = toNumber(labels[i]); + } + return ofUnsafe(labelsAsInt); + } + public static TensorAddress of(int label) { + return new TensorAddressAny1(sanitize(label)); + } + public static TensorAddress of(int label0, int label1) { + return new TensorAddressAny2(sanitize(label0), sanitize(label1)); + } + public static TensorAddress of(int label0, int label1, int label2) { + return new TensorAddressAny3(sanitize(label0), sanitize(label1), sanitize(label2)); + } + public static TensorAddress of(int label0, int label1, int label2, int label3) { + return new TensorAddressAny4(sanitize(label0), sanitize(label1), sanitize(label2), sanitize(label3)); + } + public static TensorAddress of(int ... 
labels) { + return switch (labels.length) { + case 0 -> of(); + case 1 -> new TensorAddressAny1(sanitize(labels[0])); + case 2 -> new TensorAddressAny2(sanitize(labels[0]), sanitize(labels[1])); + case 3 -> new TensorAddressAny3(sanitize(labels[0]), sanitize(labels[1]), sanitize(labels[2])); + case 4 -> new TensorAddressAny4(sanitize(labels[0]), sanitize(labels[1]), sanitize(labels[2]), sanitize(labels[3])); + default -> { + for (int i = 0; i < labels.length; i++) { + sanitize(labels[i]); + } + yield new TensorAddressAnyN(labels); + } + }; + } + public static TensorAddress of(long label) { + return of(safe2Int(label)); + } + + public static TensorAddress of(long label0, long label1) { + return of(safe2Int(label0), safe2Int(label1)); + } + + public static TensorAddress of(long label0, long label1, long label2) { + return of(safe2Int(label0), safe2Int(label1), safe2Int(label2)); + } + + public static TensorAddress of(long label0, long label1, long label2, long label3) { + return of(safe2Int(label0), safe2Int(label1), safe2Int(label2), safe2Int(label3)); + } + + public static TensorAddress of(long ... 
labels) { + return switch (labels.length) { + case 0 -> of(); + case 1 -> ofUnsafe(safe2Int(labels[0])); + case 2 -> ofUnsafe(safe2Int(labels[0]), safe2Int(labels[1])); + case 3 -> ofUnsafe(safe2Int(labels[0]), safe2Int(labels[1]), safe2Int(labels[2])); + case 4 -> ofUnsafe(safe2Int(labels[0]), safe2Int(labels[1]), safe2Int(labels[2]), safe2Int(labels[3])); + default -> { + int [] labelsAsInt = new int[labels.length]; + for (int i = 0; i < labels.length; i++) { + labelsAsInt[i] = safe2Int(labels[i]); + } + yield of(labelsAsInt); + } + }; + } + + private static TensorAddress ofUnsafe(int label) { + return new TensorAddressAny1(label); + } + private static TensorAddress ofUnsafe(int label0, int label1) { + return new TensorAddressAny2(label0, label1); + } + private static TensorAddress ofUnsafe(int label0, int label1, int label2) { + return new TensorAddressAny3(label0, label1, label2); + } + private static TensorAddress ofUnsafe(int label0, int label1, int label2, int label3) { + return new TensorAddressAny4(label0, label1, label2, label3); + } + public static TensorAddress ofUnsafe(int ... labels) { + return switch (labels.length) { + case 0 -> of(); + case 1 -> ofUnsafe(labels[0]); + case 2 -> ofUnsafe(labels[0], labels[1]); + case 3 -> ofUnsafe(labels[0], labels[1], labels[2]); + case 4 -> ofUnsafe(labels[0], labels[1], labels[2], labels[3]); + default -> new TensorAddressAnyN(labels); + }; + } + private static int sanitize(int label) { + if (label < Tensor.INVALID_INDEX) { + throw new IndexOutOfBoundsException("cell label " + label + " must be positive"); + } + return label; + } +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java new file mode 100644 index 00000000000..a2b0d318a50 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java @@ -0,0 +1,37 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +package com.yahoo.tensor.impl; + +import com.yahoo.tensor.TensorAddress; + +/** + * Single dimension + * @author baldersheim + */ +final class TensorAddressAny1 extends TensorAddressAny { + private final int label; + TensorAddressAny1(int label) { this.label = label; } + + @Override public int size() { return 1; } + + @Override + public long numericLabel(int i) { + if (i == 0) { + return label; + } + throw new IndexOutOfBoundsException("Index is not zero: " + i); + } + + @Override + public TensorAddress withLabel(int labelIndex, long label) { + if (labelIndex == 0) return new TensorAddressAny1(Convert.safe2Int(label)); + throw new IllegalArgumentException("No label " + labelIndex); + } + + @Override public int hashCode() { return Math.abs(label); } + + @Override + public boolean equals(Object o) { + return (o instanceof TensorAddressAny1 any) && (label == any.label); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java new file mode 100644 index 00000000000..d77a689852f --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java @@ -0,0 +1,49 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.tensor.impl; + +import com.yahoo.tensor.TensorAddress; + +import static java.lang.Math.abs; + +/** + * 2 dimensional address + * @author baldersheim + */ +final class TensorAddressAny2 extends TensorAddressAny { + private final int label0, label1; + TensorAddressAny2(int label0, int label1) { + this.label0 = label0; + this.label1 = label1; + } + + @Override public int size() { return 2; } + + @Override + public long numericLabel(int i) { + return switch (i) { + case 0 -> label0; + case 1 -> label1; + default -> throw new IndexOutOfBoundsException("Index is not in [0,1]: " + i); + }; + } + + @Override + public TensorAddress withLabel(int labelIndex, long label) { + return switch (labelIndex) { + case 0 -> new TensorAddressAny2(Convert.safe2Int(label), label1); + case 1 -> new TensorAddressAny2(label0, Convert.safe2Int(label)); + default -> throw new IllegalArgumentException("No label " + labelIndex); + }; + } + + @Override + public int hashCode() { + return abs(label0) | (abs(label1) << 32 - Integer.numberOfLeadingZeros(abs(label0))); + } + + @Override + public boolean equals(Object o) { + return (o instanceof TensorAddressAny2 any) && (label0 == any.label0) && (label1 == any.label1); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java new file mode 100644 index 00000000000..95e14bd375c --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java @@ -0,0 +1,57 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.tensor.impl; + +import com.yahoo.tensor.TensorAddress; + +import static java.lang.Math.abs; + +/** + * 3 dimensional address + * @author baldersheim + */ +final class TensorAddressAny3 extends TensorAddressAny { + private final int label0, label1, label2; + TensorAddressAny3(int label0, int label1, int label2) { + this.label0 = label0; + this.label1 = label1; + this.label2 = label2; + } + + @Override public int size() { return 3; } + + @Override + public long numericLabel(int i) { + return switch (i) { + case 0 -> label0; + case 1 -> label1; + case 2 -> label2; + default -> throw new IndexOutOfBoundsException("Index is not in [0,2]: " + i); + }; + } + + @Override + public TensorAddress withLabel(int labelIndex, long label) { + return switch (labelIndex) { + case 0 -> new TensorAddressAny3(Convert.safe2Int(label), label1, label2); + case 1 -> new TensorAddressAny3(label0, Convert.safe2Int(label), label2); + case 2 -> new TensorAddressAny3(label0, label1, Convert.safe2Int(label)); + default -> throw new IllegalArgumentException("No label " + labelIndex); + }; + } + + @Override + public int hashCode() { + return abs(label0) | + (abs(label1) << (1*32 - Integer.numberOfLeadingZeros(abs(label0)))) | + (abs(label2) << (2*32 - (Integer.numberOfLeadingZeros(abs(label0)) + Integer.numberOfLeadingZeros(abs(label1))))); + } + + @Override + public boolean equals(Object o) { + return (o instanceof TensorAddressAny3 any) && + (label0 == any.label0) && + (label1 == any.label1) && + (label2 == any.label2); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java new file mode 100644 index 00000000000..8a45483340e --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java @@ -0,0 +1,62 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.tensor.impl; + +import com.yahoo.tensor.TensorAddress; + +import static java.lang.Math.abs; + +/** + * 4 dimensional address + * @author baldersheim + */ +final class TensorAddressAny4 extends TensorAddressAny { + private final int label0, label1, label2, label3; + TensorAddressAny4(int label0, int label1, int label2, int label3) { + this.label0 = label0; + this.label1 = label1; + this.label2 = label2; + this.label3 = label3; + } + + @Override public int size() { return 4; } + + @Override + public long numericLabel(int i) { + return switch (i) { + case 0 -> label0; + case 1 -> label1; + case 2 -> label2; + case 3 -> label3; + default -> throw new IndexOutOfBoundsException("Index is not in [0,3]: " + i); + }; + } + + @Override + public TensorAddress withLabel(int labelIndex, long label) { + return switch (labelIndex) { + case 0 -> new TensorAddressAny4(Convert.safe2Int(label), label1, label2, label3); + case 1 -> new TensorAddressAny4(label0, Convert.safe2Int(label), label2, label3); + case 2 -> new TensorAddressAny4(label0, label1, Convert.safe2Int(label), label3); + case 3 -> new TensorAddressAny4(label0, label1, label2, Convert.safe2Int(label)); + default -> throw new IllegalArgumentException("No label " + labelIndex); + }; + } + + @Override + public int hashCode() { + return abs(label0) | + (abs(label1) << (1*32 - Integer.numberOfLeadingZeros(abs(label0)))) | + (abs(label2) << (2*32 - (Integer.numberOfLeadingZeros(abs(label0)) + Integer.numberOfLeadingZeros(abs(label1))))) | + (abs(label3) << (3*32 - (Integer.numberOfLeadingZeros(abs(label0)) + Integer.numberOfLeadingZeros(abs(label1)) + Integer.numberOfLeadingZeros(abs(label1))))); + } + + @Override + public boolean equals(Object o) { + return (o instanceof TensorAddressAny4 any) && + (label0 == any.label0) && + (label1 == any.label1) && + (label2 == any.label2) && + (label3 == any.label3); + } +} diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java 
b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java
new file mode 100644
index 00000000000..acd7ed60722
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java
@@ -0,0 +1,48 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.tensor.impl;
+
+import com.yahoo.tensor.TensorAddress;
+
+import java.util.Arrays;
+
+import static java.lang.Math.abs;
+
+/**
+ * N dimensional address, stored as an int array holding one label per dimension (at least one).
+ * @author baldersheim
+ */
+final class TensorAddressAnyN extends TensorAddressAny {
+    private final int [] labels;
+    TensorAddressAnyN(int [] labels) { // keeps the given array as-is (no defensive copy is made here)
+        if (labels.length < 1) throw new IllegalArgumentException("Need at least 1 label");
+        this.labels = labels;
+    }
+
+    @Override public int size() { return labels.length; }
+    @Override public long numericLabel(int i) { return labels[i]; } // an out-of-range i surfaces as the array's own index exception
+
+    @Override
+    public TensorAddress withLabel(int labelIndex, long label) { // builds a new address from a modified copy; this instance is not modified
+        int [] copy = Arrays.copyOf(labels, labels.length);
+        copy[labelIndex] = Convert.safe2Int(label); // Convert.safe2Int presumably narrows long to int with a range check - confirm
+        return new TensorAddressAnyN(copy);
+    }
+
+    @Override public int hashCode() { // ORs the abs() of every label, each shifted by the bit length of the hash built so far
+        int hash = abs(labels[0]); // seed with the first label; the constructor guarantees it exists
+        for (int i = 1; i < size(); i++) { // start at 1: label 0 is already the seed and must not be folded in twice
+            hash = hash | (abs(labels[i]) << (32 - Integer.numberOfLeadingZeros(hash)));
+        }
+        return hash;
+    }
+
+    @Override
+    public boolean equals(Object o) { // equal only to another TensorAddressAnyN of the same size with identical labels
+        if (! (o instanceof TensorAddressAnyN any) || (size() != any.size())) return false;
+        for (int i = 0; i < size(); i++) {
+            if (labels[i] != any.labels[i]) return false;
+        }
+        return true;
+    }
+}
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java
new file mode 100644
index 00000000000..2d9cd3eed78
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java
@@ -0,0 +1,26 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.tensor.impl;
+
+import com.yahoo.tensor.TensorAddress;
+
+/**
+ * 0 dimensional/empty address: has no labels, so every label accessor throws.
+ * @author baldersheim
+ */
+final class TensorAddressEmpty extends TensorAddressAny {
+    static TensorAddress empty = new TensorAddressEmpty(); // shared instance; the private constructor keeps other classes from creating more
+    private TensorAddressEmpty() {}
+    @Override public int size() { return 0; }
+    @Override public long numericLabel(int i) { throw new IllegalArgumentException("Empty address with no labels"); } // always throws, whatever i is
+
+    @Override
+    public TensorAddress withLabel(int labelIndex, long label) { // always throws: there is no label to replace
+        throw new IllegalArgumentException("No label " + labelIndex);
+    }
+
+    @Override
+    public int hashCode() { return 0; } // constant hash, consistent with equals below
+    @Override
+    public boolean equals(Object o) { return o instanceof TensorAddressEmpty; } // any TensorAddressEmpty is equal to any other
+}
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/IndexedTensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/IndexedTensorTestCase.java
index afc95d295f0..528ca57d256 100644
--- a/vespajlib/src/test/java/com/yahoo/tensor/IndexedTensorTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/tensor/IndexedTensorTestCase.java
@@ -46,12 +46,7 @@ public class IndexedTensorTestCase {
 
     @Test
     public void testNegativeLabels() {
-        TensorAddress numeric = TensorAddress.of(-1, 0, 1, 1234567, -1234567);
-        assertEquals("-1", numeric.label(0));
-        assertEquals("0", numeric.label(1));
-        assertEquals("1", numeric.label(2));
-        assertEquals("1234567", numeric.label(3));
-        assertEquals("-1234567", numeric.label(4));
+        assertThrows(IndexOutOfBoundsException.class, () ->TensorAddress.of(-1, 0, 1, 1234567, -1234567));
     }
 
     private void verifyFloat(String spec) {
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java
new file mode 100644
index 00000000000..472ebca2360
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java
@@ -0,0 +1,72 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+package com.yahoo.tensor;
+
+import static com.yahoo.tensor.TensorAddress.of;
+import static com.yahoo.tensor.TensorAddress.ofLabels;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+/**
+ * Test for tensor address.
+ * Covers equality and hashing, null labels, and withLabel for addresses of 0 to 9 dimensions.
+ * @author baldersheim
+ */
+public class TensorAddressTestCase {
+    public static void equal(TensorAddress a, TensorAddress b) { // public static: also used by TensorAddressAnyTestCase via static import
+        assertEquals(a.hashCode(), b.hashCode());
+        assertEquals(a, b);
+        assertEquals(a.size(), b.size());
+        for (int i = 0; i < a.size(); i++) { // compare every label in both its string and numeric form
+            assertEquals(a.label(i), b.label(i));
+            assertEquals(a.numericLabel(i), b.numericLabel(i));
+        }
+    }
+    public static void notEqual(TensorAddress a, TensorAddress b) {
+        assertNotEquals(a.hashCode(), b.hashCode()); // Not guaranteed (hash collisions are legal), but a collision here should be very rare
+        assertNotEquals(a, b);
+    }
+    @Test
+    void testStringVersusNumericAddressEquality() { // the label "1" and the index 1 must produce equal addresses
+        equal(ofLabels("1"), of(1));
+    }
+    @Test
+    void testInEquality() {
+        notEqual(ofLabels("1"), ofLabels("2"));
+        notEqual(of(1), of(2));
+    }
+    @Test
+    void testDimensionsEffectsEqualityAndHash() { // same leading label but a different number of dimensions must differ
+        notEqual(ofLabels("1"), ofLabels("1", "1"));
+        notEqual(of(1), of(1, 1));
+    }
+    @Test
+    void testAllowNullDimension() { // a null label is accepted and reads back as -1 numerically and null as a string
+        TensorAddress s1 = ofLabels("1", null, "2");
+        TensorAddress s2 = ofLabels("1", "2");
+        assertNotEquals(s1, s2);
+        assertEquals(-1, s1.numericLabel(1));
+        assertEquals(null, s1.label(1));
+    }
+
+    private static void verifyWithLabel(int dimensions) { // replaces labels one at a time and checks each result against a directly built address
+        int [] indexes = new int[dimensions];
+        Arrays.fill(indexes, 1);
+        TensorAddress next = of(indexes);
+        for (int i = 0; i < dimensions; i++) {
+            indexes[i] = 3;
+            assertEquals(of(indexes), next = next.withLabel(i, 3)); // next keeps the updated address so the changes accumulate
+        }
+    }
+    @Test
+    void testWithLabel() {
+        for (int i=0; i < 10; i++) { // 0..9 dimensions
+            verifyWithLabel(i);
+        }
+    }
+
+}
diff --git
a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java index 74237a218fb..91880c9af93 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java @@ -73,7 +73,7 @@ public class TensorFunctionBenchmark { for (int i = 0; i < vectorCount; i++) { Tensor.Builder builder = Tensor.Builder.of(type); for (int j = 0; j < vectorSize; j++) { - builder.cell().label("x", String.valueOf(j)).value(random.nextDouble()); + builder.cell().label("x", j).value(random.nextDouble()); } tensors.add(builder.build()); } @@ -88,8 +88,8 @@ public class TensorFunctionBenchmark { for (int i = 0; i < vectorCount; i++) { for (int j = 0; j < vectorSize; j++) { builder.cell() - .label("i", String.valueOf(i)) - .label("x", String.valueOf(j)) + .label("i", i) + .label("x", j) .value(random.nextDouble()); } } @@ -110,6 +110,7 @@ public class TensorFunctionBenchmark { double time = 0; // ---------------- Indexed unbound: + time = new TensorFunctionBenchmark().benchmark(50000, vectors(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false); System.out.printf("Indexed unbound vectors, time per join: %1$8.3f ms\n", time); time = new TensorFunctionBenchmark().benchmark(50000, matrix(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false); @@ -132,6 +133,7 @@ public class TensorFunctionBenchmark { // ---------------- Indexed (unbound) with extra space (sidesteps current special-case optimizations): time = new TensorFunctionBenchmark().benchmark(500, vectors(100, 300, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, true); System.out.printf("Indexed vectors, x space time per join: %1$8.3f ms\n", time); + time = new TensorFunctionBenchmark().benchmark(500, matrix(100, 300, 
TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, true); System.out.printf("Indexed matrix, x space time per join: %1$8.3f ms\n", time); @@ -143,16 +145,16 @@ public class TensorFunctionBenchmark { System.out.printf("Mapped matrix, x space time per join: %1$8.3f ms\n", time); /* 2.4Ghz Intel Core i9, Macbook Pro 2019 - * Indexed unbound vectors, time per join: 0,067 ms - * Indexed unbound matrix, time per join: 0,107 ms - * Indexed bound vectors, time per join: 0,068 ms - * Indexed bound matrix, time per join: 0,105 ms - * Mapped vectors, time per join: 1,342 ms - * Mapped matrix, time per join: 3,448 ms - * Indexed vectors, x space time per join: 6,398 ms - * Indexed matrix, x space time per join: 3,220 ms - * Mapped vectors, x space time per join: 14,984 ms - * Mapped matrix, x space time per join: 19,873 ms + Indexed unbound vectors, time per join: 0,066 ms + Indexed unbound matrix, time per join: 0,108 ms + Indexed bound vectors, time per join: 0,068 ms + Indexed bound matrix, time per join: 0,106 ms + Mapped vectors, time per join: 0,845 ms + Mapped matrix, time per join: 1,779 ms + Indexed vectors, x space time per join: 5,778 ms + Indexed matrix, x space time per join: 3,342 ms + Mapped vectors, x space time per join: 8,184 ms + Mapped matrix, x space time per join: 11,547 ms */ } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java index 7cf0bd35b38..85619dca16c 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java @@ -33,7 +33,7 @@ public class DynamicTensorTestCase { public void testDynamicMappedRank1TensorFunction() { TensorType sparse = TensorType.fromSpec("tensor(x{})"); DynamicTensor<Name> t2 = DynamicTensor.from(sparse, - Collections.singletonMap(new 
TensorAddress.Builder(sparse).add("x", "a").build(),
+                                               java.util.Map.of(new TensorAddress.Builder(sparse).add("x", "a").build(),
                                                                               new Constant(5)));
         assertEquals(Tensor.from(sparse, "{{x:a}:5}"), t2.evaluate());
         assertEquals("tensor(x{}):{{x:a}:5.0}", t2.toString());
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java
new file mode 100644
index 00000000000..ae13b95052b
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java
@@ -0,0 +1,31 @@
+package com.yahoo.tensor.impl;
+
+import static com.yahoo.tensor.impl.TensorAddressAny.of;
+import static com.yahoo.tensor.TensorAddressTestCase.equal;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+public class TensorAddressAnyTestCase { // tests the TensorAddressAny.of factories for 0 to 9 labels
+    @Test
+    void testSize() {
+        for (int i = 0; i < 10; i++) {
+            int [] indexes = new int [i]; // all-zero labels; only the count matters here
+            assertEquals(i, of(indexes).size());
+        }
+    }
+
+    @Test
+    void testNumericStringEquality() { // an address built from strings "0".."i-1" must equal the one built from the same ints
+        for (int i = 0; i < 10; i++) {
+            int [] numericIndexes = new int [i];
+            String [] stringIndexes = new String[i];
+            for (int j = 0; j < i; j++) {
+                numericIndexes[j] = j;
+                stringIndexes[j] = String.valueOf(j);
+            }
+            equal(of(stringIndexes), of(numericIndexes)); // equal() checks hash, equals, size and every label
+        }
+    }
+
+} |