diff options
55 files changed, 1353 insertions, 475 deletions
diff --git a/client/go/go.mod b/client/go/go.mod index 3e721fe2a06..8699f3e9245 100644 --- a/client/go/go.mod +++ b/client/go/go.mod @@ -8,7 +8,7 @@ require ( github.com/fatih/color v1.16.0 // This is the most recent version compatible with Go 1.20. Upgrade when we upgrade our Go version github.com/go-json-experiment/json v0.0.0-20230324203220-04923b7a9528 - github.com/klauspost/compress v1.17.4 + github.com/klauspost/compress v1.17.5 github.com/mattn/go-colorable v0.1.13 github.com/mattn/go-isatty v0.0.20 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c diff --git a/client/go/go.sum b/client/go/go.sum index e2b1c85442d..fc5730a071d 100644 --- a/client/go/go.sum +++ b/client/go/go.sum @@ -20,6 +20,8 @@ github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4s github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E= +github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/http/Client.java b/config-model/src/main/java/com/yahoo/vespa/model/container/http/Client.java index 29222817d17..e4abef4eb33 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/http/Client.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/http/Client.java @@ -4,28 +4,36 @@ package com.yahoo.vespa.model.container.http; import com.yahoo.config.provision.DataplaneToken; import 
java.security.cert.X509Certificate; +import java.util.Collection; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.yahoo.vespa.model.container.http.Client.Permission.READ; +import static com.yahoo.vespa.model.container.http.Client.Permission.WRITE; /** * Represents a client. The client is identified by one of the provided certificates and have a set of permissions. * * @author mortent + * @author bjorncs */ public class Client { private final String id; - private final List<String> permissions; + private final Set<Permission> permissions; private final List<X509Certificate> certificates; private final List<DataplaneToken> tokens; private final boolean internal; - public Client(String id, List<String> permissions, List<X509Certificate> certificates, List<DataplaneToken> tokens) { + public Client(String id, Collection<Permission> permissions, List<X509Certificate> certificates, List<DataplaneToken> tokens) { this(id, permissions, certificates, tokens, false); } - private Client(String id, List<String> permissions, List<X509Certificate> certificates, List<DataplaneToken> tokens, + private Client(String id, Collection<Permission> permissions, List<X509Certificate> certificates, List<DataplaneToken> tokens, boolean internal) { this.id = id; - this.permissions = List.copyOf(permissions); + this.permissions = Set.copyOf(permissions); this.certificates = List.copyOf(certificates); this.tokens = List.copyOf(tokens); this.internal = internal; @@ -35,7 +43,7 @@ public class Client { return id; } - public List<String> permissions() { + public Set<Permission> permissions() { return permissions; } @@ -50,6 +58,29 @@ public class Client { } public static Client internalClient(List<X509Certificate> certificates) { - return new Client("_internal", List.of("read","write"), certificates, List.of(), true); + return new Client("_internal", Set.of(READ, WRITE), certificates, List.of(), true); + } + + 
public enum Permission { + READ, WRITE; + + public String asString() { + return switch (this) { + case READ -> "read"; + case WRITE -> "write"; + }; + } + + public static Permission fromString(String v) { + return switch (v) { + case "read" -> READ; + case "write" -> WRITE; + default -> throw new IllegalArgumentException("Invalid permission '%s'. Valid values are 'read' and 'write'.".formatted(v)); + }; + } + + public static Set<Permission> fromCommaSeparatedString(String str) { + return Stream.of(str.split(",")).map(v -> Permission.fromString(v.strip())).collect(Collectors.toSet()); + } } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudDataPlaneFilter.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudDataPlaneFilter.java index a1b569fa110..0574e13e387 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudDataPlaneFilter.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudDataPlaneFilter.java @@ -46,7 +46,7 @@ class CloudDataPlaneFilter extends Filter implements CloudDataPlaneFilterConfig. 
.map(x -> new CloudDataPlaneFilterConfig.Clients.Builder() .id(x.id()) .certificates(x.certificates().stream().map(X509CertificateUtils::toPem).toList()) - .permissions(x.permissions())) + .permissions(x.permissions().stream().map(Client.Permission::asString).sorted().toList())) .toList(); builder.clients(clientsCfg).legacyMode(false); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilter.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilter.java index bb24f96784e..e2a522103e6 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilter.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilter.java @@ -44,7 +44,7 @@ class CloudTokenDataPlaneFilter extends Filter implements CloudTokenDataPlaneFil .map(x -> new CloudTokenDataPlaneFilterConfig.Clients.Builder() .id(x.id()) .tokens(tokensConfig(x.tokens())) - .permissions(x.permissions())) + .permissions(x.permissions().stream().map(Client.Permission::asString).sorted().toList())) .toList(); builder.clients(clientsCfg).tokenContext(tokenContext); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index e4038a5bca6..8eca29215d4 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -518,10 +518,8 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { String clientId = XML.attribute("id", clientElement).orElseThrow(); if (clientId.startsWith("_")) throw new IllegalArgumentException("Invalid client id '%s', id cannot start with '_'".formatted(clientId)); - List<String> permissions = XML.attribute("permissions", clientElement) - .map(p 
-> p.split(",")).stream() - .flatMap(Arrays::stream) - .toList(); + var permissions = XML.attribute("permissions", clientElement) + .map(Client.Permission::fromCommaSeparatedString).orElse(Set.of()); var certificates = XML.getChildren(clientElement, "certificate").stream() .flatMap(certElem -> { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java index c89ea421b39..1c5eb16be80 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/CloudTokenDataPlaneFilterTest.java @@ -16,7 +16,6 @@ import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.http.ConnectorConfig; import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig; -import com.yahoo.processing.response.Data; import com.yahoo.vespa.model.container.ApplicationContainer; import com.yahoo.vespa.model.container.ContainerModel; import com.yahoo.vespa.model.container.http.ConnectorFactory; @@ -41,14 +40,14 @@ import static com.yahoo.vespa.model.container.xml.CloudDataPlaneFilterTest.creat import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; public class CloudTokenDataPlaneFilterTest extends ContainerModelBuilderTestBase { private static final String servicesXmlTemplate = """ <container version='1.0'> <clients> - <client id="foo" permissions="read,write"> + <client id="foo" permissions="read, write"> <certificate file="%s"/> </client> <client id="bar" permissions="read"> @@ -145,6 +144,24 @@ public class 
CloudTokenDataPlaneFilterTest extends ContainerModelBuilderTestBase } + @Test + void fails_on_unknown_permission() throws IOException { + var certFile = securityFolder.resolve("foo.pem"); + var servicesXml = """ + <container version='1.0'> + <clients> + <client id="foo" permissions="read,unknown-permission"> + <certificate file="%s"/> + </client> + </clients> + </container> + """.formatted(applicationFolder.toPath().relativize(certFile).toString()); + var clusterElem = DomBuilderTest.parse(servicesXml); + createCertificate(certFile); + var exception = assertThrows(IllegalArgumentException.class, () -> buildModel(Set.of(mtlsEndpoint), defaultTokens, clusterElem)); + assertEquals("Invalid permission 'unknown-permission'. Valid values are 'read' and 'write'.", exception.getMessage()); + } + private static CloudTokenDataPlaneFilterConfig.Clients.Tokens tokenConfig( String id, Collection<String> fingerprints, Collection<String> accessCheckHashes, Collection<String> expirations) { return new CloudTokenDataPlaneFilterConfig.Clients.Tokens.Builder() diff --git a/container-search/src/main/java/com/yahoo/fs4/MapEncoder.java b/container-search/src/main/java/com/yahoo/fs4/MapEncoder.java index 84b2b482403..4f31db0fc86 100644 --- a/container-search/src/main/java/com/yahoo/fs4/MapEncoder.java +++ b/container-search/src/main/java/com/yahoo/fs4/MapEncoder.java @@ -20,7 +20,7 @@ public class MapEncoder { // TODO: Time to refactor - private static byte [] getUtf8(Object value) { + private static byte[] getUtf8(Object value) { if (value == null) { return Utf8.toBytes(""); } else if (value instanceof Tensor) { @@ -62,7 +62,7 @@ public class MapEncoder { public static int encodeMap(String mapName, Map<String,?> map, ByteBuffer buffer) { if (map.isEmpty()) return 0; - byte [] utf8 = Utf8.toBytes(mapName); + byte[] utf8 = Utf8.toBytes(mapName); buffer.putInt(utf8.length); buffer.put(utf8); buffer.putInt(map.size()); diff --git 
a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java index 441c4326355..88cc7ad7b2d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -172,6 +172,21 @@ public class ClusterSearcher extends Searcher { } @Override + public Result search(Query query, Execution execution) { + validateQueryTimeout(query); + validateQueryCache(query); + Searcher searcher = server; + if (searcher == null) { + return new Result(query, ErrorMessage.createNoBackendsInService("Could not search")); + } + if (query.getTimeLeft() <= 0) { + return new Result(query, ErrorMessage.createTimeout("No time left for searching")); + } + + return doSearch(searcher, query, execution); + } + + @Override public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { Query query = result.getQuery(); @@ -192,21 +207,6 @@ public class ClusterSearcher extends Searcher { } } - @Override - public Result search(Query query, Execution execution) { - validateQueryTimeout(query); - validateQueryCache(query); - Searcher searcher = server; - if (searcher == null) { - return new Result(query, ErrorMessage.createNoBackendsInService("Could not search")); - } - if (query.getTimeLeft() <= 0) { - return new Result(query, ErrorMessage.createTimeout("No time left for searching")); - } - - return doSearch(searcher, query, execution); - } - private void validateQueryTimeout(Query query) { if (query.getTimeout() <= maxQueryTimeout) return; diff --git a/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java b/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java index 4ac5375807b..fd0b6543f28 100644 --- a/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java +++ 
b/container-search/src/main/java/com/yahoo/search/query/ranking/RankProperties.java @@ -38,16 +38,12 @@ public class RankProperties implements Cloneable { /** Adds a property by full name to a value */ public void put(String name, Object value) { - List<Object> list = properties.get(name); - if (list == null) { - list = new ArrayList<>(); - properties.put(name, list); - } + List<Object> list = properties.computeIfAbsent(name, k -> new ArrayList<>()); list.add(value); } /** - * Returns a read-only list of properties properties by full name. + * Returns a read-only list of properties by full name. * If this is not set, null is returned. If this is explicitly set to * have no values, and empty list is returned. */ diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index a4fcb85dde4..95603d28ca5 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -66,7 +66,7 @@ <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> <athenz.vespa.version>1.11.50</athenz.vespa.version> - <aws-sdk.vespa.version>1.12.645</aws-sdk.vespa.version> + <aws-sdk.vespa.version>1.12.646</aws-sdk.vespa.version> <!-- Athenz END --> <!-- WARNING: If you change curator version, you also need to update diff --git a/document/src/main/java/com/yahoo/document/json/JsonReader.java b/document/src/main/java/com/yahoo/document/json/JsonReader.java index 3e1743b8d45..b6cf8c6e18b 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonReader.java +++ b/document/src/main/java/com/yahoo/document/json/JsonReader.java @@ -6,8 +6,10 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.DocumentId; import com.yahoo.document.DocumentOperation; +import com.yahoo.document.DocumentPut; import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.DocumentUpdate; import 
com.yahoo.document.TestAndSetCondition; import com.yahoo.document.json.document.DocumentParser; import com.yahoo.document.json.readers.DocumentParseInfo; @@ -18,6 +20,9 @@ import java.io.InputStream; import java.util.Optional; import static com.yahoo.document.json.JsonReader.ReaderState.END_OF_FEED; +import static com.yahoo.document.json.document.DocumentParser.CONDITION; +import static com.yahoo.document.json.document.DocumentParser.CREATE_IF_NON_EXISTENT; +import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.readers.JsonParserHelpers.expectArrayStart; /** @@ -60,7 +65,7 @@ public class JsonReader { * @param docIdString document ID * @return the parsed document operation */ - public ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { + ParsedDocumentOperation readSingleDocument(DocumentOperationType operationType, String docIdString) { DocumentId docId = new DocumentId(docIdString); DocumentParseInfo documentParseInfo; try { @@ -78,6 +83,79 @@ public class JsonReader { return operation; } + /** + * Reads a JSON which is expected to contain a single document operation, + * and where other parameters, like the document ID and operation type, are supplied by other means. 
+ * + * @param operationType the type of operation (update or put) + * @param docIdString document ID + * @return the parsed document operation + */ + public ParsedDocumentOperation readSingleDocumentStreaming(DocumentOperationType operationType, String docIdString) { + try { + DocumentId docId = new DocumentId(docIdString); + DocumentParseInfo documentParseInfo = new DocumentParseInfo(); + documentParseInfo.documentId = docId; + documentParseInfo.operationType = operationType; + + if (JsonToken.START_OBJECT != parser.nextValue()) + throw new IllegalArgumentException("expected start of root object, got " + parser.currentToken()); + + Boolean create = null; + String condition = null; + ParsedDocumentOperation operation = null; + while (JsonToken.END_OBJECT != parser.nextValue()) { + switch (parser.getCurrentName()) { + case FIELDS -> { + documentParseInfo.fieldsBuffer = new LazyTokenBuffer(parser); + VespaJsonDocumentReader vespaJsonDocumentReader = new VespaJsonDocumentReader(typeManager.getIgnoreUndefinedFields()); + operation = vespaJsonDocumentReader.createDocumentOperation( + getDocumentTypeFromString(documentParseInfo.documentId.getDocType(), typeManager), documentParseInfo); + + if ( ! documentParseInfo.fieldsBuffer.isEmpty()) + throw new IllegalArgumentException("expected all content to be consumed by document parsing, but " + + documentParseInfo.fieldsBuffer.nesting() + " levels remain"); + + } + case CONDITION -> { + if ( ! JsonToken.VALUE_STRING.equals(parser.currentToken()) && ! JsonToken.VALUE_NULL.equals(parser.currentToken())) + throw new IllegalArgumentException("expected string value for condition, got " + parser.currentToken()); + + condition = parser.getValueAsString(); + } + case CREATE_IF_NON_EXISTENT -> { + create = parser.getBooleanValue(); // Throws if not boolean. + } + default -> { + // We ignore stray fields, but need to ensure structural balance in doing do. 
+ if (parser.currentToken().isStructStart()) parser.skipChildren(); + } + } + } + + if (null != parser.nextToken()) + throw new IllegalArgumentException("expected end of input, got " + parser.currentToken()); + + if (null == operation) + throw new IllegalArgumentException("document is missing the required \"fields\" field"); + + if (null != create) { + switch (operationType) { + case PUT -> ((DocumentPut) operation.operation()).setCreateIfNonExistent(create); + case UPDATE -> ((DocumentUpdate) operation.operation()).setCreateIfNonExistent(create); + case REMOVE -> throw new IllegalArgumentException(CREATE_IF_NON_EXISTENT + " is not supported for remove operations"); + } + } + + operation.operation().setCondition(TestAndSetCondition.fromConditionString(Optional.ofNullable(condition))); + + return operation; + } + catch (IOException e) { + throw new IllegalArgumentException("failed parsing document", e); + } + } + /** Returns the next document operation, or null if we have reached the end */ public DocumentOperation next() { switch (state) { diff --git a/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java new file mode 100644 index 00000000000..0fbdd0b28c7 --- /dev/null +++ b/document/src/main/java/com/yahoo/document/json/LazyTokenBuffer.java @@ -0,0 +1,64 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import java.io.IOException; +import java.util.function.Supplier; + +/** + * A {@link TokenBuffer} which only buffers tokens when needed, i.e., when peeking. 
+ * + * @author jonmv + */ +public class LazyTokenBuffer extends TokenBuffer { + + private final JsonParser parser; + + public LazyTokenBuffer(JsonParser parser) { + this.parser = parser; + try { addFromParser(parser); } + catch (IOException e) { throw new IllegalArgumentException("failed parsing document JSON", e); } + if (JsonToken.START_OBJECT != current()) + throw new IllegalArgumentException("expected start of JSON object, but got " + current()); + updateNesting(current()); + } + + void advance() { + super.advance(); + if (tokens.isEmpty() && nesting() > 0) tokens.add(nextToken()); // Fill current token if needed and possible. + } + + @Override + public Supplier<Token> lookahead() { + return new Supplier<>() { + int localNesting = nesting(); + Supplier<Token> buffered = LazyTokenBuffer.super.lookahead(); + @Override public Token get() { + if (localNesting == 0) + return null; + + Token token = buffered.get(); + if (token == null) { + token = nextToken(); + tokens.add(token); + } + localNesting += nestingOffset(token.token); + return token; + } + }; + } + + private Token nextToken() { + try { + JsonToken token = parser.nextValue(); + if (token == null) + throw new IllegalStateException("no more JSON tokens"); + return new Token(token, parser.getCurrentName(), parser.getText()); + } + catch (IOException e) { + throw new IllegalArgumentException("failed reading document JSON", e); + } + } + +} diff --git a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java index dec84e46b77..3a48f71c4cd 100644 --- a/document/src/main/java/com/yahoo/document/json/TokenBuffer.java +++ b/document/src/main/java/com/yahoo/document/json/TokenBuffer.java @@ -1,15 +1,16 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.document.json; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Iterator; +import java.util.function.Supplier; + /** * Helper class to enable lookahead in the token stream. * @@ -17,101 +18,76 @@ import com.google.common.base.Preconditions; */ public class TokenBuffer { - private final List<Token> tokens; + final Deque<Token> tokens = new ArrayDeque<>(); - private int position = 0; private int nesting = 0; - public TokenBuffer() { - this(new ArrayList<>()); - } - - public TokenBuffer(List<Token> tokens) { - this.tokens = tokens; - if (tokens.size() > 0) - updateNesting(tokens.get(position).token); - } + public TokenBuffer() { } /** Returns whether any tokens are available in this */ - public boolean isEmpty() { return remaining() == 0; } - - public JsonToken previous() { - updateNestingGoingBackwards(current()); - position--; - return current(); - } - - /** Returns the current token without changing position, or null if none */ - public JsonToken current() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.token; - } + public boolean isEmpty() { return tokens.isEmpty(); } + /** Returns the next token, or null, and updates the nesting count of this. 
*/ public JsonToken next() { - position++; + advance(); JsonToken token = current(); updateNesting(token); return token; } - /** Returns a given number of tokens ahead, or null if none */ - public JsonToken peek(int ahead) { - if (tokens.size() <= position + ahead) return null; - return tokens.get(position + ahead).token; + void advance() { + tokens.poll(); + } + + /** Returns the current token without changing position, or null if none */ + public JsonToken current() { + return isEmpty() ? null : tokens.peek().token; } /** Returns the current token name without changing position, or null if none */ public String currentName() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.name; + return isEmpty() ? null : tokens.peek().name; } /** Returns the current token text without changing position, or null if none */ public String currentText() { - if (isEmpty()) return null; - Token token = tokens.get(position); - if (token == null) return null; - return token.text; + return isEmpty() ? null : tokens.peek().text; } - public int remaining() { - return tokens.size() - position; + /** + * Returns a sequence of remaining tokens in this, or nulls when none remain. + * This may fill the token buffer, but not otherwise modify it. + */ + public Supplier<Token> lookahead() { + Iterator<Token> iterator = tokens.iterator(); + if (iterator.hasNext()) iterator.next(); + return () -> iterator.hasNext() ? 
iterator.next() : null; } private void add(JsonToken token, String name, String text) { - tokens.add(tokens.size(), new Token(token, name, text)); + tokens.add(new Token(token, name, text)); } - public void bufferObject(JsonToken first, JsonParser tokens) { - bufferJsonStruct(first, tokens, JsonToken.START_OBJECT); + public void bufferObject(JsonParser parser) { + bufferJsonStruct(parser, JsonToken.START_OBJECT); } - private void bufferJsonStruct(JsonToken first, JsonParser tokens, JsonToken firstToken) { - int localNesting = 0; - JsonToken t = first; + private void bufferJsonStruct(JsonParser parser, JsonToken firstToken) { + JsonToken token = parser.currentToken(); + Preconditions.checkArgument(token == firstToken, + "Expected %s, got %s.", firstToken.name(), token); + updateNesting(token); - Preconditions.checkArgument(first == firstToken, - "Expected %s, got %s.", firstToken.name(), t); - if (remaining() == 0) { - updateNesting(t); + try { + for (int nesting = addFromParser(parser); nesting > 0; nesting += addFromParser(parser)) + parser.nextValue(); } - localNesting = storeAndPeekNesting(t, localNesting, tokens); - while (localNesting > 0) { - t = nextValue(tokens); - localNesting = storeAndPeekNesting(t, localNesting, tokens); + catch (IOException e) { + throw new IllegalArgumentException(e); } } - private int storeAndPeekNesting(JsonToken t, int nesting, JsonParser tokens) { - addFromParser(t, tokens); - return nesting + nestingOffset(t); - } - - private int nestingOffset(JsonToken token) { + int nestingOffset(JsonToken token) { if (token == null) return 0; if (token.isStructStart()) { return 1; @@ -122,43 +98,23 @@ public class TokenBuffer { } } - private void addFromParser(JsonToken t, JsonParser tokens) { - try { - add(t, tokens.getCurrentName(), tokens.getText()); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - - private JsonToken nextValue(JsonParser tokens) { - try { - return tokens.nextValue(); - } catch (IOException e) 
{ - throw new IllegalArgumentException(e); - } + int addFromParser(JsonParser tokens) throws IOException { + add(tokens.currentToken(), tokens.getCurrentName(), tokens.getText()); + return nestingOffset(tokens.currentToken()); } - private void updateNesting(JsonToken token) { + void updateNesting(JsonToken token) { nesting += nestingOffset(token); } - private void updateNestingGoingBackwards(JsonToken token) { - nesting -= nestingOffset(token); - } - public int nesting() { return nesting; } public void skipToRelativeNesting(int relativeNesting) { int initialNesting = nesting(); - do { - next(); - } while ( nesting() > initialNesting + relativeNesting); - } - - public List<Token> rest() { - return tokens.subList(position, tokens.size()); + do next(); + while (nesting() > initialNesting + relativeNesting); } public static final class Token { diff --git a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java index 74656762fe1..77e11dcf2a8 100644 --- a/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java +++ b/document/src/main/java/com/yahoo/document/json/document/DocumentParser.java @@ -20,7 +20,7 @@ public class DocumentParser { private static final String UPDATE = "update"; private static final String PUT = "put"; private static final String ID = "id"; - private static final String CONDITION = "condition"; + public static final String CONDITION = "condition"; public static final String CREATE_IF_NON_EXISTENT = "create"; public static final String FIELDS = "fields"; public static final String REMOVE = "remove"; @@ -86,16 +86,6 @@ public class DocumentParser { private void handleIdentLevelOne(DocumentParseInfo documentParseInfo, boolean docIdAndOperationIsSetExternally) throws IOException { JsonToken currentToken = parser.getCurrentToken(); - if (currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) { - try { - if 
(CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { - documentParseInfo.create = Optional.ofNullable(parser.getBooleanValue()); - return; - } - } catch (IOException e) { - throw new RuntimeException("Got IO exception while parsing document", e); - } - } if ((currentToken == JsonToken.VALUE_TRUE || currentToken == JsonToken.VALUE_FALSE) && CREATE_IF_NON_EXISTENT.equals(parser.getCurrentName())) { documentParseInfo.create = Optional.of(currentToken == JsonToken.VALUE_TRUE); @@ -111,12 +101,11 @@ public class DocumentParser { } } - private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { + private void handleIdentLevelTwo(DocumentParseInfo documentParseInfo) { try { - JsonToken currentToken = parser.getCurrentToken(); // "fields" opens a dictionary and is therefore on level two which might be surprising. - if (currentToken == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { - documentParseInfo.fieldsBuffer.bufferObject(currentToken, parser); + if (parser.currentToken() == JsonToken.START_OBJECT && FIELDS.equals(parser.getCurrentName())) { + documentParseInfo.fieldsBuffer.bufferObject(parser); processIndent(); } } catch (IOException e) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java index 2dce07cdbe6..e859306f04d 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java +++ b/document/src/main/java/com/yahoo/document/json/readers/DocumentParseInfo.java @@ -8,6 +8,7 @@ import com.yahoo.document.json.TokenBuffer; import java.util.Optional; public class DocumentParseInfo { + public DocumentParseInfo() { } public DocumentId documentId; public Optional<Boolean> create = Optional.empty(); public Optional<String> condition = Optional.empty(); diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java 
b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 0b7b1ae9996..1fd4029b1a5 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -4,13 +4,15 @@ package com.yahoo.document.json.readers; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; -import com.yahoo.slime.Inspector; -import com.yahoo.slime.Type; +import com.yahoo.document.json.TokenBuffer.Token; import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.MixedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.TensorType.Dimension; + +import java.util.function.Supplier; import static com.yahoo.document.json.readers.JsonParserHelpers.*; import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString; @@ -37,36 +39,43 @@ public class TensorReader { Tensor.Builder builder = Tensor.Builder.of(tensorFieldValue.getDataType().getTensorType()); expectOneOf(buffer.current(), JsonToken.START_OBJECT, JsonToken.START_ARRAY); int initNesting = buffer.nesting(); - for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { - if (TENSOR_CELLS.equals(buffer.currentName()) && ! primitiveContent(buffer)) { + while (true) { + Supplier<Token> lookahead = buffer.lookahead(); + Token next = lookahead.get(); + if (TENSOR_CELLS.equals(next.name) && ! 
primitiveContent(next.token, lookahead.get().token)) { + buffer.next(); readTensorCells(buffer, builder); } - else if (TENSOR_VALUES.equals(buffer.currentName()) && builder.type().dimensions().stream().allMatch(d -> d.isIndexed())) { + else if (TENSOR_VALUES.equals(next.name) && builder.type().dimensions().stream().allMatch(Dimension::isIndexed)) { + buffer.next(); readTensorValues(buffer, builder); } - else if (TENSOR_BLOCKS.equals(buffer.currentName())) { + else if (TENSOR_BLOCKS.equals(next.name)) { + buffer.next(); readTensorBlocks(buffer, builder); } - else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) { + else if (TENSOR_TYPE.equals(next.name) && next.token == JsonToken.VALUE_STRING) { + buffer.next(); // Ignore input tensor type } + else if (buffer.nesting() == initNesting && JsonToken.END_OBJECT == next.token) { + buffer.next(); + break; + } else { - buffer.previous(); // Back up to the start of the enclosing block readDirectTensorValue(buffer, builder); - buffer.previous(); // ... 
and back up to the end of the enclosing block + break; } } expectOneOf(buffer.current(), JsonToken.END_OBJECT, JsonToken.END_ARRAY); tensorFieldValue.assign(builder.build()); } - static boolean primitiveContent(TokenBuffer buffer) { - JsonToken cellsValue = buffer.current(); - if (cellsValue.isScalarValue()) return true; - if (cellsValue == JsonToken.START_ARRAY) { - JsonToken firstArrayValue = buffer.peek(1); - if (firstArrayValue == JsonToken.END_ARRAY) return false; - if (firstArrayValue.isScalarValue()) return true; + static boolean primitiveContent(JsonToken current, JsonToken next) { + if (current.isScalarValue()) return true; + if (current == JsonToken.START_ARRAY) { + if (next == JsonToken.END_ARRAY) return false; + if (next.isScalarValue()) return true; } return false; } @@ -186,7 +195,7 @@ public class TensorReader { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if (isArrayOfObjects(buffer, 0)) + if (isArrayOfObjects(buffer)) readTensorCells(buffer, builder); else if ( ! 
hasMapped) readTensorValues(buffer, builder); @@ -196,10 +205,12 @@ public class TensorReader { readTensorCells(buffer, builder); } - private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) { - if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false; - if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array - return buffer.peek(ahead) == JsonToken.START_OBJECT; + private static boolean isArrayOfObjects(TokenBuffer buffer) { + if (buffer.current() != JsonToken.START_ARRAY) return false; + Supplier<Token> lookahead = buffer.lookahead(); + Token next; + while ((next = lookahead.get()).token == JsonToken.START_ARRAY) { } + return next.token == JsonToken.START_OBJECT; } private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { diff --git a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java index 113b8732b23..067dabdbdab 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/VespaJsonDocumentReader.java @@ -230,7 +230,7 @@ public class VespaJsonDocumentReader { private static boolean isFieldPath(String field) { - return field.matches("^.*?[.\\[\\{].*$"); + return field.matches("^.*?[.\\[{].*$"); } private static void verifyEndState(TokenBuffer buffer, JsonToken expectedFinalToken) { @@ -238,7 +238,7 @@ public class VespaJsonDocumentReader { "Expected end of JSON struct (%s), got %s", expectedFinalToken, buffer.current()); Preconditions.checkState(buffer.nesting() == 0, "Nesting not zero at end of operation"); Preconditions.checkState(buffer.next() == null, "Dangling data at end of operation"); - Preconditions.checkState(buffer.remaining() == 0, "Dangling data at end of operation"); + Preconditions.checkState(buffer.isEmpty(), "Dangling data at end of 
operation"); } } diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index 5a9f02c790d..aa043a25d78 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -20,6 +20,7 @@ import com.yahoo.document.MapDataType; import com.yahoo.document.PositionDataType; import com.yahoo.document.StructDataType; import com.yahoo.document.TensorDataType; +import com.yahoo.document.TestAndSetCondition; import com.yahoo.document.WeightedSetDataType; import com.yahoo.document.datatypes.Array; import com.yahoo.document.datatypes.BoolFieldValue; @@ -221,6 +222,65 @@ public class JsonReaderTestCase { } @Test + public void readDocumentWithMissingFieldsField() { + assertEquals("document is missing the required \"fields\" field", + assertThrows(IllegalArgumentException.class, + () -> createReader("{ }").readSingleDocumentStreaming(DocumentOperationType.PUT, + "id:unittest:testnull::whee")) + .getMessage()); + } + + @Test + public void readSingleDocumentsPutStreaming() throws IOException { + String json = """ + { + "remove": "id:unittest:smoke::ignored", + "ignored-extra-array": [{ "foo": null }, { }], + "ignored-extra-object": { "foo": [null, { }], "bar": { } }, + "fields": { + "something": "smoketest", + "flag": true, + "nalle": "bamse" + }, + "id": "id:unittest:smoke::ignored", + "create": false, + "condition": "true" + } + """; + ParsedDocumentOperation operation = createReader(json).readSingleDocumentStreaming(DocumentOperationType.PUT,"id:unittest:smoke::doc1"); + DocumentPut put = ((DocumentPut) operation.operation()); + assertFalse(put.getCreateIfNonExistent()); + assertEquals("true", put.getCondition().getSelection()); + smokeTestDoc(put.getDocument()); + } + + @Test + public void readSingleDocumentsUpdateStreaming() throws IOException { + String json = """ + { + "remove": 
"id:unittest:smoke::ignored", + "ignored-extra-array": [{ "foo": null }, { }], + "ignored-extra-object": { "foo": [null, { }], "bar": { } }, + "fields": { + "something": { "assign": "smoketest" }, + "flag": { "assign": true }, + "nalle": { "assign": "bamse" } + }, + "id": "id:unittest:smoke::ignored", + "create": true, + "condition": "false" + } + """; + ParsedDocumentOperation operation = createReader(json).readSingleDocumentStreaming(DocumentOperationType.UPDATE,"id:unittest:smoke::doc1"); + Document doc = new Document(types.getDocumentType("smoke"), new DocumentId("id:unittest:smoke::doc1")); + DocumentUpdate update = ((DocumentUpdate) operation.operation()); + update.applyTo(doc); + smokeTestDoc(doc); + assertTrue(update.getCreateIfNonExistent()); + assertEquals("false", update.getCondition().getSelection()); + } + + @Test public void readSingleDocumentPut() throws IOException { Document doc = docFromJson(""" { @@ -2120,69 +2180,93 @@ public class JsonReaderTestCase { @Test public void tensor_modify_update_with_replace_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_add_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_multiply_operation() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.MULTIPLY, "sparse_tensor", - inputJson("{", - " 'operation': 
'multiply',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "multiply", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_create_non_existing_cells_true() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, true, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'create': true,", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "create": true, + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_create_non_existing_cells_false() { assertTensorModifyUpdate("{{x:a,y:b}:2.0}", TensorModifyUpdate.Operation.ADD, false, "sparse_tensor", - inputJson("{", - " 'operation': 'add',", - " 'create': false,", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "create": false, + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_treats_the_input_tensor_as_sparse() { // Note that the type of the tensor in the modify update is sparse (it only has mapped dimensions). 
assertTensorModifyUpdate("tensor(x{},y{}):{{x:0,y:0}:2.0, {x:1,y:2}:3.0}", - TensorModifyUpdate.Operation.REPLACE, "dense_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '0' }, 'value': 2.0 },", - " { 'address': { 'x': '1', 'y': '2' }, 'value': 3.0 } ]}")); + TensorModifyUpdate.Operation.REPLACE, "dense_tensor", + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "0" }, "value": 2.0 }, + { "address": { "x": "1", "y": "2" }, "value": 3.0 } + ] + }"""); } @Test public void tensor_modify_update_on_non_tensor_field_throws() { try { - JsonReader reader = createReader(inputJson("{ 'update': 'id:unittest:smoke::doc1',", - " 'fields': {", - " 'something': {", - " 'modify': {} }}}")); + JsonReader reader = createReader(""" + { + "update": "id:unittest:smoke::doc1", + "fields": { + "something": { + "modify": {} + } + } + } + """); reader.readSingleDocument(DocumentOperationType.UPDATE, "id:unittest:smoke::doc1"); fail("Expected exception"); } @@ -2196,95 +2280,125 @@ public class JsonReaderTestCase { public void tensor_modify_update_on_dense_unbound_tensor_throws() { illegalTensorModifyUpdate("Error in 'dense_unbound_tensor': A modify update cannot be applied to tensor types with indexed unbound dimensions. 
Field 'dense_unbound_tensor' has unsupported tensor type 'tensor(x[],y[])'", "dense_unbound_tensor", - "{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '0' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_on_sparse_tensor_with_single_dimension_short_form() { - assertTensorModifyUpdate("{{x:a}:2.0, {x:c}: 3.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_single_dimension_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': {", - " 'a': 2.0,", - " 'c': 3.0 }}")); + assertTensorModifyUpdate("{{x:a}:2.0, {x:c}: 3.0}", TensorModifyUpdate.Operation.REPLACE, "sparse_single_dimension_tensor", + """ + { + "operation": "replace", + "cells": { + "a": 2.0, + "c": 3.0 + } + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_array() { assertTensorModifyUpdate("{{x:a,y:0}:1,{x:a,y:1}:2,{x:a,y:2}:3}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'blocks': [", - " { 'address': { 'x': 'a' }, 'values': [1,2,3] } ]}")); + """ + { + "operation": "replace", + "blocks": [ + { "address": { "x": "a" }, "values": [1,2,3] } + ] + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_must_specify_full_subspace() { illegalTensorModifyUpdate("Error in 'mixed_tensor': At {x:a}: Expected 3 values, but got 2", - "mixed_tensor", - inputJson("{", - " 'operation': 
'replace',", - " 'blocks': {", - " 'a': [2,3] } }")); + "mixed_tensor", + """ + { + "operation": "replace", + "blocks": { + "a": [2,3] + } + }"""); } @Test public void tensor_modify_update_with_replace_operation_mixed_block_short_form_map() { assertTensorModifyUpdate("{{x:a,y:0}:1,{x:a,y:1}:2,{x:a,y:2}:3}", TensorModifyUpdate.Operation.REPLACE, "mixed_tensor", - inputJson("{", - " 'operation': 'replace',", - " 'blocks': {", - " 'a': [1,2,3] } }")); + """ + { + "operation": "replace", + "blocks": { + "a": [1,2,3] + } + }"""); } @Test public void tensor_modify_update_with_add_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.ADD, "mixed_tensor", - inputJson("{", - " 'operation': 'add',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "add", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_multiply_operation_mixed() { assertTensorModifyUpdate("{{x:a,y:0}:2.0}", TensorModifyUpdate.Operation.MULTIPLY, "mixed_tensor", - inputJson("{", - " 'operation': 'multiply',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 } ]}")); + """ + { + "operation": "multiply", + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_out_of_bound_cells_throws() { illegalTensorModifyUpdate("Error in 'dense_tensor': Dimension 'y' has label '3' but type is tensor(x[2],y[3])", "dense_tensor", - "{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_with_out_of_bound_cells_throws_mixed() { illegalTensorModifyUpdate("Error in 'mixed_tensor': Dimension 'y' has label '3' but type is tensor(x{},y[3])", "mixed_tensor", - 
"{", - " 'operation': 'replace',", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "operation": "replace", + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @@ -2292,87 +2406,113 @@ public class JsonReaderTestCase { public void tensor_modify_update_with_unknown_operation_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Unknown operation 'unknown' in modify update for field 'sparse_tensor'", "sparse_tensor", - "{", - " 'operation': 'unknown',", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 } ]}"); + """ + { + "operation": "unknown", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_modify_update_without_operation_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Modify update for field 'sparse_tensor' does not contain an operation", "sparse_tensor", - "{", - " 'cells': [] }"); + """ + { + "cells": [] + }"""); } @Test public void tensor_modify_update_without_cells_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Modify update for field 'sparse_tensor' does not contain tensor cells", "sparse_tensor", - "{", - " 'operation': 'replace' }"); + """ + { + "operation": "replace" + }"""); } @Test public void tensor_modify_update_with_unknown_content_throws() { illegalTensorModifyUpdate("Error in 'sparse_tensor': Unknown JSON string 'unknown' in modify update for field 'sparse_tensor'", "sparse_tensor", - "{", - " 'unknown': 'here' }"); + """ + { + "unknown": "here" + }"""); } @Test public void tensor_add_update_on_sparse_tensor() { assertTensorAddUpdate("{{x:a,y:b}:2.0, {x:c,y:d}: 3.0}", "sparse_tensor", - inputJson("{", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 },", - " { 'address': { 'x': 'c', 'y': 'd' }, 'value': 3.0 } ]}")); + """ + { + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 }, + { "address": { "x": "c", "y": "d" }, 
"value": 3.0 } + ] + }"""); } @Test public void tensor_add_update_on_sparse_tensor_with_single_dimension_short_form() { assertTensorAddUpdate("{{x:a}:2.0, {x:c}: 3.0}", "sparse_single_dimension_tensor", - inputJson("{", - " 'cells': {", - " 'a': 2.0,", - " 'c': 3.0 }}")); + """ + { + "cells": { + "a": 2.0, + "c": 3.0 + } + }"""); } @Test public void tensor_add_update_on_mixed_tensor() { assertTensorAddUpdate("{{x:a,y:0}:2.0, {x:a,y:1}:3.0, {x:a,y:2}:0.0}", "mixed_tensor", - inputJson("{", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': '0' }, 'value': 2.0 },", - " { 'address': { 'x': 'a', 'y': '1' }, 'value': 3.0 } ]}")); + """ + { + "cells": [ + { "address": { "x": "a", "y": "0" }, "value": 2.0 }, + { "address": { "x": "a", "y": "1" }, "value": 3.0 } + ] + }"""); } @Test public void tensor_add_update_on_mixed_with_out_of_bound_dense_cells_throws() { illegalTensorAddUpdate("Error in 'mixed_tensor': Index 3 out of bounds for length 3", "mixed_tensor", - "{", - " 'cells': [", - " { 'address': { 'x': '0', 'y': '3' }, 'value': 2.0 } ]}"); + """ + { + "cells": [ + { "address": { "x": "0", "y": "3" }, "value": 2.0 } + ] + }"""); } @Test public void tensor_add_update_on_dense_tensor_throws() { illegalTensorAddUpdate("Error in 'dense_tensor': An add update can only be applied to tensors with at least one sparse dimension. 
Field 'dense_tensor' has unsupported tensor type 'tensor(x[2],y[3])'", "dense_tensor", - "{", - " 'cells': [] }"); + """ + { + "cells": [ ] + }"""); } @Test public void tensor_add_update_on_not_fully_specified_cell_throws() { illegalTensorAddUpdate("Error in 'sparse_tensor': Missing a label for dimension 'y' for tensor(x{},y{})", "sparse_tensor", - "{", - " 'cells': [", - " { 'address': { 'x': 'a' }, 'value': 2.0 } ]}"); + """ + { + "cells": [ + { "address": { "x": "a" }, "value": 2.0 } + ] + }"""); } @Test @@ -2388,146 +2528,176 @@ public class JsonReaderTestCase { @Test public void tensor_remove_update_on_sparse_tensor() { assertTensorRemoveUpdate("{{x:a,y:b}:1.0,{x:c,y:d}:1.0}", "sparse_tensor", - inputJson("{", - " 'addresses': [", - " { 'x': 'a', 'y': 'b' },", - " { 'x': 'c', 'y': 'd' } ]}")); + """ + { + "addresses": [ + { "x": "a", "y": "b" }, + { "x": "c", "y": "d" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor() { assertTensorRemoveUpdate("{{x:1}:1.0,{x:2}:1.0}", "mixed_tensor", - inputJson("{", - " 'addresses': [", - " { 'x': '1' },", - " { 'x': '2' } ]}")); + """ + { + "addresses": [ + { "x": "1" }, + { "x": "2" } + ] + }"""); } @Test public void tensor_remove_update_on_sparse_tensor_with_not_fully_specified_address() { assertTensorRemoveUpdate("{{y:b}:1.0,{y:d}:1.0}", "sparse_tensor", - inputJson("{", - " 'addresses': [", - " { 'y': 'b' },", - " { 'y': 'd' } ]}")); + """ + { + "addresses": [ + { "y": "b" }, + { "y": "d" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor_with_not_fully_specified_address() { assertTensorRemoveUpdate("{{x:1,z:a}:1.0,{x:2,z:b}:1.0}", "mixed_tensor_adv", - inputJson("{", - " 'addresses': [", - " { 'x': '1', 'z': 'a' },", - " { 'x': '2', 'z': 'b' } ]}")); + """ + { + "addresses": [ + { "x": "1", "z": "a" }, + { "x": "2", "z": "b" } + ] + }"""); } @Test public void tensor_remove_update_on_mixed_tensor_with_dense_addresses_throws() { illegalTensorRemoveUpdate("Error in 
'mixed_tensor': Indexed dimension address 'y' should not be specified in remove update", "mixed_tensor", - "{", - " 'addresses': [", - " { 'x': '1', 'y': '0' },", - " { 'x': '2', 'y': '0' } ]}"); + """ + { + "addresses": [ + { "x": "1", "y": "0" }, + { "x": "2", "y": "0" } + ] + }"""); } @Test public void tensor_remove_update_on_dense_tensor_throws() { illegalTensorRemoveUpdate("Error in 'dense_tensor': A remove update can only be applied to tensors with at least one sparse dimension. Field 'dense_tensor' has unsupported tensor type 'tensor(x[2],y[3])'", "dense_tensor", - "{", - " 'addresses': [] }"); + """ + { + "addresses": [] + }"""); } @Test public void tensor_remove_update_with_stray_dimension_throws() { illegalTensorRemoveUpdate("Error in 'sparse_tensor': tensor(x{},y{}) does not contain dimension 'foo'", - "sparse_tensor", - "{", - " 'addresses': [", - " { 'x': 'a', 'foo': 'b' } ]}"); + "sparse_tensor", + """ + { + "addresses": [ + { "x": "a", "foo": "b" } + ] + }"""); illegalTensorRemoveUpdate("Error in 'sparse_tensor': tensor(x{}) does not contain dimension 'foo'", - "sparse_tensor", - "{", - " 'addresses': [", - " { 'x': 'c' },", - " { 'x': 'a', 'foo': 'b' } ]}"); + "sparse_tensor", + """ + { + "addresses": [ + { "x": "c" }, + { "x": "a", "foo": "b" } + ] + }"""); } @Test public void tensor_remove_update_without_cells_throws() { illegalTensorRemoveUpdate("Error in 'sparse_tensor': Remove update for field 'sparse_tensor' does not contain tensor addresses", "sparse_tensor", - "{'addresses': [] }"); + """ + { + "addresses": [] + }"""); illegalTensorRemoveUpdate("Error in 'mixed_tensor': Remove update for field 'mixed_tensor' does not contain tensor addresses", "mixed_tensor", - "{'addresses': [] }"); + """ + { + "addresses": [] + }"""); } @Test public void require_that_parser_propagates_datatype_parser_errors_predicate() { assertParserErrorMatches( "Error in document 'id:unittest:testpredicate::0' - could not parse field 'boolean' of type 'predicate': " + - 
"line 1:10 no viable alternative at character '>'", - - "[", - " {", - " 'fields': {", - " 'boolean': 'timestamp > 9000'", - " },", - " 'put': 'id:unittest:testpredicate::0'", - " }", - "]" - ); + "line 1:10 no viable alternative at character '>'", + """ + [ + { + "fields": { + "boolean": "timestamp > 9000" + }, + "put": "id:unittest:testpredicate::0" + } + ] + """); } @Test public void require_that_parser_propagates_datatype_parser_errors_string_as_int() { assertParserErrorMatches( "Error in document 'id:unittest:testint::0' - could not parse field 'integerfield' of type 'int': " + - "For input string: \" 1\"", - - "[", - " {", - " 'fields': {", - " 'integerfield': ' 1'", - " },", - " 'put': 'id:unittest:testint::0'", - " }", - "]" - ); + "For input string: \" 1\"", + """ + [ + { + "fields": { + "integerfield": " 1" + }, + "put": "id:unittest:testint::0" + } + ] + """); } @Test public void require_that_parser_propagates_datatype_parser_errors_overflowing_int() { assertParserErrorMatches( "Error in document 'id:unittest:testint::0' - could not parse field 'integerfield' of type 'int': " + - "For input string: \"281474976710656\"", - - "[", - " {", - " 'fields': {", - " 'integerfield': 281474976710656", - " },", - " 'put': 'id:unittest:testint::0'", - " }", - "]" - ); + "For input string: \"281474976710656\"", + """ + [ + { + "fields": { + "integerfield": 281474976710656 + }, + "put": "id:unittest:testint::0" + } + ] + """); } @Test public void requireThatUnknownDocTypeThrowsIllegalArgumentException() { - final String jsonData = inputJson( - "[", - " {", - " 'put': 'id:ns:walrus::walrus1',", - " 'fields': {", - " 'aField': 42", - " }", - " }", - "]"); + String jsonData = """ + [ + { + "put": "id:ns:walrus::walrus1", + "fields": { + "aField": 42 + } + } + ] + """; try { new JsonReader(types, jsonToInputStream(jsonData), parserFactory).next(); fail(); @@ -2577,30 +2747,40 @@ public class JsonReaderTestCase { return createPutWithTensor(inputTensor, "sparse_tensor"); } 
private DocumentPut createPutWithTensor(String inputTensor, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'put': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': " + inputTensor + " }}]")); - return (DocumentPut) reader.next(); + JsonReader streaming = createReader(""" + { + "fields": { + "%s": %s + } + } + """.formatted(tensorFieldName, inputTensor)); + DocumentPut lazyParsed = (DocumentPut) streaming.readSingleDocumentStreaming(DocumentOperationType.PUT, TENSOR_DOC_ID).operation(); + JsonReader reader = createReader(""" + [ + { + "put": "%s", + "fields": { + "%s": %s + } + } + ]""".formatted(TENSOR_DOC_ID, tensorFieldName, inputTensor)); + DocumentPut bufferParsed = (DocumentPut) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private DocumentUpdate createAssignUpdateWithSparseTensor(String inputTensor) { return createAssignUpdateWithTensor(inputTensor, "sparse_tensor"); } private DocumentUpdate createAssignUpdateWithTensor(String inputTensor, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'update': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': {", - " 'assign': " + (inputTensor != null ? 
inputTensor : "null") + " } } } ]")); - return (DocumentUpdate) reader.next(); + return createTensorUpdate("assign", inputTensor, tensorFieldName); } private static Tensor assertSparseTensorField(String expectedTensor, DocumentPut put) { return assertTensorField(expectedTensor, put, "sparse_tensor"); } private Tensor assertTensorField(String expectedTensor, String fieldName, String inputJson) { - return assertTensorField(expectedTensor, createPutWithTensor(inputJson, fieldName), fieldName); + return assertTensorField(expectedTensor, createPutWithTensor(inputJson(inputJson), fieldName), fieldName); } private static Tensor assertTensorField(String expectedTensor, DocumentPut put, String tensorFieldName) { return assertTensorField(Tensor.from(expectedTensor), put, tensorFieldName); @@ -2673,12 +2853,29 @@ public class JsonReaderTestCase { } private DocumentUpdate createTensorUpdate(String operation, String tensorJson, String tensorFieldName) { - JsonReader reader = createReader(inputJson("[", - "{ 'update': '" + TENSOR_DOC_ID + "',", - " 'fields': {", - " '" + tensorFieldName + "': {", - " '" + operation + "': " + tensorJson + " }}}]")); - return (DocumentUpdate) reader.next(); + JsonReader streaming = createReader(""" + { + "fields": { + "%s": { + "%s": %s + } + } + }""".formatted(tensorFieldName, operation, tensorJson)); + DocumentUpdate lazyParsed = (DocumentUpdate) streaming.readSingleDocumentStreaming(DocumentOperationType.UPDATE, TENSOR_DOC_ID).operation(); + JsonReader reader = createReader(""" + [ + { + "update": "%s", + "fields": { + "%s": { + "%s": %s + } + } + } + ]""".formatted(TENSOR_DOC_ID, tensorFieldName, operation, tensorJson)); + DocumentUpdate bufferParsed = (DocumentUpdate) reader.next(); + assertEquals(lazyParsed, bufferParsed); + return bufferParsed; } private void assertTensorAddUpdate(String expectedTensor, String tensorFieldName, String tensorJson) { diff --git a/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java 
b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java new file mode 100644 index 00000000000..3ed2ed531c3 --- /dev/null +++ b/document/src/test/java/com/yahoo/document/json/LazyTokenBufferTest.java @@ -0,0 +1,132 @@ +package com.yahoo.document.json; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.yahoo.document.json.TokenBuffer.Token; +import org.junit.Test; + +import java.io.IOException; +import java.util.function.Supplier; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * @author jonmv + */ +public class LazyTokenBufferTest { + + @Test + public void testBuffer() throws IOException { + String json = """ + { + "fields": { + "foo": "bar", + "baz": [1, 2, 3], + "quu": { "qux": null } + } + }"""; + JsonParser parser = new JsonFactory().createParser(json); + parser.nextValue(); + parser.nextValue(); + assertEquals(JsonToken.START_OBJECT, parser.currentToken()); + assertEquals("fields", parser.currentName()); + + // Peeking through the buffer doesn't change nesting. 
+ LazyTokenBuffer buffer = new LazyTokenBuffer(parser); + assertEquals(JsonToken.START_OBJECT, buffer.current()); + assertEquals("fields", buffer.currentName()); + assertEquals(1, buffer.nesting()); + + Supplier<Token> lookahead = buffer.lookahead(); + Token peek = lookahead.get(); + assertEquals(JsonToken.VALUE_STRING, peek.token); + assertEquals("foo", peek.name); + assertEquals("bar", peek.text); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_ARRAY, peek.token); + assertEquals("baz", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("1", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("2", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NUMBER_INT, peek.token); + assertEquals("3", peek.text); + + peek = lookahead.get(); + assertEquals(JsonToken.END_ARRAY, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.START_OBJECT, peek.token); + assertEquals("quu", peek.name); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.VALUE_NULL, peek.token); + assertEquals("qux", peek.name); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertEquals(JsonToken.END_OBJECT, peek.token); + assertEquals(1, buffer.nesting()); + + peek = lookahead.get(); + assertNull(peek); + + // Parser is now at the end. + assertEquals(JsonToken.END_OBJECT, parser.nextToken()); + assertNull(parser.nextToken()); + + // Repeat iterating through the buffer, this time advancing it, and see that nesting changes. 
+ assertEquals(JsonToken.VALUE_STRING, buffer.next()); + assertEquals("foo", buffer.currentName()); + assertEquals("bar", buffer.currentText()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_ARRAY, buffer.next()); + assertEquals("baz", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("1", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("2", buffer.currentText()); + + assertEquals(JsonToken.VALUE_NUMBER_INT, buffer.next()); + assertEquals("3", buffer.currentText()); + + assertEquals(JsonToken.END_ARRAY, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.START_OBJECT, buffer.next()); + assertEquals("quu", buffer.currentName()); + assertEquals(2, buffer.nesting()); + + assertEquals(JsonToken.VALUE_NULL, buffer.next()); + assertEquals("qux", buffer.currentName()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(1, buffer.nesting()); + + assertEquals(JsonToken.END_OBJECT, buffer.next()); + assertEquals(0, buffer.nesting()); + + assertNull(buffer.next()); + } + +} diff --git a/document/src/vespa/document/repo/configbuilder.cpp b/document/src/vespa/document/repo/configbuilder.cpp index 5f40bde1966..cf563c5c783 100644 --- a/document/src/vespa/document/repo/configbuilder.cpp +++ b/document/src/vespa/document/repo/configbuilder.cpp @@ -19,6 +19,7 @@ DatatypeConfig::DatatypeConfig() { } DatatypeConfig::DatatypeConfig(const DatatypeConfig&) = default; +DatatypeConfig::~DatatypeConfig() = default; DatatypeConfig& DatatypeConfig::operator=(const DatatypeConfig&) = default; void DatatypeConfig::addNestedType(const TypeOrId &t) { diff --git a/document/src/vespa/document/repo/configbuilder.h b/document/src/vespa/document/repo/configbuilder.h index 4ef17425c1b..61924b2b41a 100644 --- a/document/src/vespa/document/repo/configbuilder.h +++ 
b/document/src/vespa/document/repo/configbuilder.h @@ -17,8 +17,8 @@ struct DatatypeConfig : DocumenttypesConfig::Documenttype::Datatype { std::vector<DatatypeConfig> nested_types; DatatypeConfig(); - DatatypeConfig(const DatatypeConfig&); + ~DatatypeConfig(); DatatypeConfig& operator=(const DatatypeConfig&); DatatypeConfig &setId(int32_t i) { id = i; return *this; } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 8c6126b0897..7a038988302 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -211,7 +211,7 @@ public class Flags { // TODO: Move to a permanent flag public static final UnboundListFlag<String> ALLOWED_ATHENZ_PROXY_IDENTITIES = defineListFlag( "allowed-athenz-proxy-identities", List.of(), String.class, - List.of("bjorncs", "tokle"), "2021-02-10", "2024-02-01", + List.of("bjorncs", "tokle"), "2021-02-10", "2024-04-01", "Allowed Athenz proxy identities", "takes effect at redeployment"); @@ -272,7 +272,7 @@ public class Flags { public static final UnboundBooleanFlag ENABLE_PROXY_PROTOCOL_MIXED_MODE = defineFeatureFlag( "enable-proxy-protocol-mixed-mode", true, - List.of("tokle"), "2022-05-09", "2024-02-01", + List.of("tokle"), "2022-05-09", "2024-04-01", "Enable or disable proxy protocol mixed mode", "Takes effect on redeployment", INSTANCE_ID); @@ -410,7 +410,7 @@ public class Flags { public static final UnboundStringFlag ENDPOINT_CONFIG = defineStringFlag( "endpoint-config", "legacy", - List.of("mpolden", "tokle"), "2023-10-06", "2024-02-01", + List.of("mpolden", "tokle"), "2023-10-06", "2024-06-01", "Set the endpoint config to use for an application. Must be 'legacy', 'combined' or 'generated'. 
See EndpointConfig for further details", "Takes effect on next deployment through controller", TENANT_ID, APPLICATION, INSTANCE_ID); @@ -428,13 +428,6 @@ public class Flags { "Takes effect immediately", TENANT_ID, CONSOLE_USER_EMAIL); - public static final UnboundBooleanFlag CENTRALIZED_AUTHZ = defineFeatureFlag( - "centralized-authz", true, - List.of("mortent"), "2023-11-27", "2024-02-01", - "Use centralized authorization checks", - "Takes effect immediately", - CONSOLE_USER_EMAIL); - public static final UnboundBooleanFlag RESTART_ON_DEPLOY_WHEN_ONNX_MODEL_CHANGES = defineFeatureFlag( "restart-on-deploy-when-onnx-model-changes", false, List.of("hmusum"), "2023-12-04", "2024-02-01", diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 7bec70c00cb..0e97621f228 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -204,6 +204,7 @@ vespa_define_module( src/tests/queryeval/monitoring_search_iterator src/tests/queryeval/multibitvectoriterator src/tests/queryeval/nearest_neighbor + src/tests/queryeval/or_speed src/tests/queryeval/parallel_weak_and src/tests/queryeval/predicate src/tests/queryeval/profiled_iterator diff --git a/searchlib/src/tests/queryeval/or_speed/CMakeLists.txt b/searchlib/src/tests/queryeval/or_speed/CMakeLists.txt new file mode 100644 index 00000000000..950a3a965be --- /dev/null +++ b/searchlib/src/tests/queryeval/or_speed/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_or_speed_test_app TEST + SOURCES + or_speed_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_or_speed_test_app COMMAND searchlib_or_speed_test_app) diff --git a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp new file mode 100644 index 00000000000..8662281f2ef --- /dev/null +++ b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp @@ -0,0 +1,309 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/unpackinfo.h> +#include <vespa/searchlib/queryeval/multibitvectoriterator.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vector> +#include <random> + +using namespace search; +using namespace vespalib; +using search::queryeval::SearchIterator; +using search::queryeval::OrSearch; +using search::queryeval::UnpackInfo; +using TMD = search::fef::TermFieldMatchData; +using vespalib::make_string_short::fmt; + +double budget = 0.25; +size_t bench_docs = 1000; +constexpr uint32_t default_seed = 5489u; +std::mt19937 gen(default_seed); + +BitVector::UP make_bitvector(size_t size, size_t num_bits) { + EXPECT_GT(size, num_bits); + auto bv = BitVector::create(size); + size_t bits_left = num_bits; + // bit 0 is never set since it is reserved + // all other bits have equal probability to be set + for (size_t i = 1; i < size; ++i) { + std::uniform_int_distribution<size_t> space(0,size-i-1); + if (space(gen) < bits_left) { + bv->setBit(i); + --bits_left; + } + } + bv->invalidateCachedCount(); + 
EXPECT_EQ(bv->countTrueBits(), num_bits); + return bv; +} + +// simple strict array-based iterator +// This class has 2 uses: +// 1: better performance for few hits compared to bitvector +// 2: not a bitvector, useful when testing multi-bitvector interactions +struct ArrayIterator : SearchIterator { + uint32_t my_offset = 0; + uint32_t my_limit; + std::vector<uint32_t> my_hits; + TMD &my_match_data; + ArrayIterator(const BitVector &bv, TMD &tmd) + : my_limit(bv.size()), my_match_data(tmd) + { + uint32_t next = bv.getStartIndex(); + for (;;) { + next = bv.getNextTrueBit(next); + if (next >= my_limit) { + break; + } + my_hits.push_back(next++); + } + my_match_data.reset(0); + } + void initRange(uint32_t begin, uint32_t end) final { + SearchIterator::initRange(begin, end); + my_offset = 0; + } + void doSeek(uint32_t docid) final { + while (my_offset < my_hits.size() && my_hits[my_offset] < docid) { + ++my_offset; + } + if (my_offset < my_hits.size()) { + setDocId(my_hits[my_offset]); + } else { + setAtEnd(); + } + } + Trinary is_strict() const final { return Trinary::True; } + void doUnpack(uint32_t docId) final { my_match_data.resetOnlyDocId(docId); } +}; + +struct OrSetup { + uint32_t docid_limit; + bool unpack_all = true; + bool unpack_none = true; + std::vector<std::unique_ptr<TMD>> match_data; + std::vector<BitVector::UP> child_hits; + std::vector<bool> use_array; + OrSetup(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {} + size_t per_child(double target, size_t child_cnt) { + size_t result = (docid_limit * target) / child_cnt; + return (result >= docid_limit) ? 
(docid_limit - 1) : result; + } + bool should_use_array(size_t hits) { + return (docid_limit / hits) >= 32; + } + OrSetup &add(size_t num_hits, bool use_array_in, bool need_unpack) { + match_data.push_back(std::make_unique<TMD>()); + child_hits.push_back(make_bitvector(docid_limit, num_hits)); + use_array.push_back(use_array_in); + if (need_unpack) { + match_data.back()->setNeedNormalFeatures(true); + match_data.back()->setNeedInterleavedFeatures(true); + unpack_none = false; + } else { + match_data.back()->tagAsNotNeeded(); + unpack_all = false; + } + return *this; + } + SearchIterator::UP make_leaf(size_t i) { + if (use_array[i]) { + return std::make_unique<ArrayIterator>(*child_hits[i], *match_data[i]); + } else { + return BitVectorIterator::create(child_hits[i].get(), *match_data[i], true); + } + } + SearchIterator::UP make_or(bool optimize = false) { + assert(!child_hits.empty()); + if (child_hits.size() == 1) { + // use child directly if there is only one + return make_leaf(0); + } + std::vector<SearchIterator::UP> children; + for (size_t i = 0; i < child_hits.size(); ++i) { + children.push_back(make_leaf(i)); + } + UnpackInfo unpack; + if (unpack_all) { + unpack.forceAll(); + } else if (!unpack_none) { + for (size_t i = 0; i < match_data.size(); ++i) { + if (!match_data[i]->isNotNeeded()) { + unpack.add(i); + } + } + } + auto result = OrSearch::create(std::move(children), true, unpack); + if (optimize) { + result = queryeval::MultiBitVectorIteratorBase::optimize(std::move(result)); + } + return result; + } + OrSetup &prepare_bm(size_t child_cnt, size_t hits_per_child) { + for (size_t i = 0; i < child_cnt; ++i) { + add(hits_per_child, should_use_array(hits_per_child), false); + } + return *this; + } + std::pair<size_t,double> bm_search_ms(bool optimized = false) { + auto search_up = make_or(optimized); + SearchIterator &search = *search_up; + size_t hits = 0; + BenchmarkTimer timer(budget); + while (timer.has_budget()) { + timer.before(); + hits = 0; + 
search.initRange(1, docid_limit); + uint32_t docid = search.seekFirst(1); + while (docid < docid_limit) { + ++hits; + docid = search.seekNext(docid + 1); + // no unpack + } + timer.after(); + } + return std::make_pair(hits, timer.min_time() * 1000.0); + } + void verify_not_match(uint32_t docid) { + for (size_t i = 0; i < match_data.size(); ++i) { + EXPECT_FALSE(child_hits[i]->testBit(docid)); + } + } + void verify_match(uint32_t docid, bool unpacked, bool check_skipped_unpack) { + bool match = false; + for (size_t i = 0; i < match_data.size(); ++i) { + if (child_hits[i]->testBit(docid)) { + match = true; + if (unpacked) { + if (!match_data[i]->isNotNeeded()) { + EXPECT_EQ(match_data[i]->getDocId(), docid) << "unpack was needed"; + } else if (check_skipped_unpack) { + EXPECT_NE(match_data[i]->getDocId(), docid) << "unpack was not needed"; + } + } else { + EXPECT_NE(match_data[i]->getDocId(), docid) << "document was not unpacked"; + } + } else { + EXPECT_NE(match_data[i]->getDocId(), docid) << "document was not a match"; + } + } + EXPECT_TRUE(match); + } + void reset_match_data() { + // this is needed since we re-search the same docid space + // multiple times and may end up finding a result we are not + // unpacking that was unpacked in the last iteration thus + // breaking the "document was not unpacked" test condition. 
+ for (auto &tmd: match_data) { + tmd->resetOnlyDocId(0); + } + } + void verify_seek_unpack(bool check_skipped_unpack = false, bool optimized = false) { + auto search_up = make_or(optimized); + SearchIterator &search = *search_up; + for (size_t unpack_nth: {1, 3}) { + for (size_t skip: {1, 31}) { + uint32_t hits = 0; + uint32_t check_at = 1; + search.initRange(1, docid_limit); + uint32_t docid = search.seekFirst(1); + while (docid < docid_limit) { + for (; check_at < docid; ++check_at) { + verify_not_match(check_at); + } + if (++hits % unpack_nth == 0) { + search.unpack(docid); + verify_match(check_at, true, check_skipped_unpack); + } else { + verify_match(check_at, false, check_skipped_unpack); + } + check_at = docid + skip; + docid = search.seekNext(docid + skip); + } + for (; check_at < docid_limit; ++check_at) { + verify_not_match(check_at); + } + reset_match_data(); + } + } + } + ~OrSetup(); +}; +OrSetup::~OrSetup() = default; + +TEST(OrSpeed, array_iterator_seek_unpack) { + OrSetup setup(100); + setup.add(10, true, true); + setup.verify_seek_unpack(); +} + +TEST(OrSpeed, or_seek_unpack) { + for (bool optimize: {false, true}) { + for (double target: {0.1, 0.5, 1.0, 10.0}) { + for (int unpack: {0,1,2}) { + OrSetup setup(1000); + size_t part = setup.per_child(target, 13); + SCOPED_TRACE(fmt("optimize: %s, part: %zu, unpack: %d", + optimize ? 
"true" : "false", part, unpack)); + for (size_t i = 0; i < 13; ++i) { + bool use_array = (i/2)%2 == 0; + bool need_unpack = unpack > 0; + if (unpack == 2 && i % 2 == 0) { + need_unpack = false; + } + setup.add(part, use_array, need_unpack); + } + setup.verify_seek_unpack(true, optimize); + } + } + } +} + +TEST(OrSpeed, bm_array_vs_bitvector) { + for (size_t one_of: {16, 32, 64}) { + double target = 1.0 / one_of; + size_t hits = target * bench_docs; + OrSetup setup(bench_docs); + setup.add(hits, false, false); + for (bool use_array: {false, true}) { + setup.use_array[0] = use_array; + auto result = setup.bm_search_ms(); + fprintf(stderr, "LEAF(%s): (one of %4zu) hits: %8zu, time: %10.3f ms, time per hits: %10.3f ns\n", use_array + ? " array" + : "bitvector", one_of, result.first, result.second, (result.second * 1000.0 * 1000.0) / result.first); + } + } +} + +TEST(OrSpeed, bm_strict_or) { + for (double target: {0.001, 0.01, 0.1, 1.0, 10.0}) { + for (size_t child_cnt: {5, 10, 100, 1000}) { + OrSetup setup(bench_docs); + size_t part = setup.per_child(target, child_cnt); + if (part > 0) { + auto result = setup.prepare_bm(child_cnt, part).bm_search_ms(); + fprintf(stderr, "OR bench(children: %4zu, hits_per_child: %8zu): total_hits: %8zu, time: %10.3f ms, time per hits: %10.3f ns\n", + child_cnt, part, result.first, result.second, (result.second * 1000.0 * 1000.0) / result.first); + } + } + } +} + +int main(int argc, char **argv) { + if (argc > 1 && (argv[1] == std::string("bench"))) { + budget = 5.0; + bench_docs = 10'000'000; + fprintf(stderr, "running in benchmarking mode\n"); + ++argv; + --argc; + } + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp index 4e6e565022a..5cc299983f0 100644 --- a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp +++ 
b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp @@ -5,6 +5,7 @@ #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/vespalib/text/utf8.h> +#include <algorithm> using search::FoldedStringCompare; using vespalib::LowerCase; diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp index 868c0013dd5..60129a9e577 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp @@ -4,6 +4,7 @@ #include "i_enum_store.h" #include "i_enum_store_dictionary.h" #include <vespa/vespalib/util/array.hpp> +#include <algorithm> namespace search::enumstore { diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp index 0bbdf89bab7..2b25aa29747 100644 --- a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp @@ -8,7 +8,6 @@ using search::fef::TermFieldMatchData; using std::unique_ptr; -using std::transform; using std::vector; using vespalib::ObjectVisitor; diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java index 5c63b07dcc0..5ff7b4592a1 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java @@ -1058,7 +1058,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { private ParsedDocumentOperation parse(InputStream inputStream, String docId, DocumentOperationType operation) { try { - return new JsonReader(manager, 
inputStream, jsonFactory).readSingleDocument(operation, docId); + return new JsonReader(manager, inputStream, jsonFactory).readSingleDocumentStreaming(operation, docId); } catch (IllegalArgumentException e) { incrementMetricParseError(); throw e; diff --git a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java index c8fcb4c4635..04639db4dac 100644 --- a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java +++ b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java @@ -411,6 +411,7 @@ public class DocumentV1ApiTest { DocumentUpdate expectedUpdate = new DocumentUpdate(doc3.getDataType(), doc3.getId()); expectedUpdate.addFieldUpdate(FieldUpdate.createAssign(doc3.getField("artist"), new StringFieldValue("Lisa Ekdahl"))); expectedUpdate.setCondition(new TestAndSetCondition("true")); + expectedUpdate.setCreateIfNonExistent(true); assertEquals(expectedUpdate, update); parameters.responseHandler().get().handleResponse(new UpdateResponse(0, false)); assertEquals(parameters().withRoute("content"), parameters); @@ -419,10 +420,16 @@ public class DocumentV1ApiTest { response = driver.sendRequest("http://localhost/document/v1/space/music/docid?selection=true&cluster=content&timeChunk=10", PUT, """ { + "extra-ignored-field": { "foo": [{ }], "bar": null }, + "another-ignored-field": [{ "foo": [{ }] }], + "remove": "id:ns:type::ignored", + "put": "id:ns:type::ignored", "fields": { "artist": { "assign": "Lisa Ekdahl" }, "nonexisting": { "assign": "Ignored" } - } + }, + "post": "id:ns:type::ignored", + "create": true }"""); assertSameJson(""" { @@ -778,7 +785,7 @@ public class DocumentV1ApiTest { response = driver.sendRequest("http://localhost/document/v1/space/music/number/1/two?condition=test%20it", POST, ""); assertSameJson("{" + " 
\"pathId\": \"/document/v1/space/music/number/1/two\"," + - " \"message\": \"Could not read document, no document?\"" + + " \"message\": \"expected start of root object, got null\"" + "}", response.readAll()); assertEquals(400, response.getStatus()); @@ -791,7 +798,8 @@ public class DocumentV1ApiTest { "}"); Inspector responseRoot = SlimeUtils.jsonToSlime(response.readAll()).get(); assertEquals("/document/v1/space/music/number/1/two", responseRoot.field("pathId").asString()); - assertTrue(responseRoot.field("message").asString().startsWith("Unexpected character ('â”»' (code 9531 / 0x253b)): was expecting double-quote to start field name")); + assertTrue(responseRoot.field("message").asString(), + responseRoot.field("message").asString().startsWith("failed parsing document: Unexpected character ('â”»' (code 9531 / 0x253b)): was expecting double-quote to start field name")); assertEquals(400, response.getStatus()); // PUT on a unknown document type is a 400 diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 1f44d90f924..452b2dc0da9 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -1266,7 +1266,7 @@ "public static com.yahoo.tensor.Tensor from(double)" ], "fields" : [ - "public static final int INVALID_INDEX" + "public static final int invalidIndex" ] }, "com.yahoo.tensor.TensorAddress$Builder" : { @@ -1324,7 +1324,7 @@ "public static java.lang.String labelToString(java.lang.String)", "public com.yahoo.tensor.TensorAddress partialCopy(int[])", "public com.yahoo.tensor.TensorAddress fullAddressOf(java.util.List, int[])", - "public com.yahoo.tensor.TensorAddress sparsePartialAddress(com.yahoo.tensor.TensorType, java.util.List)", + "public com.yahoo.tensor.TensorAddress mappedPartialAddress(com.yahoo.tensor.TensorType, java.util.List)", "public bridge synthetic int compareTo(java.lang.Object)" ], "fields" : [ ] diff --git a/vespajlib/src/main/java/com/yahoo/tensor/DirectIndexedAddress.java 
b/vespajlib/src/main/java/com/yahoo/tensor/DirectIndexedAddress.java index 37752361876..4379d50520c 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/DirectIndexedAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/DirectIndexedAddress.java @@ -7,19 +7,25 @@ package com.yahoo.tensor; * long stride = addr.getStride(dimension) * i = 0...size_of_dimension * double value = tensor.get(base + i * stride); + * + * @author baldersheim */ public final class DirectIndexedAddress { + private final DimensionSizes sizes; - private final int [] indexes; + private final int[] indexes; private long directIndex; + private DirectIndexedAddress(DimensionSizes sizes) { this.sizes = sizes; indexes = new int[sizes.dimensions()]; directIndex = 0; } + static DirectIndexedAddress of(DimensionSizes sizes) { return new DirectIndexedAddress(sizes); } + /** Sets the current index of a dimension */ public void setIndex(int dimension, int index) { if (index < 0 || index >= sizes.size(dimension)) { @@ -29,10 +35,13 @@ public final class DirectIndexedAddress { directIndex += getStride(dimension) * diff; indexes[dimension] = index; } + /** Retrieve the index that can be used for direct lookup in an indexed tensor. 
*/ public long getDirectIndex() { return directIndex; } + /** returns the stride to be used for the given dimension */ public long getStride(int dimension) { return sizes.productOfDimensionsAfter(dimension); } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java index 085f9172095..53f50fc4d02 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedDoubleTensor.java @@ -78,9 +78,6 @@ class IndexedDoubleTensor extends IndexedTensor { @Override public Builder cell(TensorAddress address, double value) { - if (address == null) { - return null; - } values[(int)toValueIndex(address, sizes(), type)] = value; return this; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java index a428524612b..fc0473c635a 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/IndexedTensor.java @@ -172,7 +172,7 @@ public abstract class IndexedTensor implements Tensor { static long toValueIndex(TensorAddress address, DimensionSizes sizes, TensorType type) { long valueIndex = 0; - for (int i = 0, sz = address.size(); i < sz; i++) { + for (int i = 0, size = address.size(); i < size; i++) { long label = address.numericLabel(i); if (label >= sizes.size(i)) throw new IllegalArgumentException(address + " is not within the bounds of " + type); @@ -1058,7 +1058,7 @@ public abstract class IndexedTensor implements Tensor { /** In this case we can reuse the source index computation for the iteration index */ private final static class EqualSizeMultiDimensionIndexes extends MultiDimensionIndexes { - private long lastComputedSourceValueIndex = Tensor.INVALID_INDEX; + private long lastComputedSourceValueIndex = Tensor.invalidIndex; private EqualSizeMultiDimensionIndexes(DimensionSizes 
sizes, List<Integer> iterateDimensions, long[] initialIndexes, long size) { super(sizes, sizes, iterateDimensions, initialIndexes, size); diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index d4469f447cb..65c6677e7e3 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -107,27 +107,30 @@ public class MixedTensor implements Tensor { @Override public Iterator<Cell> cellIterator() { return new Iterator<>() { + final Iterator<DenseSubspace> blockIterator = index.denseSubspaces.iterator(); - DenseSubspace currBlock = null; final int[] labels = new int[index.indexedDimensions.size()]; + DenseSubspace currentBlock = null; int currOffset = index.denseSubspaceSize; int prevOffset = -1; + @Override public boolean hasNext() { return (currOffset < index.denseSubspaceSize || blockIterator.hasNext()); } + @Override public Cell next() { if (currOffset == index.denseSubspaceSize) { - currBlock = blockIterator.next(); + currentBlock = blockIterator.next(); currOffset = 0; } if (currOffset != prevOffset) { // Optimization for index.denseSubspaceSize == 1 index.denseOffsetToAddress(currOffset, labels); } - TensorAddress fullAddr = currBlock.sparseAddress.fullAddressOf(index.type.dimensions(), labels); + TensorAddress fullAddr = currentBlock.sparseAddress.fullAddressOf(index.type.dimensions(), labels); prevOffset = currOffset; - double value = currBlock.cells[currOffset++]; + double value = currentBlock.cells[currOffset++]; return new Cell(fullAddr, value); } }; @@ -140,20 +143,23 @@ public class MixedTensor implements Tensor { @Override public Iterator<Double> valueIterator() { return new Iterator<>() { + final Iterator<DenseSubspace> blockIterator = index.denseSubspaces.iterator(); - double[] currBlock = null; + double[] currentBlock = null; int currOffset = index.denseSubspaceSize; + @Override public boolean 
hasNext() { return (currOffset < index.denseSubspaceSize || blockIterator.hasNext()); } + @Override public Double next() { if (currOffset == index.denseSubspaceSize) { - currBlock = blockIterator.next().cells; + currentBlock = blockIterator.next().cells; currOffset = 0; } - return currBlock[currOffset++]; + return currentBlock[currOffset++]; } }; } @@ -319,7 +325,7 @@ public class MixedTensor implements Tensor { @Override public Tensor.Builder cell(TensorAddress address, double value) { - TensorAddress sparsePart = address.sparsePartialAddress(index.sparseType, index.type.dimensions()); + TensorAddress sparsePart = address.mappedPartialAddress(index.sparseType, index.type.dimensions()); int denseOffset = index.denseOffsetOf(address); double[] denseSubspace = denseSubspace(sparsePart); denseSubspace[denseOffset] = value; @@ -438,7 +444,7 @@ public class MixedTensor implements Tensor { private final TensorType denseType; private final List<TensorType.Dimension> mappedDimensions; private final List<TensorType.Dimension> indexedDimensions; - private final int [] indexedDimensionsSize; + private final int[] indexedDimensionsSize; private ImmutableMap<TensorAddress, Integer> sparseMap; private List<DenseSubspace> denseSubspaces; @@ -473,7 +479,7 @@ public class MixedTensor implements Tensor { } private DenseSubspace blockOf(TensorAddress address) { - TensorAddress sparsePart = address.sparsePartialAddress(sparseType, type.dimensions()); + TensorAddress sparsePart = address.mappedPartialAddress(sparseType, type.dimensions()); Integer blockNum = sparseMap.get(sparsePart); if (blockNum == null || blockNum >= denseSubspaces.size()) { return null; diff --git a/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java index da643d8c173..8852bcd1ff3 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java @@ -4,13 +4,13 @@ package 
com.yahoo.tensor; import com.yahoo.tensor.impl.Label; /** - * An address to a subset of a tensors' cells, specifying a label for some but not necessarily all of the tensors + * An address to a subset of a tensors' cells, specifying a label for some, but not necessarily all, of the tensors * dimensions. * * @author bratseth */ // Implementation notes: -// - These are created in inner (though not inner-most) loops so they are implemented with minimal allocation. +// - These are created in inner (though not innermost) loops, so they are implemented with minimal allocation. // We also avoid non-essential error checking. // - We can add support for string labels later without breaking the API public class PartialAddress { @@ -36,7 +36,7 @@ public class PartialAddress { for (int i = 0; i < dimensionNames.length; i++) if (dimensionNames[i].equals(dimensionName)) return labels[i]; - return Tensor.INVALID_INDEX; + return Tensor.invalidIndex; } /** Returns the label of this dimension, or null if no label is specified for it */ @@ -68,7 +68,7 @@ public class PartialAddress { long[] numericLabels = new long[labels.length]; for (int i = 0; i < type.dimensions().size(); i++) { long label = numericLabel(type.dimensions().get(i).name()); - if (label == Tensor.INVALID_INDEX) + if (label == Tensor.invalidIndex) throw new IllegalArgumentException(type + " dimension names does not match " + this); numericLabels[i] = label; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java index d650b88f202..ac9dc4e4eca 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/Tensor.java @@ -40,7 +40,7 @@ import static com.yahoo.tensor.functions.ScalarFunctions.Hamming; * A multidimensional array which can be used in computations. * <p> * A tensor consists of a set of <i>dimension</i> names and a set of <i>cells</i> containing scalar <i>values</i>. 
- * Each cell is is identified by its <i>address</i>, which consists of a set of dimension-label pairs which defines + * Each cell is identified by its <i>address</i>, which consists of a set of dimension-label pairs which defines * the location of that cell. Both dimensions and labels are string on the form of an identifier or integer. * <p> * The size of the set of dimensions of a tensor is called its <i>rank</i>. @@ -55,7 +55,9 @@ import static com.yahoo.tensor.functions.ScalarFunctions.Hamming; * @author bratseth */ public interface Tensor { - int INVALID_INDEX = -1; + + /** The constant signaling a nonexisting value in operations addressing tensor values by index. */ + int invalidIndex = -1; // ----------------- Accessors @@ -65,25 +67,24 @@ public interface Tensor { default boolean isEmpty() { return size() == 0; } /** - * Returns the number of cells in this. - * Allows for very large tensors, but if you only handle size in the int range - * prefer sizeAsInt(). - **/ + * Returns the number of cells in this, allowing for very large tensors. + * Prefer sizeAsInt in implementations that cannot handle sizes outside the int range. + */ default long size() { return sizeAsInt(); } /** - * Safe way to get size as an int and detect when not possible. - * Prefer this over size() as - * @return size() as an int + * Returns the size of this as an int or throws an exception if it is too large to fit in an int. + * Prefer this over size() with implementations that only handle sizes in the int range. 
+ * + * @throws IndexOutOfBoundsException if the size is too large to fit in an int */ default int sizeAsInt() { - long sz = size(); - if (sz > Integer.MAX_VALUE) { - throw new IndexOutOfBoundsException("size = " + sz + ", which is too large to fit in an int"); - } - return (int) sz; + long size = size(); + if (size > Integer.MAX_VALUE) + throw new IndexOutOfBoundsException("size = " + size + ", which is too large to fit in an int"); + return (int) size; } /** Returns the value of a cell, or 0.0 if this cell does not exist */ @@ -91,7 +92,8 @@ public interface Tensor { /** Returns true if this cell exists */ boolean has(TensorAddress address); - /** null = no value present. More efficient that if (t.has(key)) t.get(key) */ + + /** Returns the value at this address, or null if it does not exist. */ Double getAsDouble(TensorAddress address); /** @@ -132,7 +134,7 @@ public interface Tensor { /** * Returns a new tensor where existing cells in this tensor have been * modified according to the given operation and cells in the given map. - * Cells in the map outside of existing cells are thus ignored. + * Cells in the map outside existing cells are thus ignored. * * @param op the modifying function * @param cells the cells to modify @@ -151,9 +153,9 @@ public interface Tensor { /** * Returns a new tensor where existing cells in this tensor have been - * removed according to the given set of addresses. Only valid for sparse + * removed according to the given set of addresses. Only valid for mapped * or mixed tensors. For mixed tensors, addresses are assumed to only - * contain the sparse dimensions, as the entire dense subspace is removed. + * contain the mapped dimensions, as the entire indexed subspace is removed.
* * @param addresses list of addresses to remove * @return a new tensor where cells have been removed @@ -503,11 +505,10 @@ public interface Tensor { public TensorAddress getKey() { return address; } /** - * Returns the direct index which can be used to locate this cell, or -1 if not available. - * This is for optimizations mapping between tensors where this is possible without creating a - * TensorAddress. + * Returns the direct index which can be used to locate this cell, or Tensor.invalidIndex if not available. + * This is for optimizations mapping between tensors where this is possible without creating a TensorAddress. */ - long getDirectIndex() { return INVALID_INDEX; } + long getDirectIndex() { return invalidIndex; } /** Returns the value as a double */ @Override diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index 59a5e2a49b1..5c2c4d77fad 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -32,9 +32,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return TensorAddressAny.of(labels); } - /** - * Returns the number of labels in this - */ + /** Returns the number of labels in this */ public abstract int size(); /** @@ -69,10 +67,10 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { @Override public String toString() { StringBuilder sb = new StringBuilder("cell address ("); - int sz = size(); - if (sz > 0) { + int size = size(); + if (size > 0) { sb.append(label(0)); - for (int i = 1; i < sz; i++) { + for (int i = 1; i < size; i++) { sb.append(',').append(label(i)); } } @@ -113,9 +111,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return TensorAddressAny.ofUnsafe(labels); } - /** Creates a complete address by taking the sparse dimmensions from this and the indexed from the densePart */ - public 
TensorAddress fullAddressOf(List<TensorType.Dimension> dimensions, int [] densePart) { - int [] labels = new int[dimensions.size()]; + /** Creates a complete address by taking the mapped dimmensions from this and the indexed from the indexedPart */ + public TensorAddress fullAddressOf(List<TensorType.Dimension> dimensions, int[] densePart) { + int[] labels = new int[dimensions.size()]; int mappedIndex = 0; int indexedIndex = 0; for (int i = 0; i < labels.length; i++) { @@ -131,11 +129,17 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return TensorAddressAny.ofUnsafe(labels); } - /** Extracts the sparse(non-indexed) dimensions of the address */ - public TensorAddress sparsePartialAddress(TensorType sparseType, List<TensorType.Dimension> dimensions) { + /** + * Returns an address containing the mapped dimensions of this. + * + * @param mappedType the type of the mapped subset of the type this is an address of; + * which is also the type of the returned address + * @param dimensions all the dimensions of the type this is an address of + */ + public TensorAddress mappedPartialAddress(TensorType mappedType, List<TensorType.Dimension> dimensions) { if (dimensions.size() != size()) throw new IllegalArgumentException("Tensor type of " + this + " is not the same size as " + this); - TensorAddress.Builder builder = new TensorAddress.Builder(sparseType); + TensorAddress.Builder builder = new TensorAddress.Builder(mappedType); for (int i = 0; i < dimensions.size(); ++i) { TensorType.Dimension dimension = dimensions.get(i); if ( ! 
dimension.isIndexed()) @@ -150,9 +154,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { final TensorType type; final int[] labels; - private static int [] createEmptyLabels(int size) { - int [] labels = new int[size]; - Arrays.fill(labels, Tensor.INVALID_INDEX); + private static int[] createEmptyLabels(int size) { + int[] labels = new int[size]; + Arrays.fill(labels, Tensor.invalidIndex); return labels; } @@ -174,7 +178,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { var mappedSubtype = type.mappedSubtype(); if (mappedSubtype.rank() != 1) throw new IllegalArgumentException("Cannot add a label without explicit dimension to a tensor of type " + - type + ": Must have exactly one sparse dimension"); + type + ": Must have exactly one mapped dimension"); add(mappedSubtype.dimensions().get(0).name(), label); return this; } @@ -212,7 +216,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { void validate() { for (int i = 0; i < labels.length; i++) - if (labels[i] == Tensor.INVALID_INDEX) + if (labels[i] == Tensor.invalidIndex) throw new IllegalArgumentException("Missing a label for dimension '" + type.dimensions().get(i).name() + "' for " + type); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java index 62ed4ad683c..6b81d023a9a 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java @@ -204,7 +204,7 @@ public class TensorType { for (int i = 0; i < dimensions.size(); i++) if (dimensions.get(i).name().equals(dimension)) return i; - return Tensor.INVALID_INDEX; + return Tensor.invalidIndex; } /* Returns the bound of this dimension if it is present and bound in this, empty otherwise */ diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java index 
37ca7f979a1..dcfba5ecfad 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Concat.java @@ -172,7 +172,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET private TensorAddress combineAddresses(TensorAddress a, int[] aToIndexes, TensorAddress b, int[] bToIndexes, TensorType concatType, long concatOffset, String concatDimension) { long[] combinedLabels = new long[concatType.dimensions().size()]; - Arrays.fill(combinedLabels, Tensor.INVALID_INDEX); + Arrays.fill(combinedLabels, Tensor.invalidIndex); int concatDimensionIndex = concatType.indexOfDimension(concatDimension).get(); mapContent(a, combinedLabels, aToIndexes, concatDimensionIndex, concatOffset); // note: This sets a nonsensical value in the concat dimension boolean compatible = mapContent(b, combinedLabels, bToIndexes, concatDimensionIndex, concatOffset); // ... which is overwritten by the right value here @@ -191,7 +191,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET private int[] mapIndexes(TensorType fromType, TensorType toType) { int[] toIndexes = new int[fromType.dimensions().size()]; for (int i = 0; i < fromType.dimensions().size(); i++) - toIndexes[i] = toType.indexOfDimension(fromType.dimensions().get(i).name()).orElse(Tensor.INVALID_INDEX); + toIndexes[i] = toType.indexOfDimension(fromType.dimensions().get(i).name()).orElse(Tensor.invalidIndex); return toIndexes; } @@ -208,7 +208,7 @@ public class Concat<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMET to[toIndex] = from.numericLabel(i) + concatOffset; } else { - if (to[toIndex] != Tensor.INVALID_INDEX && to[toIndex] != from.numericLabel(i)) return false; + if (to[toIndex] != Tensor.invalidIndex && to[toIndex] != from.numericLabel(i)) return false; to[toIndex] = from.numericLabel(i); } } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java 
b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java index 047d8ee6ef0..14adcc8f18a 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/Join.java @@ -378,7 +378,7 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP private static TensorAddress joinAddresses(TensorAddress a, int[] aToIndexes, TensorAddress b, int[] bToIndexes, TensorType joinedType) { int[] joinedLabels = new int[joinedType.dimensions().size()]; - Arrays.fill(joinedLabels, Tensor.INVALID_INDEX); + Arrays.fill(joinedLabels, Tensor.invalidIndex); mapContent(a, joinedLabels, aToIndexes); boolean compatible = mapContent(b, joinedLabels, bToIndexes); if ( ! compatible) return null; @@ -392,10 +392,10 @@ public class Join<NAMETYPE extends Name> extends PrimitiveTensorFunction<NAMETYP * occupied by a different value */ private static boolean mapContent(TensorAddress from, int[] to, int[] indexMap) { - for (int i = 0, sz = from.size(); i < sz; i++) { + for (int i = 0, size = from.size(); i < size; i++) { int toIndex = indexMap[i]; int label = Convert.safe2Int(from.numericLabel(i)); - if (to[toIndex] != Tensor.INVALID_INDEX && to[toIndex] != label) + if (to[toIndex] != Tensor.invalidIndex && to[toIndex] != label) return false; to[toIndex] = label; } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java index 0ab1454eb58..a09c0223d28 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java @@ -7,16 +7,26 @@ import java.util.Arrays; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +/** + * A label is a value of a mapped dimension of a tensor. + * This class provides a mapping of labels to numbers which allow for more efficient computation with + * mapped tensor dimensions. 
+ * + * @author baldersheim + */ public class Label { - private static final String [] SMALL_INDEXES = createSmallIndexesAsStrings(1000); + + private static final String[] SMALL_INDEXES = createSmallIndexesAsStrings(1000); + private final static Map<String, Integer> string2Enum = new ConcurrentHashMap<>(); + // Index 0 is unused, that is a valid positive number // 1(-1) is reserved for the Tensor.INVALID_INDEX - private static volatile String [] uniqueStrings = {"UNIQUE_UNUSED_MAGIC", "Tensor.INVALID_INDEX"}; + private static volatile String[] uniqueStrings = {"UNIQUE_UNUSED_MAGIC", "Tensor.INVALID_INDEX"}; private static int numUniqeStrings = 2; private static String[] createSmallIndexesAsStrings(int count) { - String [] asStrings = new String[count]; + String[] asStrings = new String[count]; for (int i = 0; i < count; i++) { asStrings[i] = String.valueOf(i); } @@ -46,7 +56,7 @@ public class Label { } public static int toNumber(String s) { - if (s == null) { return Tensor.INVALID_INDEX; } + if (s == null) { return Tensor.invalidIndex; } try { if (validNumericIndex(s)) { return Integer.parseInt(s); @@ -55,14 +65,16 @@ public class Label { } return string2Enum.computeIfAbsent(s, Label::addNewUniqueString); } + public static String fromNumber(int v) { if (v >= 0) { return asNumericString(v); } else { - if (v == Tensor.INVALID_INDEX) { return null; } + if (v == Tensor.invalidIndex) { return null; } return uniqueStrings[-v]; } } + public static String fromNumber(long v) { return fromNumber(Convert.safe2Int(v)); } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java index 31863c99a74..2e70811a67c 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny.java @@ -18,6 +18,7 @@ import static com.yahoo.tensor.impl.Label.fromNumber; * @author baldersheim */ abstract public class 
TensorAddressAny extends TensorAddress { + @Override public String label(int i) { return fromNumber((int)numericLabel(i)); @@ -26,37 +27,47 @@ abstract public class TensorAddressAny extends TensorAddress { public static TensorAddress of() { return TensorAddressEmpty.empty; } + public static TensorAddress of(String label) { return new TensorAddressAny1(toNumber(label)); } + public static TensorAddress of(String label0, String label1) { return new TensorAddressAny2(toNumber(label0), toNumber(label1)); } + public static TensorAddress of(String label0, String label1, String label2) { return new TensorAddressAny3(toNumber(label0), toNumber(label1), toNumber(label2)); } + public static TensorAddress of(String label0, String label1, String label2, String label3) { return new TensorAddressAny4(toNumber(label0), toNumber(label1), toNumber(label2), toNumber(label3)); } - public static TensorAddress of(String [] labels) { - int [] labelsAsInt = new int[labels.length]; + + public static TensorAddress of(String[] labels) { + int[] labelsAsInt = new int[labels.length]; for (int i = 0; i < labels.length; i++) { labelsAsInt[i] = toNumber(labels[i]); } return ofUnsafe(labelsAsInt); } + public static TensorAddress of(int label) { return new TensorAddressAny1(sanitize(label)); } + public static TensorAddress of(int label0, int label1) { return new TensorAddressAny2(sanitize(label0), sanitize(label1)); } + public static TensorAddress of(int label0, int label1, int label2) { return new TensorAddressAny3(sanitize(label0), sanitize(label1), sanitize(label2)); } + public static TensorAddress of(int label0, int label1, int label2, int label3) { return new TensorAddressAny4(sanitize(label0), sanitize(label1), sanitize(label2), sanitize(label3)); } + public static TensorAddress of(int ... 
labels) { return switch (labels.length) { case 0 -> of(); @@ -72,6 +83,7 @@ abstract public class TensorAddressAny extends TensorAddress { } }; } + public static TensorAddress of(long label) { return of(safe2Int(label)); } @@ -96,7 +108,7 @@ abstract public class TensorAddressAny extends TensorAddress { case 3 -> ofUnsafe(safe2Int(labels[0]), safe2Int(labels[1]), safe2Int(labels[2])); case 4 -> ofUnsafe(safe2Int(labels[0]), safe2Int(labels[1]), safe2Int(labels[2]), safe2Int(labels[3])); default -> { - int [] labelsAsInt = new int[labels.length]; + int[] labelsAsInt = new int[labels.length]; for (int i = 0; i < labels.length; i++) { labelsAsInt[i] = safe2Int(labels[i]); } @@ -108,15 +120,19 @@ abstract public class TensorAddressAny extends TensorAddress { private static TensorAddress ofUnsafe(int label) { return new TensorAddressAny1(label); } + private static TensorAddress ofUnsafe(int label0, int label1) { return new TensorAddressAny2(label0, label1); } + private static TensorAddress ofUnsafe(int label0, int label1, int label2) { return new TensorAddressAny3(label0, label1, label2); } + private static TensorAddress ofUnsafe(int label0, int label1, int label2, int label3) { return new TensorAddressAny4(label0, label1, label2, label3); } + public static TensorAddress ofUnsafe(int ... 
labels) { return switch (labels.length) { case 0 -> of(); @@ -127,10 +143,12 @@ abstract public class TensorAddressAny extends TensorAddress { default -> new TensorAddressAnyN(labels); }; } + private static int sanitize(int label) { - if (label < Tensor.INVALID_INDEX) { + if (label < Tensor.invalidIndex) { throw new IndexOutOfBoundsException("cell label " + label + " must be positive"); } return label; } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java index a2b0d318a50..a9be6173781 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny1.java @@ -5,11 +5,14 @@ package com.yahoo.tensor.impl; import com.yahoo.tensor.TensorAddress; /** - * Single dimension + * A one-dimensional address. + * * @author baldersheim */ final class TensorAddressAny1 extends TensorAddressAny { + private final int label; + TensorAddressAny1(int label) { this.label = label; } @Override public int size() { return 1; } @@ -34,4 +37,5 @@ final class TensorAddressAny1 extends TensorAddressAny { public boolean equals(Object o) { return (o instanceof TensorAddressAny1 any) && (label == any.label); } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java index d77a689852f..43f65d495cf 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny2.java @@ -7,11 +7,14 @@ import com.yahoo.tensor.TensorAddress; import static java.lang.Math.abs; /** - * 2 dimensional address + * A two-dimensional address. 
+ * * @author baldersheim */ final class TensorAddressAny2 extends TensorAddressAny { + private final int label0, label1; + TensorAddressAny2(int label0, int label1) { this.label0 = label0; this.label1 = label1; @@ -46,4 +49,5 @@ final class TensorAddressAny2 extends TensorAddressAny { public boolean equals(Object o) { return (o instanceof TensorAddressAny2 any) && (label0 == any.label0) && (label1 == any.label1); } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java index 95e14bd375c..c22ff47b3c4 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny3.java @@ -7,11 +7,14 @@ import com.yahoo.tensor.TensorAddress; import static java.lang.Math.abs; /** - * 3 dimensional address + * A three-dimensional address. + * * @author baldersheim */ final class TensorAddressAny3 extends TensorAddressAny { + private final int label0, label1, label2; + TensorAddressAny3(int label0, int label1, int label2) { this.label0 = label0; this.label1 = label1; @@ -54,4 +57,5 @@ final class TensorAddressAny3 extends TensorAddressAny { (label1 == any.label1) && (label2 == any.label2); } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java index 8a45483340e..6eb6b9216bf 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAny4.java @@ -7,11 +7,14 @@ import com.yahoo.tensor.TensorAddress; import static java.lang.Math.abs; /** - * 4 dimensional address + * A four-dimensional address. 
+ * * @author baldersheim */ final class TensorAddressAny4 extends TensorAddressAny { + private final int label0, label1, label2, label3; + TensorAddressAny4(int label0, int label1, int label2, int label3) { this.label0 = label0; this.label1 = label1; @@ -59,4 +62,5 @@ final class TensorAddressAny4 extends TensorAddressAny { (label2 == any.label2) && (label3 == any.label3); } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java index acd7ed60722..d5bac62bf18 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressAnyN.java @@ -9,22 +9,26 @@ import java.util.Arrays; import static java.lang.Math.abs; /** - * N dimensional address + * An n-dimensional address. + * * @author baldersheim */ final class TensorAddressAnyN extends TensorAddressAny { - private final int [] labels; - TensorAddressAnyN(int [] labels) { + + private final int[] labels; + + TensorAddressAnyN(int[] labels) { if (labels.length < 1) throw new IllegalArgumentException("Need at least 1 label"); this.labels = labels; } @Override public int size() { return labels.length; } + @Override public long numericLabel(int i) { return labels[i]; } @Override public TensorAddress withLabel(int labelIndex, long label) { - int [] copy = Arrays.copyOf(labels, labels.length); + int[] copy = Arrays.copyOf(labels, labels.length); copy[labelIndex] = Convert.safe2Int(label); return new TensorAddressAnyN(copy); } @@ -45,4 +49,5 @@ final class TensorAddressAnyN extends TensorAddressAny { } return true; } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java index 2d9cd3eed78..eb7e62e913b 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java +++ 
b/vespajlib/src/main/java/com/yahoo/tensor/impl/TensorAddressEmpty.java @@ -5,13 +5,18 @@ package com.yahoo.tensor.impl; import com.yahoo.tensor.TensorAddress; /** - * 0 dimesional/empty address + * A zero-dimensional address. + * * @author baldersheim */ final class TensorAddressEmpty extends TensorAddressAny { + static TensorAddress empty = new TensorAddressEmpty(); + private TensorAddressEmpty() {} + @Override public int size() { return 0; } + @Override public long numericLabel(int i) { throw new IllegalArgumentException("Empty address with no labels"); } @Override @@ -21,6 +26,8 @@ final class TensorAddressEmpty extends TensorAddressAny { @Override public int hashCode() { return 0; } + @Override public boolean equals(Object o) { return o instanceof TensorAddressEmpty; } + } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java index ae13b95052b..18ff1f6a1d3 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/impl/TensorAddressAnyTestCase.java @@ -6,11 +6,15 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.Test; +/** + * @author baldersheim + */ public class TensorAddressAnyTestCase { + @Test void testSize() { for (int i = 0; i < 10; i++) { - int [] indexes = new int [i]; + int[] indexes = new int[i]; assertEquals(i, of(indexes).size()); } } @@ -18,8 +22,8 @@ public class TensorAddressAnyTestCase { @Test void testNumericStringEquality() { for (int i = 0; i < 10; i++) { - int [] numericIndexes = new int [i]; - String [] stringIndexes = new String[i]; + int[] numericIndexes = new int[i]; + String[] stringIndexes = new String[i]; for (int j = 0; j < i; j++) { numericIndexes[j] = j; stringIndexes[j] = String.valueOf(j); diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp 
b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp index f416f329331..d3349044fd9 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp @@ -3,6 +3,7 @@ #pragma once #include "unique_store_hash_dictionary_read_snapshot.h" +#include <algorithm> namespace vespalib::datastore { diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h index 095da1d7c7c..490582b5bf7 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h @@ -101,7 +101,7 @@ public: explicit ExplicitLevenshteinDfaImpl(bool is_cased) noexcept : _is_cased(is_cased) {} - ~ExplicitLevenshteinDfaImpl() override = default; + ~ExplicitLevenshteinDfaImpl() override; static constexpr uint8_t max_edits() noexcept { return MaxEdits; } diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp index 5265178cef4..55dd459ff26 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp @@ -94,6 +94,9 @@ struct ExplicitDfaMatcher { }; template <uint8_t MaxEdits> +ExplicitLevenshteinDfaImpl<MaxEdits>::~ExplicitLevenshteinDfaImpl() = default; + +template <uint8_t MaxEdits> LevenshteinDfa::MatchResult ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str) const { ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased); diff --git a/vespalib/src/vespa/vespalib/portal/portal.cpp b/vespalib/src/vespa/vespalib/portal/portal.cpp index 8e91e2b5caf..32cc9e4c644 100644 --- a/vespalib/src/vespa/vespalib/portal/portal.cpp +++ b/vespalib/src/vespa/vespalib/portal/portal.cpp @@ -4,6 +4,7 @@ #include "http_connection.h" #include 
<vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/host_name.h> +#include <algorithm> #include <cassert> namespace vespalib { |