diff options
57 files changed, 1270 insertions, 630 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetrics.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetrics.java index ffbd8d579e5..296b58658b3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetrics.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/DefaultMetrics.java @@ -48,24 +48,24 @@ public class DefaultMetrics { } private static void addSearchChainMetrics(Set<Metric> metrics) { - metrics.add(new Metric("queries.rate")); - metrics.add(new Metric("query_latency.sum")); - metrics.add(new Metric("query_latency.count")); - metrics.add(new Metric("query_latency.max")); + metrics.add(new Metric(ContainerMetrics.QUERIES.rate())); + metrics.add(new Metric(ContainerMetrics.QUERY_LATENCY.sum())); + metrics.add(new Metric(ContainerMetrics.QUERY_LATENCY.count())); + metrics.add(new Metric(ContainerMetrics.QUERY_LATENCY.max())); + metrics.add(new Metric(ContainerMetrics.QUERY_LATENCY.ninety_five_percentile())); + metrics.add(new Metric(ContainerMetrics.QUERY_LATENCY.ninety_nine_percentile())); + metrics.add(new Metric(ContainerMetrics.HITS_PER_QUERY.sum())); + metrics.add(new Metric(ContainerMetrics.HITS_PER_QUERY.count())); + metrics.add(new Metric(ContainerMetrics.HITS_PER_QUERY.max())); + metrics.add(new Metric(ContainerMetrics.TOTAL_HITS_PER_QUERY.sum())); + metrics.add(new Metric(ContainerMetrics.TOTAL_HITS_PER_QUERY.count())); + metrics.add(new Metric(ContainerMetrics.TOTAL_HITS_PER_QUERY.max())); + metrics.add(new Metric(ContainerMetrics.DEGRADED_QUERIES.rate())); + metrics.add(new Metric(ContainerMetrics.FAILED_QUERIES.rate())); metrics.add(new Metric("query_latency.average")); // TODO: Remove with Vespa 9 - metrics.add(new Metric("query_latency.95percentile")); - metrics.add(new Metric("query_latency.99percentile")); - metrics.add(new Metric("hits_per_query.sum")); - metrics.add(new Metric("hits_per_query.count")); - 
metrics.add(new Metric("hits_per_query.max")); metrics.add(new Metric("hits_per_query.average")); // TODO: Remove with Vespa 9 - metrics.add(new Metric("totalhits_per_query.sum")); - metrics.add(new Metric("totalhits_per_query.count")); - metrics.add(new Metric("totalhits_per_query.max")); metrics.add(new Metric("totalhits_per_query.average")); // TODO: Remove with Vespa 9 - metrics.add(new Metric("degraded_queries.rate")); - metrics.add(new Metric("failed_queries.rate")); - metrics.add(new Metric("serverActiveThreads.average")); + metrics.add(new Metric("serverActiveThreads.average")); // TODO: Remove on Vespa 9. Use jdisc.thread_pool.active_threads. } private static void addContentMetrics(Set<Metric> metrics) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 6bbd1b7eef2..d9a23001e90 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -294,75 +294,45 @@ public class VespaMetricSet { private static Set<Metric> getSearchChainMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - metrics.add(new Metric("peak_qps.max")); - metrics.add(new Metric("search_connections.max")); - metrics.add(new Metric("search_connections.sum")); - metrics.add(new Metric("search_connections.count")); - metrics.add(new Metric("feed.latency.max")); - metrics.add(new Metric("feed.latency.sum")); - metrics.add(new Metric("feed.latency.count")); - metrics.add(new Metric("feed.http-requests.count")); - metrics.add(new Metric("feed.http-requests.rate")); - metrics.add(new Metric("queries.rate")); - metrics.add(new Metric("query_container_latency.max")); - metrics.add(new Metric("query_container_latency.sum")); - metrics.add(new Metric("query_container_latency.count")); - metrics.add(new 
Metric("query_latency.max")); - metrics.add(new Metric("query_latency.sum")); - metrics.add(new Metric("query_latency.count")); - metrics.add(new Metric("query_latency.95percentile")); - metrics.add(new Metric("query_latency.99percentile")); - metrics.add(new Metric("query_timeout.min")); - metrics.add(new Metric("query_timeout.max")); - metrics.add(new Metric("query_timeout.sum")); - metrics.add(new Metric("query_timeout.count")); - metrics.add(new Metric("query_timeout.95percentile")); - metrics.add(new Metric("query_timeout.99percentile")); - metrics.add(new Metric("failed_queries.rate")); - metrics.add(new Metric("degraded_queries.rate")); - metrics.add(new Metric("hits_per_query.max")); - metrics.add(new Metric("hits_per_query.sum")); - metrics.add(new Metric("hits_per_query.count")); - metrics.add(new Metric("hits_per_query.95percentile")); - metrics.add(new Metric("hits_per_query.99percentile")); - metrics.add(new Metric("query_hit_offset.max")); - metrics.add(new Metric("query_hit_offset.sum")); - metrics.add(new Metric("query_hit_offset.count")); - metrics.add(new Metric("documents_covered.count")); - metrics.add(new Metric("documents_total.count")); - metrics.add(new Metric("documents_target_total.count")); - addMetric(metrics, "jdisc.render.latency", Set.of("min", "max", "count", "sum", "last", "average")); - addMetric(metrics, "query_item_count", Set.of("max", "sum", "count")); - - metrics.add(new Metric("totalhits_per_query.max")); - metrics.add(new Metric("totalhits_per_query.sum")); - metrics.add(new Metric("totalhits_per_query.count")); - metrics.add(new Metric("totalhits_per_query.95percentile")); - metrics.add(new Metric("totalhits_per_query.99percentile")); - metrics.add(new Metric("empty_results.rate")); - metrics.add(new Metric("requestsOverQuota.rate")); - metrics.add(new Metric("requestsOverQuota.count")); - - metrics.add(new Metric("relevance.at_1.sum")); - metrics.add(new Metric("relevance.at_1.count")); - metrics.add(new 
Metric("relevance.at_3.sum")); - metrics.add(new Metric("relevance.at_3.count")); - metrics.add(new Metric("relevance.at_10.sum")); - metrics.add(new Metric("relevance.at_10.count")); - + addMetric(metrics, ContainerMetrics.PEAK_QPS.max()); + addMetric(metrics, ContainerMetrics.SEARCH_CONNECTIONS.baseName(), Set.of("sum", "count", "max")); + addMetric(metrics, ContainerMetrics.FEED_LATENCY.baseName(), Set.of("sum", "count", "max")); + addMetric(metrics, ContainerMetrics.FEED_HTTP_REQUESTS.baseName(), Set.of("count", "rate")); + addMetric(metrics, ContainerMetrics.QUERIES.rate()); + addMetric(metrics, ContainerMetrics.QUERY_CONTAINER_LATENCY.baseName(), Set.of("sum", "count", "max")); + addMetric(metrics, ContainerMetrics.QUERY_LATENCY.baseName(), Set.of("sum", "count", "max", "95percentile", "99percentile")); + addMetric(metrics, ContainerMetrics.QUERY_TIMEOUT.baseName(), Set.of("sum", "count", "max", "min", "95percentile", "99percentile")); + addMetric(metrics, ContainerMetrics.FAILED_QUERIES.rate()); + addMetric(metrics, ContainerMetrics.DEGRADED_QUERIES.rate()); + addMetric(metrics, ContainerMetrics.HITS_PER_QUERY.baseName(), Set.of("sum", "count", "max", "95percentile", "99percentile")); + addMetric(metrics, ContainerMetrics.SEARCH_CONNECTIONS.baseName(), Set.of("sum", "count", "max")); + addMetric(metrics, ContainerMetrics.QUERY_HIT_OFFSET.baseName(), Set.of("sum", "count", "max")); + addMetric(metrics, ContainerMetrics.DOCUMENTS_COVERED.count()); + addMetric(metrics, ContainerMetrics.DOCUMENTS_TOTAL.count()); + addMetric(metrics, ContainerMetrics.DOCUMENTS_TARGET_TOTAL.count()); + addMetric(metrics, ContainerMetrics.JDISC_RENDER_LATENCY.baseName(), Set.of("min", "max", "count", "sum", "last", "average")); + addMetric(metrics, ContainerMetrics.QUERY_ITEM_COUNT.baseName(), Set.of("max", "sum", "count")); + addMetric(metrics, ContainerMetrics.TOTAL_HITS_PER_QUERY.baseName(), Set.of("sum", "count", "max", "95percentile", "99percentile")); + addMetric(metrics, 
ContainerMetrics.EMPTY_RESULTS.rate()); + addMetric(metrics, ContainerMetrics.REQUESTS_OVER_QUOTA.baseName(), Set.of("rate", "count")); + + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_1.baseName(), Set.of("sum", "count")); + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_3.baseName(), Set.of("sum", "count")); + addMetric(metrics, ContainerMetrics.RELEVANCE_AT_10.baseName(), Set.of("sum", "count")); + // Errors from search container - metrics.add(new Metric("error.timeout.rate")); - metrics.add(new Metric("error.backends_oos.rate")); - metrics.add(new Metric("error.plugin_failure.rate")); - metrics.add(new Metric("error.backend_communication_error.rate")); - metrics.add(new Metric("error.empty_document_summaries.rate")); - metrics.add(new Metric("error.invalid_query_parameter.rate")); - metrics.add(new Metric("error.internal_server_error.rate")); - metrics.add(new Metric("error.misconfigured_server.rate")); - metrics.add(new Metric("error.invalid_query_transformation.rate")); - metrics.add(new Metric("error.result_with_errors.rate")); - metrics.add(new Metric("error.unspecified.rate")); - metrics.add(new Metric("error.unhandled_exception.rate")); + addMetric(metrics, ContainerMetrics.ERROR_TIMEOUT.rate()); + addMetric(metrics, ContainerMetrics.ERROR_BACKENDS_OOS.rate()); + addMetric(metrics, ContainerMetrics.ERROR_PLUGIN_FAILURE.rate()); + addMetric(metrics, ContainerMetrics.ERROR_BACKEND_COMMUNICATION_ERROR.rate()); + addMetric(metrics, ContainerMetrics.ERROR_EMPTY_DOCUMENT_SUMMARIES.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INVALID_QUERY_PARAMETER.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INTERNAL_SERVER_ERROR.rate()); + addMetric(metrics, ContainerMetrics.ERROR_MISCONFIGURED_SERVER.rate()); + addMetric(metrics, ContainerMetrics.ERROR_INVALID_QUERY_TRANSFORMATION.rate()); + addMetric(metrics, ContainerMetrics.ERROR_RESULTS_WITH_ERRORS.rate()); + addMetric(metrics, ContainerMetrics.ERROR_UNSPECIFIED.rate()); + addMetric(metrics, 
ContainerMetrics.ERROR_UNHANDLED_EXCEPTION.rate()); return metrics; } diff --git a/config-proxy/pom.xml b/config-proxy/pom.xml index 51fba61563f..365c3e83048 100644 --- a/config-proxy/pom.xml +++ b/config-proxy/pom.xml @@ -71,11 +71,6 @@ <build> <plugins> <plugin> - <groupId>com.yahoo.vespa</groupId> - <artifactId>bundle-plugin</artifactId> - <extensions>true</extensions> - </plugin> - <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> </plugin> diff --git a/configserver/pom.xml b/configserver/pom.xml index 0708d1fc46d..ef0fde3e57e 100644 --- a/configserver/pom.xml +++ b/configserver/pom.xml @@ -112,6 +112,11 @@ </dependency> <dependency> <groupId>com.yahoo.vespa</groupId> + <artifactId>airlift-zstd</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> <artifactId>defaults</artifactId> <version>${project.version}</version> <scope>provided</scope> diff --git a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java index 5c2f1c1be79..a3a0068a105 100644 --- a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java @@ -82,15 +82,15 @@ public enum ContainerMetrics { DOCUMENTS_TOTAL("documents_total", Unit.DOCUMENT, "The number of documents to be evaluated if all requests had been fully executed"), DOCUMENTS_TARGET_TOTAL("documents_target_total", Unit.DOCUMENT, "The target number of total documents to be evaluated when when all data is in sync"), JDISC_RENDER_LATENCY("jdisc.render.latency", Unit.MILLISECOND, "The time used by the container to render responses"), - QUERY_ITEM_COUNT("query_item_count", Unit.QUERY_ITEM, "The number of query items (terms, phrases, etc)"), + QUERY_ITEM_COUNT("query_item_count", Unit.ITEM, "The number of query items (terms, phrases, etc)"), 
TOTAL_HITS_PER_QUERY("totalhits_per_query", Unit.HIT, "The total number of documents found to match queries"), EMPTY_RESULTS("empty_results", Unit.OPERATION, "Number of queries matching no documents"), REQUESTS_OVER_QUOTA("requestsOverQuota", Unit.OPERATION, "The number of requests rejected due to exceeding quota"), - RELEVANCE_AT_1("relevance.at_1", Unit.RELEVANCE, "The relevance of hit number 1"), - RELEVANCE_AT_3("relevance.at_3", Unit.RELEVANCE, "The relevance of hit number 3"), - RELEVANCE_AT_10("relevance.at_10", Unit.RELEVANCE, "The relevance of hit number 10"), + RELEVANCE_AT_1("relevance.at_1", Unit.SCORE, "The relevance of hit number 1"), + RELEVANCE_AT_3("relevance.at_3", Unit.SCORE, "The relevance of hit number 3"), + RELEVANCE_AT_10("relevance.at_10", Unit.SCORE, "The relevance of hit number 10"), // Errors from search container ERROR_TIMEOUT("error.timeout", Unit.OPERATION, "Requests that timed out"), diff --git a/container-core/src/main/java/com/yahoo/metrics/Unit.java b/container-core/src/main/java/com/yahoo/metrics/Unit.java index 58960170325..6f60ce9056b 100644 --- a/container-core/src/main/java/com/yahoo/metrics/Unit.java +++ b/container-core/src/main/java/com/yahoo/metrics/Unit.java @@ -12,16 +12,16 @@ public enum Unit { FRACTION(BaseUnit.FRACTION), HIT(BaseUnit.HIT), HIT_PER_QUERY(BaseUnit.HIT, BaseUnit.QUERY), + ITEM(BaseUnit.ITEM), MILLISECOND(BaseUnit.MILLISECOND), OPERATION(BaseUnit.OPERATION), OPERATION_PER_SECOND(BaseUnit.OPERATION, BaseUnit.SECOND), QUERY(BaseUnit.QUERY), QUERY_PER_SECOND(BaseUnit.QUERY, BaseUnit.SECOND), - QUERY_ITEM(BaseUnit.QUERY_ITEM), - RELEVANCE(BaseUnit.RELEVANCE), REQUEST(BaseUnit.REQUEST), RESPONSE(BaseUnit.RESPONSE), RESPONSE_PER_SECOND(BaseUnit.RESPONSE, BaseUnit.SECOND), + SCORE(BaseUnit.SCORE), SECOND(BaseUnit.SECOND), THREAD(BaseUnit.THREAD); @@ -57,13 +57,13 @@ public enum Unit { DOCUMENT("document"), FRACTION("fraction"), HIT("hit"), + ITEM("item"), MILLISECOND("millisecond", "ms"), 
OPERATION("operation"), QUERY("query"), - QUERY_ITEM("query-item"), - RELEVANCE("relevance"), REQUEST("request"), RESPONSE("response"), + SCORE("score"), SECOND("second", "s"), THREAD("thread"); diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index b70a88d09a0..a71082ecba3 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -899,19 +899,19 @@ ], "fields" : [ ] }, - "com.yahoo.prelude.query.MultiRangeItem$Limit": { - "superClass": "java.lang.Enum", - "interfaces": [], - "attributes": [ + "com.yahoo.prelude.query.MultiRangeItem$Limit" : { + "superClass" : "java.lang.Enum", + "interfaces" : [ ], + "attributes" : [ "public", "final", "enum" ], - "methods": [ + "methods" : [ "public static com.yahoo.prelude.query.MultiRangeItem$Limit[] values()", "public static com.yahoo.prelude.query.MultiRangeItem$Limit valueOf(java.lang.String)" ], - "fields": [ + "fields" : [ "public static final enum com.yahoo.prelude.query.MultiRangeItem$Limit INCLUSIVE", "public static final enum com.yahoo.prelude.query.MultiRangeItem$Limit EXCLUSIVE" ] @@ -5318,7 +5318,10 @@ "public void setSummaryFields(java.lang.String)", "public boolean getTensorShortForm()", "public void setTensorShortForm(java.lang.String)", + "public void setTensorFormat(java.lang.String)", "public void setTensorShortForm(boolean)", + "public boolean getTensorDirectValues()", + "public void setTensorDirectValues(boolean)", "public void prepare()", "public boolean equals(java.lang.Object)", "public int hashCode()" @@ -7678,6 +7681,7 @@ "public com.yahoo.data.access.Inspector inspect()", "public java.lang.String toJson()", "public java.lang.String toJson(boolean)", + "public java.lang.String toJson(boolean, boolean)", "public java.lang.StringBuilder writeJson(java.lang.StringBuilder)", "public java.lang.Double getDouble(java.lang.String)", "public com.yahoo.tensor.Tensor getTensor(java.lang.String)", @@ -8725,4 +8729,4 @@ ], "fields" : [ ] } -} +}
\ No newline at end of file diff --git a/container-search/src/main/java/com/yahoo/search/query/Presentation.java b/container-search/src/main/java/com/yahoo/search/query/Presentation.java index afa87eb4a06..b949d1edabd 100644 --- a/container-search/src/main/java/com/yahoo/search/query/Presentation.java +++ b/container-search/src/main/java/com/yahoo/search/query/Presentation.java @@ -77,6 +77,9 @@ public class Presentation implements Cloneable { /** Whether to renders tensors in short form */ private boolean tensorShortForm = true; + /** Whether to renders tensors in short form */ + private boolean tensorDirectValues = false; // TODO: Flip default on Vespa 9 + /** Set of explicitly requested summary fields, instead of summary classes */ private Set<String> summaryFields = LazySet.newHashSet(); @@ -178,34 +181,61 @@ public class Presentation implements Cloneable { /** * Returns whether tensors should use short form in JSON and textual representations, see - * <a href="https://docs.vespa.ai/en/reference/document-json-format.html#tensor">https://docs.vespa.ai/en/reference/document-json-format.html#tensor</a> - * and <a href="https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form">https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form</a>. + * <a href="https://docs.vespa.ai/en/reference/document-json-format.html#tensor">https://docs.vespa.ai/en/reference/document-json-format.html#tensor</a>. * Default is true. */ public boolean getTensorShortForm() { return tensorShortForm; } + /** @deprecated use setTensorFormat(). */ + @Deprecated // TODO: Remove on Vespa 9 + public void setTensorShortForm(String value) { + setTensorFormat(value); + } /** * Sets whether tensors should use short form in JSON and textual representations from a string. 
* * @param value a string which must be either 'short' or 'long' * @throws IllegalArgumentException if any other value is passed */ - public void setTensorShortForm(String value) { - tensorShortForm = toTensorShortForm(value); - } - - private boolean toTensorShortForm(String value) { + public void setTensorFormat(String value) { switch (value) { - case "short": return true; - case "long": return false; - default: throw new IllegalArgumentException("Value must be 'long' or 'short', not '" + value + "'"); - } + case "short" : + tensorShortForm = true; + tensorDirectValues = false; + break; + case "long" : + tensorShortForm = false; + tensorDirectValues = false; + break; + case "short-value" : + tensorShortForm = true; + tensorDirectValues = true; + break; + case "long-value" : + tensorShortForm = false; + tensorDirectValues = true; + break; + default : throw new IllegalArgumentException("Value must be 'long', 'short', 'long-value', or 'short-value', not '" + value + "'"); + }; } public void setTensorShortForm(boolean tensorShortForm) { this.tensorShortForm = tensorShortForm; } + /** + * Returns whether tensor content should be rendered directly, or inside a JSON object containing a + * "type" entry having the tensor type, and a "cells"/"values"/"blocks" entry (depending on type), + * having the tensor content. See + * <a href="https://docs.vespa.ai/en/reference/document-json-format.html#tensor">https://docs.vespa.ai/en/reference/document-json-format.html#tensor</a>. + * Default is false: Render wrapped in a JSON object. + */ + public boolean getTensorDirectValues() { return tensorDirectValues; } + + public void setTensorDirectValues(boolean tensorDirectValues) { + this.tensorDirectValues = tensorDirectValues; + } + /** Prepares this for binary serialization. 
For internal use - see {@link Query#prepare} */ public void prepare() { if (highlight != null) @@ -214,8 +244,7 @@ public class Presentation implements Cloneable { @Override public boolean equals(Object o) { - if ( ! (o instanceof Presentation)) return false; - Presentation p = (Presentation) o; + if ( ! (o instanceof Presentation p)) return false; return QueryHelper.equals(bolding, p.bolding) && QueryHelper.equals(summary, p.summary); } diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java index d5dc8120f29..e4a83972fae 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java @@ -303,7 +303,7 @@ public class QueryProperties extends Properties { } else if (key.size() == 3 && key.get(1).equals(Presentation.FORMAT)) { if (key.last().equals(Presentation.TENSORS)) - query.getPresentation().setTensorShortForm(asString(value, "short")); + query.getPresentation().setTensorFormat(asString(value, "short")); // TODO: Switch default to short-value on Vespa 9 else throwIllegalParameter(key.last(), Presentation.FORMAT); } diff --git a/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java index 31f99ab1927..352a31553e7 100644 --- a/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java +++ b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java @@ -45,6 +45,7 @@ import com.yahoo.search.result.Hit; import com.yahoo.search.result.HitGroup; import com.yahoo.search.result.NanNumber; import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; import com.yahoo.tensor.serialization.JsonFormat; import java.io.IOException; @@ -132,6 +133,7 @@ public class JsonRenderer extends 
AsynchronousSectionedRenderer<Result> { volatile boolean jsonMapsAll = true; volatile boolean jsonWsetsAll = false; volatile boolean tensorShortForm = true; + volatile boolean tensorDirectValues = false; boolean convertDeep() { return (jsonDeepMaps || jsonWsets); } void init() { this.debugRendering = false; @@ -140,6 +142,7 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { this.jsonMapsAll = true; this.jsonWsetsAll = true; this.tensorShortForm = true; + this.tensorDirectValues = false; } void getSettings(Query q) { if (q == null) { @@ -154,7 +157,8 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { this.jsonMapsAll = props.getBoolean(WRAP_DEEP_MAPS, true); this.jsonWsetsAll = props.getBoolean(WRAP_WSETS, true); this.tensorShortForm = q.getPresentation().getTensorShortForm(); - } + this.tensorDirectValues = q.getPresentation().getTensorDirectValues(); + } } private volatile FieldConsumerSettings fieldConsumerSettings; @@ -776,7 +780,7 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { } else if (field instanceof Tensor) { renderTensor(Optional.of((Tensor)field)); } else if (field instanceof FeatureData) { - generator().writeRawValue(((FeatureData)field).toJson(settings.tensorShortForm)); + generator().writeRawValue(((FeatureData)field).toJson(settings.tensorShortForm, settings.tensorDirectValues)); } else if (field instanceof Inspectable) { renderInspectorDirect(((Inspectable)field).inspect()); } else if (field instanceof JsonProducer) { @@ -814,24 +818,15 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { } private void renderTensor(Optional<Tensor> tensor) throws IOException { - if (tensor.isEmpty()) { - generator().writeStartObject(); - generator().writeArrayFieldStart("cells"); - generator().writeEndArray(); - generator().writeEndObject(); - return; - } - if (settings.tensorShortForm) { - generator().writeRawValue(new 
String(JsonFormat.encodeShortForm(tensor.get()), StandardCharsets.UTF_8)); - } else { - generator().writeRawValue(new String(JsonFormat.encode(tensor.get()), StandardCharsets.UTF_8)); - } + generator().writeRawValue(new String(JsonFormat.encode(tensor.orElse(Tensor.Builder.of(TensorType.empty).build()), + settings.tensorShortForm, settings.tensorDirectValues), + StandardCharsets.UTF_8)); } private JsonGenerator generator() { if (generator == null) throw new UnsupportedOperationException("Generator required but not assigned. " + - "All accept() methods must be overridden when sub-classing FieldConsumer"); + "All accept() methods must be overridden when sub-classing FieldConsumer"); return generator; } diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java index 2cb5e0e07e9..421f19475a6 100644 --- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java +++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java @@ -65,16 +65,20 @@ public class FeatureData implements Inspectable, JsonProducer { } public String toJson(boolean tensorShortForm) { + return toJson(tensorShortForm, false); + } + + public String toJson(boolean tensorShortForm, boolean tensorDirectValues) { if (this == empty) return "{}"; if (jsonForm != null) return jsonForm; - jsonForm = JsonRender.render(value, new Encoder(new StringBuilder(), true, tensorShortForm)).toString(); + jsonForm = JsonRender.render(value, new Encoder(new StringBuilder(), true, tensorShortForm, tensorDirectValues)).toString(); return jsonForm; } @Override public StringBuilder writeJson(StringBuilder target) { - return JsonRender.render(value, new Encoder(target, true, false)); + return JsonRender.render(value, new Encoder(target, true, false, false)); } /** @@ -173,17 +177,19 @@ public class FeatureData implements Inspectable, JsonProducer { private static class Encoder extends 
JsonRender.StringEncoder { private final boolean tensorShortForm; + private final boolean tensorDirectValues; - Encoder(StringBuilder out, boolean compact, boolean tensorShortForm) { + Encoder(StringBuilder out, boolean compact, boolean tensorShortForm, boolean tensorDirectValues) { super(out, compact); this.tensorShortForm = tensorShortForm; + this.tensorDirectValues = tensorDirectValues; } @Override public void encodeDATA(byte[] value) { // This could be done more efficiently ... Tensor tensor = TypedBinaryFormat.decode(Optional.empty(), GrowableByteBuffer.wrap(value)); - byte[] encodedTensor = tensorShortForm ? JsonFormat.encodeShortForm(tensor) : JsonFormat.encodeWithType(tensor); + byte[] encodedTensor = JsonFormat.encode(tensor, tensorShortForm, tensorDirectValues); target().append(new String(encodedTensor, StandardCharsets.UTF_8)); } diff --git a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java index 9486eeb92de..b3ed85911b9 100644 --- a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java @@ -53,6 +53,7 @@ import com.yahoo.slime.Slime; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; import com.yahoo.tensor.serialization.TypedBinaryFormat; +import com.yahoo.text.JSON; import com.yahoo.text.Utf8; import com.yahoo.yolean.Exceptions; import com.yahoo.yolean.trace.TraceNode; @@ -156,37 +157,136 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @Timeout(300) - void testTensorShortForm() throws ExecutionException, InterruptedException, IOException { - String expected = "{" + - "\"root\":{" + - "\"id\":\"toplevel\"," + - "\"relevance\":1.0," + - "\"fields\":{" + 
- "\"totalCount\":1" + - "}," + - "\"children\":[{" + - "\"id\":\"tensors\"," + - "\"relevance\":1.0," + - "\"fields\":{" + - "\"tensor_standard\":{\"type\":\"tensor(x{},y{})\",\"cells\":[{\"address\":{\"x\":\"a\",\"y\":\"0\"},\"value\":1.0},{\"address\":{\"x\":\"b\",\"y\":\"1\"},\"value\":2.0}]}," + - "\"tensor_indexed\":{\"type\":\"tensor(x[2],y[3])\",\"values\":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}," + - "\"tensor_single_mapped\":{\"type\":\"tensor(x{})\",\"cells\":{\"a\":1.0,\"b\":2.0}}," + - "\"tensor_mixed\":{\"type\":\"tensor(x{},y[2])\",\"blocks\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}," + - "\"summaryfeatures\":{" + - "\"tensor_standard\":{\"type\":\"tensor(x{},y{})\",\"cells\":[{\"address\":{\"x\":\"a\",\"y\":\"0\"},\"value\":1.0},{\"address\":{\"x\":\"b\",\"y\":\"1\"},\"value\":2.0}]}," + - "\"tensor_indexed\":{\"type\":\"tensor(x[2],y[3])\",\"values\":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}," + - "\"tensor_single_mapped\":{\"type\":\"tensor(x{})\",\"cells\":{\"a\":1.0,\"b\":2.0}}," + - "\"tensor_mixed\":{\"type\":\"tensor(x{},y[2])\",\"blocks\":{\"a\":[1.0,2.0],\"b\":[3.0,4.0]}}" + - "}" + - "}" + - "}]" + - "}}\n"; + void testTensorRendering() throws ExecutionException, InterruptedException, IOException { + String shortJson = """ + { + "root": { + "id":"toplevel", + "relevance":1.0, + "fields":{ + "totalCount":1 + }, + "children":[{ + "id":"tensors", + "relevance":1.0, + "fields":{ + "tensor_standard":{"type":"tensor(x{},y{})","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}]}, + "tensor_indexed":{"type":"tensor(x[2],y[3])","values":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}, + "tensor_single_mapped":{"type":"tensor(x{})","cells":{"a":1.0,"b":2.0}}, + "tensor_mixed":{"type":"tensor(x{},y[2])","blocks":{"a":[1.0,2.0],"b":[3.0,4.0]}}, + "summaryfeatures":{ + "tensor_standard":{"type":"tensor(x{},y{})","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}]}, + 
"tensor_indexed":{"type":"tensor(x[2],y[3])","values":[[1.0,2.0,3.0],[4.0,5.0,6.0]]}, + "tensor_single_mapped":{"type":"tensor(x{})","cells":{"a":1.0,"b":2.0}}, + "tensor_mixed":{"type":"tensor(x{},y[2])","blocks":{"a":[1.0,2.0],"b":[3.0,4.0]}} + } + } + }] + } + }"""; + + String longJson = """ + { + "root": { + "id":"toplevel", + "relevance":1.0, + "fields":{ + "totalCount":1 + }, + "children":[{ + "id":"tensors", + "relevance":1.0, + "fields":{ + "tensor_standard":{"type":"tensor(x{},y{})","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}]}, + "tensor_indexed":{"type":"tensor(x[2],y[3])","cells":[{"address":{"x":"0","y":"0"},"value":1.0},{"address":{"x":"0","y":"1"},"value":2.0},{"address":{"x":"0","y":"2"},"value":3.0},{"address":{"x":"1","y":"0"},"value":4.0},{"address":{"x":"1","y":"1"},"value":5.0},{"address":{"x":"1","y":"2"},"value":6.0}]}, + "tensor_single_mapped":{"type":"tensor(x{})","cells":[{"address":{"x":"a"},"value":1.0},{"address":{"x":"b"},"value":2.0}]}, + "tensor_mixed":{"type":"tensor(x{},y[2])","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"a","y":"1"},"value":2.0},{"address":{"x":"b","y":"0"},"value":3.0},{"address":{"x":"b","y":"1"},"value":4.0}]}, + "summaryfeatures":{ + "tensor_standard":{"type":"tensor(x{},y{})","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}]}, + "tensor_indexed":{"type":"tensor(x[2],y[3])","cells":[{"address":{"x":"0","y":"0"},"value":1.0},{"address":{"x":"0","y":"1"},"value":2.0},{"address":{"x":"0","y":"2"},"value":3.0},{"address":{"x":"1","y":"0"},"value":4.0},{"address":{"x":"1","y":"1"},"value":5.0},{"address":{"x":"1","y":"2"},"value":6.0}]}, + "tensor_single_mapped":{"type":"tensor(x{})","cells":[{"address":{"x":"a"},"value":1.0},{"address":{"x":"b"},"value":2.0}]}, + 
"tensor_mixed":{"type":"tensor(x{},y[2])","cells":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"a","y":"1"},"value":2.0},{"address":{"x":"b","y":"0"},"value":3.0},{"address":{"x":"b","y":"1"},"value":4.0}]} + } + } + }] + } + }"""; + + String shortDirectJson = """ + { + "root": { + "id":"toplevel", + "relevance":1.0, + "fields":{ + "totalCount":1 + }, + "children":[{ + "id":"tensors", + "relevance":1.0, + "fields":{ + "tensor_standard":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}], + "tensor_indexed":[[1.0,2.0,3.0],[4.0,5.0,6.0]], + "tensor_single_mapped":{"a":1.0,"b":2.0}, + "tensor_mixed":{"a":[1.0,2.0],"b":[3.0,4.0]}, + "summaryfeatures":{ + "tensor_standard":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}], + "tensor_indexed":[[1.0,2.0,3.0],[4.0,5.0,6.0]], + "tensor_single_mapped":{"a":1.0,"b":2.0}, + "tensor_mixed":{"a":[1.0,2.0],"b":[3.0,4.0]} + } + } + }] + } + }"""; + + String longDirectJson = """ + { + "root": { + "id":"toplevel", + "relevance":1.0, + "fields":{ + "totalCount":1 + }, + "children":[{ + "id":"tensors", + "relevance":1.0, + "fields":{ + "tensor_standard":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}], + "tensor_indexed":[{"address":{"x":"0","y":"0"},"value":1.0},{"address":{"x":"0","y":"1"},"value":2.0},{"address":{"x":"0","y":"2"},"value":3.0},{"address":{"x":"1","y":"0"},"value":4.0},{"address":{"x":"1","y":"1"},"value":5.0},{"address":{"x":"1","y":"2"},"value":6.0}], + "tensor_single_mapped":[{"address":{"x":"a"},"value":1.0},{"address":{"x":"b"},"value":2.0}], + "tensor_mixed":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"a","y":"1"},"value":2.0},{"address":{"x":"b","y":"0"},"value":3.0},{"address":{"x":"b","y":"1"},"value":4.0}], + "summaryfeatures":{ + "tensor_standard":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"b","y":"1"},"value":2.0}], + 
"tensor_indexed":[{"address":{"x":"0","y":"0"},"value":1.0},{"address":{"x":"0","y":"1"},"value":2.0},{"address":{"x":"0","y":"2"},"value":3.0},{"address":{"x":"1","y":"0"},"value":4.0},{"address":{"x":"1","y":"1"},"value":5.0},{"address":{"x":"1","y":"2"},"value":6.0}], + "tensor_single_mapped":[{"address":{"x":"a"},"value":1.0},{"address":{"x":"b"},"value":2.0}], + "tensor_mixed":[{"address":{"x":"a","y":"0"},"value":1.0},{"address":{"x":"a","y":"1"},"value":2.0},{"address":{"x":"b","y":"0"},"value":3.0},{"address":{"x":"b","y":"1"},"value":4.0}] + } + } + }] + } + }"""; + + assertTensorRendering(shortJson, "short"); + assertTensorRendering(longJson, "long"); + assertTensorRendering(shortDirectJson, "short-value"); + assertTensorRendering(longDirectJson, "long-value"); + try { + render(new Result(new Query("/?presentation.format.tensors=unknown"))); + fail("Expected exception"); + } + catch (IllegalArgumentException e) { + assertEquals("Could not set 'presentation.format.tensors' to 'unknown': Value must be 'long', 'short', 'long-value', or 'short-value', not 'unknown'", + Exceptions.toMessageString(e)); + } + } + + private void assertTensorRendering(String expected, String format) throws ExecutionException, InterruptedException, IOException { Slime slime = new Slime(); Cursor features = slime.setObject(); features.setData("tensor_standard", TypedBinaryFormat.encode(Tensor.from("tensor(x{},y{}):{ {x:a,y:0}:1.0, {x:b,y:1}:2.0 }"))); @@ -202,26 +302,16 @@ public class JsonRendererTestCase { h.setField("tensor_mixed", new TensorFieldValue(Tensor.from("tensor(x{},y[2]):{a:[1,2], b:[3,4]}"))); h.setField("summaryfeatures", summaryFeatures); - Result result1 = new Result(new Query("/?presentation.format.tensors=short")); + Result result1 = new Result(new Query("/?presentation.format.tensors=" + format)); result1.hits().add(h); result1.setTotalHitCount(1L); - String summary1 = render(result1); - assertEqualJson(expected, summary1); + assertEqualJson(expected, 
render(result1)); - Result result2 = new Result(new Query("/?format.tensors=short")); + // Alias + Result result2 = new Result(new Query("/?format.tensors=" + format)); result2.hits().add(h); result2.setTotalHitCount(1L); - String summary2 = render(result2); - assertEqualJson(expected, summary2); - - try { - render(new Result(new Query("/?presentation.format.tensors=unknown"))); - fail("Expected exception"); - } - catch (IllegalArgumentException e) { - assertEquals("Could not set 'presentation.format.tensors' to 'unknown': Value must be 'long' or 'short', not 'unknown'", - Exceptions.toMessageString(e)); - } + assertEqualJson(expected, render(result2)); } @Test @@ -241,7 +331,7 @@ public class JsonRendererTestCase { + " \"string\": \"stuff\"," + " \"predicate\": \"a in [b]\"," + " \"tensor1\": { \"type\": \"tensor(x{})\", \"cells\": { \"a\":2.0 } }," - + " \"tensor2\": { \"cells\": [] }," + + " \"tensor2\": { \"type\": \"tensor()\", \"values\":[0.0] }," + " \"tensor3\": { \"type\": \"tensor(x{},y{})\", \"cells\": [ { \"address\": {\"x\": \"a\", \"y\": \"0\"}, \"value\":2.0 }, { \"address\": {\"x\": \"a\", \"y\": \"1\"}, \"value\":-1.0 } ] }," + " \"summaryfeatures\": {" + " \"scalar1\":1.5," @@ -281,7 +371,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } private FeatureData createSummaryFeatures() { @@ -349,7 +439,7 @@ public class JsonRendererTestCase { subQuery.trace("yellow", 1); q.trace("marker", 1); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -415,7 +505,7 @@ public class JsonRendererTestCase { subQuery.trace(access, 1); q.trace("marker", 1); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -441,7 +531,7 @@ public class JsonRendererTestCase { 
subQuery.trace("yellow", 1); q.trace("marker", 1); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @SuppressWarnings({"unchecked"}) @@ -562,7 +652,7 @@ public class JsonRendererTestCase { execution.trace().traceNode().add(child); q.trace("something", 1); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -604,7 +694,7 @@ public class JsonRendererTestCase { execution.trace().traceNode().add(child); q.trace("something", 1); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -653,7 +743,7 @@ public class JsonRendererTestCase { childOfChild.add(new TraceNode("in OO languages, nesting is for birds", 0L)); execution.trace().traceNode().add(child); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -741,7 +831,7 @@ public class JsonRendererTestCase { r.hits().add(gg); r.hits().addError(ErrorMessage.createInternalServerError("boom")); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -776,7 +866,7 @@ public class JsonRendererTestCase { r.setCoverage(new Coverage(500, 600).setDegradedReason(5)); String summary = render(execution, r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -813,7 +903,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -843,7 +933,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -871,7 +961,7 @@ public 
class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -905,7 +995,7 @@ public class JsonRendererTestCase { ErrorMessage e = new ErrorMessage(1234, "hello", "top of the day", t); r.hits().addError(e); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -999,7 +1089,7 @@ public class JsonRendererTestCase { r.hits().add(rg); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1063,7 +1153,7 @@ public class JsonRendererTestCase { r.hits().add(rg); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1110,7 +1200,7 @@ public class JsonRendererTestCase { h.setField("json producer", struct); r.hits().add(h); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1146,7 +1236,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1172,7 +1262,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1204,7 +1294,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } @Test @@ -1240,7 +1330,7 @@ public class JsonRendererTestCase { r.getElapsedTime().add(t); renderer.setTimeSource(() -> 8L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, 
summary); } @Test @@ -1278,7 +1368,7 @@ public class JsonRendererTestCase { String json = summary.substring(jsonCallback.length() + 1, summary.length() - 2); assertEquals(jsonCallback + "(", jsonCallbackBegin); - assertEqualJson(expected, json); + assertEqualJsonContent(expected, json); assertEquals(");", jsonCallbackEnd); } @@ -1327,7 +1417,7 @@ public class JsonRendererTestCase { r.hits().add(h); r.setTotalHitCount(1L); String summary = render(r); - assertEqualJson(expected, summary); + assertEqualJsonContent(expected, summary); } private static SlimeAdapter dataFromSimplified(String simplified) { @@ -1512,8 +1602,14 @@ public class JsonRendererTestCase { } } + private void assertEqualJson(String expected, String generated) { + assertEquals("", validateJSON(expected)); + assertEquals("", validateJSON(generated)); + assertEquals(JSON.canonical(expected), JSON.canonical(generated)); + } + @SuppressWarnings("unchecked") - private void assertEqualJson(String expected, String generated) throws IOException { + private void assertEqualJsonContent(String expected, String generated) throws IOException { assertEquals("", validateJSON(expected)); assertEquals("", validateJSON(generated)); diff --git a/docproc/src/main/java/com/yahoo/docproc/Processing.java b/docproc/src/main/java/com/yahoo/docproc/Processing.java index f067a3b8436..41d7345543f 100644 --- a/docproc/src/main/java/com/yahoo/docproc/Processing.java +++ b/docproc/src/main/java/com/yahoo/docproc/Processing.java @@ -199,6 +199,10 @@ public final class Processing extends ProcessingAccess { } } + /** + * Returns the operations in this processing. + * This can be mutated to add or remove operations to be performed. 
+ */ public List<DocumentOperation> getDocumentOperations() { updateDocumentOperations(); return documentOperations; diff --git a/docproc/src/test/java/com/yahoo/docproc/jdisc/DocumentProcessingHandlerTransformingMessagesTestCase.java b/docproc/src/test/java/com/yahoo/docproc/jdisc/DocumentProcessingHandlerTransformingMessagesTestCase.java index c3d1686e7c8..6908b977cf3 100644 --- a/docproc/src/test/java/com/yahoo/docproc/jdisc/DocumentProcessingHandlerTransformingMessagesTestCase.java +++ b/docproc/src/test/java/com/yahoo/docproc/jdisc/DocumentProcessingHandlerTransformingMessagesTestCase.java @@ -35,7 +35,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge */ public class DocumentProcessingHandlerTransformingMessagesTestCase extends DocumentProcessingHandlerTestBase { diff --git a/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java index 7003d19e7d1..105739da508 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java +++ b/document/src/main/java/com/yahoo/document/datatypes/TensorFieldValue.java @@ -166,9 +166,8 @@ public class TensorFieldValue extends FieldValue { @Override public boolean equals(Object o) { if (this == o) return true; - if ( ! (o instanceof TensorFieldValue)) return false; + if ( ! (o instanceof TensorFieldValue other)) return false; - TensorFieldValue other = (TensorFieldValue)o; if ( ! getTensorType().equals(other.getTensorType())) return false; if ( ! 
getTensor().equals(other.getTensor())) return false; return true; diff --git a/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java b/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java index 1f92bed5e75..8b44910e75c 100644 --- a/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java +++ b/document/src/main/java/com/yahoo/document/json/DocumentUpdateJsonSerializer.java @@ -369,7 +369,7 @@ public class DocumentUpdateJsonSerializer { @Override public void write(FieldBase field, TensorFieldValue value) { - serializeTensorField(generator, field, value); + serializeTensorField(generator, field, value, false, false); } @Override diff --git a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java index 7f6ead528fe..110564bea46 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java +++ b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java @@ -74,32 +74,22 @@ public class JsonSerializationHelper { } } - public static void serializeTensorFieldShortForm(JsonGenerator generator, FieldBase field, TensorFieldValue value) { + public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value, + boolean shortForm, boolean directValues) { wrapIOException(() -> { fieldNameIfNotNull(generator, field); if (value.getTensor().isPresent()) { Tensor tensor = value.getTensor().get(); - generator.writeRawValue(new String(JsonFormat.encodeShortForm(tensor), StandardCharsets.UTF_8)); - } else { + byte[] encoded = JsonFormat.encode(tensor, shortForm, directValues); + generator.writeRawValue(new String(encoded, StandardCharsets.UTF_8)); + } + else { generator.writeStartObject(); generator.writeEndObject(); } }); } - public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value) { 
- wrapIOException(() -> { - fieldNameIfNotNull(generator, field); - generator.writeStartObject(); - - if (value.getTensor().isPresent()) { - Tensor tensor = value.getTensor().get(); - serializeTensorCells(generator, tensor); - } - generator.writeEndObject(); - }); - } - static void serializeTensorCells(JsonGenerator generator, Tensor tensor) throws IOException { generator.writeArrayFieldStart(TensorReader.TENSOR_CELLS); for (Map.Entry<TensorAddress, Double> cell : tensor.cells().entrySet()) { diff --git a/document/src/main/java/com/yahoo/document/json/JsonWriter.java b/document/src/main/java/com/yahoo/document/json/JsonWriter.java index 04edfeea26e..33243ab832c 100644 --- a/document/src/main/java/com/yahoo/document/json/JsonWriter.java +++ b/document/src/main/java/com/yahoo/document/json/JsonWriter.java @@ -63,7 +63,6 @@ import static com.yahoo.document.json.JsonSerializationHelper.serializeStringFie import static com.yahoo.document.json.JsonSerializationHelper.serializeStructField; import static com.yahoo.document.json.JsonSerializationHelper.serializeStructuredField; import static com.yahoo.document.json.JsonSerializationHelper.serializeTensorField; -import static com.yahoo.document.json.JsonSerializationHelper.serializeTensorFieldShortForm; import static com.yahoo.document.json.JsonSerializationHelper.serializeWeightedSet; import static com.yahoo.document.json.document.DocumentParser.FIELDS; import static com.yahoo.document.json.document.DocumentParser.REMOVE; @@ -79,30 +78,20 @@ public class JsonWriter implements DocumentWriter { private final JsonGenerator generator; private final boolean tensorShortForm; + private final boolean tensorDirectValues; - // I really hate exception unsafe constructors, but the alternative - // requires generator to not be a final /** + * Creates a JsonWriter. 
* - * @param out - * the target output stream - * @throws RuntimeException - * if unable to create the internal JSON generator + * @param out the target output stream + * @throws RuntimeException if unable to create the internal JSON generator */ public JsonWriter(OutputStream out) { this(createPrivateGenerator(out)); } - public JsonWriter(OutputStream out, boolean tensorShortForm) { - this(createPrivateGenerator(out), tensorShortForm); - } - - private static JsonGenerator createPrivateGenerator(OutputStream out) { - try { - return jsonFactory.createGenerator(out); - } catch (IOException e) { - throw new RuntimeException(e); - } + public JsonWriter(OutputStream out, boolean tensorShortForm, boolean tensorDirectValues) { + this(createPrivateGenerator(out), tensorShortForm, tensorDirectValues); } /** @@ -111,16 +100,26 @@ public class JsonWriter implements DocumentWriter { * after having written a full Document instance. In other words, JsonWriter * will <i>not</i> take ownership of the generator. 
* - * @param generator - * the output JSON generator + * @param generator the output JSON generator + * @param tensorShortForm whether to use the short type-dependent form for tensor values + * @param tensorDirectValues whether to output tensor values directly or wrapped in a map also containing the type */ - public JsonWriter(JsonGenerator generator) { - this(generator, false); - } - - public JsonWriter(JsonGenerator generator, boolean tensorShortForm) { + public JsonWriter(JsonGenerator generator, boolean tensorShortForm, boolean tensorDirectValues) { this.generator = generator; this.tensorShortForm = tensorShortForm; + this.tensorDirectValues = tensorDirectValues; + } + + private static JsonGenerator createPrivateGenerator(OutputStream out) { + try { + return jsonFactory.createGenerator(out); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public JsonWriter(JsonGenerator generator) { + this(generator, false, false); } /** @@ -129,8 +128,7 @@ public class JsonWriter implements DocumentWriter { * updating this class. This implementation throws an exception if it is * reached. * - * @throws UnsupportedOperationException - * if invoked + * @throws UnsupportedOperationException if invoked */ @Override public void write(FieldBase field, FieldValue value) { @@ -218,11 +216,7 @@ public class JsonWriter implements DocumentWriter { @Override public void write(FieldBase field, TensorFieldValue value) { - if (tensorShortForm) { - serializeTensorFieldShortForm(generator, field, value); - } else { - serializeTensorField(generator, field, value); - } + serializeTensorField(generator, field, value, tensorShortForm, tensorDirectValues); } @Override @@ -270,12 +264,14 @@ public class JsonWriter implements DocumentWriter { * Utility method to easily serialize a single document. 
* * @param document the document to be serialized - * @param tensorShortForm whether tensors should be serialized in short form + * @param tensorShortForm whether tensors should be serialized in a type-dependent short form + * @param tensorDirectValues whether tensors should be serialized as direct values or wrapped in a + * map also containing the type * @return the input document serialised as UTF-8 encoded JSON */ - public static byte[] toByteArray(Document document, boolean tensorShortForm) { + public static byte[] toByteArray(Document document, boolean tensorShortForm, boolean tensorDirectValues) { ByteArrayOutputStream out = new ByteArrayOutputStream(); - JsonWriter writer = new JsonWriter(out, tensorShortForm); + JsonWriter writer = new JsonWriter(out, tensorShortForm, tensorDirectValues); writer.write(document); return out.toByteArray(); } @@ -287,8 +283,8 @@ public class JsonWriter implements DocumentWriter { * @return the input document serialised as UTF-8 encoded JSON */ public static byte[] toByteArray(Document document) { - // TODO Vespa 9: change tensorShortForm default to true - return toByteArray(document, false); + // TODO Vespa 9: change tensorShortForm and tensorDirectValues default to true + return toByteArray(document, false, false); } /** diff --git a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java index 0d971859550..f7b819e7eb8 100644 --- a/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java +++ b/document/src/main/java/com/yahoo/document/json/readers/TensorReader.java @@ -4,6 +4,8 @@ package com.yahoo.document.json.readers; import com.fasterxml.jackson.core.JsonToken; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.document.json.TokenBuffer; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Type; import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.MixedTensor; import 
com.yahoo.tensor.Tensor; @@ -22,6 +24,7 @@ import static com.yahoo.tensor.serialization.JsonFormat.decodeHexString; */ public class TensorReader { + public static final String TENSOR_TYPE = "type"; public static final String TENSOR_ADDRESS = "address"; public static final String TENSOR_CELLS = "cells"; public static final String TENSOR_VALUES = "values"; @@ -43,6 +46,9 @@ public class TensorReader { else if (TENSOR_BLOCKS.equals(buffer.currentName())) { readTensorBlocks(buffer, builder); } + else if (TENSOR_TYPE.equals(buffer.currentName()) && buffer.current() == JsonToken.VALUE_STRING) { + // Ignore input tensor type + } else { buffer.previous(); // Back up to the start of the enclosing block readDirectTensorValue(buffer, builder); @@ -119,6 +125,7 @@ public class TensorReader { int index = 0; int initNesting = buffer.nesting(); for (buffer.next(); buffer.nesting() >= initNesting; buffer.next()) { + if (buffer.current() == JsonToken.START_ARRAY || buffer.current() == JsonToken.END_ARRAY) continue; // nested arrays: Skip indexedBuilder.cellByDirectIndex(index++, readDouble(buffer)); } if (index == 0) @@ -178,7 +185,9 @@ public class TensorReader { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if ( ! hasMapped) + if (isArrayOfObjects(buffer, 0)) + readTensorCells(buffer, builder); + else if ( ! 
hasMapped) readTensorValues(buffer, builder); else if (hasMapped && hasIndexed) readTensorBlocks(buffer, builder); @@ -186,6 +195,12 @@ public class TensorReader { readTensorCells(buffer, builder); } + private static boolean isArrayOfObjects(TokenBuffer buffer, int ahead) { + if (buffer.peek(ahead++) != JsonToken.START_ARRAY) return false; + if (buffer.peek(ahead) == JsonToken.START_ARRAY) return isArrayOfObjects(buffer, ahead); // nested array + return buffer.peek(ahead) == JsonToken.START_OBJECT; + } + private static TensorAddress readAddress(TokenBuffer buffer, TensorType type) { expectObjectStart(buffer.current()); TensorAddress.Builder builder = new TensorAddress.Builder(type); diff --git a/document/src/main/java/com/yahoo/document/serialization/DocumentWriter.java b/document/src/main/java/com/yahoo/document/serialization/DocumentWriter.java index b9e67a65a8d..2d31a6b6734 100644 --- a/document/src/main/java/com/yahoo/document/serialization/DocumentWriter.java +++ b/document/src/main/java/com/yahoo/document/serialization/DocumentWriter.java @@ -6,18 +6,15 @@ import com.yahoo.document.DocumentId; import com.yahoo.document.DocumentType; /** - * @author <a href="mailto:ravishar@yahoo-inc.com">ravishar</a> + * @author ravishar */ public interface DocumentWriter extends FieldWriter { - /** - * write out a document - * - * @param document - * document to be written - */ + + /** Writes a document. 
*/ void write(Document document); void write(DocumentId id); void write(DocumentType type); + } diff --git a/document/src/main/java/com/yahoo/document/serialization/FieldWriter.java b/document/src/main/java/com/yahoo/document/serialization/FieldWriter.java index deb1e7386df..43e25b24207 100644 --- a/document/src/main/java/com/yahoo/document/serialization/FieldWriter.java +++ b/document/src/main/java/com/yahoo/document/serialization/FieldWriter.java @@ -25,203 +25,70 @@ import com.yahoo.vespa.objects.FieldBase; import com.yahoo.vespa.objects.Serializer; /** - * Interface for writing out com.yahoo.document.datatypes.FieldValue. - * - * @author <a href="mailto:ravishar@yahoo-inc.com">ravishar</a> + * Interface for writing a com.yahoo.document.datatypes.FieldValue. * + * @author ravishar */ public interface FieldWriter extends Serializer { - /** - * Write out the value of field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a field. */ void write(FieldBase field, FieldValue value); - /** - * Write out the value of field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ - public void write(FieldBase field, Document value); - - /** - * Write out the value of array field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a field. */ + void write(FieldBase field, Document value); + + /** Writes the value of an array field. */ <T extends FieldValue> void write(FieldBase field, Array<T> value); - /** - * Write the value of a map field - */ - <K extends FieldValue, V extends FieldValue> void write(FieldBase field, - MapFieldValue<K, V> map); - - /** - * Write out the value of byte field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a map field. 
*/ + <K extends FieldValue, V extends FieldValue> void write(FieldBase field, MapFieldValue<K, V> map); + + /** Writes the value of a byte field. */ void write(FieldBase field, ByteFieldValue value); - /** - * Write out the value of byte field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a bool field. */ void write(FieldBase field, BoolFieldValue value); - /** - * Write out the value of collection field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ - <T extends FieldValue> void write(FieldBase field, - CollectionFieldValue<T> value); - - /** - * Write out the value of double field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a collection field. */ + <T extends FieldValue> void write(FieldBase field, CollectionFieldValue<T> value); + + /** Writes the value of a double field. */ void write(FieldBase field, DoubleFieldValue value); - /** - * Write out the value of float field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a float field. */ void write(FieldBase field, FloatFieldValue value); - /** - * Write out the value of integer field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of an integer field. */ void write(FieldBase field, IntegerFieldValue value); - /** - * Write out the value of long field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a long field. */ void write(FieldBase field, LongFieldValue value); - /** - * Write out the value of raw field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a raw field. 
*/ void write(FieldBase field, Raw value); - /** - * Write out the value of predicate field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a predicate field. */ void write(FieldBase field, PredicateFieldValue value); - /** - * Write out the value of string field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a string field. */ void write(FieldBase field, StringFieldValue value); - /** - * Write out the value of the given tensor field value. - * - * @param field field description (name and data type) - * @param value tensor field value - */ + /** Writes the value of a tensor field. */ void write(FieldBase field, TensorFieldValue value); - /** - * Write out the value of the given reference field value. - * - * @param field field description (name and data type) - * @param value reference field value - */ + /** Writes the value of a reference field. */ void write(FieldBase field, ReferenceFieldValue value); - /** - * Write out the value of struct field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a struct field. */ void write(FieldBase field, Struct value); - /** - * Write out the value of structured field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a structured field. */ void write(FieldBase field, StructuredFieldValue value); - /** - * Write out the value of weighted set field - * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of a weighted set field. */ <T extends FieldValue> void write(FieldBase field, WeightedSet<T> value); - /** - * Write out the value of annotation data. 
- * - * @param field - * field description (name and data type) - * @param value - * field value - */ + /** Writes the value of an annotation reference. */ void write(FieldBase field, AnnotationReference value); + } diff --git a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java index 7bdb526bb1c..08a5c9a124c 100644 --- a/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java +++ b/document/src/test/java/com/yahoo/document/json/DocumentUpdateJsonSerializerTest.java @@ -285,6 +285,7 @@ public class DocumentUpdateJsonSerializerTest { " 'fields': {", " 'sparse_tensor': {", " 'assign': {", + " 'type': 'tensor(x{},y{})',", " 'cells': [", " { 'address': { 'x': 'a', 'y': 'b' }, 'value': 2.0 },", " { 'address': { 'x': 'c', 'y': 'b' }, 'value': 3.0 }", diff --git a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java index c19094ff231..0c130ab9a42 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonReaderTestCase.java @@ -1427,14 +1427,16 @@ public class JsonReaderTestCase { @Test public void testParsingOfSparseTensorWithCells() { Tensor tensor = assertSparseTensorField("{{x:a,y:b}:2.0,{x:c,y:b}:3.0}}", - createPutWithSparseTensor(inputJson("{", - " 'cells': [", - " { 'address': { 'x': 'a', 'y': 'b' },", - " 'value': 2.0 },", - " { 'address': { 'x': 'c', 'y': 'b' },", - " 'value': 3.0 }", - " ]", - "}"))); + createPutWithSparseTensor( + """ + { + "type": "tensor(x{},y{})", + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 }, + { "address": { "x": "c", "y": "b" }, "value": 3.0 } + ] + } + """)); assertTrue(tensor instanceof MappedTensor); // any functional instance is fine } @@ -1510,6 +1512,7 @@ public class JsonReaderTestCase { 
public void testDirectValue() { assertTensorField("tensor(x{}):{a:2, b:3}", "sparse_single_dimension_tensor", "{'a':2.0, 'b':3.0}"); assertTensorField("tensor(x[2],y[3]):[2, 3, 4, 5, 6, 7]]", "dense_tensor", "[2, 3, 4, 5, 6, 7]"); + assertTensorField("tensor(x[2],y[3]):[2, 3, 4, 5, 6, 7]]", "dense_tensor", "[[2, 3, 4], [5, 6, 7]]"); assertTensorField("tensor(x{},y[3]):{a:[2, 3, 4], b:[4, 5, 6]}", "mixed_tensor", "{'a':[2, 3, 4], 'b':[4, 5, 6]}"); assertTensorField("tensor(x{},y{}):{{x:a,y:0}:2, {x:b,y:1}:3}", "sparse_tensor", "[{'address':{'x':'a','y':'0'},'value':2}, {'address':{'x':'b','y':'1'},'value':3}]"); @@ -1542,13 +1545,33 @@ public class JsonReaderTestCase { builder.cell().label("x", 1).label("y", 2).value(7.0); Tensor expected = builder.build(); - String mixedJson = "{\"blocks\":[" + - "{\"address\":{\"x\":\"0\"},\"values\":[2.0,3.0,4.0]}," + - "{\"address\":{\"x\":\"1\"},\"values\":[5.0,6.0,7.0]}" + - "]}"; + String mixedJson = + """ + { + "blocks":[ + {"address":{"x":"0"},"values":[2.0,3.0,4.0]}, + {"address":{"x":"1"},"values":[5.0,6.0,7.0]} + ] + } + """; Tensor tensor = assertTensorField(expected, createPutWithTensor(inputJson(mixedJson), "mixed_tensor"), "mixed_tensor"); assertTrue(tensor instanceof MixedTensor); // this matters for performance + + String mixedJsonDirect = + """ + [ + {"address":{"x":"0","y":"0"},"value":2.0}, + {"address":{"x":"0","y":"1"},"value":3.0}, + {"address":{"x":"0","y":"2"},"value":4.0}, + {"address":{"x":"1","y":"0"},"value":5.0}, + {"address":{"x":"1","y":"1"},"value":6.0}, + {"address":{"x":"1","y":"2"},"value":7.0} + ] + """; + Tensor tensorDirect = assertTensorField(expected, + createPutWithTensor(inputJson(mixedJsonDirect), "mixed_tensor"), "mixed_tensor"); + assertTrue(tensorDirect instanceof MixedTensor); // this matters for performance } @Test @@ -1602,8 +1625,8 @@ public class JsonReaderTestCase { @Test public void testAssignUpdateOfNullTensor() { ClearValueUpdate clearUpdate = (ClearValueUpdate) 
getTensorField(createAssignUpdateWithSparseTensor(null)).getValueUpdate(0); - assertTrue(clearUpdate != null); - assertTrue(clearUpdate.getValue() == null); + assertNotNull(clearUpdate); + assertNull(clearUpdate.getValue()); } @Test diff --git a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java index e7368a691ab..eab33afc3e4 100644 --- a/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java +++ b/document/src/test/java/com/yahoo/document/json/JsonWriterTestCase.java @@ -396,19 +396,21 @@ public class JsonWriterTestCase { @Test public void testWritingOfEmptyTensor() throws IOException { - assertTensorRoundTripEquality("{}","{ \"cells\": [] }"); + assertTensorRoundTripEquality("{}","{ \"type\":\"tensor(x{},y{})\", \"cells\": [] }"); } @Test public void testWritingOfTensorWithCellsOnly() throws IOException { assertTensorRoundTripEquality("{ " - + " \"cells\": [ " + + " \"type\": \"tensor(x{},y{})\"," + + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " + " { \"address\": { \"x\": \"c\", \"y\": \"b\" }, " + " \"value\": 3.0 } " + " ]" + "}", "{ " + + " \"type\": \"tensor(x{},y{})\"," + " \"cells\": [ " + " { \"address\": { \"x\": \"a\", \"y\": \"b\" }, " + " \"value\": 2.0 }, " @@ -449,17 +451,17 @@ public class JsonWriterTestCase { Tensor tensor = Tensor.from("tensor(x[3]):[1,2,3]"); doc.setFieldValue(tensorField, new TensorFieldValue(tensor)); - assertEqualJson(asDocument(docId, "{ \"tensorfield\": {\"cells\":[{\"address\":{\"x\":\"0\"},\"value\":1.0},{\"address\":{\"x\":\"1\"},\"value\":2.0},{\"address\":{\"x\":\"2\"},\"value\":3.0}]} }"), - writeDocument(doc, false)); + assertEqualJson(asDocument(docId, "{ \"tensorfield\": {\"type\":\"tensor(x[3])\", \"cells\":[{\"address\":{\"x\":\"0\"},\"value\":1.0},{\"address\":{\"x\":\"1\"},\"value\":2.0},{\"address\":{\"x\":\"2\"},\"value\":3.0}]} }"), + writeDocument(doc, 
false, false)); assertEqualJson(asDocument(docId, "{ \"tensorfield\": {\"type\":\"tensor(x[3])\", \"values\":[1.0, 2.0, 3.0] } }"), - writeDocument(doc, true)); + writeDocument(doc, true, false)); } - private byte[] writeDocument(Document doc, boolean tensorShortForm) throws IOException { + private byte[] writeDocument(Document doc, boolean tensorShortForm, boolean tensorDirectValues) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); JsonFactory factory = new JsonFactory(); JsonGenerator generator = factory.createGenerator(out); - JsonWriter writer = new JsonWriter(generator, tensorShortForm); + JsonWriter writer = new JsonWriter(generator, tensorShortForm, tensorDirectValues); writer.write(doc); return out.toByteArray(); } diff --git a/documentapi/abi-spec.json b/documentapi/abi-spec.json index 72905c3e080..7277e58d0c5 100644 --- a/documentapi/abi-spec.json +++ b/documentapi/abi-spec.json @@ -542,7 +542,7 @@ ], "methods" : [ "public void <init>()", - "public void setDefaultTimeout(java.time.Duration)", + "public com.yahoo.documentapi.SyncParameters$Builder setDefaultTimeout(java.time.Duration)", "public com.yahoo.documentapi.SyncParameters build()" ], "fields" : [ ] diff --git a/documentapi/src/main/java/com/yahoo/documentapi/SyncParameters.java b/documentapi/src/main/java/com/yahoo/documentapi/SyncParameters.java index 15546a3ce4a..1d6fc52d94b 100755 --- a/documentapi/src/main/java/com/yahoo/documentapi/SyncParameters.java +++ b/documentapi/src/main/java/com/yahoo/documentapi/SyncParameters.java @@ -33,8 +33,9 @@ public class SyncParameters extends Parameters { /** * Set default timeout for all messagebus operations. 
*/ - public void setDefaultTimeout(Duration defaultTimeout) { + public Builder setDefaultTimeout(Duration defaultTimeout) { this.defaultTimeout = defaultTimeout; + return this; } public SyncParameters build() { diff --git a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceCompressor.java b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceCompressor.java index 2ea41a9fd8c..8d6f9ea1af3 100644 --- a/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceCompressor.java +++ b/filedistribution/src/main/java/com/yahoo/vespa/filedistribution/FileReferenceCompressor.java @@ -1,6 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.filedistribution; +import ai.vespa.airlift.zstd.ZstdInputStream; +import com.yahoo.compress.ZstdOutputStream; import net.jpountz.lz4.LZ4BlockInputStream; import net.jpountz.lz4.LZ4BlockOutputStream; import org.apache.commons.compress.archivers.ArchiveEntry; @@ -21,7 +23,6 @@ import java.util.List; import java.util.Objects; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -124,7 +125,7 @@ public class FileReferenceCompressor { return switch (compressionType) { case gzip -> new GZIPOutputStream(new FileOutputStream(outputFile)); case lz4 -> new LZ4BlockOutputStream(new FileOutputStream(outputFile)); - default -> throw new RuntimeException("Unknown compression type " + compressionType); + case zstd -> new ZstdOutputStream(new FileOutputStream(outputFile)); }; case file: return new FileOutputStream(outputFile); @@ -140,7 +141,7 @@ public class FileReferenceCompressor { return switch (compressionType) { case gzip -> new GZIPInputStream(new FileInputStream(inputFile)); case lz4 -> new LZ4BlockInputStream(new FileInputStream(inputFile)); - default -> throw new 
RuntimeException("Unknown compression type " + compressionType); + case zstd -> new ZstdInputStream(new FileInputStream(inputFile)); }; case file: return new FileInputStream(inputFile); @@ -150,4 +151,3 @@ public class FileReferenceCompressor { } } - diff --git a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java index 84e7a07340e..59ebd571431 100644 --- a/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java +++ b/filedistribution/src/test/java/com/yahoo/vespa/filedistribution/FileReceiverTest.java @@ -19,6 +19,7 @@ import java.nio.file.Files; import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType; import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType.gzip; import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType.lz4; +import static com.yahoo.vespa.filedistribution.FileReferenceData.CompressionType.zstd; import static com.yahoo.vespa.filedistribution.FileReferenceData.Type.compressed; import static com.yahoo.vespa.filedistribution.FileReferenceData.Type.file; import static org.junit.Assert.assertEquals; @@ -61,20 +62,18 @@ public class FileReceiverTest { writerB.write("2"); writerB.close(); + testWithCompression(dirWithFiles, gzip); + testWithCompression(dirWithFiles, lz4); + testWithCompression(dirWithFiles, zstd); + } + + private void testWithCompression(File dirWithFiles, CompressionType compressionType) throws IOException { File tempFile = temporaryFolder.newFile(); - File file = new FileReferenceCompressor(compressed, gzip).compress(dirWithFiles, tempFile); - transferCompressedData(gzip, new FileReference("ref"), "a", IOUtils.readFileBytes(file)); + File file = new FileReferenceCompressor(compressed, compressionType).compress(dirWithFiles, tempFile); + transferCompressedData(compressionType, new FileReference("ref"), "a", 
IOUtils.readFileBytes(file)); File downloadDir = new File(root, "ref"); assertEquals("1", IOUtils.readFile(new File(downloadDir, "a"))); assertEquals("2", IOUtils.readFile(new File(downloadDir, "b"))); - - tempFile = temporaryFolder.newFile(); - FileReferenceCompressor compressor = new FileReferenceCompressor(compressed, lz4); - file = compressor.compress(dirWithFiles, tempFile); - transferCompressedData(lz4, new FileReference("ref"), "a", IOUtils.readFileBytes(file)); - downloadDir = new File(root, "ref"); - assertEquals("1", IOUtils.readFile(new File(downloadDir, "a"))); - assertEquals("2", IOUtils.readFile(new File(downloadDir, "b"))); } private void transferPartsAndAssert(FileReference ref, String fileName, String all, int numParts) throws IOException { diff --git a/model-evaluation/src/main/java/ai/vespa/models/handler/ModelsEvaluationHandler.java b/model-evaluation/src/main/java/ai/vespa/models/handler/ModelsEvaluationHandler.java index ef04b6641e5..1bcd6363d2d 100644 --- a/model-evaluation/src/main/java/ai/vespa/models/handler/ModelsEvaluationHandler.java +++ b/model-evaluation/src/main/java/ai/vespa/models/handler/ModelsEvaluationHandler.java @@ -91,15 +91,15 @@ public class ModelsEvaluationHandler extends ThreadedHttpRequestHandler { } } Tensor result = evaluator.evaluate(); - - Optional<String> format = property(request, "format.tensors"); - if (format.isPresent() && format.get().equalsIgnoreCase("long")) { - return new Response(200, JsonFormat.encode(result)); - } - else if (format.isPresent() && format.get().equalsIgnoreCase("string")) { - return new Response(200, result.toString().getBytes(StandardCharsets.UTF_8)); - } - return new Response(200, JsonFormat.encodeShortForm(result)); + return switch (property(request, "format.tensors").orElse("short").toLowerCase()) { + case "short" -> new Response(200, JsonFormat.encode(result, true, false)); + case "long" -> new Response(200, JsonFormat.encode(result, false, false)); + case "short-value" -> new 
Response(200, JsonFormat.encode(result, true, true)); case "long-value" -> new Response(200, JsonFormat.encode(result, false, true)); case "string" -> new Response(200, result.toString(true, true).getBytes(StandardCharsets.UTF_8)); case "string-long" -> new Response(200, result.toString(true, false).getBytes(StandardCharsets.UTF_8)); default -> new ErrorResponse(400, "Unknown tensor format '" + property(request, "format.tensors") + "'"); }; } private HttpResponse listAllModels(HttpRequest request) { diff --git a/model-evaluation/src/test/java/ai/vespa/models/handler/HandlerTester.java b/model-evaluation/src/test/java/ai/vespa/models/handler/HandlerTester.java index 00531e373ee..6c4dd886f4b 100644 --- a/model-evaluation/src/test/java/ai/vespa/models/handler/HandlerTester.java +++ b/model-evaluation/src/test/java/ai/vespa/models/handler/HandlerTester.java @@ -6,6 +6,7 @@ import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.serialization.JsonFormat; +import com.yahoo.text.JSON; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -25,7 +26,18 @@ class HandlerTester { return s -> true; } private static Predicate<String> matchString(String expected) { - return s -> expected.equals(s); + return s -> { + //System.out.println("Expected: " + expected); + //System.out.println("Actual: " + s); + return expected.equals(s); + }; + } + private static Predicate<String> matchJsonString(String expected) { + return s -> { + //System.out.println("Expected: " + expected); + //System.out.println("Actual: " + s); + return JSON.canonical(expected).equals(JSON.canonical(s)); + }; } public static Predicate<String> matchJson(String... 
expectedJson) { var jExp = String.join("\n", expectedJson).replaceAll("'", "\""); @@ -68,6 +80,10 @@ class HandlerTester { } void assertResponse(String url, Map<String, String> properties, int expectedCode, String expectedResult, Map<String, String> headers) { + checkResponse(url, properties, expectedCode, matchJsonString(expectedResult), headers); + } + + void assertStringResponse(String url, Map<String, String> properties, int expectedCode, String expectedResult, Map<String, String> headers) { checkResponse(url, properties, expectedCode, matchString(expectedResult), headers); } @@ -87,15 +103,11 @@ class HandlerTester { assertResponse(getRequest, expectedCode, expectedResult); } - void assertResponse(HttpRequest request, int expectedCode, String expectedResult) { - checkResponse(request, expectedCode, matchString(expectedResult)); - } - void checkResponse(HttpRequest request, int expectedCode, Predicate<String> check) { HttpResponse response = handler.handle(request); assertEquals("application/json", response.getContentType()); - assertEquals(expectedCode, response.getStatus()); assertEquals(true, check.test(getContents(response))); + assertEquals(expectedCode, response.getStatus()); } void assertResponse(HttpRequest request, int expectedCode, Tensor expectedResult) { diff --git a/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java b/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java index c0e5dd9ccda..9b2b793212b 100644 --- a/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java +++ b/model-evaluation/src/test/java/ai/vespa/models/handler/ModelsEvaluationHandlerTest.java @@ -107,7 +107,7 @@ public class ModelsEvaluationHandlerTest { properties.put("non-existing-binding", "-1"); properties.put("format.tensors", "long"); String url = "http://localhost/model-evaluation/v1/xgboost_2_2/eval"; - String expected = 
"{\"cells\":[{\"address\":{},\"value\":-7.936679999999999}]}"; + String expected = "{\"type\":\"tensor()\",\"cells\":[{\"address\":{},\"value\":-7.936679999999999}]}"; handler.assertResponse(url, properties, 200, expected); } @@ -191,22 +191,82 @@ public class ModelsEvaluationHandlerTest { } @Test - public void testMnistSoftmaxEvaluateSpecificFunctionWithBindingsShortForm() { + public void testMnistSoftmaxEvaluateSpecificFunctionWithShortOutput() { + Map<String, String> properties = new HashMap<>(); + properties.put("Placeholder", inputTensorShortForm()); + properties.put("format.tensors", "short"); + String url = "http://localhost/model-evaluation/v1/mnist_softmax/default.add/eval"; + String expected = + """ + { + "type":"tensor(d0[],d1[10])", + "values":[[-0.3546536862850189,0.3759574592113495,0.06054411828517914,-0.251544713973999,0.017951013520359993,1.2899067401885986,-0.10389615595340729,0.6367976665496826,-1.4136744737625122,-0.2573896050453186]] + } + """; + handler.assertResponse(url, properties, 200, expected); + } + + @Test + public void testMnistSoftmaxEvaluateSpecificFunctionWithLongOutput() { Map<String, String> properties = new HashMap<>(); properties.put("Placeholder", inputTensorShortForm()); properties.put("format.tensors", "long"); String url = "http://localhost/model-evaluation/v1/mnist_softmax/default.add/eval"; - String expected = 
"{\"cells\":[{\"address\":{\"d0\":\"0\",\"d1\":\"0\"},\"value\":-0.3546536862850189},{\"address\":{\"d0\":\"0\",\"d1\":\"1\"},\"value\":0.3759574592113495},{\"address\":{\"d0\":\"0\",\"d1\":\"2\"},\"value\":0.06054411828517914},{\"address\":{\"d0\":\"0\",\"d1\":\"3\"},\"value\":-0.251544713973999},{\"address\":{\"d0\":\"0\",\"d1\":\"4\"},\"value\":0.017951013520359993},{\"address\":{\"d0\":\"0\",\"d1\":\"5\"},\"value\":1.2899067401885986},{\"address\":{\"d0\":\"0\",\"d1\":\"6\"},\"value\":-0.10389615595340729},{\"address\":{\"d0\":\"0\",\"d1\":\"7\"},\"value\":0.6367976665496826},{\"address\":{\"d0\":\"0\",\"d1\":\"8\"},\"value\":-1.4136744737625122},{\"address\":{\"d0\":\"0\",\"d1\":\"9\"},\"value\":-0.2573896050453186}]}"; + String expected = + """ + { + "type":"tensor(d0[],d1[10])", + "cells":[ + {"address":{"d0":"0","d1":"0"},"value":-0.3546536862850189}, + {"address":{"d0":"0","d1":"1"},"value":0.3759574592113495}, + {"address":{"d0":"0","d1":"2"},"value":0.06054411828517914}, + {"address":{"d0":"0","d1":"3"},"value":-0.251544713973999}, + {"address":{"d0":"0","d1":"4"},"value":0.017951013520359993}, + {"address":{"d0":"0","d1":"5"},"value":1.2899067401885986}, + {"address":{"d0":"0","d1":"6"},"value":-0.10389615595340729}, + {"address":{"d0":"0","d1":"7"},"value":0.6367976665496826}, + {"address":{"d0":"0","d1":"8"},"value":-1.4136744737625122}, + {"address":{"d0":"0","d1":"9"},"value":-0.2573896050453186} + ] + } + """; handler.assertResponse(url, properties, 200, expected); } @Test - public void testMnistSoftmaxEvaluateSpecificFunctionWithShortOutput() { + public void testMnistSoftmaxEvaluateSpecificFunctionWithShortDirectOutput() { Map<String, String> properties = new HashMap<>(); properties.put("Placeholder", inputTensorShortForm()); - properties.put("format.tensors", "short"); + properties.put("format.tensors", "short-value"); String url = "http://localhost/model-evaluation/v1/mnist_softmax/default.add/eval"; - String expected = 
"{\"type\":\"tensor(d0[],d1[10])\",\"values\":[[-0.3546536862850189,0.3759574592113495,0.06054411828517914,-0.251544713973999,0.017951013520359993,1.2899067401885986,-0.10389615595340729,0.6367976665496826,-1.4136744737625122,-0.2573896050453186]]}"; + String expected = + """ + [[-0.3546536862850189,0.3759574592113495,0.06054411828517914,-0.251544713973999,0.017951013520359993,1.2899067401885986,-0.10389615595340729,0.6367976665496826,-1.4136744737625122,-0.2573896050453186]] + """; + handler.assertResponse(url, properties, 200, expected); + } + + @Test + public void testMnistSoftmaxEvaluateSpecificFunctionWithLongDirectOutput() { + Map<String, String> properties = new HashMap<>(); + properties.put("Placeholder", inputTensorShortForm()); + properties.put("format.tensors", "long-value"); + String url = "http://localhost/model-evaluation/v1/mnist_softmax/default.add/eval"; + String expected = + """ + [ + {"address":{"d0":"0","d1":"0"},"value":-0.3546536862850189}, + {"address":{"d0":"0","d1":"1"},"value":0.3759574592113495}, + {"address":{"d0":"0","d1":"2"},"value":0.06054411828517914}, + {"address":{"d0":"0","d1":"3"},"value":-0.251544713973999}, + {"address":{"d0":"0","d1":"4"},"value":0.017951013520359993}, + {"address":{"d0":"0","d1":"5"},"value":1.2899067401885986}, + {"address":{"d0":"0","d1":"6"},"value":-0.10389615595340729}, + {"address":{"d0":"0","d1":"7"},"value":0.6367976665496826}, + {"address":{"d0":"0","d1":"8"},"value":-1.4136744737625122}, + {"address":{"d0":"0","d1":"9"},"value":-0.2573896050453186} + ] + """; handler.assertResponse(url, properties, 200, expected); } @@ -251,14 +311,14 @@ public class ModelsEvaluationHandlerTest { Map<String, String> properties = new HashMap<>(); properties.put("format.tensors", "string"); String url = "http://localhost/model-evaluation/v1/vespa_model/"; - handler.assertResponse(url + "test_mapped/eval", properties, 200, - "tensor(d0{}):{a:1.0, b:2.0}"); - handler.assertResponse(url + "test_indexed/eval", 
properties, 200, - "tensor(d0[2],d1[3]):[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]"); - handler.assertResponse(url + "test_mixed/eval", properties, 200, - "tensor(x{},y[3]):{a:[1.0, 2.0, 3.0], b:[4.0, 5.0, 6.0]}"); - handler.assertResponse(url + "test_mixed_2/eval", properties, 200, - "tensor(a[2],b[2],c{},d[2]):{a:[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]], b:[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]}"); + handler.assertStringResponse(url + "test_mapped/eval", properties, 200, + "tensor(d0{}):{a:1.0, b:2.0}", Map.of()); + handler.assertStringResponse(url + "test_indexed/eval", properties, 200, + "tensor(d0[2],d1[3]):[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]", Map.of()); + handler.assertStringResponse(url + "test_mixed/eval", properties, 200, + "tensor(x{},y[3]):{a:[1.0, 2.0, 3.0], b:[4.0, 5.0, 6.0]}", Map.of()); + handler.assertStringResponse(url + "test_mixed_2/eval", properties, 200, + "tensor(a[2],b[2],c{},d[2]):{a:[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]], b:[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]}", Map.of()); } @Test diff --git a/model-evaluation/src/test/java/ai/vespa/models/handler/OnnxEvaluationHandlerTest.java b/model-evaluation/src/test/java/ai/vespa/models/handler/OnnxEvaluationHandlerTest.java index 29795fbcd95..86f56e14e2d 100644 --- a/model-evaluation/src/test/java/ai/vespa/models/handler/OnnxEvaluationHandlerTest.java +++ b/model-evaluation/src/test/java/ai/vespa/models/handler/OnnxEvaluationHandlerTest.java @@ -83,7 +83,7 @@ public class OnnxEvaluationHandlerTest { properties.put("input2", "tensor<float>(d0[1]):[3]"); properties.put("format.tensors", "long"); String url = "http://localhost/model-evaluation/v1/add_mul/output1/eval"; - String expected = "{\"cells\":[{\"address\":{\"d0\":\"0\"},\"value\":6.0}]}"; // output1 is a mul + String expected = "{\"type\":\"tensor<float>(d0[1])\",\"cells\":[{\"address\":{\"d0\":\"0\"},\"value\":6.0}]}"; // output1 is a mul handler.assertResponse(url, properties, 200, expected); } 
@@ -94,7 +94,7 @@ public class OnnxEvaluationHandlerTest { properties.put("input2", "tensor<float>(d0[1]):[3]"); properties.put("format.tensors", "long"); String url = "http://localhost/model-evaluation/v1/add_mul/output2/eval"; - String expected = "{\"cells\":[{\"address\":{\"d0\":\"0\"},\"value\":5.0}]}"; // output2 is an add + String expected = "{\"type\":\"tensor<float>(d0[1])\",\"cells\":[{\"address\":{\"d0\":\"0\"},\"value\":5.0}]}"; // output2 is an add handler.assertResponse(url, properties, 200, expected); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java index 589468c48b8..29ee378c906 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CachingCurator.java @@ -68,7 +68,7 @@ public class CachingCurator { .toList(); } - // TODO(mpolden): Remove this + // TODO(mpolden): Remove after 2023-02-01 public void deleteRecursively(Path path) { curator.delete(path); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java index 74d88d4e73c..9211a7cce15 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java @@ -94,8 +94,7 @@ public class CuratorDb { private void initZK() { db.create(root); db.create(nodesPath); - // TODO(mpolden): Remove state paths after migration to nodesPath - // removeLegacyPaths(); + removeLegacyPaths(); // TODO(mpolden): Remove after 2023-02-01 db.create(applicationsPath); db.create(inactiveJobsPath); db.create(infrastructureVersionsPath); diff --git a/searchlib/CMakeLists.txt 
b/searchlib/CMakeLists.txt index 4f6fc90fa37..03429b956a4 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -199,6 +199,7 @@ vespa_define_module( src/tests/queryeval/nearest_neighbor src/tests/queryeval/parallel_weak_and src/tests/queryeval/predicate + src/tests/queryeval/profiled_iterator src/tests/queryeval/same_element src/tests/queryeval/simple_phrase src/tests/queryeval/sourceblender diff --git a/searchlib/src/tests/queryeval/profiled_iterator/CMakeLists.txt b/searchlib/src/tests/queryeval/profiled_iterator/CMakeLists.txt new file mode 100644 index 00000000000..b4c36e7e00c --- /dev/null +++ b/searchlib/src/tests/queryeval/profiled_iterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_queryeval_profiled_iterator_test_app TEST + SOURCES + profiled_iterator_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_queryeval_profiled_iterator_test_app COMMAND searchlib_queryeval_profiled_iterator_test_app) diff --git a/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp new file mode 100644 index 00000000000..b89dc8f9e17 --- /dev/null +++ b/searchlib/src/tests/queryeval/profiled_iterator/profiled_iterator_test.cpp @@ -0,0 +1,45 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/execution_profiler.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/searchlib/queryeval/profiled_iterator.h> +#include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> + +#include <memory> + +using namespace search::queryeval; +using vespalib::ExecutionProfiler; +using vespalib::Slime; + +SearchIterator::UP create_term(const vespalib::string &name, std::vector<uint32_t> hits) { + auto search = std::make_unique<SimpleSearch>(SimpleResult(hits)); + search->tag(name); + return search; +} + +SearchIterator::UP create_iterator_tree() { + return AndSearch::create({OrSearch::create({create_term("A", {1,3,5}), + create_term("B", {2,4,6})}, true), + OrSearch::create({create_term("C", {4,6,8}), + create_term("D", {5,7,9})}, false)}, + true); +} + +TEST(ProfiledIteratorTest, iterator_tree_can_be_profiled) { + ExecutionProfiler profiler(64); + auto root = create_iterator_tree(); + root = ProfiledIterator::profile(profiler, std::move(root)); + fprintf(stderr, "%s", root->asString().c_str()); + SimpleResult expect({4,5,6}); + SimpleResult actual; + actual.searchStrict(*root, 100); + EXPECT_EQ(actual, expect); + Slime slime; + profiler.report(slime.setObject()); + fprintf(stderr, "%s", slime.toString().c_str()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt index 840f5b6b376..adbb06910d7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt @@ -43,6 +43,7 @@ vespa_add_library(searchlib_queryeval OBJECT posting_info.cpp predicate_blueprint.cpp predicate_search.cpp + profiled_iterator.cpp ranksearch.cpp same_element_blueprint.cpp same_element_search.cpp diff --git 
a/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp index 16f4012f0e7..faf63f54af3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp @@ -55,6 +55,14 @@ MultiSearch::initRange(uint32_t beginid, uint32_t endid) } void +MultiSearch::disclose_children(std::vector<UP*> &dst) +{ + for (auto &child: _children) { + dst.push_back(&child); + } +} + +void MultiSearch::visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "children", _children); diff --git a/searchlib/src/vespa/searchlib/queryeval/multisearch.h b/searchlib/src/vespa/searchlib/queryeval/multisearch.h index 9216391b85d..fea0cb63b5b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/multisearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.h @@ -41,6 +41,7 @@ public: void insert(size_t index, SearchIterator::UP search); virtual bool needUnpack(size_t index) const { (void) index; return true; } void initRange(uint32_t beginId, uint32_t endId) override; + void disclose_children(std::vector<UP*> &dst) override; protected: MultiSearch(); void doUnpack(uint32_t docid) override; diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp new file mode 100644 index 00000000000..8ae54606146 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.cpp @@ -0,0 +1,118 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "profiled_iterator.h" +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/vespalib/objects/visit.hpp> +#include <vespa/vespalib/util/classname.h> +#include <vespa/vespalib/util/stringfmt.h> + +using vespalib::make_string_short::fmt; + +namespace search::queryeval { + +namespace { + +using Profiler = vespalib::ExecutionProfiler; + +struct TaskGuard { + Profiler &profiler; + TaskGuard(Profiler &profiler_in, Profiler::TaskId task) noexcept + : profiler(profiler_in) { profiler.start(task); } + ~TaskGuard() { profiler.complete(); } +}; + +vespalib::string name_of(const auto &obj) { + auto name = vespalib::getClassName(obj); + auto end = name.find("<"); + auto ns = name.rfind("::", end); + size_t begin = (ns > name.size()) ? 0 : ns + 2; + return name.substr(begin, end - begin); +} + +std::unique_ptr<SearchIterator> create(Profiler &profiler, + const vespalib::string &path, + std::unique_ptr<SearchIterator> search, + auto ctor_token) +{ + vespalib::string prefix = fmt("%s%s/", path.c_str(), name_of(*search).c_str()); + return std::make_unique<ProfiledIterator>(profiler, std::move(search), + profiler.resolve(prefix + "init"), + profiler.resolve(prefix + "seek"), + profiler.resolve(prefix + "unpack"), + profiler.resolve(prefix + "termwise"), + ctor_token); +} + +} + +void +ProfiledIterator::initRange(uint32_t begin_id, uint32_t end_id) +{ + TaskGuard guard(_profiler, _init_tag); + SearchIterator::initRange(begin_id, end_id); + _search->initRange(begin_id, end_id); + setDocId(_search->getDocId()); +} + +void +ProfiledIterator::doSeek(uint32_t docid) +{ + TaskGuard guard(_profiler, _seek_tag); + _search->doSeek(docid); + setDocId(_search->getDocId()); +} + +void +ProfiledIterator::doUnpack(uint32_t docid) +{ + TaskGuard guard(_profiler, _unpack_tag); + _search->doUnpack(docid); +} + +std::unique_ptr<BitVector> +ProfiledIterator::get_hits(uint32_t begin_id) +{ + TaskGuard guard(_profiler, _termwise_tag); + return _search->get_hits(begin_id); +} + +void 
+ProfiledIterator::or_hits_into(BitVector &result, uint32_t begin_id) +{ + TaskGuard guard(_profiler, _termwise_tag); + _search->or_hits_into(result, begin_id); +} + +void +ProfiledIterator::and_hits_into(BitVector &result, uint32_t begin_id) +{ + TaskGuard guard(_profiler, _termwise_tag); + _search->and_hits_into(result, begin_id); +} + +void +ProfiledIterator::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "search", _search); +} + +std::unique_ptr<SearchIterator> +ProfiledIterator::profile(Profiler &profiler, std::unique_ptr<SearchIterator> root) +{ + std::vector<UP*> links({&root}); + std::vector<vespalib::string> paths({"/"}); + for (size_t offset = 0; offset < links.size(); ++offset) { + UP &link = *(links[offset]); + vespalib::string path = paths[offset]; + size_t first_child = links.size(); + link->disclose_children(links); + size_t num_children = links.size() - first_child; + for (size_t i = 0; i < num_children; ++i) { + paths.push_back(fmt("%s%zu/", path.c_str(), i)); + } + link = create(profiler, path, std::move(link), ctor_tag{}); + } + return root; +} + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h new file mode 100644 index 00000000000..e8c15501267 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/profiled_iterator.h @@ -0,0 +1,58 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" +#include <vespa/vespalib/util/execution_profiler.h> + +namespace search::queryeval { + +/** + * Wraps a search iterator to profile its operations. 
Each iterator + * has 4 distinct operations that will be profiled separately: + * + * 'init' -> initRange + * 'seek' -> doSeek + * 'unpack' -> doUnpack + * 'termwise' -> get_hits, or_hits_into, and_hits_into + * + * The full name of each profiled task will be the path down the + * iterator tree combined with the class name and the operation name. + **/ +class ProfiledIterator : public SearchIterator +{ +private: + using Profiler = vespalib::ExecutionProfiler; + Profiler &_profiler; + std::unique_ptr<SearchIterator> _search; + Profiler::TaskId _init_tag; + Profiler::TaskId _seek_tag; + Profiler::TaskId _unpack_tag; + Profiler::TaskId _termwise_tag; + struct ctor_tag{}; +public: + ProfiledIterator(Profiler &profiler, + std::unique_ptr<SearchIterator> search, + Profiler::TaskId init_tag, + Profiler::TaskId seek_tag, + Profiler::TaskId unpack_tag, + Profiler::TaskId termwise_tag, + ctor_tag) noexcept + : _profiler(profiler), _search(std::move(search)), + _init_tag(init_tag), _seek_tag(seek_tag), + _unpack_tag(unpack_tag), _termwise_tag(termwise_tag) {} + void initRange(uint32_t begin_id, uint32_t end_id) override; + void doSeek(uint32_t docid) override; + void doUnpack(uint32_t docid) override; + std::unique_ptr<BitVector> get_hits(uint32_t begin_id) override; + void or_hits_into(BitVector &result, uint32_t begin_id) override; + void and_hits_into(BitVector &result, uint32_t begin_id) override; + void visitMembers(vespalib::ObjectVisitor &visitor) const override; + Trinary is_strict() const override { return _search->is_strict(); } + Trinary matches_any() const override { return _search->matches_any(); } + const PostingInfo *getPostingInfo() const override { return _search->getPostingInfo(); } + static std::unique_ptr<SearchIterator> profile(Profiler &profiler, + std::unique_ptr<SearchIterator> root); +}; + +} // namespace diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp index 
a63b8e54eb4..5da3d6c3279 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp @@ -117,6 +117,11 @@ SearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const visit(visitor, "endid", _endid); } +void +SearchIterator::disclose_children(std::vector<UP*> &) +{ +} + } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h index 9ac69735806..0ce0d19a226 100644 --- a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -361,6 +361,12 @@ public: // (Undefined -> use seek to find out) // number of matches: (False <= Undefined <= True) virtual Trinary matches_any() const { return Trinary::Undefined; } + + // Disclose children by giving out references to owning + // pointers. This allows re-wiring from the outside, which is + // needed for deep decoration used by match profiling. Only + // disclose children that are treated as generic SearchIterators. 
+ virtual void disclose_children(std::vector<UP*> &dst); }; } @@ -369,4 +375,3 @@ void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, const search::queryeval::SearchIterator &obj); void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, const search::queryeval::SearchIterator *obj); - diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java index 7a66ba2ee79..1e2ee3968fa 100644 --- a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java +++ b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java @@ -749,14 +749,25 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { private boolean tensorShortForm() { if (request != null && request.parameters().containsKey("format.tensors") && - request.parameters().get("format.tensors").contains("long")) { + ( request.parameters().get("format.tensors").contains("long") + || request.parameters().get("format.tensors").contains("long-value"))) { return false; } return true; // default } + private boolean tensorDirectValues() { + if (request != null && + request.parameters().containsKey("format.tensors") && + ( request.parameters().get("format.tensors").contains("short-value") + || request.parameters().get("format.tensors").contains("long-value"))) { + return true; + } + return false; // TODO: Flip default on Vespa 9 + } + synchronized void writeSingleDocument(Document document) throws IOException { - new JsonWriter(json, tensorShortForm()).writeFields(document); + new JsonWriter(json, tensorShortForm(), tensorDirectValues()).writeFields(document); } synchronized void writeDocumentsArrayStart() throws IOException { @@ -775,7 +786,7 @@ public class DocumentV1ApiHandler extends AbstractRequestHandler { 
ByteArrayOutputStream myOut = new ByteArrayOutputStream(1); myOut.write(','); // Prepend rather than append, to avoid double memory copying. try (JsonGenerator myJson = jsonFactory.createGenerator(myOut)) { - new JsonWriter(myJson, tensorShortForm()).write(document); + new JsonWriter(myJson, tensorShortForm(), tensorDirectValues()).write(document); } docs.add(myOut); diff --git a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java index 973d0a98b24..b6ad7ba5570 100644 --- a/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java +++ b/vespaclient-container-plugin/src/test/java/com/yahoo/document/restapi/resource/DocumentV1ApiTest.java @@ -521,16 +521,66 @@ public class DocumentV1ApiTest { parameters.responseHandler().get().handleResponse(new DocumentResponse(0, doc1)); return new Result(); }); + // -- short tensors + response = driver.sendRequest("http://localhost/document/v1/space/music/docid/one?format.tensors=short"); + String shortJson = + """ + { + "pathId": "/document/v1/space/music/docid/one", + "id": "id:space:music::one", + "fields": { + "artist": "Tom Waits", + "embedding": { "type": "tensor(x[3])","values": [1.0, 2.0, 3.0]} + } + } + """; + assertEquals(200, response.getStatus()); + assertSameJson(shortJson, response.readAll()); + // -- long tensors response = driver.sendRequest("http://localhost/document/v1/space/music/docid/one?format.tensors=long"); - assertSameJson("{" + - " \"pathId\": \"/document/v1/space/music/docid/one\"," + - " \"id\": \"id:space:music::one\"," + - " \"fields\": {" + - " \"artist\": \"Tom Waits\"," + - " \"embedding\": { \"cells\": [{\"address\":{\"x\":\"0\"},\"value\":1.0},{\"address\":{\"x\":\"1\"},\"value\": 2.0},{\"address\":{\"x\":\"2\"},\"value\": 3.0}]}" + - " }" + - "}", response.readAll()); + String longJson = + """ + { + 
"pathId": "/document/v1/space/music/docid/one", + "id": "id:space:music::one", + "fields": { + "artist": "Tom Waits", + "embedding": { "type": "tensor(x[3])","cells": [{"address":{"x":"0"},"value":1.0},{"address":{"x":"1"},"value": 2.0},{"address":{"x":"2"},"value": 3.0}]} + } + } + """; + assertEquals(200, response.getStatus()); + assertSameJson(longJson, response.readAll()); + // -- short direct tensors + response = driver.sendRequest("http://localhost/document/v1/space/music/docid/one?format.tensors=short-value"); + String shortDirectJson = + """ + { + "pathId": "/document/v1/space/music/docid/one", + "id": "id:space:music::one", + "fields": { + "artist": "Tom Waits", + "embedding": [1.0, 2.0, 3.0]} + } + } + """; + assertEquals(200, response.getStatus()); + assertSameJson(shortDirectJson, response.readAll()); + // -- long direct tensors + response = driver.sendRequest("http://localhost/document/v1/space/music/docid/one?format.tensors=long-value"); + String longDirectJson = + """ + { + "pathId": "/document/v1/space/music/docid/one", + "id": "id:space:music::one", + "fields": { + "artist": "Tom Waits", + "embedding": [{"address":{"x":"0"},"value":1.0},{"address":{"x":"1"},"value": 2.0},{"address":{"x":"2"},"value": 3.0}] + } + } + """; assertEquals(200, response.getStatus()); + assertSameJson(longDirectJson, response.readAll()); // GET with not encoded / in user specified part of document id is perfectly OK ... 
щ(ಥДಥщ) access.session.expect((id, parameters) -> { diff --git a/vespaclient-java/src/main/java/com/yahoo/vespa/feed/perf/SimpleFeeder.java b/vespaclient-java/src/main/java/com/yahoo/vespa/feed/perf/SimpleFeeder.java index c40e2c21561..7f85f37436b 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespa/feed/perf/SimpleFeeder.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespa/feed/perf/SimpleFeeder.java @@ -308,6 +308,7 @@ public class SimpleFeeder implements ReplyHandler { return new DocumentUpdate(deserializer); } } + @Override public FeedOperation read() throws Exception { int read = readExact(in, prefix); @@ -352,8 +353,6 @@ public class SimpleFeeder implements ReplyHandler { return new JsonDestination(params.getDumpStream(), failure, numReplies); } - - @SuppressWarnings("deprecation") SimpleFeeder(FeederParams params) { inputStreams = params.getInputStreams(); out = params.getStdOut(); diff --git a/vespaclient-java/src/main/java/com/yahoo/vespaget/ClientParameters.java b/vespaclient-java/src/main/java/com/yahoo/vespaget/ClientParameters.java index 7e464431f9a..91837cb4b09 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespaget/ClientParameters.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespaget/ClientParameters.java @@ -39,13 +39,15 @@ public class ClientParameters { public final boolean jsonOutput; // Output JSON tensors in short form public final boolean tensorShortForm; - + // Output JSON tensorvalues directly + public final boolean tensorDirectValues; private ClientParameters( boolean help, Iterator<String> documentIds, boolean printIdsOnly, String fieldSet, String route, String cluster, String configId, boolean showDocSize, double timeout, boolean noRetry, int traceLevel, - DocumentProtocol.Priority priority, boolean jsonOutput, boolean tensorShortForm) { + DocumentProtocol.Priority priority, boolean jsonOutput, boolean tensorShortForm, + boolean tensorDirectValues) { this.help = help; this.documentIds = documentIds; @@ -61,6 
+63,7 @@ public class ClientParameters { this.priority = priority; this.jsonOutput = jsonOutput; this.tensorShortForm = tensorShortForm; + this.tensorDirectValues = tensorDirectValues; } public static class Builder { @@ -78,6 +81,7 @@ public class ClientParameters { private DocumentProtocol.Priority priority; private boolean jsonOutput; private boolean tensorShortForm; + private boolean tensorDirectValues; public Builder setHelp(boolean help) { this.help = help; @@ -149,10 +153,15 @@ public class ClientParameters { return this; } + public Builder setTensorDirectValues(boolean tensorDirectValues) { + this.tensorDirectValues = tensorDirectValues; + return this; + } + public ClientParameters build() { return new ClientParameters( help, documentIds, printIdsOnly, fieldSet, route, cluster, configId, - showDocSize, timeout, noRetry, traceLevel, priority, jsonOutput, tensorShortForm); + showDocSize, timeout, noRetry, traceLevel, priority, jsonOutput, tensorShortForm, tensorDirectValues); } } diff --git a/vespaclient-java/src/main/java/com/yahoo/vespaget/CommandLineOptions.java b/vespaclient-java/src/main/java/com/yahoo/vespaget/CommandLineOptions.java index b059ca6e62a..f13ed13b92a 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespaget/CommandLineOptions.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespaget/CommandLineOptions.java @@ -39,6 +39,7 @@ public class CommandLineOptions { public static final String JSONOUTPUT_OPTION = "jsonoutput"; public static final String XMLOUTPUT_OPTION = "xmloutput"; public static final String SHORTTENSORS_OPTION = "shorttensors"; + public static final String DIRECTTENSORS_OPTION = "directtensors"; private final Options options = createOptions(); private final InputStream stdIn; @@ -167,6 +168,7 @@ public class CommandLineOptions { boolean jsonOutput = cl.hasOption(JSONOUTPUT_OPTION); boolean xmlOutput = cl.hasOption(XMLOUTPUT_OPTION); boolean shortTensors = cl.hasOption(SHORTTENSORS_OPTION); + boolean directTensors = 
cl.hasOption(DIRECTTENSORS_OPTION); int trace = getTrace(cl); DocumentProtocol.Priority priority = getPriority(cl); double timeout = getTimeout(cl); @@ -218,6 +220,7 @@ public class CommandLineOptions { .setTimeout(timeout) .setJsonOutput(!xmlOutput) .setTensorShortForm(shortTensors) + .setTensorDirectValues(directTensors) .build(); } catch (ParseException pe) { throw new IllegalArgumentException(pe.getMessage()); diff --git a/vespaclient-java/src/main/java/com/yahoo/vespaget/DocumentRetriever.java b/vespaclient-java/src/main/java/com/yahoo/vespaget/DocumentRetriever.java index 1a2f3424b3c..0f17fa587e4 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespaget/DocumentRetriever.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespaget/DocumentRetriever.java @@ -168,7 +168,7 @@ public class DocumentRetriever { System.out.println(document.getId()); } else { if (params.jsonOutput) { - System.out.print(Utf8.toString(JsonWriter.toByteArray(document, params.tensorShortForm))); + System.out.print(Utf8.toString(JsonWriter.toByteArray(document, params.tensorShortForm, params.tensorDirectValues))); } else { System.out.print(document.toXML(" ")); } diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java index 2ac0510a2a3..0c7ad81f212 100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/StdOutVisitorHandler.java @@ -33,30 +33,33 @@ import java.util.logging.Logger; * @author Thomas Gundersen */ public class StdOutVisitorHandler extends VdsVisitHandler { + private static final Logger log = Logger.getLogger( StdOutVisitorHandler.class.getName()); - private boolean printIds; - private boolean indentXml; - private int processTimeMilliSecs; - private PrintStream out; + private final boolean printIds; + private final boolean indentXml; + private final int 
processTimeMilliSecs; + private final PrintStream out; private final boolean jsonOutput; private final boolean tensorShortForm; + private final boolean tensorDirectValues; - private VisitorDataHandler dataHandler; + private final VisitorDataHandler dataHandler; public StdOutVisitorHandler(boolean printIds, boolean indentXml, boolean showProgress, boolean showStatistics, boolean doStatistics, boolean abortOnClusterDown, int processtime, boolean jsonOutput, - boolean tensorShortForm) + boolean tensorShortForm, + boolean tensorDirectValues) { this(printIds, indentXml, showProgress, showStatistics, doStatistics, abortOnClusterDown, processtime, - jsonOutput, tensorShortForm, createStdOutPrintStream()); + jsonOutput, tensorShortForm, tensorDirectValues, createStdOutPrintStream()); } StdOutVisitorHandler(boolean printIds, boolean indentXml, boolean showProgress, boolean showStatistics, boolean doStatistics, boolean abortOnClusterDown, int processtime, boolean jsonOutput, - boolean tensorShortForm, PrintStream out) + boolean tensorShortForm, boolean tensorDirectValues, PrintStream out) { super(showProgress, showStatistics, abortOnClusterDown); this.printIds = printIds; @@ -64,6 +67,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler { this.processTimeMilliSecs = processtime; this.jsonOutput = jsonOutput; this.tensorShortForm = tensorShortForm; + this.tensorDirectValues = tensorDirectValues; this.out = out; this.dataHandler = new DataHandler(doStatistics); } @@ -174,7 +178,7 @@ public class StdOutVisitorHandler extends VdsVisitHandler { private void writeJsonDocument(Document doc) throws IOException { writeFeedStartOrRecordSeparator(); - out.write(JsonWriter.toByteArray(doc, tensorShortForm)); + out.write(JsonWriter.toByteArray(doc, tensorShortForm, tensorDirectValues)); } @Override diff --git a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java index f84cb7270bd..340d4a7eb81 
100644 --- a/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java +++ b/vespaclient-java/src/main/java/com/yahoo/vespavisit/VdsVisit.java @@ -371,6 +371,7 @@ public class VdsVisit { private int fullTimeout = 7 * 24 * 60 * 60 * 1000; private boolean jsonOutput = false; private boolean tensorShortForm = false; // TODO Vespa 9: change default to true + private boolean tensorDirectValues = false; // TODO Vespa 9: change default to true public VisitorParameters getVisitorParameters() { return visitorParameters; @@ -447,16 +448,25 @@ public class VdsVisit { public void setTensorShortForm(boolean tensorShortForm) { this.tensorShortForm = tensorShortForm; } + + public boolean tensorDirectValues() { + return tensorDirectValues; + } + + public void setTensorDirectValues(boolean tensorDirectValues) { + this.tensorDirectValues = tensorDirectValues; + } + } protected static class ArgumentParser { - private Options options; + private final Options options; public ArgumentParser(Options options) { this.options = options; } - public VdsVisitParameters parse(String args[]) throws org.apache.commons.cli.ParseException { + public VdsVisitParameters parse(String[] args) throws org.apache.commons.cli.ParseException { VdsVisitParameters allParams = new VdsVisitParameters(); VisitorParameters params = new VisitorParameters(""); CommandLineParser parser = new DefaultParser(); @@ -572,6 +582,9 @@ public class VdsVisit { if (line.hasOption("shorttensors")) { allParams.setTensorShortForm(true); } + if (line.hasOption("tensorvalues")) { + allParams.setTensorDirectValues(true); + } boolean jsonOutput = line.hasOption("jsonoutput"); boolean xmlOutput = line.hasOption("xmloutput"); @@ -743,7 +756,8 @@ public class VdsVisit { params.getAbortOnClusterDown(), params.getProcessTime(), params.jsonOutput, - params.tensorShortForm); + params.tensorShortForm, + params.tensorDirectValues); if (visitorParameters.getResumeFileName() != null) { 
handler.setProgressFileName(visitorParameters.getResumeFileName()); diff --git a/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java b/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java index a2e9f91d503..c1bbe8711a5 100644 --- a/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java +++ b/vespaclient-java/src/test/java/com/yahoo/vespavisit/StdOutVisitorHandlerTest.java @@ -42,7 +42,7 @@ public class StdOutVisitorHandlerTest { initStdOutVisitorHandlerTest(jsonOutput); ByteArrayOutputStream out = new ByteArrayOutputStream(); StdOutVisitorHandler visitorHandler = - new StdOutVisitorHandler(/*printIds*/true, false, false, false, false, false, 0, jsonOutput, false, new PrintStream(out, true)); + new StdOutVisitorHandler(/*printIds*/true, false, false, false, false, false, 0, jsonOutput, false, false, new PrintStream(out, true)); VisitorDataHandler dataHandler = visitorHandler.getDataHandler(); dataHandler.onDone(); String output = out.toString(); @@ -55,7 +55,7 @@ public class StdOutVisitorHandlerTest { initStdOutVisitorHandlerTest(jsonOutput); ByteArrayOutputStream out = new ByteArrayOutputStream(); StdOutVisitorHandler visitorHandler = - new StdOutVisitorHandler(/*printIds*/false, false, false, false, false, false, 0, jsonOutput, false, new PrintStream(out, true)); + new StdOutVisitorHandler(/*printIds*/false, false, false, false, false, false, 0, jsonOutput, false, false, new PrintStream(out, true)); VisitorDataHandler dataHandler = visitorHandler.getDataHandler(); dataHandler.onDone(); String expectedOutput = jsonOutput ? 
"[]" : ""; @@ -63,7 +63,7 @@ public class StdOutVisitorHandlerTest { assertEquals(expectedOutput, output); } - void do_test_json_tensor_fields_can_be_output_in_short_or_long_form(boolean tensorShortForm, String expectedOutput) { + void do_test_json_tensor_fields_rendering(boolean tensorShortForm, boolean tensorDirectValues, String expectedOutput) { var docType = new DocumentType("foo"); docType.addField("bar", TensorDataType.getTensor(TensorType.fromSpec("tensor(x[3])"))); var doc = new Document(docType, "id:baz:foo::tensor-stuff"); @@ -72,7 +72,7 @@ public class StdOutVisitorHandlerTest { var out = new ByteArrayOutputStream(); var visitorHandler = new StdOutVisitorHandler(/*printIds*/false, false, false, false, false, false, - 0, true, tensorShortForm, new PrintStream(out, true)); + 0, true, tensorShortForm, tensorDirectValues, new PrintStream(out, true)); var dataHandler = visitorHandler.getDataHandler(); var controlSession = mock(VisitorControlSession.class); var ackToken = mock(AckToken.class); @@ -88,8 +88,8 @@ public class StdOutVisitorHandlerTest { void json_tensor_fields_can_be_output_in_long_form() { var expectedOutput = """ [ - {"id":"id:baz:foo::tensor-stuff","fields":{"bar":{"cells":[{"address":{"x":"0"},"value":1.0},{"address":{"x":"1"},"value":2.0},{"address":{"x":"2"},"value":3.0}]}}}]"""; - do_test_json_tensor_fields_can_be_output_in_short_or_long_form(false, expectedOutput); + {"id":"id:baz:foo::tensor-stuff","fields":{"bar":{"type":"tensor(x[3])","cells":[{"address":{"x":"0"},"value":1.0},{"address":{"x":"1"},"value":2.0},{"address":{"x":"2"},"value":3.0}]}}}]"""; + do_test_json_tensor_fields_rendering(false, false, expectedOutput); } @Test @@ -97,7 +97,7 @@ public class StdOutVisitorHandlerTest { var expectedOutput = """ [ {"id":"id:baz:foo::tensor-stuff","fields":{"bar":{"type":"tensor(x[3])","values":[1.0,2.0,3.0]}}}]"""; - do_test_json_tensor_fields_can_be_output_in_short_or_long_form(true, expectedOutput); + 
do_test_json_tensor_fields_rendering(true, false, expectedOutput); } } diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 8a2e68a8d8c..7f4a19b029d 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -3079,7 +3079,8 @@ "methods" : [ "public static java.lang.String encode(java.util.Map)", "public static java.lang.String escape(java.lang.String)", - "public static boolean equals(java.lang.String, java.lang.String)" + "public static boolean equals(java.lang.String, java.lang.String)", + "public static java.lang.String canonical(java.lang.String)" ], "fields" : [ ] }, diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java index 0e8fbc30bb6..b7e6e67ce73 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java @@ -37,52 +37,78 @@ import java.util.stream.Collectors; */ public class JsonFormat { - /** Serializes the given tensor value into JSON format */ - public static byte[] encode(Tensor tensor) { + /** + * Serializes the given tensor value into JSON format. + * + * @param tensor the tensor to serialize + * @param shortForm whether to encode in a short type-dependent format + * @param directValues whether to encode values directly, or wrapped in an object containing "type" and "cells" + */ + public static byte[] encode(Tensor tensor, boolean shortForm, boolean directValues) { Slime slime = new Slime(); - Cursor root = slime.setObject(); - encodeCells(tensor, root); + Cursor root = null; + if ( ! directValues) { + root = slime.setObject(); + root.setString("type", tensor.type().toString()); + } + + if (shortForm) { + if (tensor instanceof IndexedTensor denseTensor) { + // Encode as nested lists if indexed tensor + Cursor parent = root == null ? 
slime.setArray() : root.setArray("values"); + encodeValues(denseTensor, parent, new long[denseTensor.dimensionSizes().dimensions()], 0); + } else if (tensor instanceof MappedTensor && tensor.type().dimensions().size() == 1) { + // Short form for a single mapped dimension + Cursor parent = root == null ? slime.setObject() : root.setObject("cells"); + encodeSingleDimensionCells((MappedTensor) tensor, parent); + } else if (tensor instanceof MixedTensor && + tensor.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped)) { + // Short form for a mixed tensor + boolean singleMapped = tensor.type().dimensions().stream().filter(TensorType.Dimension::isMapped).count() == 1; + Cursor parent = root == null ? ( singleMapped ? slime.setObject() : slime.setArray() ) + : ( singleMapped ? root.setObject("blocks") : root.setArray("blocks")); + encodeBlocks((MixedTensor) tensor, parent); + } else { + // default to standard cell address output + Cursor parent = root == null ? slime.setArray() : root.setArray("cells"); + encodeCells(tensor, parent); + } + + return com.yahoo.slime.JsonFormat.toJsonBytes(slime); + } + else { + Cursor parent = root == null ? slime.setArray() : root.setArray("cells"); + encodeCells(tensor, parent); + } return com.yahoo.slime.JsonFormat.toJsonBytes(slime); } - /** Serializes the given tensor type and value into JSON format */ + /** Serializes the given tensor value into JSON format, in long format, wrapped in an object containing "type" and "cells". */ + public static byte[] encode(Tensor tensor) { + return encode(tensor, false, false); + } + + /** + * Serializes the given tensor type and value into JSON format. 
+ * + * @deprecated use {@link #encode(Tensor, boolean, boolean)} + */ + @Deprecated // TODO: Remove on Vespa 9 public static byte[] encodeWithType(Tensor tensor) { - Slime slime = new Slime(); - Cursor root = slime.setObject(); - root.setString("type", tensor.type().toString()); - encodeCells(tensor, root); - return com.yahoo.slime.JsonFormat.toJsonBytes(slime); + return encode(tensor, false, false); } - /** Serializes the given tensor type and value into a short-form JSON format */ + /** + * Serializes the given tensor type and value into a short-form JSON format. + * + * @deprecated use {@link #encode(Tensor, boolean, boolean)} + */ + @Deprecated // TODO: Remove on Vespa 9 public static byte[] encodeShortForm(Tensor tensor) { - Slime slime = new Slime(); - Cursor root = slime.setObject(); - root.setString("type", tensor.type().toString()); - - if (tensor instanceof IndexedTensor denseTensor) { - // Encode as nested lists if indexed tensor - encodeValues(denseTensor, root.setArray("values"), new long[denseTensor.dimensionSizes().dimensions()], 0); - } - else if (tensor instanceof MappedTensor && tensor.type().dimensions().size() == 1) { - // Short form for a single mapped dimension - encodeSingleDimensionCells((MappedTensor) tensor, root); - } - else if (tensor instanceof MixedTensor && - tensor.type().dimensions().stream().filter(TensorType.Dimension::isMapped).count() >= 1) { - // Short form for a mixed tensor - encodeBlocks((MixedTensor) tensor, root); - } - else { - // No other short forms exist: default to standard cell address output - encodeCells(tensor, root); - } - - return com.yahoo.slime.JsonFormat.toJsonBytes(slime); + return encode(tensor, true, false); } - private static void encodeCells(Tensor tensor, Cursor rootObject) { - Cursor cellsArray = rootObject.setArray("cells"); + private static void encodeCells(Tensor tensor, Cursor cellsArray) { for (Iterator<Tensor.Cell> i = tensor.cellIterator(); i.hasNext(); ) { Tensor.Cell cell = i.next(); Cursor 
cellObject = 
cellsArray.addObject(); @@ -91,8 +117,7 @@ public class JsonFormat { } } - private static void encodeSingleDimensionCells(MappedTensor tensor, Cursor cursor) { - Cursor cells = cursor.setObject("cells"); + private static void encodeSingleDimensionCells(MappedTensor tensor, Cursor cells) { if (tensor.type().dimensions().size() > 1) throw new IllegalStateException("JSON encode of mapped tensor can only contain a single dimension"); tensor.cells().forEach((k,v) -> cells.setDouble(k.label(0), v)); @@ -124,7 +149,6 @@ public class JsonFormat { if (mappedDimensions.size() < 1) { throw new IllegalArgumentException("Should be ensured by caller"); } - cursor = (mappedDimensions.size() == 1) ? cursor.setObject("blocks") : cursor.setArray("blocks"); // Create tensor type for mapped dimensions subtype TensorType mappedSubType = new TensorType.Builder(mappedDimensions).build(); @@ -216,48 +240,52 @@ public class JsonFormat { } private static void decodeValues(Inspector values, Tensor.Builder builder) { + decodeValues(values, builder, new MutableInteger(0)); + } + + private static void decodeValues(Inspector values, Tensor.Builder builder, MutableInteger index) { if ( ! (builder instanceof IndexedTensor.BoundBuilder indexedBuilder)) - throw new IllegalArgumentException("The 'values' field can only be used with dense tensors. " + - "Use 'cells' or 'blocks' instead"); + throw new IllegalArgumentException("An array of values can only be used with a dense tensor. 
Use a map instead"); if (values.type() == Type.STRING) { double[] decoded = decodeHexString(values.asString(), builder.type().valueType()); if (decoded.length == 0) - throw new IllegalArgumentException("The 'values' string does not contain any values"); + throw new IllegalArgumentException("The values string does not contain any values"); for (int i = 0; i < decoded.length; i++) { indexedBuilder.cellByDirectIndex(i, decoded[i]); } return; } if (values.type() != Type.ARRAY) - throw new IllegalArgumentException("Excepted 'values' to contain an array, not " + values.type()); + throw new IllegalArgumentException("Excepted values to be an array, not " + values.type()); if (values.entries() == 0) - throw new IllegalArgumentException("The 'values' array does not contain any values"); + throw new IllegalArgumentException("The values array does not contain any values"); - MutableInteger index = new MutableInteger(0); values.traverse((ArrayTraverser) (__, value) -> { - if (value.type() != Type.LONG && value.type() != Type.DOUBLE) { - throw new IllegalArgumentException("Excepted the values array to contain numbers, not " + value.type()); - } - indexedBuilder.cellByDirectIndex(index.next(), value.asDouble()); + if (value.type() == Type.ARRAY) + decodeValues(value, builder, index); + else if (value.type() == Type.LONG || value.type() == Type.DOUBLE) + indexedBuilder.cellByDirectIndex(index.next(), value.asDouble()); + else + throw new IllegalArgumentException("Excepted the values array to contain numbers or nested arrays, not " + value.type()); }); } private static void decodeBlocks(Inspector values, Tensor.Builder builder) { if ( ! (builder instanceof MixedTensor.BoundBuilder mixedBuilder)) - throw new IllegalArgumentException("The 'blocks' field can only be used with mixed tensors with bound dimensions. " + - "Use 'cells' or 'values' instead"); + throw new IllegalArgumentException("Blocks of values can only be used with mixed (sparse and dense) tensors." 
+ + "Use an array of cell values instead."); if (values.type() == Type.ARRAY) values.traverse((ArrayTraverser) (__, value) -> decodeBlock(value, mixedBuilder)); else if (values.type() == Type.OBJECT) values.traverse((ObjectTraverser) (key, value) -> decodeSingleDimensionBlock(key, value, mixedBuilder)); else - throw new IllegalArgumentException("Excepted 'blocks' to contain an array or object, not " + values.type()); + throw new IllegalArgumentException("Excepted the block to contain an array or object, not " + values.type()); } private static void decodeBlock(Inspector block, MixedTensor.BoundBuilder mixedBuilder) { if (block.type() != Type.OBJECT) - throw new IllegalArgumentException("Expected an item in a 'blocks' array to be an object, not " + block.type()); + throw new IllegalArgumentException("Expected an item in a blocks array to be an object, not " + block.type()); mixedBuilder.block(decodeAddress(block.field("address"), mixedBuilder.type().mappedSubtype()), decodeValues(block.field("values"), mixedBuilder)); } @@ -267,7 +295,9 @@ public class JsonFormat { boolean hasIndexed = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isIndexed); boolean hasMapped = builder.type().dimensions().stream().anyMatch(TensorType.Dimension::isMapped); - if ( ! hasMapped) + if (isArrayOfObjects(root)) + decodeCells(root, builder); + else if ( ! 
hasMapped) decodeValues(root, builder); else if (hasMapped && hasIndexed) decodeBlocks(root, builder); @@ -275,9 +305,17 @@ public class JsonFormat { decodeCells(root, builder); } + private static boolean isArrayOfObjects(Inspector inspector) { + if (inspector.type() != Type.ARRAY) return false; + if (inspector.entries() == 0) return false; + Inspector firstItem = inspector.entry(0); + if (firstItem.type() == Type.ARRAY) return isArrayOfObjects(firstItem); + return firstItem.type() == Type.OBJECT; + } + private static void decodeSingleDimensionBlock(String key, Inspector value, MixedTensor.BoundBuilder mixedBuilder) { if (value.type() != Type.ARRAY) - throw new IllegalArgumentException("Expected an item in a 'blocks' array to be an array, not " + value.type()); + throw new IllegalArgumentException("Expected an item in a blocks array to be an array, not " + value.type()); mixedBuilder.block(asAddress(key, mixedBuilder.type().mappedSubtype()), decodeValues(value, mixedBuilder)); } @@ -361,19 +399,19 @@ public class JsonFormat { double[] values = new double[(int)mixedBuilder.denseSubspaceSize()]; if (valuesField.type() == Type.ARRAY) { if (valuesField.entries() == 0) { - throw new IllegalArgumentException("The 'block' value array does not contain any values"); + throw new IllegalArgumentException("The block value array does not contain any values"); } valuesField.traverse((ArrayTraverser) (index, value) -> values[index] = decodeNumeric(value)); } else if (valuesField.type() == Type.STRING) { double[] decoded = decodeHexString(valuesField.asString(), mixedBuilder.type().valueType()); if (decoded.length == 0) { - throw new IllegalArgumentException("The 'block' value string does not contain any values"); + throw new IllegalArgumentException("The block value string does not contain any values"); } for (int i = 0; i < decoded.length; i++) { values[i] = decoded[i]; } } else { - throw new IllegalArgumentException("Expected a block to contain a 'values' array"); + throw new 
IllegalArgumentException("Expected a block to contain an array of values"); } return values; } diff --git a/vespajlib/src/main/java/com/yahoo/text/JSON.java b/vespajlib/src/main/java/com/yahoo/text/JSON.java index 6f8ef9a289f..8ef66b745cc 100644 --- a/vespajlib/src/main/java/com/yahoo/text/JSON.java +++ b/vespajlib/src/main/java/com/yahoo/text/JSON.java @@ -75,4 +75,8 @@ public final class JSON { return leftSlime.equalTo(rightSlime); } + public static String canonical(String jsonString) { + return SlimeUtils.jsonToSlimeOrThrow(jsonString).toString(); + } + } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java index 6a6bb3c6781..7c7391ff895 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/serialization/JsonFormatTestCase.java @@ -3,7 +3,9 @@ package com.yahoo.tensor.serialization; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; +import com.yahoo.text.JSON; import org.junit.Test; +import org.junit.jupiter.api.Assertions; import java.nio.charset.StandardCharsets; @@ -20,7 +22,9 @@ public class JsonFormatTestCase { public void testDirectValue() { assertDecoded("tensor(x{}):{a:2, b:3}", "{'a':2.0, 'b':3.0}"); assertDecoded("tensor(x{}):{a:2, b:3}", "{'a':2.0, 'b':3.0}"); - assertDecoded("tensor(x[2]):[2, 3]]", "[2.0, 3.0]"); + assertDecoded("tensor(x[2]):[1.0, 2.0]]", "[1, 2]"); + assertDecoded("tensor(x[2],y[3]):[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]", "[1, 2, 3, 4, 5, 6]"); + assertDecoded("tensor(x[2],y[3]):[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]", "[[1, 2, 3], [4, 5, 6]]"); assertDecoded("tensor(x{},y[2]):{a:[2, 3], b:[4, 5]}", "{'a':[2, 3], 'b':[4, 5]}"); assertDecoded("tensor(x{},y{}):{{x:a,y:0}:2, {x:b,y:1}:3}", "[{'address':{'x':'a','y':'0'},'value':2}, {'address':{'x':'b','y':'1'},'value':3}]"); @@ -32,35 +36,21 @@ public class 
JsonFormatTestCase { assertDecoded("tensor(x{}):{cells:2, b:3}", "{'cells':2.0, 'b':3.0}"); assertDecoded("tensor(x{}):{values:2, b:3}", "{'values':2.0, 'b':3.0}"); assertDecoded("tensor(x{}):{block:2, b:3}", "{'block':2.0, 'b':3.0}"); + assertDecoded("tensor(x{}):{type:2, b:3}", "{'type':2.0, 'b':3.0}"); // Multi-valued assertDecoded("tensor(x{},y[2]):{cells:[2, 3], b:[4, 5]}", "{'cells':[2, 3], 'b':[4, 5]}"); assertDecoded("tensor(x{},y[2]):{values:[2, 3], b:[4, 5]}", "{'values':[2, 3], 'b':[4, 5]}"); assertDecoded("tensor(x{},y[2]):{block:[2, 3], b:[4, 5]}", "{'block':[2, 3], 'b':[4, 5]}"); - } - - @Test - public void testSparseTensor() { - Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y{})")); - builder.cell().label("x", "a").label("y", "b").value(2.0); - builder.cell().label("x", "c").label("y", "d").value(3.0); - Tensor tensor = builder.build(); - byte[] json = JsonFormat.encode(tensor); - assertEquals("{\"cells\":[" + - "{\"address\":{\"x\":\"a\",\"y\":\"b\"},\"value\":2.0}," + - "{\"address\":{\"x\":\"c\",\"y\":\"d\"},\"value\":3.0}" + - "]}", - new String(json, StandardCharsets.UTF_8)); - Tensor decoded = JsonFormat.decode(tensor.type(), json); - assertEquals(tensor, decoded); + assertDecoded("tensor(x{},y[2]):{type:[2, 3], b:[4, 5]}", "{'type':[2, 3], 'b':[4, 5]}"); } @Test public void testEmptySparseTensor() { Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y{})")); Tensor tensor = builder.build(); - byte[] json = JsonFormat.encode(tensor); - assertEquals("{\"cells\":[]}", + byte[] json = JsonFormat.encode(tensor, false, false); + assertEquals("{\"type\":\"tensor(x{},y{})\",\"cells\":[]}", new String(json, StandardCharsets.UTF_8)); Tensor decoded = JsonFormat.decode(tensor.type(), json); assertEquals(tensor, decoded); @@ -86,6 +76,45 @@ public class JsonFormatTestCase { } @Test + public void testEmptyTensor() { + Tensor tensor = Tensor.Builder.of(TensorType.empty).build(); + + String shortJson = """ 
+ { + "type":"tensor()", + "values":[0.0] + } + """; + byte[] shortEncoded = JsonFormat.encode(tensor, true, false); + assertEqualJson(shortJson, new String(shortEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortEncoded)); + + String longJson = """ + { + "type":"tensor()", + "cells":[{"address":{},"value":0.0}] + } + """; + byte[] longEncoded = JsonFormat.encode(tensor, false, false); + assertEqualJson(longJson, new String(longEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longEncoded)); + + String shortDirectJson = """ + [0.0] + """; + byte[] shortDirectEncoded = JsonFormat.encode(tensor, true, true); + assertEqualJson(shortDirectJson, new String(shortDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortDirectEncoded)); + + String longDirectJson = """ + [{"address":{},"value":0.0}] + """; + byte[] longDirectEncoded = JsonFormat.encode(tensor, false, true); + assertEqualJson(longDirectJson, new String(longDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longDirectEncoded)); + } + + @Test public void testDenseTensor() { Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x[2],y[2])")); builder.cell().label("x", 0).label("y", 0).value(2.0); @@ -93,31 +122,183 @@ public class JsonFormatTestCase { builder.cell().label("x", 1).label("y", 0).value(5.0); builder.cell().label("x", 1).label("y", 1).value(7.0); Tensor tensor = builder.build(); - byte[] json = JsonFormat.encode(tensor); - assertEquals("{\"cells\":[" + - "{\"address\":{\"x\":\"0\",\"y\":\"0\"},\"value\":2.0}," + - "{\"address\":{\"x\":\"0\",\"y\":\"1\"},\"value\":3.0}," + - "{\"address\":{\"x\":\"1\",\"y\":\"0\"},\"value\":5.0}," + - "{\"address\":{\"x\":\"1\",\"y\":\"1\"},\"value\":7.0}" + - "]}", - new String(json, StandardCharsets.UTF_8)); - Tensor decoded = JsonFormat.decode(tensor.type(), json); - 
assertEquals(tensor, decoded); + + String shortJson = """ + { + "type":"tensor(x[2],y[2])", + "values":[[2.0,3.0],[5.0,7.0]] + } + """; + byte[] shortEncoded = JsonFormat.encode(tensor, true, false); + assertEqualJson(shortJson, new String(shortEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortEncoded)); + + String longJson = """ + { + "type":"tensor(x[2],y[2])", + "cells":[ + {"address":{"x":"0","y":"0"},"value":2.0}, + {"address":{"x":"0","y":"1"},"value":3.0}, + {"address":{"x":"1","y":"0"},"value":5.0}, + {"address":{"x":"1","y":"1"},"value":7.0} + ] + } + """; + byte[] longEncoded = JsonFormat.encode(tensor, false, false); + assertEqualJson(longJson, new String(longEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longEncoded)); + + String shortDirectJson = """ + [[2.0, 3.0], [5.0, 7.0]] + """; + byte[] shortDirectEncoded = JsonFormat.encode(tensor, true, true); + assertEqualJson(shortDirectJson, new String(shortDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortDirectEncoded)); + + String longDirectJson = """ + [ + {"address":{"x":"0","y":"0"},"value":2.0}, + {"address":{"x":"0","y":"1"},"value":3.0}, + {"address":{"x":"1","y":"0"},"value":5.0}, + {"address":{"x":"1","y":"1"},"value":7.0} + ] + """; + byte[] longDirectEncoded = JsonFormat.encode(tensor, false, true); + assertEqualJson(longDirectJson, new String(longDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longDirectEncoded)); + } + + @Test + public void testMixedTensor() { + Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y[2])")); + builder.cell().label("x", "a").label("y", 0).value(2.0); + builder.cell().label("x", "a").label("y", 1).value(3.0); + builder.cell().label("x", "b").label("y", 0).value(5.0); + builder.cell().label("x", "b").label("y", 1).value(7.0); + Tensor tensor = 
builder.build(); + + String shortJson = """ + { + "type":"tensor(x{},y[2])", + "blocks":{"a":[2.0,3.0],"b":[5.0,7.0]} + } + """; + byte[] shortEncoded = JsonFormat.encode(tensor, true, false); + assertEqualJson(shortJson, new String(shortEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortEncoded)); + + String longJson = """ + { + "type":"tensor(x{},y[2])", + "cells":[ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + } + """; + byte[] longEncoded = JsonFormat.encode(tensor, false, false); + assertEqualJson(longJson, new String(longEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longEncoded)); + + String shortDirectJson = """ + {"a":[2.0,3.0],"b":[5.0,7.0]} + """; + byte[] shortDirectEncoded = JsonFormat.encode(tensor, true, true); + assertEqualJson(shortDirectJson, new String(shortDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortDirectEncoded)); + + String longDirectJson = """ + [ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + """; + byte[] longDirectEncoded = JsonFormat.encode(tensor, false, true); + assertEqualJson(longDirectJson, new String(longDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longDirectEncoded)); + } + + @Test + public void testSparseTensor() { + Tensor.Builder builder = Tensor.Builder.of(TensorType.fromSpec("tensor(x{},y{})")); + builder.cell().label("x", "a").label("y", 0).value(2.0); + builder.cell().label("x", "a").label("y", 1).value(3.0); + builder.cell().label("x", "b").label("y", 0).value(5.0); + builder.cell().label("x", "b").label("y", 1).value(7.0); + Tensor tensor = 
builder.build(); + + String shortJson = """ + { + "type":"tensor(x{},y{})", + "cells": [ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + } + """; + byte[] shortEncoded = JsonFormat.encode(tensor, true, false); + assertEqualJson(shortJson, new String(shortEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortEncoded)); + + String longJson = """ + { + "type":"tensor(x{},y{})", + "cells":[ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + } + """; + byte[] longEncoded = JsonFormat.encode(tensor, false, false); + assertEqualJson(longJson, new String(longEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longEncoded)); + + String shortDirectJson = """ + [ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + """; + byte[] shortDirectEncoded = JsonFormat.encode(tensor, true, true); + assertEqualJson(shortDirectJson, new String(shortDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), shortDirectEncoded)); + + String longDirectJson = """ + [ + {"address":{"x":"a","y":"0"},"value":2.0}, + {"address":{"x":"a","y":"1"},"value":3.0}, + {"address":{"x":"b","y":"0"},"value":5.0}, + {"address":{"x":"b","y":"1"},"value":7.0} + ] + """; + byte[] longDirectEncoded = JsonFormat.encode(tensor, false, true); + assertEqualJson(longDirectJson, new String(longDirectEncoded, StandardCharsets.UTF_8)); + assertEquals(tensor, JsonFormat.decode(tensor.type(), longDirectEncoded)); } @Test public void testDisallowedEmptyDenseTensor() { TensorType type = 
TensorType.fromSpec("tensor(x[3])"); - assertDecodeFails(type, "{\"values\":[]}", "The 'values' array does not contain any values"); - assertDecodeFails(type, "{\"values\":\"\"}", "The 'values' string does not contain any values"); + assertDecodeFails(type, "{\"values\":[]}", "The values array does not contain any values"); + assertDecodeFails(type, "{\"values\":\"\"}", "The values string does not contain any values"); } @Test public void testDisallowedEmptyMixedTensor() { TensorType type = TensorType.fromSpec("tensor(x{},y[3])"); - assertDecodeFails(type, "{\"blocks\":{ \"a\": [] } }", "The 'block' value array does not contain any values"); + assertDecodeFails(type, "{\"blocks\":{ \"a\": [] } }", "The block value array does not contain any values"); assertDecodeFails(type, "{\"blocks\":[ {\"address\":{\"x\":\"a\"}, \"values\": [] } ] }", - "The 'block' value array does not contain any values"); + "The block value array does not contain any values"); } @Test @@ -204,9 +385,14 @@ public class JsonFormatTestCase { builder.cell().label("x", 1).label("y", 1).value(0.0); builder.cell().label("x", 1).label("y", 2).value(42.0); Tensor expected = builder.build(); + String denseJson = "{\"values\":\"027FFF80002A\"}"; Tensor decoded = JsonFormat.decode(expected.type(), denseJson.getBytes(StandardCharsets.UTF_8)); assertEquals(expected, decoded); + + denseJson = "\"027FFF80002A\""; + decoded = JsonFormat.decode(expected.type(), denseJson.getBytes(StandardCharsets.UTF_8)); + assertEquals(expected, decoded); } @Test @@ -231,6 +417,7 @@ public class JsonFormatTestCase { builder.cell().label("x", 1).label("y", 1).value(6.0); builder.cell().label("x", 1).label("y", 2).value(7.0); Tensor expected = builder.build(); + String mixedJson = "{\"blocks\":[" + "{\"address\":{\"x\":\"0\"},\"values\":\"020304\"}," + "{\"address\":{\"x\":\"1\"},\"values\":\"050607\"}" + @@ -373,7 +560,7 @@ public class JsonFormatTestCase { } private void assertEncodeDecode(Tensor tensor) { - Tensor decoded = 
JsonFormat.decode(tensor.type(), JsonFormat.encodeWithType(tensor)); + Tensor decoded = JsonFormat.decode(tensor.type(), JsonFormat.encode(tensor, false, false)); assertEquals(tensor, decoded); assertEquals(tensor.type(), decoded.type()); } @@ -401,7 +588,7 @@ public class JsonFormatTestCase { } private void assertEncodeShortForm(Tensor tensor, String expected) { - byte[] json = JsonFormat.encodeShortForm(tensor); + byte[] json = JsonFormat.encode(tensor, true, false); assertEquals(expected, new String(json, StandardCharsets.UTF_8)); } @@ -418,8 +605,12 @@ public class JsonFormatTestCase { Tensor decoded = JsonFormat.decode(type, format.getBytes(StandardCharsets.UTF_8)); fail("Did not get exception as expected, decoded as: " + decoded); } catch (IllegalArgumentException e) { - assertEquals(e.getMessage(), msg); + assertEquals(msg, e.getMessage()); } } + private void assertEqualJson(String expected, String generated) { + Assertions.assertEquals(JSON.canonical(expected), JSON.canonical(generated)); + } + } |