diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-05-29 17:05:19 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-05-29 17:05:19 +0200 |
commit | e86e19b21db079a62127ddce8f13e3a10f256779 (patch) | |
tree | b3b87b9286a71150b91db035ac59a45ab4bf34d1 /container-search/src | |
parent | 6cd4e8945facda874bf1ada7ea8694c2c633f9da (diff) |
Avoid encoding between utf-8 and 16 unnecessarily
Diffstat (limited to 'container-search/src')
7 files changed, 288 insertions, 144 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java index a5f83021bee..7d68e7b6679 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java @@ -96,4 +96,7 @@ public abstract class DocsumField { */ public abstract Object convert(Inspector value); + /** Returns whether this is the string field type. */ + boolean isString() { return false; } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java index c517742f0e5..1160ea0a204 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java @@ -228,6 +228,13 @@ public class FastHit extends Hit { } @Override + public void forEachFieldAsRaw(RawUtf8Consumer consumer) { + super.forEachField(consumer); + for (SummaryData summaryData : summaries) + summaryData.forEachFieldAsRaw(consumer); + } + + @Override public Map<String, Object> fields() { Map<String, Object> fields = new HashMap<>(); for (Iterator<Map.Entry<String, Object>> i = fieldIterator(); i.hasNext(); ) { @@ -534,9 +541,28 @@ public class FastHit extends Hit { void forEachField(BiConsumer<String, Object> consumer) { data.traverse((ObjectTraverser)(name, value) -> { - Object convertedValue = type.convert(name, value); - if ( convertedValue != null && !shadowed(name) && !removed(name)) { - consumer.accept(name, convertedValue); + if (!shadowed(name) && !removed(name)) { + Object convertedValue = type.convert(name, value); + if (convertedValue != null) + consumer.accept(name, convertedValue); + } + }); + } + + void forEachFieldAsRaw(RawUtf8Consumer consumer) { + data.traverse((ObjectTraverser)(name, value) -> { + if (!shadowed(name) && !removed(name)) { + DocsumField fieldType = type.getField(name); + if (fieldType != null) { + if (fieldType.isString()) { + byte[] utf8Value = value.asUtf8(); + consumer.accept(name, utf8Value, 0, utf8Value.length); + } else { + Object convertedValue = fieldType.convert(value); + if (convertedValue != null) + consumer.accept(name, convertedValue); + } + } } }); } diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java index 0fa4b7ee342..408cbbbb62d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java @@ -31,4 +31,6 @@ public class StringField extends DocsumField { return value.asString(""); } + boolean isString() { return true; } + } diff --git a/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java index 6c7018317c3..8375e63984d 100644 --- a/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java +++ b/container-search/src/main/java/com/yahoo/search/rendering/JsonRenderer.java @@ -68,6 +68,7 @@ import java.util.function.LongSupplier; * JSON renderer for search results. * * @author Steinar Knutsen + * @author bratseth */ // NOTE: The JSON format is a public API. If new elements are added be sure to update the reference doc. public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { @@ -75,18 +76,6 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { private static final CompoundName DEBUG_RENDERING_KEY = new CompoundName("renderer.json.debug"); private static final CompoundName JSON_CALLBACK = new CompoundName("jsoncallback"); - private enum RenderDecision { - YES, NO, DO_NOT_KNOW; - - boolean booleanValue() { - switch (this) { - case YES: return true; - case NO: return false; - default: throw new IllegalStateException(); - } - } - } - // if this must be optimized, simply use com.fasterxml.jackson.core.SerializableString private static final String BUCKET_LIMITS = "limits"; private static final String BUCKET_TO = "to"; @@ -133,6 +122,7 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { private final JsonFactory generatorFactory; private JsonGenerator generator; + private FieldConsumer fieldConsumer; private Deque<Integer> renderedChildren; private boolean debugRendering; private LongSupplier timeSource; @@ -304,9 +294,9 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { @Override public void init() { super.init(); - generator = null; - renderedChildren = null; debugRendering = false; + setGenerator(null, debugRendering); + renderedChildren = null; timeSource = System::currentTimeMillis; stream = null; } @@ -314,9 +304,9 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { @Override public void beginResponse(OutputStream stream) throws IOException { beginJsonCallback(stream); - generator = generatorFactory.createGenerator(stream, JsonEncoding.UTF8); - renderedChildren = new ArrayDeque<>(); debugRendering = getDebugRendering(getResult().getQuery()); + setGenerator(generatorFactory.createGenerator(stream, JsonEncoding.UTF8), debugRendering); + renderedChildren = new ArrayDeque<>(); generator.writeStartObject(); renderTrace(getExecution().trace()); renderTiming(); @@ -473,7 +463,7 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { return ! (hit instanceof DefaultErrorHit); } - private void fieldsStart(MutableBoolean hasFieldsField) throws IOException { + private static void fieldsStart(MutableBoolean hasFieldsField, JsonGenerator generator) throws IOException { if (hasFieldsField.get()) return; generator.writeObjectFieldStart(FIELDS); hasFieldsField.set(true); @@ -516,32 +506,8 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { } private void renderStandardFields(Hit hit, MutableBoolean hasFieldsField) { - hit.forEachField((name, value) -> { - try { - if (shouldRender(name, value)) { - fieldsStart(hasFieldsField); - renderField(name, value); - } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - }); - } - - private boolean shouldRender(String name, Object value) { - if (debugRendering) return true; - - if (name.startsWith(VESPA_HIDDEN_FIELD_PREFIX)) return false; - - if (value instanceof CharSequence && ((CharSequence) value).length() == 0) return false; - - // StringFieldValue cannot hold a null, so checking length directly is OK: - if (value instanceof StringFieldValue && ((StringFieldValue) value).getString().isEmpty()) return false; - - if (value instanceof NanNumber) return false; - - return true; + fieldConsumer.setCurrent(hasFieldsField); + hit.forEachFieldAsRaw(fieldConsumer); } private void renderSpecialCasesForGrouping(Hit hit) throws IOException { @@ -609,95 +575,10 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { private void renderTotalHitCount(Hit hit, MutableBoolean hasFieldsField) throws IOException { if ( ! (getRecursionLevel() == 1 && hit instanceof HitGroup)) return; - fieldsStart(hasFieldsField); + fieldsStart(hasFieldsField, generator); generator.writeNumberField(TOTAL_COUNT, getResult().getTotalHitCount()); } - private void renderField(String name, Object value) throws IOException { - generator.writeFieldName(name); - renderFieldContents(value); - } - - private void renderFieldContents(Object field) throws IOException { - if (field == null) { - generator.writeNull(); - } else if (field instanceof Number) { - renderNumberField((Number) field); - } else if (field instanceof TreeNode) { - generator.writeTree((TreeNode) field); - } else if (field instanceof Tensor) { - renderTensor(Optional.of((Tensor)field)); - } else if (field instanceof JsonProducer) { - generator.writeRawValue(((JsonProducer) field).toJson()); - } else if (field instanceof Inspectable) { - StringBuilder intermediate = new StringBuilder(); - JsonRender.render((Inspectable) field, intermediate, true); - generator.writeRawValue(intermediate.toString()); - } else if (field instanceof StringFieldValue) { - // This needs special casing as JsonWriter hides empty strings now - generator.writeString(((StringFieldValue)field).getString()); - } else if (field instanceof TensorFieldValue) { - renderTensor(((TensorFieldValue)field).getTensor()); - } else if (field instanceof FieldValue) { - // the null below is the field which has already been written - ((FieldValue) field).serialize(null, new JsonWriter(generator)); - } else if (field instanceof JSONArray || field instanceof JSONObject) { - // org.json returns null if the object would not result in - // syntactically correct JSON - String s = field.toString(); - if (s == null) { - generator.writeNull(); - } else { - generator.writeRawValue(s); - } - } else { - generator.writeString(field.toString()); - } - } - - private void renderNumberField(Number field) throws IOException { - if (field instanceof Integer) { - generator.writeNumber(field.intValue()); - } else if (field instanceof Float) { - generator.writeNumber(field.floatValue()); - } else if (field instanceof Double) { - generator.writeNumber(field.doubleValue()); - } else if (field instanceof Long) { - generator.writeNumber(field.longValue()); - } else if (field instanceof Byte || field instanceof Short) { - generator.writeNumber(field.intValue()); - } else if (field instanceof BigInteger) { - generator.writeNumber((BigInteger) field); - } else if (field instanceof BigDecimal) { - generator.writeNumber((BigDecimal) field); - } else { - generator.writeNumber(field.doubleValue()); - } - } - - private void renderTensor(Optional<Tensor> tensor) throws IOException { - generator.writeStartObject(); - generator.writeArrayFieldStart("cells"); - if (tensor.isPresent()) { - for (Iterator<Tensor.Cell> i = tensor.get().cellIterator(); i.hasNext(); ) { - Tensor.Cell cell = i.next(); - - generator.writeStartObject(); - - generator.writeObjectFieldStart("address"); - for (int d = 0; d < cell.getKey().size(); d++) - generator.writeObjectField(tensor.get().type().dimensions().get(d).name(), cell.getKey().label(d)); - generator.writeEndObject(); - - generator.writeObjectField("value", cell.getValue()); - - generator.writeEndObject(); - } - } - generator.writeEndArray(); - generator.writeEndObject(); - } - @Override public void data(Data data) throws IOException { Preconditions.checkArgument(data instanceof Hit, @@ -774,11 +655,9 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { return null; } - /** - * Only for testing. Never to be used in any other context. - */ - void setGenerator(JsonGenerator generator) { + private void setGenerator(JsonGenerator generator, boolean debugRendering) { this.generator = generator; + this.fieldConsumer = generator == null ? null : new FieldConsumer(generator, debugRendering); } /** @@ -787,5 +666,156 @@ public class JsonRenderer extends AsynchronousSectionedRenderer<Result> { void setTimeSource(LongSupplier timeSource) { this.timeSource = timeSource; } - + + /** + * Received callbacks when fields of hits are encountered. + * This instance is reused for all hits of a Result since we are in a single-threaded context + * and want to limit object creation. + */ + private static class FieldConsumer implements Hit.RawUtf8Consumer { + + private final JsonGenerator generator; + private final boolean debugRendering; + + private MutableBoolean hasFieldsField; + + public FieldConsumer(JsonGenerator generator, boolean debugRendering) { + this.generator = generator; + this.debugRendering = debugRendering; + } + + /** + * Called before each use of this for a hit to keep track of whether we + * have created the "fields" field of the JSON object + */ + void setCurrent(MutableBoolean hasFieldsField) { + this.hasFieldsField = hasFieldsField; + } + + @Override + public void accept(String name, Object value) { + try { + if (shouldRender(name, value)) { + fieldsStart(hasFieldsField, generator); + generator.writeFieldName(name); + renderFieldContents(value); + } + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public void accept(String name, byte[] utf8Data, int offset, int length) { + try { + if (shouldRenderUtf8Value(name, length)) { + fieldsStart(hasFieldsField, generator); + generator.writeFieldName(name); + generator.writeUTF8String(utf8Data, offset, length); + } + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private boolean shouldRender(String name, Object value) { + if (debugRendering) return true; + if (name.startsWith(VESPA_HIDDEN_FIELD_PREFIX)) return false; + if (value instanceof CharSequence && ((CharSequence) value).length() == 0) return false; + // StringFieldValue cannot hold a null, so checking length directly is OK: + if (value instanceof StringFieldValue && ((StringFieldValue) value).getString().isEmpty()) return false; + if (value instanceof NanNumber) return false; + return true; + } + + private boolean shouldRenderUtf8Value(String name, int length) { + if (debugRendering) return true; + if (name.startsWith(VESPA_HIDDEN_FIELD_PREFIX)) return false; + if (length == 0) return false; + return true; + } + + private void renderFieldContents(Object field) throws IOException { + if (field == null) { + generator.writeNull(); + } else if (field instanceof Number) { + renderNumberField((Number) field); + } else if (field instanceof TreeNode) { + generator.writeTree((TreeNode) field); + } else if (field instanceof Tensor) { + renderTensor(Optional.of((Tensor)field)); + } else if (field instanceof JsonProducer) { + generator.writeRawValue(((JsonProducer) field).toJson()); + } else if (field instanceof Inspectable) { + StringBuilder intermediate = new StringBuilder(); + JsonRender.render((Inspectable) field, intermediate, true); + generator.writeRawValue(intermediate.toString()); + } else if (field instanceof StringFieldValue) { + generator.writeString(((StringFieldValue)field).getString()); + } else if (field instanceof TensorFieldValue) { + renderTensor(((TensorFieldValue)field).getTensor()); + } else if (field instanceof FieldValue) { + // the null below is the field which has already been written + ((FieldValue) field).serialize(null, new JsonWriter(generator)); + } else if (field instanceof JSONArray || field instanceof JSONObject) { + // org.json returns null if the object would not result in + // syntactically correct JSON + String s = field.toString(); + if (s == null) { + generator.writeNull(); + } else { + generator.writeRawValue(s); + } + } else { + generator.writeString(field.toString()); + } + } + + private void renderNumberField(Number field) throws IOException { + if (field instanceof Integer) { + generator.writeNumber(field.intValue()); + } else if (field instanceof Float) { + generator.writeNumber(field.floatValue()); + } else if (field instanceof Double) { + generator.writeNumber(field.doubleValue()); + } else if (field instanceof Long) { + generator.writeNumber(field.longValue()); + } else if (field instanceof Byte || field instanceof Short) { + generator.writeNumber(field.intValue()); + } else if (field instanceof BigInteger) { + generator.writeNumber((BigInteger) field); + } else if (field instanceof BigDecimal) { + generator.writeNumber((BigDecimal) field); + } else { + generator.writeNumber(field.doubleValue()); + } + } + + private void renderTensor(Optional<Tensor> tensor) throws IOException { + generator.writeStartObject(); + generator.writeArrayFieldStart("cells"); + if (tensor.isPresent()) { + for (Iterator<Tensor.Cell> i = tensor.get().cellIterator(); i.hasNext(); ) { + Tensor.Cell cell = i.next(); + + generator.writeStartObject(); + + generator.writeObjectFieldStart("address"); + for (int d = 0; d < cell.getKey().size(); d++) + generator.writeObjectField(tensor.get().type().dimensions().get(d).name(), cell.getKey().label(d)); + generator.writeEndObject(); + + generator.writeObjectField("value", cell.getValue()); + + generator.writeEndObject(); + } + } + generator.writeEndArray(); + generator.writeEndObject(); + } + + } + } diff --git a/container-search/src/main/java/com/yahoo/search/result/Hit.java b/container-search/src/main/java/com/yahoo/search/result/Hit.java index f68916c8a68..74c31aa33c5 100644 --- a/container-search/src/main/java/com/yahoo/search/result/Hit.java +++ b/container-search/src/main/java/com/yahoo/search/result/Hit.java @@ -404,13 +404,25 @@ public class Hit extends ListenableFreezableClass implements Data, Comparable<Hi /** * Receive a callback on the given object for each field in this hit. - * This is the most resource efficient way of traversing all the fields of a hit. + * This is more efficient than accessing the fields as a map or iterator. */ public void forEachField(BiConsumer<String, Object> consumer) { if (fields == null) return; fields.forEach(consumer); } + /** + * Receive a callback on the given object for each field in this hit, + * where the callback will provide raw utf-8 byte data for strings whose data + * is already available at this form. + * This is the most resource efficient way of traversing all the fields of a hit + * in renderers which produces utf-8. + */ + public void forEachFieldAsRaw(RawUtf8Consumer consumer) { + if (fields == null) return; + fields.forEach(consumer); // No utf-8 fields available in Hit + } + /** Returns the fields of this as a read-only map. This is more costly than fieldIterator() */ public Map<String, Object> fields() { return getUnmodifiableFieldMap(); } @@ -800,4 +812,18 @@ public class Hit extends ListenableFreezableClass implements Data, Comparable<Hi return "hit " + getId() + " (relevance " + getRelevance() + ")"; } + public interface RawUtf8Consumer extends BiConsumer<String, Object> { + + /** + * Called for fields which are available as UTF-8 instead of accept(String, Object). + * + * @param fieldName the name of the field + * @param utf8Data raw utf-8 data. The reciver <b>must not</b> modify this data + * @param offset the start index of the data to accept into the utf8Data array + * @param length the length of the data to accept into the utf8Data array + */ + void accept(String fieldName, byte[] utf8Data, int offset, int length); + + } + } diff --git a/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java b/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java index a9eb9c5e6ce..421f70c8d07 100644 --- a/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/fastsearch/SlimeSummaryTestCase.java @@ -141,9 +141,9 @@ public class SlimeSummaryTestCase { @Test public void testFieldAccessAPI() { - DocsumDefinitionSet docsum = createDocsumDefinitionSet(summary_cf); DocsumDefinitionSet partialDocsum1 = createDocsumDefinitionSet(partial_summary1_cf); DocsumDefinitionSet partialDocsum2 = createDocsumDefinitionSet(partial_summary2_cf); + DocsumDefinitionSet fullDocsum = createDocsumDefinitionSet(summary_cf); FastHit hit = new FastHit(); Map<String, Object> expected = new HashMap<>(); @@ -261,6 +261,18 @@ public class SlimeSummaryTestCase { fieldIterator.remove(); expected.remove("integer_field"); assertFields(expected, hit); + + // --- Add full summary + assertNull(fullDocsum.lazyDecode("default", fullishSummary(), hit)); + expected.put("integer_field", 4); + expected.put("short_field", (short)2); + expected.put("byte_field", (byte)1); + expected.put("float_field", 4.5f); + expected.put("double_field", 8.75d); + expected.put("int64_field", 8L); + expected.put("string_field", "string_value"); + expected.put("longstring_field", "longstring_value"); + assertFields(expected, hit); } @@ -274,11 +286,16 @@ public class SlimeSummaryTestCase { traversed.put(name, value); }); assertEquals(expected, traversed); + // raw utf8 field traverser + Map<String, Object> traversedUtf8 = new HashMap<>(); + hit.forEachFieldAsRaw(new Utf8FieldTraverser(traversedUtf8)); + assertEquals(expected, traversedUtf8); // fieldKeys int fieldNameIteratorFieldCount = 0; for (Iterator<String> i = hit.fieldKeys().iterator(); i.hasNext(); ) { fieldNameIteratorFieldCount++; - assertTrue(expected.containsKey(i.next())); + String name = i.next(); + assertTrue("Expected field " + name, expected.containsKey(name)); } assertEquals(expected.size(), fieldNameIteratorFieldCount); // fieldKeys @@ -304,7 +321,7 @@ public class SlimeSummaryTestCase { return encode((slime)); } - private byte [] timeoutSummary() { + private byte[] timeoutSummary() { Slime slime = new Slime(); slime.setString("Timed out...."); return encode((slime)); @@ -327,6 +344,22 @@ public class SlimeSummaryTestCase { return encode((slime)); } + private byte[] fullishSummary() { + Slime slime = new Slime(); + Cursor docsum = slime.setObject(); + docsum.setLong("integer_field", 4); + docsum.setLong("short_field", 2); + docsum.setLong("byte_field", 1); + docsum.setDouble("float_field", 4.5); + docsum.setDouble("double_field", 8.75); + docsum.setLong("int64_field", 8); + docsum.setString("string_field", "string_value"); + //docsum.setData("data_field", "data_value".getBytes(StandardCharsets.UTF_8)); + docsum.setString("longstring_field", "longstring_value"); + //docsum.setData("longdata_field", "longdata_value".getBytes(StandardCharsets.UTF_8)); + return encode((slime)); + } + private byte[] fullSummary(Tensor tensor1, Tensor tensor2) { Slime slime = new Slime(); Cursor docsum = slime.setObject(); @@ -346,8 +379,10 @@ public class SlimeSummaryTestCase { field.setLong("foo", 1); field.setLong("bar", 2); } - docsum.setData("tensor_field1", TypedBinaryFormat.encode(tensor1)); - docsum.setData("tensor_field2", TypedBinaryFormat.encode(tensor2)); + if (tensor1 != null) + docsum.setData("tensor_field1", TypedBinaryFormat.encode(tensor1)); + if (tensor2 != null) + docsum.setData("tensor_field2", TypedBinaryFormat.encode(tensor2)); return encode((slime)); } @@ -371,4 +406,26 @@ public class SlimeSummaryTestCase { return new DocsumDefinitionSet(config.documentdb(0), legacyEmulationConfig); } + private static class Utf8FieldTraverser implements Hit.RawUtf8Consumer { + + private Map<String, Object> traversed; + + public Utf8FieldTraverser(Map<String, Object> traversed) { + this.traversed = traversed; + } + + @Override + public void accept(String fieldName, byte[] utf8Data, int offset, int length) { + traversed.put(fieldName, new String(utf8Data, offset, length, StandardCharsets.UTF_8)); + } + + @Override + public void accept(String name, Object value) { + if (name.equals("string_value")) + fail("Expected string_value to be received as UTF-8"); + traversed.put(name, value); + } + + } + } diff --git a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java index 4a0075c0cf1..bf56ad19f44 100644 --- a/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/rendering/JsonRendererTestCase.java @@ -101,7 +101,7 @@ public class JsonRendererTestCase { } @Test - public void testDocumentId() throws IOException, InterruptedException, ExecutionException, JSONException { + public void testDocumentId() throws IOException, InterruptedException, ExecutionException { String expected = "{\n" + " \"root\": {\n" + " \"children\": [\n" |