// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;
import com.google.common.collect.ImmutableSet;
import com.yahoo.data.access.slime.SlimeAdapter;
import com.yahoo.prelude.hitfield.JSONString;
import com.yahoo.prelude.hitfield.RawData;
import com.yahoo.prelude.hitfield.XMLString;
import com.yahoo.search.result.FeatureData;
import com.yahoo.search.result.Hit;
import com.yahoo.search.result.StructuredData;
import com.yahoo.search.schema.DocumentSummary;
import com.yahoo.search.schema.Schema;
import com.yahoo.slime.BinaryFormat;
import com.yahoo.slime.Cursor;
import com.yahoo.slime.Slime;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.serialization.TypedBinaryFormat;
import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
public class SlimeSummaryTestCase {
// file: locations of summary .cfg files under this test's package directory.
// NOTE(review): none of these constants is referenced by the code visible in this file —
// confirm whether they are still needed.
private static final String cf_pre = "file:src/test/java/com/yahoo/prelude/fastsearch/";
private static final String summary_cf = cf_pre + "summary.cfg";
private static final String partial_summary1_cf = cf_pre + "partial-summary1.cfg";
private static final String partial_summary2_cf = cf_pre + "partial-summary2.cfg";
private static final String partial_summary3_cf = cf_pre + "partial-summary3.cfg";
/** Decoding a summary object without any fields succeeds and leaves every declared field unset. */
@Test
void testDecodingEmpty() {
    DocsumDefinitionSet docsum = createDocsumDefinitionSet();
    FastHit hit = new FastHit();
    // lazyDecode returns null when there is no error to report
    assertNull(docsum.lazyDecode("default", emptySummary(), hit));

    // Every field declared by createDocsumDefinitionSet(), each checked exactly once
    String[] declaredFields = {
        "integer_field",
        "short_field",
        "byte_field",
        "float_field",
        "double_field",
        "int64_field",
        "string_field",
        "data_field",
        "longstring_field",
        "longdata_field",
        "xmlstring_field",
        "jsonstring_field",
        "tensor_field1",
        "tensor_field2",
        "summaryfeatures"
    };
    for (String field : declaredFields)
        assertNull(hit.getField(field), "Field '" + field + "' should be unset");

    assertTrue(hit.features().featureNames().isEmpty());
}
/**
 * A summary whose root is the string "Timed out...." makes lazyDecode return an error message
 * which describes the hit and ends with that string.
 */
@Test
void testTimeout() {
    String expectedError =
            "Hit hit index:null/0/000000000000000000000000 (relevance 0.0) [fasthit, globalid: 0 0 0 0 0 0 0 0 0 0 0 0, partId: 0, distributionkey: 0] failed: Timed out....";
    DocsumDefinitionSet definitions = createDocsumDefinitionSet();
    FastHit target = new FastHit();
    assertEquals(expectedError, definitions.lazyDecode("default", timeoutSummary(), target));
}
/** Decodes a fully populated summary and verifies the type and value of every field and feature. */
@Test
void testDecoding() {
    Tensor tensor1 = Tensor.from("tensor(x{},y{}):{{x:foo,y:bar}:0.1}");
    Tensor tensor2 = Tensor.from("tensor(x[1],y[1]):{{x:0,y:0}:-0.3}");
    DocsumDefinitionSet docsum = createDocsumDefinitionSet();
    FastHit hit = new FastHit();
    assertNull(docsum.lazyDecode("default", fullSummary(tensor1, tensor2), hit));

    // Primitive and string fields
    assertEquals(4, hit.getField("integer_field"));
    assertEquals((short) 2, hit.getField("short_field"));
    assertEquals((byte) 1, hit.getField("byte_field"));
    assertEquals(4.5F, hit.getField("float_field"));
    assertEquals(8.75, hit.getField("double_field"));
    assertEquals(8L, hit.getField("int64_field"));
    assertEquals("string_value", hit.getField("string_field"));
    assertEquals(RawData.class, hit.getField("data_field").getClass());
    assertEquals("data_value", hit.getField("data_field").toString());
    assertEquals("longstring_value", hit.getField("longstring_field"));
    assertEquals(RawData.class, hit.getField("longdata_field").getClass());
    assertEquals("longdata_value", hit.getField("longdata_field").toString());
    assertEquals(XMLString.class, hit.getField("xmlstring_field").getClass());
    assertEquals("xmlstring_value", hit.getField("xmlstring_field").toString());

    // The structured field may surface as either a JSONString or a StructuredData;
    // both must render the same JSON and expose the same inspectable content.
    String expectedJson = "{\"foo\":1,\"bar\":2}";
    Object jsonField = hit.getField("jsonstring_field");
    com.yahoo.data.access.Inspector value;
    if (jsonField instanceof JSONString) {
        JSONString jstr = (JSONString) jsonField;
        assertEquals(expectedJson, jstr.getContent());
        value = jstr.inspect();
    } else {
        StructuredData sdata = (StructuredData) jsonField;
        assertEquals(expectedJson, sdata.toJson());
        value = sdata.inspect();
    }
    assertEquals(1L, value.field("foo").asLong());
    assertEquals(2L, value.field("bar").asLong());

    // Tensor fields
    assertEquals(tensor1, hit.getField("tensor_field1"));
    assertEquals(tensor2, hit.getField("tensor_field2"));

    // Summary features
    FeatureData featureData = hit.features();
    assertEquals("double_feature,rankingExpression(tensor1_feature),tensor2_feature",
                 featureData.featureNames().stream().sorted().collect(Collectors.joining(",")));
    assertEquals(0.5, featureData.getDouble("double_feature"), 0.00000001);
    // The feature stored as rankingExpression(tensor1_feature) is looked up by both names
    assertEquals(tensor1, featureData.getTensor("tensor1_feature"));
    assertEquals(tensor1, featureData.getTensor("rankingExpression(tensor1_feature)"));
    assertEquals(tensor2, featureData.getTensor("tensor2_feature"));
}
/**
 * Exercises the field access API of a hit which is populated from several summaries and then
 * mutated through setField/removeField/clearFields, the fieldKeys() set and the fieldIterator().
 * After each step, assertFields verifies that every access method agrees with the expected map.
 */
@Test
void testFieldAccessAPI() {
    DocsumDefinitionSet partialDocsum1 = createPartialDocsumDefinitionSet1();
    DocsumDefinitionSet partialDocsum2 = createPartialDocsumDefinitionSet2();
    DocsumDefinitionSet partialDocsum3 = createPartialDocsumDefinitionSet3();
    DocsumDefinitionSet fullDocsum = createDocsumDefinitionSet();
    FastHit hit = new FastHit();
    Map<String, Object> expected = new HashMap<>();

    // --- Initially empty
    assertFields(expected, hit);

    // --- Add summaries one at a time
    partialDocsum1.lazyDecode("partial1", partialSummary1(), hit);
    expected.put("integer_field", 4);
    expected.put("short_field", (short) 2);
    assertFields(expected, hit);

    partialDocsum2.lazyDecode("partial2", partialSummary2(), hit);
    expected.put("float_field", 4.5F);
    expected.put("double_field", 8.75D);
    assertFields(expected, hit);

    // --- Remove and set individual fields
    hit.removeField("short_field");
    expected.remove("short_field");
    assertFields(expected, hit);

    hit.setField("string", "hello");
    expected.put("string", "hello");
    assertFields(expected, hit);

    // Re-add the removed summary field, with a value of another type
    hit.setField("short_field", 3.8F);
    expected.put("short_field", 3.8F);
    assertFields(expected, hit);

    hit.removeField("string");
    expected.remove("string");
    assertFields(expected, hit);

    hit.removeField("integer_field");
    hit.removeField("double_field");
    expected.remove("integer_field");
    expected.remove("double_field");
    assertFields(expected, hit);

    hit.clearFields();
    expected.clear();
    assertFields(expected, hit);

    // --- Re-populate
    partialDocsum1.lazyDecode("partial1", partialSummary1(), hit);
    expected.put("integer_field", 4);
    expected.put("short_field", (short) 2);
    partialDocsum2.lazyDecode("partial2", partialSummary2(), hit);
    expected.put("float_field", 4.5F);
    expected.put("double_field", 8.75D);
    hit.setField("string1", "hello");
    hit.setField("string2", "hello");
    expected.put("string1", "hello");
    expected.put("string2", "hello");
    assertFields(expected, hit);

    // --- Modify the hit through the set returned by fieldKeys()
    Set<String> keys = hit.fieldKeys();
    assertTrue(keys.remove("integer_field"));
    expected.remove("integer_field");
    assertTrue(keys.remove("string2"));
    expected.remove("string2");
    assertFields(expected, hit);
    assertFalse(keys.remove("notpresent"));
    assertTrue(keys.retainAll(ImmutableSet.of("string1", "notpresent", "double_field")));
    expected.remove("short_field");
    expected.remove("float_field");
    assertFields(expected, hit);

    // --- Modify the hit through the iterator of the key set
    Iterator<String> keyIterator = keys.iterator();
    assertEquals("string1", keyIterator.next());
    keyIterator.remove();
    expected.remove("string1");
    assertFields(expected, hit);
    assertEquals("double_field", keyIterator.next());
    keyIterator.remove();
    expected.remove("double_field");
    assertFields(expected, hit);

    // --- Re-populate
    partialDocsum1.lazyDecode("partial1", partialSummary1(), hit);
    expected.put("integer_field", 4);
    expected.put("short_field", (short) 2);
    partialDocsum2.lazyDecode("partial2", partialSummary2(), hit);
    expected.put("float_field", 4.5F);
    expected.put("double_field", 8.75D);
    hit.setField("string", "hello");
    expected.put("string", "hello");
    assertFields(expected, hit);

    // --- Modify the hit through fieldIterator(); the assertions below pin its iteration order
    Iterator<Map.Entry<String, Object>> fieldIterator = hit.fieldIterator();
    assertEquals("string", fieldIterator.next().getKey());
    fieldIterator.remove();
    expected.remove("string");
    assertFields(expected, hit);
    fieldIterator.next(); // skip one entry
    assertEquals("short_field", fieldIterator.next().getKey());
    fieldIterator.remove();
    expected.remove("short_field");
    assertFields(expected, hit);
    fieldIterator.next(); // skip one entry
    assertEquals("double_field", fieldIterator.next().getKey());
    fieldIterator.remove();
    expected.remove("double_field");
    assertFields(expected, hit);
    // Start over with a fresh iterator to remove the two entries skipped above
    fieldIterator = hit.fieldIterator();
    assertEquals("float_field", fieldIterator.next().getKey());
    fieldIterator.remove();
    expected.remove("float_field");
    assertFields(expected, hit);
    assertEquals("integer_field", fieldIterator.next().getKey());
    fieldIterator.remove();
    expected.remove("integer_field");
    assertFields(expected, hit);

    // --- Add full summary
    Tensor tensor1 = Tensor.from("tensor(x{},y{}):{{x:foo,y:bar}:0.1}");
    Tensor tensor2 = Tensor.from("tensor(x[1],y[1]):{{x:0,y:0}:-0.3}");
    assertNull(fullDocsum.lazyDecode("default", fullishSummary(tensor1, tensor2), hit));
    expected.put("integer_field", 4);
    expected.put("short_field", (short) 2);
    expected.put("byte_field", (byte) 1);
    expected.put("float_field", 4.5f);
    expected.put("double_field", 8.75d);
    expected.put("int64_field", 8L);
    expected.put("string_field", "string_value");
    expected.put("longstring_field", "longstring_value");
    expected.put("tensor_field1", tensor1);
    expected.put("tensor_field2", tensor2);

    // Build the expected summary features with the same content as addTensors() encodes
    Slime slime = new Slime();
    Cursor summaryFeatures = slime.setObject();
    summaryFeatures.setDouble("double_feature", 0.5);
    summaryFeatures.setData("rankingExpression(tensor1_feature)", TypedBinaryFormat.encode(tensor1));
    summaryFeatures.setData("tensor2_feature", TypedBinaryFormat.encode(tensor2));
    expected.put("summaryfeatures", new FeatureData(new SlimeAdapter(slime.get())));

    // --- Removed fields are filled in again by a summary added afterwards
    hit.removeField("string_field");
    hit.removeField("integer_field");
    partialDocsum3.lazyDecode("partial3", partialSummary3(), hit);
    expected.put("string_field", "new str val");
    expected.put("integer_field", 5);
    assertFields(expected, hit);

    hit.removeField("integer_field");
    partialDocsum2.lazyDecode("partial2", partialSummary2(), hit);
    expected.put("integer_field", 4);
    assertFields(expected, hit);
}
/**
 * Asserts that the expected fields are what is returned from every access method of Hit:
 * forEachField, forEachFieldAsRaw, fieldKeys, fields, fieldIterator and getField.
 *
 * @param expected the field names and values the hit should contain
 * @param hit the hit to inspect
 */
private void assertFields(Map<String, Object> expected, Hit hit) {
    // field traverser
    Map<String, Object> traversed = new HashMap<>();
    hit.forEachField((name, value) -> {
        if (traversed.containsKey(name))
            fail("Multiple callbacks for " + name);
        traversed.put(name, value);
    });
    assertEqualMaps(expected, traversed);

    // raw utf8 field traverser
    Map<String, Object> traversedUtf8 = new HashMap<>();
    hit.forEachFieldAsRaw(new Utf8FieldTraverser(traversedUtf8));
    assertEquals(expected, traversedUtf8);

    // fieldKeys, through its iterator
    int fieldNameIteratorFieldCount = 0;
    for (Iterator<String> i = hit.fieldKeys().iterator(); i.hasNext(); ) {
        fieldNameIteratorFieldCount++;
        String name = i.next();
        assertTrue(expected.containsKey(name), "Expected field " + name);
    }
    assertEquals(expected.size(), fieldNameIteratorFieldCount);

    // fieldKeys, as a set
    assertEquals(expected.keySet(), hit.fieldKeys());

    // fields
    assertEqualMaps(expected, hit.fields());

    // fieldIterator
    int fieldIteratorFieldCount = 0;
    for (Iterator<Map.Entry<String, Object>> i = hit.fieldIterator(); i.hasNext(); ) {
        fieldIteratorFieldCount++;
        Map.Entry<String, Object> field = i.next();
        // expected value first, so that a failure message labels the two sides correctly
        assertEquals(expected.get(field.getKey()), field.getValue(), "Field '" + field.getKey() + "'");
    }
    assertEquals(expected.size(), fieldIteratorFieldCount);

    // getField
    for (Map.Entry<String, Object> field : expected.entrySet())
        assertEquals(field.getValue(), hit.getField(field.getKey()));
}
/**
 * Asserts that two maps have the same size, the same key set and equal values for every key,
 * with a failure message telling which of these checks (and which key) failed.
 *
 * @param expected the expected content
 * @param actual the content to verify
 */
private void assertEqualMaps(Map<String, ?> expected, Map<String, ?> actual) {
    assertEquals(expected.size(), actual.size(), "Map sizes");
    assertEquals(expected.keySet(), actual.keySet(), "Keys");
    for (var expectedEntry : expected.entrySet()) {
        assertEquals(expectedEntry.getValue(), actual.get(expectedEntry.getKey()), "Key '" + expectedEntry.getKey() + "'");
    }
}
/** Returns an encoded summary whose root is an object without any fields. */
private byte[] emptySummary() {
    Slime noFields = new Slime();
    noFields.setObject();
    return encode(noFields);
}
/** Returns an encoded summary whose root is the string "Timed out...." rather than an object. */
private byte[] timeoutSummary() {
    Slime message = new Slime();
    message.setString("Timed out....");
    return encode(message);
}
/** Returns an encoded summary holding integer_field = 4 and short_field = 2. */
private byte[] partialSummary1() {
    Slime summary = new Slime();
    Cursor root = summary.setObject();
    root.setLong("integer_field", 4);
    root.setLong("short_field", 2);
    return encode(summary);
}
/** Returns an encoded summary holding integer_field = 4, float_field = 4.5 and double_field = 8.75. */
private byte[] partialSummary2() {
    Slime summary = new Slime();
    Cursor root = summary.setObject();
    root.setLong("integer_field", 4);
    root.setDouble("float_field", 4.5);
    root.setDouble("double_field", 8.75);
    return encode(summary);
}
/** Returns an encoded summary holding string_field = "new str val" and integer_field = 5. */
private byte[] partialSummary3() {
    Slime summary = new Slime();
    Cursor root = summary.setObject();
    root.setString("string_field", "new str val");
    root.setLong("integer_field", 5);
    return encode(summary);
}
/**
 * Returns an encoded summary with the numeric, string and tensor fields (and summary features)
 * set, but without data_field, longdata_field, xmlstring_field and jsonstring_field.
 */
private byte[] fullishSummary(Tensor tensor1, Tensor tensor2) {
    Slime summary = new Slime();
    Cursor root = summary.setObject();

    root.setLong("integer_field", 4);
    root.setLong("short_field", 2);
    root.setLong("byte_field", 1);
    root.setDouble("float_field", 4.5);
    root.setDouble("double_field", 8.75);
    root.setLong("int64_field", 8);

    // The setData calls for data_field ("data_value") and longdata_field ("longdata_value")
    // are intentionally left out of this summary.
    // NOTE(review): presumably because raw data values cannot be compared by assertFields — confirm.
    root.setString("string_field", "string_value");
    root.setString("longstring_field", "longstring_value");

    addTensors(tensor1, tensor2, root);
    return encode(summary);
}
/** Returns an encoded summary with every field declared by createDocsumDefinitionSet() set. */
private byte[] fullSummary(Tensor tensor1, Tensor tensor2) {
    Slime summary = new Slime();
    Cursor root = summary.setObject();

    root.setLong("integer_field", 4);
    root.setLong("short_field", 2);
    root.setLong("byte_field", 1);
    root.setDouble("float_field", 4.5);
    root.setDouble("double_field", 8.75);
    root.setLong("int64_field", 8);

    root.setString("string_field", "string_value");
    root.setData("data_field", "data_value".getBytes(StandardCharsets.UTF_8));
    root.setString("longstring_field", "longstring_value");
    root.setData("longdata_field", "longdata_value".getBytes(StandardCharsets.UTF_8));
    root.setString("xmlstring_field", "xmlstring_value");

    // Nested object: {"foo":1,"bar":2}
    Cursor json = root.setObject("jsonstring_field");
    json.setLong("foo", 1);
    json.setLong("bar", 2);

    addTensors(tensor1, tensor2, root);
    return encode(summary);
}
/**
 * Adds each non-null tensor as tensor_field1 / tensor_field2 of the given summary object and,
 * only when both tensors are present, a summaryfeatures object holding a double and both tensors.
 */
private void addTensors(Tensor tensor1, Tensor tensor2, Cursor docsum) {
    boolean hasFirst = tensor1 != null;
    boolean hasSecond = tensor2 != null;

    if (hasFirst)
        docsum.setData("tensor_field1", TypedBinaryFormat.encode(tensor1));
    if (hasSecond)
        docsum.setData("tensor_field2", TypedBinaryFormat.encode(tensor2));
    if ( ! (hasFirst && hasSecond)) return;

    Cursor features = docsum.setObject("summaryfeatures");
    features.setDouble("double_feature", 0.5);
    // Values produced by functions are wrapped in rankingExpression(function-name)
    features.setData("rankingExpression(tensor1_feature)", TypedBinaryFormat.encode(tensor1));
    features.setData("tensor2_feature", TypedBinaryFormat.encode(tensor2));
}
/**
 * Returns the binary encoding of the given slime, prefixed by
 * DocsumDefinitionSet.SLIME_MAGIC_ID written as a 4-byte little-endian int.
 */
private byte[] encode(Slime slime) {
    byte[] payload = BinaryFormat.encode(slime);
    return ByteBuffer.allocate(Integer.BYTES + payload.length)
                     .order(ByteOrder.LITTLE_ENDIAN)
                     .putInt(DocsumDefinitionSet.SLIME_MAGIC_ID)
                     .order(ByteOrder.BIG_ENDIAN) // kept from the original; the payload bytes are copied as-is
                     .put(payload)
                     .array();
}
/**
 * Creates the definition set of schema "test" with a single summary, "default",
 * declaring one field of each type used by these tests.
 */
private DocsumDefinitionSet createDocsumDefinitionSet() {
    // { field name, field type }, in declaration order
    String[][] fields = {
        { "integer_field", "integer" },
        { "short_field", "short" },
        { "byte_field", "byte" },
        { "float_field", "float" },
        { "double_field", "double" },
        { "int64_field", "int64" },
        { "string_field", "string" },
        { "data_field", "data" },
        { "longstring_field", "longstring" },
        { "longdata_field", "longdata" },
        { "xmlstring_field", "xmlstring" },
        { "jsonstring_field", "jsonstring" },
        { "tensor_field1", "tensor" },
        { "tensor_field2", "tensor" },
        { "summaryfeatures", "featuredata" }
    };

    var schemaBuilder = new Schema.Builder("test");
    var summaryBuilder = new DocumentSummary.Builder("default");
    for (String[] field : fields)
        summaryBuilder.add(new DocumentSummary.Field(field[0], field[1]));
    schemaBuilder.add(summaryBuilder.build());
    return new DocsumDefinitionSet(schemaBuilder.build());
}
/**
 * Creates the definition set of schema "test" whose single summary, named "default",
 * declares integer_field and short_field.
 */
private DocsumDefinitionSet createPartialDocsumDefinitionSet1() {
    var schemaBuilder = new Schema.Builder("test");
    var summaryBuilder = new DocumentSummary.Builder("default");

    var integerField = new DocumentSummary.Field("integer_field", "integer");
    summaryBuilder.add(integerField);
    var shortField = new DocumentSummary.Field("short_field", "short");
    summaryBuilder.add(shortField);

    schemaBuilder.add(summaryBuilder.build());
    return new DocsumDefinitionSet(schemaBuilder.build());
}
/**
 * Creates the definition set of schema "test" whose single summary, named "default",
 * declares integer_field, float_field and double_field.
 */
private DocsumDefinitionSet createPartialDocsumDefinitionSet2() {
    var schemaBuilder = new Schema.Builder("test");
    var summaryBuilder = new DocumentSummary.Builder("default");

    var integerField = new DocumentSummary.Field("integer_field", "integer");
    summaryBuilder.add(integerField);
    var floatField = new DocumentSummary.Field("float_field", "float");
    summaryBuilder.add(floatField);
    var doubleField = new DocumentSummary.Field("double_field", "double");
    summaryBuilder.add(doubleField);

    schemaBuilder.add(summaryBuilder.build());
    return new DocsumDefinitionSet(schemaBuilder.build());
}
/**
 * Creates the definition set of schema "test" whose single summary, named "default",
 * declares integer_field and string_field.
 */
private DocsumDefinitionSet createPartialDocsumDefinitionSet3() {
    var schemaBuilder = new Schema.Builder("test");
    var summaryBuilder = new DocumentSummary.Builder("default");

    var integerField = new DocumentSummary.Field("integer_field", "integer");
    summaryBuilder.add(integerField);
    var stringField = new DocumentSummary.Field("string_field", "string");
    summaryBuilder.add(stringField);

    schemaBuilder.add(summaryBuilder.build());
    return new DocsumDefinitionSet(schemaBuilder.build());
}
/**
 * A raw field consumer which records every field it receives in the map given at construction.
 * Fields received as UTF-8 bytes are recorded as strings, all other fields are recorded as-is.
 */
private static class Utf8FieldTraverser implements Hit.RawUtf8Consumer {

    /** The caller-owned map which receives one entry per field callback */
    private final Map<String, Object> traversed;

    /**
     * @param traversed the map to add each received field name and value to
     */
    public Utf8FieldTraverser(Map<String, Object> traversed) {
        this.traversed = traversed;
    }

    /** Records a field delivered as raw UTF-8 bytes, decoded to a String. */
    @Override
    public void accept(String fieldName, byte[] utf8Data, int offset, int length) {
        traversed.put(fieldName, new String(utf8Data, offset, length, StandardCharsets.UTF_8));
    }

    /** Records a field delivered as an object. */
    @Override
    public void accept(String name, Object value) {
        // NOTE(review): no field in this file is named "string_value" (it is the value written to
        // string_field), so this guard never fires for the hits built here — confirm the intended name.
        if (name.equals("string_value"))
            fail("Expected string_value to be received as UTF-8");
        traversed.put(name, value);
    }

}
}