diff options
49 files changed, 1005 insertions, 265 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java index 11bd14cbe46..8b07aa48a24 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -66,26 +66,31 @@ public class DerivedConfiguration implements AttributesConfig.Producer { * schema is later modified. */ public DerivedConfiguration(Schema schema, DeployState deployState) { - Validator.ensureNotNull("Schema", schema); - this.schema = schema; - this.queryProfiles = deployState.getQueryProfiles().getRegistry(); - this.maxUncommittedMemory = deployState.getProperties().featureFlags().maxUnCommittedMemory(); - if ( ! schema.isDocumentsOnly()) { - streamingFields = new VsmFields(schema); - streamingSummary = new VsmSummary(schema); + try { + Validator.ensureNotNull("Schema", schema); + this.schema = schema; + this.queryProfiles = deployState.getQueryProfiles().getRegistry(); + this.maxUncommittedMemory = deployState.getProperties().featureFlags().maxUnCommittedMemory(); + if (!schema.isDocumentsOnly()) { + streamingFields = new VsmFields(schema); + streamingSummary = new VsmSummary(schema); + } + if (!schema.isDocumentsOnly()) { + attributeFields = new AttributeFields(schema); + summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); + juniperrc = new Juniperrc(schema); + rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); + indexingScript = new IndexingScript(schema); + indexInfo = new IndexInfo(schema); + schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries); + indexSchema = new IndexSchema(schema); + importedFields = new ImportedFields(schema); + } + Validation.validate(this, schema); } - if ( ! 
schema.isDocumentsOnly()) { - attributeFields = new AttributeFields(schema); - summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); - juniperrc = new Juniperrc(schema); - rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); - indexingScript = new IndexingScript(schema); - indexInfo = new IndexInfo(schema); - schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries); - indexSchema = new IndexSchema(schema); - importedFields = new ImportedFields(schema); + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid " + schema, e); } - Validation.validate(this, schema); } /** diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformer.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformer.java index ad050d4ca63..3d5d29fb3c7 100644 --- a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformer.java +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformer.java @@ -36,12 +36,12 @@ public class BooleanExpressionTransformer extends ExpressionTransformer<Transfor node = transformChildren(composite, context); if (node instanceof OperationNode arithmetic) - node = transformBooleanArithmetics(arithmetic); + node = transformBooleanArithmetics(arithmetic, context); return node; } - private ExpressionNode transformBooleanArithmetics(OperationNode node) { + private ExpressionNode transformBooleanArithmetics(OperationNode node, TransformContext context) { Iterator<ExpressionNode> child = node.children().iterator(); // Transform in precedence order: @@ -51,24 +51,25 @@ public class BooleanExpressionTransformer extends ExpressionTransformer<Transfor Operator op = it.next(); if ( ! stack.isEmpty()) { while (stack.size() > 1 && ! 
op.hasPrecedenceOver(stack.peek().op)) { - popStack(stack); + popStack(stack, context); } } stack.push(new ChildNode(op, child.next())); } while (stack.size() > 1) - popStack(stack); + popStack(stack, context); return stack.getFirst().child; } - private void popStack(Deque<ChildNode> stack) { + private void popStack(Deque<ChildNode> stack, TransformContext context) { ChildNode rhs = stack.pop(); ChildNode lhs = stack.peek(); + // isDefinitelyPrimitive is expensive so only invoke it when necessary ExpressionNode combination; - if (rhs.op == Operator.and) + if (rhs.op == Operator.and && isDefinitelyPrimitive(lhs.child, context) && isDefinitelyPrimitive(rhs.child, context)) combination = andByIfNode(lhs.child, rhs.child); - else if (rhs.op == Operator.or) + else if (rhs.op == Operator.or && isDefinitelyPrimitive(lhs.child, context) && isDefinitelyPrimitive(rhs.child, context)) combination = orByIfNode(lhs.child, rhs.child); else { combination = resolve(lhs, rhs); @@ -77,6 +78,22 @@ public class BooleanExpressionTransformer extends ExpressionTransformer<Transfor lhs.child = combination; } + private boolean isDefinitelyPrimitive(ExpressionNode node, TransformContext context) { + try { + return node.type(context.types()).rank() == 0; + } + catch (IllegalArgumentException e) { + // Types can only be reliably resolved top down, which has not been done here. + // E.g. + // function(nameArg) { + // attribute(nameArg) + // } + // is supported. + // So, we return false when something cannot be resolved. + return false; + } + } + private static OperationNode resolve(ChildNode left, ChildNode right) { if (! (left.child instanceof OperationNode) && ! 
(right.child instanceof OperationNode)) return new OperationNode(left.child, right.op, right.child); @@ -103,7 +120,6 @@ public class BooleanExpressionTransformer extends ExpressionTransformer<Transfor joinedChildren.add(node.child); } - private IfNode andByIfNode(ExpressionNode a, ExpressionNode b) { return new IfNode(a, b, new ConstantNode(new BooleanValue(false))); } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java index e1daf7810e5..3e7a1f7613b 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java @@ -22,7 +22,7 @@ import java.util.logging.Level; /** * Resolves and assigns types to all functions in a ranking expression, and - * validates the types of all ranking expressions under a search instance: + * validates the types of all ranking expressions under a schema instance: * Some operators constrain the types of inputs, and first-and second-phase expressions * must return scalar values. 
* diff --git a/config-model/src/test/derived/neuralnet/neuralnet.sd b/config-model/src/test/derived/neuralnet/neuralnet.sd index 54f6cefc6f4..95b7341a42f 100644 --- a/config-model/src/test/derived/neuralnet/neuralnet.sd +++ b/config-model/src/test/derived/neuralnet/neuralnet.sd @@ -3,6 +3,10 @@ schema neuralnet { document neuralnet { + field uniqueRCount type double { + indexing: attribute + } + field pinned type int { indexing: attribute } diff --git a/config-model/src/test/derived/neuralnet_noqueryprofile/neuralnet.sd b/config-model/src/test/derived/neuralnet_noqueryprofile/neuralnet.sd index 073813d2198..e083b152aba 100644 --- a/config-model/src/test/derived/neuralnet_noqueryprofile/neuralnet.sd +++ b/config-model/src/test/derived/neuralnet_noqueryprofile/neuralnet.sd @@ -3,6 +3,10 @@ schema neuralnet { document neuralnet { + field uniqueRCount type double { + indexing: attribute + } + field pinned type int { indexing: attribute } diff --git a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg index 524a1253480..82bba81f0d5 100644 --- a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg +++ b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg @@ -10,6 +10,9 @@ schema[].summaryclass[].fields[].name "documentid" schema[].summaryclass[].fields[].type "longstring" schema[].summaryclass[].fields[].dynamic false schema[].summaryclass[].name "attributeprefetch" +schema[].summaryclass[].fields[].name "uniqueRCount" +schema[].summaryclass[].fields[].type "double" +schema[].summaryclass[].fields[].dynamic false schema[].summaryclass[].fields[].name "pinned" schema[].summaryclass[].fields[].type "integer" schema[].summaryclass[].fields[].dynamic false diff --git a/config-model/src/test/derived/rankingexpression/rankexpression.sd b/config-model/src/test/derived/rankingexpression/rankexpression.sd index a5e7f07f6ac..7d8c79da5fb 100644 --- 
a/config-model/src/test/derived/rankingexpression/rankexpression.sd +++ b/config-model/src/test/derived/rankingexpression/rankexpression.sd @@ -3,6 +3,14 @@ schema rankexpression { document rankexpression { + field nrtgmp type double { + indexing: attribute + } + + field glmpfw type double { + indexing: attribute + } + field artist type string { indexing: summary | index } diff --git a/config-model/src/test/derived/rankingexpression/summary.cfg b/config-model/src/test/derived/rankingexpression/summary.cfg index 1c1453a8a89..b52cb055164 100644 --- a/config-model/src/test/derived/rankingexpression/summary.cfg +++ b/config-model/src/test/derived/rankingexpression/summary.cfg @@ -24,9 +24,15 @@ classes[].fields[].source "" classes[].fields[].name "documentid" classes[].fields[].command "documentid" classes[].fields[].source "" -classes[].id 1736696699 +classes[].id 399614584 classes[].name "attributeprefetch" classes[].omitsummaryfeatures false +classes[].fields[].name "nrtgmp" +classes[].fields[].command "attribute" +classes[].fields[].source "nrtgmp" +classes[].fields[].name "glmpfw" +classes[].fields[].command "attribute" +classes[].fields[].source "glmpfw" classes[].fields[].name "year" classes[].fields[].command "attribute" classes[].fields[].source "year" diff --git a/config-model/src/test/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformerTestCase.java b/config-model/src/test/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformerTestCase.java index d692b69d3c8..d06573f7bae 100644 --- a/config-model/src/test/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformerTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/expressiontransforms/BooleanExpressionTransformerTestCase.java @@ -2,10 +2,13 @@ package com.yahoo.schema.expressiontransforms; import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.Reference; import 
com.yahoo.searchlib.rankingexpression.evaluation.MapContext; import com.yahoo.searchlib.rankingexpression.evaluation.MapTypeContext; import com.yahoo.searchlib.rankingexpression.rule.OperationNode; import com.yahoo.searchlib.rankingexpression.transform.TransformContext; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.evaluation.TypeContext; import org.junit.jupiter.api.Test; import java.util.Map; @@ -20,7 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class BooleanExpressionTransformerTestCase { @Test - public void testTransformer() throws Exception { + public void booleanTransformation() throws Exception { assertTransformed("if (a, b, false)", "a && b"); assertTransformed("if (a, true, b)", "a || b"); assertTransformed("if (a, true, b + c)", "a || b + c"); @@ -33,16 +36,17 @@ public class BooleanExpressionTransformerTestCase { } @Test - public void testIt() throws Exception { - assertTransformed("if(1 - 1, true, 1 - 1)", "1 - 1 || 1 - 1"); + public void noTransformationOnTensorTypes() throws Exception { + var typeContext = new MapTypeContext(); + typeContext.setType(Reference.fromIdentifier("tensorA"), TensorType.fromSpec("tensor(x{})")); + typeContext.setType(Reference.fromIdentifier("tensorB"), TensorType.fromSpec("tensor(x{})")); + assertUntransformed("tensorA && tensorB", typeContext); + assertTransformed("a && (tensorA * tensorB)","a && ( tensorA * tensorB)", typeContext); } @Test public void testNotSkewingNonBoolean() throws Exception { - assertTransformed("a + b + c * d + e + f", "a + b + c * d + e + f"); - var expr = new BooleanExpressionTransformer() - .transform(new RankingExpression("a + b + c * d + e + f"), - new TransformContext(Map.of(), new MapTypeContext())); + var expr = assertTransformed("a + b + c * d + e + f", "a + b + c * d + e + f"); assertTrue(expr.getRoot() instanceof OperationNode); OperationNode root = (OperationNode) expr.getRoot(); assertEquals(5, root.operators().size()); @@ -51,41 +55,53 @@ 
public class BooleanExpressionTransformerTestCase { @Test public void testTransformPreservesPrecedence() throws Exception { - assertUnTransformed("a"); - assertUnTransformed("a + b"); - assertUnTransformed("a + b + c"); - assertUnTransformed("a * b"); - assertUnTransformed("a + b * c + d"); - assertUnTransformed("a + b + c * d + e + f"); - assertUnTransformed("a * b + c + d + e * f"); - assertUnTransformed("(a * b) + c + d + e * f"); - assertUnTransformed("(a * b + c) + d + e * f"); - assertUnTransformed("a * (b + c) + d + e * f"); - assertUnTransformed("(a * b) + (c + (d + e)) * f"); + assertUntransformed("a"); + assertUntransformed("a + b"); + assertUntransformed("a + b + c"); + assertUntransformed("a * b"); + assertUntransformed("a + b * c + d"); + assertUntransformed("a + b + c * d + e + f"); + assertUntransformed("a * b + c + d + e * f"); + assertUntransformed("(a * b) + c + d + e * f"); + assertUntransformed("(a * b + c) + d + e * f"); + assertUntransformed("a * (b + c) + d + e * f"); + assertUntransformed("(a * b) + (c + (d + e)) * f"); + } + + private void assertUntransformed(String input) throws Exception { + assertUntransformed(input, new MapTypeContext()); + } + + private void assertUntransformed(String input, MapTypeContext typeContext) throws Exception { + assertTransformed(input, input, typeContext); } - private void assertUnTransformed(String input) throws Exception { - assertTransformed(input, input); + private RankingExpression assertTransformed(String expected, String input) throws Exception { + return assertTransformed(expected, input, new MapTypeContext()); } - private void assertTransformed(String expected, String input) throws Exception { + private RankingExpression assertTransformed(String expected, String input, MapTypeContext typeContext) throws Exception { + MapContext context = contextWithSingleLetterVariables(typeContext); var transformedExpression = new BooleanExpressionTransformer() .transform(new RankingExpression(input), - new 
TransformContext(Map.of(), new MapTypeContext())); + new TransformContext(Map.of(), typeContext)); assertEquals(new RankingExpression(expected), transformedExpression, "Transformed as expected"); - MapContext context = contextWithSingleLetterVariables(); var inputExpression = new RankingExpression(input); assertEquals(inputExpression.evaluate(context).asBoolean(), transformedExpression.evaluate(context).asBoolean(), "Transform and original input are equivalent"); + return transformedExpression; } - private MapContext contextWithSingleLetterVariables() { + private MapContext contextWithSingleLetterVariables(MapTypeContext typeContext) { var context = new MapContext(); - for (int i = 0; i < 26; i++) - context.put(Character.toString(i + 97), Math.floorMod(i, 2)); + for (int i = 0; i < 26; i++) { + String name = Character.toString(i + 97); + typeContext.setType(Reference.fromIdentifier(name), TensorType.empty); + context.put(name, Math.floorMod(i, 2)); + } return context; } diff --git a/config-model/src/test/java/com/yahoo/schema/processing/RankingExpressionWithOnnxTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/RankingExpressionWithOnnxTestCase.java index 83d19b010bb..2f53dba7bb4 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/RankingExpressionWithOnnxTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/RankingExpressionWithOnnxTestCase.java @@ -31,6 +31,8 @@ public class RankingExpressionWithOnnxTestCase { private final static String name = "mnist_softmax"; private final static String vespaExpression = "join(reduce(join(rename(Placeholder, (d0, d1), (d0, d2)), constant(mnist_softmax_layer_Variable), f(a,b)(a * b)), sum, d2) * 1.0, constant(mnist_softmax_layer_Variable_1) * 1.0, f(a,b)(a + b))"; + private final static String vespaExpressionConstants = "constant mnist_softmax_layer_Variable { file: ignored\ntype: tensor<float>(d0[1],d1[784]) }\n" + + "constant mnist_softmax_layer_Variable_1 { file: 
ignored\ntype: tensor<float>(d0[1],d1[784]) }\n"; @AfterEach public void removeGeneratedModelFiles() { @@ -41,7 +43,7 @@ public class RankingExpressionWithOnnxTestCase { void testOnnxReferenceWithConstantFeature() { RankProfileSearchFixture search = fixtureWith("constant(mytensor)", "onnx_vespa('mnist_softmax.onnx')", - "constant mytensor { file: ignored\ntype: tensor<float>(d0[1],d1[784]) }", + vespaExpressionConstants + "constant mytensor { file: ignored\ntype: tensor<float>(d0[1],d1[784]) }", null); search.assertFirstPhaseExpression(vespaExpression, "my_profile"); } @@ -58,7 +60,7 @@ public class RankingExpressionWithOnnxTestCase { queryProfileType); RankProfileSearchFixture search = fixtureWith("query(mytensor)", "onnx_vespa('mnist_softmax.onnx')", - null, + vespaExpressionConstants, null, "Placeholder", application); @@ -70,7 +72,7 @@ public class RankingExpressionWithOnnxTestCase { StoringApplicationPackage application = new StoringApplicationPackage(applicationDir); RankProfileSearchFixture search = fixtureWith("attribute(mytensor)", "onnx_vespa('mnist_softmax.onnx')", - null, + vespaExpressionConstants, "field mytensor type tensor<float>(d0[1],d1[784]) { indexing: attribute }", "Placeholder", application); @@ -88,7 +90,7 @@ public class RankingExpressionWithOnnxTestCase { StoringApplicationPackage application = new StoringApplicationPackage(applicationDir, queryProfile, queryProfileType); RankProfileSearchFixture search = fixtureWith("sum(query(mytensor) * attribute(mytensor) * constant(mytensor),d2)", "onnx_vespa('mnist_softmax.onnx')", - "constant mytensor { file: ignored\ntype: tensor<float>(d0[1],d1[784]) }", + vespaExpressionConstants + "constant mytensor { file: ignored\ntype: tensor<float>(d0[1],d1[784]) }", "field mytensor type tensor<float>(d0[1],d1[784]) { indexing: attribute }", "Placeholder", application); @@ -99,21 +101,24 @@ public class RankingExpressionWithOnnxTestCase { @Test void testNestedOnnxReference() { RankProfileSearchFixture search 
= fixtureWith("tensor<float>(d0[1],d1[784])(0.0)", - "5 + sum(onnx_vespa('mnist_softmax.onnx'))"); + "5 + sum(onnx_vespa('mnist_softmax.onnx'))", + vespaExpressionConstants); search.assertFirstPhaseExpression("5 + reduce(" + vespaExpression + ", sum)", "my_profile"); } @Test void testOnnxReferenceWithSpecifiedOutput() { RankProfileSearchFixture search = fixtureWith("tensor<float>(d0[1],d1[784])(0.0)", - "onnx_vespa('mnist_softmax.onnx', 'layer_add')"); + "onnx_vespa('mnist_softmax.onnx', 'layer_add')", + vespaExpressionConstants); search.assertFirstPhaseExpression(vespaExpression, "my_profile"); } @Test void testOnnxReferenceWithSpecifiedOutputAndSignature() { RankProfileSearchFixture search = fixtureWith("tensor<float>(d0[1],d1[784])(0.0)", - "onnx_vespa('mnist_softmax.onnx', 'default.layer_add')"); + "onnx_vespa('mnist_softmax.onnx', 'default.layer_add')", + vespaExpressionConstants); search.assertFirstPhaseExpression(vespaExpression, "my_profile"); } @@ -177,7 +182,8 @@ public class RankingExpressionWithOnnxTestCase { @Test void testImportingFromStoredExpressions() throws IOException { RankProfileSearchFixture search = fixtureWith("tensor<float>(d0[1],d1[784])(0.0)", - "onnx_vespa(\"mnist_softmax.onnx\")"); + "onnx_vespa(\"mnist_softmax.onnx\")", + vespaExpressionConstants); search.assertFirstPhaseExpression(vespaExpression, "my_profile"); // At this point the expression is stored - copy application to another location which do not have a models dir @@ -187,12 +193,14 @@ public class RankingExpressionWithOnnxTestCase { IOUtils.copyDirectory(applicationDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile(), storedApplicationDirectory.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); StoringApplicationPackage storedApplication = new StoringApplicationPackage(storedApplicationDirectory); + String constants = "constant mnist_softmax_layer_Variable { file: ignored\ntype: tensor<float>(d0[2],d1[784]) }\n" + + "constant mnist_softmax_layer_Variable_1 { 
file: ignored\ntype: tensor<float>(d0[2],d1[784]) }\n"; RankProfileSearchFixture searchFromStored = fixtureWith("tensor<float>(d0[2],d1[784])(0.0)", - "onnx_vespa('mnist_softmax.onnx')", - null, - null, - "Placeholder", - storedApplication); + "onnx_vespa('mnist_softmax.onnx')", + constants, + null, + "Placeholder", + storedApplication); searchFromStored.assertFirstPhaseExpression(vespaExpression, "my_profile"); // Verify that the constants exists, but don't verify the content as we are not // simulating file distribution in this test @@ -221,7 +229,8 @@ public class RankingExpressionWithOnnxTestCase { String vespaExpressionWithoutConstant = "join(reduce(join(rename(Placeholder, (d0, d1), (d0, d2)), " + name + "_layer_Variable, f(a,b)(a * b)), sum, d2) * 1.0, constant(" + name + "_layer_Variable_1) * 1.0, f(a,b)(a + b))"; - RankProfileSearchFixture search = uncompiledFixtureWith(rankProfile, new StoringApplicationPackage(applicationDir)); + String constant = "constant mnist_softmax_layer_Variable_1 { file: ignored\ntype: tensor<float>(d0[1],d1[10]) }\n"; + RankProfileSearchFixture search = uncompiledFixtureWith(rankProfile, new StoringApplicationPackage(applicationDir), constant); search.compileRankProfile("my_profile", applicationDir.append("models")); search.compileRankProfile("my_profile_child", applicationDir.append("models")); search.assertFirstPhaseExpression(vespaExpressionWithoutConstant, "my_profile"); @@ -237,7 +246,7 @@ public class RankingExpressionWithOnnxTestCase { IOUtils.copyDirectory(applicationDir.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile(), storedApplicationDirectory.append(ApplicationPackage.MODELS_GENERATED_DIR).toFile()); StoringApplicationPackage storedApplication = new StoringApplicationPackage(storedApplicationDirectory); - RankProfileSearchFixture searchFromStored = uncompiledFixtureWith(rankProfile, storedApplication); + RankProfileSearchFixture searchFromStored = uncompiledFixtureWith(rankProfile, storedApplication, 
constant); searchFromStored.compileRankProfile("my_profile", applicationDir.append("models")); searchFromStored.compileRankProfile("my_profile_child", applicationDir.append("models")); searchFromStored.assertFirstPhaseExpression(vespaExpressionWithoutConstant, "my_profile"); @@ -326,7 +335,11 @@ public class RankingExpressionWithOnnxTestCase { } private RankProfileSearchFixture fixtureWith(String placeholderExpression, String firstPhaseExpression) { - return fixtureWith(placeholderExpression, firstPhaseExpression, null, null, "Placeholder", + return fixtureWith(placeholderExpression, firstPhaseExpression, null); + } + + private RankProfileSearchFixture fixtureWith(String placeholderExpression, String firstPhaseExpression, String constant) { + return fixtureWith(placeholderExpression, firstPhaseExpression, constant, null, "Placeholder", new StoringApplicationPackage(applicationDir)); } @@ -337,9 +350,13 @@ public class RankingExpressionWithOnnxTestCase { } private RankProfileSearchFixture uncompiledFixtureWith(String rankProfile, StoringApplicationPackage application) { + return uncompiledFixtureWith(rankProfile, application, null); + } + + private RankProfileSearchFixture uncompiledFixtureWith(String rankProfile, StoringApplicationPackage application, String constant) { try { return new RankProfileSearchFixture(application, application.getQueryProfiles(), - rankProfile, null, null); + rankProfile, constant, null); } catch (ParseException e) { throw new IllegalArgumentException(e); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 92ebc5b7177..19775ef420d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -391,15 +391,23 @@ public class ApplicationController 
{ .map(VespaVersion::versionNumber) .filter(systemCompatible) .max(naturalOrder()); - if (nonBroken.isPresent()) return nonBroken.get(); - // Fall back to the newest, system-compatible version with unknown confidence. + // Fall back to the newest, system-compatible version with unknown confidence. For public systems, this implies high confidence. Set<Version> knownVersions = versionStatus.versions().stream().map(VespaVersion::versionNumber).collect(toSet()); Optional<Version> unknown = controller.mavenRepository().metadata().versions().stream() .filter(version -> ! knownVersions.contains(version)) .filter(systemCompatible) .max(naturalOrder()); - if (unknown.isPresent()) return unknown.get(); + + if (nonBroken.isPresent()) { + if (controller.system().isPublic() && unknown.isPresent() && unknown.get().isAfter(nonBroken.get())) + return unknown.get(); + + return nonBroken.get(); + } + + if (unknown.isPresent()) + return unknown.get(); throw new IllegalArgumentException("no suitable, released compile version exists" + (wantedMajor.isPresent() ? 
" for specified major: " + wantedMajor.getAsInt() : "")); diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 4b42c055865..fb88ce7886a 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -227,7 +227,7 @@ public class Flags { public static final UnboundIntFlag MAX_ACTIVATION_INHIBITED_OUT_OF_SYNC_GROUPS = defineIntFlag( "max-activation-inhibited-out-of-sync-groups", 0, - List.of("vekterli"), "2021-02-19", "2022-10-01", + List.of("vekterli"), "2021-02-19", "2022-12-01", "Allows replicas in up to N content groups to not be activated " + "for query visibility if they are out of sync with a majority of other replicas", "Takes effect at redeployment", @@ -235,14 +235,14 @@ public class Flags { public static final UnboundIntFlag MAX_CONCURRENT_MERGES_PER_NODE = defineIntFlag( "max-concurrent-merges-per-node", 16, - List.of("balder", "vekterli"), "2021-06-06", "2022-10-01", + List.of("balder", "vekterli"), "2021-06-06", "2022-12-01", "Specifies max concurrent merges per content node.", "Takes effect at redeploy", ZONE_ID, APPLICATION_ID); public static final UnboundIntFlag MAX_MERGE_QUEUE_SIZE = defineIntFlag( "max-merge-queue-size", 100, - List.of("balder", "vekterli"), "2021-06-06", "2022-10-01", + List.of("balder", "vekterli"), "2021-06-06", "2022-12-01", "Specifies max size of merge queue.", "Takes effect at redeploy", ZONE_ID, APPLICATION_ID); @@ -256,7 +256,7 @@ public class Flags { public static final UnboundBooleanFlag ENABLED_HORIZON_DASHBOARD = defineFeatureFlag( "enabled-horizon-dashboard", false, - List.of("olaa"), "2021-09-13", "2022-10-01", + List.of("olaa"), "2021-09-13", "2023-01-01", "Enable Horizon dashboard", "Takes effect immediately", TENANT_ID, CONSOLE_USER_EMAIL @@ -293,7 +293,7 @@ public class Flags { public static final UnboundStringFlag MERGE_THROTTLING_POLICY = defineStringFlag( 
"merge-throttling-policy", "STATIC", - List.of("vekterli"), "2022-01-25", "2022-10-01", + List.of("vekterli"), "2022-01-25", "2022-12-01", "Sets the policy used for merge throttling on the content nodes. " + "Valid values: STATIC, DYNAMIC", "Takes effect at redeployment", @@ -301,7 +301,7 @@ public class Flags { public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_DECREMENT_FACTOR = defineDoubleFlag( "persistence-throttling-ws-decrement-factor", 1.2, - List.of("vekterli"), "2022-01-27", "2022-10-01", + List.of("vekterli"), "2022-01-27", "2022-12-01", "Sets the dynamic throttle policy window size decrement factor for persistence " + "async throttling. Only applies if DYNAMIC policy is used.", "Takes effect on redeployment", @@ -309,7 +309,7 @@ public class Flags { public static final UnboundDoubleFlag PERSISTENCE_THROTTLING_WS_BACKOFF = defineDoubleFlag( "persistence-throttling-ws-backoff", 0.95, - List.of("vekterli"), "2022-01-27", "2022-10-01", + List.of("vekterli"), "2022-01-27", "2022-12-01", "Sets the dynamic throttle policy window size backoff for persistence " + "async throttling. Only applies if DYNAMIC policy is used. 
Valid range [0, 1]", "Takes effect on redeployment", @@ -414,7 +414,7 @@ public class Flags { public static final UnboundBooleanFlag CLEANUP_TENANT_ROLES = defineFeatureFlag( "cleanup-tenant-roles", false, - List.of("olaa"), "2022-08-10", "2022-10-01", + List.of("olaa"), "2022-08-10", "2023-01-01", "Determines whether old tenant roles should be deleted", "Takes effect next maintenance run" ); diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 76dadc5605e..a7d831aa623 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -224,6 +224,7 @@ vespa_define_module( src/tests/tensor/hnsw_index src/tests/tensor/hnsw_saver src/tests/tensor/tensor_buffer_operations + src/tests/tensor/tensor_buffer_store src/tests/transactionlog src/tests/transactionlogstress src/tests/true diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/OperationNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/OperationNode.java index 0512e1dad2f..1c66686a9fe 100755 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/OperationNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/OperationNode.java @@ -80,7 +80,7 @@ public final class OperationNode extends CompositeNode { @Override public TensorType type(TypeContext<Reference> context) { - // Compute type using tensor types as arithmetic operators are supported on tensors + // Compute type using tensor types as the operators are supported on tensors // and is correct also in the special case of doubles. 
// As all our functions are type-commutative, we don't need to take operator precedence into account TensorType type = children.get(0).type(context); diff --git a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp index 34f9f7d27a9..564824031a6 100644 --- a/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/feature_store_test.cpp @@ -90,8 +90,7 @@ TEST_F(FeatureStoreTest, features_can_be_added_and_retrieved) r = fs.addFeatures(0, f); r1 = r.first; EXPECT_TRUE(r.second > 0); - EXPECT_EQ(FeatureStore::RefType::align(1u), - FeatureStore::RefType(r1).offset()); + EXPECT_EQ(1u, FeatureStore::RefType(r1).offset()); EXPECT_EQ(0u, FeatureStore::RefType(r1).bufferId()); LOG(info, "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", @@ -131,8 +130,7 @@ TEST_F(FeatureStoreTest, next_words_are_working) r = fs.addFeatures(0, f); r1 = r.first; EXPECT_TRUE(r.second > 0); - EXPECT_EQ(FeatureStore::RefType::align(1u), - FeatureStore::RefType(r1).offset()); + EXPECT_EQ(1u, FeatureStore::RefType(r1).offset()); EXPECT_EQ(0u, FeatureStore::RefType(r1).bufferId()); LOG(info, "bits(%" PRIu64 "), ref.offset(%zu), ref.bufferId(%u)", diff --git a/searchlib/src/tests/memoryindex/datastore/word_store_test.cpp b/searchlib/src/tests/memoryindex/datastore/word_store_test.cpp index 698780a1dc2..1ca87467fc6 100644 --- a/searchlib/src/tests/memoryindex/datastore/word_store_test.cpp +++ b/searchlib/src/tests/memoryindex/datastore/word_store_test.cpp @@ -18,14 +18,14 @@ TEST(WordStoreTest, words_can_be_added_and_retrieved) EntryRef r1 = ws.addWord(w1); EntryRef r2 = ws.addWord(w2); EntryRef r3 = ws.addWord(w3); - uint32_t invp = WordStore::RefType::align(1); // Reserved as invalid + uint32_t invp = WordStore::buffer_array_size; // Reserved as invalid uint32_t w1s = w1.size() + 1; - uint32_t w1p = WordStore::RefType::pad(w1s); + uint32_t w1p = 
WordStore::calc_pad(w1s); uint32_t w2s = w2.size() + 1; - uint32_t w2p = WordStore::RefType::pad(w2s); - EXPECT_EQ(invp, WordStore::RefType(r1).offset()); - EXPECT_EQ(invp + w1s + w1p, WordStore::RefType(r2).offset()); - EXPECT_EQ(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset()); + uint32_t w2p = WordStore::calc_pad(w2s); + EXPECT_EQ(invp, WordStore::RefType(r1).offset() * WordStore::buffer_array_size); + EXPECT_EQ(invp + w1s + w1p, WordStore::RefType(r2).offset() * WordStore::buffer_array_size); + EXPECT_EQ(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset() * WordStore::buffer_array_size); EXPECT_EQ(0u, WordStore::RefType(r1).bufferId()); EXPECT_EQ(0u, WordStore::RefType(r2).bufferId()); EXPECT_EQ(0u, WordStore::RefType(r3).bufferId()); diff --git a/searchlib/src/tests/tensor/tensor_buffer_store/CMakeLists.txt b/searchlib/src/tests/tensor/tensor_buffer_store/CMakeLists.txt new file mode 100644 index 00000000000..749d38a1383 --- /dev/null +++ b/searchlib/src/tests/tensor/tensor_buffer_store/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensor_buffer_store_test_app TEST + SOURCES + tensor_buffer_store_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_tensor_buffer_store_test_app COMMAND searchlib_tensor_buffer_store_test_app) diff --git a/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp new file mode 100644 index 00000000000..101b84e01aa --- /dev/null +++ b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp @@ -0,0 +1,164 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/searchlib/tensor/tensor_buffer_store.h> +#include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/eval/value.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::tensor::TensorBufferStore; +using vespalib::datastore::EntryRef; +using vespalib::eval::SimpleValue; +using vespalib::eval::TensorSpec; +using vespalib::eval::Value; +using vespalib::eval::ValueType; + +const vespalib::string tensor_type_spec("tensor(x{})"); + +class TensorBufferStoreTest : public testing::Test +{ +protected: + ValueType _tensor_type; + TensorBufferStore _store; + TensorBufferStoreTest(); + ~TensorBufferStoreTest() override; + EntryRef store_tensor(const Value& tensor); + EntryRef store_tensor(const TensorSpec& spec); + std::unique_ptr<Value> load_tensor(EntryRef ref); + TensorSpec load_tensor_spec(EntryRef ref); + vespalib::nbostream encode_stored_tensor(EntryRef ref); + void assert_store_load(const TensorSpec& tensor_spec); + void assert_store_load_many(const TensorSpec& tensor_spec); + void assert_store_move_load(const TensorSpec& tensor_spec); + void assert_store_encode_store_encoded_load(const TensorSpec& tensor_spec); +}; + +TensorBufferStoreTest::TensorBufferStoreTest() + : testing::Test(), + _tensor_type(ValueType::from_spec(tensor_type_spec)), + _store(_tensor_type, {}, 4) +{ +} + +TensorBufferStoreTest::~TensorBufferStoreTest() = default; + +EntryRef +TensorBufferStoreTest::store_tensor(const Value& tensor) +{ + EXPECT_EQ(_tensor_type, tensor.type()); + return _store.store_tensor(tensor); +} + +EntryRef +TensorBufferStoreTest::store_tensor(const TensorSpec& spec) +{ + auto tensor = SimpleValue::from_spec(spec); + return store_tensor(*tensor); +} + +std::unique_ptr<Value> +TensorBufferStoreTest::load_tensor(EntryRef ref) +{ + return _store.get_tensor(ref); +} + +vespalib::nbostream +TensorBufferStoreTest::encode_stored_tensor(EntryRef ref) +{ + vespalib::nbostream out; + _store.encode_stored_tensor(ref, 
out); + return out; +} + +TensorSpec +TensorBufferStoreTest::load_tensor_spec(EntryRef ref) +{ + auto loaded = load_tensor(ref); + return TensorSpec::from_value(*loaded); +} + +void +TensorBufferStoreTest::assert_store_load(const TensorSpec& tensor_spec) +{ + auto ref = store_tensor(tensor_spec); + auto loaded_spec = load_tensor_spec(ref); + _store.holdTensor(ref); + EXPECT_EQ(tensor_spec, loaded_spec); +} + +void +TensorBufferStoreTest::assert_store_load_many(const TensorSpec& tensor_spec) +{ + constexpr uint32_t cnt = 2000; + std::vector<EntryRef> refs; + for (uint32_t i = 0; i < cnt; ++i) { + refs.emplace_back(store_tensor(tensor_spec)); + } + for (auto ref : refs) { + auto loaded_spec = load_tensor_spec(ref); + _store.holdTensor(ref); + EXPECT_EQ(tensor_spec, loaded_spec); + } +} + +void +TensorBufferStoreTest::assert_store_move_load(const TensorSpec& tensor_spec) +{ + auto ref = store_tensor(tensor_spec); + auto ref2 = _store.move(ref); + EXPECT_NE(ref, ref2); + auto loaded_spec = load_tensor_spec(ref2); + _store.holdTensor(ref2); + EXPECT_EQ(tensor_spec, loaded_spec); +} + +void +TensorBufferStoreTest::assert_store_encode_store_encoded_load(const TensorSpec& tensor_spec) +{ + auto ref = store_tensor(tensor_spec); + auto encoded = encode_stored_tensor(ref); + _store.holdTensor(ref); + auto ref2 = _store.store_encoded_tensor(encoded); + EXPECT_NE(ref, ref2); + auto loaded_spec = load_tensor_spec(ref2); + _store.holdTensor(ref2); + EXPECT_EQ(tensor_spec, loaded_spec); +} + +std::vector<TensorSpec> tensor_specs = { + TensorSpec(tensor_type_spec), + TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5), + TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5).add({{"x", "b"}}, 5.5), + TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5).add({{"x", "b"}}, 5.5).add({{"x", "c"}}, 6.5), + TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5).add({{"x", "b"}}, 5.5).add({{"x", "c"}}, 6.5).add({{"x", "d"}}, 7.5) +}; + +TEST_F(TensorBufferStoreTest, 
tensor_can_be_stored_and_loaded) +{ + for (auto& tensor_spec : tensor_specs) { + assert_store_load(tensor_spec); + } +} + +TEST_F(TensorBufferStoreTest, tensor_can_be_stored_and_loaded_many_times) +{ + for (auto& tensor_spec : tensor_specs) { + assert_store_load_many(tensor_spec); + } +} + +TEST_F(TensorBufferStoreTest, stored_tensor_can_be_copied) +{ + for (auto& tensor_spec : tensor_specs) { + assert_store_move_load(tensor_spec); + } +} + +TEST_F(TensorBufferStoreTest, stored_tensor_can_be_encoded_and_stored_as_encoded_and_loaded) +{ + for (auto& tensor_spec : tensor_specs) { + assert_store_encode_store_encoded_load(tensor_spec); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp index b37300375a8..b5810d06047 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.cpp @@ -9,6 +9,7 @@ namespace search::memoryindex { constexpr size_t MIN_BUFFER_ARRAYS = 1024u; using index::SchemaUtil; +using vespalib::datastore::EntryRef; uint64_t FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) @@ -26,10 +27,10 @@ FeatureStore::writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &featur return oldOffset; } -vespalib::datastore::EntryRef +EntryRef FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen) { - uint32_t pad = RefType::pad(byteLen); + uint32_t pad = calc_pad(byteLen); auto result = _store.rawAllocator<uint8_t>(_typeId).alloc(byteLen + pad, DECODE_SAFETY); uint8_t *dst = result.data; memcpy(dst, src, byteLen); @@ -42,7 +43,7 @@ FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen) return result.ref; } -std::pair<vespalib::datastore::EntryRef, uint64_t> +std::pair<EntryRef, uint64_t> FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset) { uint64_t bitLen = (endOffset - beginOffset); @@ -52,18 +53,18 @@ 
FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset) assert(wordLen > 0); assert(byteLen > 0); const uint8_t *src = reinterpret_cast<const uint8_t *>(_f._valI - wordLen); - RefType ref = addFeatures(src, byteLen); + EntryRef ref = addFeatures(src, byteLen); return std::make_pair(ref, bitLen); } -vespalib::datastore::EntryRef -FeatureStore::moveFeatures(vespalib::datastore::EntryRef ref, uint64_t bitLen) +EntryRef +FeatureStore::moveFeatures(EntryRef ref, uint64_t bitLen) { const uint8_t *src = getBits(ref); uint64_t byteLen = (bitLen + 7) / 8; - RefType newRef = addFeatures(src, byteLen); + EntryRef newRef = addFeatures(src, byteLen); // Mark old features as dead - _store.incDead(ref, byteLen + RefType::pad(byteLen)); + _store.incDead(ref, byteLen + calc_pad(byteLen)); return newRef; } @@ -74,8 +75,7 @@ FeatureStore::FeatureStore(const Schema &schema) _d(nullptr), _fieldsParams(), _schema(schema), - _type(RefType::align(1u), MIN_BUFFER_ARRAYS, - RefType::offsetSize() / RefType::align(1u)), + _type(buffer_array_size, MIN_BUFFER_ARRAYS, RefType::offsetSize()), _typeId(0) { _f.setWriteContext(&_fctx); @@ -96,7 +96,7 @@ FeatureStore::~FeatureStore() _store.dropBuffers(); } -std::pair<vespalib::datastore::EntryRef, uint64_t> +std::pair<EntryRef, uint64_t> FeatureStore::addFeatures(uint32_t packedIndex, const DocIdAndFeatures &features) { uint64_t oldOffset = writeFeatures(packedIndex, features); @@ -109,14 +109,14 @@ void FeatureStore::add_features_guard_bytes() { uint32_t len = DECODE_SAFETY; - uint32_t pad = RefType::pad(len); - auto result = _store.rawAllocator<int8_t>(_typeId).alloc(len + pad); + uint32_t pad = calc_pad(len); + auto result = _store.rawAllocator<uint8_t>(_typeId).alloc(len + pad); memset(result.data, 0, len + pad); _store.incDead(result.ref, len + pad); } void -FeatureStore::getFeatures(uint32_t packedIndex, vespalib::datastore::EntryRef ref, DocIdAndFeatures &features) +FeatureStore::getFeatures(uint32_t packedIndex, EntryRef ref, 
DocIdAndFeatures &features) { setupForField(packedIndex, _d); setupForReadFeatures(ref, _d); @@ -124,7 +124,7 @@ FeatureStore::getFeatures(uint32_t packedIndex, vespalib::datastore::EntryRef re } size_t -FeatureStore::bitSize(uint32_t packedIndex, vespalib::datastore::EntryRef ref) +FeatureStore::bitSize(uint32_t packedIndex, EntryRef ref) { setupForField(packedIndex, _d); setupForUnpackFeatures(ref, _d); @@ -136,8 +136,8 @@ FeatureStore::bitSize(uint32_t packedIndex, vespalib::datastore::EntryRef ref) return bitLen; } -vespalib::datastore::EntryRef -FeatureStore::moveFeatures(uint32_t packedIndex, vespalib::datastore::EntryRef ref) +EntryRef +FeatureStore::moveFeatures(uint32_t packedIndex, EntryRef ref) { uint64_t bitLen = bitSize(packedIndex, ref); return moveFeatures(ref, bitLen); diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index a96ae9a8f2d..b1d975d0926 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -14,11 +14,14 @@ namespace search::memoryindex { */ class FeatureStore { public: - using DataStoreType = vespalib::datastore::DataStoreT<vespalib::datastore::AlignedEntryRefT<22, 2>>; + using DataStoreType = vespalib::datastore::DataStoreT<vespalib::datastore::EntryRefT<22>>; using RefType = DataStoreType::RefType; using EncodeContext = bitcompression::EG2PosOccEncodeContext<true>; using DecodeContextCooked = bitcompression::EG2PosOccDecodeContextCooked<true>; using generation_t = vespalib::GenerationHandler::generation_t; + static constexpr uint32_t buffer_array_size = 4u; // Must be a power of 2 + static constexpr uint32_t pad_constant = buffer_array_size - 1u; + static uint32_t calc_pad(uint32_t val) { return (-val & pad_constant); } private: using Schema = index::Schema; @@ -154,7 +157,7 @@ public: uint32_t bufferId = RefType(ref).bufferId(); const vespalib::datastore::BufferState 
&state = _store.getBufferState(bufferId); decoder.setEnd( - ((_store.getEntry<uint8_t>(RefType(0, bufferId)) + state.size() - + ((_store.getEntryArray<uint8_t>(RefType(0, bufferId), buffer_array_size) + state.size() - bits) + 7) / 8, false); } @@ -188,7 +191,7 @@ public: */ const uint8_t *getBits(vespalib::datastore::EntryRef ref) const { RefType iRef(ref); - return _store.getEntry<uint8_t>(iRef); + return _store.getEntryArray<uint8_t>(iRef, buffer_array_size); } /** diff --git a/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp index e5ec4ab7808..441587eb718 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/word_store.cpp @@ -10,16 +10,13 @@ constexpr size_t MIN_BUFFER_ARRAYS = 1024; WordStore::WordStore() : _store(), _numWords(0), - _type(RefType::align(1), - MIN_BUFFER_ARRAYS, - RefType::offsetSize() / RefType::align(1)), + _type(buffer_array_size, MIN_BUFFER_ARRAYS, RefType::offsetSize()), _typeId(0) { _store.addType(&_type); _store.init_primary_buffers(); } - WordStore::~WordStore() { _store.dropBuffers(); @@ -29,7 +26,7 @@ vespalib::datastore::EntryRef WordStore::addWord(const vespalib::stringref word) { size_t wordSize = word.size() + 1; - size_t bufferSize = RefType::align(wordSize); + size_t bufferSize = wordSize + calc_pad(wordSize); auto result = _store.rawAllocator<char>(_typeId).alloc(bufferSize); char *be = result.data; for (size_t i = 0; i < word.size(); ++i) { diff --git a/searchlib/src/vespa/searchlib/memoryindex/word_store.h b/searchlib/src/vespa/searchlib/memoryindex/word_store.h index b27ae65d776..913f6bc3ea5 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/word_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/word_store.h @@ -9,8 +9,11 @@ namespace search::memoryindex { class WordStore { public: - using DataStoreType = vespalib::datastore::DataStoreT<vespalib::datastore::AlignedEntryRefT<22, 2>>; + 
using DataStoreType = vespalib::datastore::DataStoreT<vespalib::datastore::EntryRefT<22>>; using RefType = DataStoreType::RefType; + static constexpr uint32_t buffer_array_size = 4u; // Must be a power of 2 + static constexpr uint32_t pad_constant = buffer_array_size - 1u; + static uint32_t calc_pad(uint32_t val) { return (-val & pad_constant); } private: DataStoreType _store; @@ -24,7 +27,7 @@ public: vespalib::datastore::EntryRef addWord(const vespalib::stringref word); const char *getWord(vespalib::datastore::EntryRef ref) const { RefType internalRef(ref); - return _store.getEntry<char>(internalRef); + return _store.getEntryArray<char>(internalRef, buffer_array_size); } vespalib::MemoryUsage getMemoryUsage() const { diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 7815ef7e770..46bfc0909aa 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -24,13 +24,17 @@ vespa_add_library(searchlib_tensor OBJECT imported_tensor_attribute_vector_read_guard.cpp inner_product_distance.cpp inv_log_level_generator.cpp + large_subspaces_buffer_type.cpp nearest_neighbor_index.cpp nearest_neighbor_index_saver.cpp serialized_fast_value_attribute.cpp + small_subspaces_buffer_type.cpp streamed_value_saver.cpp streamed_value_store.cpp tensor_attribute.cpp tensor_buffer_operations.cpp + tensor_buffer_store.cpp + tensor_buffer_type_mapper.cpp tensor_deserialize.cpp tensor_store.cpp reusable_set_visited_tracker.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp new file mode 100644 index 00000000000..cdd4d35c1df --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.cpp @@ -0,0 +1,86 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "large_subspaces_buffer_type.h" +#include "tensor_buffer_operations.h" +#include "tensor_buffer_type_mapper.h" +#include <vespa/vespalib/datastore/buffer_type.hpp> +#include <vespa/vespalib/util/array.hpp> +#include <vespa/vespalib/util/arrayref.h> + +using vespalib::alloc::MemoryAllocator; + +namespace search::tensor { + +LargeSubspacesBufferType::LargeSubspacesBufferType(const AllocSpec& spec, std::shared_ptr<MemoryAllocator> memory_allocator, TensorBufferTypeMapper& type_mapper) noexcept + : ParentType(1u, spec.minArraysInBuffer, spec.maxArraysInBuffer, spec.numArraysForNewBuffer, spec.allocGrowFactor), + _memory_allocator(std::move(memory_allocator)), + _ops(type_mapper.get_tensor_buffer_operations()) +{ +} + +LargeSubspacesBufferType::~LargeSubspacesBufferType() = default; + +void +LargeSubspacesBufferType::cleanHold(void* buffer, size_t offset, ElemCount numElems, CleanContext cleanCtx) +{ + auto elem = static_cast<ArrayType*>(buffer) + offset; + for (size_t i = 0; i < numElems; ++i) { + if (!elem->empty()) { + cleanCtx.extraBytesCleaned(elem->size()); + _ops.reclaim_labels({elem->data(), elem->size()}); + ArrayType().swap(*elem); + } + ++elem; + } +} + +void +LargeSubspacesBufferType::destroyElements(void *buffer, ElemCount numElems) +{ + auto elem = static_cast<ArrayType*>(buffer); + for (size_t i = 0; i < numElems; ++i) { + if (!elem->empty()) { + _ops.reclaim_labels({elem->data(), elem->size()}); + ArrayType().swap(*elem); + } + ++elem; + } +} + +void +LargeSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, ElemCount numElems) +{ + auto old_elems = static_cast<const ArrayType*>(oldBuffer); + auto new_elems = static_cast<ArrayType*>(newBuffer); + for (size_t i = 0; i < numElems; ++i) { + auto& old_elem = old_elems[i]; + new (new_elems + i) ArrayType(old_elem); + if (!old_elem.empty()) { + _ops.copied_labels({old_elem.data(), old_elem.size()}); + } + } +} + +void +LargeSubspacesBufferType::initializeReservedElements(void 
*buffer, ElemCount reservedElements) +{ + auto new_elems = static_cast<ArrayType*>(buffer); + const auto& empty = empty_entry(); + for (size_t i = 0; i < reservedElements; ++i) { + new (new_elems + i) ArrayType(empty); + } +} + +const vespalib::alloc::MemoryAllocator* +LargeSubspacesBufferType::get_memory_allocator() const +{ + return _memory_allocator.get(); +} + +} + +namespace vespalib::datastore { + +template class BufferType<Array<char>>; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.h b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.h new file mode 100644 index 00000000000..cfab8ef20af --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/large_subspaces_buffer_type.h @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/array_store_config.h> +#include <vespa/vespalib/datastore/buffer_type.h> +#include <vespa/vespalib/util/array.h> +#include <memory> + +namespace vespalib::alloc { class MemoryAllocator; } + +namespace search::tensor { + +class TensorBufferOperations; +class TensorBufferTypeMapper; + +/* + * Class representing buffer type for tensors with a large number of + * subspaces in array store. Tensor buffers are externally allocated + * (cf. vespalib::Array). 
+ */ +class LargeSubspacesBufferType : public vespalib::datastore::BufferType<vespalib::Array<char>> +{ + using AllocSpec = vespalib::datastore::ArrayStoreConfig::AllocSpec; + using ArrayType = vespalib::Array<char>; + using ParentType = vespalib::datastore::BufferType<ArrayType>; + using CleanContext = typename ParentType::CleanContext; + std::shared_ptr<vespalib::alloc::MemoryAllocator> _memory_allocator; + TensorBufferOperations& _ops; +public: + LargeSubspacesBufferType(const AllocSpec& spec, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator, TensorBufferTypeMapper& type_mapper) noexcept; + ~LargeSubspacesBufferType() override; + void cleanHold(void* buffer, size_t offset, ElemCount numElems, CleanContext cleanCtx) override; + void destroyElements(void *buffer, ElemCount numElems) override; + void fallbackCopy(void *newBuffer, const void *oldBuffer, ElemCount numElems) override; + void initializeReservedElements(void *buffer, ElemCount reservedElements) override; + const vespalib::alloc::MemoryAllocator* get_memory_allocator() const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 2233eb77e89..3e9f41c812c 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -48,13 +48,7 @@ SerializedFastValueAttribute::getTensor(DocId docId) const if (docId < getCommittedDocIdLimit()) { ref = acquire_entry_ref(docId); } - if (!ref.valid()) { - return {}; - } - if (const auto * ptr = _streamedValueStore.get_tensor_entry(ref)) { - return ptr->create_fast_value_view(_tensor_type); - } - return {}; + return _streamedValueStore.get_tensor(ref); } bool diff --git a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp new file mode 
100644 index 00000000000..adbd3dee2b7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.cpp @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "small_subspaces_buffer_type.h" +#include "tensor_buffer_operations.h" +#include "tensor_buffer_type_mapper.h" +#include <vespa/vespalib/util/arrayref.h> + +using vespalib::alloc::MemoryAllocator; + +namespace search::tensor { + +SmallSubspacesBufferType::SmallSubspacesBufferType(uint32_t array_size, const AllocSpec& spec, std::shared_ptr<MemoryAllocator> memory_allocator, TensorBufferTypeMapper& type_mapper) noexcept + : ParentType(array_size, spec.minArraysInBuffer, spec.maxArraysInBuffer, spec.numArraysForNewBuffer, spec.allocGrowFactor), + _memory_allocator(std::move(memory_allocator)), + _ops(type_mapper.get_tensor_buffer_operations()) +{ +} + +SmallSubspacesBufferType::~SmallSubspacesBufferType() = default; + +void +SmallSubspacesBufferType::cleanHold(void* buffer, size_t offset, ElemCount numElems, CleanContext) +{ + char* elem = static_cast<char *>(buffer) + offset; + while (numElems >= getArraySize()) { + _ops.reclaim_labels(vespalib::ArrayRef<char>(elem, getArraySize())); + elem += getArraySize(); + numElems -= getArraySize(); + } +} + +void +SmallSubspacesBufferType::destroyElements(void *buffer, ElemCount numElems) +{ + char* elem = static_cast<char *>(buffer); + while (numElems >= getArraySize()) { + _ops.reclaim_labels(vespalib::ArrayRef<char>(elem, getArraySize())); + elem += getArraySize(); + numElems -= getArraySize(); + } +} + +void +SmallSubspacesBufferType::fallbackCopy(void *newBuffer, const void *oldBuffer, ElemCount numElems) +{ + memcpy(newBuffer, oldBuffer, numElems); + const char *elem = static_cast<const char *>(oldBuffer); + while (numElems >= getArraySize()) { + _ops.copied_labels(vespalib::ConstArrayRef<char>(elem, getArraySize())); + elem += getArraySize(); + numElems -= 
getArraySize(); + } +} + +void +SmallSubspacesBufferType::initializeReservedElements(void *buffer, ElemCount reservedElements) +{ + memset(buffer, 0, reservedElements); +} + +const vespalib::alloc::MemoryAllocator* +SmallSubspacesBufferType::get_memory_allocator() const +{ + return _memory_allocator.get(); +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.h b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.h new file mode 100644 index 00000000000..a778183c5a2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/small_subspaces_buffer_type.h @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/array_store_config.h> +#include <vespa/vespalib/datastore/buffer_type.h> +#include <memory> + +namespace vespalib::alloc { class MemoryAllocator; } + +namespace search::tensor { + +class TensorBufferOperations; +class TensorBufferTypeMapper; + +/* + * Class representing buffer type for tensors with a small number of + * subspaces in array store. Tensor buffers are internal in data store buffer. 
+ */ +class SmallSubspacesBufferType : public vespalib::datastore::BufferType<char> +{ + using AllocSpec = vespalib::datastore::ArrayStoreConfig::AllocSpec; + using ParentType = vespalib::datastore::BufferType<char>; + std::shared_ptr<vespalib::alloc::MemoryAllocator> _memory_allocator; + TensorBufferOperations& _ops; +public: + SmallSubspacesBufferType(const SmallSubspacesBufferType&) = delete; + SmallSubspacesBufferType& operator=(const SmallSubspacesBufferType&) = delete; + SmallSubspacesBufferType(SmallSubspacesBufferType&&) noexcept = default; + SmallSubspacesBufferType& operator=(SmallSubspacesBufferType&&) noexcept = default; + SmallSubspacesBufferType(uint32_t array_size, const AllocSpec& spec, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator, TensorBufferTypeMapper& type_mapper) noexcept; + ~SmallSubspacesBufferType() override; + void cleanHold(void* buffer, size_t offset, ElemCount numElems, CleanContext cleanCtx) override; + void destroyElements(void *buffer, ElemCount numElems) override; + void fallbackCopy(void *newBuffer, const void *oldBuffer, ElemCount numElems) override; + void initializeReservedElements(void *buffer, ElemCount reservedElements) override; + const vespalib::alloc::MemoryAllocator* get_memory_allocator() const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_saver.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_saver.cpp index f7b93654c33..25d3901d761 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_saver.cpp @@ -31,7 +31,7 @@ StreamedValueSaver::onSave(IAttributeSaveTarget &saveTarget) const uint32_t docIdLimit(_refs.size()); vespalib::nbostream stream; for (uint32_t lid = 0; lid < docIdLimit; ++lid) { - if (_tensorStore.encode_tensor(_refs[lid], stream)) { + if (_tensorStore.encode_stored_tensor(_refs[lid], stream)) { uint32_t sz = stream.size(); datWriter->write(&sz, sizeof(sz)); 
datWriter->write(stream.peek(), stream.size()); diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index 58e625e6aca..763486f82e2 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -204,6 +204,15 @@ StreamedValueStore::get_tensor_entry(EntryRef ref) const return entry.get(); } +std::unique_ptr<vespalib::eval::Value> +StreamedValueStore::get_tensor(EntryRef ref) const +{ + if (const auto * ptr = get_tensor_entry(ref)) { + return ptr->create_fast_value_view(_tensor_type); + } + return {}; +} + void StreamedValueStore::holdTensor(EntryRef ref) { @@ -229,7 +238,7 @@ StreamedValueStore::move(EntryRef ref) } bool -StreamedValueStore::encode_tensor(EntryRef ref, vespalib::nbostream &target) const +StreamedValueStore::encode_stored_tensor(EntryRef ref, vespalib::nbostream &target) const { if (const auto * entry = get_tensor_entry(ref)) { entry->encode_value(_tensor_type, target); diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h index 29201dc0e61..9c5c5a91d18 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h @@ -60,6 +60,7 @@ private: TensorStoreType _concrete_store; const vespalib::eval::ValueType _tensor_type; EntryRef add_entry(TensorEntry::SP tensor); + const TensorEntry* get_tensor_entry(EntryRef ref) const; public: StreamedValueStore(const vespalib::eval::ValueType &tensor_type); ~StreamedValueStore() override; @@ -69,8 +70,8 @@ public: void holdTensor(EntryRef ref) override; EntryRef move(EntryRef ref) override; - const TensorEntry * get_tensor_entry(EntryRef ref) const; - bool encode_tensor(EntryRef ref, vespalib::nbostream &target) const; + std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const; + 
bool encode_stored_tensor(EntryRef ref, vespalib::nbostream &target) const; EntryRef store_tensor(const vespalib::eval::Value &tensor); EntryRef store_encoded_tensor(vespalib::nbostream &encoded); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp new file mode 100644 index 00000000000..34454e9f780 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.cpp @@ -0,0 +1,97 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "tensor_buffer_store.h" +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/streamed/streamed_value_builder_factory.h> +#include <vespa/vespalib/datastore/array_store.hpp> +#include <vespa/vespalib/datastore/buffer_type.hpp> +#include <vespa/vespalib/datastore/datastore.hpp> +#include <vespa/vespalib/util/size_literals.h> + +using vespalib::alloc::MemoryAllocator; +using vespalib::datastore::EntryRef; +using vespalib::eval::StreamedValueBuilderFactory; +using vespalib::eval::Value; +using vespalib::eval::ValueType; + +namespace search::tensor { + +namespace { + +constexpr float ALLOC_GROW_FACTOR = 0.2; + +} + +TensorBufferStore::TensorBufferStore(const ValueType& tensor_type, std::shared_ptr<MemoryAllocator> allocator, uint32_t max_small_subspaces_type_id) + : TensorStore(ArrayStoreType::get_data_store_base(_array_store)), + _tensor_type(tensor_type), + _ops(_tensor_type), + _array_store(ArrayStoreType::optimizedConfigForHugePage(max_small_subspaces_type_id, + TensorBufferTypeMapper(max_small_subspaces_type_id, &_ops), + MemoryAllocator::HUGEPAGE_SIZE, 4_Ki, 8_Ki, ALLOC_GROW_FACTOR), + std::move(allocator), TensorBufferTypeMapper(max_small_subspaces_type_id, &_ops)) +{ +} + +TensorBufferStore::~TensorBufferStore() = default; + +void +TensorBufferStore::holdTensor(EntryRef ref) +{ + _array_store.remove(ref); +} + +EntryRef +TensorBufferStore::move(EntryRef ref) +{ 
+ if (!ref.valid()) { + return EntryRef(); + } + auto buf = _array_store.get(ref); + auto new_ref = _array_store.add(buf); + _ops.copied_labels(buf); + _array_store.remove(ref); + return new_ref; +} + +EntryRef +TensorBufferStore::store_tensor(const Value &tensor) +{ + uint32_t num_subspaces = tensor.index().size(); + auto array_size = _ops.get_array_size(num_subspaces); + auto ref = _array_store.allocate(array_size); + auto buf = _array_store.get_writable(ref); + _ops.store_tensor(buf, tensor); + return ref; +} + +EntryRef +TensorBufferStore::store_encoded_tensor(vespalib::nbostream &encoded) +{ + const auto &factory = StreamedValueBuilderFactory::get(); + auto val = vespalib::eval::decode_value(encoded, factory); + return store_tensor(*val); +} + +std::unique_ptr<Value> +TensorBufferStore::get_tensor(EntryRef ref) const +{ + if (!ref.valid()) { + return {}; + } + auto buf = _array_store.get(ref); + return _ops.make_fast_view(buf, _tensor_type); +} + +bool +TensorBufferStore::encode_stored_tensor(EntryRef ref, vespalib::nbostream &target) const +{ + if (!ref.valid()) { + return false; + } + auto buf = _array_store.get(ref); + _ops.encode_stored_tensor(buf, _tensor_type, target); + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h new file mode 100644 index 00000000000..18b98efa8fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "tensor_store.h" +#include "tensor_buffer_operations.h" +#include "tensor_buffer_type_mapper.h" +#include "large_subspaces_buffer_type.h" +#include "small_subspaces_buffer_type.h" +#include <vespa/eval/eval/value_type.h> +#include <vespa/vespalib/datastore/array_store.h> + +namespace search::tensor { + +/** + * Class for storing tensor buffers in memory and making tensor views + * based on stored tensor buffer. + */ +class TensorBufferStore : public TensorStore +{ + using RefType = vespalib::datastore::EntryRefT<19>; + using ArrayStoreType = vespalib::datastore::ArrayStore<char, RefType, TensorBufferTypeMapper>; + vespalib::eval::ValueType _tensor_type; + TensorBufferOperations _ops; + ArrayStoreType _array_store; +public: + TensorBufferStore(const vespalib::eval::ValueType& tensor_type, std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator, uint32_t max_small_subspaces_type_id); + ~TensorBufferStore(); + void holdTensor(EntryRef ref) override; + EntryRef move(EntryRef ref) override; + EntryRef store_tensor(const vespalib::eval::Value &tensor); + EntryRef store_encoded_tensor(vespalib::nbostream &encoded); + std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const; + bool encode_stored_tensor(EntryRef ref, vespalib::nbostream &target) const; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp new file mode 100644 index 00000000000..b4b0b9bbc79 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.cpp @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "tensor_buffer_type_mapper.h" +#include "tensor_buffer_operations.h" +#include <algorithm> + +namespace search::tensor { + +TensorBufferTypeMapper::TensorBufferTypeMapper() + : _array_sizes(), + _ops(nullptr) +{ +} + +TensorBufferTypeMapper::TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, TensorBufferOperations* ops) + : _array_sizes(), + _ops(ops) +{ + _array_sizes.reserve(max_small_subspaces_type_id + 1); + _array_sizes.emplace_back(0); // type id 0 uses LargeSubspacesBufferType + for (uint32_t type_id = 1; type_id <= max_small_subspaces_type_id; ++type_id) { + auto num_subspaces = type_id - 1; + _array_sizes.emplace_back(_ops->get_array_size(num_subspaces)); + } +} + +TensorBufferTypeMapper::~TensorBufferTypeMapper() = default; + +uint32_t +TensorBufferTypeMapper::get_type_id(size_t array_size) const +{ + assert(!_array_sizes.empty()); + auto result = std::lower_bound(_array_sizes.begin() + 1, _array_sizes.end(), array_size); + if (result == _array_sizes.end()) { + return 0; // type id 0 uses LargeSubspacesBufferType + } + return result - _array_sizes.begin(); +} + +size_t +TensorBufferTypeMapper::get_array_size(uint32_t type_id) const +{ + assert(type_id > 0 && type_id < _array_sizes.size()); + return _array_sizes[type_id]; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h new file mode 100644 index 00000000000..1e02c1cb608 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_type_mapper.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> +#include <vector> + +namespace search::tensor { + +class LargeSubspacesBufferType; +class SmallSubspacesBufferType; +class TensorBufferOperations; + +/* + * This class provides mapping between type ids and array sizes needed for + * storing a tensor. 
+ */ +class TensorBufferTypeMapper +{ + std::vector<size_t> _array_sizes; + TensorBufferOperations* _ops; +public: + using SmallBufferType = SmallSubspacesBufferType; + using LargeBufferType = LargeSubspacesBufferType; + + TensorBufferTypeMapper(); + TensorBufferTypeMapper(uint32_t max_small_subspaces_type_id, TensorBufferOperations* ops); + ~TensorBufferTypeMapper(); + + uint32_t get_type_id(size_t array_size) const; + size_t get_array_size(uint32_t type_id) const; + TensorBufferOperations& get_tensor_buffer_operations() const noexcept { return *_ops; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp index 433f543ab92..11b6a1e3020 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -199,14 +199,14 @@ FakeMemTreeOccMgr::sync() void FakeMemTreeOccMgr::add(uint32_t wordIdx, index::DocIdAndFeatures &features) { - typedef FeatureStore::RefType RefType; - const FakeWord *fw = _fakeWords[wordIdx]; std::pair<EntryRef, uint64_t> r = _featureStore.addFeatures(fw->getPackedIndex(), features); + size_t feature_size = (r.second + 7) / 8; + feature_size += FeatureStore::calc_pad(feature_size); - _featureSizes[wordIdx] += RefType::align((r.second + 7) / 8) * 8; + _featureSizes[wordIdx] += feature_size * 8; _unflushed.push_back(PendingOp(wordIdx, features.doc_id(), r.first)); @@ -240,7 +240,6 @@ FakeMemTreeOccMgr::sortUnflushed() void FakeMemTreeOccMgr::flush() { - typedef FeatureStore::RefType RefType; typedef std::vector<PendingOp>::iterator I; if (_unflushed.empty()) @@ -264,7 +263,9 @@ FakeMemTreeOccMgr::flush() if (i->getRemove()) { if (itr.valid() && itr.getKey() == docId) { uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(), EntryRef(itr.getData().get_features_relaxed())); - _featureSizes[wordIdx] -= RefType::align((bits + 7) / 8) * 8; + size_t feature_size = (bits + 7) 
/ 8; + feature_size += FeatureStore::calc_pad(feature_size); + _featureSizes[wordIdx] -= feature_size * 8; tree.remove(itr); } } else { diff --git a/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/aws/AwsCredentials.java b/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/aws/AwsCredentials.java index c9a5dbbcbfc..e3bf7bb02d3 100644 --- a/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/aws/AwsCredentials.java +++ b/vespa-athenz/src/main/java/com/yahoo/vespa/athenz/aws/AwsCredentials.java @@ -69,7 +69,7 @@ public class AwsCredentials implements AutoCloseable { } /* - * Checks credential expiration, returns true if it will expipre in the next MIN_EXPIRY minutes + * Checks credential expiration, returns true if it will expire in the next MIN_EXPIRY minutes */ static boolean shouldRefresh(AwsTemporaryCredentials credentials) { Instant expiration = Optional.ofNullable(credentials).map(AwsTemporaryCredentials::expiration).orElse(Instant.EPOCH); diff --git a/vespalib/src/tests/datastore/array_store/array_store_test.cpp b/vespalib/src/tests/datastore/array_store/array_store_test.cpp index 74bbf59625b..2ff2897461b 100644 --- a/vespalib/src/tests/datastore/array_store/array_store_test.cpp +++ b/vespalib/src/tests/datastore/array_store/array_store_test.cpp @@ -29,8 +29,8 @@ constexpr float ALLOC_GROW_FACTOR = 0.2; } -template <typename EntryT, typename RefT = EntryRefT<19> > -struct ArrayStoreTest : public testing::Test +template <typename TestT, typename EntryT, typename RefT = EntryRefT<19> > +struct ArrayStoreTest : public TestT { using EntryRefType = RefT; using ArrayStoreType = ArrayStore<EntryT, RefT>; @@ -44,25 +44,39 @@ struct ArrayStoreTest : public testing::Test ArrayStoreType store; ReferenceStore refStore; generation_t generation; - ArrayStoreTest(uint32_t maxSmallArraySize = 3, bool enable_free_lists = true) + bool add_using_allocate; + ArrayStoreTest(uint32_t maxSmallArraySize = 3, bool enable_free_lists = true, bool add_using_allocate_in = false) : 
store(ArrayStoreConfig(maxSmallArraySize, ArrayStoreConfig::AllocSpec(16, RefT::offsetSize(), 8_Ki, ALLOC_GROW_FACTOR)).enable_free_lists(enable_free_lists), std::make_unique<MemoryAllocatorObserver>(stats)), refStore(), - generation(1) + generation(1), + add_using_allocate(add_using_allocate_in) {} ArrayStoreTest(const ArrayStoreConfig &storeCfg) : store(storeCfg, std::make_unique<MemoryAllocatorObserver>(stats)), refStore(), - generation(1) + generation(1), + add_using_allocate(false) {} void assertAdd(const EntryVector &input) { EntryRef ref = add(input); assertGet(ref, input); } EntryRef add(const EntryVector &input) { - EntryRef result = store.add(ConstArrayRef(input)); + EntryRef result; + if (add_using_allocate) { + result = store.allocate(input.size()); + auto dest = store.get_writable(result); + assert(dest.size() == input.size()); + for (size_t i = 0; i < input.size(); ++i) { + dest[i] = input[i]; + } + } else { + // This is default and preferred way of adding an array. + result = store.add(ConstArrayRef(input)); + } assert(refStore.count(result) == 0); refStore.insert(std::make_pair(result, input)); return result; @@ -148,21 +162,48 @@ struct ArrayStoreTest : public testing::Test size_t largeArraySize() const { return sizeof(LargeArray); } }; -using NumberStoreTest = ArrayStoreTest<uint32_t>; -using StringStoreTest = ArrayStoreTest<std::string>; -using SmallOffsetNumberStoreTest = ArrayStoreTest<uint32_t, EntryRefT<10>>; +struct TestParam { + bool add_using_allocate; + TestParam(bool add_using_allocate_in) : add_using_allocate(add_using_allocate_in) {} +}; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << (param.add_using_allocate ? 
"add_using_allocate" : "basic_add"); + return os; +} + +using NumberStoreTestWithParam = ArrayStoreTest<testing::TestWithParam<TestParam>, uint32_t>; + +struct NumberStoreTest : public NumberStoreTestWithParam { + NumberStoreTest() : NumberStoreTestWithParam(3, true, GetParam().add_using_allocate) {} +}; -struct NumberStoreFreeListsDisabledTest : public NumberStoreTest { - NumberStoreFreeListsDisabledTest() : NumberStoreTest(3, false) {} +struct NumberStoreFreeListsDisabledTest : public NumberStoreTestWithParam { + NumberStoreFreeListsDisabledTest() : NumberStoreTestWithParam(3, false, GetParam().add_using_allocate) {} }; +using NumberStoreBasicTest = ArrayStoreTest<testing::Test, uint32_t>; +using StringStoreTest = ArrayStoreTest<testing::Test, std::string>; +using SmallOffsetNumberStoreTest = ArrayStoreTest<testing::Test, uint32_t, EntryRefT<10>>; + TEST(BasicStoreTest, test_with_trivial_and_non_trivial_types) { - EXPECT_TRUE(vespalib::can_skip_destruction<NumberStoreTest::value_type>); + EXPECT_TRUE(vespalib::can_skip_destruction<NumberStoreBasicTest::value_type>); EXPECT_FALSE(vespalib::can_skip_destruction<StringStoreTest::value_type>); } -TEST_F(NumberStoreTest, control_static_sizes) { +VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(NumberStoreMultiTest, + NumberStoreTest, + testing::Values(TestParam(false), TestParam(true)), + testing::PrintToStringParamName()); + +VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(NumberStoreFreeListsDisabledMultiTest, + NumberStoreFreeListsDisabledTest, + testing::Values(TestParam(false), TestParam(true)), + testing::PrintToStringParamName()); + +TEST_P(NumberStoreTest, control_static_sizes) { #ifdef _LIBCPP_VERSION EXPECT_EQ(440u, sizeof(f.store)); EXPECT_EQ(296u, sizeof(NumberStoreTest::ArrayStoreType::DataStoreType)); @@ -176,7 +217,7 @@ TEST_F(NumberStoreTest, control_static_sizes) { EXPECT_EQ(32u, usage.usedBytes()); } -TEST_F(NumberStoreTest, add_and_get_small_arrays_of_trivial_type) +TEST_P(NumberStoreTest, 
add_and_get_small_arrays_of_trivial_type) { assertAdd({}); assertAdd({1}); @@ -192,7 +233,7 @@ TEST_F(StringStoreTest, add_and_get_small_arrays_of_non_trivial_type) assertAdd({"ddd", "eeee", "fffff"}); } -TEST_F(NumberStoreTest, add_and_get_large_arrays_of_simple_type) +TEST_P(NumberStoreTest, add_and_get_large_arrays_of_simple_type) { assertAdd({1,2,3,4}); assertAdd({2,3,4,5,6}); @@ -204,7 +245,7 @@ TEST_F(StringStoreTest, add_and_get_large_arrays_of_non_trivial_type) assertAdd({"ddd", "eee", "ffff", "gggg", "hhhh"}); } -TEST_F(NumberStoreTest, elements_are_put_on_hold_when_a_small_array_is_removed) +TEST_P(NumberStoreTest, elements_are_put_on_hold_when_a_small_array_is_removed) { EntryRef ref = add({1,2,3}); assertBufferState(ref, MemStats().used(3).hold(0)); @@ -212,7 +253,7 @@ TEST_F(NumberStoreTest, elements_are_put_on_hold_when_a_small_array_is_removed) assertBufferState(ref, MemStats().used(3).hold(3)); } -TEST_F(NumberStoreTest, elements_are_put_on_hold_when_a_large_array_is_removed) +TEST_P(NumberStoreTest, elements_are_put_on_hold_when_a_large_array_is_removed) { EntryRef ref = add({1,2,3,4}); // Note: The first buffer has the first element reserved -> we expect 2 elements used here. 
@@ -221,23 +262,23 @@ TEST_F(NumberStoreTest, elements_are_put_on_hold_when_a_large_array_is_removed) assertBufferState(ref, MemStats().used(2).hold(1).dead(1)); } -TEST_F(NumberStoreTest, small_arrays_are_allocated_from_free_lists_when_enabled) { +TEST_P(NumberStoreTest, small_arrays_are_allocated_from_free_lists_when_enabled) { assert_ref_reused({1,2,3}, {4,5,6}, true); } -TEST_F(NumberStoreTest, large_arrays_are_allocated_from_free_lists_when_enabled) { +TEST_P(NumberStoreTest, large_arrays_are_allocated_from_free_lists_when_enabled) { assert_ref_reused({1,2,3,4}, {5,6,7,8}, true); } -TEST_F(NumberStoreFreeListsDisabledTest, small_arrays_are_NOT_allocated_from_free_lists_when_disabled) { +TEST_P(NumberStoreFreeListsDisabledTest, small_arrays_are_NOT_allocated_from_free_lists_when_disabled) { assert_ref_reused({1,2,3}, {4,5,6}, false); } -TEST_F(NumberStoreFreeListsDisabledTest, large_arrays_are_NOT_allocated_from_free_lists_when_disabled) { +TEST_P(NumberStoreFreeListsDisabledTest, large_arrays_are_NOT_allocated_from_free_lists_when_disabled) { assert_ref_reused({1,2,3,4}, {5,6,7,8}, false); } -TEST_F(NumberStoreTest, track_size_of_large_array_allocations_with_free_lists_enabled) { +TEST_P(NumberStoreTest, track_size_of_large_array_allocations_with_free_lists_enabled) { EntryRef ref = add({1,2,3,4}); assert_buffer_stats(ref, BufferStats().used(2).hold(0).dead(1).extra_used(16)); remove({1,2,3,4}); @@ -269,7 +310,7 @@ TEST_F(SmallOffsetNumberStoreTest, new_underlying_buffer_is_allocated_when_curre namespace { void -test_compaction(NumberStoreTest &f) +test_compaction(NumberStoreBasicTest &f) { EntryRef size1Ref = f.add({1}); EntryRef size2Ref = f.add({2,2}); @@ -300,8 +341,8 @@ test_compaction(NumberStoreTest &f) } -struct NumberStoreTwoSmallBufferTypesTest : public NumberStoreTest { - NumberStoreTwoSmallBufferTypesTest() : NumberStoreTest(2) {} +struct NumberStoreTwoSmallBufferTypesTest : public NumberStoreBasicTest { + NumberStoreTwoSmallBufferTypesTest() : 
NumberStoreBasicTest(2) {} }; TEST_F(NumberStoreTwoSmallBufferTypesTest, buffer_with_most_dead_space_is_compacted) @@ -372,23 +413,23 @@ void testCompaction(NumberStoreTest &f, bool compactMemory, bool compactAddressS } -TEST_F(NumberStoreTest, compactWorst_selects_on_only_memory) { +TEST_P(NumberStoreTest, compactWorst_selects_on_only_memory) { testCompaction(*this, true, false); } -TEST_F(NumberStoreTest, compactWorst_selects_on_only_address_space) { +TEST_P(NumberStoreTest, compactWorst_selects_on_only_address_space) { testCompaction(*this, false, true); } -TEST_F(NumberStoreTest, compactWorst_selects_on_both_memory_and_address_space) { +TEST_P(NumberStoreTest, compactWorst_selects_on_both_memory_and_address_space) { testCompaction(*this, true, true); } -TEST_F(NumberStoreTest, compactWorst_selects_on_neither_memory_nor_address_space) { +TEST_P(NumberStoreTest, compactWorst_selects_on_neither_memory_nor_address_space) { testCompaction(*this, false, false); } -TEST_F(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_small_arrays) +TEST_P(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_small_arrays) { MemStats exp(store.getMemoryUsage()); add({1,2,3}); @@ -399,7 +440,7 @@ TEST_F(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_small_a assertMemoryUsage(exp.holdToDead(entrySize() * 3)); } -TEST_F(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_large_arrays) +TEST_P(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_large_arrays) { MemStats exp(store.getMemoryUsage()); add({1,2,3,4}); @@ -411,7 +452,7 @@ TEST_F(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_large_a dead(largeArraySize())); } -TEST_F(NumberStoreTest, address_space_usage_is_ratio_between_used_arrays_and_number_of_possible_arrays) +TEST_P(NumberStoreTest, address_space_usage_is_ratio_between_used_arrays_and_number_of_possible_arrays) { add({2,2}); add({3,3,3}); @@ -435,8 +476,8 @@ 
TEST_F(NumberStoreTest, address_space_usage_is_ratio_between_used_arrays_and_num EXPECT_EQ(expLimit, store.addressSpaceUsage().limit()); } -struct ByteStoreTest : public ArrayStoreTest<uint8_t> { - ByteStoreTest() : ArrayStoreTest<uint8_t>(ByteStoreTest::ArrayStoreType:: +struct ByteStoreTest : public ArrayStoreTest<testing::Test, uint8_t> { + ByteStoreTest() : ArrayStoreTest<testing::Test, uint8_t>(ByteStoreTest::ArrayStoreType:: optimizedConfigForHugePage(1023, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, 4_Ki, 8_Ki, ALLOC_GROW_FACTOR)) {} @@ -452,7 +493,7 @@ TEST_F(ByteStoreTest, offset_in_EntryRefT_is_within_bounds_when_allocating_memor assertStoreContent(); } -TEST_F(NumberStoreTest, provided_memory_allocator_is_used) +TEST_P(NumberStoreTest, provided_memory_allocator_is_used) { EXPECT_EQ(AllocStats(4, 0), stats); } diff --git a/vespalib/src/tests/datastore/datastore/datastore_test.cpp b/vespalib/src/tests/datastore/datastore/datastore_test.cpp index 964978e5510..b77599c4e34 100644 --- a/vespalib/src/tests/datastore/datastore/datastore_test.cpp +++ b/vespalib/src/tests/datastore/datastore/datastore_test.cpp @@ -215,34 +215,6 @@ TEST(DataStoreTest, require_that_entry_ref_is_working) } } -TEST(DataStoreTest, require_that_aligned_entry_ref_is_working) -{ - using MyRefType = AlignedEntryRefT<22, 2>; // 4 byte alignement - EXPECT_EQ(16_Mi, MyRefType::offsetSize()); - EXPECT_EQ(1_Ki, MyRefType::numBuffers()); - EXPECT_EQ(0u, MyRefType::align(0)); - EXPECT_EQ(4u, MyRefType::align(1)); - EXPECT_EQ(4u, MyRefType::align(2)); - EXPECT_EQ(4u, MyRefType::align(3)); - EXPECT_EQ(4u, MyRefType::align(4)); - EXPECT_EQ(8u, MyRefType::align(5)); - { - MyRefType r(0, 0); - EXPECT_EQ(0u, r.offset()); - EXPECT_EQ(0u, r.bufferId()); - } - { - MyRefType r(237, 13); - EXPECT_EQ(MyRefType::align(237), r.offset()); - EXPECT_EQ(13u, r.bufferId()); - } - { - MyRefType r(MyRefType::offsetSize() - 4, 1023); - EXPECT_EQ(MyRefType::align(MyRefType::offsetSize() - 4), r.offset()); - 
EXPECT_EQ(1023u, r.bufferId()); - } -} - TEST(DataStoreTest, require_that_entries_can_be_added_and_retrieved) { using IntStore = DataStore<int>; diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.h b/vespalib/src/vespa/vespalib/datastore/array_store.h index fd0271915dd..db037ee12fb 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.h +++ b/vespalib/src/vespa/vespalib/datastore/array_store.h @@ -51,7 +51,9 @@ private: void initArrayTypes(const ArrayStoreConfig &cfg, std::shared_ptr<alloc::MemoryAllocator> memory_allocator); EntryRef addSmallArray(const ConstArrayRef &array); + EntryRef allocate_small_array(size_t array_size); EntryRef addLargeArray(const ConstArrayRef &array); + EntryRef allocate_large_array(size_t array_size); ConstArrayRef getSmallArray(RefT ref, size_t arraySize) const { const EntryT *buf = _store.template getEntryArray<EntryT>(ref, arraySize); return ConstArrayRef(buf, arraySize); @@ -81,6 +83,17 @@ public: } /** + * Allocate an array of the given size without any instantiation of EntryT elements. + * + * Use get_writable() to get a reference to the array for writing. + * + * NOTE: In most cases add() should be used instead. + * This function is however relevant when serializing objects into char buffers + * when e.g. using an ArrayStore<char> for memory management. + */ + EntryRef allocate(size_t array_size); + + /** * Returns a writeable reference to the given array. * * NOTE: Use with care if reader threads are accessing arrays at the same time. 
@@ -110,7 +123,8 @@ public: static vespalib::GenerationHolder &getGenerationHolderLocation(ArrayStore &self) { return DataStoreBase::getGenerationHolderLocation(self._store); } - + // need object location before construction + static DataStoreBase& get_data_store_base(ArrayStore &self) { return self._store; } // Should only be used for unit testing const BufferState &bufferState(EntryRef ref) const; diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.hpp b/vespalib/src/vespa/vespalib/datastore/array_store.hpp index 308afd2b122..4fc13396f6b 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/array_store.hpp @@ -76,6 +76,20 @@ ArrayStore<EntryT, RefT, TypeMapperT>::add(const ConstArrayRef &array) template <typename EntryT, typename RefT, typename TypeMapperT> EntryRef +ArrayStore<EntryT, RefT, TypeMapperT>::allocate(size_t array_size) +{ + if (array_size == 0) { + return EntryRef(); + } + if (array_size <= _maxSmallArraySize) { + return allocate_small_array(array_size); + } else { + return allocate_large_array(array_size); + } +} + +template <typename EntryT, typename RefT, typename TypeMapperT> +EntryRef ArrayStore<EntryT, RefT, TypeMapperT>::addSmallArray(const ConstArrayRef &array) { uint32_t typeId = _mapper.get_type_id(array.size()); @@ -85,6 +99,14 @@ ArrayStore<EntryT, RefT, TypeMapperT>::addSmallArray(const ConstArrayRef &array) template <typename EntryT, typename RefT, typename TypeMapperT> EntryRef +ArrayStore<EntryT, RefT, TypeMapperT>::allocate_small_array(size_t array_size) +{ + uint32_t type_id = _mapper.get_type_id(array_size); + return _store.template freeListRawAllocator<EntryT>(type_id).alloc(array_size).ref; +} + +template <typename EntryT, typename RefT, typename TypeMapperT> +EntryRef ArrayStore<EntryT, RefT, TypeMapperT>::addLargeArray(const ConstArrayRef &array) { using NoOpReclaimer = DefaultReclaimer<LargeArray>; @@ -96,6 +118,17 @@ ArrayStore<EntryT, RefT, 
TypeMapperT>::addLargeArray(const ConstArrayRef &array) } template <typename EntryT, typename RefT, typename TypeMapperT> +EntryRef +ArrayStore<EntryT, RefT, TypeMapperT>::allocate_large_array(size_t array_size) +{ + using NoOpReclaimer = DefaultReclaimer<LargeArray>; + auto handle = _store.template freeListAllocator<LargeArray, NoOpReclaimer>(_largeArrayTypeId).alloc(array_size); + auto& state = _store.getBufferState(RefT(handle.ref).bufferId()); + state.incExtraUsedBytes(sizeof(EntryT) * array_size); + return handle.ref; +} + +template <typename EntryT, typename RefT, typename TypeMapperT> void ArrayStore<EntryT, RefT, TypeMapperT>::remove(EntryRef ref) { diff --git a/vespalib/src/vespa/vespalib/datastore/datastore.hpp b/vespalib/src/vespa/vespalib/datastore/datastore.hpp index 666da55975c..4d09ffe4bc6 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastore.hpp +++ b/vespalib/src/vespa/vespalib/datastore/datastore.hpp @@ -13,8 +13,7 @@ namespace vespalib::datastore { template <typename RefT> DataStoreT<RefT>::DataStoreT() - : DataStoreBase(RefType::numBuffers(), - RefType::unscaled_offset_size()) + : DataStoreBase(RefType::numBuffers(), RefType::offsetSize()) { } @@ -42,7 +41,7 @@ DataStoreT<RefT>::free_elem_internal(EntryRef ref, size_t numElems, bool was_hel state.decHoldElems(numElems); } state.cleanHold(getBuffer(intRef.bufferId()), - intRef.unscaled_offset() * state.getArraySize(), numElems); + intRef.offset() * state.getArraySize(), numElems); } template <typename RefT> @@ -50,15 +49,14 @@ void DataStoreT<RefT>::holdElem(EntryRef ref, size_t numElems, size_t extraBytes) { RefType intRef(ref); - size_t alignedLen = RefType::align(numElems); BufferState &state = getBufferState(intRef.bufferId()); assert(state.isActive()); if (state.hasDisabledElemHoldList()) { - state.incDeadElems(alignedLen); + state.incDeadElems(numElems); return; } - _elemHold1List.push_back(ElemHold1ListElem(ref, alignedLen)); - state.incHoldElems(alignedLen); + 
_elemHold1List.push_back(ElemHold1ListElem(ref, numElems)); + state.incHoldElems(numElems); state.incExtraHoldBytes(extraBytes); } diff --git a/vespalib/src/vespa/vespalib/datastore/entryref.h b/vespalib/src/vespa/vespalib/datastore/entryref.h index 7667cc3d2c1..a0016f4fdcb 100644 --- a/vespalib/src/vespa/vespalib/datastore/entryref.h +++ b/vespalib/src/vespa/vespalib/datastore/entryref.h @@ -40,34 +40,6 @@ public: uint32_t bufferId() const noexcept { return _ref >> OffsetBits; } static size_t offsetSize() noexcept { return 1ul << OffsetBits; } static uint32_t numBuffers() noexcept { return 1 << BufferBits; } - static size_t align(size_t val) noexcept { return val; } - static size_t pad(size_t val) noexcept { (void) val; return 0ul; } - static constexpr bool isAlignedType = false; - // TODO: Remove following temporary methods when removing - // AlignedEntryRefT - size_t unscaled_offset() const noexcept { return offset(); } - static size_t unscaled_offset_size() noexcept { return offsetSize(); } -}; - -/** - * Class for entry reference that is similar to EntryRefT, - * except that we use (2^OffsetAlign) byte alignment on the offset. 
- **/ -template <uint32_t OffsetBits, uint32_t OffsetAlign> -class AlignedEntryRefT : public EntryRefT<OffsetBits> { -private: - typedef EntryRefT<OffsetBits> ParentType; - static const uint32_t PadConstant = ((1 << OffsetAlign) - 1); -public: - AlignedEntryRefT() noexcept : ParentType() {} - AlignedEntryRefT(size_t offset_, uint32_t bufferId_) noexcept : - ParentType(align(offset_) >> OffsetAlign, bufferId_) {} - AlignedEntryRefT(const EntryRef & ref_) noexcept : ParentType(ref_) {} - size_t offset() const { return ParentType::offset() << OffsetAlign; } - static size_t offsetSize() { return ParentType::offsetSize() << OffsetAlign; } - static size_t align(size_t val) { return val + pad(val); } - static size_t pad(size_t val) { return (-val & PadConstant); } - static constexpr bool isAlignedType = true; }; vespalib::asciistream& operator<<(vespalib::asciistream& os, const EntryRef& ref); diff --git a/vespalib/src/vespa/vespalib/datastore/free_list_raw_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/free_list_raw_allocator.hpp index 55d57b1bcb9..c4689cd9e4a 100644 --- a/vespalib/src/vespa/vespalib/datastore/free_list_raw_allocator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/free_list_raw_allocator.hpp @@ -24,10 +24,8 @@ FreeListRawAllocator<EntryT, RefT>::alloc(size_t numElems) assert(state.isActive()); assert(state.getArraySize() == numElems); RefT ref = state.popFreeList(); - // If entry ref is not aligned we must scale the offset according to array size as it was divided when the entry ref was created. - EntryT *entry = !RefT::isAlignedType ? - _store.template getEntryArray<EntryT>(ref, state.getArraySize()) : - _store.template getEntry<EntryT>(ref); + // We must scale the offset according to array size as it was divided when the entry ref was created. 
+ EntryT *entry = _store.template getEntryArray<EntryT>(ref, state.getArraySize()); return HandleType(ref, entry); } diff --git a/vespalib/src/vespa/vespalib/datastore/raw_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/raw_allocator.hpp index a17c3a28ced..0d67bf71c20 100644 --- a/vespalib/src/vespa/vespalib/datastore/raw_allocator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/raw_allocator.hpp @@ -23,21 +23,13 @@ RawAllocator<EntryT, RefT>::alloc(size_t numElems, size_t extraElems) BufferState &state = _store.getBufferState(buffer_id); assert(state.isActive()); size_t oldBufferSize = state.size(); - if (RefT::isAlignedType) { - // AlignedEntryRef constructor scales down offset by alignment - RefT ref(oldBufferSize, buffer_id); - EntryT *buffer = _store.getEntry<EntryT>(ref); - state.pushed_back(numElems); - return HandleType(ref, buffer); - } else { - // Must perform scaling ourselves, according to array size - size_t arraySize = state.getArraySize(); - assert((numElems % arraySize) == 0u); - RefT ref((oldBufferSize / arraySize), buffer_id); - EntryT *buffer = _store.getEntryArray<EntryT>(ref, arraySize); - state.pushed_back(numElems); - return HandleType(ref, buffer); - } + // Must perform scaling ourselves, according to array size + size_t arraySize = state.getArraySize(); + assert((numElems % arraySize) == 0u); + RefT ref((oldBufferSize / arraySize), buffer_id); + EntryT *buffer = _store.getEntryArray<EntryT>(ref, arraySize); + state.pushed_back(numElems); + return HandleType(ref, buffer); } } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index a252763fb5b..cbb8369e1f2 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -113,8 +113,8 @@ private: RefT iRef(oldRef); uint32_t buffer_id = iRef.bufferId(); auto &inner_mapping = _mapping[buffer_id]; - assert(iRef.unscaled_offset() < 
inner_mapping.size()); - EntryRef &mappedRef = inner_mapping[iRef.unscaled_offset()]; + assert(iRef.offset() < inner_mapping.size()); + EntryRef &mappedRef = inner_mapping[iRef.offset()]; assert(!mappedRef.valid()); EntryRef newRef = _store.move(oldRef); mappedRef = newRef; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h index e6627eb80e6..c4baff2206b 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h @@ -47,8 +47,8 @@ public: uint32_t mapEntryRefToEnumValue(EntryRef ref) const { if (ref.valid()) { RefType iRef(ref); - assert(iRef.unscaled_offset() < _enumValues[iRef.bufferId()].size()); - uint32_t enumValue = _enumValues[iRef.bufferId()][iRef.unscaled_offset()]; + assert(iRef.offset() < _enumValues[iRef.bufferId()].size()); + uint32_t enumValue = _enumValues[iRef.bufferId()][iRef.offset()]; assert(enumValue != 0); return enumValue; } else { @@ -59,8 +59,8 @@ public: uint32_t map_entry_ref_to_enum_value_or_zero(EntryRef ref) const { if (ref.valid()) { RefType iRef(ref); - if (iRef.unscaled_offset() < _enumValues[iRef.bufferId()].size()) { - return _enumValues[iRef.bufferId()][iRef.unscaled_offset()]; + if (iRef.offset() < _enumValues[iRef.bufferId()].size()) { + return _enumValues[iRef.bufferId()][iRef.offset()]; } else { return 0u; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp index 0cc6b4eded2..52437fc765c 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp @@ -31,8 +31,8 @@ UniqueStoreEnumerator<RefT>::enumerateValue(EntryRef ref) { RefType iRef(ref); assert(iRef.valid()); - assert(iRef.unscaled_offset() < _enumValues[iRef.bufferId()].size()); - uint32_t &enumVal = 
_enumValues[iRef.bufferId()][iRef.unscaled_offset()]; + assert(iRef.offset() < _enumValues[iRef.bufferId()].size()); + uint32_t &enumVal = _enumValues[iRef.bufferId()][iRef.offset()]; assert(enumVal == 0u); enumVal = _next_enum_val; ++_next_enum_val; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h index 80486f55ad8..4babd6204c7 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h @@ -32,8 +32,8 @@ public: EntryRef remap(EntryRef ref) const { RefType internal_ref(ref); auto &inner_mapping = _mapping[internal_ref.bufferId()]; - assert(internal_ref.unscaled_offset() < inner_mapping.size()); - EntryRef mapped_ref = inner_mapping[internal_ref.unscaled_offset()]; + assert(internal_ref.offset() < inner_mapping.size()); + EntryRef mapped_ref = inner_mapping[internal_ref.offset()]; assert(mapped_ref.valid()); return mapped_ref; } |