diff options
170 files changed, 1567 insertions, 3048 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java index d568a61fc69..507d95c1d7b 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java @@ -9,8 +9,15 @@ package com.yahoo.config.provision; */ public class NodeAllocationException extends RuntimeException { - public NodeAllocationException(String message) { + private final boolean retryable; + + public NodeAllocationException(String message, boolean retryable) { super(message); + this.retryable = retryable; + } + + public boolean retryable() { + return retryable; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java index 25ae21f3383..dc3a05e65f9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java @@ -51,7 +51,8 @@ public class HttpHandler extends ThreadedHttpRequestHandler { } catch (IllegalArgumentException | UnsupportedOperationException e) { return HttpErrorResponse.badRequest(getMessage(e, request)); } catch (NodeAllocationException e) { - return HttpErrorResponse.nodeAllocationFailure(getMessage(e, request)); + return e.retryable() ? HttpErrorResponse.nodeAllocationFailure(getMessage(e, request)) + : HttpErrorResponse.invalidApplicationPackage(getMessage(e, request)); } catch (InternalServerException e) { return HttpErrorResponse.internalServerError(getMessage(e, request)); } catch (UnknownVespaVersionException e) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java index 2b07cffffce..8e39460db71 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java @@ -243,7 +243,7 @@ public class SessionPrepareHandlerTest extends SessionHandlerTest { FailingSessionPrepareHandler handler = new FailingSessionPrepareHandler(SessionPrepareHandler.testContext(), applicationRepository, configserverConfig, - new NodeAllocationException(exceptionMessage)); + new NodeAllocationException(exceptionMessage, true)); HttpResponse response = handler.handle(createTestRequest(pathPrefix, HttpRequest.Method.PUT, Cmd.PREPARED, sessionId)); assertEquals(400, response.getStatus()); Slime data = getData(response); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 9bc2c5a5595..0fe9a84f5fa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -54,6 +54,7 @@ import static com.yahoo.config.application.api.DeploymentSpec.RevisionTarget.nex import static com.yahoo.config.provision.Environment.prod; import static com.yahoo.config.provision.Environment.staging; import static com.yahoo.config.provision.Environment.test; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; import static java.util.Comparator.reverseOrder; @@ -1027,10 +1028,11 @@ public class DeploymentStatus { Versions lastVersions = job.lastCompleted().get().versions(); Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id())); if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty(); - if ( job.id().type().environment().isTest() + if ( job.id().type().environment().isTest() && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true) - && job.isNodeAllocationFailure()) return Optional.empty(); + && job.isNodeAllocationFailure()) return Optional.empty(); + if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years Instant firstFailing = job.firstFailing().get().end().get(); Instant lastCompleted = job.lastCompleted().get().end().get(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index e685e5d167e..dcdfea6e594 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -73,6 +73,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Nod import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; @@ -257,6 +258,8 @@ public class InternalStepRunner implements StepRunner { ? result : Optional.of(nodeAllocationFailure); case INVALID_APPLICATION_PACKAGE: + logger.log(WARNING, e.getMessage()); + return Optional.of(invalidApplication); case BAD_REQUEST: logger.log(WARNING, e.getMessage()); return Optional.of(deploymentFailed); @@ -838,9 +841,12 @@ public class InternalStepRunner implements StepRunner { case nodeAllocationFailure: if ( ! run.id().type().environment().isTest()) updater.accept("could not allocate the requested capacity to your tenant. Please contact Vespa Cloud support."); return; - case deploymentFailed: + case invalidApplication: updater.accept("invalid application configuration. Please review warnings and errors in the deployment job log."); return; + case deploymentFailed: + updater.accept("failure processing application configuration. Please review warnings and errors in the deployment job log."); + return; case installationFailed: updater.accept("nodes were not able to deploy to the new configuration. Please check the Vespa log for errors, and contact Vespa Cloud support if unable to resolve these."); return; @@ -867,6 +873,7 @@ public class InternalStepRunner implements StepRunner { case nodeAllocationFailure: return run.id().type().isProduction() ? Optional.of(mails.nodeAllocationFailure(run.id(), recipients)) : Optional.empty(); case deploymentFailed: + case invalidApplication: return Optional.of(mails.deploymentFailure(run.id(), recipients)); case installationFailed: return Optional.of(mails.installationFailure(run.id(), recipients)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 36036d6d36d..4c84f311458 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -5,7 +5,6 @@ import com.google.common.collect.ImmutableSortedMap; import com.yahoo.component.Version; import com.yahoo.component.VersionCompatibility; import com.yahoo.concurrent.UncheckedTimeoutException; -import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneId; @@ -127,7 +126,7 @@ public class JobController { this.curator = controller.curator(); this.logs = new BufferedLogStore(curator, controller.serviceRegistry().runDataStore()); this.cloud = controller.serviceRegistry().testerCloud(); - this.metric = new JobMetrics(controller.metric(), controller::system); + this.metric = new JobMetrics(controller.metric()); } public TesterCloud cloud() { return cloud; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java index 14fce806152..d1fa00d1c41 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java @@ -19,6 +19,7 @@ public class JobMetrics { public static final String nodeAllocationFailure = "deployment.nodeAllocationFailure"; public static final String endpointCertificateTimeout = "deployment.endpointCertificateTimeout"; public static final String deploymentFailure = "deployment.deploymentFailure"; + public static final String invalidApplication = "deployment.invalidApplication"; public static final String convergenceFailure = "deployment.convergenceFailure"; public static final String testFailure = "deployment.testFailure"; public static final String noTests = "deployment.noTests"; @@ -27,11 +28,9 @@ public class JobMetrics { public static final String success = "deployment.success"; private final Metric metric; - private final Supplier<SystemName> system; - public JobMetrics(Metric metric, Supplier<SystemName> system) { + public JobMetrics(Metric metric) { this.metric = metric; - this.system = system; } public void jobStarted(JobId id) { @@ -51,18 +50,19 @@ public class JobMetrics { } static String valueOf(RunStatus status) { - switch (status) { - case nodeAllocationFailure: return nodeAllocationFailure; - case endpointCertificateTimeout: return endpointCertificateTimeout; - case deploymentFailed: return deploymentFailure; - case installationFailed: return convergenceFailure; - case testFailure: return testFailure; - case noTests: return noTests; - case error: return error; - case aborted: return abort; - case success: return success; - default: throw new IllegalArgumentException("Unexpected run status '" + status + "'"); - } + return switch (status) { + case nodeAllocationFailure -> nodeAllocationFailure; + case endpointCertificateTimeout -> endpointCertificateTimeout; + case invalidApplication -> invalidApplication; + case deploymentFailed -> deploymentFailure; + case installationFailed -> convergenceFailure; + case testFailure -> testFailure; + case noTests -> noTests; + case error -> error; + case aborted -> abort; + case success -> success; + default -> throw new IllegalArgumentException("Unexpected run status '" + status + "'"); + }; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java index 9ca634b19fd..aa727b602e1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java @@ -14,7 +14,10 @@ public enum RunStatus { /** Deployment was rejected due node allocation failure. */ nodeAllocationFailure, - /** Deployment of the real application was rejected. */ + /** Deployment of the real application was rejected because the package is faulty. */ + invalidApplication, + + /** Deployment of the real application was rejected, for other reasons. */ deploymentFailed, /** Deployment timed out waiting for endpoint certificate */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index fcc6d99aec2..49d108d08df 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -34,6 +34,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentF import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.endpointCertificateTimeout; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; @@ -329,39 +330,38 @@ class RunSerializer { } static String valueOf(RunStatus status) { - switch (status) { - case running : return "running"; - case nodeAllocationFailure : return "nodeAllocationFailure"; - case endpointCertificateTimeout : return "endpointCertificateTimeout"; - case deploymentFailed : return "deploymentFailed"; - case installationFailed : return "installationFailed"; - case testFailure : return "testFailure"; - case noTests : return "noTests"; - case error : return "error"; - case success : return "success"; - case aborted : return "aborted"; - case reset : return "reset"; - - default: throw new AssertionError("No value defined for '" + status + "'!"); - } + return switch (status) { + case running -> "running"; + case nodeAllocationFailure -> "nodeAllocationFailure"; + case endpointCertificateTimeout -> "endpointCertificateTimeout"; + case deploymentFailed -> "deploymentFailed"; + case invalidApplication -> "invalidApplication"; + case installationFailed -> "installationFailed"; + case testFailure -> "testFailure"; + case noTests -> "noTests"; + case error -> "error"; + case success -> "success"; + case aborted -> "aborted"; + case reset -> "reset"; + }; } static RunStatus runStatusOf(String status) { - switch (status) { - case "running" : return running; - case "nodeAllocationFailure" : return nodeAllocationFailure; - case "endpointCertificateTimeout" : return endpointCertificateTimeout; - case "deploymentFailed" : return deploymentFailed; - case "installationFailed" : return installationFailed; - case "noTests" : return noTests; - case "testFailure" : return testFailure; - case "error" : return error; - case "success" : return success; - case "aborted" : return aborted; - case "reset" : return reset; - - default: throw new IllegalArgumentException("No run status defined by '" + status + "'!"); - } + return switch (status) { + case "running" -> running; + case "nodeAllocationFailure" -> nodeAllocationFailure; + case "endpointCertificateTimeout" -> endpointCertificateTimeout; + case "deploymentFailed" -> deploymentFailed; + case "invalidApplication" -> invalidApplication; + case "installationFailed" -> installationFailed; + case "noTests" -> noTests; + case "testFailure" -> testFailure; + case "error" -> error; + case "success" -> success; + case "aborted" -> aborted; + case "reset" -> reset; + default -> throw new IllegalArgumentException("No run status defined by '" + status + "'!"); + }; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 8c601f8c678..592fbd0e856 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -227,20 +227,18 @@ class JobControllerApiHandlerHelper { } private static String nameOf(RunStatus status) { - switch (status) { - case reset: // This means the run will reset and keep running. - case running: return "running"; - case aborted: return "aborted"; - case error: return "error"; - case testFailure: return "testFailure"; - case noTests: return "noTests"; - case endpointCertificateTimeout: return "endpointCertificateTimeout"; - case nodeAllocationFailure: return "nodeAllocationFailure"; - case installationFailed: return "installationFailed"; - case deploymentFailed: return "deploymentFailed"; - case success: return "success"; - default: throw new IllegalArgumentException("Unexpected status '" + status + "'"); - } + return switch (status) { + case reset, running -> "running"; + case aborted -> "aborted"; + case error -> "error"; + case testFailure -> "testFailure"; + case noTests -> "noTests"; + case endpointCertificateTimeout -> "endpointCertificateTimeout"; + case nodeAllocationFailure -> "nodeAllocationFailure"; + case installationFailed -> "installationFailed"; + case invalidApplication, deploymentFailed -> "deploymentFailed"; + case success -> "success"; + }; } /** @@ -440,7 +438,7 @@ class JobControllerApiHandlerHelper { runObject.setString("url", baseUriForJob.resolve(baseUriForJob.getPath() + "/run/" + run.id().number()).toString()); runObject.setLong("start", run.start().toEpochMilli()); run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli())); - runObject.setString("status", run.status().name()); + runObject.setString("status", nameOf(run.status())); run.reason().ifPresent(reason -> runObject.setString("reason", reason)); toSlime(runObject.setObject("versions"), run.versions(), application); Cursor runStepsArray = runObject.setArray("steps"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 537090c6d68..6bc99b865e4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -14,6 +14,8 @@ import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; @@ -118,9 +120,17 @@ public class DeploymentTriggerTest { tester.triggerJobs(); app.assertRunning(productionUsWest1); + tester.configServer().throwOnNextPrepare(new ConfigServerException(ErrorCode.INVALID_APPLICATION_PACKAGE, "nope", "bah")); + tester.runner().run(); + assertEquals(RunStatus.invalidApplication, tester.jobs().last(app.instanceId(), productionUsWest1).get().status()); + tester.triggerJobs(); + app.assertNotRunning(productionUsWest1); + // production-us-west-1 fails, but the app loses its projectId, and the job isn't retried. + app.submit(applicationPackage).runJob(systemTest).runJob(stagingTest).triggerJobs(); tester.applications().lockApplicationOrThrow(app.application().id(), locked -> tester.applications().store(locked.withProjectId(OptionalLong.empty()))); + app.timeOutConvergence(productionUsWest1); tester.triggerJobs(); assertEquals(0, tester.jobs().active().size(), "Job is not triggered when no projectId is present"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java index 71e3607983c..6555277b06b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java @@ -39,6 +39,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.tes import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.applicationPackage; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -87,7 +88,7 @@ public class JobControllerApiHandlerHelperTest { // us-east-3 eats the deployment failure and fails before deployment, while us-west-1 fails after. tester.configServer().throwOnNextPrepare(new ConfigServerException(INVALID_APPLICATION_PACKAGE, "ERROR!", "Failed to deploy application")); tester.runner().run(); - assertEquals(deploymentFailed, tester.jobs().last(app.instanceId(), productionUsEast3).get().status()); + assertEquals(invalidApplication, tester.jobs().last(app.instanceId(), productionUsEast3).get().status()); tester.runner().run(); tester.clock().advance(Duration.ofHours(4).plusSeconds(1)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 822ed338b56..6756c692bd2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -293,7 +293,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer); return hosts; } catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) { - throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage()); + throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(), + ! (e instanceof NodeAllocationException nae) || nae.retryable()); } catch (RuntimeException e) { throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 5d9d13c48dc..35f04683157 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -132,9 +132,9 @@ public class GroupPreparer { } if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + - cluster.group().get() : "") + - allocation.allocationFailureDetails()); + throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + cluster.group().get() + : "") + allocation.allocationFailureDetails(), + true); // Carry out and return allocation nodeRepository.nodes().reserve(allocation.reservableNodes()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index ef6c0da9169..820a654c620 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -43,7 +43,8 @@ class Preparer { catch (NodeAllocationException e) { throw new NodeAllocationException("Could not satisfy " + requestedNodes + ( wantedGroups > 1 ? " (in " + wantedGroups + " groups)" : "") + - " in " + application + " " + cluster + ": " + e.getMessage()); + " in " + application + " " + cluster + ": " + e.getMessage(), + e.retryable()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index 13753c12664..3ebaf764115 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -66,7 +66,7 @@ public class MockHostProvisioner implements HostProvisioner { Optional<CloudAccount> cloudAccount) { Flavor hostFlavor = this.hostFlavor.orElseGet(() -> flavors.stream().filter(f -> compatible(f, resources)) .findFirst() - .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources))); + .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true))); List<ProvisionedHost> hosts = new ArrayList<>(); for (int index : provisionIndices) { String hostHostname = hostType == NodeType.host ? "hostname" + index : hostType.name() + index; diff --git a/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp b/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp index c66b2dd15dc..19b8348fb7a 100644 --- a/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_populator/attribute_populator_test.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/attribute/attribute_populator.h> +#include <vespa/document/repo/documenttyperepo.h> #include <vespa/searchcore/proton/attribute/attributemanager.h> #include <vespa/searchcore/proton/common/hw_info.h> #include <vespa/searchcore/proton/test/test.h> diff --git a/searchcore/src/tests/proton/attribute/imported_attributes_context/imported_attributes_context_test.cpp b/searchcore/src/tests/proton/attribute/imported_attributes_context/imported_attributes_context_test.cpp index c52978261a7..7a5c76b201a 100644 --- a/searchcore/src/tests/proton/attribute/imported_attributes_context/imported_attributes_context_test.cpp +++ b/searchcore/src/tests/proton/attribute/imported_attributes_context/imported_attributes_context_test.cpp @@ -56,10 +56,11 @@ hasActiveEnumGuards(AttributeVector &attr) } void -assertGuards(AttributeVector &attr, generation_t expCurrentGeneration, generation_t expFirstUsedGeneration, bool expHasActiveEnumGuards) +assertGuards(AttributeVector &attr, generation_t expCurrentGeneration, generation_t exp_oldest_used_generation, + bool expHasActiveEnumGuards) { EXPECT_EQUAL(expCurrentGeneration, attr.getCurrentGeneration()); - EXPECT_EQUAL(expFirstUsedGeneration, attr.getFirstUsedGeneration()); + EXPECT_EQUAL(exp_oldest_used_generation, attr.get_oldest_used_generation()); EXPECT_EQUAL(expHasActiveEnumGuards, hasActiveEnumGuards(attr)); } diff --git a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp index e89d5eef078..4fc38992368 100644 --- a/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/feedhandler/feedhandler_test.cpp @@ -3,7 +3,11 @@ #include <vespa/persistence/spi/result.h> #include <vespa/document/datatype/tensor_data_type.h> #include <vespa/document/datatype/documenttype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/tensorfieldvalue.h> #include <vespa/document/update/assignvalueupdate.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/documenttyperepo.h> #include <vespa/document/update/documentupdate.h> #include <vespa/document/update/clearvalueupdate.h> @@ -29,7 +33,7 @@ #include <vespa/searchcore/proton/server/ireplayconfig.h> #include <vespa/searchcore/proton/test/dummy_feed_view.h> #include <vespa/searchcore/proton/test/transport_helper.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/transactionlog/translogserver.h> #include <vespa/vespalib/testkit/testapp.h> @@ -271,20 +275,33 @@ MyFeedView::~MyFeedView() = default; struct SchemaContext { - Schema::SP schema; - std::unique_ptr<DocBuilder> builder; + Schema::SP schema; + EmptyDocBuilder builder; SchemaContext(); + SchemaContext(bool has_i2); ~SchemaContext(); DocTypeName getDocType() const { - return DocTypeName(builder->getDocumentType().getName()); + return DocTypeName(builder.get_document_type().getName()); } - const std::shared_ptr<const document::DocumentTypeRepo> &getRepo() const { return builder->getDocumentTypeRepo(); } + std::shared_ptr<const document::DocumentTypeRepo> getRepo() const { return builder.get_repo_sp(); } void addField(vespalib::stringref fieldName); }; SchemaContext::SchemaContext() + : SchemaContext(false) +{ +} + +SchemaContext::SchemaContext(bool has_i2) : schema(std::make_shared<Schema>()), - builder() + builder([has_i2](auto& header) { + header.addTensorField("tensor", "tensor(x{},y{})") + .addTensorField("tensor2", "tensor(x{},y{})") + .addField("i1", document::DataType::T_STRING); + if (has_i2) { + header.addField("i2", document::DataType::T_STRING); + } + }) { schema->addAttributeField(Schema::AttributeField("tensor", DataType::TENSOR, CollectionType::SINGLE, "tensor(x{},y{})")); schema->addAttributeField(Schema::AttributeField("tensor2", DataType::TENSOR, CollectionType::SINGLE, "tensor(x{},y{})")); @@ -298,14 +315,13 @@ void SchemaContext::addField(vespalib::stringref fieldName) { schema->addIndexField(Schema::IndexField(fieldName, DataType::STRING, CollectionType::SINGLE)); - builder = std::make_unique<DocBuilder>(*schema); } struct DocumentContext { Document::SP doc; BucketId bucketId; - DocumentContext(const vespalib::string &docId, DocBuilder &builder) : - doc(builder.startDocument(docId).endDocument().release()), + DocumentContext(const vespalib::string &docId, EmptyDocBuilder &builder) : + doc(builder.make_document(docId)), bucketId(BucketFactory::getBucketId(doc->getId())) { } @@ -313,7 +329,7 @@ struct DocumentContext { struct TwoFieldsSchemaContext : public SchemaContext { TwoFieldsSchemaContext() - : SchemaContext() + : SchemaContext(true) { addField("i2"); } @@ -324,8 +340,8 @@ TensorDataType tensor1DType(ValueType::from_spec("tensor(x{})")); struct UpdateContext { DocumentUpdate::SP update; BucketId bucketId; - UpdateContext(const vespalib::string &docId, DocBuilder &builder) : - update(std::make_shared<DocumentUpdate>(*builder.getDocumentTypeRepo(), builder.getDocumentType(), DocumentId(docId))), + UpdateContext(const vespalib::string &docId, EmptyDocBuilder &builder) : + update(std::make_shared<DocumentUpdate>(builder.get_repo(), builder.get_document_type(), DocumentId(docId))), bucketId(BucketFactory::getBucketId(update->getId())) { } @@ -464,7 +480,7 @@ TEST_F("require that heartBeat calls FeedView's heartBeat", TEST_F("require that outdated remove is ignored", FeedHandlerFixture) { - DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); + DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder); auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId()); static_cast<DocumentOperation &>(*op).setPrevDbDocumentId(DbDocumentId(4)); static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000)); @@ -476,7 +492,7 @@ TEST_F("require that outdated remove is ignored", FeedHandlerFixture) TEST_F("require that outdated put is ignored", FeedHandlerFixture) { - DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); + DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder); auto op =std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), std::move(doc_context.doc)); static_cast<DocumentOperation &>(*op).setPrevTimestamp(Timestamp(10000)); FeedTokenContext token_context; @@ -496,7 +512,7 @@ addLidToRemove(RemoveDocumentsOperation &op) TEST_F("require that handleMove calls FeedView", FeedHandlerFixture) { - DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); + DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder); MoveOperation op(doc_context.bucketId, Timestamp(2), doc_context.doc, DbDocumentId(0, 2), 1); op.setDbDocumentId(DbDocumentId(1, 2)); f.runAsMaster([&]() { f.handler.handleMove(op, IDestructorCallback::SP()); }); @@ -556,7 +572,7 @@ TEST_F("require that flush cannot unprune", FeedHandlerFixture) TEST_F("require that remove of unknown document with known data type stores remove", FeedHandlerFixture) { - DocumentContext doc_context("id:test:searchdocument::foo", *f.schema.builder); + DocumentContext doc_context("id:test:searchdocument::foo", f.schema.builder); auto op = std::make_unique<RemoveOperationWithDocId>(doc_context.bucketId, Timestamp(10), doc_context.doc->getId()); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); @@ -566,7 +582,7 @@ TEST_F("require that remove of unknown document with known data type stores remo TEST_F("require that partial update for non-existing document is tagged as such", FeedHandlerFixture) { - UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder); + UpdateContext upCtx("id:test:searchdocument::foo", f.schema.builder); auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); @@ -582,7 +598,7 @@ TEST_F("require that partial update for non-existing document is tagged as such" TEST_F("require that partial update for non-existing document is created if specified", FeedHandlerFixture) { f.handler.setSerialNum(15); - UpdateContext upCtx("id:test:searchdocument::foo", *f.schema.builder); + UpdateContext upCtx("id:test:searchdocument::foo", f.schema.builder); upCtx.update->setCreateIfNonExistent(true); f.feedView.metaStore.insert(upCtx.update->getId().getGlobalId(), MyDocumentMetaStore::Entry(5, 5, Timestamp(10))); auto op = std::make_unique<UpdateOperation>(upCtx.bucketId, Timestamp(10), upCtx.update); @@ -605,7 +621,7 @@ TEST_F("require that put is rejected if resource limit is reached", FeedHandlerF f.writeFilter._acceptWriteOperation = false; f.writeFilter._message = "Attribute resource limit reached"; - DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder); + DocumentContext docCtx("id:test:searchdocument::foo", f.schema.builder); auto op = std::make_unique<PutOperation>(docCtx.bucketId, Timestamp(10), std::move(docCtx.doc)); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); @@ -620,7 +636,7 @@ TEST_F("require that update is rejected if resource limit is reached", FeedHandl f.writeFilter._acceptWriteOperation = false; f.writeFilter._message = "Attribute resource limit reached"; - UpdateContext updCtx("id:test:searchdocument::foo", *f.schema.builder); + UpdateContext updCtx("id:test:searchdocument::foo", f.schema.builder); updCtx.addFieldUpdate("tensor"); auto op = std::make_unique<UpdateOperation>(updCtx.bucketId, Timestamp(10), updCtx.update); FeedTokenContext token; @@ -637,7 +653,7 @@ TEST_F("require that remove is NOT rejected if resource limit is reached", FeedH f.writeFilter._acceptWriteOperation = false; f.writeFilter._message = "Attribute resource limit reached"; - DocumentContext docCtx("id:test:searchdocument::foo", *f.schema.builder); + DocumentContext docCtx("id:test:searchdocument::foo", f.schema.builder); auto op = std::make_unique<RemoveOperationWithDocId>(docCtx.bucketId, Timestamp(10), docCtx.doc->getId()); FeedTokenContext token; f.handler.performOperation(std::move(token.token), std::move(op)); @@ -651,7 +667,7 @@ checkUpdate(FeedHandlerFixture &f, SchemaContext &schemaContext, const vespalib::string &fieldName, bool expectReject, bool existing) { f.handler.setSerialNum(15); - UpdateContext updCtx("id:test:searchdocument::foo", *schemaContext.builder); + UpdateContext updCtx("id:test:searchdocument::foo", schemaContext.builder); updCtx.addFieldUpdate(fieldName); if (existing) { f.feedView.metaStore.insert(updCtx.update->getId().getGlobalId(), MyDocumentMetaStore::Entry(5, 5, Timestamp(9))); @@ -733,7 +749,7 @@ TEST_F("require that tensor update with wrong tensor type fails", FeedHandlerFix TEST_F("require that put with different document type repo is ok", FeedHandlerFixture) { TwoFieldsSchemaContext schema; - DocumentContext doc_context("id:ns:searchdocument::foo", *schema.builder); + DocumentContext doc_context("id:ns:searchdocument::foo", schema.builder); auto op = std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), std::move(doc_context.doc)); FeedTokenContext token_context; @@ -747,7 +763,7 @@ TEST_F("require that put with different document type repo is ok", FeedHandlerFi TEST_F("require that feed stats are updated", FeedHandlerFixture) { - DocumentContext doc_context("id:ns:searchdocument::foo", *f.schema.builder); + DocumentContext doc_context("id:ns:searchdocument::foo", f.schema.builder); auto op =std::make_unique<PutOperation>(doc_context.bucketId, Timestamp(10), std::move(doc_context.doc)); FeedTokenContext token_context; f.handler.performOperation(std::move(token_context.token), std::move(op)); diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp index 9c68d7d5974..b3a2e9cad83 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.cpp @@ -127,7 +127,8 @@ MyHandler::handleCompactLidSpace(const CompactLidSpaceOperation &op, std::shared } MyHandler::MyHandler(bool storeMoveDoneContexts, bool bucketIdEqualLid) - : _stats(), + : _builder(), + _stats(), _moveFromLid(0), _moveToLid(0), _handleMoveCnt(0), @@ -140,9 +141,8 @@ MyHandler::MyHandler(bool storeMoveDoneContexts, bool bucketIdEqualLid) _rm_listener(), _docs() { - DocBuilder builder = DocBuilder(Schema()); for (uint32_t i(0); i < 10; i++) { - auto doc = builder.startDocument(fmt("%s%d", DOC_ID.c_str(), i)).endDocument(); + auto doc = _builder.make_document(fmt("%s%d", DOC_ID.c_str(), i)); _docs.emplace_back(DocumentMetaData(i, TIMESTAMP_1, createBucketId(i), doc->getId().getGlobalId()), std::move(doc)); } } diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h index b404fc6956a..806729a108c 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_common.h @@ -17,11 +17,14 @@ #include <vespa/searchcore/proton/test/test.h> #include <vespa/searchcore/proton/test/dummy_document_store.h> #include <vespa/vespalib/util/idestructorcallback.h> -#include <vespa/searchlib/index/docbuilder.h> -using namespace document; +using document::BucketId; +using document::GlobalId; +using document::Document; +using document::DocumentId; +using document::DocumentTypeRepo; using namespace proton; -using namespace search::index; +using search::index::EmptyDocBuilder; using namespace search; using namespace vespalib; using vespalib::IDestructorCallback; @@ -60,6 +63,7 @@ struct MyScanIterator : public IDocumentScanIterator { }; struct MyHandler : public ILidSpaceCompactionHandler { + EmptyDocBuilder _builder; std::vector<LidUsageStats> _stats; std::vector<LidVector> _lids; mutable uint32_t _moveFromLid; diff --git a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp index bc9cd9a93fa..fd38853dca1 100644 --- a/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/lid_space_compaction/lid_space_handler_test.cpp @@ -5,7 +5,7 @@ #include <vespa/vespalib/gtest/gtest.h> struct HandlerTest : public ::testing::Test { - DocBuilder _docBuilder; + EmptyDocBuilder _docBuilder; std::shared_ptr<bucketdb::BucketDBOwner> _bucketDB; MyDocumentStore _docStore; MySubDb _subDb; @@ -15,13 +15,13 @@ struct HandlerTest : public ::testing::Test { }; HandlerTest::HandlerTest() - : _docBuilder(Schema()), + : _docBuilder(), _bucketDB(std::make_shared<bucketdb::BucketDBOwner>()), _docStore(), - _subDb(_bucketDB, _docStore, _docBuilder.getDocumentTypeRepo()), + _subDb(_bucketDB, _docStore, _docBuilder.get_repo_sp()), _handler(_subDb.maintenance_sub_db, "test") { - _docStore._readDoc = _docBuilder.startDocument(DOC_ID).endDocument(); + _docStore._readDoc = _docBuilder.make_document(DOC_ID); } HandlerTest::~HandlerTest() = default; diff --git a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp index ea4d556c502..915402122b8 100644 --- a/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp +++ b/searchcore/src/tests/proton/documentdb/maintenancecontroller/maintenancecontroller_test.cpp @@ -35,7 +35,6 @@ #include <vespa/searchcore/proton/test/test.h> #include <vespa/searchcore/proton/test/transport_helper.h> #include <vespa/searchlib/common/idocumentmetastore.h> -#include <vespa/searchlib/index/docbuilder.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/util/destructor_callbacks.h> @@ -99,11 +98,11 @@ class MyDocumentSubDB uint32_t _subDBId; DocumentMetaStore::SP _metaStoreSP; DocumentMetaStore & _metaStore; - const std::shared_ptr<const document::DocumentTypeRepo> &_repo; + std::shared_ptr<const document::DocumentTypeRepo> _repo; const DocTypeName &_docTypeName; public: - MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const std::shared_ptr<const document::DocumentTypeRepo> &repo, + MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, std::shared_ptr<const document::DocumentTypeRepo> repo, std::shared_ptr<bucketdb::BucketDBOwner> bucketDB, const DocTypeName &docTypeName); ~MyDocumentSubDB(); @@ -136,7 +135,7 @@ public: const IDocumentMetaStore &getMetaStore() const { return _metaStore; } }; -MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const std::shared_ptr<const document::DocumentTypeRepo> &repo, +MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, std::shared_ptr<const document::DocumentTypeRepo> repo, std::shared_ptr<bucketdb::BucketDBOwner> bucketDB, const DocTypeName &docTypeName) : _docs(), _subDBId(subDBId), @@ -144,7 +143,7 @@ MyDocumentSubDB::MyDocumentSubDB(uint32_t subDBId, SubDbType subDbType, const st std::move(bucketDB), DocumentMetaStore::getFixedName(), search::GrowStrategy(), subDbType)), _metaStore(*_metaStoreSP), - _repo(repo), + _repo(std::move(repo)), _docTypeName(docTypeName) { _metaStore.constructFreeList(); diff --git a/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp b/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp index 00694b6b78f..67342df5613 100644 --- a/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp +++ b/searchcore/src/tests/proton/documentdb/storeonlyfeedview/storeonlyfeedview_test.cpp @@ -2,6 +2,7 @@ #include <vespa/document/base/documentid.h> #include <vespa/document/datatype/datatype.h> +#include <vespa/document/fieldvalue/document.h> #include <vespa/searchcommon/common/schema.h> #include <vespa/searchcore/proton/server/putdonecontext.h> #include <vespa/searchcore/proton/server/removedonecontext.h> @@ -13,7 +14,7 @@ #include <vespa/searchcore/proton/test/mock_summary_adapter.h> #include <vespa/searchcore/proton/test/transport_helper.h> #include <vespa/searchcore/proton/test/thread_utils.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/testkit/testapp.h> @@ -32,7 +33,7 @@ using namespace proton; using search::DocumentIdT; using vespalib::IDestructorCallback; using search::SerialNum; -using search::index::DocBuilder; +using search::index::EmptyDocBuilder; using search::index::Schema; using storage::spi::Timestamp; using vespalib::make_string; @@ -59,9 +60,8 @@ public: }; std::shared_ptr<const DocumentTypeRepo> myGetDocumentTypeRepo() { - Schema schema; - DocBuilder builder(schema); - std::shared_ptr<const DocumentTypeRepo> repo = builder.getDocumentTypeRepo(); + EmptyDocBuilder builder; + std::shared_ptr<const DocumentTypeRepo> repo = builder.get_repo_sp(); ASSERT_TRUE(repo.get()); return repo; } diff --git a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp index 157d10e6652..8d8674da4f0 100644 --- a/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/lid_allocator/lid_allocator_test.cpp @@ -66,8 +66,8 @@ protected: _allocator.holdLids(lids, get_size(), 0); } - void trim_hold_lists() { - _allocator.trimHoldLists(1); + void reclaim_memory() { + _allocator.reclaim_memory(1); } std::vector<uint32_t> get_valid_lids() { @@ -117,7 +117,7 @@ TEST_F(LidAllocatorTest, unregister_lids) assert_valid_lids({2, 4, 6}); assert_active_lids({4, 6}); hold_lids({1, 3, 5}); - trim_hold_lists(); + reclaim_memory(); EXPECT_EQ((std::vector<uint32_t>{1, 3, 5, 7, 8}), alloc_lids(5)); } diff --git a/searchcore/src/tests/proton/documentmetastore/lid_state_vector/lid_state_vector_test.cpp b/searchcore/src/tests/proton/documentmetastore/lid_state_vector/lid_state_vector_test.cpp index 68958bbe3e4..cbc11126b25 100644 --- a/searchcore/src/tests/proton/documentmetastore/lid_state_vector/lid_state_vector_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/lid_state_vector/lid_state_vector_test.cpp @@ -47,7 +47,7 @@ TEST_F(LidStateVectorTest, basic_free_list_is_working) EXPECT_EQ(0u, freeLids.count()); EXPECT_EQ(3u, list.size()); - list.trimHoldLists(20, freeLids); + list.reclaim_memory(20, freeLids); EXPECT_FALSE(freeLids.empty()); EXPECT_EQ(1u, freeLids.count()); @@ -57,7 +57,7 @@ TEST_F(LidStateVectorTest, basic_free_list_is_working) EXPECT_EQ(0u, freeLids.count()); EXPECT_EQ(2u, list.size()); - list.trimHoldLists(31, freeLids); + list.reclaim_memory(31, freeLids); EXPECT_FALSE(freeLids.empty()); EXPECT_EQ(2u, freeLids.count()); diff --git a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp index ac540ad2e2d..49f13d8c5b5 100644 --- a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp +++ b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp @@ -3,6 +3,8 @@ #include <vespa/document/datatype/datatype.h> #include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/fieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/documentsummary.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <vespa/searchlib/common/flush_token.h> @@ -10,8 +12,9 @@ #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/fef/fef.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/test/index/mock_field_length_inspector.h> #include <vespa/searchlib/query/base.h> @@ -31,6 +34,7 @@ LOG_SETUP("feed_and_search_test"); using document::DataType; using document::Document; using document::FieldValue; +using document::StringFieldValue; using search::DocumentIdT; using search::FlushToken; using search::TuneFileIndexing; @@ -44,9 +48,10 @@ using search::fef::MatchData; using search::fef::MatchDataLayout; using search::fef::TermFieldHandle; using search::fef::TermFieldMatchData; -using search::index::DocBuilder; +using search::index::EmptyDocBuilder; using search::index::DummyFileHeaderContext; using search::index::Schema; +using search::index::StringFieldBuilder; using search::index::test::MockFieldLengthInspector; using search::memoryindex::MemoryIndex; using search::query::SimpleStringTerm; @@ -113,14 +118,13 @@ Schema getSchema() { return schema; } -Document::UP buildDocument(DocBuilder & doc_builder, int id, +Document::UP buildDocument(EmptyDocBuilder & doc_builder, int id, const string &word) { ostringstream ost; ost << "id:ns:searchdocument::" << id; - doc_builder.startDocument(ost.str()); - doc_builder.startIndexField(field_name) - .addStr(noise).addStr(word).endField(); - return doc_builder.endDocument(); + auto doc = doc_builder.make_document(ost.str()); + doc->setValue(field_name, StringFieldBuilder(doc_builder).word(noise).space().word(word).build()); + return doc; } // Performs a search using a Searchable. @@ -165,7 +169,7 @@ void Test::requireThatMemoryIndexCanBeDumpedAndSearched() { auto indexFieldInverter = vespalib::SequencedTaskExecutor::create(invert_executor, 2); auto indexFieldWriter = vespalib::SequencedTaskExecutor::create(write_executor, 2); MemoryIndex memory_index(schema, MockFieldLengthInspector(), *indexFieldInverter, *indexFieldWriter); - DocBuilder doc_builder(schema); + EmptyDocBuilder doc_builder([](auto& header) { header.addField(field_name, DataType::T_STRING); }); Document::UP doc = buildDocument(doc_builder, doc_id1, word1); memory_index.insertDocument(doc_id1, *doc, {}); diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp index 850f8a8f0d1..166d34f366b 100644 --- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp +++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp @@ -1,15 +1,19 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchcorespi/index/fusionrunner.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchcore/proton/index/indexmanager.h> #include <vespa/searchcore/proton/test/transport_helper.h> -#include <vespa/searchcorespi/index/fusionrunner.h> #include <vespa/vespalib/util/isequencedtaskexecutor.h> #include <vespa/searchlib/common/flush_token.h> #include <vespa/searchlib/diskindex/diskindex.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/fef/matchdatalayout.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/test/index/mock_field_length_inspector.h> @@ -25,6 +29,7 @@ using document::Document; using document::FieldValue; +using document::StringFieldValue; using proton::ExecutorThreadingService; using proton::index::IndexManager; using search::FixedSourceSelector; @@ -38,9 +43,10 @@ using search::fef::MatchData; using search::fef::MatchDataLayout; using search::fef::TermFieldHandle; using search::fef::TermFieldMatchData; -using search::index::DocBuilder; +using search::index::EmptyDocBuilder; using search::index::DummyFileHeaderContext; using search::index::Schema; +using search::index::StringFieldBuilder; using search::index::schema::DataType; using search::index::test::MockFieldLengthInspector; using search::memoryindex::MemoryIndex; @@ -149,15 +155,15 @@ void Test::tearDown() { _selector.reset(0); } -Document::UP buildDocument(DocBuilder & doc_builder, int id, const string &word) { +Document::UP buildDocument(EmptyDocBuilder & doc_builder, int id, const string &word) { vespalib::asciistream ost; ost << "id:ns:searchdocument::" << id; - doc_builder.startDocument(ost.str()); - doc_builder.startIndexField(field_name).addStr(word).endField(); - return doc_builder.endDocument(); + auto doc = doc_builder.make_document(ost.str()); + doc->setValue(field_name, StringFieldBuilder(doc_builder).word(word).build()); + return doc; } -void addDocument(DocBuilder & doc_builder, MemoryIndex &index, ISourceSelector &selector, +void addDocument(EmptyDocBuilder & doc_builder, MemoryIndex &index, ISourceSelector &selector, uint8_t index_id, uint32_t docid, const string &word) { Document::UP doc = buildDocument(doc_builder, docid, word); index.insertDocument(docid, *doc, {}); @@ -181,7 +187,7 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) { _selector->setDefaultSource(id - _selector->getBaseId()); Schema schema = getSchema(); - DocBuilder doc_builder(schema); + EmptyDocBuilder doc_builder([](auto& header) { header.addField(field_name, document::DataType::T_STRING); }); MemoryIndex memory_index(schema, MockFieldLengthInspector(), _service.write().indexFieldInverter(), _service.write().indexFieldWriter()); diff --git a/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp b/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp index 75e6b01b46f..7202d7f0abe 100644 --- a/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp +++ b/searchcore/src/tests/proton/index/index_writer/index_writer_test.cpp @@ -1,10 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> - #include <vespa/searchcore/proton/index/index_writer.h> +#include <vespa/document/fieldvalue/document.h> #include <vespa/searchcore/proton/test/mock_index_manager.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/stringfmt.h> + #include <vespa/log/log.h> LOG_SETUP("index_writer_test"); @@ -80,21 +82,18 @@ struct Fixture IIndexManager::SP iim; MyIndexManager &mim; IndexWriter iw; - Schema schema; - DocBuilder builder; + EmptyDocBuilder builder; Document::UP dummyDoc; Fixture() : iim(new MyIndexManager()), mim(static_cast<MyIndexManager &>(*iim)), iw(iim), - schema(), - builder(schema), + builder(), dummyDoc(createDoc(1234)) // This content of this is not used { } Document::UP createDoc(uint32_t lid) { - builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", lid)); - return builder.endDocument(); + return builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", lid)); } void put(SerialNum serialNum, const search::DocumentIdT lid) { iw.put(serialNum, *dummyDoc, lid, {}); diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp index b427daa4ad1..886978f7465 100644 --- a/searchcore/src/tests/proton/index/indexmanager_test.cpp +++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp @@ -1,6 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/searchcore/proton/index/indexmanager.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/document/fieldvalue/document.h> #include <vespa/searchcore/proton/test/transport_helper.h> #include <vespa/searchcorespi/index/index_manager_stats.h> #include <vespa/searchcorespi/index/indexcollection.h> @@ -9,8 +13,9 @@ #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <vespa/searchlib/common/flush_token.h> #include <vespa/searchlib/common/serialnum.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/compact_words_store.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> @@ -34,6 +39,7 @@ LOG_SETUP("indexmanager_test"); using document::Document; using document::FieldValue; +using document::StringFieldValue; using proton::index::IndexConfig; using proton::index::IndexManager; using vespalib::SequencedTaskExecutor; @@ -42,10 +48,11 @@ using search::TuneFileAttributes; using search::TuneFileIndexManager; using search::TuneFileIndexing; using vespalib::datastore::EntryRef; -using search::index::DocBuilder; +using search::index::EmptyDocBuilder; using search::index::DummyFileHeaderContext; using search::index::FieldLengthInfo; using search::index::Schema; +using search::index::StringFieldBuilder; using search::index::schema::DataType; using search::index::test::MockFieldLengthInspector; using search::memoryindex::CompactWordsStore; @@ -88,13 +95,13 @@ void removeTestData() { std::filesystem::remove_all(std::filesystem::path(index_dir)); } -Document::UP buildDocument(DocBuilder &doc_builder, int id, +Document::UP buildDocument(EmptyDocBuilder &doc_builder, int id, const string &word) { vespalib::asciistream ost; ost << "id:ns:searchdocument::" << id; - doc_builder.startDocument(ost.str()); - doc_builder.startIndexField(field_name).addStr(word).endField(); - return doc_builder.endDocument(); + auto doc = doc_builder.make_document(ost.str()); + doc->setValue(field_name, StringFieldBuilder(doc_builder).word(word).build()); + return doc; } void push_documents_and_wait(search::memoryindex::DocumentInverter &inverter) { @@ -110,7 +117,7 @@ struct IndexManagerTest : public ::testing::Test { TransportAndExecutorService _service; std::unique_ptr<IndexManager> _index_manager; Schema _schema; - DocBuilder _builder; + EmptyDocBuilder _builder; IndexManagerTest() : _serial_num(0), @@ -119,7 +126,7 @@ struct IndexManagerTest : public ::testing::Test { _service(1), _index_manager(), _schema(getSchema()), - _builder(_schema) + _builder([](auto& header) { header.addField(field_name, document::DataType::T_STRING); }) { removeTestData(); std::filesystem::create_directory(std::filesystem::path(index_dir)); diff --git a/searchcore/src/tests/proton/reference/gid_to_lid_mapper/gid_to_lid_mapper_test.cpp b/searchcore/src/tests/proton/reference/gid_to_lid_mapper/gid_to_lid_mapper_test.cpp index 2f4c26094c7..5152d09fae5 100644 --- a/searchcore/src/tests/proton/reference/gid_to_lid_mapper/gid_to_lid_mapper_test.cpp +++ b/searchcore/src/tests/proton/reference/gid_to_lid_mapper/gid_to_lid_mapper_test.cpp @@ -126,11 +126,11 @@ struct Fixture return std::make_shared<GidToLidMapperFactory>(_dmsContext); } - void assertGenerations(generation_t currentGeneration, generation_t firstUsedGeneration) + void assertGenerations(generation_t currentGeneration, generation_t oldest_used_generation) { const GenerationHandler &handler = _dms->getGenerationHandler(); EXPECT_EQUAL(currentGeneration, handler.getCurrentGeneration()); - EXPECT_EQUAL(firstUsedGeneration, handler.getFirstUsedGeneration()); + EXPECT_EQUAL(oldest_used_generation, handler.get_oldest_used_generation()); } template <typename Function> diff --git a/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp b/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp index da645f9a94b..719e762288e 100644 --- a/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp +++ b/searchcore/src/tests/proton/reprocessing/document_reprocessing_handler/document_reprocessing_handler_test.cpp @@ -3,7 +3,7 @@ LOG_SETUP("document_reprocessing_handler_test"); #include <vespa/searchcore/proton/reprocessing/document_reprocessing_handler.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/vespalib/testkit/testapp.h> using namespace document; @@ -32,17 +32,17 @@ const vespalib::string DOC_ID = "id:test:searchdocument::0"; struct FixtureBase { DocumentReprocessingHandler _handler; - DocBuilder _docBuilder; + EmptyDocBuilder _docBuilder; FixtureBase(uint32_t docIdLimit); ~FixtureBase(); std::shared_ptr<Document> createDoc() { - return _docBuilder.startDocument(DOC_ID).endDocument(); + return _docBuilder.make_document(DOC_ID); } }; FixtureBase::FixtureBase(uint32_t docIdLimit) : _handler(docIdLimit), - _docBuilder(Schema()) + _docBuilder() { } FixtureBase::~FixtureBase() {} diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp index eeb8cafb859..c153f873480 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp @@ -47,7 +47,7 @@ convertStatusToSlime(const Status &status, Cursor &object) void convertGenerationToSlime(const AttributeVector &attr, Cursor &object) { - object.setLong("firstUsed", attr.getFirstUsedGeneration()); + object.setLong("oldest_used", attr.get_oldest_used_generation()); object.setLong("current", attr.getCurrentGeneration()); } diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp index 1853e939d42..ead9f142508 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp @@ -241,20 +241,20 @@ DocumentMetaStore::onUpdateStat() } void -DocumentMetaStore::onGenerationChange(generation_t generation) +DocumentMetaStore::before_inc_generation(generation_t current_gen) { _gidToLidMap.getAllocator().freeze(); - _gidToLidMap.getAllocator().transferHoldLists(generation - 1); - getGenerationHolder().assign_generation(generation - 1); + _gidToLidMap.getAllocator().assign_generation(current_gen); + getGenerationHolder().assign_generation(current_gen); updateStat(false); } void -DocumentMetaStore::removeOldGenerations(generation_t firstUsed) +DocumentMetaStore::reclaim_memory(generation_t oldest_used_gen) { - _gidToLidMap.getAllocator().trimHoldLists(firstUsed); - _lidAlloc.trimHoldLists(firstUsed); - getGenerationHolder().reclaim(firstUsed); + _gidToLidMap.getAllocator().reclaim_memory(oldest_used_gen); + _lidAlloc.reclaim_memory(oldest_used_gen); + getGenerationHolder().reclaim(oldest_used_gen); } std::unique_ptr<search::AttributeSaver> @@ -318,7 +318,7 @@ DocumentMetaStore::onLoad(vespalib::Executor *) _gidToLidMap.assign(treeBuilder); _gidToLidMap.getAllocator().freeze(); // create initial frozen tree generation_t generation = getGenerationHandler().getCurrentGeneration(); - _gidToLidMap.getAllocator().transferHoldLists(generation); + _gidToLidMap.getAllocator().assign_generation(generation); setNumDocs(_metaDataStore.size()); setCommittedDocIdLimit(_metaDataStore.size()); @@ -433,7 +433,7 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwnerSP bucketDB, setCommittedDocIdLimit(1u); // lid 0 is reserved _gidToLidMap.getAllocator().freeze(); // create initial frozen tree generation_t generation = getGenerationHandler().getCurrentGeneration(); - _gidToLidMap.getAllocator().transferHoldLists(generation); + _gidToLidMap.getAllocator().assign_generation(generation); updateStat(true); } diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h index c4010a07709..2dc85f07cf4 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h @@ -93,8 +93,8 @@ private: void onUpdateStat() override; // Implements AttributeVector - void onGenerationChange(generation_t generation) override; - void removeOldGenerations(generation_t firstUsed) override; + void before_inc_generation(generation_t current_gen) override; + void reclaim_memory(generation_t oldest_used_gen) override; std::unique_ptr<search::AttributeSaver> onInitSave(vespalib::stringref fileName) override; bool onLoad(vespalib::Executor *executor) override; diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.h index 6118701b0dc..95a8cf85279 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_allocator.h @@ -41,8 +41,8 @@ public: void unregisterLid(DocId lid); void unregister_lids(const std::vector<DocId>& lids); size_t getUsedLidsSize() const { return _usedLids.byteSize(); } - void trimHoldLists(generation_t firstUsed) { - _holdLids.trimHoldLists(firstUsed, _freeLids); + void reclaim_memory(generation_t oldest_used_gen) { + _holdLids.reclaim_memory(oldest_used_gen, _freeLids); } void moveLidBegin(DocId fromLid, DocId toLid); void moveLidEnd(DocId fromLid, DocId toLid); diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.cpp index 7157a40c5d5..ef0a244fc37 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.cpp @@ -23,9 +23,9 @@ LidHoldList::clear() { } void -LidHoldList::trimHoldLists(generation_t firstUsed, LidStateVector &freeLids) +LidHoldList::reclaim_memory(generation_t oldest_used_gen, LidStateVector &freeLids) { - while (!_holdList.empty() && _holdList.front().second < firstUsed) { + while (!_holdList.empty() && _holdList.front().second < oldest_used_gen) { uint32_t lid = _holdList.front().first; freeLids.setBit(lid); _holdList.pop_front(); diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.h index fc32fcb7510..565d8bf25e1 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/lid_hold_list.h @@ -43,9 +43,9 @@ public: void clear(); /** - * Frees up elements with generation < first used generation for reuse. + * Frees up elements with generation < oldest used generation for reuse. **/ - void trimHoldLists(generation_t firstUsed, LidStateVector &freeLids); + void reclaim_memory(generation_t oldest_used_gen, LidStateVector &freeLids); }; diff --git a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp index 2cdf1c45485..f9f98705144 100644 --- a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp +++ b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.cpp @@ -5,8 +5,7 @@ namespace proton::test { UserDocumentsBuilder::UserDocumentsBuilder() - : _schema(), - _builder(_schema), + : _builder(), _docs() { } @@ -17,7 +16,7 @@ UserDocumentsBuilder & UserDocumentsBuilder::createDoc(uint32_t userId, search::DocumentIdT lid) { vespalib::string docId = vespalib::make_string("id:test:searchdocument:n=%u:%u", userId, lid); - document::Document::SP doc(_builder.startDocument(docId).endDocument().release()); + document::Document::SP doc(_builder.make_document(docId)); _docs.addDoc(userId, Document(doc, lid, storage::spi::Timestamp(lid))); return *this; } diff --git a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h index f05b6da11de..9e806c8a0bf 100644 --- a/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h +++ b/searchcore/src/vespa/searchcore/proton/test/userdocumentsbuilder.h @@ -2,7 +2,7 @@ #pragma once #include "userdocuments.h" -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/vespalib/util/stringfmt.h> namespace proton::test { @@ -13,14 +13,13 @@ namespace proton::test { class UserDocumentsBuilder { private: - search::index::Schema _schema; - search::index::DocBuilder _builder; + search::index::EmptyDocBuilder _builder; UserDocuments _docs; public: UserDocumentsBuilder(); ~UserDocumentsBuilder(); - const std::shared_ptr<const document::DocumentTypeRepo> &getRepo() const { - return _builder.getDocumentTypeRepo(); + std::shared_ptr<const document::DocumentTypeRepo> getRepo() const { + return _builder.get_repo_sp(); } UserDocumentsBuilder &createDoc(uint32_t userId, search::DocumentIdT lid); UserDocumentsBuilder &createDocs(uint32_t userId, search::DocumentIdT begin, diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 62aca6d68cc..c8b3db42340 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -168,9 +168,8 @@ vespa_define_module( src/tests/grouping src/tests/groupingengine src/tests/hitcollector - src/tests/index/docbuilder - src/tests/index/doctypebuilder src/tests/index/field_length_calculator + src/tests/index/string_field_builder src/tests/indexmetainfo src/tests/ld-library-path src/tests/memoryindex/compact_words_store diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index 7f0a88c9f86..3fa74b78d2a 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -38,8 +38,7 @@ public: generation_t getGen() const { return getCurrentGeneration(); } uint32_t getRefCount(generation_t gen) const { return getGenerationRefCount(gen); } void incGen() { incGeneration(); } - void updateFirstUsedGen() { updateFirstUsedGeneration(); } - generation_t getFirstUsedGen() const { return getFirstUsedGeneration(); } + generation_t oldest_used_gen() const { return get_oldest_used_generation(); } }; @@ -49,35 +48,35 @@ TEST("Test attribute guards") TestAttribute * v = static_cast<TestAttribute *> (vec.get()); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); { AttributeGuard g0(vec); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); { AttributeGuard g1(vec); EXPECT_EQUAL(v->getGen(), unsigned(0)); EXPECT_EQUAL(v->getRefCount(0), unsigned(2)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0)); v->incGen(); EXPECT_EQUAL(v->getGen(), unsigned(1)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); { AttributeGuard g0(vec); EXPECT_EQUAL(v->getGen(), unsigned(1)); EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); { v->incGen(); AttributeGuard g1(vec); @@ -85,19 +84,19 @@ TEST("Test attribute guards") EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); EXPECT_EQUAL(v->getRefCount(2), unsigned(1)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); } EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); - v->updateFirstUsedGeneration(); - EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2)); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1)); + v->update_oldest_used_generation(); + EXPECT_EQUAL(v->oldest_used_gen(), unsigned(2)); EXPECT_EQUAL(v->getGen(), unsigned(2)); } diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index e27065f1c25..b89a3827cc2 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -135,7 +135,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest() _postings.clear(tree); } _postings.clearBuilder(); - _postings.clearHoldLists(); + _postings.reclaim_all_memory(); inc_generation(); } @@ -143,10 +143,10 @@ void DocumentWeightOrFilterSearchTest::inc_generation() { _postings.freeze(); - _postings.transferHoldLists(_gens.getCurrentGeneration()); + _postings.assign_generation(_gens.getCurrentGeneration()); _gens.incGeneration(); - _gens.updateFirstUsedGeneration(); - _postings.trimHoldLists(_gens.getFirstUsedGeneration()); + _gens.update_oldest_used_generation(); + _postings.reclaim_memory(_gens.get_oldest_used_generation()); } TEST_F(DocumentWeightOrFilterSearchTest, daat_or) diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp index 1d76473754f..9d717202551 100644 --- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp +++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp @@ -147,9 +147,9 @@ TEST("requireThatComparatorWithTreeIsWorking") EXPECT_EQUAL(101, exp); t.clear(m); m.freeze(); - m.transferHoldLists(g.getCurrentGeneration()); + m.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - m.trimHoldLists(g.getFirstUsedGeneration()); + m.reclaim_memory(g.get_oldest_used_generation()); } TEST("requireThatFoldedLessIsWorking") diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 02ff01043b0..0542a253cc5 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -345,8 +345,8 @@ TEST(EnumStoreTest, test_hold_lists_and_generation) // check readers again checkReaders(ses, readers); - ses.transfer_hold_lists(sesGen); - ses.trim_hold_lists(sesGen + 1); + ses.assign_generation(sesGen); + ses.reclaim_memory(sesGen + 1); } void @@ -357,8 +357,8 @@ dec_ref_count(NumericEnumStore& store, NumericEnumStore::Index idx) updater.commit(); generation_t gen = 5; - store.transfer_hold_lists(gen); - store.trim_hold_lists(gen + 1); + store.assign_generation(gen); + store.reclaim_memory(gen + 1); } TEST(EnumStoreTest, address_space_usage_is_reported) @@ -882,9 +882,9 @@ namespace { void inc_generation(generation_t &gen, NumericEnumStore &store) { store.freeze_dictionary(); - store.transfer_hold_lists(gen); + store.assign_generation(gen); ++gen; - store.trim_hold_lists(gen); + store.reclaim_memory(gen); } } diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp index b9f3c23213e..0d2ce048111 100644 --- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp +++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp @@ -73,14 +73,14 @@ TEST_F("makeReadGuard(false) acquires guards on both target and reference attrib EXPECT_EQUAL(2u, f.target_attr->getCurrentGeneration()); EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration()); // Should still be holding guard for first generation of writes for both attributes - EXPECT_EQUAL(1u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(1u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation()); } // Force a generation handler update add_n_docs_with_undefined_values(*f.reference_attr, 1); add_n_docs_with_undefined_values(*f.target_attr, 1); - EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation()); } TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on reference attribute", Fixture) { @@ -95,15 +95,15 @@ TEST_F("makeReadGuard(true) acquires enum guard on target and regular guard on r EXPECT_EQUAL(5u, f.target_attr->getCurrentGeneration()); EXPECT_EQUAL(2u, f.reference_attr->getCurrentGeneration()); - EXPECT_EQUAL(3u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(1u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(3u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(1u, f.reference_attr->get_oldest_used_generation()); EXPECT_TRUE(has_active_enum_guards(*f.target_attr)); } // Force a generation handler update add_n_docs_with_undefined_values(*f.reference_attr, 1); add_n_docs_with_undefined_values(*f.target_attr, 1); - EXPECT_EQUAL(7u, f.target_attr->getFirstUsedGeneration()); - EXPECT_EQUAL(3u, f.reference_attr->getFirstUsedGeneration()); + EXPECT_EQUAL(7u, f.target_attr->get_oldest_used_generation()); + EXPECT_EQUAL(3u, f.reference_attr->get_oldest_used_generation()); EXPECT_FALSE(has_active_enum_guards(*f.target_attr)); } diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp index 735ebcff6cf..8b8f4d2c4d4 100644 --- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp +++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp @@ -41,11 +41,11 @@ class MyAttribute : public search::NotImplementedAttribute _mvMapping.shrink(committedDocIdLimit); setNumDocs(committedDocIdLimit); } - virtual void removeOldGenerations(generation_t firstUsed) override { - _mvMapping.trimHoldLists(firstUsed); + virtual void reclaim_memory(generation_t oldest_used_gen) override { + _mvMapping.reclaim_memory(oldest_used_gen); } - virtual void onGenerationChange(generation_t generation) override { - _mvMapping.transferHoldLists(generation - 1); + virtual void before_inc_generation(generation_t current_gen) override { + _mvMapping.assign_generation(current_gen); } public: @@ -115,8 +115,8 @@ public: ConstArrayRef act = get(docId); EXPECT_EQ(exp, std::vector<EntryT>(act.cbegin(), act.cend())); } - void transferHoldLists(generation_t generation) { _mvMapping->transferHoldLists(generation); } - void trimHoldLists(generation_t firstUsed) { _mvMapping->trimHoldLists(firstUsed); } + void assign_generation(generation_t current_gen) { _mvMapping->assign_generation(current_gen); } + void reclaim_memory(generation_t oldest_used_gen) { _mvMapping->reclaim_memory(oldest_used_gen); } void addDocs(uint32_t numDocs) { for (uint32_t i = 0; i < numDocs; ++i) { uint32_t doc = 0; @@ -245,12 +245,12 @@ TEST_F(IntMappingTest, test_that_old_value_is_not_overwritten_while_held) auto old3 = get(3); assertArray({5}, old3); set(3, {7}); - transferHoldLists(10); + assign_generation(10); assertArray({5}, old3); assertGet(3, {7}); - trimHoldLists(10); + reclaim_memory(10); assertArray({5}, old3); - trimHoldLists(11); + reclaim_memory(11); assertArray({0}, old3); } diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 36babec6a89..75e7faf0227 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -64,11 +64,11 @@ protected: { _value_store.freeze_dictionary(); _store.freeze(); - _value_store.transfer_hold_lists(_gen_handler.getCurrentGeneration()); - _store.transferHoldLists(_gen_handler.getCurrentGeneration()); + _value_store.assign_generation(_gen_handler.getCurrentGeneration()); + _store.assign_generation(_gen_handler.getCurrentGeneration()); _gen_handler.incGeneration(); - _value_store.trim_hold_lists(_gen_handler.getFirstUsedGeneration()); - _store.trimHoldLists(_gen_handler.getFirstUsedGeneration()); + _value_store.reclaim_memory(_gen_handler.get_oldest_used_generation()); + _store.reclaim_memory(_gen_handler.get_oldest_used_generation()); } EntryRef add_sequence(int start_key, int end_key) diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp index 54efb3261c8..1eed3a015e1 100644 --- a/searchlib/src/tests/attribute/postinglist/postinglist.cpp +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -201,7 +201,7 @@ private: PostingListNodeAllocator &postingsAlloc); void - removeOldGenerations(Tree &tree, + reclaim_memory(Tree &tree, ValueHandle &valueHandle, PostingList &postings, PostingListNodeAllocator &postingsAlloc); @@ -259,12 +259,12 @@ AttributePostingListTest::freeTree(bool verbose) static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); _intNodeAlloc->freeze(); _intPostings->freeze(); - _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); _intPostings->clearBuilder(); - _intPostings->transferHoldLists(_handler.getCurrentGeneration()); + _intPostings->assign_generation(_handler.getCurrentGeneration()); _handler.incGeneration(); - _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration()); - _intPostings->trimHoldLists(_handler.getFirstUsedGeneration()); + _intNodeAlloc->reclaim_memory(_handler.get_oldest_used_generation()); + _intPostings->reclaim_memory(_handler.get_oldest_used_generation()); LOG(info, "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), @@ -613,9 +613,9 @@ AttributePostingListTest::doCompactEnumStore(Tree &tree, valueHandle.holdBuffer(*it); } generation_t generation = _handler.getCurrentGeneration(); - valueHandle.transferHoldLists(generation); + valueHandle.assign_generation(generation); _handler.incGeneration(); - valueHandle.trimHoldLists(_handler.getFirstUsedGeneration()); + valueHandle.reclaim_memory(_handler.get_oldest_used_generation()); LOG(info, "doCompactEnumStore done"); @@ -658,22 +658,22 @@ bumpGeneration(Tree &tree, (void) tree; (void) valueHandle; postingsAlloc.freeze(); - postingsAlloc.transferHoldLists(_handler.getCurrentGeneration()); - postings.transferHoldLists(_handler.getCurrentGeneration()); + postingsAlloc.assign_generation(_handler.getCurrentGeneration()); + postings.assign_generation(_handler.getCurrentGeneration()); _handler.incGeneration(); } void AttributePostingListTest:: -removeOldGenerations(Tree &tree, +reclaim_memory(Tree &tree, ValueHandle &valueHandle, PostingList &postings, PostingListNodeAllocator &postingsAlloc) { (void) tree; (void) valueHandle; - postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration()); - postings.trimHoldLists(_handler.getFirstUsedGeneration()); + postingsAlloc.reclaim_memory(_handler.get_oldest_used_generation()); + postings.reclaim_memory(_handler.get_oldest_used_generation()); } int @@ -689,7 +689,7 @@ AttributePostingListTest::Main() lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, _stlTree, _randomValues); _intNodeAlloc->freeze(); - _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intNodeAlloc->assign_generation(_handler.getCurrentGeneration()); doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, _stlTree, _randomValues); diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 222a3341ef9..9127c4b59fc 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -221,11 +221,11 @@ public: auto vector = _vectors.get_vector(docid).typify<double>(); _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } - void transfer_hold_lists(generation_t current_gen) override { + void assign_generation(generation_t current_gen) override { _transfer_gen = current_gen; } - void trim_hold_lists(generation_t first_used_gen) override { - _trim_gen = first_used_gen; + void reclaim_memory(generation_t oldest_used_gen) override { + _trim_gen = oldest_used_gen; } bool consider_compact(const CompactionStrategy&) override { return false; diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index 6e60d14b8ff..8feb7b7e287 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -1,14 +1,20 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/diskindex/fusion.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/flush_token.h> #include <vespa/searchlib/diskindex/diskindex.h> -#include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> #include <vespa/searchlib/fef/fieldpositionsiterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/index/schemautil.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> @@ -31,7 +37,10 @@ LOG_SETUP("fusion_test"); namespace search { +using document::ArrayFieldValue; using document::Document; +using document::StringFieldValue; +using document::WeightedSetFieldValue; using fef::FieldPositionsIterator; using fef::TermFieldMatchData; using fef::TermFieldMatchDataArray; @@ -110,26 +119,20 @@ toString(FieldPositionsIterator posItr, bool hasElements = false, bool hasWeight } std::unique_ptr<Document> -make_doc10(DocBuilder &b) +make_doc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - addStr("e").addStr("f").addStr("z"). - endField(); - b.startIndexField("f1"). - addStr("w").addStr("x"). - addStr("y").addStr("z"). - endField(); - b.startIndexField("f2"). - startElement(4).addStr("ax").addStr("ay").addStr("z").endElement(). - startElement(5).addStr("ax").endElement(). - endField(); - b.startIndexField("f3"). - startElement(4).addStr("wx").addStr("z").endElement(). - endField(); - - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + doc->setValue("f0", sfb.tokenize("a b c d e f z").build()); + doc->setValue("f1", sfb.tokenize("w x y z").build()); + ArrayFieldValue string_array(b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("ax ay z").build()); + string_array.add(sfb.tokenize("ax").build()); + doc->setValue("f2", string_array); + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("wx z").build(), 4); + doc->setValue("f3", string_wset); + return doc; } Schema::IndexField @@ -151,6 +154,18 @@ make_schema(bool interleaved_features) return schema; } +EmptyDocBuilder::AddFieldsType +make_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + void assert_interleaved_features(DiskIndex &d, const vespalib::string &field, const vespalib::string &term, uint32_t doc_id, uint32_t exp_num_occs, uint32_t exp_field_length) { @@ -327,7 +342,8 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire addField("f2").addField("f3"). addField("f4")); FieldIndexCollection fic(schema, MockFieldLengthInspector()); - DocBuilder b(schema); + EmptyDocBuilder b(make_add_fields()); + StringFieldBuilder sfb(b); auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2); auto pushThreads = SequencedTaskExecutor::create(push_executor, 2); DocumentInverterContext inv_context(schema, *invertThreads, *pushThreads, fic); @@ -338,19 +354,21 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire inv.invertDocument(10, *doc, {}); myPushDocument(inv); - b.startDocument("id:ns:searchdocument::11"). - startIndexField("f3"). - startElement(-27).addStr("zz").endElement(). - endField(); - doc = b.endDocument(); + doc = b.make_document("id:ns:searchdocument::11"); + { + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.word("zz").build(), -27); + doc->setValue("f3", string_wset); + } inv.invertDocument(11, *doc, {}); myPushDocument(inv); - b.startDocument("id:ns:searchdocument::12"). - startIndexField("f3"). - startElement(0).addStr("zz0").endElement(). - endField(); - doc = b.endDocument(); + doc = b.make_document("id:ns:searchdocument::12"); + { + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.word("zz0").build(), 0); + doc->setValue("f3", string_wset); + } inv.invertDocument(12, *doc, {}); myPushDocument(inv); @@ -468,7 +486,7 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng FieldIndexCollection fic(_schema, field_length_inspector); uint32_t numDocs = 20; uint32_t numWords = 1000; - DocBuilder b(_schema); + EmptyDocBuilder b(make_add_fields()); auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2); auto pushThreads = SequencedTaskExecutor::create(push_executor, 2); DocumentInverterContext inv_context(_schema, *invertThreads, *pushThreads, fic); diff --git a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore deleted file mode 100644 index 999644fce87..00000000000 --- a/searchlib/src/tests/index/docbuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -docbuilder_test -searchlib_docbuilder_test_app diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt deleted file mode 100644 index 7a969f602ea..00000000000 --- a/searchlib/src/tests/index/docbuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_docbuilder_test_app TEST - SOURCES - docbuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app) diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp deleted file mode 100644 index f76b61dcb78..00000000000 --- a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/log/log.h> -LOG_SETUP("docbuilder_test"); -#include <boost/algorithm/string/classification.hpp> -#include <boost/algorithm/string/split.hpp> -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/vespalib/encoding/base64.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/document/repo/fixedtyperepo.h> -#include <iostream> - -using namespace document; -using search::index::schema::CollectionType; - -namespace search::index { - -namespace -{ -std::string empty; -} - -namespace linguistics -{ -const vespalib::string SPANTREE_NAME("linguistics"); -} - - -TEST("test docBuilder") -{ - Schema s; - s.addIndexField(Schema::IndexField("ia", schema::DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", schema::DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", schema::DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", schema::DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("ab", schema::DataType::FLOAT)); - s.addAttributeField(Schema::AttributeField("ac", schema::DataType::STRING)); - s.addAttributeField(Schema::AttributeField("ad", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ae", schema::DataType::FLOAT, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("af", schema::DataType::STRING, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("ag", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ah", schema::DataType::FLOAT, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("ai", schema::DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("asp1", schema::DataType::INT32)); - s.addAttributeField(Schema::AttributeField("asp2", schema::DataType::INT64)); - s.addAttributeField(Schema::AttributeField("aap1", schema::DataType::INT32, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("aap2", schema::DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("awp1", schema::DataType::INT32, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("awp2", schema::DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocBuilder b(s); - Document::UP doc; - std::vector<std::string> lines; - std::vector<std::string>::const_iterator itr; - std::string xml; - - { // empty - doc = b.startDocument("id:ns:searchdocument::0").endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::0\"/>", *itr++); - EXPECT_EQUAL("", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // all fields set - std::vector<char> binaryBlob; - binaryBlob.push_back('\0'); - binaryBlob.push_back('\2'); - binaryBlob.push_back('\1'); - std::string raw1s("Single Raw Element"); - std::string raw1a0("Array Raw Element 0"); - std::string raw1a1("Array Raw Element 1"); - std::string raw1w0("Weighted Set Raw Element 0"); - std::string raw1w1("Weighted Set Raw Element 1"); - raw1s += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1a1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w0 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - raw1w1 += std::string(&binaryBlob[0], - &binaryBlob[0] + binaryBlob.size()); - b.startDocument("id:ns:searchdocument::1"); - b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField(); - b.startIndexField("ib").startElement().addStr("foo").endElement(). - startElement(1).addStr("bar").addStr("baz").endElement().endField(); - b. startIndexField("ic"). - startElement(20).addStr("bar").addStr("baz").endElement(). - startElement().addStr("foo").endElement(). - endField(); - b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - b.startAttributeField("aa").addInt(2147483647).endField(); - b.startAttributeField("ab").addFloat(1234.56).endField(); - b.startAttributeField("ac").addStr("foo baz").endField(); - b.startAttributeField("ad").startElement().addInt(10).endElement().endField(); - b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField(); - b.startAttributeField("af").startElement().addStr("foo").endElement().endField(); - b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField(); - b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField(); - b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField(); - b.startAttributeField("asp1").addInt(1001).endField(); - b.startAttributeField("asp2").addPosition(1002, 1003).endField(); - b.startAttributeField("aap1"). - startElement().addInt(1004).endElement(). - startElement().addInt(1005).endElement(). - endField(); - b.startAttributeField("aap2"). - startElement().addPosition(1006, 1007).endElement(). - startElement().addPosition(1008, 1009).endElement(). - endField(); - b.startAttributeField("awp1"). - startElement(41).addInt(1010).endElement(). - startElement(42).addInt(1011).endElement(). - endField(); - b.startAttributeField("awp2"). - startElement(43).addPosition(1012, 1013).endElement(). - startElement(44).addPosition(1014, 1015).endElement(). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::1\">", *itr++); - EXPECT_EQUAL("<iu>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:81/fluke?ab=2#4</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>81</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>4</fragment>", *itr++); - EXPECT_EQUAL("</iu>", *itr++); - EXPECT_EQUAL("<aa>2147483647</aa>", *itr++); - EXPECT_EQUAL("<aap2>", *itr++); - EXPECT_EQUAL("<item>1047806</item>", *itr++); - EXPECT_EQUAL("<item>1048322</item>", *itr++); - EXPECT_EQUAL("</aap2>", *itr++); - EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++); - EXPECT_EQUAL("<ae>", *itr++); - EXPECT_EQUAL("<item>10.5</item>", *itr++); - EXPECT_EQUAL("</ae>", *itr++); - EXPECT_EQUAL("<ib>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("<item>bar baz</item>", *itr++); - EXPECT_EQUAL("</ib>", *itr++); - EXPECT_EQUAL("<ah>", *itr++); - EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++); - EXPECT_EQUAL("</ah>", *itr++); - EXPECT_EQUAL("<ic>", *itr++); - EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++); - EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++); - EXPECT_EQUAL("</ic>", *itr++); - EXPECT_EQUAL("<ac>foo baz</ac>", *itr++); - EXPECT_EQUAL("<awp2>", *itr++); - EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++); - EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++); - EXPECT_EQUAL("</awp2>", *itr++); - EXPECT_EQUAL("<iau>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:82/fluke?ab=2#8</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>8</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item>", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>82</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>9</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iau>", *itr++); - EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++); - EXPECT_EQUAL("<ai>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">bar</item>", *itr++); - EXPECT_EQUAL("</ai>", *itr++); - EXPECT_EQUAL("<asp1>1001</asp1>", *itr++); - EXPECT_EQUAL("<ad>", *itr++); - EXPECT_EQUAL("<item>10</item>", *itr++); - EXPECT_EQUAL("</ad>", *itr++); - EXPECT_EQUAL("<iwu>", *itr++); - EXPECT_EQUAL("<item weight=\"4\">", *itr++); - EXPECT_EQUAL("<all>http://www.example.com:83/fluke?ab=2#12</all>", *itr++); - EXPECT_EQUAL("<host>www.example.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>83</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>12</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("<item weight=\"7\">", *itr++); - EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++); - EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); - EXPECT_EQUAL("<scheme>http</scheme>", *itr++); - EXPECT_EQUAL("<path>/fluke</path>", *itr++); - EXPECT_EQUAL("<port>85</port>", *itr++); - EXPECT_EQUAL("<query>ab=2</query>", *itr++); - EXPECT_EQUAL("<fragment>13</fragment>", *itr++); - EXPECT_EQUAL("</item>", *itr++); - EXPECT_EQUAL("</iwu>", *itr++); - EXPECT_EQUAL("<ab>1234.56</ab>", *itr++); - EXPECT_EQUAL("<ag>", *itr++); - EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++); - EXPECT_EQUAL("</ag>", *itr++); - EXPECT_EQUAL("<awp1>", *itr++); - EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++); - EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++); - EXPECT_EQUAL("</awp1>", *itr++); - EXPECT_EQUAL("<aap1>", *itr++); - EXPECT_EQUAL("<item>1004</item>", *itr++); - EXPECT_EQUAL("<item>1005</item>", *itr++); - EXPECT_EQUAL("</aap1>", *itr++); - EXPECT_EQUAL("<af>", *itr++); - EXPECT_EQUAL("<item>foo</item>", *itr++); - EXPECT_EQUAL("</af>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } - { // create one more to see that everything is cleared - b.startDocument("id:ns:searchdocument::2"); - b.startIndexField("ia").addStr("yes").endField(); - b.startAttributeField("aa").addInt(20).endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::2\">", *itr++); - EXPECT_EQUAL("<aa>20</aa>", *itr++); - EXPECT_EQUAL("<ia>yes</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - } - { // create field with cjk chars - b.startDocument("id:ns:searchdocument::3"); - b.startIndexField("ia"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = b.endDocument(); - xml = doc->toXml(""); - boost::split(lines, xml, boost::is_any_of("\n")); - itr = lines.begin(); - EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"id:ns:searchdocument::3\">", *itr++); - EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++); - EXPECT_EQUAL("</document>", *itr++); - EXPECT_TRUE(itr == lines.end()); - const FieldValue::UP iaval = doc->getValue("ia"); - ASSERT_TRUE(iaval.get() != NULL); - const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *> - (iaval.get()); - ASSERT_TRUE(iasval != NULL); - StringFieldValue::SpanTrees trees = iasval->getSpanTrees(); - const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); - ASSERT_TRUE(tree != NULL); - std::vector<Span> spans; - std::vector<Span> expSpans; - for (SpanTree::const_iterator i = tree->begin(), ie = tree->end(); - i != ie; ++i) { - Annotation &ann = const_cast<Annotation &>(*i); - const Span *span = dynamic_cast<const Span *>(ann.getSpanNode()); - if (span == NULL) - continue; - spans.push_back(*span); - } - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(0, 15)); - expSpans.push_back(Span(15, 9)); - expSpans.push_back(Span(15, 9)); - ASSERT_TRUE(expSpans == spans); -#if 0 - std::cout << "onedoc xml start -----" << std::endl << - xml << std::endl << - "-------" << std::endl; - std::cout << "onedoc toString start ----" << std::endl << - doc->toString(true) << std::endl << - "-------" << std::endl; -#endif - } -} - -TEST("test if index names are valid uri parts") { - EXPECT_FALSE(UriField::mightBePartofUri("all")); - EXPECT_FALSE(UriField::mightBePartofUri("fragment")); - EXPECT_FALSE(UriField::mightBePartofUri(".all")); - EXPECT_FALSE(UriField::mightBePartofUri("all.b")); - EXPECT_TRUE(UriField::mightBePartofUri("b.all")); - EXPECT_TRUE(UriField::mightBePartofUri("b.scheme")); - EXPECT_TRUE(UriField::mightBePartofUri("b.host")); - EXPECT_TRUE(UriField::mightBePartofUri("b.port")); - EXPECT_TRUE(UriField::mightBePartofUri("b.hostname")); - EXPECT_TRUE(UriField::mightBePartofUri("b.path")); - EXPECT_TRUE(UriField::mightBePartofUri("b.query")); - EXPECT_TRUE(UriField::mightBePartofUri("b.fragment")); -} - -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore deleted file mode 100644 index f15be1efcfe..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*_test -.depend -Makefile -doctypebuilder_test -searchlib_doctypebuilder_test_app diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt deleted file mode 100644 index 348ecde5a7c..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchlib_doctypebuilder_test_app TEST - SOURCES - doctypebuilder_test.cpp - DEPENDS - searchlib -) -vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app) diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp deleted file mode 100644 index 95854fa11b2..00000000000 --- a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/searchlib/index/doctypebuilder.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/vespalib/testkit/testapp.h> - -using namespace document; - -namespace search { -namespace index { - -using schema::CollectionType; -using schema::DataType; - -TEST("testSearchDocType") { - Schema s; - s.addIndexField(Schema::IndexField("ia", DataType::STRING)); - s.addIndexField(Schema::IndexField("ib", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("ic", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addUriIndexFields(Schema::IndexField("iu", DataType::STRING)); - s.addUriIndexFields(Schema::IndexField("iau", DataType::STRING, CollectionType::ARRAY)); - s.addUriIndexFields(Schema::IndexField("iwu", DataType::STRING, CollectionType::WEIGHTEDSET)); - s.addAttributeField(Schema::AttributeField("aa", DataType::INT32)); - s.addAttributeField(Schema::AttributeField("spos", DataType::INT64)); - s.addAttributeField(Schema::AttributeField("apos", DataType::INT64, CollectionType::ARRAY)); - s.addAttributeField(Schema::AttributeField("wpos", DataType::INT64, CollectionType::WEIGHTEDSET)); - - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(10u, docType->getFieldCount()); - - EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("ib").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<String>", - docType->getField("ic").getDataType().getName()); - EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName()); - EXPECT_EQUAL("Array<url>", - docType->getField("iau").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<url>", - docType->getField("iwu").getDataType().getName()); - - EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName()); - EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName()); - EXPECT_EQUAL("Array<Long>", - docType->getField("apos").getDataType().getName()); - EXPECT_EQUAL("WeightedSet<Long>", - docType->getField("wpos").getDataType().getName()); -} - -TEST("require that multiple fields can have the same type") { - Schema s; - s.addIndexField(Schema::IndexField("array1", DataType::STRING, CollectionType::ARRAY)); - s.addIndexField(Schema::IndexField("array2", DataType::STRING, CollectionType::ARRAY)); - DocTypeBuilder docTypeBuilder(s); - document::config::DocumenttypesConfig config = docTypeBuilder.makeConfig(); - DocumentTypeRepo repo(config); - const DocumentType *docType = repo.getDocumentType("searchdocument"); - ASSERT_TRUE(docType); - EXPECT_EQUAL(2u, docType->getFieldCount()); - - EXPECT_EQUAL("Array<String>", - docType->getField("array1").getDataType().getName()); - EXPECT_EQUAL("Array<String>", - docType->getField("array2").getDataType().getName()); -} - -} // namespace index -} // namespace search - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/string_field_builder/CMakeLists.txt b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt new file mode 100644 index 00000000000..f8774eae5ca --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_string_field_builder_test_app TEST + SOURCES + string_field_builder_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_string_field_builder_test_app COMMAND searchlib_string_field_builder_test_app) diff --git a/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp new file mode 100644 index 00000000000..8c2b641f724 --- /dev/null +++ b/searchlib/src/tests/index/string_field_builder/string_field_builder_test.cpp @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/index/string_field_builder.h> +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/datatype/annotationtype.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <cassert> +#include <iostream> + +using document::Annotation; +using document::AnnotationType; +using document::Span; +using document::SpanNode; +using document::SpanTree; +using document::StringFieldValue; +using search::index::EmptyDocBuilder; +using search::index::StringFieldBuilder; + +namespace +{ + +const vespalib::string SPANTREE_NAME("linguistics"); + +struct MyAnnotation { + int32_t start; + int32_t length; + std::optional<vespalib::string> label; + + MyAnnotation(int32_t start_in, int32_t length_in) noexcept + : start(start_in), + length(length_in), + label() + { + } + + MyAnnotation(int32_t start_in, int32_t length_in, vespalib::string label_in) noexcept + : start(start_in), + length(length_in), + label(label_in) + { + } + + bool operator==(const MyAnnotation& rhs) const noexcept; +}; + +bool +MyAnnotation::operator==(const MyAnnotation& rhs) const noexcept +{ + return start == rhs.start && + length == rhs.length && + label == rhs.label; +} + + +std::ostream& operator<<(std::ostream& os, const MyAnnotation& ann) { + os << "[" << ann.start << "," << ann.length << "]"; + if (ann.label.has_value()) { + os << "(\"" << ann.label.value() << "\")"; + } + return os; +} + +} + +class StringFieldBuilderTest : public testing::Test +{ +protected: + EmptyDocBuilder edb; + StringFieldBuilder sfb; + StringFieldBuilderTest(); + ~StringFieldBuilderTest(); + std::vector<MyAnnotation> get_annotations(const StringFieldValue& val); + void assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val); +}; + +StringFieldBuilderTest::StringFieldBuilderTest() + : testing::Test(), + edb(), + sfb(edb) +{ +} + +StringFieldBuilderTest::~StringFieldBuilderTest() = default; + +std::vector<MyAnnotation> +StringFieldBuilderTest::get_annotations(const StringFieldValue& val) +{ + std::vector<MyAnnotation> result; + StringFieldValue::SpanTrees trees = val.getSpanTrees(); + const auto* tree = StringFieldValue::findTree(trees, SPANTREE_NAME); + if (tree != nullptr) { + for (auto& ann : *tree) { + assert(ann.getType() == *AnnotationType::TERM); + auto span = dynamic_cast<const Span *>(ann.getSpanNode()); + if (span == nullptr) { + continue; + } + auto ann_fv = ann.getFieldValue(); + if (ann_fv == nullptr) { + result.emplace_back(span->from(), span->length()); + } else { + result.emplace_back(span->from(), span->length(), dynamic_cast<const StringFieldValue &>(*ann_fv).getValue()); + } + } + } + return result; +} + +void +StringFieldBuilderTest::assert_annotations(std::vector<MyAnnotation> exp, const vespalib::string& plain, const StringFieldValue& val) +{ + EXPECT_EQ(exp, get_annotations(val)); + EXPECT_EQ(plain, val.getValue()); +} + +TEST_F(StringFieldBuilderTest, no_annotations) +{ + assert_annotations({}, "foo", StringFieldValue("foo")); +} + +TEST_F(StringFieldBuilderTest, single_word) +{ + assert_annotations({{0, 4}}, "word", sfb.word("word").build()); +} + +TEST_F(StringFieldBuilderTest, tokenize) +{ + assert_annotations({{0, 4}, {5, 2}, {8, 1}, {10, 4}}, "this is a test", sfb.tokenize("this is a test").build()); +} + +TEST_F(StringFieldBuilderTest, alt_word) +{ + assert_annotations({{0, 3}, {4, 3}, {4, 3, "baz"}}, "foo bar", sfb.word("foo").space().word("bar").alt_word("baz").build()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp index 3f8a04d9460..83746b611fb 100644 --- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp @@ -1,8 +1,13 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/searchlib/index/docbuilder.h> -#include <vespa/searchlib/index/field_length_calculator.h> #include <vespa/searchlib/memoryindex/document_inverter.h> +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> @@ -19,9 +24,10 @@ namespace search::memoryindex { using document::Document; -using index::DocBuilder; +using index::EmptyDocBuilder; using index::FieldLengthCalculator; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; using vespalib::SequencedTaskExecutor; @@ -29,64 +35,68 @@ using vespalib::ISequencedTaskExecutor; namespace { +EmptyDocBuilder::AddFieldsType +make_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } } @@ -96,7 +106,7 @@ VESPA_THREAD_STACK_TAG(push_executor) struct DocumentInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; WordStore _word_store; @@ -118,7 +128,7 @@ struct DocumentInverterTest : public ::testing::Test { DocumentInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _invertThreads(SequencedTaskExecutor::create(invert_executor, 1)), _pushThreads(SequencedTaskExecutor::create(push_executor, 1)), _word_store(), diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index dcca1f136f6..04d1f08db6f 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -1,13 +1,22 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/datatype/datatype.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/diskindex/fusion.h> #include <vespa/searchlib/diskindex/indexbuilder.h> #include <vespa/searchlib/diskindex/zcposoccrandread.h> #include <vespa/searchlib/fef/fieldpositionsiterator.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/document_inverter.h> #include <vespa/searchlib/memoryindex/document_inverter_context.h> #include <vespa/searchlib/memoryindex/field_index_collection.h> @@ -37,7 +46,11 @@ namespace search { using namespace fef; using namespace index; +using document::ArrayFieldValue; using document::Document; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using queryeval::RankedSearchIteratorBase; using queryeval::SearchIterator; using search::index::schema::CollectionType; @@ -505,6 +518,12 @@ make_single_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_single_add_fields() +{ + return [](auto& header) { header.addField("f0", document::DataType::T_STRING); }; +} + template <typename FieldIndexType> struct FieldIndexTest : public ::testing::Test { Schema schema; @@ -706,6 +725,18 @@ make_multi_field_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_multi_field_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; +} + struct FieldIndexCollectionTest : public ::testing::Test { Schema schema; FieldIndexCollection fic; @@ -907,16 +938,16 @@ class InverterTest : public ::testing::Test { public: Schema _schema; FieldIndexCollection _fic; - DocBuilder _b; + EmptyDocBuilder _b; std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; DocumentInverterContext _inv_context; DocumentInverter _inv; - InverterTest(const Schema& schema) + InverterTest(const Schema& schema, EmptyDocBuilder::AddFieldsType add_fields) : _schema(schema), _fic(_schema, MockFieldLengthInspector()), - _b(_schema), + _b(add_fields), _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), _inv_context(_schema, *_invertThreads, *_pushThreads, _fic), @@ -938,91 +969,63 @@ public: class BasicInverterTest : public InverterTest { public: - BasicInverterTest() : InverterTest(make_multi_field_schema()) {} + BasicInverterTest() : InverterTest(make_multi_field_schema(), make_multi_field_add_fields()) {} }; TEST_F(BasicInverterTest, require_that_inversion_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::20"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::20"); + doc->setValue("f0", sfb.tokenize("a a b c d").build()); _inv.invertDocument(20, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::30"); - _b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - addStr("e").addStr("f"). - endField(); - _b.startIndexField("f1"). - addStr("\nw2").addStr("w").addStr("x"). - addStr("\nw3").addStr("y").addStr("z"). - endField(); - _b.startIndexField("f2"). - startElement(4). - addStr("w").addStr("x"). - endElement(). - startElement(5). - addStr("y").addStr("z"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(6). - addStr("w").addStr("x"). - endElement(). - startElement(7). - addStr("y").addStr("z"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::30"); + doc->setValue("f0", sfb.tokenize("a b c d e f").build()); + doc->setValue("f1", sfb.word("\nw2").tokenize(" w x "). + word("\nw3").tokenize(" y z").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("w x").build()); + string_array.add(sfb.tokenize("y z").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("w x").build(), 6); + string_wset.add(sfb.tokenize("y z").build(), 7); + doc->setValue("f3", string_wset); + } _inv.invertDocument(30, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::40"); - _b.startIndexField("f0"). - addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). - addStr("e").addStr("f"). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::40"); + doc->setValue("f0", sfb.tokenize("a a b c a e f").build()); _inv.invertDocument(40, *doc, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::999"); - _b.startIndexField("f0"). - addStr("this").addStr("is").addStr("_a_").addStr("test"). - addStr("for").addStr("insertion").addStr("speed").addStr("with"). - addStr("more").addStr("than").addStr("just").addStr("__a__"). - addStr("few").addStr("words").addStr("present").addStr("in"). - addStr("some").addStr("of").addStr("the").addStr("fields"). - endField(); - _b.startIndexField("f1"). - addStr("the").addStr("other").addStr("field").addStr("also"). - addStr("has").addStr("some").addStr("content"). - endField(); - _b.startIndexField("f2"). - startElement(1). - addStr("strange").addStr("things").addStr("here"). - addStr("has").addStr("some").addStr("content"). - endElement(). - endField(); - _b.startIndexField("f3"). - startElement(3). - addStr("not").addStr("a").addStr("weighty").addStr("argument"). - endElement(). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::999"); + doc->setValue("f0", sfb.tokenize("this is ").word("_a_"). + tokenize(" test for insertion speed with more than just "). + word("__a__").tokenize(" few words present in some of the fields").build()); + doc->setValue("f1", sfb.tokenize("the other field also has some content").build()); + { + ArrayFieldValue string_array(_b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("strange things here has some content").build()); + doc->setValue("f2", string_array); + } + { + WeightedSetFieldValue string_wset(_b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("not a weighty argument").build(), 3); + doc->setValue("f3", string_wset); + } for (uint32_t docId = 10000; docId < 20000; ++docId) { _inv.invertDocument(docId, *doc, {}); myPushDocument(_inv); @@ -1132,19 +1135,17 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remover) { - Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::1"); - _b.startIndexField("f0").addStr("a").addStr("b").endField(); - _b.startIndexField("f1").addStr("a").addStr("c").endField(); - Document::UP doc1 = _b.endDocument(); - _inv.invertDocument(1, *doc1.get(), {}); + auto doc1 = _b.make_document("id:ns:searchdocument::1"); + doc1->setValue("f0", sfb.tokenize("a b").build()); + doc1->setValue("f1", sfb.tokenize("a c").build()); + _inv.invertDocument(1, *doc1, {}); myPushDocument(_inv); - _b.startDocument("id:ns:searchdocument::2"); - _b.startIndexField("f0").addStr("b").addStr("c").endField(); - Document::UP doc2 = _b.endDocument(); - _inv.invertDocument(2, *doc2.get(), {}); + auto doc2 = _b.make_document("id:ns:searchdocument::2"); + doc2->setValue("f0", sfb.tokenize("b c").build()); + _inv.invertDocument(2, *doc2, {}); myPushDocument(_inv); EXPECT_TRUE(assertPostingList("[1]", find("a", 0))); @@ -1172,136 +1173,71 @@ make_uri_schema() return result; } +EmptyDocBuilder::AddFieldsType +make_uri_add_fields() +{ + return [](auto& header) { using namespace document::config_builder; + header.addField("iu", UrlDataType::getInstance().getId()) + .addField("iau", Array(UrlDataType::getInstance().getId())) + .addField("iwu", Wset(UrlDataType::getInstance().getId())); + }; +} + class UriInverterTest : public InverterTest { public: - UriInverterTest() : InverterTest(make_uri_schema()) {} + UriInverterTest() : InverterTest(make_uri_schema(), make_uri_add_fields()) {} }; TEST_F(UriInverterTest, require_that_uri_indexing_is_working) { Document::UP doc; - - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("iu"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - _b.startIndexField("iau"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - _b.startIndexField("iwu"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - doc = _b.endDocument(); + StringFieldBuilder sfb(_b); + sfb.url_mode(true); + StructFieldValue url_value(_b.get_data_type("url")); + + doc = _b.make_document("id:ns:searchdocument::10"); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("iu", url_value); + ArrayFieldValue url_array(_b.get_data_type("Array<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("iau", url_array); + WeightedSetFieldValue url_wset(_b.get_data_type("WeightedSet<url>")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("iwu", url_wset); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); @@ -1360,21 +1296,16 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) class CjkInverterTest : public InverterTest { public: - CjkInverterTest() : InverterTest(make_single_field_schema()) {} + CjkInverterTest() : InverterTest(make_single_field_schema(), make_single_add_fields()) {} }; TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) { Document::UP doc; + StringFieldBuilder sfb(_b); - _b.startDocument("id:ns:searchdocument::10"); - _b.startIndexField("f0"). - addStr("我就是那个"). - setAutoSpace(false). - addStr("大灰狼"). - setAutoSpace(true). - endField(); - doc = _b.endDocument(); + doc = _b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.word("我就是那个").word("大灰狼").build()); _inv.invertDocument(10, *doc, {}); myPushDocument(_inv); diff --git a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp index ed049a82c42..bf3a911a579 100644 --- a/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/field_inverter/field_inverter_test.cpp @@ -1,8 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> @@ -13,9 +19,12 @@ namespace search { +using document::ArrayFieldValue; using document::Document; -using index::DocBuilder; +using document::WeightedSetFieldValue; +using index::EmptyDocBuilder; using index::Schema; +using index::StringFieldBuilder; using index::schema::CollectionType; using index::schema::DataType; @@ -26,93 +35,91 @@ namespace memoryindex { namespace { Document::UP -makeDoc10(DocBuilder &b) +makeDoc10(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("c").addStr("d"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::10"); + doc->setValue("f0", sfb.tokenize("a b c d").build()); + return doc; } Document::UP -makeDoc11(DocBuilder &b) +makeDoc11(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::11"); - b.startIndexField("f0"). - addStr("a").addStr("b").addStr("e").addStr("f"). - endField(); - b.startIndexField("f1"). - addStr("a").addStr("g"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::11"); + doc->setValue("f0", sfb.tokenize("a b e f").build()); + doc->setValue("f1", sfb.tokenize("a g").build()); + return doc; } Document::UP -makeDoc12(DocBuilder &b) +makeDoc12(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::12"); - b.startIndexField("f0"). - addStr("h").addStr("doc12"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::12"); + doc->setValue("f0", sfb.tokenize("h doc12").build()); + return doc; } Document::UP -makeDoc13(DocBuilder &b) +makeDoc13(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::13"); - b.startIndexField("f0"). - addStr("i").addStr("doc13"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::13"); + doc->setValue("f0", sfb.tokenize("i doc13").build()); + return doc; } Document::UP -makeDoc14(DocBuilder &b) +makeDoc14(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::14"); - b.startIndexField("f0"). - addStr("j").addStr("doc14"). - endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::14"); + doc->setValue("f0", sfb.tokenize("j doc14").build()); + return doc; } Document::UP -makeDoc15(DocBuilder &b) +makeDoc15(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::15"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::15"); } Document::UP -makeDoc16(DocBuilder &b) +makeDoc16(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::16"); - b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). - addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). - addStr("z").endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::16"); + doc->setValue("f0", sfb.tokenize("foo bar baz").alt_word("altbaz").tokenize(" y").alt_word("alty").tokenize(" z").build()); + return doc; } Document::UP -makeDoc17(DocBuilder &b) +makeDoc17(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::17"); - b.startIndexField("f1").addStr("foo0").addStr("bar0").endField(); - b.startIndexField("f2").startElement(1).addStr("foo").addStr("bar").endElement().startElement(1).addStr("bar").endElement().endField(); - b.startIndexField("f3").startElement(3).addStr("foo2").addStr("bar2").endElement().startElement(4).addStr("bar2").endElement().endField(); - return b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::17"); + doc->setValue("f1", sfb.tokenize("foo0 bar0").build()); + ArrayFieldValue string_array(b.get_data_type("Array<String>")); + string_array.add(sfb.tokenize("foo bar").build()); + string_array.add(sfb.tokenize("bar").build()); + doc->setValue("f2", string_array); + WeightedSetFieldValue string_wset(b.get_data_type("WeightedSet<String>")); + string_wset.add(sfb.tokenize("foo2 bar2").build(), 3); + string_wset.add(sfb.tokenize("bar2").build(), 4); + doc->setValue("f3", string_wset); + return doc; } vespalib::string corruptWord = "corruptWord"; Document::UP -makeCorruptDocument(DocBuilder &b, size_t wordOffset) +makeCorruptDocument(EmptyDocBuilder &b, size_t wordOffset) { - b.startDocument("id:ns:searchdocument::18"); - b.startIndexField("f0").addStr("before").addStr(corruptWord).addStr("after").addStr("z").endField(); - auto doc = b.endDocument(); + StringFieldBuilder sfb(b); + auto doc = b.make_document("id:ns:searchdocument::18"); + doc->setValue("f0", sfb.tokenize("before ").word(corruptWord).tokenize(" after z").build()); vespalib::nbostream stream; doc->serialize(stream); std::vector<char> raw; @@ -127,14 +134,14 @@ makeCorruptDocument(DocBuilder &b, size_t wordOffset) } vespalib::nbostream badstream; badstream.write(&raw[0], raw.size()); - return std::make_unique<Document>(*b.getDocumentTypeRepo(), badstream); + return std::make_unique<Document>(b.get_repo(), badstream); } } struct FieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -151,9 +158,21 @@ struct FieldInverterTest : public ::testing::Test { return schema; } + static EmptyDocBuilder::AddFieldsType + make_add_fields() + { + return [](auto& header) { using namespace document::config_builder; + using DataType = document::DataType; + header.addField("f0", DataType::T_STRING) + .addField("f1", DataType::T_STRING) + .addField("f2", Array(DataType::T_STRING)) + .addField("f3", Wset(DataType::T_STRING)); + }; + } + FieldInverterTest() : _schema(makeSchema()), - _b(_schema), + _b(make_add_fields()), _word_store(), _remover(_word_store), _inserter_backend(), diff --git a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp index b3ea948dfa7..1730e34adb5 100644 --- a/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp +++ b/searchlib/src/tests/memoryindex/memory_index/memory_index_test.cpp @@ -1,11 +1,15 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/searchlib/common/scheduletaskcallback.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/matchdatalayout.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/i_field_length_inspector.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> @@ -59,6 +63,12 @@ struct MySetup : public IFieldLengthInspector { } return FieldLengthInfo(); } + void add_fields(document::config_builder::Struct& header) const { + for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) { + auto& field = schema.getIndexField(i); + header.addField(field.getName(), document::DataType::T_STRING); + } + } }; @@ -70,31 +80,38 @@ struct Index { std::unique_ptr<ISequencedTaskExecutor> _invertThreads; std::unique_ptr<ISequencedTaskExecutor> _pushThreads; MemoryIndex index; - DocBuilder builder; + EmptyDocBuilder builder; + StringFieldBuilder sfb; + std::unique_ptr<Document> builder_doc; uint32_t docid; std::string currentField; + bool add_space; Index(const MySetup &setup); ~Index(); void closeField() { if (!currentField.empty()) { - builder.endField(); + builder_doc->setValue(currentField, sfb.build()); currentField.clear(); } } Index &doc(uint32_t id) { docid = id; - builder.startDocument(vespalib::make_string("id:ns:searchdocument::%u", id)); + builder_doc = builder.make_document(vespalib::make_string("id:ns:searchdocument::%u", id)); return *this; } Index &field(const std::string &name) { closeField(); - builder.startIndexField(name); currentField = name; + add_space = false; return *this; } Index &add(const std::string &token) { - builder.addStr(token); + if (add_space) { + sfb.space(); + } + add_space = true; + sfb.word(token); return *this; } void internalSyncCommit() { @@ -106,7 +123,7 @@ struct Index { } Document::UP commit() { closeField(); - Document::UP d = builder.endDocument(); + Document::UP d = std::move(builder_doc); index.insertDocument(docid, *d, {}); internalSyncCommit(); return d; @@ -133,9 +150,12 @@ Index::Index(const MySetup &setup) _invertThreads(SequencedTaskExecutor::create(invert_executor, 2)), _pushThreads(SequencedTaskExecutor::create(push_executor, 2)), index(schema, setup, *_invertThreads, *_pushThreads), - builder(schema), + builder([&setup](auto& header) { setup.add_fields(header); }), + sfb(builder), + builder_doc(), docid(1), - currentField() + currentField(), + add_space(false) { } Index::~Index() = default; diff --git a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp index 969f483eef6..3995f06628c 100644 --- a/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/url_field_inverter/url_field_inverter_test.cpp @@ -1,11 +1,21 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/memoryindex/url_field_inverter.h> +#include <vespa/document/datatype/urldatatype.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/document/fieldvalue/arrayfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/fieldvalue/structfieldvalue.h> +#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/index/empty_doc_builder.h> #include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/index/schema_index_fields.h> +#include <vespa/searchlib/index/string_field_builder.h> #include <vespa/searchlib/memoryindex/field_index_remover.h> #include <vespa/searchlib/memoryindex/field_inverter.h> -#include <vespa/searchlib/memoryindex/url_field_inverter.h> #include <vespa/searchlib/memoryindex/word_store.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter_backend.h> @@ -14,6 +24,10 @@ namespace search { using document::Document; +using document::ArrayFieldValue; +using document::StructFieldValue; +using document::UrlDataType; +using document::WeightedSetFieldValue; using index::schema::CollectionType; using index::schema::DataType; @@ -26,160 +40,88 @@ namespace { const vespalib::string url = "url"; Document::UP -makeDoc10Single(DocBuilder &b) +makeDoc10Single(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:81/fluke?ab=2#4"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("81"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("4"). - endSubField(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StructFieldValue url_value(b.get_data_type("url")); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + url_value.setValue("all", sfb.tokenize("http://www.example.com:81/fluke?ab=2#4").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("81").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("4").build()); + doc->setValue("url", url_value); + return doc; } Document::UP -makeDoc10Array(DocBuilder &b) +makeDoc10Array(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:82/fluke?ab=2#8"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("8"). - endSubField(). - endElement(). - startElement(1). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("82"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("9"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + ArrayFieldValue url_array(b.get_data_type("Array<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:82/fluke?ab=2#8").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("82").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("8").build()); + url_array.add(url_value); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:82/fluke?ab=2#9").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("fragment", sfb.tokenize("9").build()); + url_array.add(url_value); + doc->setValue("url", url_array); + return doc; } Document::UP -makeDoc10WeightedSet(DocBuilder &b) +makeDoc10WeightedSet(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - b.startIndexField("url"). - startElement(4). - startSubField("all"). - addUrlTokenizedString("http://www.example.com:83/fluke?ab=2#12"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.example.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("83"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - addTermAnnotation("altfluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("12"). - endSubField(). - endElement(). - startElement(7). - startSubField("all"). - addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). - endSubField(). - startSubField("scheme"). - addUrlTokenizedString("http"). - endSubField(). - startSubField("host"). - addUrlTokenizedString("www.flickr.com"). - endSubField(). - startSubField("port"). - addUrlTokenizedString("85"). - endSubField(). - startSubField("path"). - addUrlTokenizedString("/fluke"). - endSubField(). - startSubField("query"). - addUrlTokenizedString("ab=2"). - endSubField(). - startSubField("fragment"). - addUrlTokenizedString("13"). - endSubField(). - endElement(). - endField(); - return b.endDocument(); + auto doc = b.make_document("id:ns:searchdocument::10"); + StringFieldBuilder sfb(b); + sfb.url_mode(true); + WeightedSetFieldValue url_wset(b.get_data_type("WeightedSet<url>")); + StructFieldValue url_value(b.get_data_type("url")); + url_value.setValue("all", sfb.tokenize("http://www.example.com:83/fluke?ab=2#12").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.example.com").build()); + url_value.setValue("port", sfb.tokenize("83").build()); + url_value.setValue("path", sfb.tokenize("/fluke").alt_word("altfluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("12").build()); + url_wset.add(url_value, 4); + url_value.setValue("all", sfb.tokenize("http://www.flickr.com:85/fluke?ab=2#13").build()); + url_value.setValue("scheme", sfb.tokenize("http").build()); + url_value.setValue("host", sfb.tokenize("www.flickr.com").build()); + url_value.setValue("port", sfb.tokenize("85").build()); + url_value.setValue("path", sfb.tokenize("/fluke").build()); + url_value.setValue("query", sfb.tokenize("ab=2").build()); + url_value.setValue("fragment", sfb.tokenize("13").build()); + url_wset.add(url_value, 7); + doc->setValue("url", url_wset); + return doc; } Document::UP -makeDoc10Empty(DocBuilder &b) +makeDoc10Empty(EmptyDocBuilder &b) { - b.startDocument("id:ns:searchdocument::10"); - return b.endDocument(); + return b.make_document("id:ns:searchdocument::10"); } } struct UrlFieldInverterTest : public ::testing::Test { Schema _schema; - DocBuilder _b; + EmptyDocBuilder _b; WordStore _word_store; FieldIndexRemover _remover; test::OrderedFieldIndexInserterBackend _inserter_backend; @@ -195,9 +137,10 @@ struct UrlFieldInverterTest : public ::testing::Test { return schema; } - UrlFieldInverterTest(Schema::CollectionType collectionType) + UrlFieldInverterTest(Schema::CollectionType collectionType, + EmptyDocBuilder::AddFieldsType add_fields) : _schema(makeSchema(collectionType)), - _b(_schema), + _b(add_fields), _word_store(), _remover(_word_store), _inserter_backend(), @@ -250,16 +193,32 @@ struct UrlFieldInverterTest : public ::testing::Test { UrlFieldInverterTest::~UrlFieldInverterTest() = default; +EmptyDocBuilder::AddFieldsType +add_single_url = [](auto& header) { + header.addField("url", UrlDataType::getInstance().getId()); }; + +EmptyDocBuilder::AddFieldsType +add_array_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Array(UrlDataType::getInstance().getId())); }; + +EmptyDocBuilder::AddFieldsType +add_wset_url = [](auto& header) { + using namespace document::config_builder; + header.addField("url", Wset(UrlDataType::getInstance().getId())); }; + + + struct SingleInverterTest : public UrlFieldInverterTest { - SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE) {} + SingleInverterTest() : UrlFieldInverterTest(CollectionType::SINGLE, add_single_url) {} }; struct ArrayInverterTest : public UrlFieldInverterTest { - ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY) {} + ArrayInverterTest() : UrlFieldInverterTest(CollectionType::ARRAY, add_array_url) {} }; struct WeightedSetInverterTest : public UrlFieldInverterTest { - WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET) {} + WeightedSetInverterTest() : UrlFieldInverterTest(CollectionType::WEIGHTEDSET, add_wset_url) {} }; diff --git a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp index 64cb6a6c146..cb9fa8522a8 100644 --- a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp +++ b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp @@ -58,7 +58,7 @@ public: DirectTensorStoreTest() : store() {} virtual ~DirectTensorStoreTest() { - store.clearHoldLists(); + store.reclaim_all_memory(); } void expect_tensor(const Value* exp, EntryRef ref) { diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 7877b488065..958423860e5 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -99,10 +99,10 @@ public: commit(); } void commit() { - index->transfer_hold_lists(gen_handler.getCurrentGeneration()); + index->assign_generation(gen_handler.getCurrentGeneration()); gen_handler.incGeneration(); - gen_handler.updateFirstUsedGeneration(); - index->trim_hold_lists(gen_handler.getFirstUsedGeneration()); + gen_handler.update_oldest_used_generation(); + index->reclaim_memory(gen_handler.get_oldest_used_generation()); } void set_filter(std::vector<uint32_t> docids) { uint32_t sz = 10; diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index d559fa592ad..47812c2a63c 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -267,10 +267,10 @@ public: ASSERT_EQ(r.get(), nullptr); } void commit(uint32_t docid) { - index->transfer_hold_lists(gen_handler.getCurrentGeneration()); + index->assign_generation(gen_handler.getCurrentGeneration()); gen_handler.incGeneration(); - gen_handler.updateFirstUsedGeneration(); - index->trim_hold_lists(gen_handler.getFirstUsedGeneration()); + gen_handler.update_oldest_used_generation(); + index->reclaim_memory(gen_handler.get_oldest_used_generation()); std::lock_guard<std::mutex> guard(in_progress_lock); in_progress->clearBit(docid); // printf("commit: %u\n", docid); diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 963285d760d..100470b5a5f 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -184,7 +184,7 @@ void AttributeVector::incGeneration() { // Freeze trees etc, to stop new readers from accessing currently held data - onGenerationChange(_genHandler.getNextGeneration()); + before_inc_generation(_genHandler.getCurrentGeneration()); _genHandler.incGeneration(); // Remove old data on hold lists that can no longer be reached by readers removeAllOldGenerations(); @@ -237,8 +237,8 @@ AttributeVector::headerTypeOK(const vespalib::GenericHeader &header) const getConfig().collectionType().asString(); } -void AttributeVector::removeOldGenerations(generation_t firstUsed) { (void) firstUsed; } -void AttributeVector::onGenerationChange(generation_t generation) { (void) generation; } +void AttributeVector::reclaim_memory(generation_t oldest_used_gen) { (void) oldest_used_gen; } +void AttributeVector::before_inc_generation(generation_t current_gen) { (void) current_gen; } const IEnumStore* AttributeVector::getEnumStoreBase() const { return nullptr; } IEnumStore* AttributeVector::getEnumStoreBase() { return nullptr; } const attribute::MultiValueMappingBase * AttributeVector::getMultiValueBase() const { return nullptr; } @@ -409,8 +409,8 @@ bool AttributeVector::applyWeight(DocId, const FieldValue&, const AssignValueUpd void AttributeVector::removeAllOldGenerations() { - _genHandler.updateFirstUsedGeneration(); - removeOldGenerations(_genHandler.getFirstUsedGeneration()); + _genHandler.update_oldest_used_generation(); + reclaim_memory(_genHandler.get_oldest_used_generation()); } @@ -483,14 +483,12 @@ AttributeVector::compactLidSpace(uint32_t wantedLidLimit) { incGeneration(); } - bool AttributeVector::canShrinkLidSpace() const { return wantShrinkLidSpace() && - _compactLidSpaceGeneration.load(std::memory_order_relaxed) < getFirstUsedGeneration(); + _compactLidSpaceGeneration.load(std::memory_order_relaxed) < get_oldest_used_generation(); } - void AttributeVector::shrinkLidSpace() { diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index f245a216aeb..6963814be0c 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -158,8 +158,8 @@ public: void incGeneration(); void removeAllOldGenerations(); - generation_t getFirstUsedGeneration() const { - return _genHandler.getFirstUsedGeneration(); + generation_t get_oldest_used_generation() const { + return _genHandler.get_oldest_used_generation(); } generation_t getCurrentGeneration() const { @@ -446,8 +446,8 @@ private: GenerationHandler::Guard takeGenerationGuard() { return _genHandler.takeGuard(); } /// Clean up [0, firstUsed> - virtual void removeOldGenerations(generation_t firstUsed); - virtual void onGenerationChange(generation_t generation); + virtual void reclaim_memory(generation_t oldest_used_gen); + virtual void before_inc_generation(generation_t current_gen); virtual void onUpdateStat() = 0; /** * Used to regulate access to critical resources. Apply the @@ -466,8 +466,8 @@ public: /** * Should be called by the writer thread. */ - void updateFirstUsedGeneration() { - _genHandler.updateFirstUsedGeneration(); + void update_oldest_used_generation() { + _genHandler.update_oldest_used_generation(); } /** diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 52f42ed368e..0a0b2040b2a 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -96,8 +96,8 @@ public: vespalib::AddressSpace get_values_address_space_usage() const override; - void transfer_hold_lists(generation_t generation); - void trim_hold_lists(generation_t first_used); + void assign_generation(generation_t current_gen); + void reclaim_memory(generation_t first_used); ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 1ef194f6812..b863e56fb4a 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -104,17 +104,17 @@ EnumStoreT<EntryT>::get_values_address_space_usage() const template <typename EntryT> void -EnumStoreT<EntryT>::transfer_hold_lists(generation_t generation) +EnumStoreT<EntryT>::assign_generation(generation_t current_gen) { - _store.transferHoldLists(generation); + _store.assign_generation(current_gen); } template <typename EntryT> void -EnumStoreT<EntryT>::trim_hold_lists(generation_t firstUsed) +EnumStoreT<EntryT>::reclaim_memory(generation_t oldest_used_gen) { // remove generations in the range [0, firstUsed> - _store.trimHoldLists(firstUsed); + _store.reclaim_memory(oldest_used_gen); } template <typename EntryT> diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp index df75b0ab4e5..f8cf742bdb2 100644 --- a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp @@ -229,10 +229,10 @@ FlagAttributeT<B>::resizeBitVectors(uint32_t neededSize) template <typename B> void -FlagAttributeT<B>::removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed) +FlagAttributeT<B>::reclaim_memory(vespalib::GenerationHandler::generation_t oldest_used_gen) { - B::removeOldGenerations(firstUsed); - _bitVectorHolder.reclaim(firstUsed); + B::reclaim_memory(oldest_used_gen); + _bitVectorHolder.reclaim(oldest_used_gen); } template class FlagAttributeT<FlagBaseImpl>; diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.h b/searchlib/src/vespa/searchlib/attribute/flagattribute.h index 796c1493cc9..df75e7afa04 100644 --- a/searchlib/src/vespa/searchlib/attribute/flagattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.h @@ -33,7 +33,7 @@ private: void ensureGuardBit(); void clearGuardBit(DocId doc); void resizeBitVectors(uint32_t neededSize); - void removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed) override; + void reclaim_memory(vespalib::GenerationHandler::generation_t oldest_used_gen) override; uint32_t getOffset(int8_t value) const { return value + 128; } using AtomicBitVectorPtr = vespalib::datastore::AtomicValueWrapper<BitVector *>; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h index 61798959f3e..98587baadd2 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h @@ -48,8 +48,8 @@ public: */ ReadView make_read_view(size_t read_size) const { return ReadView(_indices.make_read_view(read_size), &_store); } // Pass on hold list management to underlying store - void transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } - void trimHoldLists(generation_t firstUsed) { _store.trimHoldLists(firstUsed); } + void assign_generation(generation_t current_gen) { _store.assign_generation(current_gen); } + void reclaim_memory(generation_t oldest_used_gen) { _store.reclaim_memory(oldest_used_gen); } void prepareLoadFromMultiValue() { _store.setInitializing(true); } void doneLoadFromMultiValue() { _store.setInitializing(false); } diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index ee8f3181fd9..a073060afc5 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -63,8 +63,8 @@ public: void onCommit() override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; //----------------------------------------------------------------------------------------------------------------- // Attribute read API diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index ec948882312..4dad82073e0 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -194,15 +194,15 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() template <typename B, typename M> void -MultiValueEnumAttribute<B, M>::removeOldGenerations(generation_t firstUsed) +MultiValueEnumAttribute<B, M>::reclaim_memory(generation_t oldest_used_gen) { - this->_enumStore.trim_hold_lists(firstUsed); - this->_mvMapping.trimHoldLists(firstUsed); + this->_enumStore.reclaim_memory(oldest_used_gen); + this->_mvMapping.reclaim_memory(oldest_used_gen); } template <typename B, typename M> void -MultiValueEnumAttribute<B, M>::onGenerationChange(generation_t generation) +MultiValueEnumAttribute<B, M>::before_inc_generation(generation_t current_gen) { /* * Freeze tree before generation is increased in attribute vector @@ -211,8 +211,8 @@ MultiValueEnumAttribute<B, M>::onGenerationChange(generation_t generation) * sufficiently new frozen tree. */ freezeEnumDictionary(); - this->_mvMapping.transferHoldLists(generation - 1); - this->_enumStore.transfer_hold_lists(generation - 1); + this->_mvMapping.assign_generation(current_gen); + this->_enumStore.assign_generation(current_gen); } template <typename B, typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h index 0a29b4af48d..ed78f7776f1 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h @@ -60,9 +60,9 @@ public: uint32_t getValueCount(DocId doc) const override; void onCommit() override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; + void reclaim_memory(generation_t oldest_used_gen) override; - void onGenerationChange(generation_t generation) override; + void before_inc_generation(generation_t current_gen) override; bool onLoad(vespalib::Executor *executor) override; virtual bool onLoadEnumerated(ReaderBase &attrReader); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 8cabd8483bf..b746fa5d555 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -96,16 +96,16 @@ void MultiValueNumericAttribute<B, M>::setNewValues(DocId doc, const std::vector } template <typename B, typename M> -void MultiValueNumericAttribute<B, M>::removeOldGenerations(generation_t firstUsed) +void MultiValueNumericAttribute<B, M>::reclaim_memory(generation_t oldest_used_gen) { - this->_mvMapping.trimHoldLists(firstUsed); + this->_mvMapping.reclaim_memory(oldest_used_gen); } template <typename B, typename M> -void MultiValueNumericAttribute<B, M>::onGenerationChange(generation_t generation) +void MultiValueNumericAttribute<B, M>::before_inc_generation(generation_t current_gen) { - this->_mvMapping.transferHoldLists(generation - 1); + this->_mvMapping.assign_generation(current_gen); } template <typename B, typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h index 4bd8ad6e99f..a22a6241ab2 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -80,8 +80,8 @@ public: MultiValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg); ~MultiValueNumericPostingAttribute(); - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index 9a8c9738bc0..deee72dcf39 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -56,19 +56,19 @@ MultiValueNumericPostingAttribute<B, M>::~MultiValueNumericPostingAttribute() template <typename B, typename M> void -MultiValueNumericPostingAttribute<B, M>::removeOldGenerations(generation_t firstUsed) +MultiValueNumericPostingAttribute<B, M>::reclaim_memory(generation_t oldest_used_gen) { - MultiValueNumericEnumAttribute<B, M>::removeOldGenerations(firstUsed); - _postingList.trimHoldLists(firstUsed); + MultiValueNumericEnumAttribute<B, M>::reclaim_memory(oldest_used_gen); + _postingList.reclaim_memory(oldest_used_gen); } template <typename B, typename M> void -MultiValueNumericPostingAttribute<B, M>::onGenerationChange(generation_t generation) +MultiValueNumericPostingAttribute<B, M>::before_inc_generation(generation_t current_gen) { _postingList.freeze(); - MultiValueNumericEnumAttribute<B, M>::onGenerationChange(generation); - _postingList.transferHoldLists(generation - 1); + MultiValueNumericEnumAttribute<B, M>::before_inc_generation(current_gen); + _postingList.assign_generation(current_gen); } template <typename B, typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index 4deb71e9759..2e355a9aed2 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -77,8 +77,8 @@ public: MultiValueStringPostingAttributeT(const vespalib::string & name); ~MultiValueStringPostingAttributeT(); - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index fef3db582c8..cfd00f84636 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -75,19 +75,19 @@ MultiValueStringPostingAttributeT<B, T>::mergeMemoryStats(vespalib::MemoryUsage template <typename B, typename T> void -MultiValueStringPostingAttributeT<B, T>::removeOldGenerations(generation_t firstUsed) +MultiValueStringPostingAttributeT<B, T>::reclaim_memory(generation_t oldest_used_gen) { - MultiValueStringAttributeT<B, T>::removeOldGenerations(firstUsed); - _postingList.trimHoldLists(firstUsed); + MultiValueStringAttributeT<B, T>::reclaim_memory(oldest_used_gen); + _postingList.reclaim_memory(oldest_used_gen); } template <typename B, typename T> void -MultiValueStringPostingAttributeT<B, T>::onGenerationChange(generation_t generation) +MultiValueStringPostingAttributeT<B, T>::before_inc_generation(generation_t current_gen) { _postingList.freeze(); - MultiValueStringAttributeT<B, T>::onGenerationChange(generation); - _postingList.transferHoldLists(generation - 1); + MultiValueStringAttributeT<B, T>::before_inc_generation(current_gen); + _postingList.assign_generation(current_gen); } diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp index f2e07bba853..f34099de758 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp @@ -125,17 +125,17 @@ PredicateAttribute::onUpdateStat() } void -PredicateAttribute::removeOldGenerations(generation_t firstUsed) +PredicateAttribute::reclaim_memory(generation_t oldest_used_gen) { - getGenerationHolder().reclaim(firstUsed); - _index->trimHoldLists(firstUsed); + getGenerationHolder().reclaim(oldest_used_gen); + _index->reclaim_memory(oldest_used_gen); } void -PredicateAttribute::onGenerationChange(generation_t generation) +PredicateAttribute::before_inc_generation(generation_t current_gen) { - getGenerationHolder().assign_generation(generation - 1); - _index->transferHoldLists(generation - 1); + getGenerationHolder().assign_generation(current_gen); + _index->assign_generation(current_gen); } void diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h index f5d789298a0..159e71e99e3 100644 --- a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h @@ -48,8 +48,8 @@ public: void onSave(IAttributeSaveTarget & saveTarget) override; bool onLoad(vespalib::Executor *executor) override; void onCommit() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; void onUpdateStat() override; bool addDoc(DocId &doc_id) override; uint32_t clearDoc(DocId doc_id) override; diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index 36fb02f4c4b..e620d3aca72 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -161,21 +161,21 @@ ReferenceAttribute::clearDoc(DocId doc) } void -ReferenceAttribute::removeOldGenerations(generation_t firstUsed) +ReferenceAttribute::reclaim_memory(generation_t oldest_used_gen) { - _referenceMappings.trimHoldLists(firstUsed); - _store.trimHoldLists(firstUsed); - getGenerationHolder().reclaim(firstUsed); + _referenceMappings.reclaim_memory(oldest_used_gen); + _store.reclaim_memory(oldest_used_gen); + getGenerationHolder().reclaim(oldest_used_gen); } void -ReferenceAttribute::onGenerationChange(generation_t generation) +ReferenceAttribute::before_inc_generation(generation_t current_gen) { _referenceMappings.freeze(); _store.freeze(); - _referenceMappings.transferHoldLists(generation - 1); - _store.transferHoldLists(generation - 1); - getGenerationHolder().assign_generation(generation - 1); + _referenceMappings.assign_generation(current_gen); + _store.assign_generation(current_gen); + getGenerationHolder().assign_generation(current_gen); } void diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h index dc3e2ad729a..e0ae906eb23 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h @@ -50,8 +50,8 @@ private: ReferenceMappings _referenceMappings; void onAddDocs(DocId docIdLimit) override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; void onCommit() override; void onUpdateStat() override; std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; diff --git a/searchlib/src/vespa/searchlib/attribute/reference_mappings.h b/searchlib/src/vespa/searchlib/attribute/reference_mappings.h index 2ccc164bf08..cf26b424208 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_mappings.h +++ b/searchlib/src/vespa/searchlib/attribute/reference_mappings.h @@ -59,9 +59,9 @@ public: void clearMapping(const Reference &entry); // Hold list management & freezing - void trimHoldLists(generation_t usedGen) { _reverseMapping.trimHoldLists(usedGen); } + void reclaim_memory(generation_t oldest_used_gen) { _reverseMapping.reclaim_memory(oldest_used_gen); } void freeze() { _reverseMapping.freeze(); } - void transferHoldLists(generation_t generation) { _reverseMapping.transferHoldLists(generation); } + void assign_generation(generation_t current_gen) { _reverseMapping.assign_generation(current_gen); } // Handle mapping changes void notifyReferencedPut(const Reference &entry, uint32_t targetLid); diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp index 6e07a9e658e..ac05ab3b7c6 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp @@ -257,13 +257,13 @@ SingleBoolAttribute::getEstimatedSaveByteSize() const } void -SingleBoolAttribute::removeOldGenerations(generation_t firstUsed) { - getGenerationHolder().reclaim(firstUsed); +SingleBoolAttribute::reclaim_memory(generation_t oldest_used_gen) { + getGenerationHolder().reclaim(oldest_used_gen); } void -SingleBoolAttribute::onGenerationChange(generation_t generation) { - getGenerationHolder().assign_generation(generation - 1); +SingleBoolAttribute::before_inc_generation(generation_t current_gen) { + getGenerationHolder().assign_generation(current_gen); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h index 7868c228e77..a02d5c7d80d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h @@ -28,8 +28,8 @@ public: void onSave(IAttributeSaveTarget &saveTarget) override; void clearDocs(DocId lidLow, DocId lidLimit, bool in_shrink_lid_space) override; void onShrinkLidSpace() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; uint64_t getEstimatedSaveByteSize() const override; std::unique_ptr<attribute::SearchContext> diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h index 6e46c697fbc..dbf3e4e7c58 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -111,8 +111,8 @@ public: uint32_t getValueCount(DocId doc) const override; void onCommit() override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; EnumHandle getEnum(DocId doc) const override { return getE(doc); } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 11742bf8f48..dd400295b3d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -264,15 +264,15 @@ SingleValueEnumAttribute<B>::load_enumerated_data(ReaderBase& attrReader, template <typename B> void -SingleValueEnumAttribute<B>::removeOldGenerations(generation_t firstUsed) +SingleValueEnumAttribute<B>::reclaim_memory(generation_t oldest_used_gen) { - this->_enumStore.trim_hold_lists(firstUsed); - getGenerationHolder().reclaim(firstUsed); + this->_enumStore.reclaim_memory(oldest_used_gen); + getGenerationHolder().reclaim(oldest_used_gen); } template <typename B> void -SingleValueEnumAttribute<B>::onGenerationChange(generation_t generation) +SingleValueEnumAttribute<B>::before_inc_generation(generation_t current_gen) { /* * Freeze tree before generation is increased in attribute vector @@ -281,8 +281,8 @@ SingleValueEnumAttribute<B>::onGenerationChange(generation_t generation) * sufficiently new frozen tree. */ freezeEnumDictionary(); - getGenerationHolder().assign_generation(generation - 1); - this->_enumStore.transfer_hold_lists(generation - 1); + getGenerationHolder().assign_generation(current_gen); + this->_enumStore.assign_generation(current_gen); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h index fd2767eaee1..c6387323fea 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h @@ -55,8 +55,8 @@ public: void onCommit() override; void onAddDocs(DocId lidLimit) override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; bool addDoc(DocId & doc) override; bool onLoad(vespalib::Executor *executor) override; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp index bbacc10e79c..66af5fe4adc 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp @@ -95,16 +95,16 @@ SingleValueNumericAttribute<B>::addDoc(DocId & doc) { template <typename B> void -SingleValueNumericAttribute<B>::removeOldGenerations(generation_t firstUsed) +SingleValueNumericAttribute<B>::reclaim_memory(generation_t oldest_used_gen) { - getGenerationHolder().reclaim(firstUsed); + getGenerationHolder().reclaim(oldest_used_gen); } template <typename B> void -SingleValueNumericAttribute<B>::onGenerationChange(generation_t generation) +SingleValueNumericAttribute<B>::before_inc_generation(generation_t current_gen) { - getGenerationHolder().assign_generation(generation - 1); + getGenerationHolder().assign_generation(current_gen); } template <typename B> diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h index 720fb211e1a..f2343c1a57c 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h @@ -69,8 +69,8 @@ public: SingleValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg); ~SingleValueNumericPostingAttribute(); - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index 2050f887c33..1775774171d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -127,19 +127,19 @@ SingleValueNumericPostingAttribute<B>::applyValueChanges(EnumStoreBatchUpdater& template <typename B> void -SingleValueNumericPostingAttribute<B>::removeOldGenerations(generation_t firstUsed) +SingleValueNumericPostingAttribute<B>::reclaim_memory(generation_t oldest_used_gen) { - SingleValueNumericEnumAttribute<B>::removeOldGenerations(firstUsed); - _postingList.trimHoldLists(firstUsed); + SingleValueNumericEnumAttribute<B>::reclaim_memory(oldest_used_gen); + _postingList.reclaim_memory(oldest_used_gen); } template <typename B> void -SingleValueNumericPostingAttribute<B>::onGenerationChange(generation_t generation) +SingleValueNumericPostingAttribute<B>::before_inc_generation(generation_t current_gen) { _postingList.freeze(); - SingleValueNumericEnumAttribute<B>::onGenerationChange(generation); - _postingList.transferHoldLists(generation - 1); + SingleValueNumericEnumAttribute<B>::before_inc_generation(current_gen); + _postingList.assign_generation(current_gen); } template <typename B> diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp index 02ab5798f9f..b2662a0928d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -106,16 +106,16 @@ SingleValueSmallNumericAttribute::onUpdateStat() void -SingleValueSmallNumericAttribute::removeOldGenerations(generation_t firstUsed) +SingleValueSmallNumericAttribute::reclaim_memory(generation_t oldest_used_gen) { - getGenerationHolder().reclaim(firstUsed); + getGenerationHolder().reclaim(oldest_used_gen); } void -SingleValueSmallNumericAttribute::onGenerationChange(generation_t generation) +SingleValueSmallNumericAttribute::before_inc_generation(generation_t current_gen) { - getGenerationHolder().assign_generation(generation - 1); + getGenerationHolder().assign_generation(current_gen); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h index 4bf120d7952..b2af8752fa4 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h @@ -72,8 +72,8 @@ public: void onCommit() override; void onAddDocs(DocId docIdLimit) override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; bool addDoc(DocId & doc) override; bool onLoad(vespalib::Executor *executor) override; void onSave(IAttributeSaveTarget &saveTarget) override; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h index 30549f3048a..358c95f65dc 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h @@ -71,8 +71,8 @@ public: SingleValueStringPostingAttributeT(const vespalib::string & name); ~SingleValueStringPostingAttributeT(); - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index f340d9f70c3..eef72984e79 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -127,19 +127,19 @@ SingleValueStringPostingAttributeT<B>::applyValueChanges(EnumStoreBatchUpdater& template <typename B> void -SingleValueStringPostingAttributeT<B>::removeOldGenerations(generation_t firstUsed) +SingleValueStringPostingAttributeT<B>::reclaim_memory(generation_t oldest_used_gen) { - SingleValueStringAttributeT<B>::removeOldGenerations(firstUsed); - _postingList.trimHoldLists(firstUsed); + SingleValueStringAttributeT<B>::reclaim_memory(oldest_used_gen); + _postingList.reclaim_memory(oldest_used_gen); } template <typename B> void -SingleValueStringPostingAttributeT<B>::onGenerationChange(generation_t generation) +SingleValueStringPostingAttributeT<B>::before_inc_generation(generation_t current_gen) { _postingList.freeze(); - SingleValueStringAttributeT<B>::onGenerationChange(generation); - _postingList.transferHoldLists(generation - 1); + SingleValueStringAttributeT<B>::before_inc_generation(current_gen); + _postingList.assign_generation(current_gen); } template <typename B> diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index 7036ef238b6..0e7e492c954 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -109,8 +109,8 @@ LogDataStore::~LogDataStore() { // Must be called before ending threads as there are sanity checks. _fileChunks.clear(); - _genHandler.updateFirstUsedGeneration(); - _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration()); + _genHandler.update_oldest_used_generation(); + _lidInfo.reclaim_memory(_genHandler.get_oldest_used_generation()); } void @@ -485,8 +485,8 @@ void LogDataStore::compactFile(FileId fileId) FileChunk::UP toDie; for (;;) { MonitorGuard guard(_updateLock); - _genHandler.updateFirstUsedGeneration(); - if (currentGeneration < _genHandler.getFirstUsedGeneration()) { + _genHandler.update_oldest_used_generation(); + if (currentGeneration < _genHandler.get_oldest_used_generation()) { if (_holdFileChunks[fc->getFileId().getId()] == 0u) { toDie = std::move(fc); break; @@ -939,8 +939,8 @@ LogDataStore::setLid(const MonitorGuard &guard, uint32_t lid, const LidInfo &met { (void) guard; if (lid < _lidInfo.size()) { - _genHandler.updateFirstUsedGeneration(); - _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration()); + _genHandler.update_oldest_used_generation(); + _lidInfo.reclaim_memory(_genHandler.get_oldest_used_generation()); const LidInfo prev = vespalib::atomic::load_ref_relaxed(_lidInfo[lid]); if (prev.valid()) { _fileChunks[prev.getFileId()]->remove(lid, prev.size()); @@ -958,8 +958,8 @@ LogDataStore::incGeneration() { _lidInfo.setGeneration(_genHandler.getNextGeneration()); _genHandler.incGeneration(); - _genHandler.updateFirstUsedGeneration(); - _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration()); + _genHandler.update_oldest_used_generation(); + _lidInfo.reclaim_memory(_genHandler.get_oldest_used_generation()); } size_t @@ -1213,7 +1213,7 @@ LogDataStore::canShrinkLidSpace(const MonitorGuard &) const { // Update lock is held, allowing call to _lidInfo.get_size() return getDocIdLimit() < _lidInfo.get_size() && - _compactLidSpaceGeneration < _genHandler.getFirstUsedGeneration(); + _compactLidSpaceGeneration < _genHandler.get_oldest_used_generation(); } size_t diff --git a/searchlib/src/vespa/searchlib/index/CMakeLists.txt b/searchlib/src/vespa/searchlib/index/CMakeLists.txt index 958614844d1..afeb020598b 100644 --- a/searchlib/src/vespa/searchlib/index/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/index/CMakeLists.txt @@ -2,9 +2,7 @@ vespa_add_library(searchlib_searchlib_index OBJECT SOURCES dictionaryfile.cpp - docbuilder.cpp docidandfeatures.cpp - doctypebuilder.cpp dummyfileheadercontext.cpp empty_doc_builder.cpp indexbuilder.cpp @@ -15,6 +13,7 @@ vespa_add_library(searchlib_searchlib_index OBJECT postinglistparams.cpp schemautil.cpp schema_index_fields.cpp + string_field_builder.cpp uri_field.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.cpp b/searchlib/src/vespa/searchlib/index/docbuilder.cpp deleted file mode 100644 index d6169f2f396..00000000000 --- a/searchlib/src/vespa/searchlib/index/docbuilder.cpp +++ /dev/null @@ -1,814 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "docbuilder.h" -#include <vespa/document/datatype/urldatatype.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/fastlib/text/unicodeutil.h> -#include <vespa/vespalib/geo/zcurve.h> -#include <vespa/vespalib/text/utf8.h> -#include <vespa/eval/eval/value.h> -#include <vespa/vespalib/data/slime/slime.h> - -using namespace document; -using namespace search::index; - -using search::index::schema::CollectionType; -using vespalib::Utf8Reader; -using vespalib::Utf8Writer; -using vespalib::geo::ZCurve; - -namespace { - -void -insertStr(const Schema::Field & sfield, document::FieldValue * fvalue, const vespalib::string & val) -{ - if (sfield.getDataType() == schema::DataType::STRING || - sfield.getDataType() == schema::DataType::RAW) - { - (dynamic_cast<LiteralFieldValueB *>(fvalue))->setValue(val); - } else { - throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); - } -} - -void -insertInt(const Schema::Field & sfield, document::FieldValue * fvalue, int64_t val) -{ - if (sfield.getDataType() == schema::DataType::INT8) { - (dynamic_cast<ByteFieldValue *>(fvalue))->setValue((uint8_t)val); - } else if (sfield.getDataType() == schema::DataType::INT16) { - (dynamic_cast<ShortFieldValue *>(fvalue))->setValue((int16_t)val); - } else if (sfield.getDataType() == schema::DataType::INT32) { - (dynamic_cast<IntFieldValue *>(fvalue))->setValue((int32_t)val); - } else if (sfield.getDataType() == schema::DataType::INT64) { - (dynamic_cast<LongFieldValue *>(fvalue))->setValue(val); - } else { - throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); - } -} - -void -insertFloat(const Schema::Field & sfield, document::FieldValue * fvalue, double val) -{ - if (sfield.getDataType() == schema::DataType::FLOAT) { - (dynamic_cast<FloatFieldValue *>(fvalue))->setValue((float)val); - } else if (sfield.getDataType() == schema::DataType::DOUBLE) { - (dynamic_cast<DoubleFieldValue *>(fvalue))->setValue(val); - } else { - throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); - } -} - -void insertPredicate(const Schema::Field &sfield, - document::FieldValue *fvalue, - std::unique_ptr<vespalib::Slime> val) { - if (sfield.getDataType() == schema::DataType::BOOLEANTREE) { - *(dynamic_cast<PredicateFieldValue *>(fvalue)) = - PredicateFieldValue(std::move(val)); - } else { - throw DocBuilder::Error(vespalib::make_string( - "Field '%s' not compatible", - sfield.getName().c_str())); - } -} - -void insertTensor(const Schema::Field &schemaField, - document::FieldValue *fvalue, - std::unique_ptr<vespalib::eval::Value> val) { - if (schemaField.getDataType() == schema::DataType::TENSOR) { - *(dynamic_cast<TensorFieldValue *>(fvalue)) = std::move(val); - } else { - throw DocBuilder::Error(vespalib::make_string( - "Field '%s' not compatible", - schemaField.getName().c_str())); - } -} - -void -insertPosition(const Schema::Field & sfield, - document::FieldValue * fvalue, int32_t xpos, int32_t ypos) -{ - assert(*fvalue->getDataType() == *DataType::LONG); - assert(sfield.getDataType() == schema::DataType::INT64); - (void) sfield; - int64_t zpos = ZCurve::encode(xpos, ypos); - document::LongFieldValue *zvalue = - dynamic_cast<LongFieldValue *>(fvalue); - zvalue->setValue(zpos); -} - -} - -namespace docbuilderkludge -{ - -namespace linguistics -{ - -const vespalib::string SPANTREE_NAME("linguistics"); - -enum TokenType { - UNKNOWN = 0, - SPACE = 1, - PUNCTUATION = 2, - SYMBOL = 3, - ALPHABETIC = 4, - NUMERIC = 5, - MARKER = 6 -}; - -} - -} - -using namespace docbuilderkludge; - -namespace { - -Annotation -makeTokenType(linguistics::TokenType type) -{ - return Annotation(*AnnotationType::TOKEN_TYPE, std::make_unique<IntFieldValue>(type)); -} - -} - -namespace search::index { - -VESPA_IMPLEMENT_EXCEPTION(DocBuilderError, vespalib::Exception); - -DocBuilder::FieldHandle::FieldHandle(const document::Field & dfield, const Schema::Field & field) : - _sfield(field), - _value(), - _element() -{ - _value = dfield.createValue(); -} - -DocBuilder::CollectionFieldHandle::CollectionFieldHandle(const document::Field & dfield, const Schema::Field & field) : - FieldHandle(dfield, field), - _elementWeight(1) -{ -} - -void -DocBuilder::CollectionFieldHandle::startElement(int32_t weight) -{ - assert(!_element); - _elementWeight = weight; - const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get()); - _element = value->createNested(); -} - -void -DocBuilder::CollectionFieldHandle::endElement() -{ - if (_sfield.getCollectionType() == CollectionType::ARRAY) { - onEndElement(); - ArrayFieldValue * value = dynamic_cast<ArrayFieldValue *>(_value.get()); - value->add(*_element); - } else if (_sfield.getCollectionType() == CollectionType::WEIGHTEDSET) { - onEndElement(); - WeightedSetFieldValue * value = dynamic_cast<WeightedSetFieldValue *>(_value.get()); - value->add(*_element, _elementWeight); - } else { - throw Error(vespalib::make_string("Field '%s' not compatible", _sfield.getName().c_str())); - } - _element.reset(); -} - -DocBuilder::IndexFieldHandle::IndexFieldHandle(const FixedTypeRepo & repo, const document::Field & dfield, const Schema::Field & sfield) - : CollectionFieldHandle(dfield, sfield), - _str(), - _strSymbols(0u), - _spanList(nullptr), - _spanTree(), - _lastSpan(nullptr), - _spanStart(0u), - _autoAnnotate(true), - _autoSpace(true), - _skipAutoSpace(true), - _uriField(false), - _subField(), - _repo(repo) -{ - _str.reserve(1023); - - if (_sfield.getCollectionType() == CollectionType::SINGLE) { - if (*_value->getDataType() == document::UrlDataType::getInstance()) { - _uriField = true; - } - } else { - const CollectionFieldValue * value = dynamic_cast<CollectionFieldValue *>(_value.get()); - if (value->getNestedType() == document::UrlDataType::getInstance()) { - _uriField = true; - } - } - startAnnotate(); -} - -void -DocBuilder::IndexFieldHandle::append(const vespalib::string &val) -{ - _strSymbols += val.size(); - _str += val; -} - -void -DocBuilder::IndexFieldHandle::addStr(const vespalib::string &val) -{ - assert(_spanTree); - if (val.empty()) { - return; - } - if (!_skipAutoSpace && _autoSpace) { - addSpace(); - } - _skipAutoSpace = false; - _spanStart = _strSymbols; - append(val); - if (_autoAnnotate) { - addSpan(); - addTermAnnotation(); - if (val[0] >= '0' && val[0] <= '9') { - addNumericTokenAnnotation(); - } else { - addAlphabeticTokenAnnotation(); - } - } -} - -void -DocBuilder::IndexFieldHandle::addSpace() -{ - addNoWordStr(" "); -} - -void -DocBuilder::IndexFieldHandle::addNoWordStr(const vespalib::string &val) -{ - assert(_spanTree); - if (val.empty()) { - return; - } - _spanStart = _strSymbols; - append(val); - if (_autoAnnotate) { - addSpan(); - if (val[0] == ' ' || val[0] == '\t') { - addSpaceTokenAnnotation(); - } else if (val[0] >= '0' && val[0] <= '9') { - addNumericTokenAnnotation(); - } else { - addAlphabeticTokenAnnotation(); - } - - } - _skipAutoSpace = true; -} - -void -DocBuilder::IndexFieldHandle::addTokenizedString(const vespalib::string &val, - bool urlMode) -{ - Utf8Reader r(val); - vespalib::string sbuf; - Utf8Writer w(sbuf); - uint32_t c = 0u; - bool oldWord = false; - assert(_uriField == urlMode); - assert(_uriField != _subField.empty()); - - while (r.hasMore()) { - c = r.getChar(); - bool newWord = Fast_UnicodeUtil::IsWordChar(c) || - (urlMode && (c == '-' || c == '_')); - if (oldWord != newWord) { - if (!sbuf.empty()) { - if (oldWord) { - addStr(sbuf); - } else { - addNoWordStr(sbuf); - } - sbuf.clear(); - } - oldWord = newWord; - } - w.putChar(c); - } - if (!sbuf.empty()) { - if (oldWord) { - addStr(sbuf); - } else { - addNoWordStr(sbuf); - } - } -} - -void -DocBuilder::IndexFieldHandle::addSpan(size_t start, size_t len) -{ - const SpanNode &span = _spanList->add(std::make_unique<Span>(start, len)); - _lastSpan = &span; -} - -void -DocBuilder::IndexFieldHandle::addSpan() -{ - size_t endPos = _strSymbols; - assert(endPos > _spanStart); - addSpan(_spanStart, endPos - _spanStart); - _spanStart = endPos; -} - -void -DocBuilder::IndexFieldHandle::addSpaceTokenAnnotation() -{ - assert(_spanTree); - assert(_lastSpan != nullptr); - _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::SPACE)); -} - -void -DocBuilder::IndexFieldHandle::addNumericTokenAnnotation() -{ - assert(_spanTree); - assert(_lastSpan != nullptr); - _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::NUMERIC)); -} - -void -DocBuilder::IndexFieldHandle::addAlphabeticTokenAnnotation() -{ - assert(_spanTree); - assert(_lastSpan != nullptr); - _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::ALPHABETIC)); -} - -void -DocBuilder::IndexFieldHandle::addTermAnnotation() -{ - assert(_spanTree); - assert(_lastSpan != nullptr); - _spanTree->annotate(*_lastSpan, *AnnotationType::TERM); -} - -void -DocBuilder::IndexFieldHandle::addTermAnnotation(const vespalib::string &val) -{ - assert(_spanTree); - assert(_lastSpan != nullptr); - _spanTree->annotate(*_lastSpan, - Annotation(*AnnotationType::TERM, - std::make_unique<StringFieldValue>(val))); -} - -void -DocBuilder::IndexFieldHandle::onEndElement() -{ - // Flush data for index field. - assert(_subField.empty()); - if (_uriField) { - return; - } - StringFieldValue * value; - if (_sfield.getCollectionType() != CollectionType::SINGLE) { - value = dynamic_cast<StringFieldValue *>(_element.get()); - } else { - value = dynamic_cast<StringFieldValue *>(_value.get()); - } - value->setValue(_str); - // Also drop all spans no annotation for now - if (_spanTree->numAnnotations() > 0u) { - StringFieldValue::SpanTrees trees; - trees.emplace_back(std::move(_spanTree)); - value->setSpanTrees(trees, _repo); - } else { - _spanTree.reset(); - } - _spanList = nullptr; - _lastSpan = nullptr; - _spanStart = 0u; - _strSymbols = 0u; - _str.clear(); - _skipAutoSpace = true; - startAnnotate(); -} - -void -DocBuilder::IndexFieldHandle::onEndField() -{ - if (_sfield.getCollectionType() == CollectionType::SINGLE) { - onEndElement(); - } -} - -void -DocBuilder::IndexFieldHandle::startAnnotate() -{ - SpanList::UP span_list(new SpanList); - _spanList = span_list.get(); - _spanTree.reset(new SpanTree(linguistics::SPANTREE_NAME, std::move(span_list))); -} - -void -DocBuilder::IndexFieldHandle::setAutoAnnotate(bool autoAnnotate) -{ - _autoAnnotate = autoAnnotate; -} - -void -DocBuilder::IndexFieldHandle::setAutoSpace(bool autoSpace) -{ - _autoSpace = autoSpace; -} - -void -DocBuilder::IndexFieldHandle::startSubField(const vespalib::string &subField) -{ - assert(_subField.empty()); - assert(_uriField); - _subField = subField; -} - -void -DocBuilder::IndexFieldHandle::endSubField() -{ - assert(!_subField.empty()); - assert(_uriField); - StructuredFieldValue *sValue; - if (_sfield.getCollectionType() != CollectionType::SINGLE) { - sValue = dynamic_cast<StructFieldValue *>(_element.get()); - } else { - sValue = dynamic_cast<StructFieldValue *>(_value.get()); - } - const Field &f = sValue->getField(_subField); - FieldValue::UP fval(f.getDataType().createFieldValue()); - *fval = _str; - StringFieldValue *value = dynamic_cast<StringFieldValue *>(fval.get()); - StringFieldValue::SpanTrees trees; - trees.emplace_back(std::move(_spanTree)); - value->setSpanTrees(trees, _repo); - sValue->setValue(f, *fval); - _spanList = nullptr; - _lastSpan = nullptr; - _spanStart = 0u; - _strSymbols = 0u; - _str.clear(); - _skipAutoSpace = true; - startAnnotate(); - _subField.clear(); -} - -DocBuilder::AttributeFieldHandle:: -AttributeFieldHandle(const document::Field &dfield, - const Schema::Field &sfield) - : CollectionFieldHandle(dfield, sfield) -{ -} - -void -DocBuilder::AttributeFieldHandle::addStr(const vespalib::string & val) -{ - if (_element) { - insertStr(_sfield, _element.get(), val); - } else { - insertStr(_sfield, _value.get(), val); - } -} - -void -DocBuilder::AttributeFieldHandle::addInt(int64_t val) -{ - if (_element) { - insertInt(_sfield, _element.get(), val); - } else { - insertInt(_sfield, _value.get(), val); - } -} - -void -DocBuilder::AttributeFieldHandle::addFloat(double val) -{ - if (_element) { - insertFloat(_sfield, _element.get(), val); - } else { - insertFloat(_sfield, _value.get(), val); - } -} - -void -DocBuilder::AttributeFieldHandle::addPredicate( - std::unique_ptr<vespalib::Slime> val) -{ - if (_element) { - insertPredicate(_sfield, _element.get(), std::move(val)); - } else { - insertPredicate(_sfield, _value.get(), std::move(val)); - } -} - -void -DocBuilder::AttributeFieldHandle::addTensor( - std::unique_ptr<vespalib::eval::Value> val) -{ - if (_element) { - insertTensor(_sfield, _element.get(), std::move(val)); - } else { - insertTensor(_sfield, _value.get(), std::move(val)); - } -} - -void -DocBuilder::AttributeFieldHandle::addPosition(int32_t xpos, int32_t ypos) -{ - if (_element) { - insertPosition(_sfield, _element.get(), xpos, ypos); - } else { - insertPosition(_sfield, _value.get(), xpos, ypos); - } -} - -DocBuilder::DocumentHandle::DocumentHandle(document::Document &doc, const vespalib::string & docId) - : _type(&doc.getType()), - _doc(&doc), - _fieldHandle(), - _repo(*_doc->getRepo(), *_type) -{ - (void) docId; -} - -DocBuilder::DocumentHandle::~DocumentHandle() = default; - -void -DocBuilder::DocumentHandle::startIndexField(const Schema::Field & sfield) { - _fieldHandle.reset(new IndexFieldHandle(_repo, _type->getField(sfield.getName()), sfield)); -} -void -DocBuilder::DocumentHandle::startAttributeField(const Schema::Field & sfield) { - _fieldHandle.reset(new AttributeFieldHandle(_type->getField(sfield.getName()), sfield)); -} - -void -DocBuilder::DocumentHandle::endField() { - _fieldHandle->onEndField(); - _doc->setValue(_type->getField(_fieldHandle->getField().getName()), *_fieldHandle->getValue()); - _fieldHandle.reset(); -} - -DocBuilder::DocBuilder(const Schema &schema) - : _schema(schema), - _doctypes_config(DocTypeBuilder(schema).makeConfig()), - _repo(std::make_shared<DocumentTypeRepo>(_doctypes_config)), - _docType(*_repo->getDocumentType("searchdocument")), - _doc(), - _handleDoc(), - _currDoc() -{ -} - -DocBuilder::~DocBuilder() = default; - -DocBuilder & -DocBuilder::startDocument(const vespalib::string & docId) -{ - _doc = std::make_unique<Document>(_docType, DocumentId(docId)); - _doc->setRepo(*_repo); - _handleDoc = std::make_shared<DocumentHandle>(*_doc, docId); - return *this; -} - -document::Document::UP -DocBuilder::endDocument() -{ - _handleDoc->endDocument(_doc); - return std::move(_doc); -} - -DocBuilder & -DocBuilder::startIndexField(const vespalib::string & name) -{ - assert(!_handleDoc->getFieldHandle()); - uint32_t field_id = _schema.getIndexFieldId(name); - assert(field_id != Schema::UNKNOWN_FIELD_ID); - _handleDoc->startIndexField(_schema.getIndexField(field_id)); - _currDoc = _handleDoc.get(); - return *this; -} - -DocBuilder & -DocBuilder::startAttributeField(const vespalib::string & name) -{ - assert(!_handleDoc->getFieldHandle()); - uint32_t field_id = _schema.getIndexFieldId(name); - assert(field_id == Schema::UNKNOWN_FIELD_ID); - field_id = _schema.getAttributeFieldId(name); - assert(field_id != Schema::UNKNOWN_FIELD_ID); - _handleDoc->startAttributeField(_schema.getAttributeField(field_id)); - _currDoc = _handleDoc.get(); - return *this; -} - -DocBuilder & -DocBuilder::endField() -{ - assert(_currDoc != nullptr); - _currDoc->endField(); - _currDoc = nullptr; - return *this; -} - -DocBuilder & -DocBuilder::startElement(int32_t weight) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->startElement(weight); - return *this; -} - -DocBuilder & -DocBuilder::endElement() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->endElement(); - return *this; -} - -DocBuilder & -DocBuilder::addStr(const vespalib::string & str) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addStr(str); - return *this; -} - -DocBuilder & -DocBuilder::addSpace() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addSpace(); - return *this; -} - -DocBuilder & -DocBuilder::addNoWordStr(const vespalib::string & str) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addNoWordStr(str); - return *this; -} - -DocBuilder & -DocBuilder::addTokenizedString(const vespalib::string &str) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addTokenizedString(str, false); - return *this; -} - -DocBuilder & -DocBuilder::addUrlTokenizedString(const vespalib::string &str) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addTokenizedString(str, true); - return *this; -} - -DocBuilder & -DocBuilder::addInt(int64_t val) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addInt(val); - return *this; -} - -DocBuilder & -DocBuilder::addFloat(double val) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addFloat(val); - return *this; -} - -DocBuilder & -DocBuilder::addPredicate(std::unique_ptr<vespalib::Slime> val) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addPredicate(std::move(val)); - return *this; -} - -DocBuilder & -DocBuilder::addTensor(std::unique_ptr<vespalib::eval::Value> val) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addTensor(std::move(val)); - return *this; -} - -DocBuilder & -DocBuilder::addSpan(size_t start, size_t len) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addSpan(start, len); - return *this; -} - -DocBuilder & -DocBuilder::addSpan() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addSpan(); - return *this; -} - -DocBuilder & -DocBuilder::addSpaceTokenAnnotation() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addSpaceTokenAnnotation(); - return *this; -} - -DocBuilder & -DocBuilder::addNumericTokenAnnotation() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addNumericTokenAnnotation(); - return *this; -} - -DocBuilder & -DocBuilder::addAlphabeticTokenAnnotation() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addAlphabeticTokenAnnotation(); - return *this; -} - -DocBuilder& -DocBuilder::addTermAnnotation() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addTermAnnotation(); - return *this; -} - -DocBuilder & -DocBuilder::addTermAnnotation(const vespalib::string &val) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addTermAnnotation(val); - return *this; -} - -DocBuilder & -DocBuilder::addPosition(int32_t xpos, int32_t ypos) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addPosition(xpos, ypos); - return *this; -} - -DocBuilder & -DocBuilder::addRaw(const void *buf, size_t len) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->addRaw(buf, len); - return *this; -} - -DocBuilder & -DocBuilder::startSubField(const vespalib::string &subField) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->startSubField(subField); - return *this; -} - -DocBuilder & -DocBuilder::endSubField() -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->endSubField(); - return *this; -} - -DocBuilder & -DocBuilder::setAutoAnnotate(bool autoAnnotate) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->setAutoAnnotate(autoAnnotate); - return *this; -} - -DocBuilder & -DocBuilder::setAutoSpace(bool autoSpace) -{ - assert(_currDoc != nullptr); - _currDoc->getFieldHandle()->setAutoSpace(autoSpace); - return *this; -} - -} diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h deleted file mode 100644 index a8a37b57070..00000000000 --- a/searchlib/src/vespa/searchlib/index/docbuilder.h +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "doctypebuilder.h" -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/document/fieldvalue/fieldvalues.h> -#include <vespa/document/annotation/annotation.h> -#include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/vespalib/util/exception.h> -#include <vespa/vespalib/util/stringfmt.h> - -namespace vespalib::eval { struct Value; } - -namespace search::index { - -VESPA_DEFINE_EXCEPTION(DocBuilderError, vespalib::Exception); - -/** - * Builder class used to generate a search document that corresponds - * to an index schema. - **/ -class DocBuilder { -public: - typedef DocBuilderError Error; - -private: - /** - * Base class for handling the construction of a field. - **/ - class FieldHandle { - public: - typedef std::shared_ptr<FieldHandle> SP; - protected: - const Schema::Field & _sfield; - document::FieldValue::UP _value; - document::FieldValue::UP _element; - public: - FieldHandle(const document::Field & dfield, const Schema::Field & field); - virtual ~FieldHandle() {} - virtual void startElement(int32_t weight) { (void) weight; throw Error("Function not supported"); } - virtual void endElement() { throw Error("Function not supported"); } - virtual void addStr(const vespalib::string & val) { (void) val; throw Error("Function not supported"); } - - virtual void addSpace() { - throw Error("Function not supported"); - } - - virtual void addNoWordStr(const vespalib::string & val) { - (void) val; - throw Error("Function not supported"); - } - - virtual void addTokenizedString(const vespalib::string &val, bool urlMode) { - (void) val; - (void) urlMode; - throw Error("Function not supported"); - } - - virtual void addSpan(size_t start, size_t len) { - (void) start; - (void) len; - throw Error("Function not supported"); - } - - virtual void addSpan() { - throw Error("Function not supported"); - } - - virtual void addSpaceTokenAnnotation() { - throw Error("Function not supported"); - } - - virtual void addNumericTokenAnnotation() { - throw Error("Function not supported"); - } - - virtual void addAlphabeticTokenAnnotation() { - throw Error("Function not supported"); - } - - virtual void addTermAnnotation() { - throw Error("Function not supported"); - } - - virtual void addTermAnnotation(const vespalib::string &val) { - (void) val; - throw Error("Function not supported"); - } - - virtual void addInt(int64_t val) { (void) val; throw Error("Function not supported"); } - virtual void addFloat(double val) { (void) val; throw Error("Function not supported"); } - virtual void addPredicate(std::unique_ptr<vespalib::Slime>) { - throw Error("Function not supported"); - } - virtual void addTensor(std::unique_ptr<vespalib::eval::Value>) { - throw Error("Function not supported"); - } - const document::FieldValue::UP & getValue() const { return _value; } - const Schema::Field & getField() const { return _sfield; } - - virtual void onEndElement() {} - virtual void onEndField() {} - - virtual void setAutoAnnotate(bool autoAnnotate) { - (void) autoAnnotate; - throw Error("Function not supported"); - } - - virtual void setAutoSpace(bool autoSpace) { - (void) autoSpace; - throw Error("Function not supported"); - } - - virtual void addPosition(int32_t xpos, int32_t ypos) { - (void) xpos; - (void) ypos; - throw Error("Function not supported"); - } - - virtual void addRaw(const void *buf, size_t len) { - (void) buf; - (void) len; - throw Error("Function not supported"); - } - - virtual void startSubField(const vespalib::string &subField) { - (void) subField; - throw Error("Function not supported"); - } - - virtual void endSubField() { - throw Error("Function not supported"); - } - }; - - /** - * Class that can handle multi value fields. - **/ - class CollectionFieldHandle : public FieldHandle { - private: - int32_t _elementWeight; - public: - CollectionFieldHandle(const document::Field & dfield, const Schema::Field & sfield); - void startElement(int32_t weight) override; - void endElement() override; - }; - - /** - * Class for handling the construction of the content of an index field. - **/ - class IndexFieldHandle : public CollectionFieldHandle { - vespalib::string _str; // adjusted as word comes along - size_t _strSymbols; // symbols in string, assuming UTF8 - document::SpanList *_spanList; // owned by _spanTree - document::SpanTree::UP _spanTree; - const document::SpanNode *_lastSpan; - size_t _spanStart; // start of span - bool _autoAnnotate; // Add annotation when adding strings - bool _autoSpace; // Add space before strings - bool _skipAutoSpace; // one shot skip of adding space - bool _uriField; // URI handling (special struct case) - vespalib::string _subField; - const document::FixedTypeRepo & _repo; - - void append(const vespalib::string &val); - - public: - IndexFieldHandle(const document::FixedTypeRepo & repo, - const document::Field &dfield, - const Schema::Field &sfield); - - void addStr(const vespalib::string & val) override; - void addSpace() override; - void addNoWordStr(const vespalib::string & val) override; - void addTokenizedString(const vespalib::string &val, bool urlMode) override; - void addSpan(size_t start, size_t len) override; - void addSpan() override; - void addSpaceTokenAnnotation() override; - void addNumericTokenAnnotation() override; - void addAlphabeticTokenAnnotation() override; - void addTermAnnotation() override; - void addTermAnnotation(const vespalib::string &val) override; - void onEndElement() override; - void onEndField() override; - void startAnnotate(); - void setAutoAnnotate(bool autoAnnotate) override; - void setAutoSpace(bool autoSpace) override; - void startSubField(const vespalib::string &subField) override; - void endSubField() override; - }; - - /** - * Class for handling the construction of the content of an attribute field. - **/ - class AttributeFieldHandle : public CollectionFieldHandle { - public: - AttributeFieldHandle(const document::Field & dfield, const Schema::Field & sfield); - void addStr(const vespalib::string & val) override; - void addInt(int64_t val) override; - void addFloat(double val) override; - void addPredicate(std::unique_ptr<vespalib::Slime> val) override; - void addTensor(std::unique_ptr<vespalib::eval::Value> val) override; - void addPosition(int32_t xpos, int32_t ypos) override; - }; - - /** - * Class for handling the construction of a document (set of fields). - **/ - class DocumentHandle { - public: - typedef std::shared_ptr<DocumentHandle> SP; - private: - const document::DocumentType * _type; - document::Document *const _doc; - FieldHandle::SP _fieldHandle; - document::FixedTypeRepo _repo; - public: - DocumentHandle(document::Document &doc, const vespalib::string & docId); - ~DocumentHandle(); - const FieldHandle::SP & getFieldHandle() const { return _fieldHandle; } - void startIndexField(const Schema::Field & sfield); - void startAttributeField(const Schema::Field & sfield); - void endField(); - void endDocument(const document::Document::UP & doc) { - (void) doc; - } - }; - - const Schema & _schema; - document::config::DocumenttypesConfig _doctypes_config; - std::shared_ptr<const document::DocumentTypeRepo> _repo; - const document::DocumentType &_docType; - document::Document::UP _doc; // the document we are about to generate - - DocumentHandle::SP _handleDoc; // handle for all fields - DocumentHandle * _currDoc; // the current document handle - -public: - DocBuilder(const Schema & schema); - ~DocBuilder(); - - DocBuilder & startDocument(const vespalib::string & docId); - document::Document::UP endDocument(); - - DocBuilder & startIndexField(const vespalib::string & name); - DocBuilder & startAttributeField(const vespalib::string & name); - DocBuilder & endField(); - DocBuilder & startElement(int32_t weight = 1); - DocBuilder & endElement(); - DocBuilder & addStr(const vespalib::string & val); - DocBuilder & addSpace(); - DocBuilder & addNoWordStr(const vespalib::string & val); - DocBuilder & addInt(int64_t val); - DocBuilder & addFloat(double val); - DocBuilder & addPredicate(std::unique_ptr<vespalib::Slime> val); - DocBuilder & addTensor(std::unique_ptr<vespalib::eval::Value> val); - DocBuilder &addTokenizedString(const vespalib::string &val); - DocBuilder &addUrlTokenizedString(const vespalib::string &val); - DocBuilder &addSpan(size_t start, size_t len); - DocBuilder &addSpan(); - DocBuilder &addSpaceTokenAnnotation(); - DocBuilder &addNumericTokenAnnotation(); - DocBuilder &addAlphabeticTokenAnnotation(); - DocBuilder &addTermAnnotation(); - DocBuilder &addTermAnnotation(const vespalib::string &val); - DocBuilder &setAutoAnnotate(bool autoAnnotate); - DocBuilder &setAutoSpace(bool autoSpace); - DocBuilder &addPosition(int32_t xpos, int32_t ypos); - DocBuilder &addRaw(const void *buf, size_t len); - DocBuilder &startSubField(const vespalib::string &subField); - DocBuilder &endSubField(); - static bool hasAnnotations() { return true; } - - const document::DocumentType &getDocumentType() const { return _docType; } - const std::shared_ptr<const document::DocumentTypeRepo> &getDocumentTypeRepo() const { return _repo; } - document::config::DocumenttypesConfig getDocumenttypesConfig() const { return _doctypes_config; } -}; - -} diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp deleted file mode 100644 index 5f655419471..00000000000 --- a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "doctypebuilder.h" -#include <vespa/document/datatype/urldatatype.h> -#include <vespa/document/datatype/tensor_data_type.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/repo/configbuilder.h> -#include <set> - -using namespace document; - -namespace search::index { -namespace { - -DataType::Type convert(Schema::DataType type) { - switch (type) { - case schema::DataType::BOOL: - case schema::DataType::UINT2: - case schema::DataType::UINT4: - case schema::DataType::INT8: - return DataType::T_BYTE; - case schema::DataType::INT16: - return DataType::T_SHORT; - case schema::DataType::INT32: - return DataType::T_INT; - case schema::DataType::INT64: - return DataType::T_LONG; - case schema::DataType::FLOAT: - return DataType::T_FLOAT; - case schema::DataType::DOUBLE: - return DataType::T_DOUBLE; - case schema::DataType::STRING: - return DataType::T_STRING; - case schema::DataType::RAW: - return DataType::T_RAW; - case schema::DataType::BOOLEANTREE: - return DataType::T_PREDICATE; - case schema::DataType::TENSOR: - return DataType::T_TENSOR; - default: - break; - } - assert(!"Unknown datatype in schema"); - return DataType::MAX; -} - -void -insertStructType(document::config::DocumenttypesConfig::Documenttype & cfg, const StructDataType & structType) -{ - typedef document::config::DocumenttypesConfig DTC; - DTC::Documenttype::Datatype::Sstruct cfgStruct; - cfgStruct.name = structType.getName(); - Field::Set fieldSet = structType.getFieldSet(); - for (const Field * field : fieldSet) { - DTC::Documenttype::Datatype::Sstruct::Field sField; - sField.name = field->getName(); - sField.datatype = field->getDataType().getId(); - sField.id = field->getId(); - cfgStruct.field.push_back(sField); - } - cfg.datatype.push_back(DTC::Documenttype::Datatype()); - cfg.datatype.back().sstruct = cfgStruct; - cfg.datatype.back().id = structType.getId(); -} - -using namespace document::config_builder; - -TypeOrId makeCollection(TypeOrId datatype, Schema::CollectionType collection_type) { - switch (collection_type) { - case schema::CollectionType::ARRAY: - return Array(datatype); - case schema::CollectionType::WEIGHTEDSET: - // TODO: consider using array of struct<primitive,int32> to keep order - return Wset(datatype); - default: - return datatype; - } -} - -struct TypeCache { - std::map<std::pair<int, Schema::CollectionType>, TypeOrId> types; - - TypeOrId getType(TypeOrId datatype, Schema::CollectionType c_type) { - TypeOrId type = makeCollection(datatype, c_type); - std::pair<int, Schema::CollectionType> key = std::make_pair(datatype.id, c_type); - if (types.find(key) == types.end()) { - types.insert(std::make_pair(key, type)); - } - return types.find(key)->second; - } -}; - -} - -DocTypeBuilder::DocTypeBuilder(const Schema &schema) - : _schema(schema), - _iFields() -{ - _iFields.setup(schema); -} - -document::config::DocumenttypesConfig DocTypeBuilder::makeConfig() const { - using namespace document::config_builder; - TypeCache type_cache; - - typedef std::set<vespalib::string> UsedFields; - UsedFields usedFields; - - Struct header_struct("searchdocument.header"); - header_struct.setId(-1505212454); - - for (size_t i = 0; i < _iFields._textFields.size(); ++i) { - const Schema::IndexField &field = - _schema.getIndexField(_iFields._textFields[i]); - - // only handles string fields for now - assert(field.getDataType() == schema::DataType::STRING); - header_struct.addField(field.getName(), type_cache.getType( - DataType::T_STRING, field.getCollectionType())); - usedFields.insert(field.getName()); - } - - const int32_t uri_type = document::UrlDataType::getInstance().getId(); - for (size_t i = 0; i < _iFields._uriFields.size(); ++i) { - const Schema::IndexField &field = - _schema.getIndexField(_iFields._uriFields[i]._all); - - // only handles string fields for now - assert(field.getDataType() == schema::DataType::STRING); - header_struct.addField(field.getName(), type_cache.getType( - uri_type, field.getCollectionType())); - usedFields.insert(field.getName()); - } - - for (uint32_t i = 0; i < _schema.getNumAttributeFields(); ++i) { - const Schema::AttributeField &field = _schema.getAttributeField(i); - UsedFields::const_iterator usf = usedFields.find(field.getName()); - if (usf != usedFields.end()) { - continue; // taken as index field - } - auto type_id = convert(field.getDataType()); - if (type_id == DataType::T_TENSOR) { - header_struct.addTensorField(field.getName(), field.get_tensor_spec()); - } else { - header_struct.addField(field.getName(), type_cache.getType( - type_id, field.getCollectionType())); - } - usedFields.insert(field.getName()); - } - - DocumenttypesConfigBuilderHelper builder; - builder.document(-645763131, "searchdocument", - header_struct, Struct("searchdocument.body")); - return builder.config(); -} - -document::config::DocumenttypesConfig -DocTypeBuilder::makeConfig(const DocumentType &docType) -{ - typedef document::config::DocumenttypesConfigBuilder DTC; - DTC cfg; - { // document type - DTC::Documenttype dtype; - dtype.id = docType.getId(); - dtype.name = docType.getName(); - // TODO(vekterli): remove header/body config - dtype.headerstruct = docType.getFieldsType().getId(); - dtype.bodystruct = docType.getFieldsType().getId(); - cfg.documenttype.push_back(dtype); - } - insertStructType(cfg.documenttype[0], docType.getFieldsType()); - return cfg; -} - -} diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.h b/searchlib/src/vespa/searchlib/index/doctypebuilder.h deleted file mode 100644 index 4db0ba5b0e3..00000000000 --- a/searchlib/src/vespa/searchlib/index/doctypebuilder.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "schema_index_fields.h" -#include <vespa/document/config/config-documenttypes.h> -#include <vespa/document/fieldvalue/fieldvalues.h> -#include <vespa/vespalib/util/exception.h> -#include <vespa/vespalib/util/stringfmt.h> - -namespace search::index { - -/** - * Builder for the indexingdocument document type based on an index schema. - **/ -class DocTypeBuilder { - const Schema &_schema; - SchemaIndexFields _iFields; - -public: - DocTypeBuilder(const Schema & schema); - document::config::DocumenttypesConfig makeConfig() const; - - static document::config::DocumenttypesConfig - makeConfig(const document::DocumentType &docType); -}; - -} diff --git a/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp b/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp index 6515d896917..fabe630432f 100644 --- a/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp +++ b/searchlib/src/vespa/searchlib/index/empty_doc_builder.cpp @@ -33,6 +33,11 @@ get_document_types_config(EmptyDocBuilder::AddFieldsType add_fields) } +EmptyDocBuilder::EmptyDocBuilder() + : EmptyDocBuilder([](auto&) noexcept {}) +{ +} + EmptyDocBuilder::EmptyDocBuilder(AddFieldsType add_fields) : _document_types_config(std::make_shared<const DocumenttypesConfig>(get_document_types_config(add_fields))), _repo(DocumentTypeRepoFactory::make(*_document_types_config)), diff --git a/searchlib/src/vespa/searchlib/index/empty_doc_builder.h b/searchlib/src/vespa/searchlib/index/empty_doc_builder.h index 7e734af4e95..18b6543bea1 100644 --- a/searchlib/src/vespa/searchlib/index/empty_doc_builder.h +++ b/searchlib/src/vespa/searchlib/index/empty_doc_builder.h @@ -28,6 +28,7 @@ class EmptyDocBuilder { const document::DocumentType* _document_type; public: using AddFieldsType = std::function<void(document::config_builder::Struct&)>; + EmptyDocBuilder(); explicit EmptyDocBuilder(AddFieldsType add_fields); ~EmptyDocBuilder(); const document::DocumentTypeRepo& get_repo() const noexcept { return *_repo; } diff --git a/searchlib/src/vespa/searchlib/index/string_field_builder.cpp b/searchlib/src/vespa/searchlib/index/string_field_builder.cpp new file mode 100644 index 00000000000..3212a021535 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/string_field_builder.cpp @@ -0,0 +1,140 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "string_field_builder.h" +#include "empty_doc_builder.h" +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/vespalib/text/utf8.h> + +#include <cassert> + +using document::Annotation; +using document::AnnotationType; +using document::FixedTypeRepo; +using document::StringFieldValue; +using document::Span; +using document::SpanList; +using document::SpanNode; +using document::SpanTree; +using vespalib::Utf8Reader; +using vespalib::Utf8Writer; + +namespace search::index { + +namespace { + +const vespalib::string SPANTREE_NAME("linguistics"); + +} + +StringFieldBuilder::StringFieldBuilder(const EmptyDocBuilder& empty_doc_builder) + : _value(), + _span_start(0u), + _span_list(nullptr), + _span_tree(), + _last_span(nullptr), + _url_mode(false), + _repo(empty_doc_builder.get_repo(), empty_doc_builder.get_document_type()) +{ +} + +StringFieldBuilder::~StringFieldBuilder() = default; + +void +StringFieldBuilder::start_annotate() +{ + auto span_list_up = std::make_unique<SpanList>(); + _span_list = span_list_up.get(); + _span_tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); +} + +void +StringFieldBuilder::add_span() +{ + assert(_value.size() > _span_start); + const SpanNode &span = _span_list->add(std::make_unique<Span>(_span_start, _value.size() - _span_start)); + _last_span = &span; + _span_start = _value.size(); +} + +StringFieldBuilder& +StringFieldBuilder::token(const vespalib::string& val, bool is_word) +{ + if (val.empty()) { + return *this; + } + if (!_span_tree) { + start_annotate(); + } + _span_start = _value.size(); + _value.append(val); + add_span(); + if (is_word) { + _span_tree->annotate(*_last_span, *AnnotationType::TERM); + } + return *this; +} + +StringFieldBuilder& +StringFieldBuilder::alt_word(const vespalib::string& val) +{ + assert(_last_span != nullptr); + _span_tree->annotate(*_last_span, + Annotation(*AnnotationType::TERM, + std::make_unique<StringFieldValue>(val))); + return *this; +} + +StringFieldBuilder& +StringFieldBuilder::tokenize(const vespalib::string& val) +{ + Utf8Reader reader(val); + vespalib::string token_buffer; + Utf8Writer writer(token_buffer); + uint32_t c = 0u; + bool old_word = false; + + while (reader.hasMore()) { + c = reader.getChar(); + bool new_word = Fast_UnicodeUtil::IsWordChar(c) || + (_url_mode && (c == '-' || c == '_')); + if (old_word != new_word) { + if (!token_buffer.empty()) { + token(token_buffer, old_word); + token_buffer.clear(); + } + old_word = new_word; + } + writer.putChar(c); + } + if (!token_buffer.empty()) { + token(token_buffer, old_word); + } + return *this; +} + + +document::StringFieldValue +StringFieldBuilder::build() +{ + StringFieldValue value(_value); + // Also drop all spans no annotation for now + if (_span_tree && _span_tree->numAnnotations() > 0u) { + StringFieldValue::SpanTrees trees; + trees.emplace_back(std::move(_span_tree)); + value.setSpanTrees(trees, _repo); + } else { + _span_tree.reset(); + } + _span_list = nullptr; + _last_span = nullptr; + _span_start = 0u; + _value.clear(); + return value; +} + +} diff --git a/searchlib/src/vespa/searchlib/index/string_field_builder.h b/searchlib/src/vespa/searchlib/index/string_field_builder.h new file mode 100644 index 00000000000..1987cbbcf74 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/string_field_builder.h @@ -0,0 +1,45 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/document/repo/fixedtyperepo.h> +#include <memory> + +namespace document { +class SpanList; +struct SpanNode; +class SpanTree; +class StringFieldValue; +} + +namespace search::index { + +class EmptyDocBuilder; + +/* + * Helper class to build annotated string field. + */ +class StringFieldBuilder { + vespalib::string _value; + size_t _span_start; + document::SpanList* _span_list; // owned by _span_tree + std::unique_ptr<document::SpanTree> _span_tree; + const document::SpanNode* _last_span; + bool _url_mode; + const document::FixedTypeRepo _repo; + void start_annotate(); + void add_span(); +public: + StringFieldBuilder(const EmptyDocBuilder& empty_doc_builder); + ~StringFieldBuilder(); + StringFieldBuilder& url_mode(bool url_mode_) noexcept { _url_mode = url_mode_; return *this; } + StringFieldBuilder& token(const vespalib::string& val, bool is_word); + StringFieldBuilder& word(const vespalib::string& val) { return token(val, true); } + StringFieldBuilder& space() { return token(" ", false); } + StringFieldBuilder& tokenize(const vespalib::string& val); + StringFieldBuilder& alt_word(const vespalib::string& val); + document::StringFieldValue build(); +}; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index adeeba4d06e..5f5e782a382 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -205,9 +205,9 @@ public: const std::vector<PosOccFieldsParams> &getFieldsParams() const { return _fieldsParams; } - void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); } - void transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } - void clearHoldLists() { _store.clearHoldLists();} + void reclaim_memory(generation_t oldest_used_gen) { _store.reclaim_memory(oldest_used_gen); } + void assign_generation(generation_t current_gen) { _store.assign_generation(current_gen); } + void reclaim_all_memory() { _store.reclaim_all_memory();} std::unique_ptr<vespalib::datastore::CompactingBuffers> start_compact(); vespalib::MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); } vespalib::datastore::MemoryStats getMemStats() const { return _store.getMemStats(); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index da95f2598b7..4be3031303e 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -69,12 +69,12 @@ FieldIndex<interleaved_features>::~FieldIndex() } _postingListStore.clearBuilder(); freeze(); // Flush all pending posting list tree freezes - transferHoldLists(); + assign_generation(); _dict.clear(); // Clear dictionary freeze(); // Flush pending freeze for dictionary tree. - transferHoldLists(); + assign_generation(); incGeneration(); - trimHoldLists(); + reclaim_memory(); } template <bool interleaved_features> @@ -143,7 +143,7 @@ FieldIndex<interleaved_features>::compactFeatures() using generation_t = GenerationHandler::generation_t; compacting_buffers->finish(); generation_t generation = _generationHandler.getCurrentGeneration(); - _featureStore.transferHoldLists(generation); + _featureStore.assign_generation(generation); } template <bool interleaved_features> diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index fb02ed880b4..187ec5ee971 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -52,20 +52,20 @@ private: _dict.getAllocator().freeze(); } - void trimHoldLists() { - GenerationHandler::generation_t usedGen = - _generationHandler.getFirstUsedGeneration(); - _postingListStore.trimHoldLists(usedGen); - _dict.getAllocator().trimHoldLists(usedGen); - _featureStore.trimHoldLists(usedGen); + void reclaim_memory() { + GenerationHandler::generation_t oldest_used_gen = + _generationHandler.get_oldest_used_generation(); + _postingListStore.reclaim_memory(oldest_used_gen); + _dict.getAllocator().reclaim_memory(oldest_used_gen); + _featureStore.reclaim_memory(oldest_used_gen); } - void transferHoldLists() { + void assign_generation() { GenerationHandler::generation_t generation = _generationHandler.getCurrentGeneration(); - _postingListStore.transferHoldLists(generation); - _dict.getAllocator().transferHoldLists(generation); - _featureStore.transferHoldLists(generation); + _postingListStore.assign_generation(generation); + _dict.getAllocator().assign_generation(generation); + _featureStore.assign_generation(generation); } void incGeneration() { @@ -90,9 +90,9 @@ public: void commit() override { _remover.flush(); freeze(); - transferHoldLists(); + assign_generation(); incGeneration(); - trimHoldLists(); + reclaim_memory(); } /** diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp index c64c490039b..f21ca1b11cc 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp @@ -213,19 +213,19 @@ PredicateIndex::commit() { } void -PredicateIndex::trimHoldLists(generation_t used_generation) { - _interval_index.trimHoldLists(used_generation); - _bounds_index.trimHoldLists(used_generation); - _interval_store.trimHoldLists(used_generation); - _zero_constraint_docs.getAllocator().trimHoldLists(used_generation); +PredicateIndex::reclaim_memory(generation_t oldest_used_gen) { + _interval_index.reclaim_memory(oldest_used_gen); + _bounds_index.reclaim_memory(oldest_used_gen); + _interval_store.reclaim_memory(oldest_used_gen); + _zero_constraint_docs.getAllocator().reclaim_memory(oldest_used_gen); } void -PredicateIndex::transferHoldLists(generation_t generation) { - _interval_index.transferHoldLists(generation); - _bounds_index.transferHoldLists(generation); - _interval_store.transferHoldLists(generation); - _zero_constraint_docs.getAllocator().transferHoldLists(generation); +PredicateIndex::assign_generation(generation_t current_gen) { + _interval_index.assign_generation(current_gen); + _bounds_index.assign_generation(current_gen); + _interval_store.assign_generation(current_gen); + _zero_constraint_docs.getAllocator().assign_generation(current_gen); } vespalib::MemoryUsage diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h index 1bad95c6aa9..238314e17f9 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_index.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h @@ -73,8 +73,8 @@ public: void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations); void removeDocument(uint32_t doc_id); void commit(); - void trimHoldLists(generation_t used_generation); - void transferHoldLists(generation_t generation); + void reclaim_memory(generation_t oldest_used_gen); + void assign_generation(generation_t current_gen); vespalib::MemoryUsage getMemoryUsage() const; int getArity() const { return _arity; } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp index 379c859f6c3..af809b2fa69 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp @@ -100,13 +100,13 @@ PredicateIntervalStore::remove(EntryRef ref) { } void -PredicateIntervalStore::trimHoldLists(generation_t used_generation) { - _store.trimHoldLists(used_generation); +PredicateIntervalStore::reclaim_memory(generation_t oldest_used_gen) { + _store.reclaim_memory(oldest_used_gen); } void -PredicateIntervalStore::transferHoldLists(generation_t generation) { - _store.transferHoldLists(generation); +PredicateIntervalStore::assign_generation(generation_t current_gen) { + _store.assign_generation(current_gen); } } diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h index 0b3e32ec6b7..a96c208393d 100644 --- a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h @@ -71,9 +71,9 @@ public: */ void remove(vespalib::datastore::EntryRef ref); - void trimHoldLists(generation_t used_generation); + void reclaim_memory(generation_t oldest_used_gen); - void transferHoldLists(generation_t generation); + void assign_generation(generation_t current_gen); /** * Return memory usage (only the data store is included) diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h index 78805820a30..d49e42a1e35 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.h +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -187,8 +187,8 @@ public: // (and after doc id limits values are determined) to promote posting lists to vectors. void promoteOverThresholdVectors(); void commit(); - void trimHoldLists(generation_t used_generation); - void transferHoldLists(generation_t generation); + void reclaim_memory(generation_t oldest_used_gen); + void assign_generation(generation_t current_gen); vespalib::MemoryUsage getMemoryUsage() const; template <typename FunctionType> void foreach_frozen_key(vespalib::datastore::EntryRef ref, Key key, FunctionType func) const; diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp index cb37fec26ea..c6f640d72ed 100644 --- a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp @@ -54,17 +54,17 @@ SimpleIndex<Posting, Key, DocId>::~SimpleIndex() { _vector_posting_lists.disableElemHoldList(); _vector_posting_lists.clear(); _vector_posting_lists.getAllocator().freeze(); - _vector_posting_lists.getAllocator().clearHoldLists(); + _vector_posting_lists.getAllocator().reclaim_all_memory(); _dictionary.disableFreeLists(); _dictionary.disableElemHoldList(); _dictionary.clear(); _dictionary.getAllocator().freeze(); - _dictionary.getAllocator().clearHoldLists(); + _dictionary.getAllocator().reclaim_all_memory(); _btree_posting_lists.clearBuilder(); _btree_posting_lists.freeze(); - _btree_posting_lists.clearHoldLists(); + _btree_posting_lists.reclaim_all_memory(); } template <typename Posting, typename Key, typename DocId> @@ -291,19 +291,19 @@ SimpleIndex<Posting, Key, DocId>::commit() { template <typename Posting, typename Key, typename DocId> void -SimpleIndex<Posting, Key, DocId>::trimHoldLists(generation_t used_generation) { - _btree_posting_lists.trimHoldLists(used_generation); - _dictionary.getAllocator().trimHoldLists(used_generation); - _vector_posting_lists.getAllocator().trimHoldLists(used_generation); +SimpleIndex<Posting, Key, DocId>::reclaim_memory(generation_t oldest_used_gen) { + _btree_posting_lists.reclaim_memory(oldest_used_gen); + _dictionary.getAllocator().reclaim_memory(oldest_used_gen); + _vector_posting_lists.getAllocator().reclaim_memory(oldest_used_gen); } template <typename Posting, typename Key, typename DocId> void -SimpleIndex<Posting, Key, DocId>::transferHoldLists(generation_t generation) { - _dictionary.getAllocator().transferHoldLists(generation); - _btree_posting_lists.transferHoldLists(generation); - _vector_posting_lists.getAllocator().transferHoldLists(generation); +SimpleIndex<Posting, Key, DocId>::assign_generation(generation_t current_gen) { + _dictionary.getAllocator().assign_generation(current_gen); + _btree_posting_lists.assign_generation(current_gen); + _vector_posting_lists.getAllocator().assign_generation(current_gen); } template <typename Posting, typename Key, typename DocId> diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 0c797fa7fe1..56b9473b6e6 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -173,7 +173,7 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con DenseTensorAttribute::~DenseTensorAttribute() { getGenerationHolder().reclaim_all(); - _tensorStore.clearHoldLists(); + _tensorStore.reclaim_all_memory(); } uint32_t @@ -450,22 +450,20 @@ DenseTensorAttribute::onCommit() } void -DenseTensorAttribute::onGenerationChange(generation_t next_gen) +DenseTensorAttribute::before_inc_generation(generation_t current_gen) { - // TODO: Change onGenerationChange() to send current generation instead of next generation. - // This applies for entire attribute vector code. - TensorAttribute::onGenerationChange(next_gen); + TensorAttribute::before_inc_generation(current_gen); if (_index) { - _index->transfer_hold_lists(next_gen - 1); + _index->assign_generation(current_gen); } } void -DenseTensorAttribute::removeOldGenerations(generation_t first_used_gen) +DenseTensorAttribute::reclaim_memory(generation_t oldest_used_gen) { - TensorAttribute::removeOldGenerations(first_used_gen); + TensorAttribute::reclaim_memory(oldest_used_gen); if (_index) { - _index->trim_hold_lists(first_used_gen); + _index->reclaim_memory(oldest_used_gen); } } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index b0991aa57aa..3aa52fe622a 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -47,8 +47,8 @@ public: std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; uint32_t getVersion() const override; void onCommit() override; - void onGenerationChange(generation_t next_gen) override; - void removeOldGenerations(generation_t first_used_gen) override; + void before_inc_generation(generation_t current_gen) override; + void reclaim_memory(generation_t oldest_used_gen) override; void get_state(const vespalib::slime::Inserter& inserter) const override; void onShrinkLidSpace() override; diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp index a68f7fcc3da..22db5dc5b47 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp @@ -16,7 +16,7 @@ DirectTensorAttribute::DirectTensorAttribute(stringref name, const Config &cfg) DirectTensorAttribute::~DirectTensorAttribute() { getGenerationHolder().reclaim_all(); - _tensorStore.clearHoldLists(); + _tensorStore.reclaim_all_memory(); } void diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index d23bbcfbed4..10f06a1e1ec 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -511,21 +511,21 @@ HnswIndex::remove_document(uint32_t docid) } void -HnswIndex::transfer_hold_lists(generation_t current_gen) +HnswIndex::assign_generation(generation_t current_gen) { // Note: RcuVector transfers hold lists as part of reallocation based on current generation. // We need to set the next generation here, as it is incremented on a higher level right after this call. _graph.node_refs.setGeneration(current_gen + 1); - _graph.nodes.transferHoldLists(current_gen); - _graph.links.transferHoldLists(current_gen); + _graph.nodes.assign_generation(current_gen); + _graph.links.assign_generation(current_gen); } void -HnswIndex::trim_hold_lists(generation_t first_used_gen) +HnswIndex::reclaim_memory(generation_t oldest_used_gen) { - _graph.node_refs.removeOldGenerations(first_used_gen); - _graph.nodes.trimHoldLists(first_used_gen); - _graph.links.trimHoldLists(first_used_gen); + _graph.node_refs.reclaim_memory(oldest_used_gen); + _graph.nodes.reclaim_memory(oldest_used_gen); + _graph.links.reclaim_memory(oldest_used_gen); } void diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index e3ffada1fc2..8a7422907ea 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -187,8 +187,8 @@ public: vespalib::GenerationHandler::Guard read_guard) const override; void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override; void remove_document(uint32_t docid) override; - void transfer_hold_lists(generation_t current_gen) override; - void trim_hold_lists(generation_t first_used_gen) override; + void assign_generation(generation_t current_gen) override; + void reclaim_memory(generation_t oldest_used_gen) override; void compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy); void compact_link_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy); bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy); diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index 51d66fdd14d..d40803dcafd 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -68,8 +68,8 @@ public: virtual void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) = 0; virtual void remove_document(uint32_t docid) = 0; - virtual void transfer_hold_lists(generation_t current_gen) = 0; - virtual void trim_hold_lists(generation_t first_used_gen) = 0; + virtual void assign_generation(generation_t current_gen) = 0; + virtual void reclaim_memory(generation_t first_used_gen) = 0; virtual bool consider_compact(const CompactionStrategy& compaction_strategy) = 0; virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0; virtual vespalib::MemoryUsage memory_usage() const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index d2153ac93ad..bb1c1a3d880 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -24,7 +24,7 @@ SerializedFastValueAttribute::SerializedFastValueAttribute(stringref name, const SerializedFastValueAttribute::~SerializedFastValueAttribute() { getGenerationHolder().reclaim_all(); - _tensorStore.clearHoldLists(); + _tensorStore.reclaim_all_memory(); } void diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index b233960e339..6130da6fcf9 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -110,17 +110,17 @@ TensorAttribute::onUpdateStat() } void -TensorAttribute::removeOldGenerations(generation_t firstUsed) +TensorAttribute::reclaim_memory(generation_t oldest_used_gen) { - _tensorStore.trimHoldLists(firstUsed); - getGenerationHolder().reclaim(firstUsed); + _tensorStore.reclaim_memory(oldest_used_gen); + getGenerationHolder().reclaim(oldest_used_gen); } void -TensorAttribute::onGenerationChange(generation_t generation) +TensorAttribute::before_inc_generation(generation_t current_gen) { - getGenerationHolder().assign_generation(generation - 1); - _tensorStore.transferHoldLists(generation - 1); + getGenerationHolder().assign_generation(current_gen); + _tensorStore.assign_generation(current_gen); } bool diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index 7cfbb68eac7..b7bac35d1b7 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -48,8 +48,8 @@ public: uint32_t clearDoc(DocId docId) override; void onCommit() override; void onUpdateStat() override; - void removeOldGenerations(generation_t firstUsed) override; - void onGenerationChange(generation_t generation) override; + void reclaim_memory(generation_t oldest_used_gen) override; + void before_inc_generation(generation_t current_gen) override; bool addDoc(DocId &docId) override; std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override; vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_store.h index e2426d2e899..53551bc48fa 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_store.h @@ -48,17 +48,17 @@ public: virtual bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const = 0; // Inherit doc from DataStoreBase - void trimHoldLists(generation_t usedGen) { - _store.trimHoldLists(usedGen); + void reclaim_memory(generation_t oldest_used_gen) { + _store.reclaim_memory(oldest_used_gen); } // Inherit doc from DataStoreBase - void transferHoldLists(generation_t generation) { - _store.transferHoldLists(generation); + void assign_generation(generation_t current_gen) { + _store.assign_generation(current_gen); } - void clearHoldLists() { - _store.clearHoldLists(); + void reclaim_all_memory() { + _store.reclaim_all_memory(); } vespalib::MemoryUsage getMemoryUsage() const { diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp index 6e86d70fe52..e918c523fcf 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -169,9 +169,9 @@ FakeMemTreeOccMgr::freeze() void -FakeMemTreeOccMgr::transferHoldLists() +FakeMemTreeOccMgr::assign_generation() { - _allocator.transferHoldLists(_generationHandler.getCurrentGeneration()); + _allocator.assign_generation(_generationHandler.getCurrentGeneration()); } void @@ -182,9 +182,9 @@ FakeMemTreeOccMgr::incGeneration() void -FakeMemTreeOccMgr::trimHoldLists() +FakeMemTreeOccMgr::reclaim_memory() { - _allocator.trimHoldLists(_generationHandler.getFirstUsedGeneration()); + _allocator.reclaim_memory(_generationHandler.get_oldest_used_generation()); } @@ -192,9 +192,9 @@ void FakeMemTreeOccMgr::sync() { freeze(); - transferHoldLists(); + assign_generation(); incGeneration(); - trimHoldLists(); + reclaim_memory(); } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h index d0a75930ed5..290ba1cf140 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h @@ -94,9 +94,9 @@ public: ~FakeMemTreeOccMgr(); void freeze(); - void transferHoldLists(); + void assign_generation(); void incGeneration(); - void trimHoldLists(); + void reclaim_memory(); void sync(); void add(uint32_t wordIdx, index::DocIdAndFeatures &features) override; void remove(uint32_t wordIdx, uint32_t docId) override; diff --git a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp index 9db36e96fc0..125882f7fe7 100644 --- a/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp +++ b/storage/src/vespa/storage/bucketdb/generic_btree_bucket_database.hpp @@ -38,14 +38,14 @@ void GenericBTreeBucketDatabase<DataStoreTraitsT>::commit_tree_changes() { _tree.getAllocator().freeze(); auto current_gen = _generation_handler.getCurrentGeneration(); - _store.transferHoldLists(current_gen); - _tree.getAllocator().transferHoldLists(current_gen); + _store.assign_generation(current_gen); + _tree.getAllocator().assign_generation(current_gen); _generation_handler.incGeneration(); - auto used_gen = _generation_handler.getFirstUsedGeneration(); - _store.trimHoldLists(used_gen); - _tree.getAllocator().trimHoldLists(used_gen); + auto used_gen = _generation_handler.get_oldest_used_generation(); + _store.reclaim_memory(used_gen); + _tree.getAllocator().reclaim_memory(used_gen); } template <typename DataStoreTraitsT> diff --git a/vespalib/src/tests/btree/btree-stress/btree_stress_test.cpp b/vespalib/src/tests/btree/btree-stress/btree_stress_test.cpp index 3ba7bf85e42..caed5c3543c 100644 --- a/vespalib/src/tests/btree/btree-stress/btree_stress_test.cpp +++ b/vespalib/src/tests/btree/btree-stress/btree_stress_test.cpp @@ -59,8 +59,8 @@ public: AtomicEntryRef add_relaxed(uint32_t value) { return AtomicEntryRef(add(value)); } void hold(const AtomicEntryRef& ref) { _store.holdElem(ref.load_relaxed(), 1); } EntryRef move(EntryRef ref); - void transfer_hold_lists(generation_t gen) { _store.transferHoldLists(gen); } - void trim_hold_lists(generation_t gen) { _store.trimHoldLists(gen); } + void assign_generation(generation_t current_gen) { _store.assign_generation(current_gen); } + void reclaim_memory(generation_t gen) { _store.reclaim_memory(gen); } uint32_t get(EntryRef ref) const { return _store.getEntry(ref); } uint32_t get_acquire(const AtomicEntryRef& ref) const { return get(ref.load_acquire()); } uint32_t get_relaxed(const AtomicEntryRef& ref) const { return get(ref.load_relaxed()); } @@ -118,8 +118,8 @@ public: static uint32_t add(uint32_t value) noexcept { return value; } static uint32_t add_relaxed(uint32_t value) noexcept { return value; } static void hold(uint32_t) noexcept { } - static void transfer_hold_lists(generation_t) noexcept { } - static void trim_hold_lists(generation_t) noexcept { } + static void assign_generation(generation_t) noexcept { } + static void reclaim_memory(generation_t) noexcept { } static uint32_t get(uint32_t value) noexcept { return value; } static uint32_t get_acquire(uint32_t value) noexcept { return value; } static uint32_t get_relaxed(uint32_t value) noexcept { return value; } @@ -274,15 +274,15 @@ Fixture<Params>::commit() auto &allocator = _tree.getAllocator(); allocator.freeze(); auto current_gen = _generationHandler.getCurrentGeneration(); - allocator.transferHoldLists(current_gen); - _keys.transfer_hold_lists(current_gen); - _values.transfer_hold_lists(current_gen); - allocator.transferHoldLists(_generationHandler.getCurrentGeneration()); + allocator.assign_generation(current_gen); + _keys.assign_generation(current_gen); + _values.assign_generation(current_gen); + allocator.assign_generation(_generationHandler.getCurrentGeneration()); _generationHandler.incGeneration(); - auto first_used_gen = _generationHandler.getFirstUsedGeneration(); - allocator.trimHoldLists(first_used_gen); - _keys.trim_hold_lists(first_used_gen); - _values.trim_hold_lists(first_used_gen); + auto oldest_used_gen = _generationHandler.get_oldest_used_generation(); + allocator.reclaim_memory(oldest_used_gen); + _keys.reclaim_memory(oldest_used_gen); + _values.reclaim_memory(oldest_used_gen); } template <typename Params> diff --git a/vespalib/src/tests/btree/btree_store/btree_store_test.cpp b/vespalib/src/tests/btree/btree_store/btree_store_test.cpp index 4da34c64ed9..0370b1ce2eb 100644 --- a/vespalib/src/tests/btree/btree_store/btree_store_test.cpp +++ b/vespalib/src/tests/btree/btree_store/btree_store_test.cpp @@ -31,9 +31,9 @@ protected: void inc_generation() { _store.freeze(); - _store.transferHoldLists(_gen_handler.getCurrentGeneration()); + _store.assign_generation(_gen_handler.getCurrentGeneration()); _gen_handler.incGeneration(); - _store.trimHoldLists(_gen_handler.getFirstUsedGeneration()); + _store.reclaim_memory(_gen_handler.get_oldest_used_generation()); } EntryRef add_sequence(int start_key, int end_key) diff --git a/vespalib/src/tests/btree/btree_test.cpp b/vespalib/src/tests/btree/btree_test.cpp index 3fd00a26189..f2896cb783c 100644 --- a/vespalib/src/tests/btree/btree_test.cpp +++ b/vespalib/src/tests/btree/btree_test.cpp @@ -163,9 +163,9 @@ void cleanup(GenerationHandler & g, ManagerType & m) { m.freeze(); - m.transferHoldLists(g.getCurrentGeneration()); + m.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - m.trimHoldLists(g.getFirstUsedGeneration()); + m.reclaim_memory(g.get_oldest_used_generation()); } template <typename ManagerType, typename NodeType> @@ -874,9 +874,9 @@ TEST_F(BTreeTest, require_that_we_can_insert_and_remove_from_tree) } compacting_buffers->finish(); manager.freeze(); - manager.transferHoldLists(g.getCurrentGeneration()); + manager.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - manager.trimHoldLists(g.getFirstUsedGeneration()); + manager.reclaim_memory(g.get_oldest_used_generation()); } // remove entries for (size_t i = 0; i < numEntries; ++i) { @@ -1106,9 +1106,9 @@ TEST_F(BTreeTest, require_that_memory_usage_is_calculated) EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); // trim hold lists - tm.transferHoldLists(gh.getCurrentGeneration()); + tm.assign_generation(gh.getCurrentGeneration()); gh.incGeneration(); - tm.trimHoldLists(gh.getFirstUsedGeneration()); + tm.reclaim_memory(gh.get_oldest_used_generation()); mu = vespalib::MemoryUsage(); mu.incAllocatedBytes(adjustAllocatedBytes(initialInternalNodes, sizeof(INode))); mu.incAllocatedBytes(adjustAllocatedBytes(initialLeafNodes, sizeof(LNode))); @@ -1282,9 +1282,9 @@ TEST_F(BTreeTest, require_that_small_nodes_works) s.clear(root); s.clearBuilder(); s.freeze(); - s.transferHoldLists(g.getCurrentGeneration()); + s.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - s.trimHoldLists(g.getFirstUsedGeneration()); + s.reclaim_memory(g.get_oldest_used_generation()); } namespace { @@ -1416,9 +1416,9 @@ TEST_F(BTreeTest, require_that_apply_works) s.clear(root); s.clearBuilder(); s.freeze(); - s.transferHoldLists(g.getCurrentGeneration()); + s.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - s.trimHoldLists(g.getFirstUsedGeneration()); + s.reclaim_memory(g.get_oldest_used_generation()); } class MyTreeTestIterator : public MyTree::Iterator @@ -1553,9 +1553,9 @@ inc_generation(GenerationHandler &g, Tree &t) { auto &s = t.getAllocator(); s.freeze(); - s.transferHoldLists(g.getCurrentGeneration()); + s.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - s.trimHoldLists(g.getFirstUsedGeneration()); + s.reclaim_memory(g.get_oldest_used_generation()); } template <typename Tree> diff --git a/vespalib/src/tests/btree/btreeaggregation_test.cpp b/vespalib/src/tests/btree/btreeaggregation_test.cpp index dff7de6660f..fb394df9861 100644 --- a/vespalib/src/tests/btree/btreeaggregation_test.cpp +++ b/vespalib/src/tests/btree/btreeaggregation_test.cpp @@ -272,9 +272,9 @@ void freezeTree(GenerationHandler &g, ManagerType &m) { m.freeze(); - m.transferHoldLists(g.getCurrentGeneration()); + m.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - m.trimHoldLists(g.getFirstUsedGeneration()); + m.reclaim_memory(g.get_oldest_used_generation()); } template <typename ManagerType> @@ -891,9 +891,9 @@ Test::requireThatWeCanInsertAndRemoveFromTree() } compacting_buffers->finish(); manager.freeze(); - manager.transferHoldLists(g.getCurrentGeneration()); + manager.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - manager.trimHoldLists(g.getFirstUsedGeneration()); + manager.reclaim_memory(g.get_oldest_used_generation()); } // remove entries for (size_t i = 0; i < numEntries; ++i) { @@ -1190,9 +1190,9 @@ Test::requireThatSmallNodesWorks() s.clear(root); s.clearBuilder(); s.freeze(); - s.transferHoldLists(g.getCurrentGeneration()); + s.assign_generation(g.getCurrentGeneration()); g.incGeneration(); - s.trimHoldLists(g.getFirstUsedGeneration()); + s.reclaim_memory(g.get_oldest_used_generation()); } void diff --git a/vespalib/src/tests/btree/frozenbtree_test.cpp b/vespalib/src/tests/btree/frozenbtree_test.cpp index 01748b9edeb..3471d5dc3df 100644 --- a/vespalib/src/tests/btree/frozenbtree_test.cpp +++ b/vespalib/src/tests/btree/frozenbtree_test.cpp @@ -114,7 +114,7 @@ FrozenBTreeTest::freeTree(bool verbose) static_cast<uint64_t>(_intTree->getUsedMemory()), static_cast<uint64_t>(_intTree->getHeldMemory())); _intTree->dropFrozen(); - _intTree->removeOldGenerations(_intTree->getGeneration() + 1); + _intTree->reclaim_memory(_intTree->getGeneration() + 1); LOG(info, "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", static_cast<uint64_t>(_intTree->getUsedMemory()), @@ -134,9 +134,9 @@ FrozenBTreeTest::freeTree(bool verbose) (void) verbose; _tree->clear(*_allocator); _allocator->freeze(); - _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + _allocator->assign_generation(_generationHandler->getCurrentGeneration()); _generationHandler->incGeneration(); - _allocator->trimHoldLists(_generationHandler->getFirstUsedGeneration()); + _allocator->reclaim_memory(_generationHandler->get_oldest_used_generation()); delete _tree; _tree = NULL; delete _allocator; @@ -425,7 +425,7 @@ FrozenBTreeTest::Main() EXPECT_TRUE(_tree->getFrozenView(*_allocator).empty()); _allocator->freeze(); EXPECT_FALSE(_tree->getFrozenView(*_allocator).empty()); - _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + _allocator->assign_generation(_generationHandler->getCurrentGeneration()); lookupFrozenRandomValues(*_tree, *_allocator, _randomValues); traverseTreeIterator(*_tree, *_allocator, diff --git a/vespalib/src/tests/datastore/array_store/array_store_test.cpp b/vespalib/src/tests/datastore/array_store/array_store_test.cpp index 1708b0fd948..afef530b33e 100644 --- a/vespalib/src/tests/datastore/array_store/array_store_test.cpp +++ b/vespalib/src/tests/datastore/array_store/array_store_test.cpp @@ -123,7 +123,7 @@ struct ArrayStoreTest : public TestT void assert_ref_reused(const EntryVector& first, const EntryVector& second, bool should_reuse) { EntryRef ref1 = add(first); remove(ref1); - trimHoldLists(); + reclaim_memory(); EntryRef ref2 = add(second); EXPECT_EQ(should_reuse, (ref2 == ref1)); assertGet(ref2, second); @@ -136,9 +136,9 @@ struct ArrayStoreTest : public TestT } return EntryRef(); } - void trimHoldLists() { - store.transferHoldLists(generation++); - store.trimHoldLists(generation); + void reclaim_memory() { + store.assign_generation(generation++); + store.reclaim_memory(generation); } void compactWorst(bool compactMemory, bool compactAddressSpace) { CompactionSpec compaction_spec(compactMemory, compactAddressSpace); @@ -283,7 +283,7 @@ TEST_P(NumberStoreTest, track_size_of_large_array_allocations_with_free_lists_en assert_buffer_stats(ref, TestBufferStats().used(2).hold(0).dead(1).extra_used(16)); remove({1,2,3,4}); assert_buffer_stats(ref, TestBufferStats().used(2).hold(1).dead(1).extra_hold(16).extra_used(16)); - trimHoldLists(); + reclaim_memory(); assert_buffer_stats(ref, TestBufferStats().used(2).hold(0).dead(2).extra_used(0)); add({5,6,7,8,9}); assert_buffer_stats(ref, TestBufferStats().used(2).hold(0).dead(1).extra_used(20)); @@ -316,7 +316,7 @@ test_compaction(NumberStoreBasicTest &f) EntryRef size2Ref = f.add({2,2}); EntryRef size3Ref = f.add({3,3,3}); f.remove(f.add({5,5})); - f.trimHoldLists(); + f.reclaim_memory(); f.assertBufferState(size1Ref, MemStats().used(1).dead(0)); f.assertBufferState(size2Ref, MemStats().used(4).dead(2)); f.assertBufferState(size3Ref, MemStats().used(2).dead(1)); // Note: First element is reserved @@ -335,7 +335,7 @@ test_compaction(NumberStoreBasicTest &f) EXPECT_NE(size2BufferId, f.getBufferId(f.getEntryRef({2,2}))); f.assertGet(size2Ref, {2,2}); // Old ref should still point to data. EXPECT_TRUE(f.store.bufferState(size2Ref).isOnHold()); - f.trimHoldLists(); + f.reclaim_memory(); EXPECT_TRUE(f.store.bufferState(size2Ref).isFree()); } @@ -360,7 +360,7 @@ void testCompaction(NumberStoreTest &f, bool compactMemory, bool compactAddressS f.remove(f.add({5,5,5})); f.remove(f.add({6})); f.remove(f.add({7})); - f.trimHoldLists(); + f.reclaim_memory(); f.assertBufferState(size1Ref, MemStats().used(3).dead(2)); f.assertBufferState(size2Ref, MemStats().used(2).dead(0)); f.assertBufferState(size3Ref, MemStats().used(6).dead(3)); @@ -397,7 +397,7 @@ void testCompaction(NumberStoreTest &f, bool compactMemory, bool compactAddressS EXPECT_FALSE(f.store.bufferState(size1Ref).isOnHold()); } EXPECT_FALSE(f.store.bufferState(size2Ref).isOnHold()); - f.trimHoldLists(); + f.reclaim_memory(); if (compactMemory) { EXPECT_TRUE(f.store.bufferState(size3Ref).isFree()); } else { @@ -436,7 +436,7 @@ TEST_P(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_small_a assertMemoryUsage(exp.used(entrySize() * 3)); remove({1,2,3}); assertMemoryUsage(exp.hold(entrySize() * 3)); - trimHoldLists(); + reclaim_memory(); assertMemoryUsage(exp.holdToDead(entrySize() * 3)); } @@ -447,7 +447,7 @@ TEST_P(NumberStoreTest, used_onHold_and_dead_memory_usage_is_tracked_for_large_a assertMemoryUsage(exp.used(largeArraySize() + entrySize() * 4)); remove({1,2,3,4}); assertMemoryUsage(exp.hold(largeArraySize() + entrySize() * 4)); - trimHoldLists(); + reclaim_memory(); assertMemoryUsage(exp.decUsed(entrySize() * 4).decHold(largeArraySize() + entrySize() * 4). dead(largeArraySize())); } diff --git a/vespalib/src/tests/datastore/datastore/datastore_test.cpp b/vespalib/src/tests/datastore/datastore/datastore_test.cpp index 1cb4f3e2307..10b96a87444 100644 --- a/vespalib/src/tests/datastore/datastore/datastore_test.cpp +++ b/vespalib/src/tests/datastore/datastore/datastore_test.cpp @@ -28,8 +28,8 @@ public: void holdElem(EntryRef ref, uint64_t len) { ParentType::holdElem(ref, len); } - void transferHoldLists(generation_t generation) { - ParentType::transferHoldLists(generation); + void assign_generation(generation_t current_gen) { + ParentType::assign_generation(current_gen); } void reclaim_entry_refs(generation_t oldest_used_gen) override { ParentType::reclaim_entry_refs(oldest_used_gen); @@ -261,29 +261,29 @@ TEST(DataStoreTest, require_that_we_can_hold_and_trim_buffers) s.switch_primary_buffer(); EXPECT_EQ(1u, s.primary_buffer_id()); s.holdBuffer(0); // hold last buffer - s.transferHoldLists(10); + s.assign_generation(10); EXPECT_EQ(1u, MyRef(s.addEntry(2)).bufferId()); s.switch_primary_buffer(); EXPECT_EQ(2u, s.primary_buffer_id()); s.holdBuffer(1); // hold last buffer - s.transferHoldLists(20); + s.assign_generation(20); EXPECT_EQ(2u, MyRef(s.addEntry(3)).bufferId()); s.switch_primary_buffer(); EXPECT_EQ(3u, s.primary_buffer_id()); s.holdBuffer(2); // hold last buffer - s.transferHoldLists(30); + s.assign_generation(30); EXPECT_EQ(3u, MyRef(s.addEntry(4)).bufferId()); s.holdBuffer(3); // hold current buffer - s.transferHoldLists(40); + s.assign_generation(40); EXPECT_TRUE(s.getBufferState(0).size() != 0); EXPECT_TRUE(s.getBufferState(1).size() != 0); EXPECT_TRUE(s.getBufferState(2).size() != 0); EXPECT_TRUE(s.getBufferState(3).size() != 0); - s.trimHoldLists(11); + s.reclaim_memory(11); EXPECT_TRUE(s.getBufferState(0).size() == 0); EXPECT_TRUE(s.getBufferState(1).size() != 0); EXPECT_TRUE(s.getBufferState(2).size() != 0); @@ -292,7 +292,7 @@ TEST(DataStoreTest, require_that_we_can_hold_and_trim_buffers) s.switch_primary_buffer(); EXPECT_EQ(0u, s.primary_buffer_id()); EXPECT_EQ(0u, MyRef(s.addEntry(5)).bufferId()); - s.trimHoldLists(41); + s.reclaim_memory(41); EXPECT_TRUE(s.getBufferState(0).size() != 0); EXPECT_TRUE(s.getBufferState(1).size() == 0); EXPECT_TRUE(s.getBufferState(2).size() == 0); @@ -304,13 +304,13 @@ TEST(DataStoreTest, require_that_we_can_hold_and_trim_elements) MyStore s; MyRef r1 = s.addEntry(1); s.holdElem(r1, 1); - s.transferHoldLists(10); + s.assign_generation(10); MyRef r2 = s.addEntry(2); s.holdElem(r2, 1); - s.transferHoldLists(20); + s.assign_generation(20); MyRef r3 = s.addEntry(3); s.holdElem(r3, 1); - s.transferHoldLists(30); + s.assign_generation(30); EXPECT_EQ(1, s.getEntry(r1)); EXPECT_EQ(2, s.getEntry(r2)); EXPECT_EQ(3, s.getEntry(r3)); @@ -358,11 +358,11 @@ TEST(DataStoreTest, require_that_we_can_use_free_lists) s.enableFreeLists(); auto r1 = s.addEntry(1); s.holdElem(r1, 1); - s.transferHoldLists(10); + s.assign_generation(10); auto r2 = s.addEntry(2); expect_successive_refs(r1, r2); s.holdElem(r2, 1); - s.transferHoldLists(20); + s.assign_generation(20); s.reclaim_entry_refs(11); auto r3 = s.addEntry(3); // reuse r1 EXPECT_EQ(r1, r3); @@ -393,7 +393,7 @@ TEST(DataStoreTest, require_that_we_can_use_free_lists_with_raw_allocator) expect_successive_handles(h1, h2); s.holdElem(h1.ref, 3); s.holdElem(h2.ref, 3); - s.transferHoldLists(10); + s.assign_generation(10); s.reclaim_entry_refs(11); auto h3 = allocator.alloc(3); // reuse h2.ref from free list @@ -429,7 +429,7 @@ TEST(DataStoreTest, require_that_memory_stats_are_calculated) s.addEntry(20); s.addEntry(30); s.holdBuffer(r.bufferId()); - s.transferHoldLists(100); + s.assign_generation(100); m._usedElems += 2; m._holdElems = m._usedElems; m._deadElems = 0; @@ -446,7 +446,7 @@ TEST(DataStoreTest, require_that_memory_stats_are_calculated) m._freeBuffers--; // trim hold buffer - s.trimHoldLists(101); + s.reclaim_memory(101); m._allocElems -= MyRef::offsetSize(); m._usedElems = 1; m._deadElems = 0; @@ -479,13 +479,13 @@ TEST(DataStoreTest, require_that_memory_usage_is_calculated) s.addEntry(30); s.addEntry(40); s.holdBuffer(r.bufferId()); - s.transferHoldLists(100); + s.assign_generation(100); vespalib::MemoryUsage m = s.getMemoryUsage(); EXPECT_EQ(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); EXPECT_EQ(5 * sizeof(int), m.usedBytes()); EXPECT_EQ(0 * sizeof(int), m.deadBytes()); EXPECT_EQ(5 * sizeof(int), m.allocatedBytesOnHold()); - s.trimHoldLists(101); + s.reclaim_memory(101); } TEST(DataStoreTest, require_that_we_can_disable_elemement_hold_list) @@ -513,8 +513,8 @@ TEST(DataStoreTest, require_that_we_can_disable_elemement_hold_list) EXPECT_EQ(4 * sizeof(int), m.usedBytes()); EXPECT_EQ(2 * sizeof(int), m.deadBytes()); EXPECT_EQ(1 * sizeof(int), m.allocatedBytesOnHold()); - s.transferHoldLists(100); - s.trimHoldLists(101); + s.assign_generation(100); + s.reclaim_memory(101); } using IntGrowStore = GrowStore<int, EntryRefT<24>>; @@ -634,9 +634,9 @@ TEST(DataStoreTest, can_set_memory_allocator) s.switch_primary_buffer(); EXPECT_EQ(AllocStats(3, 0), stats); s.holdBuffer(0); - s.transferHoldLists(10); + s.assign_generation(10); EXPECT_EQ(AllocStats(3, 0), stats); - s.trimHoldLists(11); + s.reclaim_memory(11); EXPECT_EQ(AllocStats(3, 2), stats); } EXPECT_EQ(AllocStats(3, 3), stats); @@ -693,8 +693,8 @@ void test_free_element_to_held_buffer(bool direct, bool before_hold_buffer) ASSERT_DEATH({ s.holdElem(ref, 1); }, "isActive\\(\\)"); } } - s.transferHoldLists(100); - s.trimHoldLists(101); + s.assign_generation(100); + s.reclaim_memory(101); } } diff --git a/vespalib/src/tests/datastore/fixed_size_hash_map/fixed_size_hash_map_test.cpp b/vespalib/src/tests/datastore/fixed_size_hash_map/fixed_size_hash_map_test.cpp index ad10bc5c7e6..4f4c3ac94eb 100644 --- a/vespalib/src/tests/datastore/fixed_size_hash_map/fixed_size_hash_map_test.cpp +++ b/vespalib/src/tests/datastore/fixed_size_hash_map/fixed_size_hash_map_test.cpp @@ -88,13 +88,13 @@ DataStoreFixedSizeHashTest::~DataStoreFixedSizeHashTest() void DataStoreFixedSizeHashTest::commit() { - _store.transferHoldLists(_generation_handler.getCurrentGeneration()); - _hash_map->transfer_hold_lists(_generation_handler.getCurrentGeneration()); + _store.assign_generation(_generation_handler.getCurrentGeneration()); + _hash_map->assign_generation(_generation_handler.getCurrentGeneration()); _generation_holder.assign_generation(_generation_handler.getCurrentGeneration()); _generation_handler.incGeneration(); - _store.trimHoldLists(_generation_handler.getFirstUsedGeneration()); - _hash_map->trim_hold_lists(_generation_handler.getFirstUsedGeneration()); - _generation_holder.reclaim(_generation_handler.getFirstUsedGeneration()); + _store.reclaim_memory(_generation_handler.get_oldest_used_generation()); + _hash_map->reclaim_memory(_generation_handler.get_oldest_used_generation()); + _generation_holder.reclaim(_generation_handler.get_oldest_used_generation()); } size_t diff --git a/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp b/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp index d6970e20e8a..4c3fe1756c5 100644 --- a/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp +++ b/vespalib/src/tests/datastore/sharded_hash_map/sharded_hash_map_test.cpp @@ -168,11 +168,11 @@ DataStoreShardedHashTest::~DataStoreShardedHashTest() void DataStoreShardedHashTest::commit() { - _store.transferHoldLists(_generationHandler.getCurrentGeneration()); - _hash_map.transfer_hold_lists(_generationHandler.getCurrentGeneration()); + _store.assign_generation(_generationHandler.getCurrentGeneration()); + _hash_map.assign_generation(_generationHandler.getCurrentGeneration()); _generationHandler.incGeneration(); - _store.trimHoldLists(_generationHandler.getFirstUsedGeneration()); - _hash_map.trim_hold_lists(_generationHandler.getFirstUsedGeneration()); + _store.reclaim_memory(_generationHandler.get_oldest_used_generation()); + _hash_map.reclaim_memory(_generationHandler.get_oldest_used_generation()); } void diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp index 6612ef998c5..48a0ecafbc6 100644 --- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp +++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp @@ -112,10 +112,10 @@ struct TestBase : public ::testing::Test { } return EntryRef(); } - void trimHoldLists() { + void reclaim_memory() { store.freeze(); - store.transferHoldLists(generation++); - store.trimHoldLists(generation); + store.assign_generation(generation++); + store.reclaim_memory(generation); } void compactWorst() { CompactionSpec compaction_spec(true, true); @@ -364,7 +364,7 @@ TYPED_TEST(TestBase, store_can_be_compacted) EntryRef val0Ref = this->add(this->values()[0]); EntryRef val1Ref = this->add(this->values()[1]); this->remove(this->add(this->values()[2])); - this->trimHoldLists(); + this->reclaim_memory(); size_t reserved = this->get_reserved(val0Ref); size_t array_size = this->get_array_size(val0Ref); this->assertBufferState(val0Ref, TestBufferStats().used(reserved + 3 * array_size).dead(reserved + array_size)); @@ -381,7 +381,7 @@ TYPED_TEST(TestBase, store_can_be_compacted) this->assertGet(val0Ref, this->values()[0]); this->assertGet(val1Ref, this->values()[1]); EXPECT_TRUE(this->store.bufferState(val0Ref).isOnHold()); - this->trimHoldLists(); + this->reclaim_memory(); EXPECT_TRUE(this->store.bufferState(val0Ref).isFree()); this->assertStoreContent(); } @@ -415,7 +415,7 @@ TYPED_TEST(TestBase, store_can_be_enumerated) EntryRef val0Ref = this->add(this->values()[0]); EntryRef val1Ref = this->add(this->values()[1]); this->remove(this->add(this->values()[2])); - this->trimHoldLists(); + this->reclaim_memory(); auto enumerator = this->getEnumerator(true); std::vector<uint32_t> refs; @@ -460,7 +460,7 @@ TEST_F(DoubleTest, nan_is_handled) for (auto &value : myvalues) { refs.emplace_back(add(value)); } - trimHoldLists(); + reclaim_memory(); EXPECT_TRUE(std::isnan(store.get(refs[1]))); EXPECT_TRUE(std::signbit(store.get(refs[1]))); EXPECT_TRUE(std::isinf(store.get(refs[2]))); diff --git a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp index d0fede5c550..496bc814d0d 100644 --- a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp @@ -62,9 +62,9 @@ struct UniqueStoreDictionaryTest : public ::testing::Test { } void inc_generation() { dict.freeze(); - dict.transfer_hold_lists(gen_handler.getCurrentGeneration()); + dict.assign_generation(gen_handler.getCurrentGeneration()); gen_handler.incGeneration(); - dict.trim_hold_lists(gen_handler.getFirstUsedGeneration()); + dict.reclaim_memory(gen_handler.get_oldest_used_generation()); } void take_snapshot() { dict.freeze(); diff --git a/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp b/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp index 0b34f587c6d..e865239787b 100644 --- a/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_string_allocator/unique_store_string_allocator_test.cpp @@ -67,9 +67,9 @@ struct TestBase : public ::testing::Test { EXPECT_EQ(expStats._extra_used, buffer_state(ref).stats().extra_used_bytes()); EXPECT_EQ(expStats._extra_hold, buffer_state(ref).stats().extra_hold_bytes()); } - void trim_hold_lists() { - allocator.get_data_store().transferHoldLists(generation++); - allocator.get_data_store().trimHoldLists(generation); + void reclaim_memory() { + allocator.get_data_store().assign_generation(generation++); + allocator.get_data_store().reclaim_memory(generation); } }; @@ -89,7 +89,7 @@ TEST_F(StringTest, elements_are_put_on_hold_when_value_is_removed) assert_buffer_state(ref, TestBufferStats().used(16).hold(0).dead(0)); remove(ref); assert_buffer_state(ref, TestBufferStats().used(16).hold(16).dead(0)); - trim_hold_lists(); + reclaim_memory(); assert_buffer_state(ref, TestBufferStats().used(16).hold(0).dead(16)); } @@ -100,7 +100,7 @@ TEST_F(StringTest, extra_bytes_used_is_tracked) assert_buffer_state(ref, TestBufferStats().used(2).hold(0).dead(1).extra_used(1001)); remove(ref); assert_buffer_state(ref, TestBufferStats().used(2).hold(1).dead(1).extra_used(1001).extra_hold(1001)); - trim_hold_lists(); + reclaim_memory(); assert_buffer_state(ref, TestBufferStats().used(2).hold(0).dead(2)); ref = add(spaces1000.c_str()); assert_buffer_state(ref, TestBufferStats().used(2).hold(0).dead(1).extra_used(1001)); @@ -110,7 +110,7 @@ TEST_F(StringTest, extra_bytes_used_is_tracked) remove(ref); remove(ref2); assert_buffer_state(ref, TestBufferStats().used(3).hold(2).dead(1).extra_used(2002).extra_hold(2002)); - trim_hold_lists(); + reclaim_memory(); assert_buffer_state(ref, TestBufferStats().used(3).hold(0).dead(3)); } @@ -134,7 +134,7 @@ TEST_F(StringTest, free_list_is_used_when_enabled) EntryRef ref2 = add(spaces1000.c_str()); remove(ref1); remove(ref2); - trim_hold_lists(); + reclaim_memory(); EntryRef ref3 = add(small.c_str()); EntryRef ref4 = add(spaces1000.c_str()); EXPECT_EQ(ref1, ref3); @@ -150,7 +150,7 @@ TEST_F(StringTest, free_list_is_not_used_when_disabled) EntryRef ref2 = add(spaces1000.c_str()); remove(ref1); remove(ref2); - trim_hold_lists(); + reclaim_memory(); EntryRef ref3 = add(small.c_str()); EntryRef ref4 = add(spaces1000.c_str()); EXPECT_NE(ref1, ref3); @@ -168,7 +168,7 @@ TEST_F(StringTest, free_list_is_never_used_for_move_on_compact) EntryRef ref4 = add(spaces1000.c_str()); remove(ref3); remove(ref4); - trim_hold_lists(); + reclaim_memory(); EntryRef ref5 = move_on_compact(ref1); EntryRef ref6 = move_on_compact(ref2); EXPECT_NE(ref5, ref3); diff --git a/vespalib/src/tests/util/generationhandler/generationhandler_test.cpp b/vespalib/src/tests/util/generationhandler/generationhandler_test.cpp index 00da752a749..0bc72f93a9d 100644 --- a/vespalib/src/tests/util/generationhandler/generationhandler_test.cpp +++ b/vespalib/src/tests/util/generationhandler/generationhandler_test.cpp @@ -26,10 +26,10 @@ GenerationHandlerTest::~GenerationHandlerTest() = default; TEST_F(GenerationHandlerTest, require_that_generation_can_be_increased) { EXPECT_EQ(0u, gh.getCurrentGeneration()); - EXPECT_EQ(0u, gh.getFirstUsedGeneration()); + EXPECT_EQ(0u, gh.get_oldest_used_generation()); gh.incGeneration(); EXPECT_EQ(1u, gh.getCurrentGeneration()); - EXPECT_EQ(1u, gh.getFirstUsedGeneration()); + EXPECT_EQ(1u, gh.get_oldest_used_generation()); } TEST_F(GenerationHandlerTest, require_that_readers_can_take_guards) @@ -87,34 +87,34 @@ TEST_F(GenerationHandlerTest, require_that_guards_can_be_copied) TEST_F(GenerationHandlerTest, require_that_the_first_used_generation_is_correct) { - EXPECT_EQ(0u, gh.getFirstUsedGeneration()); + EXPECT_EQ(0u, gh.get_oldest_used_generation()); gh.incGeneration(); - EXPECT_EQ(1u, gh.getFirstUsedGeneration()); + EXPECT_EQ(1u, gh.get_oldest_used_generation()); { GenGuard g1 = gh.takeGuard(); gh.incGeneration(); EXPECT_EQ(1u, gh.getGenerationRefCount()); - EXPECT_EQ(1u, gh.getFirstUsedGeneration()); + EXPECT_EQ(1u, gh.get_oldest_used_generation()); } - EXPECT_EQ(1u, gh.getFirstUsedGeneration()); - gh.updateFirstUsedGeneration(); // Only writer should call this + EXPECT_EQ(1u, gh.get_oldest_used_generation()); + gh.update_oldest_used_generation(); // Only writer should call this EXPECT_EQ(0u, gh.getGenerationRefCount()); - EXPECT_EQ(2u, gh.getFirstUsedGeneration()); + EXPECT_EQ(2u, gh.get_oldest_used_generation()); { GenGuard g1 = gh.takeGuard(); gh.incGeneration(); gh.incGeneration(); EXPECT_EQ(1u, gh.getGenerationRefCount()); - EXPECT_EQ(2u, gh.getFirstUsedGeneration()); + EXPECT_EQ(2u, gh.get_oldest_used_generation()); { GenGuard g2 = gh.takeGuard(); - EXPECT_EQ(2u, gh.getFirstUsedGeneration()); + EXPECT_EQ(2u, gh.get_oldest_used_generation()); } } - EXPECT_EQ(2u, gh.getFirstUsedGeneration()); - gh.updateFirstUsedGeneration(); // Only writer should call this + EXPECT_EQ(2u, gh.get_oldest_used_generation()); + gh.update_oldest_used_generation(); // Only writer should call this EXPECT_EQ(0u, gh.getGenerationRefCount()); - EXPECT_EQ(4u, gh.getFirstUsedGeneration()); + EXPECT_EQ(4u, gh.get_oldest_used_generation()); } TEST_F(GenerationHandlerTest, require_that_generation_can_grow_large) @@ -124,7 +124,7 @@ TEST_F(GenerationHandlerTest, require_that_generation_can_grow_large) EXPECT_EQ(i, gh.getCurrentGeneration()); guards.push_back(gh.takeGuard()); // take guard on current generation if (i >= 128) { - EXPECT_EQ(i - 128, gh.getFirstUsedGeneration()); + EXPECT_EQ(i - 128, gh.get_oldest_used_generation()); guards.pop_front(); EXPECT_EQ(128u, gh.getGenerationRefCount()); } diff --git a/vespalib/src/tests/util/generationhandler_stress/generation_handler_stress_test.cpp b/vespalib/src/tests/util/generationhandler_stress/generation_handler_stress_test.cpp index 74af25b54a8..fd2769fd8b1 100644 --- a/vespalib/src/tests/util/generationhandler_stress/generation_handler_stress_test.cpp +++ b/vespalib/src/tests/util/generationhandler_stress/generation_handler_stress_test.cpp @@ -238,7 +238,7 @@ Fixture::write_indirect_work(uint64_t cnt, IndirectContext& context) ReadStopper read_stopper(_stopRead); uint32_t sleep_cnt = 0; ASSERT_EQ(0, _generationHandler.getCurrentGeneration()); - auto oldest_gen = _generationHandler.getFirstUsedGeneration(); + auto oldest_gen = _generationHandler.get_oldest_used_generation(); for (uint64_t i = 0; i < cnt; ++i) { auto gen = _generationHandler.getCurrentGeneration(); // Hold data for gen, write new data for next_gen @@ -248,7 +248,7 @@ Fixture::write_indirect_work(uint64_t cnt, IndirectContext& context) *v_ptr = next_gen; context._value_ptr.store(v_ptr, std::memory_order_release); _generationHandler.incGeneration(); - auto first_used_gen = _generationHandler.getFirstUsedGeneration(); + auto first_used_gen = _generationHandler.get_oldest_used_generation(); while (oldest_gen < first_used_gen) { // Clear data that readers should no longer have access to. *context.calc_value_ptr(oldest_gen) = 0; @@ -258,8 +258,8 @@ Fixture::write_indirect_work(uint64_t cnt, IndirectContext& context) // Sleep if writer gets too much ahead of readers. std::this_thread::sleep_for(1ms); ++sleep_cnt; - _generationHandler.updateFirstUsedGeneration(); - first_used_gen = _generationHandler.getFirstUsedGeneration(); + _generationHandler.update_oldest_used_generation(); + first_used_gen = _generationHandler.get_oldest_used_generation(); } } _doneWriteWork += cnt; diff --git a/vespalib/src/tests/util/rcuvector/rcuvector_test.cpp b/vespalib/src/tests/util/rcuvector/rcuvector_test.cpp index c23065b7468..5d6ec3050da 100644 --- a/vespalib/src/tests/util/rcuvector/rcuvector_test.cpp +++ b/vespalib/src/tests/util/rcuvector/rcuvector_test.cpp @@ -140,7 +140,7 @@ TEST(RcuVectorTest, generation_handling) v.setGeneration(2); v.push_back(50); - v.removeOldGenerations(3); + v.reclaim_memory(3); EXPECT_EQ(0u, v.getMemoryUsage().allocatedBytesOnHold()); v.push_back(60); // new array EXPECT_EQ(24u, v.getMemoryUsage().allocatedBytesOnHold()); @@ -184,7 +184,7 @@ TEST(RcuVectorTest, memory_usage) EXPECT_TRUE(assertUsage(MemoryUsage(6,6,0,2), v.getMemoryUsage())); v.push_back(4); EXPECT_TRUE(assertUsage(MemoryUsage(12,11,0,6), v.getMemoryUsage())); - v.removeOldGenerations(1); + v.reclaim_memory(1); EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,0), v.getMemoryUsage())); } @@ -434,7 +434,7 @@ StressFixture::commit() auto current_gen = generation_handler.getCurrentGeneration(); g.assign_generation(current_gen); generation_handler.incGeneration(); - auto first_used_gen = generation_handler.getFirstUsedGeneration(); + auto first_used_gen = generation_handler.get_oldest_used_generation(); g.reclaim(first_used_gen); } diff --git a/vespalib/src/vespa/vespalib/btree/btree.hpp b/vespalib/src/vespa/vespalib/btree/btree.hpp index c6d8886254d..81687b6e62d 100644 --- a/vespalib/src/vespa/vespalib/btree/btree.hpp +++ b/vespalib/src/vespa/vespalib/btree/btree.hpp @@ -20,7 +20,7 @@ BTree<KeyT, DataT, AggrT, CompareT, TraitsT, AggrCalcT>::~BTree() { clear(); _alloc.freeze(); - _alloc.clearHoldLists(); + _alloc.reclaim_all_memory(); } template <typename KeyT, typename DataT, typename AggrT, typename CompareT, diff --git a/vespalib/src/vespa/vespalib/btree/btreenodeallocator.h b/vespalib/src/vespa/vespalib/btree/btreenodeallocator.h index c631ac4041a..77900edf848 100644 --- a/vespalib/src/vespa/vespalib/btree/btreenodeallocator.h +++ b/vespalib/src/vespa/vespalib/btree/btreenodeallocator.h @@ -101,7 +101,7 @@ public: /** * Try to free held nodes if nobody can be referencing them. */ - void trimHoldLists(generation_t usedGen); + void reclaim_memory(generation_t oldest_used_gen); /** * Transfer nodes from hold1 lists to hold2 lists, they are no @@ -109,9 +109,9 @@ public: * older versions of the frozen structure must leave before elements * can be unheld. */ - void transferHoldLists(generation_t generation); + void assign_generation(generation_t current_gen); - void clearHoldLists(); + void reclaim_all_memory(); static bool isValidRef(BTreeNode::Ref ref) { return NodeStore::isValidRef(ref); } diff --git a/vespalib/src/vespa/vespalib/btree/btreenodeallocator.hpp b/vespalib/src/vespa/vespalib/btree/btreenodeallocator.hpp index 8976d73379c..4968bbaf4a7 100644 --- a/vespalib/src/vespa/vespalib/btree/btreenodeallocator.hpp +++ b/vespalib/src/vespa/vespalib/btree/btreenodeallocator.hpp @@ -266,18 +266,18 @@ template <typename KeyT, typename DataT, typename AggrT, size_t INTERNAL_SLOTS, size_t LEAF_SLOTS> void BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>:: -trimHoldLists(generation_t usedGen) +reclaim_memory(generation_t oldest_used_gen) { - _nodeStore.trimHoldLists(usedGen); + _nodeStore.reclaim_memory(oldest_used_gen); } template <typename KeyT, typename DataT, typename AggrT, size_t INTERNAL_SLOTS, size_t LEAF_SLOTS> void BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>:: -transferHoldLists(generation_t generation) +assign_generation(generation_t current_gen) { - _nodeStore.transferHoldLists(generation); + _nodeStore.assign_generation(current_gen); } @@ -285,9 +285,9 @@ template <typename KeyT, typename DataT, typename AggrT, size_t INTERNAL_SLOTS, size_t LEAF_SLOTS> void BTreeNodeAllocator<KeyT, DataT, AggrT, INTERNAL_SLOTS, LEAF_SLOTS>:: -clearHoldLists() +reclaim_all_memory() { - _nodeStore.clearHoldLists(); + _nodeStore.reclaim_all_memory(); } diff --git a/vespalib/src/vespa/vespalib/btree/btreenodestore.h b/vespalib/src/vespa/vespalib/btree/btreenodestore.h index 20f80e07a6b..7b89e2d0ddb 100644 --- a/vespalib/src/vespa/vespalib/btree/btreenodestore.h +++ b/vespalib/src/vespa/vespalib/btree/btreenodestore.h @@ -162,8 +162,8 @@ public: std::unique_ptr<vespalib::datastore::CompactingBuffers> start_compact_worst(const CompactionStrategy& compaction_strategy); - void transferHoldLists(generation_t generation) { - _store.transferHoldLists(generation); + void assign_generation(generation_t current_gen) { + _store.assign_generation(current_gen); } // Inherit doc from DataStoreBase @@ -172,12 +172,12 @@ public: } // Inherit doc from DataStoreBase - void trimHoldLists(generation_t usedGen) { - _store.trimHoldLists(usedGen); + void reclaim_memory(generation_t oldest_used_gen) { + _store.reclaim_memory(oldest_used_gen); } - void clearHoldLists() { - _store.clearHoldLists(); + void reclaim_all_memory() { + _store.reclaim_all_memory(); } // Inherit doc from DataStoreBase diff --git a/vespalib/src/vespa/vespalib/btree/btreestore.h b/vespalib/src/vespa/vespalib/btree/btreestore.h index 54bc397175d..e5c55d5775d 100644 --- a/vespalib/src/vespa/vespalib/btree/btreestore.h +++ b/vespalib/src/vespa/vespalib/btree/btreestore.h @@ -332,25 +332,25 @@ public: // Inherit doc from DataStoreBase void - trimHoldLists(generation_t usedGen) + reclaim_memory(generation_t oldest_used_gen) { - _allocator.trimHoldLists(usedGen); - _store.trimHoldLists(usedGen); + _allocator.reclaim_memory(oldest_used_gen); + _store.reclaim_memory(oldest_used_gen); } // Inherit doc from DataStoreBase void - transferHoldLists(generation_t generation) + assign_generation(generation_t current_gen) { - _allocator.transferHoldLists(generation); - _store.transferHoldLists(generation); + _allocator.assign_generation(current_gen); + _store.assign_generation(current_gen); } void - clearHoldLists() + reclaim_all_memory() { - _allocator.clearHoldLists(); - _store.clearHoldLists(); + _allocator.reclaim_all_memory(); + _store.reclaim_all_memory(); } diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.h b/vespalib/src/vespa/vespalib/datastore/array_store.h index db037ee12fb..e7662b9eb73 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.h +++ b/vespalib/src/vespa/vespalib/datastore/array_store.h @@ -114,8 +114,8 @@ public: vespalib::AddressSpace addressSpaceUsage() const; // Pass on hold list management to underlying store - void transferHoldLists(generation_t generation) { _store.transferHoldLists(generation); } - void trimHoldLists(generation_t firstUsed) { _store.trimHoldLists(firstUsed); } + void assign_generation(generation_t current_gen) { _store.assign_generation(current_gen); } + void reclaim_memory(generation_t oldest_used_gen) { _store.reclaim_memory(oldest_used_gen); } vespalib::GenerationHolder &getGenerationHolder() { return _store.getGenerationHolder(); } void setInitializing(bool initializing) { _store.setInitializing(initializing); } diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.hpp b/vespalib/src/vespa/vespalib/datastore/array_store.hpp index 4df8505e927..b31d47fe4fe 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/array_store.hpp @@ -57,7 +57,7 @@ ArrayStore<EntryT, RefT, TypeMapperT>::ArrayStore(const ArrayStoreConfig &cfg, s template <typename EntryT, typename RefT, typename TypeMapperT> ArrayStore<EntryT, RefT, TypeMapperT>::~ArrayStore() { - _store.clearHoldLists(); + _store.reclaim_all_memory(); _store.dropBuffers(); } diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp index 4589fbba6fa..96160fd347b 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp @@ -220,10 +220,10 @@ DataStoreBase::addType(BufferTypeBase *typeHandler) } void -DataStoreBase::transferHoldLists(generation_t generation) +DataStoreBase::assign_generation(generation_t current_gen) { - _genHolder.assign_generation(generation); - _entry_ref_hold_list.assign_generation(generation); + _genHolder.assign_generation(current_gen); + _entry_ref_hold_list.assign_generation(current_gen); } void @@ -235,14 +235,14 @@ DataStoreBase::doneHoldBuffer(uint32_t bufferId) } void -DataStoreBase::trimHoldLists(generation_t usedGen) +DataStoreBase::reclaim_memory(generation_t oldest_used_gen) { - reclaim_entry_refs(usedGen); // Trim entries before trimming buffers - _genHolder.reclaim(usedGen); + reclaim_entry_refs(oldest_used_gen); // Trim entries before trimming buffers + _genHolder.reclaim(oldest_used_gen); } void -DataStoreBase::clearHoldLists() +DataStoreBase::reclaim_all_memory() { _entry_ref_hold_list.assign_generation(0); reclaim_all_entry_refs(); diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h index 520c13742b5..598f0872253 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.h +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h @@ -189,9 +189,9 @@ public: public: /** - * Transfer holds from hold1 to hold2 lists, assigning generation. + * Assign generation on data elements on hold lists added since the last time this function was called. */ - void transferHoldLists(generation_t generation); + void assign_generation(generation_t current_gen); private: /** @@ -201,13 +201,13 @@ private: public: /** - * Trim hold lists, freeing buffers that no longer needs to be held. + * Reclaim memory from hold lists, freeing buffers and entry refs that no longer needs to be held. * - * @param usedGen lowest generation that is still used. + * @param oldest_used_gen oldest generation that is still used. */ - void trimHoldLists(generation_t usedGen); + void reclaim_memory(generation_t oldest_used_gen); - void clearHoldLists(); + void reclaim_all_memory(); template <typename EntryType, typename RefType> EntryType *getEntry(RefType ref) { diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp index 402905d7aca..5338ce0c6b2 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp @@ -97,23 +97,22 @@ FixedSizeHashMap::add(const ShardedHashComparator & comp, std::function<EntryRef } void -FixedSizeHashMap::transfer_hold_lists_slow(generation_t generation) +FixedSizeHashMap::assign_generation_slow(generation_t current_gen) { auto &hold_2_list = _hold_2_list; for (uint32_t node_idx : _hold_1_list) { - hold_2_list.push_back(std::make_pair(generation, node_idx)); + hold_2_list.push_back(std::make_pair(current_gen, node_idx)); } _hold_1_list.clear(); - } void -FixedSizeHashMap::trim_hold_lists_slow(generation_t first_used) +FixedSizeHashMap::reclaim_memory_slow(generation_t oldest_used_gen) { while (!_hold_2_list.empty()) { auto& first = _hold_2_list.front(); - if (static_cast<sgeneration_t>(first.first - first_used) >= 0) { + if (static_cast<sgeneration_t>(first.first - oldest_used_gen) >= 0) { break; } uint32_t node_idx = first.second; diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h index dd56b4951bc..de05ec1deb0 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h @@ -56,8 +56,8 @@ private: * A reader must own an appropriate GenerationHandler::Guard to ensure * that memory is held while it can be accessed by reader. * - * The writer must update generation and call transfer_hold_lists and - * trim_hold_lists as needed to free up memory no longer needed by any + * The writer must update generation and call assign_generation and + * reclaim_memory as needed to free up memory no longer needed by any * readers. */ class FixedSizeHashMap { @@ -114,8 +114,8 @@ private: std::deque<std::pair<generation_t, uint32_t>> _hold_2_list; uint32_t _num_shards; - void transfer_hold_lists_slow(generation_t generation); - void trim_hold_lists_slow(generation_t first_used); + void assign_generation_slow(generation_t current_gen); + void reclaim_memory_slow(generation_t oldest_used_gen); void force_add(const EntryComparator& comp, const KvType& kv); public: FixedSizeHashMap(uint32_t module, uint32_t capacity, uint32_t num_shards); @@ -143,15 +143,15 @@ public: return nullptr; } - void transfer_hold_lists(generation_t generation) { + void assign_generation(generation_t current_gen) { if (!_hold_1_list.empty()) { - transfer_hold_lists_slow(generation); + assign_generation_slow(current_gen); } } - void trim_hold_lists(generation_t first_used) { - if (!_hold_2_list.empty() && static_cast<sgeneration_t>(_hold_2_list.front().first - first_used) < 0) { - trim_hold_lists_slow(first_used); + void reclaim_memory(generation_t oldest_used_gen) { + if (!_hold_2_list.empty() && static_cast<sgeneration_t>(_hold_2_list.front().first - oldest_used_gen) < 0) { + reclaim_memory_slow(oldest_used_gen); } } diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h index 48abda45974..5a75a30d182 100644 --- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h @@ -25,8 +25,8 @@ public: using generation_t = vespalib::GenerationHandler::generation_t; virtual ~IUniqueStoreDictionary() = default; virtual void freeze() = 0; - virtual void transfer_hold_lists(generation_t generation) = 0; - virtual void trim_hold_lists(generation_t firstUsed) = 0; + virtual void assign_generation(generation_t current_gen) = 0; + virtual void reclaim_memory(generation_t oldest_used_gen) = 0; virtual UniqueStoreAddResult add(const EntryComparator& comp, std::function<EntryRef(void)> insertEntry) = 0; virtual EntryRef find(const EntryComparator& comp) = 0; virtual void remove(const EntryComparator& comp, EntryRef ref) = 0; diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp index 86578f663a1..a28c3071646 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp @@ -107,27 +107,27 @@ ShardedHashMap::find(const EntryComparator& comp, EntryRef key_ref) const } void -ShardedHashMap::transfer_hold_lists(generation_t generation) +ShardedHashMap::assign_generation(generation_t current_gen) { for (size_t i = 0; i < num_shards; ++i) { auto map = _maps[i].load(std::memory_order_relaxed); if (map != nullptr) { - map->transfer_hold_lists(generation); + map->assign_generation(current_gen); } } - _gen_holder.assign_generation(generation); + _gen_holder.assign_generation(current_gen); } void -ShardedHashMap::trim_hold_lists(generation_t first_used) +ShardedHashMap::reclaim_memory(generation_t oldest_used_gen) { for (size_t i = 0; i < num_shards; ++i) { auto map = _maps[i].load(std::memory_order_relaxed); if (map != nullptr) { - map->trim_hold_lists(first_used); + map->reclaim_memory(oldest_used_gen); } } - _gen_holder.reclaim(first_used); + _gen_holder.reclaim(oldest_used_gen); } size_t diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h index 80d14d187b0..572a8790828 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h @@ -28,8 +28,8 @@ struct ICompactable; * A reader must own an appropriate GenerationHandler::Guard to ensure * that memory is held while it can be accessed by reader. * - * The writer must update generation and call transfer_hold_lists and - * trim_hold_lists as needed to free up memory no longer needed by any + * The writer must update generation and call assign_generation and + * reclaim_memory as needed to free up memory no longer needed by any * readers. */ class ShardedHashMap { @@ -52,8 +52,8 @@ public: KvType* remove(const EntryComparator& comp, EntryRef key_ref); KvType* find(const EntryComparator& comp, EntryRef key_ref); const KvType* find(const EntryComparator& comp, EntryRef key_ref) const; - void transfer_hold_lists(generation_t generation); - void trim_hold_lists(generation_t first_used); + void assign_generation(generation_t current_gen); + void reclaim_memory(generation_t oldest_used_gen); size_t size() const noexcept; const EntryComparator &get_default_comparator() const noexcept { return *_comp; } MemoryUsage get_memory_usage() const; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h index e7c374985a7..1313d57fbab 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h @@ -70,8 +70,8 @@ public: inline const DataStoreType& get_data_store() const noexcept { return _allocator.get_data_store(); } // Pass on hold list management to underlying store - void transferHoldLists(generation_t generation); - void trimHoldLists(generation_t firstUsed); + void assign_generation(generation_t current_gen); + void reclaim_memory(generation_t oldest_used_gen); vespalib::GenerationHolder &getGenerationHolder() { return _store.getGenerationHolder(); } void setInitializing(bool initializing) { _store.setInitializing(initializing); } void freeze(); diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index 63592f82898..b8493017020 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -190,18 +190,18 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::bufferState(EntryRef ref) const template <typename EntryT, typename RefT, typename Compare, typename Allocator> void -UniqueStore<EntryT, RefT, Compare, Allocator>::transferHoldLists(generation_t generation) +UniqueStore<EntryT, RefT, Compare, Allocator>::assign_generation(generation_t current_gen) { - _dict->transfer_hold_lists(generation); - _store.transferHoldLists(generation); + _dict->assign_generation(current_gen); + _store.assign_generation(current_gen); } template <typename EntryT, typename RefT, typename Compare, typename Allocator> void -UniqueStore<EntryT, RefT, Compare, Allocator>::trimHoldLists(generation_t firstUsed) +UniqueStore<EntryT, RefT, Compare, Allocator>::reclaim_memory(generation_t oldest_used_gen) { - _dict->trim_hold_lists(firstUsed); - _store.trimHoldLists(firstUsed); + _dict->reclaim_memory(oldest_used_gen); + _store.reclaim_memory(oldest_used_gen); } template <typename EntryT, typename RefT, typename Compare, typename Allocator> diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp index 5d96b1e0314..8ad11b18218 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_allocator.hpp @@ -28,7 +28,7 @@ UniqueStoreAllocator<EntryT, RefT>::UniqueStoreAllocator(std::shared_ptr<alloc:: template <typename EntryT, typename RefT> UniqueStoreAllocator<EntryT, RefT>::~UniqueStoreAllocator() { - _store.clearHoldLists(); + _store.reclaim_all_memory(); _store.dropBuffers(); } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h index 7aed81c3a79..8c5f284bb14 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h @@ -74,8 +74,8 @@ public: UniqueStoreDictionary(std::unique_ptr<EntryComparator> compare); ~UniqueStoreDictionary() override; void freeze() override; - void transfer_hold_lists(generation_t generation) override; - void trim_hold_lists(generation_t firstUsed) override; + void assign_generation(generation_t current_gen) override; + void reclaim_memory(generation_t oldest_used_gen) override; UniqueStoreAddResult add(const EntryComparator& comp, std::function<EntryRef(void)> insertEntry) override; EntryRef find(const EntryComparator& comp) override; void remove(const EntryComparator& comp, EntryRef ref) override; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp index 29c4b6514d7..6708b4c1448 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp @@ -41,25 +41,25 @@ UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::freeze() template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void -UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::transfer_hold_lists(generation_t generation) +UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::assign_generation(generation_t current_gen) { if constexpr (has_btree_dictionary) { - this->_btree_dict.getAllocator().transferHoldLists(generation); + this->_btree_dict.getAllocator().assign_generation(current_gen); } if constexpr (has_hash_dictionary) { - this->_hash_dict.transfer_hold_lists(generation); + this->_hash_dict.assign_generation(current_gen); } } template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void -UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::trim_hold_lists(generation_t firstUsed) +UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::reclaim_memory(generation_t oldest_used_gen) { if constexpr (has_btree_dictionary) { - this->_btree_dict.getAllocator().trimHoldLists(firstUsed); + this->_btree_dict.getAllocator().reclaim_memory(oldest_used_gen); } if constexpr (has_hash_dictionary) { - this->_hash_dict.trim_hold_lists(firstUsed); + this->_hash_dict.reclaim_memory(oldest_used_gen); } } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp index eeba2f463b9..65cab4850ba 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.hpp @@ -30,7 +30,7 @@ UniqueStoreStringAllocator<RefT>::UniqueStoreStringAllocator(std::shared_ptr<all template <typename RefT> UniqueStoreStringAllocator<RefT>::~UniqueStoreStringAllocator() { - _store.clearHoldLists(); + _store.reclaim_all_memory(); _store.dropBuffers(); } diff --git a/vespalib/src/vespa/vespalib/util/generationhandler.cpp b/vespalib/src/vespa/vespalib/util/generationhandler.cpp index d1cc0271068..3562926d88d 100644 --- a/vespalib/src/vespa/vespalib/util/generationhandler.cpp +++ b/vespalib/src/vespa/vespalib/util/generationhandler.cpp @@ -111,7 +111,7 @@ GenerationHandler::Guard::operator=(Guard &&rhs) } void -GenerationHandler::updateFirstUsedGeneration() +GenerationHandler::update_oldest_used_generation() { for (;;) { if (_first == _last.load(std::memory_order_relaxed)) @@ -125,12 +125,12 @@ GenerationHandler::updateFirstUsedGeneration() toFree->_next = _free; _free = toFree; } - _firstUsedGeneration.store(_first->_generation, std::memory_order_relaxed); + _oldest_used_generation.store(_first->_generation, std::memory_order_relaxed); } GenerationHandler::GenerationHandler() : _generation(0), - _firstUsedGeneration(0), + _oldest_used_generation(0), _last(nullptr), _first(nullptr), _free(nullptr), @@ -144,7 +144,7 @@ GenerationHandler::GenerationHandler() GenerationHandler::~GenerationHandler(void) { - updateFirstUsedGeneration(); + update_oldest_used_generation(); assert(_first == _last.load(std::memory_order_relaxed)); while (_free != nullptr) { GenerationHold *toFree = _free; @@ -190,7 +190,7 @@ GenerationHandler::incGeneration() // reader set_generation(ngen); last->_generation.store(ngen, std::memory_order_relaxed); - updateFirstUsedGeneration(); + update_oldest_used_generation(); return; } GenerationHold *nhold = nullptr; @@ -207,7 +207,7 @@ GenerationHandler::incGeneration() last->_next = nhold; set_generation(ngen); _last.store(nhold, std::memory_order_release); - updateFirstUsedGeneration(); + update_oldest_used_generation(); } uint32_t @@ -215,7 +215,7 @@ GenerationHandler::getGenerationRefCount(generation_t gen) const { if (static_cast<sgeneration_t>(gen - getCurrentGeneration()) > 0) return 0u; - if (static_cast<sgeneration_t>(getFirstUsedGeneration() - gen) > 0) + if (static_cast<sgeneration_t>(get_oldest_used_generation() - gen) > 0) return 0u; for (GenerationHold *hold = _first; hold != nullptr; hold = hold->_next) { if (hold->_generation.load(std::memory_order_relaxed) == gen) diff --git a/vespalib/src/vespa/vespalib/util/generationhandler.h b/vespalib/src/vespa/vespalib/util/generationhandler.h index 9637ad0e414..6ba71b7f5fb 100644 --- a/vespalib/src/vespa/vespalib/util/generationhandler.h +++ b/vespalib/src/vespa/vespalib/util/generationhandler.h @@ -73,7 +73,7 @@ public: private: std::atomic<generation_t> _generation; - std::atomic<generation_t> _firstUsedGeneration; + std::atomic<generation_t> _oldest_used_generation; std::atomic<GenerationHold *> _last; // Points to "current generation" entry GenerationHold *_first; // Points to "firstUsedGeneration" entry GenerationHold *_free; // List of free entries @@ -101,17 +101,17 @@ public: void incGeneration(); /** - * Update first used generation. + * Update the oldest used generation. * Should be called by the writer thread. */ - void updateFirstUsedGeneration(); + void update_oldest_used_generation(); /** - * Returns the first generation guarded by a reader. It might be too low - * if writer hasn't updated first used generation after last reader left. + * Returns the oldest generation guarded by a reader. + * It might be too low if writer hasn't updated oldest used generation after last reader left. */ - generation_t getFirstUsedGeneration() const noexcept { - return _firstUsedGeneration.load(std::memory_order_relaxed); + generation_t get_oldest_used_generation() const noexcept { + return _oldest_used_generation.load(std::memory_order_relaxed); } /** diff --git a/vespalib/src/vespa/vespalib/util/rcuvector.h b/vespalib/src/vespa/vespalib/util/rcuvector.h index 5d084fe3815..b0929303692 100644 --- a/vespalib/src/vespa/vespalib/util/rcuvector.h +++ b/vespalib/src/vespa/vespalib/util/rcuvector.h @@ -182,7 +182,7 @@ public: /** * Remove all old data vectors where generation < firstUsed. **/ - void removeOldGenerations(generation_t firstUsed); + void reclaim_memory(generation_t oldest_used_gen); MemoryUsage getMemoryUsage() const override; }; diff --git a/vespalib/src/vespa/vespalib/util/rcuvector.hpp b/vespalib/src/vespa/vespalib/util/rcuvector.hpp index e551bb17db0..eadda8ac1e9 100644 --- a/vespalib/src/vespa/vespalib/util/rcuvector.hpp +++ b/vespalib/src/vespa/vespalib/util/rcuvector.hpp @@ -187,9 +187,9 @@ RcuVector<T>::~RcuVector() template <typename T> void -RcuVector<T>::removeOldGenerations(generation_t firstUsed) +RcuVector<T>::reclaim_memory(generation_t oldest_used_gen) { - _genHolderStore.reclaim(firstUsed); + _genHolderStore.reclaim(oldest_used_gen); } template <typename T> |