diff options
86 files changed, 1571 insertions, 596 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java index d83636d08d6..7147743a086 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java @@ -20,6 +20,12 @@ import com.yahoo.vespa.flags.FlagSource; import java.io.File; import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; import java.util.logging.Logger; import static com.yahoo.vespa.config.server.filedistribution.FileDistributionUtil.fileReferenceExistsOnDisk; @@ -45,7 +51,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { Curator curator, Duration interval, FlagSource flagSource) { - super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval); + super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, false); this.applicationRepository = applicationRepository; this.configserverConfig = applicationRepository.configserverConfig(); this.supervisor = new Supervisor(new Transport("filedistribution-pool")).setDropEmptyBuffers(true); @@ -56,9 +62,10 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { protected double maintain() { if (getOtherConfigServersInCluster(configserverConfig).isEmpty()) return 1.0; // Nothing to do - int attempts = 0; - int failures = 0; + final AtomicInteger attempts = new AtomicInteger(0); + final AtomicInteger failures = new AtomicInteger(0); + List<CompletableFuture<Void>> futures = new ArrayList<>(); try (var fileDownloader = createFileDownloader()) { for (var applicationId : applicationRepository.listApplications()) { log.fine(() -> "Verifying application package for " + applicationId); @@ -66,28 +73,40 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { if (session == null) continue; // App might be deleted after call to listApplications() FileReference applicationPackage = session.getApplicationPackageReference(); - long sessionId = session.getSessionId(); - log.fine(() -> "Verifying application package file reference " + applicationPackage + " for session " + sessionId); - - if (applicationPackage != null) { - attempts++; - if (! fileReferenceExistsOnDisk(downloadDirectory, applicationPackage)) { - log.fine(() -> "Downloading missing application package for application " + applicationId + " (session " + sessionId + ")"); - - FileReferenceDownload download = new FileReferenceDownload(applicationPackage, - false, - this.getClass().getSimpleName()); - if (fileDownloader.getFile(download).isEmpty()) { - failures++; - log.warning("Failed to download application package for application " + applicationId + " (session " + sessionId + ")"); - continue; - } - } - createLocalSessionIfMissing(applicationId, sessionId); + if (applicationPackage == null) continue; + + if ( ! fileReferenceExistsOnDisk(downloadDirectory, applicationPackage)) { + long sessionId = session.getSessionId(); + log.fine(() -> "Downloading application package for " + applicationId + + " application package reference " + applicationPackage + + " (session " + sessionId + ")"); + + FileReferenceDownload download = new FileReferenceDownload(applicationPackage, + false, + this.getClass().getSimpleName()); + futures.add(CompletableFuture.supplyAsync(() -> fileDownloader.getFile(download)) + .thenAccept(file -> { + if (file.isPresent()) { + attempts.incrementAndGet(); + createLocalSessionIfMissing(applicationId, sessionId); + } else { + failures.incrementAndGet(); + log.warning("Failed to download application package for application " + + applicationId + " (session " + sessionId + ")"); + } + })); } } } - return asSuccessFactor(attempts, failures); + log.fine(() -> "Attempts: " + attempts.get() + ", failures: " + failures.get()); + futures.forEach(future -> { + try { + future.get(); + } catch (InterruptedException | ExecutionException e) { + log.log(Level.WARNING, "Failed to get future", e); + } + }); + return asSuccessFactor(attempts.get(), failures.get()); } private FileDownloader createFileDownloader() { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java index 9d1901f83ed..ce068f4929e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java @@ -33,8 +33,8 @@ public abstract class ConfigServerMaintainer extends Maintainer { /** Creates a maintainer where maintainers on different nodes in this cluster run with even delay. */ ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource, - Instant now, Duration interval) { - super(null, interval, now, new JobControl(new JobControlFlags(curator, flagSource)), + Instant now, Duration interval, boolean useLock) { + super(null, interval, now, new JobControl(new JobControlFlags(curator, flagSource, useLock)), new ConfigServerJobMetrics(applicationRepository.metric()), cluster(curator), false); this.applicationRepository = applicationRepository; } @@ -59,13 +59,15 @@ public abstract class ConfigServerMaintainer extends Maintainer { private static final Path root = Path.fromString("/configserver/v1/"); private static final Path lockRoot = root.append("locks"); - private final Curator curator; + private final Curator curator; private final ListFlag<String> inactiveJobsFlag; + private final boolean useLock; - public JobControlFlags(Curator curator, FlagSource flagSource) { + public JobControlFlags(Curator curator, FlagSource flagSource, boolean useLock) { this.curator = curator; this.inactiveJobsFlag = PermanentFlags.INACTIVE_MAINTENANCE_JOBS.bindTo(flagSource); + this.useLock = useLock; } @Override @@ -75,7 +77,9 @@ public abstract class ConfigServerMaintainer extends Maintainer { @Override public Mutex lockMaintenanceJob(String job) { - return curator.lock(lockRoot.append(job), Duration.ofSeconds(1)); + return (useLock) + ? curator.lock(lockRoot.append(job), Duration.ofSeconds(1)) + : () -> { }; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java index df94d1ad6a4..277e6acd6e6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java @@ -30,7 +30,7 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer { Curator curator, Duration interval, FlagSource flagSource) { - super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval); + super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, false); this.applicationRepository = applicationRepository; ConfigserverConfig configserverConfig = applicationRepository.configserverConfig(); this.maxUnusedFileReferenceAge = Duration.ofMinutes(configserverConfig.keepUnusedFileReferencesMinutes()); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java index a6802a65e3c..fb4d7ac1fc1 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java @@ -11,7 +11,6 @@ import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker; import com.yahoo.vespa.config.server.tenant.Tenant; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.model.VespaModel; import com.yahoo.yolean.Exceptions; import java.time.Clock; @@ -21,7 +20,6 @@ import java.util.Collection; import java.util.Comparator; import java.util.Map; import java.util.Optional; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; @@ -46,7 +44,7 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { public ReindexingMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource, Duration interval, ConfigConvergenceChecker convergence, Clock clock) { - super(applicationRepository, curator, flagSource, clock.instant(), interval); + super(applicationRepository, curator, flagSource, clock.instant(), interval, true); this.convergence = convergence; this.clock = clock; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java index e0a5e080ff1..d980fb079e7 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java @@ -19,13 +19,12 @@ public class SessionsMaintainer extends ConfigServerMaintainer { private final boolean hostedVespa; SessionsMaintainer(ApplicationRepository applicationRepository, Curator curator, Duration interval, FlagSource flagSource) { - super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval); + super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, true); this.hostedVespa = applicationRepository.configserverConfig().hostedVespa(); } @Override protected double maintain() { - log.log(Level.FINE, () -> "Running " + SessionsMaintainer.class.getSimpleName()); applicationRepository.deleteExpiredLocalSessions(); if (hostedVespa) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java index 049441c3a36..ec09e89568c 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java @@ -25,7 +25,7 @@ public class TenantsMaintainer extends ConfigServerMaintainer { TenantsMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource, Duration interval, Clock clock) { - super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval); + super(applicationRepository, curator, flagSource, applicationRepository.clock().instant(), interval, true); this.ttlForUnusedTenant = defaultTtlForUnusedTenant; this.clock = clock; } diff --git a/container-core/pom.xml b/container-core/pom.xml index 8e3c700628a..7e4198bad0d 100644 --- a/container-core/pom.xml +++ b/container-core/pom.xml @@ -369,6 +369,17 @@ </execution> </executions> </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <!-- Illegal reflective access by LogFileHandler via com.yahoo.io.NativeIO --> + <argLine> + --add-opens=java.base/java.io=ALL-UNNAMED + </argLine> + </configuration> + </plugin> + </plugins> <outputDirectory>${buildOutputDirectory}</outputDirectory> </build> diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerResult.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerResult.java index d57cd1e2a21..3e952ed2291 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerResult.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerResult.java @@ -46,6 +46,9 @@ public class InvokerResult { if (hit.hasSortData()) { fh.setSortData(hit.getSortData(), sorting); } + if (hit.hasMatchFeatures()) { + fh.setField("matchfeatures", hit.getMatchFeatures()); + } fh.setQuery(query); fh.setFillable(); fh.setCached(false); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/LeanHit.java b/container-search/src/main/java/com/yahoo/search/dispatch/LeanHit.java index 6a47e19e310..03b0f092abb 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/LeanHit.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/LeanHit.java @@ -36,8 +36,9 @@ public class LeanHit implements Comparable<LeanHit> { public boolean hasSortData() { return sortData != null; } public int getPartId() { return partId; } public int getDistributionKey() { return distributionKey; } - public FeatureData getMatchFeatures() { return matchFeatures; } + public FeatureData getMatchFeatures() { return matchFeatures; } + public boolean hasMatchFeatures() { return matchFeatures != null; } public void addMatchFeatures(Inspector features) { matchFeatures = new FeatureData(features); } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index c1feadf2d43..060a862988c 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -59,6 +59,12 @@ public class Flags { "Will attempt to switch on next host admin tick (requires reboot).", NODE_TYPE, HOSTNAME); + public static final UnboundBooleanFlag MOUNT_READONLY = defineFeatureFlag( + "mount-readonly", false, + List.of("freva"), "2021-11-04", "2021-12-01", + "Whether host-admin should mount container-data and credential directories read-only when starting container", + "Takes effect on next container restart."); + public static final UnboundDoubleFlag DEFAULT_TERM_WISE_LIMIT = defineDoubleFlag( "default-term-wise-limit", 1.0, List.of("baldersheim"), "2020-12-02", "2022-01-01", @@ -342,9 +348,12 @@ public class Flags { public static final UnboundStringFlag JDK_VERSION = defineStringFlag( "jdk-version", "11", List.of("hmusum"), "2021-10-25", "2021-11-25", - "JDK version to use inside containers", - "Takes effect on restart of Docker container", - APPLICATION_ID); + "JDK version to use on host and inside containers. Note application-id dimension only applies for container, " + + "while hostname and node type applies for host.", + "Takes effect on restart for Docker container and on next host-admin tick for host", + APPLICATION_ID, + HOSTNAME, + NODE_TYPE); /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java index af12a6201d3..3017773700a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/container/ContainerOperations.java @@ -141,7 +141,7 @@ public class ContainerOperations { } private String executeNodeCtlInContainer(NodeAgentContext context, String program) { - String[] command = new String[] {context.containerPathUnderVespaHome("bin/vespa-nodectl").pathInContainer(), program}; + String[] command = new String[] {context.paths().underVespaHome("bin/vespa-nodectl").pathInContainer(), program}; return executeCommandInContainerAsRoot(context, command).getOutput(); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index 9328eb232a6..dd0b8a2acdc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -86,7 +86,7 @@ public class StorageMaintainer { if (archiveUri.isEmpty()) return false; ApplicationId owner = context.node().owner().orElseThrow(); - List<SyncFileInfo> syncFileInfos = FileFinder.files(context.containerPathUnderVespaHome("logs/vespa")) + List<SyncFileInfo> syncFileInfos = FileFinder.files(context.paths().underVespaHome("logs/vespa")) .maxDepth(2) .stream() .sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime)) @@ -101,7 +101,7 @@ public class StorageMaintainer { DiskSize cachedDiskUsage = diskUsage.getIfPresent(context.containerName()); if (cachedDiskUsage != null) return Optional.of(cachedDiskUsage); - DiskSize diskUsageBytes = getDiskUsed(context, context.containerPath("/").pathOnHost()); + DiskSize diskUsageBytes = getDiskUsed(context, context.paths().of("/").pathOnHost()); diskUsage.put(context.containerName(), diskUsageBytes); return Optional.of(diskUsageBytes); } catch (Exception e) { @@ -150,18 +150,18 @@ public class StorageMaintainer { Function<Instant, Double> monthNormalizer = instant -> Duration.between(instant, start).getSeconds() / oneMonthSeconds; List<DiskCleanupRule> rules = new ArrayList<>(); - rules.add(CoredumpCleanupRule.forContainer(context.containerPathUnderVespaHome("var/crash"))); + rules.add(CoredumpCleanupRule.forContainer(context.paths().underVespaHome("var/crash"))); if (context.node().membership().map(m -> m.type().hasContainer()).orElse(false)) - rules.add(new LinearCleanupRule(() -> FileFinder.files(context.containerPathUnderVespaHome("logs/vespa/qrs")).list(), + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/vespa/qrs")).list(), fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.HIGHEST)); if (context.nodeType() == NodeType.tenant && context.node().membership().map(m -> m.type().isAdmin()).orElse(false)) - rules.add(new LinearCleanupRule(() -> FileFinder.files(context.containerPathUnderVespaHome("logs/vespa/logarchive")).list(), + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/vespa/logarchive")).list(), fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.HIGHEST)); if (context.nodeType() == NodeType.proxy) - rules.add(new LinearCleanupRule(() -> FileFinder.files(context.containerPathUnderVespaHome("logs/nginx")).list(), + rules.add(new LinearCleanupRule(() -> FileFinder.files(context.paths().underVespaHome("logs/nginx")).list(), fa -> monthNormalizer.apply(fa.lastModifiedTime()), Priority.LOWEST, Priority.MEDIUM)); return rules; @@ -203,7 +203,7 @@ public class StorageMaintainer { * Removes old files, reports coredumps and archives container data, runs when container enters state "dirty" */ public void archiveNodeStorage(NodeAgentContext context) { - ContainerPath logsDirInContainer = context.containerPathUnderVespaHome("logs"); + ContainerPath logsDirInContainer = context.paths().underVespaHome("logs"); Path containerLogsInArchiveDir = archiveContainerStoragePath .resolve(context.containerName().asString() + "_" + DATE_TIME_FORMATTER.format(clock.instant()) + logsDirInContainer.pathInContainer()); @@ -213,7 +213,7 @@ public class StorageMaintainer { new UnixPath(containerLogsInArchiveDir).createParents(); containerLogsOnHost.moveIfExists(containerLogsInArchiveDir); } - new UnixPath(context.containerPath("/")).deleteRecursively(); + new UnixPath(context.paths().of("/")).deleteRecursively(); } private String getMicrocodeVersion() { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java index 0594c5ee016..8d4e487ed6c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java @@ -2,9 +2,9 @@ package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; import com.fasterxml.jackson.databind.ObjectMapper; +import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.container.metrics.Dimensions; import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics; -import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; @@ -87,7 +87,7 @@ public class CoredumpHandler { public void converge(NodeAgentContext context, Supplier<Map<String, Object>> nodeAttributesSupplier, boolean throwIfCoreBeingWritten) { - ContainerPath containerCrashPath = context.containerPath(crashPatchInContainer); + ContainerPath containerCrashPath = context.paths().of(crashPatchInContainer); ContainerPath containerProcessingPath = containerCrashPath.resolve(PROCESSING_DIRECTORY_NAME); updateMetrics(context, containerCrashPath); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java index 6fef2cbbb50..5c1401ade52 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/identity/AthenzCredentialsMaintainer.java @@ -24,9 +24,9 @@ import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo; import com.yahoo.vespa.hosted.node.admin.container.ContainerName; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentTask; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; import javax.net.ssl.HostnameVerifier; @@ -109,7 +109,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { try { context.log(logger, Level.FINE, "Checking certificate"); - ContainerPath containerSiaDirectory = context.containerPath(CONTAINER_SIA_DIRECTORY); + ContainerPath containerSiaDirectory = context.paths().of(CONTAINER_SIA_DIRECTORY); ContainerPath privateKeyFile = (ContainerPath) SiaUtils.getPrivateKeyFile(containerSiaDirectory, context.identity()); ContainerPath certificateFile = (ContainerPath) SiaUtils.getCertificateFile(containerSiaDirectory, context.identity()); ContainerPath identityDocumentFile = containerSiaDirectory.resolve("vespa-node-identity-document.json"); @@ -153,14 +153,14 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { } public void clearCredentials(NodeAgentContext context) { - FileFinder.files(context.containerPath(CONTAINER_SIA_DIRECTORY)) + FileFinder.files(context.paths().of(CONTAINER_SIA_DIRECTORY)) .deleteRecursively(context); lastRefreshAttempt.remove(context.containerName()); } @Override public Duration certificateLifetime(NodeAgentContext context) { - ContainerPath containerSiaDirectory = context.containerPath(CONTAINER_SIA_DIRECTORY); + ContainerPath containerSiaDirectory = context.paths().of(CONTAINER_SIA_DIRECTORY); ContainerPath certificateFile = (ContainerPath) SiaUtils.getCertificateFile(containerSiaDirectory, context.identity()); try { X509Certificate certificate = readCertificateFromFile(certificateFile); @@ -207,7 +207,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { EntityBindingsMapper.toAttestationData(signedIdentityDocument), csr); EntityBindingsMapper.writeSignedIdentityDocumentToFile(identityDocumentFile, signedIdentityDocument); - writePrivateKeyAndCertificate(context.vespaUser(), + writePrivateKeyAndCertificate(context.users().vespa(), privateKeyFile, keyPair.getPrivate(), certificateFile, instanceIdentity.certificate()); context.log(logger, "Instance successfully registered and credentials written to file"); } @@ -235,7 +235,7 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { context.identity(), identityDocument.providerUniqueId().asDottedString(), csr); - writePrivateKeyAndCertificate(context.vespaUser(), + writePrivateKeyAndCertificate(context.users().vespa(), privateKeyFile, keyPair.getPrivate(), certificateFile, instanceIdentity.certificate()); context.log(logger, "Instance successfully refreshed and credentials written to file"); } catch (ZtsClientException e) { @@ -252,19 +252,18 @@ public class AthenzCredentialsMaintainer implements CredentialsMaintainer { } - private static void writePrivateKeyAndCertificate(VespaUser vespaUser, + private static void writePrivateKeyAndCertificate(UnixUser vespaUser, ContainerPath privateKeyFile, PrivateKey privateKey, ContainerPath certificateFile, X509Certificate certificate) { - writeFile(privateKeyFile, vespaUser, KeyUtils.toPem(privateKey)); - writeFile(certificateFile, vespaUser, X509CertificateUtils.toPem(certificate)); + writeFile(privateKeyFile.withUser(vespaUser), KeyUtils.toPem(privateKey)); + writeFile(certificateFile.withUser(vespaUser), X509CertificateUtils.toPem(certificate)); } - private static void writeFile(ContainerPath path, VespaUser vespaUser, String utf8Content) { + private static void writeFile(ContainerPath path, String utf8Content) { new UnixPath(path.resolveSibling(path.getFileName() + ".tmp")) .writeUtf8File(utf8Content, "r--------") - .setOwnerId(vespaUser.uid()) .atomicMove(path); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/AbstractProducer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/AbstractProducer.java index 1756b81f795..a1416d3274c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/AbstractProducer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/AbstractProducer.java @@ -48,7 +48,7 @@ abstract class AbstractProducer implements ArtifactProducer { } protected int findVespaServicePid(NodeAgentContext ctx, String configId) throws IOException { - ContainerPath findPidBinary = ctx.containerPathUnderVespaHome("libexec/vespa/find-pid"); + ContainerPath findPidBinary = ctx.paths().underVespaHome("libexec/vespa/find-pid"); CommandResult findPidResult = executeCommand(ctx, List.of(findPidBinary.pathInContainer(), configId), true); return Integer.parseInt(findPidResult.getOutput()); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java index b299e1f3f0d..86dc1ed983d 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImpl.java @@ -88,7 +88,7 @@ public class VespaServiceDumperImpl implements VespaServiceDumper { handleFailure(context, request, startedAt, "No artifacts requested"); return; } - ContainerPath directory = context.containerPathUnderVespaHome("tmp/vespa-service-dump"); + ContainerPath directory = context.paths().underVespaHome("tmp/vespa-service-dump"); UnixPath unixPathDirectory = new UnixPath(directory); try { context.log(log, Level.INFO, @@ -100,9 +100,7 @@ public class VespaServiceDumperImpl implements VespaServiceDumper { unixPathDirectory.deleteRecursively(); } context.log(log, Level.INFO, "Creating '" + unixPathDirectory +"'."); - unixPathDirectory.createDirectory("rwxr-x---") - .setOwner(context.vespaUser().name()) - .setGroup(context.vespaUser().group()); + unixPathDirectory.createDirectory("rwxr-x---"); URI destination = serviceDumpDestination(nodeSpec, createDumpId(request)); ProducerContext producerCtx = new ProducerContext(context, directory, request); List<Artifact> producedArtifacts = new ArrayList<>(); @@ -194,7 +192,7 @@ public class VespaServiceDumperImpl implements VespaServiceDumper { @Override public int servicePid() { if (pid == -1) { - ContainerPath findPidBinary = nodeAgentCtx.containerPathUnderVespaHome("libexec/vespa/find-pid"); + ContainerPath findPidBinary = nodeAgentCtx.paths().underVespaHome("libexec/vespa/find-pid"); CommandResult findPidResult = executeCommandInNode(List.of(findPidBinary.pathInContainer(), serviceId()), true); this.pid = Integer.parseInt(findPidResult.getOutput()); } @@ -228,7 +226,7 @@ public class VespaServiceDumperImpl implements VespaServiceDumper { @Override public ContainerPath containerPathUnderVespaHome(String relativePath) { - return nodeAgentCtx.containerPathUnderVespaHome(relativePath); + return nodeAgentCtx.paths().underVespaHome(relativePath); } @Override public Options options() { return this; } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java index 8111f41f9d5..2713ff45ae8 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContext.java @@ -11,9 +11,7 @@ import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.container.ContainerName; import com.yahoo.vespa.hosted.node.admin.container.ContainerNetworkMode; -import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; -import java.nio.file.Path; import java.util.Optional; public interface NodeAgentContext extends TaskContext { @@ -24,10 +22,10 @@ public interface NodeAgentContext extends TaskContext { /** @return node ACL from node-repository */ Acl acl(); - /** @return name of the docker container this context applies to */ + /** @return name of the linux container this context applies to */ ContainerName containerName(); - /** @return hostname of the docker container this context applies to */ + /** @return hostname of the linux container this context applies to */ default HostName hostname() { return HostName.from(node().hostname()); } @@ -42,14 +40,15 @@ public interface NodeAgentContext extends TaskContext { ZoneApi zone(); - /** @return information about the Vespa user inside the container */ - VespaUser vespaUser(); + /** @return information about users/user namespace of the linux container this context applies to */ + UserScope users(); - UserNamespace userNamespace(); + /** @return methods to resolve paths within container's file system */ + PathScope paths(); default boolean isDisabled(NodeAgentTask task) { return false; - }; + } /** * The vcpu value in NodeSpec is the number of vcpus required by the node on a fixed historical @@ -61,11 +60,5 @@ public interface NodeAgentContext extends TaskContext { */ double vcpuOnThisHost(); - ContainerPath containerPath(String pathInNode); - - ContainerPath containerPathUnderVespaHome(String relativePath); - - ContainerPath containerPathFromPathOnHost(Path pathOnHost); - Optional<ApplicationId> hostExclusiveTo(); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java index 037bbc56d1d..47c96103ab5 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImpl.java @@ -16,8 +16,8 @@ import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.Acl; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.container.ContainerName; import com.yahoo.vespa.hosted.node.admin.container.ContainerNetworkMode; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerFileSystem; -import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; import java.nio.file.FileSystem; import java.nio.file.Path; @@ -40,15 +40,15 @@ public class NodeAgentContextImpl implements NodeAgentContext { private final AthenzIdentity identity; private final ContainerNetworkMode containerNetworkMode; private final ZoneApi zone; - private final ContainerFileSystem containerFs; - private final ContainerPath pathToVespaHome; + private final UserScope userScope; + private final PathScope pathScope; private final double cpuSpeedup; private final Set<NodeAgentTask> disabledNodeAgentTasks; private final Optional<ApplicationId> hostExclusiveTo; public NodeAgentContextImpl(NodeSpec node, Acl acl, AthenzIdentity identity, ContainerNetworkMode containerNetworkMode, ZoneApi zone, - FlagSource flagSource, ContainerFileSystem containerFs, String pathToVespaHome, + FlagSource flagSource, UserScope userScope, PathScope pathScope, double cpuSpeedup, Optional<ApplicationId> hostExclusiveTo) { if (cpuSpeedup <= 0) throw new IllegalArgumentException("cpuSpeedUp must be positive, was: " + cpuSpeedup); @@ -59,12 +59,14 @@ public class NodeAgentContextImpl implements NodeAgentContext { this.identity = Objects.requireNonNull(identity); this.containerNetworkMode = Objects.requireNonNull(containerNetworkMode); this.zone = Objects.requireNonNull(zone); - this.containerFs = Objects.requireNonNull(containerFs); - this.pathToVespaHome = containerFs.getPath(pathToVespaHome); + this.userScope = Objects.requireNonNull(userScope); + this.pathScope = Objects.requireNonNull(pathScope); this.logPrefix = containerName.asString() + ": "; this.cpuSpeedup = cpuSpeedup; this.disabledNodeAgentTasks = NodeAgentTask.fromString( - PermanentFlags.DISABLED_HOST_ADMIN_TASKS.bindTo(flagSource).with(FetchVector.Dimension.HOSTNAME, node.hostname()).value()); + PermanentFlags.DISABLED_HOST_ADMIN_TASKS.bindTo(flagSource) + .with(FetchVector.Dimension.HOSTNAME, node.hostname()) + .with(FetchVector.Dimension.NODE_TYPE, node.type().name()).value()); this.hostExclusiveTo = hostExclusiveTo; } @@ -99,13 +101,13 @@ public class NodeAgentContextImpl implements NodeAgentContext { } @Override - public VespaUser vespaUser() { - return containerFs.getUserPrincipalLookupService().vespaUser(); + public UserScope users() { + return userScope; } @Override - public UserNamespace userNamespace() { - return containerFs.getUserPrincipalLookupService().userNamespace(); + public PathScope paths() { + return pathScope; } @Override @@ -119,24 +121,6 @@ public class NodeAgentContextImpl implements NodeAgentContext { } @Override - public ContainerPath containerPath(String pathInNode) { - return containerFs.getPath(pathInNode); - } - - @Override - public ContainerPath containerPathUnderVespaHome(String relativePath) { - if (relativePath.startsWith("/")) - throw new IllegalArgumentException("Expected a relative path to the Vespa home, got: " + relativePath); - - return pathToVespaHome.resolve(relativePath); - } - - @Override - public ContainerPath containerPathFromPathOnHost(Path pathOnHost) { - return ContainerPath.fromPathOnHost(containerFs, pathOnHost); - } - - @Override public Optional<ApplicationId> hostExclusiveTo() { return hostExclusiveTo; } @@ -156,20 +140,6 @@ public class NodeAgentContextImpl implements NodeAgentContext { logger.log(level, logPrefix + message, throwable); } - @Override - public String toString() { - return "NodeAgentContextImpl{" + - "node=" + node + - ", acl=" + acl + - ", containerName=" + containerName + - ", identity=" + identity + - ", dockerNetworking=" + containerNetworkMode + - ", zone=" + zone + - ", pathToVespaHome=" + pathToVespaHome + - ", hostExclusiveTo='" + hostExclusiveTo + '\'' + - '}'; - } - public static NodeAgentContextImpl.Builder builder(NodeSpec node) { return new Builder(new NodeSpec.Builder(node)); } @@ -193,7 +163,7 @@ public class NodeAgentContextImpl implements NodeAgentContext { private ContainerNetworkMode containerNetworkMode; private ZoneApi zone; private UserNamespace userNamespace; - private VespaUser vespaUser; + private UnixUser vespaUser; private Path containerStorage; private FlagSource flagSource; private double cpuSpeedUp = 1; @@ -233,7 +203,7 @@ public class NodeAgentContextImpl implements NodeAgentContext { return this; } - public Builder vespaUser(VespaUser vespaUser) { + public Builder vespaUser(UnixUser vespaUser) { this.vespaUser = vespaUser; return this; } @@ -267,12 +237,11 @@ public class NodeAgentContextImpl implements NodeAgentContext { public NodeAgentContextImpl build() { Objects.requireNonNull(containerStorage, "Must set one of containerStorage or fileSystem"); - UserNamespace userNamespace = Optional.ofNullable(this.userNamespace) - .orElseGet(() -> new UserNamespace(100000, 100000, 100000)); - VespaUser vespaUser = Optional.ofNullable(this.vespaUser) - .orElseGet(() -> new VespaUser("vespa", "vespa", 1000, 100)); + UserScope userScope = UserScope.create( + Optional.ofNullable(vespaUser).orElseGet(() -> new UnixUser("vespa", 1000, "vespa", 100)), + Optional.ofNullable(userNamespace).orElseGet(() -> new UserNamespace(100000, 100000, 100000))); ContainerFileSystem containerFs = ContainerFileSystem.create(containerStorage - .resolve(nodeSpecBuilder.hostname().split("\\.")[0]), userNamespace, vespaUser); + .resolve(nodeSpecBuilder.hostname().split("\\.")[0]), userScope); containerFs.createRoot(); return new NodeAgentContextImpl( @@ -302,8 +271,8 @@ public class NodeAgentContextImpl implements NodeAgentContext { } }), Optional.ofNullable(flagSource).orElseGet(InMemoryFlagSource::new), - containerFs, - "/opt/vespa", + userScope, + new PathScope(containerFs, "/opt/vespa"), cpuSpeedUp, hostExclusiveTo); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java new file mode 100644 index 00000000000..1ba71c4c2ed --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/PathScope.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerFileSystem; +import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; + +import java.nio.file.Path; +import java.util.Objects; + +/** + * @author freva + */ +public class PathScope { + + private final ContainerFileSystem containerFs; + private final String pathToVespaHome; + private final UserScope users; + + public PathScope(ContainerFileSystem containerFs, String pathToVespaHome) { + this.containerFs = Objects.requireNonNull(containerFs); + this.pathToVespaHome = Objects.requireNonNull(pathToVespaHome); + this.users = containerFs.getUserPrincipalLookupService().userScope(); + } + + public ContainerPath of(String pathInNode) { + return of(pathInNode, users.root()); + } + + public ContainerPath of(String pathInNode, UnixUser user) { + return ContainerPath.fromPathInContainer(containerFs, Path.of(pathInNode), user); + } + + public ContainerPath underVespaHome(String relativePath) { + if (relativePath.startsWith("/")) + throw new IllegalArgumentException("Expected a relative path to the Vespa home, got: " + relativePath); + + return ContainerPath.fromPathInContainer(containerFs, Path.of(pathToVespaHome, relativePath), users.vespa()); + } + + public ContainerPath fromPathOnHost(Path pathOnHost) { + return ContainerPath.fromPathOnHost(containerFs, pathOnHost, users.root()); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PathScope pathScope = (PathScope) o; + return containerFs.equals(pathScope.containerFs) && pathToVespaHome.equals(pathScope.pathToVespaHome) && users.equals(pathScope.users); + } + + @Override + public int hashCode() { + return Objects.hash(containerFs, pathToVespaHome, users); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java new file mode 100644 index 00000000000..0c4c79172e2 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/UserScope.java @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.nodeagent; + +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; + +import java.util.Objects; + +/** + * @author freva + */ +public class UserScope { + + private final UnixUser root; + private final UnixUser vespa; + private final UserNamespace namespace; + + private UserScope(UnixUser root, UnixUser vespa, UserNamespace namespace) { + this.root = Objects.requireNonNull(root); + this.vespa = Objects.requireNonNull(vespa); + this.namespace = Objects.requireNonNull(namespace); + } + + public UnixUser root() { + return root; + } + + public UnixUser vespa() { + return vespa; + } + + public UserNamespace namespace() { + return namespace; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UserScope userScope = (UserScope) o; + return root.equals(userScope.root) && vespa.equals(userScope.vespa) && namespace.equals(userScope.namespace); + } + + @Override + public int hashCode() { + return Objects.hash(root, vespa, namespace); + } + + /** Creates user scope with default root user */ + public static UserScope create(UnixUser vespaUser, UserNamespace namespace) { + return new UserScope(UnixUser.ROOT, vespaUser, namespace); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/VespaUser.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/VespaUser.java deleted file mode 100644 index eb391809e6f..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/VespaUser.java +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.nodeagent; - -import java.util.Objects; - -/** - * Describes Vespa user inside the container user namespace. - * - * @author freva - */ -public class VespaUser { - - private final String name; - private final String group; - private final int uid; - private final int gid; - - public VespaUser(String name, String group, int uid, int gid) { - this.name = Objects.requireNonNull(name); - this.group = Objects.requireNonNull(group); - this.uid = uid; - this.gid = gid; - } - - public String name() { return name; } - public String group() { return group; } - public int uid() { return uid; } - public int gid() { return gid; } -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java new file mode 100644 index 00000000000..665bb4b8bbc --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/UnixUser.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.task.util.file; + +import java.util.Objects; + +/** + * A regular UNIX-style user and its primary group. + * + * @author mpolden + */ +public class UnixUser { + + public static final UnixUser ROOT = new UnixUser("root", 0, "root", 0); + + private final String name; + private final int uid; + private final String group; + private final int gid; + + public UnixUser(String name, int uid, String group, int gid) { + this.name = name; + this.uid = uid; + this.group = group; + this.gid = gid; + } + + /** Username of this */ + public String name() { return name; } + + /** User ID of this */ + public int uid() { return uid; } + + /** Primary group of this */ + public String group() { return group; } + + /** Primary group ID of this */ + public int gid() { return gid; } + + @Override + public String toString() { + return "user " + name + ":" + group; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnixUser unixUser = (UnixUser) o; + return uid == unixUser.uid && name.equals(unixUser.name) && + gid == unixUser.gid && group.equals(unixUser.group); + } + + @Override + public int hashCode() { + return Objects.hash(uid, name, gid, group); + } +} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java index a2c6f63355d..4167ba2d76e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystem.java @@ -1,8 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.task.util.fs; -import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; import java.io.IOException; import java.nio.file.FileStore; @@ -65,7 +64,7 @@ public class ContainerFileSystem extends FileSystem { @Override public ContainerPath getPath(String first, String... more) { - return ContainerPath.fromPathInContainer(this, Path.of(first, more)); + return ContainerPath.fromPathInContainer(this, Path.of(first, more), getUserPrincipalLookupService().userScope().root()); } @Override @@ -93,7 +92,7 @@ public class ContainerFileSystem extends FileSystem { throw new UnsupportedOperationException(); } - public static ContainerFileSystem create(Path containerStorageRoot, UserNamespace userNamespace, VespaUser vespaUser) { - return new ContainerFileSystemProvider(containerStorageRoot, userNamespace, vespaUser).getFileSystem(null); + public static ContainerFileSystem create(Path containerStorageRoot, UserScope userScope) { + return new ContainerFileSystemProvider(containerStorageRoot, userScope).getFileSystem(null); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java index c956cae868b..5405a5acd61 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemProvider.java @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.task.util.fs; -import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import java.io.IOException; import java.net.URI; @@ -45,10 +45,10 @@ class ContainerFileSystemProvider extends FileSystemProvider { private final ContainerFileSystem containerFs; private final ContainerUserPrincipalLookupService userPrincipalLookupService; - ContainerFileSystemProvider(Path containerRootOnHost, UserNamespace userNamespace, VespaUser vespaUser) { + ContainerFileSystemProvider(Path containerRootOnHost, UserScope userScope) { this.containerFs = new ContainerFileSystem(this, containerRootOnHost); this.userPrincipalLookupService = new ContainerUserPrincipalLookupService( - containerRootOnHost.getFileSystem().getUserPrincipalLookupService(), userNamespace, vespaUser); + containerRootOnHost.getFileSystem().getUserPrincipalLookupService(), userScope); } public ContainerUserPrincipalLookupService userPrincipalLookupService() { @@ -87,7 +87,8 @@ class ContainerFileSystemProvider extends FileSystemProvider { @Override public DirectoryStream<Path> newDirectoryStream(Path dir, DirectoryStream.Filter<? super Path> filter) throws IOException { Path pathOnHost = pathOnHost(dir); - return new ContainerDirectoryStream(provider(pathOnHost).newDirectoryStream(pathOnHost, filter)); + return new ContainerDirectoryStream(provider(pathOnHost).newDirectoryStream(pathOnHost, filter), + toContainerPath(dir).user()); } @Override @@ -233,15 +234,17 @@ class ContainerFileSystemProvider extends FileSystemProvider { } private void fixOwnerToContainerRoot(ContainerPath path) throws IOException { - setAttribute(path, "unix:uid", 0, LinkOption.NOFOLLOW_LINKS); - setAttribute(path, "unix:gid", 0, LinkOption.NOFOLLOW_LINKS); + setAttribute(path, "unix:uid", path.user().uid(), LinkOption.NOFOLLOW_LINKS); + setAttribute(path, "unix:gid", path.user().gid(), LinkOption.NOFOLLOW_LINKS); } private class ContainerDirectoryStream implements DirectoryStream<Path> { private final DirectoryStream<Path> hostDirectoryStream; + private final UnixUser user; - private ContainerDirectoryStream(DirectoryStream<Path> hostDirectoryStream) { + private ContainerDirectoryStream(DirectoryStream<Path> hostDirectoryStream, UnixUser user) { this.hostDirectoryStream = hostDirectoryStream; + this.user = user; } @Override @@ -256,7 +259,7 @@ class ContainerFileSystemProvider extends FileSystemProvider { @Override public Path next() { Path pathOnHost = hostPathIterator.next(); - return ContainerPath.fromPathOnHost(containerFs, pathOnHost); + return ContainerPath.fromPathOnHost(containerFs, pathOnHost, user); } }; } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java index 9450c8d4612..4f12c9439f2 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPath.java @@ -1,6 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.task.util.fs; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; + import java.io.IOException; import java.net.URI; import java.nio.file.LinkOption; @@ -24,11 +26,13 @@ public class ContainerPath implements Path { private final ContainerFileSystem containerFs; private final Path pathOnHost; private final String[] parts; + private final UnixUser user; - private ContainerPath(ContainerFileSystem containerFs, Path pathOnHost, String[] parts) { + private ContainerPath(ContainerFileSystem containerFs, Path pathOnHost, String[] parts, UnixUser user) { this.containerFs = Objects.requireNonNull(containerFs); this.pathOnHost = Objects.requireNonNull(pathOnHost); this.parts = Objects.requireNonNull(parts); + this.user = Objects.requireNonNull(user); if (!pathOnHost.isAbsolute()) throw new IllegalArgumentException("Path host must be absolute: " + pathOnHost); @@ -39,6 +43,8 @@ public class ContainerPath implements Path { public Path pathOnHost() { return pathOnHost; } public String pathInContainer() { return '/' + String.join("/", parts); } + public ContainerPath withUser(UnixUser user) { return new ContainerPath(containerFs, pathOnHost, parts, user); } + UnixUser user() { return user; } @Override public ContainerFileSystem getFileSystem() { @@ -47,7 +53,7 @@ public class ContainerPath implements Path { @Override public ContainerPath getRoot() { - return resolve(containerFs, new String[0], Path.of("/")); + return resolve(containerFs, new String[0], Path.of("/"), user); } @Override @@ -59,7 +65,7 @@ public class ContainerPath implements Path { @Override public ContainerPath getParent() { if (parts.length == 0) return null; - return new ContainerPath(containerFs, pathOnHost.getParent(), Arrays.copyOf(parts, parts.length-1)); + return new ContainerPath(containerFs, pathOnHost.getParent(), Arrays.copyOf(parts, parts.length-1), user); } @Override @@ -83,7 +89,7 @@ public class ContainerPath implements Path { return Path.of(parts[beginIndex], rest); } - @Override public ContainerPath resolve(Path other) { return resolve(containerFs, parts, other); } + @Override public ContainerPath resolve(Path other) { return resolve(containerFs, parts, other, user); } @Override public ContainerPath resolve(String other) { return resolve(Path.of(other)); } @Override public ContainerPath resolveSibling(String other) { return resolve(Path.of("..", other)); } @@ -133,7 +139,7 @@ public class ContainerPath implements Path { public ContainerPath toRealPath(LinkOption... options) throws IOException { Path realPathOnHost = pathOnHost.toRealPath(options); if (realPathOnHost.equals(pathOnHost)) return this; - return fromPathOnHost(containerFs, realPathOnHost); + return fromPathOnHost(containerFs, realPathOnHost, user); } @Override @@ -176,7 +182,7 @@ public class ContainerPath implements Path { return containerFs.containerRootOnHost().getFileName() + ":" + pathInContainer(); } - private static ContainerPath resolve(ContainerFileSystem containerFs, String[] currentParts, Path other) { + private static ContainerPath resolve(ContainerFileSystem containerFs, String[] currentParts, Path other, UnixUser user) { List<String> parts = other.isAbsolute() ? new ArrayList<>() : new ArrayList<>(Arrays.asList(currentParts)); for (int i = 0; i < other.getNameCount(); i++) { String part = other.getName(i).toString(); @@ -190,28 +196,29 @@ public class ContainerPath implements Path { return new ContainerPath(containerFs, containerFs.containerRootOnHost().resolve(String.join("/", parts)), - parts.toArray(String[]::new)); + parts.toArray(String[]::new), + user); } - public static ContainerPath fromPathInContainer(ContainerFileSystem containerFs, Path pathInContainer) { + public static ContainerPath fromPathInContainer(ContainerFileSystem containerFs, Path pathInContainer, UnixUser user) { if (!pathInContainer.isAbsolute()) throw new IllegalArgumentException("Path in container must be absolute: " + pathInContainer); - return resolve(containerFs, new String[0], pathInContainer); + return resolve(containerFs, new String[0], pathInContainer, user); } - public static ContainerPath fromPathOnHost(ContainerFileSystem containerFs, Path pathOnHost) { + public static ContainerPath fromPathOnHost(ContainerFileSystem containerFs, Path pathOnHost, UnixUser user) { pathOnHost = pathOnHost.normalize(); Path containerRootOnHost = containerFs.containerRootOnHost(); Path pathUnderContainerStorage = containerRootOnHost.relativize(pathOnHost); if (pathUnderContainerStorage.getNameCount() == 0 || pathUnderContainerStorage.getName(0).toString().isEmpty()) - return new ContainerPath(containerFs, pathOnHost, new String[0]); + return new ContainerPath(containerFs, pathOnHost, new String[0], user); if (pathUnderContainerStorage.getName(0).toString().equals("..")) throw new IllegalArgumentException("Path " + pathOnHost + " is not under container root " + containerRootOnHost); List<String> parts = new ArrayList<>(); for (int i = 0; i < pathUnderContainerStorage.getNameCount(); i++) parts.add(pathUnderContainerStorage.getName(i).toString()); - return new ContainerPath(containerFs, pathOnHost, parts.toArray(String[]::new)); + return new ContainerPath(containerFs, pathOnHost, parts.toArray(String[]::new), user); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java index 79d43b39f43..60ee92045e0 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupService.java @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.task.util.fs; -import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import java.io.IOException; import java.nio.file.attribute.GroupPrincipal; @@ -10,6 +10,7 @@ import java.nio.file.attribute.UserPrincipal; import java.nio.file.attribute.UserPrincipalLookupService; import java.nio.file.attribute.UserPrincipalNotFoundException; import java.util.Objects; +import java.util.function.Function; /** * @author freva @@ -17,59 +18,55 @@ import java.util.Objects; public class ContainerUserPrincipalLookupService extends UserPrincipalLookupService { private final UserPrincipalLookupService baseFsUserPrincipalLookupService; - private final UserNamespace userNamespace; - private final VespaUser vespaUser; + private final UserScope userScope; - ContainerUserPrincipalLookupService( - UserPrincipalLookupService baseFsUserPrincipalLookupService, UserNamespace userNamespace, VespaUser vespaUser) { + ContainerUserPrincipalLookupService(UserPrincipalLookupService baseFsUserPrincipalLookupService, UserScope userScope) { this.baseFsUserPrincipalLookupService = Objects.requireNonNull(baseFsUserPrincipalLookupService); - this.userNamespace = Objects.requireNonNull(userNamespace); - this.vespaUser = Objects.requireNonNull(vespaUser); + this.userScope = Objects.requireNonNull(userScope); } - public UserNamespace userNamespace() { return userNamespace; } - public VespaUser vespaUser() { return vespaUser; } + public UserScope userScope() { return userScope; } - public int userIdOnHost(int containerUid) { return userNamespace.userIdOnHost(containerUid); } - public int groupIdOnHost(int containerGid) { return userNamespace.groupIdOnHost(containerGid); } - public int userIdInContainer(int hostUid) { return userNamespace.userIdInContainer(hostUid); } - public int groupIdInContainer(int hostGid) { return userNamespace.groupIdInContainer(hostGid); } + public int userIdOnHost(int containerUid) { return userScope.namespace().userIdOnHost(containerUid); } + public int groupIdOnHost(int containerGid) { return userScope.namespace().groupIdOnHost(containerGid); } + public int userIdInContainer(int hostUid) { return userScope.namespace().userIdInContainer(hostUid); } + public int groupIdInContainer(int hostGid) { return userScope.namespace().groupIdInContainer(hostGid); } @Override public ContainerUserPrincipal lookupPrincipalByName(String name) throws IOException { - int containerUid = resolveName(name, vespaUser.name(), vespaUser.uid()); - String user = resolveId(containerUid, vespaUser.name(), vespaUser.uid()); + int containerUid = resolveName(name, UnixUser::uid, UnixUser::name); + String user = resolveId(containerUid, UnixUser::uid, UnixUser::name); String hostUid = String.valueOf(userIdOnHost(containerUid)); return new ContainerUserPrincipal(containerUid, user, baseFsUserPrincipalLookupService.lookupPrincipalByName(hostUid)); } @Override public ContainerGroupPrincipal lookupPrincipalByGroupName(String group) throws IOException { - int containerGid = resolveName(group, vespaUser.group(), vespaUser.gid()); - String name = resolveId(containerGid, vespaUser.group(), vespaUser.gid()); + int containerGid = resolveName(group, UnixUser::gid, UnixUser::group); + String name = resolveId(containerGid, UnixUser::gid, UnixUser::group); String hostGid = String.valueOf(groupIdOnHost(containerGid)); return new ContainerGroupPrincipal(containerGid, name, baseFsUserPrincipalLookupService.lookupPrincipalByGroupName(hostGid)); } public ContainerUserPrincipal userPrincipal(int uid, UserPrincipal baseFsPrincipal) { - String name = resolveId(uid, vespaUser.name(), vespaUser.uid()); + String name = resolveId(uid, UnixUser::uid, UnixUser::name); return new ContainerUserPrincipal(uid, name, baseFsPrincipal); } public ContainerGroupPrincipal groupPrincipal(int gid, GroupPrincipal baseFsPrincipal) { - String name = resolveId(gid, vespaUser.group(), vespaUser.gid()); + String name = resolveId(gid, UnixUser::gid, UnixUser::group); return new ContainerGroupPrincipal(gid, name, baseFsPrincipal); } - private String resolveId(int id, String vespaName, int vespaId) { - if (id == 0) return "root"; - if (id == vespaId) return vespaName; + private String resolveId(int id, Function<UnixUser, Integer> idExtractor, Function<UnixUser, String> nameExtractor) { + if (idExtractor.apply(userScope.root()) == id) return nameExtractor.apply(userScope.root()); + if (idExtractor.apply(userScope.vespa()) == id) return nameExtractor.apply(userScope.vespa()); return String.valueOf(id); } - private int resolveName(String name, String vespaName, int vespaId) throws UserPrincipalNotFoundException { - if (name.equals("root")) return 0; - if (name.equals(vespaName)) return vespaId; + private int resolveName(String name, Function<UnixUser, Integer> idExtractor, Function<UnixUser, String> nameExtractor) throws UserPrincipalNotFoundException { + if (name.equals(nameExtractor.apply(userScope.root()))) return idExtractor.apply(userScope.root()); + if (name.equals(nameExtractor.apply(userScope.vespa()))) return idExtractor.apply(userScope.vespa()); try { return Integer.parseInt(name); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java index cd020d81450..42661bdfdd0 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java @@ -9,8 +9,8 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.disk.DiskCleanup; import com.yahoo.vespa.hosted.node.admin.maintenance.sync.SyncClient; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl; -import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; import com.yahoo.vespa.hosted.node.admin.task.util.file.DiskSize; +import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder; import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath; import com.yahoo.vespa.hosted.node.admin.task.util.process.TestTerminal; import com.yahoo.vespa.test.file.TestFileSystem; @@ -76,7 +76,7 @@ public class StorageMaintainerTest { Path pathToArchiveDir = fileSystem.getPath("/data/vespa/storage/container-archive"); Files.createDirectories(pathToArchiveDir); - Path containerStorageRoot = context1.containerPath("/").pathOnHost().getParent(); + Path containerStorageRoot = context1.paths().of("/").pathOnHost().getParent(); Set<String> containerStorageRootContentsBeforeArchive = FileFinder.from(containerStorageRoot) .maxDepth(1) .stream() @@ -115,9 +115,9 @@ public class StorageMaintainerTest { NodeAgentContext context = NodeAgentContextImpl.builder(containerName + ".domain.tld") .fileSystem(fileSystem).build(); - ContainerPath containerVespaHome = context.containerPathUnderVespaHome(""); - Files.createDirectories(context.containerPath("/etc/something")); - Files.createFile(context.containerPath("/etc/something/conf")); + ContainerPath containerVespaHome = context.paths().underVespaHome(""); + Files.createDirectories(context.paths().of("/etc/something")); + Files.createFile(context.paths().of("/etc/something/conf")); Files.createDirectories(containerVespaHome.resolve("logs/vespa")); Files.createFile(containerVespaHome.resolve("logs/vespa/vespa.log")); @@ -126,7 +126,7 @@ public class StorageMaintainerTest { Files.createDirectories(containerVespaHome.resolve("var/db")); Files.createFile(containerVespaHome.resolve("var/db/some-file")); - ContainerPath containerRoot = context.containerPath("/"); + ContainerPath containerRoot = context.paths().of("/"); Set<String> actualContents = FileFinder.files(containerRoot) .stream() .map(fileAttributes -> containerRoot.relativize(fileAttributes.path()).toString()) diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java index ded99cf3778..ed45768aec8 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java @@ -31,7 +31,7 @@ public class CoreCollectorTest { private final NodeAgentContext context = NodeAgentContextImpl.builder("container-123.domain.tld") .fileSystem(TestFileSystem.create()).build(); - private final ContainerPath TEST_CORE_PATH = context.containerPath("/tmp/core.1234"); + private final ContainerPath TEST_CORE_PATH = context.paths().of("/tmp/core.1234"); private final String TEST_BIN_PATH = "/usr/bin/program"; private final List<String> GDB_BACKTRACE = List.of("[New Thread 2703]", "Core was generated by `/usr/bin/program\'.", "Program terminated with signal 11, Segmentation fault.", @@ -182,7 +182,7 @@ public class CoreCollectorTest { @Test public void metadata_for_java_heap_dump() { - assertEquals(JAVA_HEAP_DUMP_METADATA, coreCollector.collect(context, context.containerPath("/dump_java_pid123.hprof"))); + assertEquals(JAVA_HEAP_DUMP_METADATA, coreCollector.collect(context, context.paths().of("/dump_java_pid123.hprof"))); } private void mockExec(String[] cmd, String output) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java index 9f507f451b9..5371d69ecda 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java @@ -48,7 +48,7 @@ public class CoredumpHandlerTest { private final FileSystem fileSystem = TestFileSystem.create(); private final NodeAgentContext context = NodeAgentContextImpl.builder("container-123.domain.tld") .fileSystem(fileSystem).build(); - private final ContainerPath containerCrashPath = context.containerPath("/var/crash"); + private final ContainerPath containerCrashPath = context.paths().of("/var/crash"); private final Path doneCoredumpsPath = fileSystem.getPath("/home/docker/dumps"); private final TestTerminal terminal = new TestTerminal(); @@ -64,8 +64,8 @@ public class CoredumpHandlerTest { @Test public void coredump_enqueue_test() throws IOException { - ContainerPath crashPath = context.containerPath("/some/crash/path"); - ContainerPath processingDir = context.containerPath("/some/other/processing"); + ContainerPath crashPath = context.paths().of("/some/crash/path"); + ContainerPath processingDir = context.paths().of("/some/other/processing"); Files.createDirectories(crashPath); createFileAged(crashPath.resolve("bash.core.431"), Duration.ZERO); @@ -98,8 +98,8 @@ public class CoredumpHandlerTest { @Test public void enqueue_with_hs_err_files() throws IOException { - ContainerPath crashPath = context.containerPath("/some/crash/path"); - ContainerPath processingDir = context.containerPath("/some/other/processing"); + ContainerPath crashPath = context.paths().of("/some/crash/path"); + ContainerPath processingDir = context.paths().of("/some/other/processing"); Files.createDirectories(crashPath); createFileAged(crashPath.resolve("java.core.69"), Duration.ofSeconds(515)); @@ -119,7 +119,7 @@ public class CoredumpHandlerTest { @Test public void coredump_to_process_test() throws IOException { - ContainerPath processingDir = context.containerPath("/some/other/processing"); + ContainerPath processingDir = context.paths().of("/some/other/processing"); // Initially there are no core dumps Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(containerCrashPath, processingDir); @@ -164,7 +164,7 @@ public class CoredumpHandlerTest { "}}"; - ContainerPath coredumpDirectory = context.containerPath("/var/crash/id-123"); + ContainerPath coredumpDirectory = context.paths().of("/var/crash/id-123"); Files.createDirectories(coredumpDirectory.pathOnHost()); Files.createFile(coredumpDirectory.resolve("dump_core.456")); when(coreCollector.collect(eq(context), eq(coredumpDirectory.resolve("dump_core.456")))) @@ -180,12 +180,12 @@ public class CoredumpHandlerTest { @Test(expected = IllegalStateException.class) public void cant_get_metadata_if_no_core_file() throws IOException { - coredumpHandler.getMetadata(context, context.containerPath("/fake/path"), Map::of); + coredumpHandler.getMetadata(context, context.paths().of("/fake/path"), Map::of); } @Test(expected = IllegalStateException.class) public void fails_to_get_core_file_if_only_compressed() throws IOException { - ContainerPath coredumpDirectory = context.containerPath("/path/to/coredump/proccessing/id-123"); + ContainerPath coredumpDirectory = context.paths().of("/path/to/coredump/proccessing/id-123"); Files.createDirectories(coredumpDirectory); Files.createFile(coredumpDirectory.resolve("dump_bash.core.431.lz4")); coredumpHandler.findCoredumpFileInProcessingDirectory(coredumpDirectory); @@ -193,7 +193,7 @@ public class CoredumpHandlerTest { @Test public void process_single_coredump_test() throws IOException { - ContainerPath coredumpDirectory = context.containerPath("/path/to/coredump/proccessing/id-123"); + ContainerPath coredumpDirectory = context.paths().of("/path/to/coredump/proccessing/id-123"); Files.createDirectories(coredumpDirectory); Files.write(coredumpDirectory.resolve("metadata.json"), "metadata".getBytes()); Files.createFile(coredumpDirectory.resolve("dump_bash.core.431")); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java index f632a5c2bd2..9a6a358d9f0 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextImplTest.java @@ -26,54 +26,54 @@ public class NodeAgentContextImplTest { public void path_on_host_from_path_in_node_test() { assertEquals( "/data/vespa/storage/container-1", - context.containerPath("/").pathOnHost().toString()); + context.paths().of("/").pathOnHost().toString()); assertEquals( "/data/vespa/storage/container-1/dev/null", - context.containerPath("/dev/null").pathOnHost().toString()); + context.paths().of("/dev/null").pathOnHost().toString()); } @Test(expected=IllegalArgumentException.class) public void path_in_container_must_be_absolute() { - context.containerPath("some/relative/path"); + context.paths().of("some/relative/path"); } @Test public void path_in_node_from_path_on_host_test() { assertEquals( "/dev/null", - context.containerPathFromPathOnHost(fileSystem.getPath("/data/vespa/storage/container-1/dev/null")).pathInContainer()); + context.paths().fromPathOnHost(fileSystem.getPath("/data/vespa/storage/container-1/dev/null")).pathInContainer()); } @Test(expected=IllegalArgumentException.class) public void path_on_host_must_be_absolute() { - context.containerPathFromPathOnHost(Path.of("some/relative/path")); + context.paths().fromPathOnHost(Path.of("some/relative/path")); } @Test(expected=IllegalArgumentException.class) public void path_on_host_must_be_inside_container_storage_of_context() { - context.containerPathFromPathOnHost(fileSystem.getPath("/data/vespa/storage/container-2/dev/null")); + context.paths().fromPathOnHost(fileSystem.getPath("/data/vespa/storage/container-2/dev/null")); } @Test(expected=IllegalArgumentException.class) public void path_on_host_must_be_inside_container_storage() { - context.containerPathFromPathOnHost(fileSystem.getPath("/home")); + context.paths().fromPathOnHost(fileSystem.getPath("/home")); } @Test public void path_under_vespa_host_in_container_test() { assertEquals( "/opt/vespa", - context.containerPathUnderVespaHome("").pathInContainer()); + context.paths().underVespaHome("").pathInContainer()); assertEquals( "/opt/vespa/logs/vespa/vespa.log", - context.containerPathUnderVespaHome("logs/vespa/vespa.log").pathInContainer()); + context.paths().underVespaHome("logs/vespa/vespa.log").pathInContainer()); } @Test(expected=IllegalArgumentException.class) public void path_under_vespa_home_must_be_relative() { - context.containerPathUnderVespaHome("/home"); + context.paths().underVespaHome("/home"); } @Test diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemTest.java index 14c1378fbd7..b5f2ef41a1a 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerFileSystemTest.java @@ -2,8 +2,9 @@ package com.yahoo.vespa.hosted.node.admin.task.util.fs; import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import com.yahoo.vespa.test.file.TestFileSystem; import org.junit.jupiter.api.Test; @@ -26,14 +27,12 @@ class ContainerFileSystemTest { private final FileSystem fileSystem = TestFileSystem.create(); private final UnixPath containerRootOnHost = new UnixPath(fileSystem.getPath("/data/storage/ctr1")); - private final UserNamespace userNamespace = new UserNamespace(10_000, 11_000, 10000); - private final VespaUser vespaUser = new VespaUser("vespa", "users", 1000, 100); - private final ContainerFileSystem containerFs = ContainerFileSystem.create( - containerRootOnHost.createDirectories().toPath(), userNamespace, vespaUser); + private final UserScope userScope = UserScope.create(new UnixUser("vespa", 1000, "users", 100), new UserNamespace(10_000, 11_000, 10000)); + private final ContainerFileSystem containerFs = ContainerFileSystem.create(containerRootOnHost.createDirectories().toPath(), userScope); @Test public void creates_files_and_directories_with_container_root_as_owner() throws IOException { - ContainerPath containerPath = ContainerPath.fromPathInContainer(containerFs, Path.of("/opt/vespa/logs/file")); + ContainerPath containerPath = ContainerPath.fromPathInContainer(containerFs, Path.of("/opt/vespa/logs/file"), userScope.root()); UnixPath unixPath = new UnixPath(containerPath).createParents().writeUtf8File("hello world"); for (ContainerPath p = containerPath; p.getParent() != null; p = p.getParent()) @@ -43,14 +42,14 @@ class ContainerFileSystemTest { assertOwnership(containerPath, 500, 1000, 10500, 12000); UnixPath hostFile = new UnixPath(fileSystem.getPath("/file")).createNewFile(); - ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/copy1")); + ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/copy1"), userScope.root()); Files.copy(hostFile.toPath(), destination); assertOwnership(destination, 0, 0, 10000, 11000); } @Test public void file_write_and_read() throws IOException { - ContainerPath containerPath = ContainerPath.fromPathInContainer(containerFs, Path.of("/file")); + ContainerPath containerPath = ContainerPath.fromPathInContainer(containerFs, Path.of("/file"), userScope.root()); UnixPath unixPath = new UnixPath(containerPath); unixPath.writeUtf8File("hello"); assertOwnership(containerPath, 0, 0, 10000, 11000); @@ -61,16 +60,20 @@ class ContainerFileSystemTest { assertOwnership(containerPath, 500, 200, 10500, 11200); // Owner should not have been updated as the file already existed assertEquals("hello world", unixPath.readUtf8File()); + + unixPath.deleteIfExists(); + new UnixPath(containerPath.withUser(userScope.vespa())).writeUtf8File("test123"); + assertOwnership(containerPath, 1000, 100, 11000, 11100); } @Test public void copy() throws IOException { UnixPath hostFile = new UnixPath(fileSystem.getPath("/file")).createNewFile(); - ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest")); + ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest"), userScope.root()); // If file is copied to JimFS path, the UID/GIDs are not fixed Files.copy(hostFile.toPath(), destination.pathOnHost()); - assertEquals(String.valueOf(userNamespace.overflowId()), Files.getOwner(destination).getName()); + assertEquals(String.valueOf(userScope.namespace().overflowId()), Files.getOwner(destination).getName()); Files.delete(destination); Files.copy(hostFile.toPath(), destination); @@ -87,7 +90,7 @@ class ContainerFileSystemTest { // Set owner + group and copy within ContainerFS new UnixPath(destination).setOwnerId(500).setGroupId(200); - ContainerPath destination2 = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest2")); + ContainerPath destination2 = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest2"), userScope.root()); Files.copy(destination, destination2, StandardCopyOption.COPY_ATTRIBUTES, StandardCopyOption.REPLACE_EXISTING); assertOwnership(destination2, 500, 200, 10500, 11200); } @@ -95,11 +98,11 @@ class ContainerFileSystemTest { @Test public void move() throws IOException { UnixPath hostFile = new UnixPath(fileSystem.getPath("/file")).createNewFile(); - ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest")); + ContainerPath destination = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest"), userScope.root()); // If file is moved to JimFS path, the UID/GIDs are not fixed Files.move(hostFile.toPath(), destination.pathOnHost()); - assertEquals(String.valueOf(userNamespace.overflowId()), Files.getOwner(destination).getName()); + assertEquals(String.valueOf(userScope.namespace().overflowId()), Files.getOwner(destination).getName()); Files.delete(destination); hostFile.createNewFile(); @@ -118,21 +121,21 @@ class ContainerFileSystemTest { // Set owner + group and move within ContainerFS new UnixPath(destination).setOwnerId(500).setGroupId(200); - ContainerPath destination2 = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest2")); + ContainerPath destination2 = ContainerPath.fromPathInContainer(containerFs, Path.of("/dest2"), userScope.root()); Files.move(destination, destination2, StandardCopyOption.COPY_ATTRIBUTES, StandardCopyOption.REPLACE_EXISTING); assertOwnership(destination2, 500, 200, 10500, 11200); } @Test public void symlink() throws IOException { - ContainerPath source = ContainerPath.fromPathInContainer(containerFs, Path.of("/src")); + ContainerPath source = ContainerPath.fromPathInContainer(containerFs, Path.of("/src"), userScope.root()); // Symlink from ContainerPath to some relative path (different FS provider) Files.createSymbolicLink(source, fileSystem.getPath("../relative/target")); assertEquals(fileSystem.getPath("../relative/target"), Files.readSymbolicLink(source)); Files.delete(source); // Symlinks from ContainerPath to a ContainerPath: Target is resolved within container with base FS provider - Files.createSymbolicLink(source, ContainerPath.fromPathInContainer(containerFs, Path.of("/path/in/container"))); + Files.createSymbolicLink(source, ContainerPath.fromPathInContainer(containerFs, Path.of("/path/in/container"), userScope.root())); assertEquals(fileSystem.getPath("/path/in/container"), Files.readSymbolicLink(source)); assertOwnership(source, 0, 0, 10000, 11000); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPathTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPathTest.java index 768db980432..95696798c43 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPathTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerPathTest.java @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.task.util.fs; -import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import com.yahoo.vespa.test.file.TestFileSystem; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -14,8 +14,6 @@ import java.nio.file.Files; import java.nio.file.LinkOption; import java.nio.file.Path; -import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath.fromPathInContainer; -import static com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath.fromPathOnHost; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; @@ -28,28 +26,28 @@ import static org.mockito.Mockito.mock; class ContainerPathTest { private final FileSystem baseFs = TestFileSystem.create(); - private final ContainerFileSystem containerFs = ContainerFileSystem.create(baseFs.getPath("/data/storage/ctr1"), mock(UserNamespace.class), mock(VespaUser.class)); + private final ContainerFileSystem containerFs = ContainerFileSystem.create(baseFs.getPath("/data/storage/ctr1"), mock(UserScope.class)); @Test public void create_new_container_path() { - ContainerPath path = fromPathInContainer(containerFs, Path.of("/opt/vespa//logs/./file")); + ContainerPath path = fromPathInContainer(Path.of("/opt/vespa//logs/./file")); assertPaths(path, "/data/storage/ctr1/opt/vespa/logs/file", "/opt/vespa/logs/file"); - path = fromPathOnHost(containerFs, baseFs.getPath("/data/storage/ctr1/opt/vespa/logs/file")); + path = fromPathOnHost(baseFs.getPath("/data/storage/ctr1/opt/vespa/logs/file")); assertPaths(path, "/data/storage/ctr1/opt/vespa/logs/file", "/opt/vespa/logs/file"); - path = fromPathOnHost(containerFs, baseFs.getPath("/data/storage/ctr2/..////./ctr1/./opt")); + path = fromPathOnHost(baseFs.getPath("/data/storage/ctr2/..////./ctr1/./opt")); assertPaths(path, "/data/storage/ctr1/opt", "/opt"); - assertThrows(() -> fromPathInContainer(containerFs, Path.of("relative/path")), "Path in container must be absolute: relative/path"); - assertThrows(() -> fromPathOnHost(containerFs, baseFs.getPath("relative/path")), "Paths have different roots: /data/storage/ctr1, relative/path"); - assertThrows(() -> fromPathOnHost(containerFs, baseFs.getPath("/data/storage/ctr2")), "Path /data/storage/ctr2 is not under container root /data/storage/ctr1"); - assertThrows(() -> fromPathOnHost(containerFs, baseFs.getPath("/data/storage/ctr1/../ctr2")), "Path /data/storage/ctr2 is not under container root /data/storage/ctr1"); + assertThrows(() -> fromPathInContainer(Path.of("relative/path")), "Path in container must be absolute: relative/path"); + assertThrows(() -> fromPathOnHost(baseFs.getPath("relative/path")), "Paths have different roots: /data/storage/ctr1, relative/path"); + assertThrows(() -> fromPathOnHost(baseFs.getPath("/data/storage/ctr2")), "Path /data/storage/ctr2 is not under container root /data/storage/ctr1"); + assertThrows(() -> fromPathOnHost(baseFs.getPath("/data/storage/ctr1/../ctr2")), "Path /data/storage/ctr2 is not under container root /data/storage/ctr1"); } @Test public void container_path_operations() { - ContainerPath path = fromPathInContainer(containerFs, Path.of("/opt/vespa/logs/file")); + ContainerPath path = fromPathInContainer(Path.of("/opt/vespa/logs/file")); ContainerPath parent = path.getParent(); assertPaths(path.getRoot(), "/data/storage/ctr1", "/"); assertPaths(parent, "/data/storage/ctr1/opt/vespa/logs", "/opt/vespa/logs"); @@ -72,7 +70,7 @@ class ContainerPathTest { @Test public void resolution() { - ContainerPath path = fromPathInContainer(containerFs, Path.of("/opt/vespa/logs")); + ContainerPath path = fromPathInContainer(Path.of("/opt/vespa/logs")); assertPaths(path.resolve(Path.of("/root")), "/data/storage/ctr1/root", "/root"); assertPaths(path.resolve(Path.of("relative")), "/data/storage/ctr1/opt/vespa/logs/relative", "/opt/vespa/logs/relative"); assertPaths(path.resolve(Path.of("/../../../dir2/../../../dir2")), "/data/storage/ctr1/dir2", "/dir2"); @@ -84,7 +82,7 @@ class ContainerPathTest { @Test public void resolves_real_paths() throws IOException { - ContainerPath path = fromPathInContainer(containerFs, Path.of("/opt/vespa/logs")); + ContainerPath path = fromPathInContainer(Path.of("/opt/vespa/logs")); Files.createDirectories(path.pathOnHost().getParent()); Files.createFile(baseFs.getPath("/data/storage/ctr1/opt/vespa/target1")); @@ -103,6 +101,13 @@ class ContainerPathTest { assertThrows(path::toRealPath, "Path /data/storage/ctr2 is not under container root /data/storage/ctr1"); } + private ContainerPath fromPathInContainer(Path pathInContainer) { + return ContainerPath.fromPathInContainer(containerFs, pathInContainer, UnixUser.ROOT); + } + private ContainerPath fromPathOnHost(Path pathOnHost) { + return ContainerPath.fromPathOnHost(containerFs, pathOnHost, UnixUser.ROOT); + } + private static void assertPaths(ContainerPath actual, String expectedPathOnHost, String expectedPathInContainer) { assertEquals(expectedPathOnHost, actual.pathOnHost().toString()); assertEquals(expectedPathInContainer, actual.pathInContainer()); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupServiceTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupServiceTest.java index 40db3250ca3..72eec92cf53 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupServiceTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/fs/ContainerUserPrincipalLookupServiceTest.java @@ -2,7 +2,8 @@ package com.yahoo.vespa.hosted.node.admin.task.util.fs; import com.yahoo.vespa.hosted.node.admin.nodeagent.UserNamespace; -import com.yahoo.vespa.hosted.node.admin.nodeagent.VespaUser; +import com.yahoo.vespa.hosted.node.admin.nodeagent.UserScope; +import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixUser; import com.yahoo.vespa.test.file.TestFileSystem; import org.junit.jupiter.api.Test; @@ -19,10 +20,9 @@ import static org.junit.jupiter.api.Assertions.assertThrows; */ class ContainerUserPrincipalLookupServiceTest { - private final UserNamespace userNamespace = new UserNamespace(10_000, 11_000, 10000); - private final VespaUser vespaUser = new VespaUser("vespa", "users", 1000, 100); + private final UserScope userScope = UserScope.create(new UnixUser("vespa", 1000, "users", 100), new UserNamespace(10_000, 11_000, 10000)); private final ContainerUserPrincipalLookupService userPrincipalLookupService = - new ContainerUserPrincipalLookupService(TestFileSystem.create().getUserPrincipalLookupService(), userNamespace, vespaUser); + new ContainerUserPrincipalLookupService(TestFileSystem.create().getUserPrincipalLookupService(), userScope); @Test public void correctly_resolves_ids() throws IOException { diff --git a/parent/pom.xml b/parent/pom.xml index 05b3d671281..7cdc57244b4 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -642,6 +642,11 @@ <version>${apache.httpclient5.version}</version> </dependency> <dependency> + <groupId>org.apache.httpcomponents.core5</groupId> + <artifactId>httpcore5</artifactId> + <version>${apache.httpclient5.version}</version> + </dependency> + <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpmime</artifactId> <version>${apache.httpclient.version}</version> @@ -873,7 +878,7 @@ <antlr4.version>4.5</antlr4.version> <apache.httpclient.version>4.5.13</apache.httpclient.version> <apache.httpcore.version>4.4.13</apache.httpcore.version> - <apache.httpclient5.version>5.1</apache.httpclient5.version> + <apache.httpclient5.version>5.1.1</apache.httpclient5.version> <asm.version>9.2</asm.version> <!-- Athenz dependencies. Make sure these dependencies match those in Vespa's internal repositories --> diff --git a/sd-plugin/BACKLOG.md b/sd-plugin/BACKLOG.md new file mode 100644 index 00000000000..19e185dcc6b --- /dev/null +++ b/sd-plugin/BACKLOG.md @@ -0,0 +1,34 @@ +### Open Issues + +1. In some cases, the grammar prefers not to enforce bad syntax, because if the parser encounters bad syntax it stops +and can't build the PSI tree. That means none of the features will work for a file like that. For example, in cases +where an element supposes to have zero-to-one occurrences, the grammar will treat it as zero-to-many. +2. In order to enable the grammar recognize some keywords as identifiers (e.g. "filter" as a field's name), the +identifier rule (named "IdentifierVal") wraps the regex (ID_REG) and the KeywordOrIdentifier rule (which contains all +the keywords in the language). +3. The implementation of the GoTo Declaration feature is not exactly the same as IntelliJ. In IntelliJ if a reference +has several declarations, after clicking "Goto Declaration" there is a little window with all the declarations to choose +from. It can be done by changing the method "multiResolve" in SdReference.java to return more than one declaration. The +problem with that is that it causes the "Find Usages" feature to not work. For now, I decided to make the plugin +"Goto Declaration" feature show only the most specific declaration by the right rank-profile scope. +4. The "Find Usages" window can group usages only under rank-profiles and document-summaries. Other usages appear +directly under the .sd file. In order to create another group type of usages' group, you'll need to create 2 classes: +one for the extension "fileStructureGroupRuleProvider" (e.g. SdRankProfileGroupingRuleProvider.java), and one for the +grouping rule itself (e.g. SdRankProfileGroupingRule.java). +Another open problem is that the navigation isn't working in the current grouping rules. It means that when clicking on +the group headline (e.g. some name of a rank-profile) the IDE doesn't "jump" to the matching declaration. +5. Goto declaration doesn't work for document's inherits. e.g. if document A inherits from document B, B doesn't have a +reference to its declaration. +6. There aren't any tests for the plugin. + +### Some useful links: +1. JetBrains official tutorials: https://plugins.jetbrains.com/docs/intellij/custom-language-support.html and +https://plugins.jetbrains.com/docs/intellij/custom-language-support-tutorial.html +2. Grammar-Kit HOWTO: Helps to understand the BNF syntax. + https://github.com/JetBrains/Grammar-Kit/blob/master/HOWTO.md +3. How to deal with left-recursion in the grammar (in SD for example it happens in expressions). Last answer here: +https://intellij-support.jetbrains.com/hc/en-us/community/posts/360001258300-What-s-the-alternative-to-left-recursion-in-GrammarKit- +4. Great tutorial for a custom-language-plugin, but only for the basics (mainly the parser and lexer): + https://medium.com/@shan1024/custom-language-plugin-development-for-intellij-idea-part-01-d6a41ab96bc9 +5. Code of Dart (some custom language) plugin for IntelliJ: +https://github.com/JetBrains/intellij-plugins/tree/0f07ca63355d5530b441ca566c98f17c560e77f8/Dart
\ No newline at end of file diff --git a/sd-plugin/README.md b/sd-plugin/README.md index 74ca1781596..1cab356f78e 100644 --- a/sd-plugin/README.md +++ b/sd-plugin/README.md @@ -1,25 +1,42 @@ <!-- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> -This directory holds the code for an IntteliJ plugin for reading SD files. +This directory holds the code for an IntelliJ plugin for reading SD files. -NOTE: This is the source code, not the plugin itself. In order to be able to use the plugin you'll need to download it from JetBrains Marketplace or create a zip file and load it to IntelliJ (details later). +NOTE: This is the source code, not the plugin itself. In order to be able to use the plugin you'll need to download it +from JetBrains Marketplace or create a zip file and load it to IntelliJ by choosing "Install Plugin from Disk". -Before cloning, you should download Gradle and create a Gradle project. -You should also download Grammar-Kit plugin from the Marketplace. +Before starting, you should: +1. Download Gradle 7 (if you don't have it already). +2. Make sure that the bundled Plugin DevKit plugin is enabled (inside IntelliJ). +3. Optional- Download Grammar-Kit plugin from JetBrains Marketplace (inside IntelliJ). It helps with reading the .bnf file. +4. Optional- Download PsiViewer plugin from JetBrains Marketplace (inside IntelliJ). It helps to test the grammar defined +in the .bnf file. +### Working Process The grammar is defined in 2 files: - sd.bnf - sd.flex -After cloning, you should: -1. Right-click the sd.bnf file and press "Generate Parser Code" -2. Right-click the sd.flex file and press "Run JFlex Generator" - -Now you should have a "gen" folder next to the "java" folder, and it contains all the parser and lexer code. +In order to generate the lexer and parser's code, you need to run in the command line: -Important note! After any change in one of this 2 files (bnf, flex) you'll need to generate again. The proper way is to delete the "gen" folder and then do 1-2 again. + ./gradlew generateSdParser + ./gradlew generateSdLexer -Now, you can run the gradle task "intellij/runIde", open a project with some sd file and see how the plugin works on it. +You should now have a "gen" folder next to the "java" folder, and it contains all the parser and lexer code. -In order to test the plugin locally (on you IDE, not by running the gradle task "runIde"), you can run the gradle task -"intellij/buildPlugin". It would create a zip file in the directory build\distributions. You can load it to IntelliJ by -clicking the "settings" in preferences/Plugins and click "Install Plugin from disk".
\ No newline at end of file +NOTE- Running those tasks would reset the "gen" folder, and all the previous generated files would be deleted before the +new ones would be generated. + +Now, you can run the gradle task "intellij/runIde" (or "./gradlew runIde" in the command line), open a project with some sd file and see how the plugin works on it. + +### Build the Plugin +In order to build the plugin and create a zip file from it, you should run the command: + + ./gradlew buildPlugin + +Or since it's a default task you can just run: + + ./gradlew + +This task also invokes the tasks generateSdParser and generateSdLexer as a part of the building process. + +Now, you'll have a zip file in the directory build\distributions. diff --git a/sd-plugin/build.gradle b/sd-plugin/build.gradle index 0a4798032d9..e5b39769b96 100644 --- a/sd-plugin/build.gradle +++ b/sd-plugin/build.gradle @@ -38,7 +38,7 @@ compileJava { group 'org.yahoo.native' -version '1.0.1' +version '1.0.2' sourceCompatibility = 11 diff --git a/sd-plugin/src/main/java/org/intellij/sdk/language/SdSyntaxHighlighter.java b/sd-plugin/src/main/java/org/intellij/sdk/language/SdSyntaxHighlighter.java index 46cbffe0aa8..e6452685e3c 100644 --- a/sd-plugin/src/main/java/org/intellij/sdk/language/SdSyntaxHighlighter.java +++ b/sd-plugin/src/main/java/org/intellij/sdk/language/SdSyntaxHighlighter.java @@ -173,6 +173,7 @@ public class SdSyntaxHighlighter extends SyntaxHighlighterBase { constants.add(SdTypes.SUMMARY); constants.add(SdTypes.INDEX); constants.add(SdTypes.SET_LANGUAGE); + constants.add(SdTypes.LOWERCASE); constants.add(SdTypes.FAST_SEARCH); constants.add(SdTypes.FAST_ACCESS); constants.add(SdTypes.PAGED); diff --git a/sd-plugin/src/main/java/org/intellij/sdk/language/lexer/sd.flex b/sd-plugin/src/main/java/org/intellij/sdk/language/lexer/sd.flex index 5368551b81b..585a6672189 100644 --- a/sd-plugin/src/main/java/org/intellij/sdk/language/lexer/sd.flex +++ b/sd-plugin/src/main/java/org/intellij/sdk/language/lexer/sd.flex @@ -32,12 +32,12 @@ ID_WITH_DASH = [a-zA-Z_][a-zA-Z0-9_-]* WHITE_SPACE=[ \t\n\x0B\f\r]+ COMMENT=#.* -SYMBOL= [|:{}(),.\[\]] +SYMBOL= [!$|:{}(),.\[\]] INTEGER = [0-9]+ FLOAT = {INTEGER}[.][0-9]+[e]? COMPARISON_OPERATOR = [<>]|(==)|(<=)|(>=)|(\~=) ARITHMETIC_OPERATOR = [\-+*/] -STRING = [\"][^\"\n]*[\"] +STRING = \"([^\"\\]*(\\.[^\"\\]*)*)\" WORD = \w+ @@ -109,17 +109,21 @@ WORD = \w+ "rank" { return RANK; } "filter" { return FILTER; } "normal" { return NORMAL; } + "literal" { return LITERAL; } "indexing-rewrite" { return INDEXING_REWRITE; } "none" { return NONE; } "query-command" { return QUERY_COMMAND; } "full" { return FULL; } - "dinamic" { return DYNAMIC; } + "static" { return STATIC; } + "dynamic" { return DYNAMIC; } "source" { return SOURCE; } "to" { return TO; } "matched-elements-only" { return MATCHED_ELEMENTS_ONLY; } "input" { return INPUT; } "mutable" { return MUTABLE; } + "enable-bit-vectors" { return ENABLE_BIT_VECTORS; } + "enable-only-bit-vector" { return ENABLE_ONLY_BIT_VECTOR; } "document-summary" { return DOCUMENT_SUMMARY; } "from-disk" { return FROM_DISK; } "omit-summary-features" { return OMIT_SUMMARY_FEATURES; } @@ -127,10 +131,12 @@ WORD = \w+ "as" { return AS; } "rank-profile" { return RANK_PROFILE; } + "model" { return MODEL; } "match-phase" { return MATCH_PHASE; } "order" { return ORDER; } "ascending" { return ASCENDING; } "descending" { return DESCENDING; } + "locale" { return LOCALE; } "max-hits" { return MAX_HITS; } "diversity" { return DIVERSITY; } "min-groups" { return MIN_GROUPS; } @@ -154,6 +160,7 @@ WORD = \w+ "constants" { return CONSTANTS; } "second-phase" { return SECOND_PHASE; } "rerank-count" { return RERANK_COUNT; } + "rank-features" { return RANK_FEATURES; } "weight" { return WEIGHT; } "index" { return INDEX; } @@ -203,6 +210,10 @@ WORD = \w+ "body" { return BODY; } "header" { return HEADER; } + "summary-to" { return SUMMARY_TO; } + + "evaluation-point" { return EVALUATION_POINT; } + "pre-post-filter-tipping-point" { return PRE_POST_FILTER_TIPPING_POINT; } // In here, we check for character sequences which matches regular expressions defined above. {ID} { return ID_REG; } diff --git a/sd-plugin/src/main/java/org/intellij/sdk/language/parser/sd.bnf b/sd-plugin/src/main/java/org/intellij/sdk/language/parser/sd.bnf index 243e177dbdd..67bdca7d2dc 100644 --- a/sd-plugin/src/main/java/org/intellij/sdk/language/parser/sd.bnf +++ b/sd-plugin/src/main/java/org/intellij/sdk/language/parser/sd.bnf @@ -28,21 +28,21 @@ NOTE: This grammar does not enforce zero-or-one occurrences of elements (treats ID_WITH_DASH_REG = 'regexp:[a-zA-Z_][a-zA-Z0-9_-]*' WHITE_SPACE = 'regexp:\s+' COMMENT = 'regexp:#.*' - SYMBOL = 'regexp:[|:{}(),.\[\]]' + SYMBOL = 'regexp:[!$|:{}(),.\[\]]' COMPARISON_OPERATOR = 'regexp:[<>]|(==)|(<=)|(>=)|(~=)' ARITHMETIC_OPERATOR = 'regexp:[\-+*/]' INTEGER_REG = 'regexp:[0-9]+' FLOAT_REG = 'regexp:[0-9]+[.][0-9]+[e]?' - STRING_REG = 'regexp:[\"][^\"]*[\"]' + STRING_REG = 'regexp:\"([^\"\\]*(\\.[^\"\\]*)*)\"' WORD_REG = 'regexp:\w+' ] } -SdFile ::= SchemaDefinition -SchemaDefinition ::= (search | schema) IdentifierVal? '{' SchemaBody '}' +SdFile ::= SchemaDefinition | DocumentDefinition +SchemaDefinition ::= (search | schema) IdentifierVal? (inherits IdentifierVal)? '{' SchemaBody '}' SchemaBody ::= SchemaBodyOptions* DocumentDefinition SchemaBodyOptions* // Does not support zero-or-one occurrences private SchemaBodyOptions ::= SchemaFieldDefinition | ImportFieldDefinition | DocumentSummaryDefinition | - RankProfileDefinition | + RankProfileDefinition | IndexDefinition | FieldSetDefinition | ConstantDefinition | OnnxModelDefinition | StemmingDefinition | raw-as-base64-in-summary | SchemaAnnotationDefinition @@ -54,18 +54,13 @@ SchemaFieldDefinition ::= field IdentifierVal type FieldTypeName '{' SchemaField FieldTypeName ::= ("array" '<' (FieldTypeName | IdentifierVal) '>') | ("weightedset" '<' SingleValueFieldTypeName '>') | ("map" '<' (FieldTypeName | IdentifierVal) ',' (FieldTypeName | IdentifierVal) '>') | TensorType | - SingleValueFieldTypeName + (SingleValueFieldTypeName '[' ']') | SingleValueFieldTypeName private SingleValueFieldTypeName ::= "string" | "int" | "long" | "bool" | "byte" | "float" | "double" | "position" | "predicate" | "raw" | "uri" | "reference" '<' IdentifierVal '>' | "annotationreference" '<' IdentifierVal '>' | IdentifierVal -private TensorType ::= "tensor" '<' ("float" | "double" | "int8" | "bfloat16") '>' '(' TensorDimension (',' TensorDimension)* ')' -private TensorDimension ::= WORD_REG ('{' '}') | ('[' INTEGER_REG ']') +private TensorType ::= "tensor" ('<' ("float" | "double" | "int8" | "bfloat16") '>')? '(' TensorDimension (',' TensorDimension)* ')' +private TensorDimension ::= WordWrapper (('{' '}') | ('[' INTEGER_REG ']')) -SchemaFieldBody ::= SchemaFieldBodyOptions* // Does not support zero-or-one occurrences -SchemaFieldBodyOptions ::= SchemaFieldIndexingDefinition | AttributeDefinition | RankDefinition | IndexingRewriteState | - MatchDefinition | StructFieldDefinition | QueryCommandDefinition - -SchemaFieldIndexingDefinition ::= indexing (':' SchemaFieldIndexingStatement) | ('{' SchemaFieldIndexingStatement+ '}') -SchemaFieldIndexingStatement ::= (input IdentifierVal) | IndexingStatement +SchemaFieldBody ::= DocumentFieldBodyOptions* // Fields of schemas and documents defined the same way here DocumentSummaryDefinition ::= document-summary IdentifierWithDashVal (inherits IdentifierWithDashVal)? '{' DocumentSummaryBody '}' { mixin="org.intellij.sdk.language.psi.impl.SdNamedElementImpl" @@ -86,7 +81,7 @@ private FieldSetBodyOptions ::= (fields ':' IdentifierVal (',' IdentifierVal)*) ConstantDefinition ::= constant IdentifierVal '{' ConstantBody '}' ConstantBody ::= ConstantBodyOptions* private ConstantBodyOptions ::= (file ':' FilePath) | (uri ':' UriPath) | (type ':' TensorType) -private FilePath ::= (IdentifierVal | WORD_REG) ('.' | '/' | IdentifierWithDashVal | WORD_REG)+ +private FilePath ::= WordWrapper (('.' | '/') WordWrapper)+ private UriPath ::= ('H'|'h') ('T'|'t') ('T'|'t') ('P'|'p') ('S'|'s')? ':' ('//')? (IdentifierWithDashVal | '.' | '/' | ':')+ @@ -100,7 +95,7 @@ SchemaAnnotationDefinition ::= AnnotationDefinition implements=["org.intellij.sdk.language.psi.SdDeclaration" "org.intellij.sdk.language.psi.SdNamedElement"] } -private AnnotationDefinition ::= annotation IdentifierVal '{' AnnotationFieldDefinition* '}' +private AnnotationDefinition ::= annotation IdentifierVal (inherits IdentifierVal)? '{' AnnotationFieldDefinition* '}' AnnotationFieldDefinition ::= field IdentifierVal type FieldTypeName '{' '}' { mixin="org.intellij.sdk.language.psi.impl.SdNamedElementImpl" implements=["org.intellij.sdk.language.psi.SdDeclaration" "org.intellij.sdk.language.psi.SdNamedElement"] @@ -109,11 +104,13 @@ AnnotationFieldDefinition ::= field IdentifierVal type FieldTypeName '{' '}' //------------------------- //--- Expressions rules --- //------------------------- -RankingExpression ::= ParenthesisedExpr | BooleanExpr |ArithmeticExpr | IfFunctionExpr | - QueryDefinitionExpr | FunctionCallExpr | PrimitiveExpr +RankingExpression ::= FilePathExpr | ParenthesisedExpr | BooleanExpr | ArithmeticExpr | IfFunctionExpr | + QueryDefinitionExpr | FunctionCallExpr | InListRankingExpr | PrimitiveExpr -IfFunctionExpr ::= "if" '(' (InListRankingExpression | RankingExpression) ',' RankingExpression ',' RankingExpression ')' -InListRankingExpression ::= RankingExpression "in" '[' RankingExpression (',' RankingExpression)* ']' +FilePathExpr ::= file ':' (FilePath | WordWrapper) + +IfFunctionExpr ::= "if" '(' (InListRankingExpr | RankingExpression) ',' RankingExpression ',' RankingExpression ')' +InListRankingExpr ::= RankingExpression "in" '[' RankingExpression (',' RankingExpression)* ']' BooleanExpr ::= RankingExpression COMPARISON_OPERATOR RankingExpression @@ -125,41 +122,42 @@ FunctionCallExpr ::= IdentifierWithDashVal '(' RankingExpression (',' RankingExp ParenthesisedExpr ::= '(' RankingExpression ')' -PrimitiveExpr ::= (('-')? INTEGER_REG) | (('-')? FLOAT_REG) | IdentifierVal | RankFeature +PrimitiveExpr ::= (('-')? INTEGER_REG) | (('-')? FLOAT_REG) | IdentifierVal | RankFeature | STRING_REG //------------------------- //-- Rank Profile rules --- //------------------------- -RankProfileDefinition ::= rank-profile IdentifierWithDashVal (inherits IdentifierWithDashVal)? '{' RankProfileBody '}' +RankProfileDefinition ::= (rank-profile | model) IdentifierWithDashVal (inherits IdentifierWithDashVal)? '{' RankProfileBody '}' { mixin="org.intellij.sdk.language.psi.impl.SdNamedElementImpl" implements=["org.intellij.sdk.language.psi.SdDeclaration"] } private RankProfileBody ::= RankProfileBodyOptions* // Does not support zero-or-one occurrences private RankProfileBodyOptions ::= MatchPhaseDefinition | NumThreadsDefinition | FunctionDefinition | TermwiseLimitDefinition | ignore-default-rank-features | RankPropertiesDefinition | FirstPhaseDefinition | - SummaryFeaturesDefinition | MatchFeaturesDefinition | RankFeaturesDefinition | - SecondPhaseDefinition | ConstantsDefinition | - RankDefinition | RankTypeDefinition | MinHitsDefinition | NumSearchPartitionDefinition + SummaryFeaturesDefinition | MatchFeaturesDefinition | RankFeaturesDefinition | + SecondPhaseDefinition | ConstantsDefinition | RankDefinition | RankTypeDefinition | + MinHitsDefinition | NumSearchPartitionDefinition | FieldWeightDefinition MatchPhaseDefinition ::= match-phase '{' MatchPhaseBody '}' MatchPhaseBody ::= MatchPhaseBodyOptions+ -MatchPhaseBodyOptions ::= (attribute ':' IdentifierVal (order ':' (ascending | descending))?) | (max-hits ':' INTEGER_REG) - | DiversityDefinition // Does not support zero-or-one occurrences +MatchPhaseBodyOptions ::= (attribute ':' IdentifierVal) | (order ':' (ascending | descending)) | (max-hits ':' ('-')? INTEGER_REG) + | DiversityDefinition | (evaluation-point ':' ('-')? FLOAT_REG) | + (pre-post-filter-tipping-point ':' ('-')? FLOAT_REG) // Does not support zero-or-one occurrences DiversityDefinition ::= diversity '{' DiversityBody '}' DiversityBody ::= DiversityBodyOptions* -private DiversityBodyOptions ::= (attribute ':' IdentifierVal) | (min-groups ':' INTEGER_REG) | (cutoff-factor ':' FLOAT_REG) | +private DiversityBodyOptions ::= (attribute ':' IdentifierVal) | (min-groups ':' ('-')? INTEGER_REG) | (cutoff-factor ':' ('-')? FLOAT_REG) | (cutoff-strategy ':' (strict | loose)) private NumThreadsDefinition ::= num-threads-per-search ':' INTEGER_REG -private TermwiseLimitDefinition ::= termwise-limit ':' (FLOAT_REG | INTEGER_REG) -private MinHitsDefinition ::= min-hits-per-thread ':' INTEGER_REG +private TermwiseLimitDefinition ::= termwise-limit ':' ('-')? (FLOAT_REG | INTEGER_REG) +private MinHitsDefinition ::= min-hits-per-thread ':' ('-')? INTEGER_REG private NumSearchPartitionDefinition ::= num-search-partition ':' INTEGER_REG +FieldWeightDefinition ::= weight IdentifierVal ':' INTEGER_REG FirstPhaseDefinition ::= first-phase '{' FirstPhaseBody '}' { mixin="org.intellij.sdk.language.psi.impl.SdFirstPhaseDefinitionMixin" } FirstPhaseBody ::= FirstPhaseBodyOptions* // Does not support zero-or-one occurrences -private FirstPhaseBodyOptions ::= (keep-rank-count ':' INTEGER_REG) | (rank-score-drop-limit ':' (FLOAT_REG | INTEGER_REG)) | ExpressionDefinition +private FirstPhaseBodyOptions ::= (keep-rank-count ':' INTEGER_REG) | (rank-score-drop-limit ':' ('-')? (FLOAT_REG | INTEGER_REG)) | ExpressionDefinition -ExpressionDefinition ::= expression ((':' RankingExpression) | ('{' RankingExpression* '}') | - (':' file ':' FilePath)) +ExpressionDefinition ::= expression ((':' RankingExpression) | ('{' RankingExpression* '}')) SecondPhaseDefinition ::= second-phase '{' SecondPhaseBody '}' SecondPhaseBody ::= SecondPhaseBodyOptions* @@ -167,7 +165,7 @@ private SecondPhaseBodyOptions ::= (rerank-count ':' INTEGER_REG) | ExpressionDe RankPropertiesDefinition ::= rank-properties '{' RankPropertiesBody '}' RankPropertiesBody ::= (RankPropertiesKey ':' RankPropertiesValue)+ -RankPropertiesKey ::= (IdentifierWithDashVal | STRING_REG | '(' | ')' | '.' | ',')+ +RankPropertiesKey ::= (IdentifierWithDashVal | STRING_REG | '(' | ')' | '.' | ',' | '$' | INTEGER_REG)+ RankPropertiesValue ::= (('-')? INTEGER_REG) | (('-')? FLOAT_REG) | WORD_REG | IdentifierVal | STRING_REG FunctionDefinition ::= (function | macro) inline? IdentifierVal '(' (ArgumentDefinition (',' ArgumentDefinition)*)? ')' @@ -182,9 +180,9 @@ ArgumentDefinition ::= IdentifierVal SummaryFeaturesDefinition ::= summary-features ((':' RankFeature+) | ((inherits IdentifierVal)? '{' RankFeature* '}')) -MatchFeaturesDefinition ::= match-features (':' RankFeature+) | ('{' RankFeature* '}') +MatchFeaturesDefinition ::= match-features ((':' RankFeature+) | ('{' RankFeature* '}')) -RankFeaturesDefinition ::= rank-features (':' RankFeature+) | ('{' RankFeature* '}') +RankFeaturesDefinition ::= rank-features ((':' RankFeature+) | ('{' RankFeature* '}')) ConstantsDefinition ::= constants '{' (IdentifierVal ':' RankPropertiesValue)* '}' @@ -234,14 +232,13 @@ DocumentFieldDefinition ::= field IdentifierVal type FieldTypeName '{' DocumentF DocumentFieldBody ::= DocumentFieldBodyOptions* // Does not support zero-or-one occurrences private DocumentFieldBodyOptions ::= StructFieldDefinition | MatchDefinition | IndexingDefinition | AttributeDefinition | - AliasDef | RankDefinition | IndexingRewriteState | QueryCommandDefinition | SummaryDefinition | + AliasDefinition | RankDefinition | IndexingRewriteState | QueryCommandDefinition | SummaryDefinition | BoldingDefinition | (id ':' INTEGER_REG) | IndexDefinition | (normalizing ':' IdentifierWithDashVal) | SortingDefinition | StemmingDefinition | (weight ':' INTEGER_REG) | WeightedSetDefinition | - RankTypeDefinition | DictionaryDefinition - + RankTypeDefinition | DictionaryDefinition | SummaryToDefinition | body //***** Field's body elements ******// // Struct -StructFieldDefinition ::= struct-field IdentifierVal '{' StructFieldBody '}' +StructFieldDefinition ::= struct-field IdentifierVal ('.' IdentifierVal)? '{' StructFieldBody '}' { mixin="org.intellij.sdk.language.psi.impl.SdNamedElementImpl" implements=["org.intellij.sdk.language.psi.SdDeclaration" "org.intellij.sdk.language.psi.SdNamedElement"] } @@ -251,61 +248,66 @@ StructFieldBodyOptions ::= IndexingDefinition | AttributeDefinition | MatchDefin StructFieldDefinition | SummaryDefinition // Match MatchDefinition ::= match ((':' MatchProperty) | ('{' MatchProperty+ '}')) -MatchProperty ::= text | exact | exact-terminator | word | prefix | cased | uncased | substring | suffix | max-length | - gram | gram-size +MatchProperty ::= text | exact | (exact-terminator ':' STRING_REG) | word | prefix | cased | uncased | substring | + suffix | (max-length ':' INTEGER_REG) | gram | (gram-size ':' INTEGER_REG) | WordWrapper // Indexing -IndexingDefinition ::= indexing (':' IndexingStatement) | ('{' IndexingStatement+ '}') -IndexingStatement ::= IndexingStatementOptions (('|' IndexingStatementOptions)*) | ((';' IndexingStatementOptions)*) +IndexingDefinition ::= indexing ((':' IndexingStatement) | ('{' IndexingStatement+ '}')) +IndexingStatement ::= IndexingStatementOptions ((('|' | ';') IndexingStatementOptions)*) // Does not support zero-or-one occurrences -IndexingStatementOptions ::= summary | attribute | index | set_language +IndexingStatementOptions ::= summary | attribute | index | set_language | lowercase | (input (IdentifierVal | IndexingStuff)+) | + ('{' IndexingStatementOptions '}') | IndexingStuff+ +private IndexingStuff ::= WordWrapper | INTEGER_REG | FLOAT_REG | STRING_REG | ('{' IndexingStatementOptions+ '}') | + ':' | ('|' IndexingStatementOptions) | ';' | '.' | '(' | ')' | ARITHMETIC_OPERATOR | COMPARISON_OPERATOR // Attribute -AttributeDefinition ::= attribute ((':' SimpleAttributeProperty) | ('{' (SimpleAttributeProperty | ComplexAttributeProperty)+ '}')) -SimpleAttributeProperty ::= fast-search | fast-access | paged | mutable // Does not support zero-or-one occurrences -ComplexAttributeProperty ::= AliasDef | SortingDefinition | DistanceMetricDef // Does not support zero-or-one occurrences +AttributeDefinition ::= attribute ((':' SimpleAttributeProperty) | ('{' (ComplexAttributeProperty | SimpleAttributeProperty)+ '}')) +SimpleAttributeProperty ::= fast-search | fast-access | paged | mutable | enable-bit-vectors | enable-only-bit-vector | WordWrapper // Does not support zero-or-one occurrences +ComplexAttributeProperty ::= AliasDefinition | SortingDefinition | DistanceMetricDef // Does not support zero-or-one occurrences DistanceMetricDef ::= distance-metric ':' IdentifierWithDashVal // Alias -AliasDef ::= alias IdentifierVal? ':' IdentifierWithDashVal +AliasDefinition ::= alias (IdentifierWithDashVal ('.' IdentifierWithDashVal)*)? ':' IdentifierWithDashVal ('.' IdentifierWithDashVal)* // Stemming StemmingDefinition ::= stemming ':' IdentifierWithDashVal // Rank RankDefinition ::= rank ((IdentifierVal? ':' RankingSetting) | ('{' RankingSetting '}')) -RankingSetting ::= filter | normal +RankingSetting ::= filter | normal | literal | WordWrapper // Indexing Rewrite IndexingRewriteState ::= indexing-rewrite ':' none // Query Command -QueryCommandDefinition ::= query-command ':' IdentifierVal | STRING_REG +QueryCommandDefinition ::= query-command ':' (IdentifierVal | STRING_REG | WordWrapper) // Summary -SummaryDefinition ::= summary ((':' SummaryBodyOptions) | (IdentifierWithDashVal? (type FieldTypeName)? '{' SummaryBody '}')) +SummaryDefinition ::= summary IdentifierWithDashVal? (type FieldTypeName)? ((':' SummaryBodyOptions) | ( '{' SummaryBody '}')) { mixin="org.intellij.sdk.language.psi.impl.SdSummaryDefinitionMixin" } SummaryBody ::= SummaryBodyOptions* // Does not support zero-or-one occurrences -SummaryBodyOptions ::= full | dynamic | (source ':' IdentifierVal (',' IdentifierVal)*) | - (to ':' IdentifierVal (',' IdentifierVal)*) | matched-elements-only +SummaryBodyOptions ::= full | static | dynamic | (source ':' (IdentifierVal ('.' IdentifierVal)?) (',' IdentifierVal ('.' IdentifierVal)?)*) | + (to ':' IdentifierVal (',' IdentifierVal)*) | matched-elements-only | BoldingDefinition +// Summary To +SummaryToDefinition ::= summary-to ':' WordWrapper (',' WordWrapper)* // Bolding BoldingDefinition ::= bolding ':' (on | off | true | false) // Index -IndexDefinition ::= index IdentifierVal (':' IndexProperty) | ('{' IndexProperty* '}') +IndexDefinition ::= index IdentifierVal? ((':' IndexProperty) | ('{' IndexProperty '}')) IndexProperty ::= IndexPropertyOptions* private IndexPropertyOptions ::= (alias ':' IdentifierWithDashVal) | StemmingDefinition | (arity ':' INTEGER_REG) | (lower-bound ':' INTEGER_REG ('L')?) | (upper-bound ':' INTEGER_REG ('L')?) | - (dense-posting-list-threshold ':' FLOAT_REG) | enable-bm25 | HnswDefinition + (dense-posting-list-threshold ':' FLOAT_REG) | enable-bm25 | prefix | HnswDefinition HnswDefinition ::= hnsw '{' HnswBody '}' HnswBody ::= HnswBodyOptions* private HnswBodyOptions ::= (max-links-per-node ':' INTEGER_REG) | (neighbors-to-explore-at-insert ':' INTEGER_REG) | (multi-threaded-indexing ':' (on | off | true | false)) // Sorting -SortingDefinition ::= sorting (':' SortingProperty) | ('{' SortingProperty* '}') +SortingDefinition ::= sorting ((':' SortingProperty) | ('{' SortingProperty* '}')) SortingProperty ::= ascending | descending | (function ':' SortingFunction) | (strength ':' SortingStrength) | (locale ':' IdentifierWithDashVal) SortingFunction ::= uca | raw | lowercase SortingStrength ::= primary | secondary | tertiary | quaternary | identical // Rank Type -RankTypeDefinition ::= rank-type IdentifierVal ':' IdentifierVal +RankTypeDefinition ::= rank-type IdentifierVal? ':' IdentifierVal // Weighted Set -WeightedSetDefinition ::= weightedset (':' WeightedSetProperty) | ('{' WeightedSetProperty* '}') // Does not support +WeightedSetDefinition ::= weightedset ((':' WeightedSetProperty) | ('{' WeightedSetProperty* '}')) // Does not support // zero-or-one occurrences WeightedSetProperty ::= create-if-nonexistent | remove-if-zero // Dictionary -DictionaryDefinition ::= dictionary (':' DictionarySetting) | ('{' DictionarySetting* '}') +DictionaryDefinition ::= dictionary ((':' DictionarySetting) | ('{' DictionarySetting* '}')) DictionarySetting ::= hash | btree | cased | uncased //***** End of Field's body elements ******// @@ -313,6 +315,8 @@ DictionarySetting ::= hash | btree | cased | uncased //---- Util rules ----- //--------------------- +private WordWrapper ::= KeywordOrIdentifier | KeywordNotIdentifier | ID_REG | ID_WITH_DASH_REG | WORD_REG + IdentifierVal ::= KeywordOrIdentifier | ID_REG { mixin="org.intellij.sdk.language.psi.impl.SdIdentifierMixin" implements=["org.intellij.sdk.language.psi.SdIdentifier"] } @@ -328,12 +332,12 @@ KeywordOrIdentifier ::= schema | search | document | struct | field | type | ind order | ascending | descending | diversity | constants | expression | weight | match | function | macro | inline | text | exact | word | prefix | cased | uncased | substring | suffix | gram | paged | mutable | alias | sorting | strength | locale | uca | lowercase | - primary | secondary | tertiary | quaternary | identical | rank | filter | normal | none | full | dynamic | - source | to | strict | loose | + primary | secondary | tertiary | quaternary | identical | rank | filter | normal | literal | + none | full | dynamic | source | to | strict | loose | bolding | on | off | true | false | id | normalizing | stemming | arity | hnsw | dictionary | hash | btree | fieldset | fields | constant | annotation - | attribute | body | header | index | - reference | summary | set_language + | attribute | body | header | index | static | + reference | summary | set_language | model // Note- in this form, those keywords can't be use as identifier-with-dash! KeywordNotIdentifier ::= struct-field | document-summary | omit-summary-features | from-disk | rank-profile | rank-type | @@ -345,5 +349,6 @@ KeywordNotIdentifier ::= struct-field | document-summary | omit-summary-features indexing-rewrite | query-command | matched-elements-only | lower-bound | upper-bound | dense-posting-list-threshold | enable-bm25 | max-links-per-node | neighbors-to-explore-at-insert | multi-threaded-indexing | create-if-nonexistent | remove-if-zero | raw-as-base64-in-summary | - onnx-model | cutoff-factor | cutoff-strategy | on-match | on-rank | on-summary + onnx-model | cutoff-factor | cutoff-strategy | on-match | on-rank | on-summary | enable-bit-vectors | + enable-only-bit-vector | summary-to | evaluation-point | pre-post-filter-tipping-point
\ No newline at end of file diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index f7d1b763117..36c7e386445 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -90,6 +90,7 @@ vespa_define_module( src/tests/proton/documentdb/documentbucketmover src/tests/proton/documentdb/documentdbconfig src/tests/proton/documentdb/documentdbconfigscout + src/tests/proton/documentdb/executor_threading_service src/tests/proton/documentdb/feedhandler src/tests/proton/documentdb/feedview src/tests/proton/documentdb/fileconfigmanager diff --git a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp index 019c7f898bf..a52c9ec2fb6 100644 --- a/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp +++ b/searchcore/src/apps/verify_ranksetup/verify_ranksetup.cpp @@ -151,6 +151,9 @@ App::verify(const search::index::Schema &schema, for (size_t i = 0; i < rankSetup.getSummaryFeatures().size(); ++i) { ok = verifyFeature(factory, indexEnv, rankSetup.getSummaryFeatures()[i], "summary features") && ok; } + for (const auto & feature : rankSetup.get_match_features()) { + ok = verifyFeature(factory, indexEnv, feature, "match features") && ok; + } for (size_t i = 0; i < rankSetup.getDumpFeatures().size(); ++i) { ok = verifyFeature(factory, indexEnv, rankSetup.getDumpFeatures()[i], "dump features") && ok; } diff --git a/searchcore/src/tests/proton/documentdb/executor_threading_service/CMakeLists.txt b/searchcore/src/tests/proton/documentdb/executor_threading_service/CMakeLists.txt new file mode 100644 index 00000000000..721f2207213 --- /dev/null +++ b/searchcore/src/tests/proton/documentdb/executor_threading_service/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_executor_threading_service_test_app TEST + SOURCES + executor_threading_service_test.cpp + DEPENDS + searchcore_server + GTest::GTest +) +vespa_add_test(NAME searchcore_executor_threading_service_test_app COMMAND searchcore_executor_threading_service_test_app) diff --git a/searchcore/src/tests/proton/documentdb/executor_threading_service/executor_threading_service_test.cpp b/searchcore/src/tests/proton/documentdb/executor_threading_service/executor_threading_service_test.cpp new file mode 100644 index 00000000000..714ffaa16b7 --- /dev/null +++ b/searchcore/src/tests/proton/documentdb/executor_threading_service/executor_threading_service_test.cpp @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcore/proton/server/executorthreadingservice.h> +#include <vespa/searchcore/proton/server/threading_service_config.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/sequencedtaskexecutor.h> + +using namespace proton; +using vespalib::ISequencedTaskExecutor; +using vespalib::SequencedTaskExecutor; +using SharedFieldWriterExecutor = ThreadingServiceConfig::SharedFieldWriterExecutor; + + +SequencedTaskExecutor* +to_concrete_type(ISequencedTaskExecutor& exec) +{ + return dynamic_cast<SequencedTaskExecutor*>(&exec); +} + +class ExecutorThreadingServiceTest : public ::testing::Test { +public: + vespalib::ThreadStackExecutor shared_executor; + std::unique_ptr<ExecutorThreadingService> service; + ExecutorThreadingServiceTest() + : shared_executor(1, 1000), + service() + { + } + void setup(uint32_t indexing_threads, SharedFieldWriterExecutor shared_field_writer) { + service = std::make_unique<ExecutorThreadingService>(shared_executor, + ThreadingServiceConfig::make(indexing_threads, shared_field_writer)); + } + SequencedTaskExecutor* index_inverter() { + return to_concrete_type(service->indexFieldInverter()); + } + SequencedTaskExecutor* index_writer() { + return to_concrete_type(service->indexFieldWriter()); + } + SequencedTaskExecutor* attribute_writer() { + return to_concrete_type(service->attributeFieldWriter()); + } +}; + +void +assert_executor(SequencedTaskExecutor* exec, uint32_t exp_executors, uint32_t exp_task_limit) +{ + EXPECT_EQ(exp_executors, exec->getNumExecutors()); + EXPECT_EQ(exp_task_limit, exec->first_executor()->getTaskLimit()); +} + +TEST_F(ExecutorThreadingServiceTest, no_shared_field_writer_executor) +{ + setup(4, SharedFieldWriterExecutor::NONE); + EXPECT_NE(index_inverter(), index_writer()); + EXPECT_NE(index_writer(), attribute_writer()); + assert_executor(index_inverter(), 4, 100); + assert_executor(index_writer(), 4, 100); + assert_executor(attribute_writer(), 4, 100); +} + +TEST_F(ExecutorThreadingServiceTest, shared_executor_for_index_field_writers) +{ + setup(4, SharedFieldWriterExecutor::INDEX); + EXPECT_EQ(index_inverter(), index_writer()); + EXPECT_NE(index_inverter(), attribute_writer()); + assert_executor(index_inverter(), 8, 100); + assert_executor(attribute_writer(), 4, 100); +} + +TEST_F(ExecutorThreadingServiceTest, shared_executor_for_index_and_attribute_field_writers) +{ + setup(4, SharedFieldWriterExecutor::INDEX_AND_ATTRIBUTE); + EXPECT_EQ(index_inverter(), index_writer()); + EXPECT_EQ(index_inverter(), attribute_writer()); + assert_executor(index_inverter(), 12, 100); +} + +GTEST_MAIN_RUN_ALL_TESTS() + diff --git a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp index 85c91884294..8314fa6bfb8 100644 --- a/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp +++ b/searchcore/src/tests/proton/feed_and_search/feed_and_search.cpp @@ -20,6 +20,8 @@ #include <vespa/searchlib/queryeval/fake_requestcontext.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/gate.h> +#include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <sstream> @@ -59,6 +61,13 @@ using vespalib::string; namespace { +void commit_memory_index_and_wait(MemoryIndex &memory_index) +{ + vespalib::Gate gate; + memory_index.commit(std::make_shared<vespalib::GateCallback>(gate)); + gate.await(); +} + class Test : public vespalib::TestApp { const char *current_state; void DumpState(bool) { @@ -163,8 +172,7 @@ void Test::requireThatMemoryIndexCanBeDumpedAndSearched() { doc = buildDocument(doc_builder, doc_id2, word2); memory_index.insertDocument(doc_id2, *doc.get()); - memory_index.commit(std::shared_ptr<vespalib::IDestructorCallback>()); - indexFieldWriter->sync_all(); + commit_memory_index_and_wait(memory_index); testSearch(memory_index, word1, doc_id1); testSearch(memory_index, word2, doc_id2); diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp index ca10ff01a69..736bc4bae96 100644 --- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp +++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp @@ -14,6 +14,8 @@ #include <vespa/searchlib/memoryindex/memory_index.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/test/index/mock_field_length_inspector.h> +#include <vespa/vespalib/util/gate.h> +#include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/testkit/testapp.h> #include <set> @@ -156,8 +158,10 @@ void addDocument(DocBuilder & doc_builder, MemoryIndex &index, ISourceSelector & uint8_t index_id, uint32_t docid, const string &word) { Document::UP doc = buildDocument(doc_builder, docid, word); index.insertDocument(docid, *doc); - index.commit(std::shared_ptr<vespalib::IDestructorCallback>()); + vespalib::Gate gate; + index.commit(std::make_shared<vespalib::GateCallback>(gate)); selector.setSource(docid, index_id); + gate.await(); } void Test::createIndex(const string &dir, uint32_t id, bool fusion) { @@ -182,7 +186,6 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) { addDocument(doc_builder, memory_index, *_selector, id, id + 1, "bar"); addDocument(doc_builder, memory_index, *_selector, id, id + 2, "baz"); addDocument(doc_builder, memory_index, *_selector, id, id + 3, "qux"); - _threadingService.indexFieldWriter().sync_all(); const uint32_t docIdLimit = std::min(memory_index.getDocIdLimit(), _selector->getDocIdLimit()); diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp index 84cf0e3655f..8e41323e461 100644 --- a/searchcore/src/tests/proton/index/indexmanager_test.cpp +++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp @@ -21,6 +21,8 @@ #include <vespa/searchlib/test/index/mock_field_length_inspector.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/util/destructor_callbacks.h> +#include <vespa/vespalib/util/gate.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/vespalib/util/time.h> @@ -96,6 +98,12 @@ Document::UP buildDocument(DocBuilder &doc_builder, int id, return doc_builder.endDocument(); } +void push_documents_and_wait(search::memoryindex::DocumentInverter &inverter) { + vespalib::Gate gate; + inverter.pushDocuments(std::make_shared<vespalib::GateCallback>(gate)); + gate.await(); +} + std::shared_ptr<vespalib::IDestructorCallback> emptyDestructorCallback; struct IndexManagerTest : public ::testing::Test { @@ -142,10 +150,11 @@ struct IndexManagerTest : public ::testing::Test { Document::UP addDocument(uint32_t docid); void resetIndexManager(); void removeDocument(uint32_t docId, SerialNum serialNum) { + vespalib::Gate gate; runAsIndex([&]() { _index_manager->removeDocument(docId, serialNum); - _index_manager->commit(serialNum, emptyDestructorCallback); + _index_manager->commit(serialNum, std::make_shared<vespalib::GateCallback>(gate)); }); - _writeService.indexFieldWriter().sync_all(); + gate.await(); } void removeDocument(uint32_t docId) { SerialNum serialNum = ++_serial_num; @@ -182,10 +191,11 @@ IndexManagerTest::addDocument(uint32_t id) { Document::UP doc = buildDocument(_builder, id, "foo"); SerialNum serialNum = ++_serial_num; + vespalib::Gate gate; runAsIndex([&]() { _index_manager->putDocument(id, *doc, serialNum); _index_manager->commit(serialNum, - emptyDestructorCallback); }); - _writeService.indexFieldWriter().sync_all(); + std::make_shared<vespalib::GateCallback>(gate)); }); + gate.await(); return doc; } @@ -407,9 +417,7 @@ TEST_F(IndexManagerTest, require_that_flush_stats_are_calculated) Document::UP doc = addDocument(docid); inverter.invertDocument(docid, *doc); - invertThreads->sync_all(); - inverter.pushDocuments(std::shared_ptr<vespalib::IDestructorCallback>()); - pushThreads->sync_all(); + push_documents_and_wait(inverter); index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size; /// Must account for both docid 0 being reserved and the extra after. @@ -426,9 +434,7 @@ TEST_F(IndexManagerTest, require_that_flush_stats_are_calculated) inverter.invertDocument(docid + 10, *doc); doc = addDocument(docid + 100); inverter.invertDocument(docid + 100, *doc); - invertThreads->sync_all(); - inverter.pushDocuments(std::shared_ptr<vespalib::IDestructorCallback>()); - pushThreads->sync_all(); + push_documents_and_wait(inverter); index_size = fic.getMemoryUsage().allocatedBytes() - fixed_index_size; /// Must account for both docid 0 being reserved and the extra after. selector_size = (docid + 100 + 1) * sizeof(Source); diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index d690fb29795..1123871b91e 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -143,10 +143,11 @@ struct MyWorld { config.add(indexproperties::rank::FirstPhase::NAME, "attribute(a1)"); config.add(indexproperties::hitcollector::HeapSize::NAME, (vespalib::asciistream() << heapSize).str()); config.add(indexproperties::hitcollector::ArraySize::NAME, (vespalib::asciistream() << arraySize).str()); - config.add(indexproperties::summary::Feature::NAME, "attribute(a1)"); + config.add(indexproperties::summary::Feature::NAME, "matches(f1)"); config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"reduce(tensor(x[3])(x),sum)\")"); config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")"); config.add(indexproperties::summary::Feature::NAME, "value(100)"); + config.add(indexproperties::summary::Feature::NAME, " attribute ( a1 ) "); // will be sorted and normalized config.add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true"); config.add(indexproperties::dump::Feature::NAME, "attribute(a2)"); @@ -211,6 +212,45 @@ struct MyWorld { config.import(cfg); } + void setup_match_features() { + config.add(indexproperties::match::Feature::NAME, "attribute(a1)"); + config.add(indexproperties::match::Feature::NAME, "attribute(a2)"); + config.add(indexproperties::match::Feature::NAME, "matches(a1)"); + config.add(indexproperties::match::Feature::NAME, "matches(f1)"); + config.add(indexproperties::match::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")"); + } + + static void verify_match_features(SearchReply &reply, const vespalib::string &matched_field) { + if (reply.hits.empty()) { + EXPECT_EQUAL(reply.match_features.names.size(), 0u); + EXPECT_EQUAL(reply.match_features.values.size(), 0u); + } else { + ASSERT_EQUAL(reply.match_features.names.size(), 5u); + EXPECT_EQUAL(reply.match_features.names[0], "attribute(a1)"); + EXPECT_EQUAL(reply.match_features.names[1], "attribute(a2)"); + EXPECT_EQUAL(reply.match_features.names[2], "matches(a1)"); + EXPECT_EQUAL(reply.match_features.names[3], "matches(f1)"); + EXPECT_EQUAL(reply.match_features.names[4], "rankingExpression(\"tensor(x[3])(x)\")"); + ASSERT_EQUAL(reply.match_features.values.size(), 5 * reply.hits.size()); + for (size_t i = 0; i < reply.hits.size(); ++i) { + const auto *f = &reply.match_features.values[i * 5]; + EXPECT_GREATER(f[0].as_double(), 0.0); + EXPECT_GREATER(f[1].as_double(), 0.0); + EXPECT_EQUAL(f[0].as_double(), reply.hits[i].metric); + EXPECT_EQUAL(f[0].as_double() * 2, f[1].as_double()); + EXPECT_EQUAL(f[2].as_double(), double(matched_field == "a1")); + EXPECT_EQUAL(f[3].as_double(), double(matched_field == "f1")); + EXPECT_TRUE(f[4].is_data()); + { + nbostream buf(f[4].as_data().data, f[4].as_data().size); + auto actual = spec_from_value(*SimpleValue::from_stream(buf)); + auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); + EXPECT_EQUAL(actual, expect); + } + } + } + } + void setup_match_phase_limiting(const vespalib::string &attribute, size_t max_hits, bool descending) { inject_match_phase_limiting(config, attribute, max_hits, descending); @@ -442,6 +482,30 @@ TEST("require that matching is performed (multi-threaded)") { } } +TEST("require that match features are calculated (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + world.setup_match_features(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_GREATER(reply->hits.size(), 0u); + world.verify_match_features(*reply, "f1"); + } +} + +TEST("require that no hits gives no match feature names") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + world.setup_match_features(); + SearchRequest::SP request = world.createSimpleRequest("f1", "not_found"); + SearchReply::UP reply = world.performSearch(request, 1); + EXPECT_EQUAL(reply->hits.size(), 0u); + world.verify_match_features(*reply, "f1"); +} + TEST("require that matching also returns hits when only bitvector is used (multi-threaded)") { for (size_t threads = 1; threads <= 16; ++threads) { MyWorld world; @@ -645,30 +709,36 @@ TEST("require that summary features are filled") { world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getSummaryFeatures(req); - const FeatureSet::Value * f = NULL; - EXPECT_EQUAL(4u, fs->numFeatures()); + const FeatureSet::Value * f = nullptr; + EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); - EXPECT_EQUAL(2u, fs->numDocs()); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); + EXPECT_EQUAL(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(10, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(1, f[1].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); f = fs->getFeaturesByDocId(15); - EXPECT_TRUE(f == NULL); + EXPECT_TRUE(f != nullptr); + EXPECT_EQUAL(15, f[0].as_double()); + EXPECT_EQUAL(0, f[1].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); f = fs->getFeaturesByDocId(30); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); - EXPECT_TRUE(f[1].is_double()); - EXPECT_TRUE(!f[1].is_data()); - EXPECT_EQUAL(f[1].as_double(), 3.0); // 0 + 1 + 2 - EXPECT_TRUE(!f[2].is_double()); - EXPECT_TRUE(f[2].is_data()); + EXPECT_EQUAL(1, f[1].as_double()); + EXPECT_TRUE(f[2].is_double()); + EXPECT_TRUE(!f[2].is_data()); + EXPECT_EQUAL(f[2].as_double(), 3.0); // 0 + 1 + 2 + EXPECT_TRUE(!f[3].is_double()); + EXPECT_TRUE(f[3].is_data()); + EXPECT_EQUAL(100, f[4].as_double()); { - nbostream buf(f[2].as_data().data, f[2].as_data().size); + nbostream buf(f[3].as_data().data, f[3].as_data().size); auto actual = spec_from_value(*SimpleValue::from_stream(buf)); auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); EXPECT_EQUAL(actual, expect); @@ -681,17 +751,18 @@ TEST("require that rank features are filled") { world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getRankFeatures(req); - const FeatureSet::Value * f = NULL; + const FeatureSet::Value * f = nullptr; EXPECT_EQUAL(1u, fs->numFeatures()); EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]); - EXPECT_EQUAL(2u, fs->numDocs()); + EXPECT_EQUAL(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(20, f[0].as_double()); f = fs->getFeaturesByDocId(15); - EXPECT_TRUE(f == NULL); + EXPECT_TRUE(f != nullptr); + EXPECT_EQUAL(30, f[0].as_double()); f = fs->getFeaturesByDocId(30); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(60, f[0].as_double()); } @@ -727,29 +798,42 @@ TEST("require that getSummaryFeatures can use cached query setup") { docsum_request->hits.back().docid = 30; FeatureSet::SP fs = world.getSummaryFeatures(docsum_request); - ASSERT_EQUAL(4u, fs->numFeatures()); + ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); ASSERT_EQUAL(1u, fs->numDocs()); const auto *f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); // getSummaryFeatures can be called multiple times. fs = world.getSummaryFeatures(docsum_request); - ASSERT_EQUAL(4u, fs->numFeatures()); + ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); ASSERT_EQUAL(1u, fs->numDocs()); f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); +} + +double count_f1_matches(FeatureSet &fs) { + ASSERT_TRUE(fs.getNames().size() > 1); + ASSERT_EQUAL(fs.getNames()[1], "matches(f1)"); + double sum = 0.0; + for (size_t i = 0; i < fs.numDocs(); ++i) { + auto *f = fs.getFeaturesByIndex(i); + sum += f[1].as_double(); + } + return sum; } TEST("require that getSummaryFeatures prefers cached query setup") { @@ -765,16 +849,18 @@ TEST("require that getSummaryFeatures prefers cached query setup") { req->sessionId = request->sessionId; req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); FeatureSet::SP fs = world.getSummaryFeatures(req); - EXPECT_EQUAL(4u, fs->numFeatures()); - ASSERT_EQUAL(0u, fs->numDocs()); // "spread" has no hits + EXPECT_EQUAL(5u, fs->numFeatures()); + EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits // Empty cache auto pruneTime = vespalib::steady_clock::now() + 600s; world.sessionManager->pruneTimedOutSessions(pruneTime); fs = world.getSummaryFeatures(req); - EXPECT_EQUAL(4u, fs->numFeatures()); - ASSERT_EQUAL(2u, fs->numDocs()); // "foo" has two hits + EXPECT_EQUAL(5u, fs->numFeatures()); + EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits } TEST("require that match params are set up straight with ranking on") { diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index e1bdf13fd36..6b85f4a6829 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -493,6 +493,16 @@ hwinfo.cpu.cores int default = 0 restart ## Deprecated -> Use documentdb.feeding.concurrency feeding.concurrency double default = 0.2 restart +## Whether we should use a shared field writer executor in the document db threading service: +## +## NONE: Don't use a shared executor. +## INDEX: Use a shared executor for index field inverter and index field writer. +## INDEX_AND_ATTRIBUTE: Use a shared executor for index field inverter, index field writer, and attribute field writer. +## DOCUMENT_DB: Use a shared executor for index field inverter, index field writer, and attribute field writer among all document dbs. +## +## TODO: Remove this when a shared executor is the default. +feeding.shared_field_writer_executor enum {NONE, INDEX, INDEX_AND_ATTRIBUTE, DOCUMENT_DB} default = NONE restart + ## Adjustment to resource limit when determining if maintenance jobs can run. ## ## Currently used by 'lid_space_compaction' and 'move_buckets' jobs. diff --git a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt index 5f0017293e7..41e2fe2105f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(searchcore_matching STATIC docid_range_scheduler.cpp docsum_matcher.cpp document_scorer.cpp + extract_features.cpp fakesearchcontext.cpp handlerecorder.cpp i_match_loop_communicator.cpp diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp index 91b44b277f0..864e0a6b337 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -3,6 +3,7 @@ #include "docsum_matcher.h" #include "match_tools.h" #include "search_session.h" +#include "extract_features.h" #include <vespa/eval/eval/value_codec.h> #include <vespa/vespalib/objects/nbostream.h> #include <vespa/searchcommon/attribute/i_search_context.h> @@ -46,45 +47,7 @@ get_feature_set(const MatchToolsFactory &mtf, } else { matchTools->setup_dump(); } - RankProgram &rankProgram = matchTools->rank_program(); - - std::vector<vespalib::string> featureNames; - FeatureResolver resolver(rankProgram.get_seeds(false)); - featureNames.reserve(resolver.num_features()); - for (size_t i = 0; i < resolver.num_features(); ++i) { - featureNames.emplace_back(resolver.name_of(i)); - } - auto retval = std::make_unique<FeatureSet>(featureNames, docs.size()); - if (docs.empty()) { - return retval; - } - FeatureSet &fs = *retval; - - SearchIterator &search = matchTools->search(); - search.initRange(docs.front(), docs.back()+1); - for (uint32_t i = 0; i < docs.size(); ++i) { - if (search.seek(docs[i])) { - uint32_t docId = search.getDocId(); - search.unpack(docId); - auto * f = fs.getFeaturesByIndex(fs.addDocId(docId)); - for (uint32_t j = 0; j < featureNames.size(); ++j) { - if (resolver.is_object(j)) { - auto obj = resolver.resolve(j).as_object(docId); - if (! obj.get().type().is_double()) { - vespalib::nbostream buf; - encode_value(obj.get(), buf); - f[j].set_data(vespalib::Memory(buf.peek(), buf.size())); - } else { - f[j].set_double(obj.get().as_double()); - } - } else { - f[j].set_double(resolver.resolve(j).as_number(docId)); - } - } - } else { - LOG(debug, "getFeatureSet: Did not find hit for docid '%u'. Skipping hit", docs[i]); - } - } + auto retval = ExtractFeatures::get_feature_set(matchTools->search(), matchTools->rank_program(), docs); if (auto onSummaryTask = mtf.createOnSummaryTask()) { onSummaryTask->run(docs); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp new file mode 100644 index 00000000000..ef03fac2f6a --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.cpp @@ -0,0 +1,177 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "extract_features.h" +#include "match_tools.h" +#include <vespa/eval/eval/value_codec.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/runnable.h> +#include <vespa/vespalib/util/thread_bundle.h> +#include <vespa/searchlib/fef/feature_resolver.h> +#include <vespa/searchlib/fef/rank_program.h> +#include <vespa/searchlib/queryeval/searchiterator.h> + +using vespalib::Runnable; +using vespalib::ThreadBundle; +using search::FeatureSet; +using search::FeatureValues; +using search::fef::FeatureResolver; +using search::fef::RankProgram; +using search::queryeval::SearchIterator; + +namespace proton::matching { + +using OrderedDocs = ExtractFeatures::OrderedDocs; + +namespace { + +struct MyChunk : Runnable { + const std::pair<uint32_t,uint32_t> *begin; + const std::pair<uint32_t,uint32_t> *end; + FeatureValues &result; + MyChunk(const std::pair<uint32_t,uint32_t> *begin_in, + const std::pair<uint32_t,uint32_t> *end_in, + FeatureValues &result_in) + : begin(begin_in), end(end_in), result(result_in) {} + void calculate_features(SearchIterator &search, FeatureResolver &resolver) { + size_t num_features = result.names.size(); + assert(end > begin); + assert(num_features == resolver.num_features()); + search.initRange(begin[0].first, end[-1].first + 1); + for (auto pos = begin; pos != end; ++pos) { + uint32_t docid = pos->first; + search.unpack(docid); + auto * f = &result.values[pos->second * num_features]; + for (uint32_t i = 0; i < num_features; ++i) { + if (resolver.is_object(i)) { + auto obj = resolver.resolve(i).as_object(docid); + if (!obj.get().type().is_double()) { + vespalib::nbostream buf; + encode_value(obj.get(), buf); + f[i].set_data(vespalib::Memory(buf.peek(), buf.size())); + } else { + f[i].set_double(obj.get().as_double()); + } + } else { + f[i].set_double(resolver.resolve(i).as_number(docid)); + } + } + } + } +}; + +struct FirstChunk : MyChunk { + SearchIterator &search; + FeatureResolver &resolver; + FirstChunk(const std::pair<uint32_t,uint32_t> *begin_in, + const std::pair<uint32_t,uint32_t> *end_in, + FeatureValues &result_in, + SearchIterator &search_in, + FeatureResolver &resolver_in) + : MyChunk(begin_in, end_in, result_in), + search(search_in), + resolver(resolver_in) {} + void run() override { calculate_features(search, resolver); } +}; + +struct LaterChunk : MyChunk { + const MatchToolsFactory &mtf; + LaterChunk(const std::pair<uint32_t,uint32_t> *begin_in, + const std::pair<uint32_t,uint32_t> *end_in, + FeatureValues &result_in, + const MatchToolsFactory &mtf_in) + : MyChunk(begin_in, end_in, result_in), + mtf(mtf_in) {} + void run() override { + auto tools = mtf.createMatchTools(); + tools->setup_match_features(); + FeatureResolver resolver(tools->rank_program().get_seeds(false)); + calculate_features(tools->search(), resolver); + } +}; + +struct MyWork { + size_t num_threads; + std::vector<Runnable::UP> chunks; + MyWork(ThreadBundle &thread_bundle) : num_threads(thread_bundle.size()), chunks() { + chunks.reserve(num_threads); + } + void run(ThreadBundle &thread_bundle) { + std::vector<Runnable*> refs; + refs.reserve(chunks.size()); + for (const auto &task: chunks) { + refs.push_back(task.get()); + } + thread_bundle.run(refs); + } +}; + +} // unnamed + +FeatureSet::UP +ExtractFeatures::get_feature_set(SearchIterator &search, RankProgram &rank_program, const std::vector<uint32_t> &docs) +{ + std::vector<vespalib::string> featureNames; + FeatureResolver resolver(rank_program.get_seeds(false)); + featureNames.reserve(resolver.num_features()); + for (size_t i = 0; i < resolver.num_features(); ++i) { + featureNames.emplace_back(resolver.name_of(i)); + } + auto result = std::make_unique<FeatureSet>(featureNames, docs.size()); + if (!docs.empty()) { + search.initRange(docs.front(), docs.back()+1); + for (uint32_t docid: docs) { + search.unpack(docid); + auto * f = result->getFeaturesByIndex(result->addDocId(docid)); + for (uint32_t i = 0; i < featureNames.size(); ++i) { + if (resolver.is_object(i)) { + auto obj = resolver.resolve(i).as_object(docid); + if (!obj.get().type().is_double()) { + vespalib::nbostream buf; + encode_value(obj.get(), buf); + f[i].set_data(vespalib::Memory(buf.peek(), buf.size())); + } else { + f[i].set_double(obj.get().as_double()); + } + } else { + f[i].set_double(resolver.resolve(i).as_number(docid)); + } + } + } + } + return result; +} + +FeatureValues +ExtractFeatures::get_match_features(const MatchToolsFactory &mtf, const OrderedDocs &docs, ThreadBundle &thread_bundle) +{ + FeatureValues result; + auto tools = mtf.createMatchTools(); + tools->setup_match_features(); + FeatureResolver resolver(tools->rank_program().get_seeds(false)); + result.names.reserve(resolver.num_features()); + for (size_t i = 0; i < resolver.num_features(); ++i) { + result.names.emplace_back(resolver.name_of(i)); + } + result.values.resize(result.names.size() * docs.size()); + MyWork work(thread_bundle); + size_t per_thread = docs.size() / work.num_threads; + size_t rest_docs = docs.size() % work.num_threads; + size_t idx = 0; + for (size_t i = 0; i < work.num_threads; ++i) { + size_t chunk_size = per_thread + (i < rest_docs); + if (chunk_size == 0) { + break; + } + if (i == 0) { + work.chunks.push_back(std::make_unique<FirstChunk>(&docs[idx], &docs[idx + chunk_size], result, tools->search(), resolver)); + } else { + work.chunks.push_back(std::make_unique<LaterChunk>(&docs[idx], &docs[idx + chunk_size], result, mtf)); + } + idx += chunk_size; + } + assert(idx == docs.size()); + work.run(thread_bundle); + return result; +} + +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/extract_features.h b/searchcore/src/vespa/searchcore/proton/matching/extract_features.h new file mode 100644 index 00000000000..66e98d9db2d --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/matching/extract_features.h @@ -0,0 +1,39 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/common/featureset.h> +#include <vector> + +namespace vespalib { class ThreadBundle; }; +namespace search::queryeval { class SearchIterator; } +namespace search::fef { class RankProgram; } + +namespace proton::matching { + +class MatchToolsFactory; + +struct ExtractFeatures { + using FeatureSet = search::FeatureSet; + using FeatureValues = search::FeatureValues; + using ThreadBundle = vespalib::ThreadBundle; + using SearchIterator = search::queryeval::SearchIterator; + using RankProgram = search::fef::RankProgram; + + /** + * Extract all seed features from a rank program for a list of + * documents (must be in ascending order) using unpack information + * from a search. + **/ + static FeatureSet::UP get_feature_set(SearchIterator &search, RankProgram &rank_program, const std::vector<uint32_t> &docs); + + // first: docid, second: result index (must be sorted on docid) + using OrderedDocs = std::vector<std::pair<uint32_t,uint32_t>>; + + /** + * Extract match features using multiple threads. + **/ + static FeatureValues get_match_features(const MatchToolsFactory &mtf, const OrderedDocs &docs, ThreadBundle &thread_bundle); +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 26ed94f1d73..827ff4b5aca 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -5,7 +5,9 @@ #include "match_loop_communicator.h" #include "match_thread.h" #include "match_tools.h" +#include "extract_features.h" #include <vespa/searchlib/engine/trace.h> +#include <vespa/searchlib/engine/searchreply.h> #include <vespa/vespalib/util/thread_bundle.h> #include <vespa/vespalib/util/issue.h> #include <vespa/vespalib/data/slime/inserter.h> @@ -21,6 +23,7 @@ namespace proton::matching { using namespace search::fef; using search::queryeval::SearchIterator; using search::FeatureSet; +using vespalib::ThreadBundle; using vespalib::Issue; namespace { @@ -57,12 +60,25 @@ createScheduler(uint32_t numThreads, uint32_t numSearchPartitions, uint32_t numD return std::make_unique<TaskDocidRangeScheduler>(numThreads, numSearchPartitions, numDocs); } +auto make_reply(const MatchToolsFactory &mtf, ResultProcessor &processor, ThreadBundle &bundle, auto full_result) { + if (mtf.has_match_features()) { + auto docs = processor.extract_docid_ordering(*full_result); + auto reply = processor.makeReply(std::move(std::move(full_result))); + if ((docs.size() > 0) && reply->_reply) { + reply->_reply->match_features = ExtractFeatures::get_match_features(mtf, docs, bundle); + } + return reply; + } else { + return processor.makeReply(std::move(full_result)); + } +} + } // namespace proton::matching::<unnamed> ResultProcessor::Result::UP MatchMaster::match(search::engine::Trace & trace, const MatchParams ¶ms, - vespalib::ThreadBundle &threadBundle, + ThreadBundle &threadBundle, const MatchToolsFactory &mtf, ResultProcessor &resultProcessor, uint32_t distributionKey, @@ -87,7 +103,7 @@ MatchMaster::match(search::engine::Trace & trace, } resultProcessor.prepareThreadContextCreation(threadBundle.size()); threadBundle.run(targets); - ResultProcessor::Result::UP reply = resultProcessor.makeReply(threadState[0]->extract_result()); + auto reply = make_reply(mtf, resultProcessor, threadBundle, threadState[0]->extract_result()); double query_time_s = vespalib::to_s(query_latency_time.elapsed()); double rerank_time_s = vespalib::to_s(timedCommunicator.elapsed); double match_time_s = 0.0; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index d652435cbca..accbb19669a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -141,6 +141,12 @@ MatchTools::setup_second_phase() } void +MatchTools::setup_match_features() +{ + setup(_rankSetup.create_match_program()); +} + +void MatchTools::setup_summary() { setup(_rankSetup.create_summary_program()); @@ -281,6 +287,12 @@ MatchToolsFactory::has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); } +bool +MatchToolsFactory::has_match_features() const +{ + return _rankSetup.has_match_features(); +} + AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext, vespalib::stringref attribute, vespalib::stringref operation) : _requestContext(requestContext), diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index a403b4b1a78..d63c67ec1d0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -67,6 +67,7 @@ public: void tag_search_as_changed() { _search_has_changed = true; } void setup_first_phase(); void setup_second_phase(); + void setup_match_features(); void setup_summary(); void setup_dump(); }; @@ -129,6 +130,7 @@ public: std::unique_ptr<search::queryeval::IDiversifier> createDiversifier(uint32_t heapSize) const; search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } bool has_first_phase_rank() const; + bool has_match_features() const; std::unique_ptr<AttributeOperationTask> createOnMatchTask() const; std::unique_ptr<AttributeOperationTask> createOnFirstPhaseTask() const; std::unique_ptr<AttributeOperationTask> createOnSecondPhaseTask() const; diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp index 3da0d17895a..da1e6a2d567 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.cpp @@ -102,6 +102,19 @@ ResultProcessor::createThreadContext(const vespalib::Doom & hardDoom, size_t thr return std::make_unique<Context>(std::move(sort), std::move(result), std::move(groupingContext)); } +std::vector<std::pair<uint32_t,uint32_t>> +ResultProcessor::extract_docid_ordering(const PartialResult &result) const +{ + size_t est_size = result.size() - std::min(result.size(), _offset); + std::vector<std::pair<uint32_t,uint32_t>> list; + list.reserve(est_size); + for (size_t i = _offset; i < result.size(); ++i) { + list.emplace_back(result.hit(i)._docId, list.size()); + } + std::sort(list.begin(), list.end(), [](const auto &a, const auto &b){ return (a.first < b.first); }); + return list; +}; + ResultProcessor::Result::UP ResultProcessor::makeReply(PartialResultUP full_result) { diff --git a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h index e0220d53d1a..5ec11cd7acb 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/result_processor.h +++ b/searchcore/src/vespa/searchcore/proton/matching/result_processor.h @@ -103,6 +103,7 @@ public: size_t countFS4Hits(); void prepareThreadContextCreation(size_t num_threads); Context::UP createThreadContext(const vespalib::Doom & hardDoom, size_t thread_id, uint32_t distributionKey); + std::vector<std::pair<uint32_t,uint32_t>> extract_docid_ordering(const PartialResult &result) const; std::unique_ptr<Result> makeReply(PartialResultUP full_result); }; diff --git a/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.cpp b/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.cpp index cb4b396d63d..d35aaf9f909 100644 --- a/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.cpp @@ -12,6 +12,7 @@ using vespalib::BlockingThreadStackExecutor; using vespalib::SingleExecutor; using vespalib::SequencedTaskExecutor; using OptimizeFor = vespalib::Executor::OptimizeFor; +using SharedFieldWriterExecutor = proton::ThreadingServiceConfig::SharedFieldWriterExecutor; namespace proton { @@ -30,9 +31,10 @@ createExecutorWithOneThread(uint32_t stackSize, uint32_t taskLimit, OptimizeFor VESPA_THREAD_STACK_TAG(master_executor) VESPA_THREAD_STACK_TAG(index_executor) VESPA_THREAD_STACK_TAG(summary_executor) -VESPA_THREAD_STACK_TAG(field_inverter_executor) +VESPA_THREAD_STACK_TAG(index_field_inverter_executor) +VESPA_THREAD_STACK_TAG(index_field_writer_executor) +VESPA_THREAD_STACK_TAG(attribute_field_writer_executor) VESPA_THREAD_STACK_TAG(field_writer_executor) -VESPA_THREAD_STACK_TAG(attribute_executor) } @@ -50,11 +52,38 @@ ExecutorThreadingService::ExecutorThreadingService(vespalib::ThreadExecutor & sh _masterService(_masterExecutor), _indexService(*_indexExecutor), _summaryService(*_summaryExecutor), - _indexFieldInverter(SequencedTaskExecutor::create(field_inverter_executor, cfg.indexingThreads(), cfg.defaultTaskLimit())), - _indexFieldWriter(SequencedTaskExecutor::create(field_writer_executor, cfg.indexingThreads(), cfg.defaultTaskLimit())), - _attributeFieldWriter(SequencedTaskExecutor::create(attribute_executor, cfg.indexingThreads(), cfg.defaultTaskLimit(), - cfg.optimize(), cfg.kindOfwatermark(), cfg.reactionTime())) + _indexFieldInverter(), + _indexFieldWriter(), + _attributeFieldWriter(), + _field_writer(), + _index_field_inverter_ptr(), + _index_field_writer_ptr(), + _attribute_field_writer_ptr() { + if (cfg.shared_field_writer() == SharedFieldWriterExecutor::INDEX) { + _field_writer = SequencedTaskExecutor::create(field_writer_executor, cfg.indexingThreads() * 2, cfg.defaultTaskLimit()); + _attributeFieldWriter = SequencedTaskExecutor::create(attribute_field_writer_executor, cfg.indexingThreads(), cfg.defaultTaskLimit(), + cfg.optimize(), cfg.kindOfwatermark(), cfg.reactionTime()); + _index_field_inverter_ptr = _field_writer.get(); + _index_field_writer_ptr = _field_writer.get(); + _attribute_field_writer_ptr = _attributeFieldWriter.get(); + + } else if (cfg.shared_field_writer() == SharedFieldWriterExecutor::INDEX_AND_ATTRIBUTE) { + _field_writer = SequencedTaskExecutor::create(field_writer_executor, cfg.indexingThreads() * 3, cfg.defaultTaskLimit(), + cfg.optimize(), cfg.kindOfwatermark(), cfg.reactionTime()); + _index_field_inverter_ptr = _field_writer.get(); + _index_field_writer_ptr = _field_writer.get(); + _attribute_field_writer_ptr = _field_writer.get(); + } else { + // TODO: Add support for shared field writer across all document dbs. + _indexFieldInverter = SequencedTaskExecutor::create(index_field_inverter_executor, cfg.indexingThreads(), cfg.defaultTaskLimit()); + _indexFieldWriter = SequencedTaskExecutor::create(index_field_writer_executor, cfg.indexingThreads(), cfg.defaultTaskLimit()); + _attributeFieldWriter = SequencedTaskExecutor::create(attribute_field_writer_executor, cfg.indexingThreads(), cfg.defaultTaskLimit(), + cfg.optimize(), cfg.kindOfwatermark(), cfg.reactionTime()); + _index_field_inverter_ptr = _indexFieldInverter.get(); + _index_field_writer_ptr = _indexFieldWriter.get(); + _attribute_field_writer_ptr = _attributeFieldWriter.get(); + } } ExecutorThreadingService::~ExecutorThreadingService() = default; @@ -74,11 +103,11 @@ ExecutorThreadingService::syncOnce() { if (!isMasterThread) { _masterExecutor.sync(); } - _attributeFieldWriter->sync_all(); + _attribute_field_writer_ptr->sync_all(); _indexExecutor->sync(); _summaryExecutor->sync(); - _indexFieldInverter->sync_all(); - _indexFieldWriter->sync_all(); + _index_field_inverter_ptr->sync_all(); + _index_field_writer_ptr->sync_all(); if (!isMasterThread) { _masterExecutor.sync(); } @@ -89,13 +118,13 @@ ExecutorThreadingService::shutdown() { _masterExecutor.shutdown(); _masterExecutor.sync(); - _attributeFieldWriter->sync_all(); + _attribute_field_writer_ptr->sync_all(); _summaryExecutor->shutdown(); _summaryExecutor->sync(); _indexExecutor->shutdown(); _indexExecutor->sync(); - _indexFieldInverter->sync_all(); - _indexFieldWriter->sync_all(); + _index_field_inverter_ptr->sync_all(); + _index_field_writer_ptr->sync_all(); } void @@ -103,9 +132,9 @@ ExecutorThreadingService::setTaskLimit(uint32_t taskLimit, uint32_t summaryTaskL { _indexExecutor->setTaskLimit(taskLimit); _summaryExecutor->setTaskLimit(summaryTaskLimit); - _indexFieldInverter->setTaskLimit(taskLimit); - _indexFieldWriter->setTaskLimit(taskLimit); - _attributeFieldWriter->setTaskLimit(taskLimit); + _index_field_inverter_ptr->setTaskLimit(taskLimit); + _index_field_writer_ptr->setTaskLimit(taskLimit); + _attribute_field_writer_ptr->setTaskLimit(taskLimit); } ExecutorThreadingServiceStats @@ -115,24 +144,24 @@ ExecutorThreadingService::getStats() _indexExecutor->getStats(), _summaryExecutor->getStats(), _sharedExecutor.getStats(), - _indexFieldInverter->getStats(), - _indexFieldWriter->getStats(), - _attributeFieldWriter->getStats()); + _index_field_inverter_ptr->getStats(), + _index_field_writer_ptr->getStats(), + _attribute_field_writer_ptr->getStats()); } vespalib::ISequencedTaskExecutor & ExecutorThreadingService::indexFieldInverter() { - return *_indexFieldInverter; + return *_index_field_inverter_ptr; } vespalib::ISequencedTaskExecutor & ExecutorThreadingService::indexFieldWriter() { - return *_indexFieldWriter; + return *_index_field_writer_ptr; } vespalib::ISequencedTaskExecutor & ExecutorThreadingService::attributeFieldWriter() { - return *_attributeFieldWriter; + return *_attribute_field_writer_ptr; } } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.h b/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.h index ed34e518114..51da27586f7 100644 --- a/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.h +++ b/searchcore/src/vespa/searchcore/proton/server/executorthreadingservice.h @@ -27,6 +27,10 @@ private: std::unique_ptr<vespalib::ISequencedTaskExecutor> _indexFieldInverter; std::unique_ptr<vespalib::ISequencedTaskExecutor> _indexFieldWriter; std::unique_ptr<vespalib::ISequencedTaskExecutor> _attributeFieldWriter; + std::unique_ptr<vespalib::ISequencedTaskExecutor> _field_writer; + vespalib::ISequencedTaskExecutor* _index_field_inverter_ptr; + vespalib::ISequencedTaskExecutor* _index_field_writer_ptr; + vespalib::ISequencedTaskExecutor* _attribute_field_writer_ptr; void syncOnce(); public: diff --git a/searchcore/src/vespa/searchcore/proton/server/threading_service_config.cpp b/searchcore/src/vespa/searchcore/proton/server/threading_service_config.cpp index e2d3c4a2366..012d91cb49f 100644 --- a/searchcore/src/vespa/searchcore/proton/server/threading_service_config.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/threading_service_config.cpp @@ -14,12 +14,14 @@ ThreadingServiceConfig::ThreadingServiceConfig(uint32_t indexingThreads_, uint32_t defaultTaskLimit_, OptimizeFor optimize_, uint32_t kindOfWatermark_, - vespalib::duration reactionTime_) + vespalib::duration reactionTime_, + SharedFieldWriterExecutor shared_field_writer_) : _indexingThreads(indexingThreads_), _defaultTaskLimit(defaultTaskLimit_), _optimize(optimize_), _kindOfWatermark(kindOfWatermark_), - _reactionTime(reactionTime_) + _reactionTime(reactionTime_), + _shared_field_writer(shared_field_writer_) { } @@ -60,12 +62,13 @@ ThreadingServiceConfig::make(const ProtonConfig &cfg, double concurrency, const return ThreadingServiceConfig(indexingThreads, cfg.indexing.tasklimit, selectOptimization(cfg.indexing.optimize), cfg.indexing.kindOfWatermark, - vespalib::from_s(cfg.indexing.reactiontime)); + vespalib::from_s(cfg.indexing.reactiontime), + cfg.feeding.sharedFieldWriterExecutor); } ThreadingServiceConfig -ThreadingServiceConfig::make(uint32_t indexingThreads) { - return ThreadingServiceConfig(indexingThreads, 100, OptimizeFor::LATENCY, 0, 10ms); +ThreadingServiceConfig::make(uint32_t indexingThreads, SharedFieldWriterExecutor shared_field_writer_) { + return ThreadingServiceConfig(indexingThreads, 100, OptimizeFor::LATENCY, 0, 10ms, shared_field_writer_); } void @@ -81,7 +84,8 @@ ThreadingServiceConfig::operator==(const ThreadingServiceConfig &rhs) const _defaultTaskLimit == rhs._defaultTaskLimit && _optimize == rhs._optimize && _kindOfWatermark == rhs._kindOfWatermark && - _reactionTime == rhs._reactionTime; + _reactionTime == rhs._reactionTime && + _shared_field_writer == rhs._shared_field_writer; } } diff --git a/searchcore/src/vespa/searchcore/proton/server/threading_service_config.h b/searchcore/src/vespa/searchcore/proton/server/threading_service_config.h index d945e9911fc..5869eaf9c2e 100644 --- a/searchcore/src/vespa/searchcore/proton/server/threading_service_config.h +++ b/searchcore/src/vespa/searchcore/proton/server/threading_service_config.h @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/searchcore/config/config-proton.h> #include <vespa/searchcore/proton/common/hw_info.h> #include <vespa/vespalib/util/executor.h> #include <vespa/vespalib/util/time.h> @@ -16,6 +17,7 @@ class ThreadingServiceConfig { public: using ProtonConfig = const vespa::config::search::core::internal::InternalProtonType; using OptimizeFor = vespalib::Executor::OptimizeFor; + using SharedFieldWriterExecutor = ProtonConfig::Feeding::SharedFieldWriterExecutor; private: uint32_t _indexingThreads; @@ -23,19 +25,22 @@ private: OptimizeFor _optimize; uint32_t _kindOfWatermark; vespalib::duration _reactionTime; // Maximum reaction time to new tasks + SharedFieldWriterExecutor _shared_field_writer; private: - ThreadingServiceConfig(uint32_t indexingThreads_, uint32_t defaultTaskLimit_, OptimizeFor optimize, uint32_t kindOfWatermark, vespalib::duration reactionTime); + ThreadingServiceConfig(uint32_t indexingThreads_, uint32_t defaultTaskLimit_, OptimizeFor optimize_, + uint32_t kindOfWatermark_, vespalib::duration reactionTime_, SharedFieldWriterExecutor shared_field_writer_); public: static ThreadingServiceConfig make(const ProtonConfig &cfg, double concurrency, const HwInfo::Cpu &cpuInfo); - static ThreadingServiceConfig make(uint32_t indexingThreads); + static ThreadingServiceConfig make(uint32_t indexingThreads, SharedFieldWriterExecutor shared_field_writer_ = SharedFieldWriterExecutor::NONE); void update(const ThreadingServiceConfig& cfg); uint32_t indexingThreads() const { return _indexingThreads; } uint32_t defaultTaskLimit() const { return _defaultTaskLimit; } OptimizeFor optimize() const { return _optimize; } uint32_t kindOfwatermark() const { return _kindOfWatermark; } vespalib::duration reactionTime() const { return _reactionTime; } + SharedFieldWriterExecutor shared_field_writer() const { return _shared_field_writer; } bool operator==(const ThreadingServiceConfig &rhs) const; }; diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 5022225fbf3..c9c44af3aa3 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -176,6 +176,7 @@ vespa_define_module( src/tests/memoryindex/compact_words_store src/tests/memoryindex/datastore src/tests/memoryindex/document_inverter + src/tests/memoryindex/document_inverter_collection src/tests/memoryindex/field_index src/tests/memoryindex/field_index_remover src/tests/memoryindex/field_inverter diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index fba488e78aa..3889de5b4c4 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -19,6 +19,8 @@ #include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/btree/btreeroot.hpp> #include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/util/gate.h> +#include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <gtest/gtest.h> @@ -75,7 +77,9 @@ namespace { void myPushDocument(DocumentInverter &inv) { - inv.pushDocuments(std::shared_ptr<vespalib::IDestructorCallback>()); + vespalib::Gate gate; + inv.pushDocuments(std::make_shared<vespalib::GateCallback>(gate)); + gate.await(); } } @@ -329,9 +333,7 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire doc = make_doc10(b); inv.invertDocument(10, *doc); - invertThreads->sync_all(); myPushDocument(inv); - pushThreads->sync_all(); b.startDocument("id:ns:searchdocument::11"). startIndexField("f3"). @@ -339,9 +341,7 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire endField(); doc = b.endDocument(); inv.invertDocument(11, *doc); - invertThreads->sync_all(); myPushDocument(inv); - pushThreads->sync_all(); b.startDocument("id:ns:searchdocument::12"). startIndexField("f3"). @@ -349,9 +349,7 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire endField(); doc = b.endDocument(); inv.invertDocument(12, *doc); - invertThreads->sync_all(); myPushDocument(inv); - pushThreads->sync_all(); IndexBuilder ib(schema); vespalib::string dump2dir = prefix + "dump2"; @@ -469,9 +467,7 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng DocumentInverter inv(inv_context); inv.invertDocument(10, *make_doc10(b)); - invertThreads->sync_all(); myPushDocument(inv); - pushThreads->sync_all(); IndexBuilder ib(_schema); TuneFileIndexing tuneFileIndexing; @@ -614,7 +610,7 @@ TEST_F(FusionTest, require_that_fusion_can_be_stopped) vespalib::rmdir("stopdump3", true); flush_token = std::make_shared<MyFlushToken>(47); ASSERT_FALSE(try_merge_simple_indexes("stopdump3", {"stopdump2"}, flush_token)); - EXPECT_EQ(49, flush_token->get_checks()); + EXPECT_LT(48, flush_token->get_checks()); clean_stopped_fusion_testdirs(); } diff --git a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp index d81df4c63fe..bf9bbf1007a 100644 --- a/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp +++ b/searchlib/src/tests/memoryindex/document_inverter/document_inverter_test.cpp @@ -8,25 +8,23 @@ #include <vespa/searchlib/memoryindex/field_inverter.h> #include <vespa/searchlib/memoryindex/i_field_index_collection.h> #include <vespa/searchlib/memoryindex/word_store.h> +#include <vespa/searchlib/test/memoryindex/mock_field_index_collection.h> #include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <vespa/vespalib/gtest/gtest.h> -namespace search { +namespace search::memoryindex { using document::Document; using index::DocBuilder; +using index::FieldLengthCalculator; using index::Schema; using index::schema::CollectionType; using index::schema::DataType; using vespalib::SequencedTaskExecutor; using vespalib::ISequencedTaskExecutor; -using namespace index; - -namespace memoryindex { - namespace { Document::UP @@ -91,32 +89,6 @@ makeDoc15(DocBuilder &b) } -class MockFieldIndexCollection : public IFieldIndexCollection { - FieldIndexRemover &_remover; - test::OrderedFieldIndexInserter &_inserter; - FieldLengthCalculator &_calculator; - -public: - MockFieldIndexCollection(FieldIndexRemover &remover, - test::OrderedFieldIndexInserter &inserter, - FieldLengthCalculator &calculator) - : _remover(remover), - _inserter(inserter), - _calculator(calculator) - { - } - - FieldIndexRemover &get_remover(uint32_t) override { - return _remover; - } - IOrderedFieldIndexInserter &get_inserter(uint32_t) override { - return _inserter; - } - index::FieldLengthCalculator &get_calculator(uint32_t) override { - return _calculator; - } -}; - VESPA_THREAD_STACK_TAG(invert_executor) VESPA_THREAD_STACK_TAG(push_executor) @@ -129,7 +101,7 @@ struct DocumentInverterTest : public ::testing::Test { FieldIndexRemover _remover; test::OrderedFieldIndexInserter _inserter; FieldLengthCalculator _calculator; - MockFieldIndexCollection _fic; + test::MockFieldIndexCollection _fic; DocumentInverterContext _inv_context; DocumentInverter _inv; @@ -306,6 +278,5 @@ TEST_F(DocumentInverterTest, require_that_empty_document_can_be_inverted) } } -} GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/memoryindex/document_inverter_collection/CMakeLists.txt b/searchlib/src/tests/memoryindex/document_inverter_collection/CMakeLists.txt new file mode 100644 index 00000000000..2697e9c5626 --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_inverter_collection/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_document_inverter_collection_test_app TEST + SOURCES + document_inverter_collection_test.cpp + DEPENDS + searchlib_test + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_document_inverter_collection_test_app COMMAND searchlib_document_inverter_collection_test_app) diff --git a/searchlib/src/tests/memoryindex/document_inverter_collection/document_inverter_collection_test.cpp b/searchlib/src/tests/memoryindex/document_inverter_collection/document_inverter_collection_test.cpp new file mode 100644 index 00000000000..a6d675d05dc --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_inverter_collection/document_inverter_collection_test.cpp @@ -0,0 +1,93 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/index/field_length_calculator.h> +#include <vespa/searchlib/memoryindex/document_inverter.h> +#include <vespa/searchlib/memoryindex/document_inverter_collection.h> +#include <vespa/searchlib/memoryindex/document_inverter_context.h> +#include <vespa/searchlib/memoryindex/field_index_remover.h> +#include <vespa/searchlib/memoryindex/field_inverter.h> +#include <vespa/searchlib/memoryindex/i_field_index_collection.h> +#include <vespa/searchlib/memoryindex/word_store.h> +#include <vespa/searchlib/test/memoryindex/mock_field_index_collection.h> +#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> +#include <vespa/vespalib/util/retain_guard.h> +#include <vespa/vespalib/util/sequencedtaskexecutor.h> +#include <thread> + +#include <vespa/vespalib/gtest/gtest.h> + + + +namespace search::memoryindex { +using document::Document; +using index::FieldLengthCalculator; +using index::Schema; +using vespalib::RetainGuard; +using vespalib::SequencedTaskExecutor; +using vespalib::ISequencedTaskExecutor; + +VESPA_THREAD_STACK_TAG(invert_executor) +VESPA_THREAD_STACK_TAG(push_executor) + +struct DocumentInverterCollectionTest : public ::testing::Test { + Schema _schema; + std::unique_ptr<ISequencedTaskExecutor> _invertThreads; + std::unique_ptr<ISequencedTaskExecutor> _pushThreads; + WordStore _word_store; + FieldIndexRemover _remover; + test::OrderedFieldIndexInserter _inserter; + FieldLengthCalculator _calculator; + test::MockFieldIndexCollection _fic; + DocumentInverterContext _inv_context; + DocumentInverterCollection _inv_collection; + + DocumentInverterCollectionTest() + : _schema(), + _invertThreads(SequencedTaskExecutor::create(invert_executor, 4)), + _pushThreads(SequencedTaskExecutor::create(push_executor, 4)), + _word_store(), + _remover(_word_store), + _inserter(), + _calculator(), + _fic(_remover, _inserter, _calculator), + _inv_context(_schema, *_invertThreads, *_pushThreads, _fic), + _inv_collection(_inv_context, 10) + { + } + +}; + +TEST_F(DocumentInverterCollectionTest, idle_inverter_is_reused) +{ + auto& active = _inv_collection.get_active_inverter(); + for (uint32_t i = 0; i < 4; ++i) { + _inv_collection.switch_active_inverter(); + EXPECT_EQ(&active, &_inv_collection.get_active_inverter()); + } + EXPECT_EQ(1u, _inv_collection.get_num_inverters()); +} + +TEST_F(DocumentInverterCollectionTest, busy_inverter_is_not_reused) +{ + auto& active = _inv_collection.get_active_inverter(); + auto retain = std::make_shared<RetainGuard>(active.get_ref_count()); + _inv_collection.switch_active_inverter(); + EXPECT_NE(&active, &_inv_collection.get_active_inverter()); + EXPECT_EQ(2u, _inv_collection.get_num_inverters()); +} + +TEST_F(DocumentInverterCollectionTest, number_of_inverters_is_limited_by_max) +{ + for (uint32_t i = 0; i < 50; ++i) { + auto& active = _inv_collection.get_active_inverter(); + auto retain = std::make_shared<RetainGuard>(active.get_ref_count()); + _pushThreads->execute(i, [retain(std::move(retain))] () { std::this_thread::sleep_for(10ms); }); + _inv_collection.switch_active_inverter(); + } + EXPECT_LE(4u, _inv_collection.get_num_inverters()); + EXPECT_GE(_inv_collection.get_max_inverters(), _inv_collection.get_num_inverters()); +} + +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index a94e9cf5320..7b52eec78a6 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -19,6 +19,8 @@ #include <vespa/searchlib/test/memoryindex/wrap_inserter.h> #include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/vespalib/util/gate.h> +#include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> #include <vespa/vespalib/gtest/gtest.h> @@ -412,12 +414,12 @@ public: MyInserter::~MyInserter() = default; void -myremove(uint32_t docId, DocumentInverter &inv, - ISequencedTaskExecutor &invertThreads) +myremove(uint32_t docId, DocumentInverter &inv) { inv.removeDocument(docId); - invertThreads.sync_all(); - inv.pushDocuments(std::shared_ptr<vespalib::IDestructorCallback>()); + vespalib::Gate gate; + inv.pushDocuments(std::make_shared<vespalib::GateCallback>(gate)); + gate.await(); } class MyDrainRemoves : IFieldIndexRemoveListener { @@ -443,7 +445,9 @@ public: void myPushDocument(DocumentInverter &inv) { - inv.pushDocuments(std::shared_ptr<vespalib::IDestructorCallback>()); + vespalib::Gate gate; + inv.pushDocuments(std::make_shared<vespalib::GateCallback>(gate)); + gate.await(); } const FeatureStore * @@ -953,9 +957,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(10, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); _b.startDocument("id:ns:searchdocument::20"); _b.startIndexField("f0"). @@ -963,9 +965,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(20, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); _b.startDocument("id:ns:searchdocument::30"); _b.startIndexField("f0"). @@ -994,9 +994,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(30, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); _b.startDocument("id:ns:searchdocument::40"); _b.startIndexField("f0"). @@ -1005,9 +1003,7 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(40, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); _b.startDocument("id:ns:searchdocument::999"); _b.startIndexField("f0"). @@ -1035,12 +1031,9 @@ TEST_F(BasicInverterTest, require_that_inversion_is_working) doc = _b.endDocument(); for (uint32_t docId = 10000; docId < 20000; ++docId) { _inv.invertDocument(docId, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); } - _pushThreads->sync_all(); DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(_fic); LOG(info, "Before feature compaction: allocElems=%zu, usedElems=%zu" @@ -1152,17 +1145,13 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo _b.startIndexField("f1").addStr("a").addStr("c").endField(); Document::UP doc1 = _b.endDocument(); _inv.invertDocument(1, *doc1.get()); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); _b.startDocument("id:ns:searchdocument::2"); _b.startIndexField("f0").addStr("b").addStr("c").endField(); Document::UP doc2 = _b.endDocument(); _inv.invertDocument(2, *doc2.get()); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); EXPECT_TRUE(assertPostingList("[1]", find("a", 0))); EXPECT_TRUE(assertPostingList("[1,2]", find("b", 0))); @@ -1170,8 +1159,7 @@ TEST_F(BasicInverterTest, require_that_inverter_handles_remove_via_document_remo EXPECT_TRUE(assertPostingList("[1]", find("a", 1))); EXPECT_TRUE(assertPostingList("[1]", find("c", 1))); - myremove(1, _inv, *_invertThreads); - _pushThreads->sync_all(); + myremove(1, _inv); EXPECT_TRUE(assertPostingList("[]", find("a", 0))); EXPECT_TRUE(assertPostingList("[2]", find("b", 0))); @@ -1321,11 +1309,8 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(10, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); - SimpleMatchData match_data; { uint32_t fieldId = _schema.getIndexFieldId("iu"); @@ -1397,11 +1382,8 @@ TEST_F(CjkInverterTest, require_that_cjk_indexing_is_working) endField(); doc = _b.endDocument(); _inv.invertDocument(10, *doc); - _invertThreads->sync_all(); myPushDocument(_inv); - _pushThreads->sync_all(); - SimpleMatchData match_data; uint32_t fieldId = _schema.getIndexFieldId("f0"); { @@ -1475,8 +1457,7 @@ struct RemoverTest : public FieldIndexCollectionTest { void remove(uint32_t docId) { DocumentInverterContext inv_context(schema, *_invertThreads, *_pushThreads, fic); DocumentInverter inv(inv_context); - myremove(docId, inv, *_invertThreads); - _pushThreads->sync_all(); + myremove(docId, inv); EXPECT_FALSE(fic.getFieldIndex(0u)->getDocumentRemover(). getStore().get(docId).valid()); } diff --git a/searchlib/src/vespa/searchlib/common/featureset.h b/searchlib/src/vespa/searchlib/common/featureset.h index 3bce5c92640..1415d4b48ef 100644 --- a/searchlib/src/vespa/searchlib/common/featureset.h +++ b/searchlib/src/vespa/searchlib/common/featureset.h @@ -146,5 +146,11 @@ public: const Value *getFeaturesByDocId(uint32_t docId) const; }; -} // namespace search +// An even simpler feature container. Used to pass match features around. +struct FeatureValues { + using Value = FeatureSet::Value; + std::vector<vespalib::string> names; + std::vector<Value> values; // values.size() == names.size() * N +}; +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.cpp b/searchlib/src/vespa/searchlib/engine/searchreply.cpp index 954b3d5141c..8caf254fff7 100644 --- a/searchlib/src/vespa/searchlib/engine/searchreply.cpp +++ b/searchlib/src/vespa/searchlib/engine/searchreply.cpp @@ -12,6 +12,7 @@ SearchReply::SearchReply() groupResult(), coverage(), hits(), + match_features(), request(), my_issues() { } @@ -19,15 +20,16 @@ SearchReply::SearchReply() SearchReply::~SearchReply() = default; SearchReply::SearchReply(const SearchReply &rhs) - : _distributionKey (rhs._distributionKey), - totalHitCount(rhs.totalHitCount), - sortIndex (rhs.sortIndex), - sortData (rhs.sortData), - groupResult (rhs.groupResult), - coverage (rhs.coverage), - hits (rhs.hits), - request(), // NB not copied - my_issues() // NB not copied + : _distributionKey (rhs._distributionKey), + totalHitCount (rhs.totalHitCount), + sortIndex (rhs.sortIndex), + sortData (rhs.sortData), + groupResult (rhs.groupResult), + coverage (rhs.coverage), + hits (rhs.hits), + match_features (rhs.match_features), + request(), // NB not copied + my_issues() // NB not copied { } } diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.h b/searchlib/src/vespa/searchlib/engine/searchreply.h index b23a5fc192e..25418a698c2 100644 --- a/searchlib/src/vespa/searchlib/engine/searchreply.h +++ b/searchlib/src/vespa/searchlib/engine/searchreply.h @@ -5,6 +5,7 @@ #include <vespa/document/base/globalid.h> #include <vespa/searchlib/common/hitrank.h> #include <vespa/searchlib/common/unique_issues.h> +#include <vespa/searchlib/common/featureset.h> #include <vespa/vespalib/util/array.h> #include <vespa/searchlib/engine/searchrequest.h> #include <vector> @@ -70,6 +71,7 @@ public: vespalib::Array<char> groupResult; Coverage coverage; std::vector<Hit> hits; + FeatureValues match_features; PropertiesMap propertiesMap; SearchRequest::UP request; @@ -84,4 +86,3 @@ public: }; } - diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt index 021e5f9cab8..e845201f5c6 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(searchlib_memoryindex OBJECT SOURCES compact_words_store.cpp document_inverter.cpp + document_inverter_collection.cpp document_inverter_context.cpp feature_store.cpp field_index.cpp diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp index f42cfa25877..879942ea5d7 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.cpp @@ -6,12 +6,15 @@ #include "field_inverter.h" #include "url_field_inverter.h" #include <vespa/vespalib/util/isequencedtaskexecutor.h> +#include <vespa/vespalib/util/retain_guard.h> namespace search::memoryindex { using document::Document; using index::Schema; using search::index::FieldLengthCalculator; +using vespalib::ISequencedTaskExecutor; +using vespalib::RetainGuard; DocumentInverter::DocumentInverter(DocumentInverterContext& context) : _context(context), @@ -109,19 +112,50 @@ DocumentInverter::removeDocuments(LidVector lids) } } +namespace { + +template <typename Inverter> +void push_documents_helper(ISequencedTaskExecutor& invert_threads, + ISequencedTaskExecutor& push_threads, + Inverter &inverter, + uint32_t field_id, + std::shared_ptr<vespalib::IDestructorCallback> on_write_done, + std::shared_ptr<RetainGuard> retain) +{ + auto invert_id = invert_threads.getExecutorId(field_id); + auto push_id = push_threads.getExecutorId(field_id); + invert_threads.execute(invert_id, + [&push_threads, push_id, &inverter, retain(std::move(retain)), on_write_done(std::move(on_write_done))] () mutable + { + push_threads.execute(push_id, + [&inverter, retain(std::move(retain)), on_write_done(std::move(on_write_done))]() + { + inverter.applyRemoves(); + inverter.pushDocuments(); + }); + }); +} + +} + void DocumentInverter::pushDocuments(const std::shared_ptr<vespalib::IDestructorCallback> &onWriteDone) { - uint32_t fieldId = 0; + auto retain = std::make_shared<RetainGuard>(_ref_count); + auto& schema_index_fields = _context.get_schema_index_fields(); + auto& invert_threads = _context.get_invert_threads(); auto& push_threads = _context.get_push_threads(); - for (auto &inverter : _inverters) { - push_threads.execute(fieldId,[inverter(inverter.get()), onWriteDone]() { - inverter->applyRemoves(); - inverter->pushDocuments(); - }); - ++fieldId; + for (uint32_t field_id : schema_index_fields._textFields) { + auto& inverter = *_inverters[field_id]; + push_documents_helper(invert_threads, push_threads, inverter, field_id, onWriteDone, retain); + } + uint32_t uri_field_id = 0; + for (const auto& uri_field : schema_index_fields._uriFields) { + uint32_t field_id = uri_field._all; + auto& inverter = *_urlInverters[uri_field_id]; + push_documents_helper(invert_threads, push_threads, inverter, field_id, onWriteDone, retain); + ++uri_field_id; } } } - diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h index cce6eda615d..f69d482f565 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h @@ -2,6 +2,8 @@ #pragma once +#include <vespa/vespalib/util/monitored_refcount.h> + #include <cstdint> #include <memory> #include <vector> @@ -41,6 +43,7 @@ private: std::vector<std::unique_ptr<FieldInverter>> _inverters; std::vector<std::unique_ptr<UrlFieldInverter>> _urlInverters; + vespalib::MonitoredRefCount _ref_count; public: /** @@ -92,6 +95,9 @@ public: const std::vector<std::unique_ptr<FieldInverter> > & getInverters() const { return _inverters; } uint32_t getNumFields() const { return _inverters.size(); } + void wait_for_zero_ref_count() { _ref_count.waitForZeroRefCount(); } + bool has_zero_ref_count() { return _ref_count.has_zero_ref_count(); } + vespalib::MonitoredRefCount& get_ref_count() noexcept { return _ref_count; } }; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp new file mode 100644 index 00000000000..d9b27735489 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.cpp @@ -0,0 +1,45 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "document_inverter_collection.h" +#include "document_inverter.h" +#include <cassert> + +namespace search::memoryindex { + +DocumentInverterCollection::DocumentInverterCollection(DocumentInverterContext& context, uint32_t max_inverters) + : _context(context), + _free_inverters(), + _inflight_inverters(), + _active_inverter(std::make_unique<DocumentInverter>(_context)), + _num_inverters(1), + _max_inverters(max_inverters) +{ +} + +DocumentInverterCollection::~DocumentInverterCollection() = default; + +void +DocumentInverterCollection::switch_active_inverter() +{ + _inflight_inverters.emplace_back(std::move(_active_inverter)); + while (!_inflight_inverters.empty() && _inflight_inverters.front()->has_zero_ref_count()) { + _free_inverters.emplace_back(std::move(_inflight_inverters.front())); + _inflight_inverters.pop_front(); + } + if (!_free_inverters.empty()) { + _active_inverter = std::move(_free_inverters.back()); + _free_inverters.pop_back(); + return; + } + if (_num_inverters >= _max_inverters) { + assert(!_inflight_inverters.empty()); + _active_inverter = std::move(_inflight_inverters.front()); + _inflight_inverters.pop_front(); + _active_inverter->wait_for_zero_ref_count(); + return; + } + _active_inverter = std::make_unique<DocumentInverter>(_context); + ++_num_inverters; +} + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h new file mode 100644 index 00000000000..d07cca67e08 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter_collection.h @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <deque> +#include <memory> +#include <vector> + +namespace search::memoryindex { + +class DocumentInverter; +class DocumentInverterContext; + +/* + * Class containing the document inverters used by a memory index. + */ +class DocumentInverterCollection { + DocumentInverterContext& _context; + std::vector<std::unique_ptr<DocumentInverter>> _free_inverters; + std::deque<std::unique_ptr<DocumentInverter>> _inflight_inverters; + std::unique_ptr<DocumentInverter> _active_inverter; + uint32_t _num_inverters; + uint32_t _max_inverters; +public: + DocumentInverterCollection(DocumentInverterContext& context, uint32_t max_inverters); + ~DocumentInverterCollection(); + DocumentInverter& get_active_inverter() noexcept { return *_active_inverter; } + void switch_active_inverter(); + uint32_t get_num_inverters() const noexcept { return _num_inverters; } + uint32_t get_max_inverters() const noexcept { return _max_inverters; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index 1e59d7ff83b..15cdef2f664 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "document_inverter.h" +#include "document_inverter_collection.h" #include "document_inverter_context.h" #include "field_index_collection.h" #include "memory_index.h" @@ -59,9 +60,7 @@ MemoryIndex::MemoryIndex(const Schema& schema, _pushThreads(pushThreads), _fieldIndexes(std::make_unique<FieldIndexCollection>(_schema, inspector)), _inverter_context(std::make_unique<DocumentInverterContext>(_schema, _invertThreads, _pushThreads, *_fieldIndexes)), - _inverter0(std::make_unique<DocumentInverter>(*_inverter_context)), - _inverter1(std::make_unique<DocumentInverter>(*_inverter_context)), - _inverter(_inverter0.get()), + _inverters(std::make_unique<DocumentInverterCollection>(*_inverter_context, 4)), _frozen(false), _maxDocId(0), // docId 0 is reserved _numDocs(0), @@ -88,7 +87,8 @@ MemoryIndex::insertDocument(uint32_t docId, const document::Document &doc) return; } updateMaxDocId(docId); - _inverter->invertDocument(docId, doc); + auto& inverter = _inverters->get_active_inverter(); + inverter.invertDocument(docId, doc); if (_indexedDocs.insert(docId).second) { incNumDocs(); } @@ -108,22 +108,16 @@ MemoryIndex::removeDocuments(LidVector lids) decNumDocs(); } } - _inverter->removeDocuments(std::move(lids)); + auto& inverter = _inverters->get_active_inverter(); + inverter.removeDocuments(std::move(lids)); } void MemoryIndex::commit(const std::shared_ptr<vespalib::IDestructorCallback> &onWriteDone) { - _invertThreads.sync_all(); // drain inverting into this inverter - _pushThreads.sync_all(); // drain use of other inverter - _inverter->pushDocuments(onWriteDone); - flipInverter(); -} - -void -MemoryIndex::flipInverter() -{ - _inverter = (_inverter != _inverter0.get()) ? _inverter0.get(): _inverter1.get(); + auto& inverter = _inverters->get_active_inverter(); + inverter.pushDocuments(onWriteDone); + _inverters->switch_active_inverter(); } void diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 1ea9f34b48c..760a4ecfb0f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -20,7 +20,7 @@ namespace document { class Document; } namespace search::memoryindex { -class DocumentInverter; +class DocumentInverterCollection; class DocumentInverterContext; class FieldIndexCollection; @@ -48,9 +48,7 @@ private: ISequencedTaskExecutor &_pushThreads; std::unique_ptr<FieldIndexCollection> _fieldIndexes; std::unique_ptr<DocumentInverterContext> _inverter_context; - std::unique_ptr<DocumentInverter> _inverter0; - std::unique_ptr<DocumentInverter> _inverter1; - DocumentInverter *_inverter; + std::unique_ptr<DocumentInverterCollection> _inverters; bool _frozen; uint32_t _maxDocId; uint32_t _numDocs; @@ -79,8 +77,6 @@ private: } } - void flipInverter(); - public: using UP = std::unique_ptr<MemoryIndex>; using SP = std::shared_ptr<MemoryIndex>; diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp index 8a6efbea080..10918a83c50 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.cpp @@ -363,6 +363,32 @@ UrlFieldInverter::removeDocument(uint32_t docId) _hostname->removeDocument(docId); } +void +UrlFieldInverter::applyRemoves() +{ + _all->applyRemoves(); + _scheme->applyRemoves(); + _host->applyRemoves(); + _port->applyRemoves(); + _path->applyRemoves(); + _query->applyRemoves(); + _fragment->applyRemoves(); + _hostname->applyRemoves(); +} + +void +UrlFieldInverter::pushDocuments() +{ + _all->pushDocuments(); + _scheme->pushDocuments(); + _host->pushDocuments(); + _port->pushDocuments(); + _path->pushDocuments(); + _query->pushDocuments(); + _fragment->pushDocuments(); + _hostname->pushDocuments(); +} + UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType, FieldInverter *all, FieldInverter *scheme, diff --git a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h index 76f694f2d8b..2dbe3c48959 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/url_field_inverter.h @@ -63,6 +63,8 @@ public: void setUseAnnotations(bool useAnnotations) { _useAnnotations = useAnnotations; } + void applyRemoves(); + void pushDocuments(); }; } diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt index 90921cb53b3..c7cc6d77ede 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt @@ -1,5 +1,6 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(searchlib_searchlib_test_memoryindex INTERFACE +vespa_add_library(searchlib_searchlib_test_memoryindex SOURCES + mock_field_index_collection.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.cpp b/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.cpp new file mode 100644 index 00000000000..1392a92853d --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.cpp @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "mock_field_index_collection.h" +#include "ordered_field_index_inserter.h" + +namespace search::memoryindex::test { + +MockFieldIndexCollection::MockFieldIndexCollection(FieldIndexRemover& remover, + OrderedFieldIndexInserter& inserter, + index::FieldLengthCalculator& calculator) + : _remover(remover), + _inserter(inserter), + _calculator(calculator) +{ +} + +MockFieldIndexCollection::~MockFieldIndexCollection() = default; + +FieldIndexRemover& +MockFieldIndexCollection::get_remover(uint32_t) +{ + return _remover; +} + +IOrderedFieldIndexInserter& +MockFieldIndexCollection::get_inserter(uint32_t) +{ + return _inserter; +} + +index::FieldLengthCalculator& +MockFieldIndexCollection::get_calculator(uint32_t) +{ + return _calculator; +} + +} diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.h b/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.h new file mode 100644 index 00000000000..0dd7127f0ae --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/memoryindex/mock_field_index_collection.h @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/memoryindex/i_field_index_collection.h> + +namespace search::index { class FieldLengthCalculator; } +namespace search::memoryindex { class FieldIndexRemover; } + +namespace search::memoryindex::test { + +class OrderedFieldIndexInserter; + +/* + * Mockup of field index collection used by unit tests. + */ +class MockFieldIndexCollection : public IFieldIndexCollection { + FieldIndexRemover& _remover; + OrderedFieldIndexInserter& _inserter; + index::FieldLengthCalculator& _calculator; + +public: + MockFieldIndexCollection(FieldIndexRemover& remover, + OrderedFieldIndexInserter& inserter, + index::FieldLengthCalculator& calculator); + ~MockFieldIndexCollection() override; + FieldIndexRemover& get_remover(uint32_t) override; + IOrderedFieldIndexInserter& get_inserter(uint32_t) override; + index::FieldLengthCalculator& get_calculator(uint32_t) override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index 793a0c31b1c..de7023499a1 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/searchlib/index/docidandfeatures.h> #include <vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h> #include <sstream> diff --git a/vespalib/src/vespa/vespalib/util/monitored_refcount.cpp b/vespalib/src/vespa/vespalib/util/monitored_refcount.cpp index 4376e26bb66..c89bc9d417b 100644 --- a/vespalib/src/vespa/vespalib/util/monitored_refcount.cpp +++ b/vespalib/src/vespa/vespalib/util/monitored_refcount.cpp @@ -41,4 +41,11 @@ MonitoredRefCount::waitForZeroRefCount() _cv.wait(guard, [this] { return (_refCount == 0u); }); } +bool +MonitoredRefCount::has_zero_ref_count() +{ + std::unique_lock<std::mutex> guard(_lock); + return (_refCount == 0u); +} + } diff --git a/vespalib/src/vespa/vespalib/util/monitored_refcount.h b/vespalib/src/vespa/vespalib/util/monitored_refcount.h index 465284b6fd3..101ac54cd32 100644 --- a/vespalib/src/vespa/vespalib/util/monitored_refcount.h +++ b/vespalib/src/vespa/vespalib/util/monitored_refcount.h @@ -24,6 +24,7 @@ public: MonitoredRefCount(); virtual ~MonitoredRefCount(); void waitForZeroRefCount(); + bool has_zero_ref_count(); }; } |