diff options
200 files changed, 4924 insertions, 1873 deletions
diff --git a/application/abi-spec.json b/application/abi-spec.json index c95039b4c1f..95a9d2a524a 100644 --- a/application/abi-spec.json +++ b/application/abi-spec.json @@ -89,7 +89,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -128,7 +129,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -159,7 +161,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -190,7 +193,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/build_settings.cmake b/build_settings.cmake index 63535062c9b..d0bb50360da 100644 --- a/build_settings.cmake +++ b/build_settings.cmake @@ -54,20 +54,16 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" ST if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin") set(VESPA_ATOMIC_LIB "") set(VESPA_GCC_LIB "") - set(VESPA_STDCXX_FS_LIB "") else() if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0) set(VESPA_GCC_LIB "gcc") - set(VESPA_STDCXX_FS_LIB "stdc++fs") else() set(VESPA_GCC_LIB "") - set(VESPA_STDCXX_FS_LIB "") endif() endif() else() set(CXX_SPECIFIC_WARN_OPTS "-Wnoexcept -Wsuggest-override -Wnon-virtual-dtor -Wformat-security -Wmismatched-tags") set(VESPA_GCC_LIB "gcc") - set(VESPA_STDCXX_FS_LIB "stdc++fs") endif() # Detect uring shared library. 
diff --git a/client/go/internal/cli/auth/zts/zts.go b/client/go/internal/cli/auth/zts/zts.go index 0f73ea5912d..19ea6e48b0c 100644 --- a/client/go/internal/cli/auth/zts/zts.go +++ b/client/go/internal/cli/auth/zts/zts.go @@ -80,18 +80,21 @@ func (c *Client) AccessToken() (Token, error) { return Token{}, err } defer response.Body.Close() - + b, err := io.ReadAll(response.Body) + if err != nil { + return Token{}, err + } if response.StatusCode != http.StatusOK { - return Token{}, fmt.Errorf("zts: got status %d from %s", response.StatusCode, c.tokenURL.String()) + body := string(b) + if body == "" { + body = "no body" + } + return Token{}, fmt.Errorf("zts: got status %d (%s) from %s", response.StatusCode, body, c.tokenURL.String()) } var ztsResponse struct { AccessToken string `json:"access_token"` ExpirySecs int `json:"expires_in"` } - b, err := io.ReadAll(response.Body) - if err != nil { - return Token{}, err - } if err := json.Unmarshal(b, &ztsResponse); err != nil { return Token{}, err } diff --git a/client/go/internal/cli/auth/zts/zts_test.go b/client/go/internal/cli/auth/zts/zts_test.go index 15c60ed46d7..ad1ed66f460 100644 --- a/client/go/internal/cli/auth/zts/zts_test.go +++ b/client/go/internal/cli/auth/zts/zts_test.go @@ -46,6 +46,14 @@ func TestAccessToken(t *testing.T) { } expiresAt = clock.now().Add(30 * time.Minute) assertToken(t, Token{Value: "bar", ExpiresAt: expiresAt}, token) + + // Request body is included in error + httpClient.NextResponseString(503, "broken!") + _, err = client.AccessToken() + want := "zts: got status 503 (broken!) 
from http://example.com/zts/v1/oauth2/token" + if got := err.Error(); got != want { + t.Errorf("got err=%q, want %q", got, want) + } } func assertToken(t *testing.T, want, got Token) { diff --git a/client/go/internal/cli/cmd/cert.go b/client/go/internal/cli/cmd/cert.go index ccfce5eb7bb..f7320e37626 100644 --- a/client/go/internal/cli/cmd/cert.go +++ b/client/go/internal/cli/cmd/cert.go @@ -114,11 +114,11 @@ func doCert(cli *CLI, overwriteCertificate, skipApplicationPackage bool, args [] if !overwriteCertificate { hint := "Use -f flag to force overwriting" - if util.PathExists(privateKeyFile) { - return errHint(fmt.Errorf("private key %s already exists", color.CyanString(privateKeyFile)), hint) + if util.PathExists(privateKeyFile.path) { + return errHint(fmt.Errorf("private key %s already exists", color.CyanString(privateKeyFile.path)), hint) } - if util.PathExists(certificateFile) { - return errHint(fmt.Errorf("certificate %s already exists", color.CyanString(certificateFile)), hint) + if util.PathExists(certificateFile.path) { + return errHint(fmt.Errorf("certificate %s already exists", color.CyanString(certificateFile.path)), hint) } } @@ -126,14 +126,14 @@ func doCert(cli *CLI, overwriteCertificate, skipApplicationPackage bool, args [] if err != nil { return err } - if err := keyPair.WriteCertificateFile(certificateFile, overwriteCertificate); err != nil { + if err := keyPair.WriteCertificateFile(certificateFile.path, overwriteCertificate); err != nil { return fmt.Errorf("could not write certificate: %w", err) } - if err := keyPair.WritePrivateKeyFile(privateKeyFile, overwriteCertificate); err != nil { + if err := keyPair.WritePrivateKeyFile(privateKeyFile.path, overwriteCertificate); err != nil { return fmt.Errorf("could not write private key: %w", err) } - cli.printSuccess("Certificate written to ", color.CyanString(certificateFile)) - cli.printSuccess("Private key written to ", color.CyanString(privateKeyFile)) + cli.printSuccess("Certificate written to ", 
color.CyanString(certificateFile.path)) + cli.printSuccess("Private key written to ", color.CyanString(privateKeyFile.path)) if !skipApplicationPackage { return doCertAdd(cli, overwriteCertificate, args) } diff --git a/client/go/internal/cli/cmd/config.go b/client/go/internal/cli/cmd/config.go index eb79a2004c4..0a03686dd33 100644 --- a/client/go/internal/cli/cmd/config.go +++ b/client/go/internal/cli/cmd/config.go @@ -384,24 +384,43 @@ func (c *Config) caCertificatePath() string { return c.environment["VESPA_CLI_DATA_PLANE_CA_CERT_FILE"] } -func (c *Config) certificatePath(app vespa.ApplicationID, targetType string) (string, error) { - if override, ok := c.environment["VESPA_CLI_DATA_PLANE_CERT_FILE"]; ok { - return override, nil - } - if targetType == vespa.TargetHosted { - return athenzPath("cert") - } - return c.applicationFilePath(app, "data-plane-public-cert.pem") +type credentialsFile struct { + path string + optional bool } -func (c *Config) privateKeyPath(app vespa.ApplicationID, targetType string) (string, error) { - if override, ok := c.environment["VESPA_CLI_DATA_PLANE_KEY_FILE"]; ok { - return override, nil +func (c *Config) credentialsFile(app vespa.ApplicationID, targetType string, cert bool) (credentialsFile, error) { + envVar := "VESPA_CLI_DATA_PLANE_CERT_FILE" + athenzFile := "cert" + applicationFile := "data-plane-public-cert.pem" + if !cert { + envVar = "VESPA_CLI_DATA_PLANE_KEY_FILE" + athenzFile = "key" + applicationFile = "data-plane-private-key.pem" + } + if override, ok := c.environment[envVar]; ok { + return credentialsFile{override, false}, nil } if targetType == vespa.TargetHosted { - return athenzPath("key") + path, err := athenzPath(athenzFile) + if err != nil { + return credentialsFile{}, err + } + return credentialsFile{path, false}, nil } - return c.applicationFilePath(app, "data-plane-private-key.pem") + path, err := c.applicationFilePath(app, applicationFile) + if err != nil { + return credentialsFile{}, err + } + return 
credentialsFile{path, true}, nil +} + +func (c *Config) certificatePath(app vespa.ApplicationID, targetType string) (credentialsFile, error) { + return c.credentialsFile(app, targetType, true) +} + +func (c *Config) privateKeyPath(app vespa.ApplicationID, targetType string) (credentialsFile, error) { + return c.credentialsFile(app, targetType, false) } func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (vespa.TLSOptions, error) { @@ -413,16 +432,13 @@ func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (ves // CA certificate if caCertOk { options.CACertificate = []byte(caCertText) - } else { - caCertFile := c.caCertificatePath() - if caCertFile != "" { - b, err := os.ReadFile(caCertFile) - if err != nil { - return options, err - } - options.CACertificate = b - options.CACertificateFile = caCertFile + } else if caCertFile := c.caCertificatePath(); caCertFile != "" { + b, err := os.ReadFile(caCertFile) + if err != nil { + return options, err } + options.CACertificate = b + options.CACertificateFile = caCertFile } // Certificate and private key if certOk && keyOk { @@ -440,15 +456,17 @@ func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (ves if err != nil { return vespa.TLSOptions{}, err } - kp, err := tls.LoadX509KeyPair(certFile, keyFile) + kp, err := tls.LoadX509KeyPair(certFile.path, keyFile.path) + allowMissing := os.IsNotExist(err) && keyFile.optional && certFile.optional if err == nil { options.KeyPair = []tls.Certificate{kp} - options.PrivateKeyFile = keyFile - options.CertificateFile = certFile - } else if err != nil && !os.IsNotExist(err) { + options.PrivateKeyFile = keyFile.path + options.CertificateFile = certFile.path + } else if err != nil && !allowMissing { return vespa.TLSOptions{}, err } } + // If we found a key pair, parse it and check expiry if options.KeyPair != nil { cert, err := x509.ParseCertificate(options.KeyPair[0].Certificate[0]) if err != nil { diff --git 
a/client/go/internal/cli/cmd/config_test.go b/client/go/internal/cli/cmd/config_test.go index 14a3cf7cbbc..b00be38d021 100644 --- a/client/go/internal/cli/cmd/config_test.go +++ b/client/go/internal/cli/cmd/config_test.go @@ -253,6 +253,12 @@ func TestConfigReadTLSOptions(t *testing.T) { PrivateKeyFile: defaultKeyFile, }, ) + + // Key pair files specified through environment are required + nonExistentFile := filepath.Join(homeDir, "non-existent-file") + cli, _, _ := newTestCLI(t, "VESPA_CLI_DATA_PLANE_CERT_FILE="+nonExistentFile, "VESPA_CLI_DATA_PLANE_KEY_FILE="+nonExistentFile) + _, err := cli.config.readTLSOptions(app, vespa.TargetLocal) + assert.True(t, os.IsNotExist(err)) } func TestConfigTargetResolving(t *testing.T) { diff --git a/client/go/internal/cli/cmd/prod.go b/client/go/internal/cli/cmd/prod.go index 14fbae68b17..3b37197340f 100644 --- a/client/go/internal/cli/cmd/prod.go +++ b/client/go/internal/cli/cmd/prod.go @@ -102,8 +102,17 @@ https://cloud.vespa.ai/en/reference/deployment`, } } +type prodDeployOptions struct { + copyCert bool + risk int + commit string + description string + authorEmail string + sourceURL string +} + func newProdDeployCmd(cli *CLI) *cobra.Command { - copyCert := false + var options prodDeployOptions cmd := &cobra.Command{ Use: "deploy", Aliases: []string{"submit"}, // TODO: Remove in Vespa 9 @@ -118,7 +127,9 @@ services.xml. 
For more information about production deployments in Vespa Cloud see: https://cloud.vespa.ai/en/production-deployment -https://cloud.vespa.ai/en/automated-deployments`, +https://cloud.vespa.ai/en/automated-deployments +https://cloud.vespa.ai/en/reference/vespa-cloud-api#submission-properties +`, DisableAutoGenTag: true, SilenceUsage: true, Example: `$ mvn package # when adding custom Java components @@ -142,21 +153,33 @@ $ vespa prod deploy`, if err := verifyTests(cli, pkg); err != nil { return err } - opts := vespa.DeploymentOptions{ApplicationPackage: pkg, Target: target} - if err := maybeCopyCertificate(copyCert, true, cli, target, pkg); err != nil { + if err := maybeCopyCertificate(options.copyCert, true, cli, target, pkg); err != nil { return err } - if err := vespa.Submit(opts); err != nil { + deployment := vespa.DeploymentOptions{ApplicationPackage: pkg, Target: target} + submission := vespa.Submission{ + Risk: options.risk, + Commit: options.commit, + Description: options.description, + AuthorEmail: options.authorEmail, + SourceURL: options.sourceURL, + } + if err := vespa.Submit(deployment, submission); err != nil { return fmt.Errorf("could not deploy application: %w", err) } else { cli.printSuccess("Deployed ", color.CyanString(pkg.Path)) log.Printf("See %s for deployment progress\n", color.CyanString(fmt.Sprintf("%s/tenant/%s/application/%s/prod/deployment", - opts.Target.Deployment().System.ConsoleURL, opts.Target.Deployment().Application.Tenant, opts.Target.Deployment().Application.Application))) + deployment.Target.Deployment().System.ConsoleURL, deployment.Target.Deployment().Application.Tenant, deployment.Target.Deployment().Application.Application))) } return nil }, } - cmd.Flags().BoolVarP(©Cert, "add-cert", "A", false, `Copy certificate of the configured application to the current application package`) + cmd.Flags().BoolVarP(&options.copyCert, "add-cert", "A", false, "Copy certificate of the configured application to the current application 
package (default false)") + cmd.Flags().IntVarP(&options.risk, "risk", "", 0, "The risk score of source code being deployed. 0 to ignore (default 0)") + cmd.Flags().StringVarP(&options.commit, "commit", "", "", "Identifier of the source code being deployed. For example a commit hash") + cmd.Flags().StringVarP(&options.description, "description", "", "", "Description of the source code being deployed. For example a git commit message") + cmd.Flags().StringVarP(&options.authorEmail, "author-email", "", "", "Email of the author of the commit being deployed") + cmd.Flags().StringVarP(&options.sourceURL, "source-url", "", "", "URL which points to the source code being deployed. For example the build job running the submission") return cmd } diff --git a/client/go/internal/vespa/deploy.go b/client/go/internal/vespa/deploy.go index d04b8ba631c..ae4d4678d66 100644 --- a/client/go/internal/vespa/deploy.go +++ b/client/go/internal/vespa/deploy.go @@ -51,6 +51,14 @@ type DeploymentOptions struct { Version version.Version } +type Submission struct { + Risk int `json:"risk,omitempty"` + Commit string `json:"commit,omitempty"` + Description string `json:"description,omitempty"` + AuthorEmail string `json:"authorEmail,omitempty"` + SourceURL string `json:"sourceUrl,omitempty"` +} + type LogLinePrepareResponse struct { Time int64 Level string @@ -247,7 +255,7 @@ func copyToPart(dst *multipart.Writer, src io.Reader, fieldname, filename string return nil } -func Submit(opts DeploymentOptions) error { +func Submit(opts DeploymentOptions, submission Submission) error { if !opts.Target.IsCloud() { return fmt.Errorf("%s: deploy is unsupported by %s target", opts, opts.Target.Type()) } @@ -261,7 +269,11 @@ func Submit(opts DeploymentOptions) error { } var body bytes.Buffer writer := multipart.NewWriter(&body) - if err := copyToPart(writer, strings.NewReader("{}"), "submitOptions", ""); err != nil { + submitOptions, err := json.Marshal(submission) + if err != nil { + return err + } + if 
err := copyToPart(writer, bytes.NewReader(submitOptions), "submitOptions", ""); err != nil { return err } applicationZip, err := opts.ApplicationPackage.zipReader(false) diff --git a/client/go/internal/vespa/deploy_test.go b/client/go/internal/vespa/deploy_test.go index ddb500d26e3..39a9f2bcdf2 100644 --- a/client/go/internal/vespa/deploy_test.go +++ b/client/go/internal/vespa/deploy_test.go @@ -69,6 +69,41 @@ func TestDeployCloud(t *testing.T) { assert.Equal(t, string(values["deployOptions"]), `{"vespaVersion":"1.2.3"}`) } +func TestSubmit(t *testing.T) { + httpClient := mock.HTTPClient{} + target := createCloudTarget(t, "http://vespacloud", io.Discard) + cloudTarget, ok := target.(*cloudTarget) + require.True(t, ok) + cloudTarget.httpClient = &httpClient + appDir, _ := mock.ApplicationPackageDir(t, false, true) + opts := DeploymentOptions{ + Target: target, + ApplicationPackage: ApplicationPackage{Path: appDir}, + } + httpClient.NextResponseString(200, "ok") + require.Nil(t, Submit(opts, Submission{})) + require.Nil(t, httpClient.LastRequest.ParseMultipartForm(1<<20)) + assert.Equal(t, "{}", httpClient.LastRequest.FormValue("submitOptions")) + f, err := httpClient.LastRequest.MultipartForm.File["applicationZip"][0].Open() + require.Nil(t, err) + defer f.Close() + contents := make([]byte, 5) + f.Read(contents) + assert.Equal(t, "PK\x03\x04\x14", string(contents)) + + require.Nil(t, Submit(opts, Submission{ + Risk: 1, + Commit: "sha", + Description: "broken garbage", + AuthorEmail: "foo@example.com", + SourceURL: "https://github.com/foo/repo", + })) + require.Nil(t, httpClient.LastRequest.ParseMultipartForm(1<<20)) + assert.Equal(t, + "{\"risk\":1,\"commit\":\"sha\",\"description\":\"broken garbage\",\"authorEmail\":\"foo@example.com\",\"sourceUrl\":\"https://github.com/foo/repo\"}", + httpClient.LastRequest.FormValue("submitOptions")) +} + func TestApplicationFromString(t *testing.T) { app, err := ApplicationFromString("t1.a1.i1") assert.Nil(t, err) diff --git 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index c9f5cfeb9c8..8453fb3450c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -13,9 +13,13 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -59,7 +63,7 @@ public class NodeStateChangeChecker { this.clusterInfo = cluster.clusterInfo(); this.inMoratorium = inMoratorium; this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown(); - if ( ! groupVisiting.isHierarchical() && maxNumberOfGroupsAllowedToBeDown > 1) + if ( ! 
isGroupedSetup() && maxNumberOfGroupsAllowedToBeDown > 1) throw new IllegalArgumentException("Cannot have both 1 group and maxNumberOfGroupsAllowedToBeDown > 1"); } @@ -153,16 +157,22 @@ public class NodeStateChangeChecker { if (result.notAllowed()) return result; - if (maxNumberOfGroupsAllowedToBeDown == -1) { - result = checkIfAnotherNodeInAnotherGroupHasWantedState(nodeInfo); + if (isGroupedSetup()) { + if (maxNumberOfGroupsAllowedToBeDown == -1) { + result = checkIfAnotherNodeInAnotherGroupHasWantedState(nodeInfo); + if (result.notAllowed()) + return result; + if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) + return allow(); + } else { + var optionalResult = checkIfOtherNodesHaveWantedState(nodeInfo, newDescription, clusterState); + if (optionalResult.isPresent()) + return optionalResult.get(); + } + } else { + result = otherNodeHasWantedState(nodeInfo); if (result.notAllowed()) return result; - if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription)) - return allow(); - } else { - var optionalResult = checkIfOtherNodesHaveWantedState(nodeInfo, newDescription, clusterState); - if (optionalResult.isPresent()) - return optionalResult.get(); } if (nodeIsDown(clusterState, nodeInfo)) { @@ -185,6 +195,10 @@ public class NodeStateChangeChecker { return allow(); } + private boolean isGroupedSetup() { + return groupVisiting.isHierarchical(); + } + /** Refuse to override whatever an operator or unknown entity is doing. */ private static Result checkIfStateSetWithDifferentDescription(NodeInfo nodeInfo, String newDescription) { State oldWantedState = nodeInfo.getUserWantedState().getState(); @@ -196,82 +210,88 @@ public class NodeStateChangeChecker { } /** - * Returns a disallow-result if there is another node (in another group, if hierarchical) - * that has a wanted state != UP. We disallow more than 1 suspended node/group at a time. + * Returns a disallow-result if there is another node in another group + * that has a wanted state != UP. 
We disallow more than 1 suspended group at a time. */ private Result checkIfAnotherNodeInAnotherGroupHasWantedState(StorageNodeInfo nodeInfo) { - if (groupVisiting.isHierarchical()) { - SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>(); - - groupVisiting.visit(group -> { - if (!groupContainsNode(group, nodeInfo.getNode())) { - Result result = otherNodeInGroupHasWantedState(group); - if (result.notAllowed()) { - anotherNodeHasWantedState.set(result); - // Have found a node that is suspended, halt the visiting - return false; - } + SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>(); + groupVisiting.visit(group -> { + if (! groupContainsNode(group, nodeInfo.getNode())) { + Result result = otherNodeInGroupHasWantedState(group); + if (result.notAllowed()) { + anotherNodeHasWantedState.set(result); + // Have found a node that is suspended, halt the visiting + return false; } + } - return true; - }); + return true; + }); - return anotherNodeHasWantedState.asOptional().orElseGet(Result::allow); - } else { - // Returns a disallow-result if there is another node with a wanted state - return otherNodeHasWantedState(nodeInfo); - } + return anotherNodeHasWantedState.asOptional().orElseGet(Result::allow); } /** * Returns an optional Result, where return value is: - * For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty - * For hierarchical setup: No wanted state for other nodes, return Optional.empty - * Wanted state for nodes/groups are not UP: - * if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed) - * else: if node is in group with nodes already down: return Optional.of(allowed), else Optional.of(disallowed) + * - No wanted state for other nodes, return Optional.empty + * - Wanted state for nodes/groups are not UP: + * - if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed) + * else: if node is in group with nodes already down: 
return Optional.of(allowed), else Optional.of(disallowed) */ private Optional<Result> checkIfOtherNodesHaveWantedState(StorageNodeInfo nodeInfo, String newDescription, ClusterState clusterState) { Node node = nodeInfo.getNode(); - if (groupVisiting.isHierarchical()) { - Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp(); - if (groupsWithNodesWantedStateNotUp.size() == 0) { - log.log(FINE, "groupsWithNodesWantedStateNotUp=0"); - return Optional.empty(); - } + Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp(); + if (groupsWithNodesWantedStateNotUp.size() == 0) { + log.log(FINE, "groupsWithNodesWantedStateNotUp=0"); + return Optional.empty(); + } - Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription); - if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) { - log.log(FINE, "Node is in group with same state and description, allow"); - return Optional.of(allow()); - } - // There are groups with nodes not up, but with another description, probably operator set - if (groupsWithSameStateAndDescription.size() == 0) { - return Optional.of(disallow("Wanted state already set for another node in groups: " + - sortSetIntoList(groupsWithNodesWantedStateNotUp))); - } + Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription); + if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) { + log.log(FINE, "Node is in group with same state and description, allow"); + return Optional.of(allow()); + } + // There are groups with nodes not up, but with another description, probably operator set + if (groupsWithSameStateAndDescription.size() == 0) { + return Optional.of(disallow("Wanted state already set for another node in groups: " + + sortSetIntoList(groupsWithNodesWantedStateNotUp))); + } - Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState); - int 
numberOfGroupsToConsider = retiredAndNotUpGroups.size(); - // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will - // not increase if we allow node to go down - if (aGroupContainsNode(retiredAndNotUpGroups, node)) { - numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; - } - if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { - log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); - return Optional.of(allow()); - } + Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState); + int numberOfGroupsToConsider = retiredAndNotUpGroups.size(); + // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will + // not increase if we allow node to go down + if (aGroupContainsNode(retiredAndNotUpGroups, node)) { + numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1; + } - return Optional.of(disallow(String.format("At most %d groups can have wanted state: %s", - maxNumberOfGroupsAllowedToBeDown, - sortSetIntoList(retiredAndNotUpGroups)))); - } else { - // Return a disallow-result if there is another node with a wanted state - var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo); - if (otherNodeHasWantedState.notAllowed()) - return Optional.of(otherNodeHasWantedState); + var result = checkRedundancy(retiredAndNotUpGroups, clusterState); + if (result.isPresent() && result.get().notAllowed()) + return result; + + if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) { + log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups); + return Optional.of(allow()); + } + + return Optional.of(disallow(String.format("At most %d groups can have wanted state: %s", + maxNumberOfGroupsAllowedToBeDown, + sortSetIntoList(retiredAndNotUpGroups)))); + } + + // Check redundancy for nodes seen from all distributors that are UP in cluster state for + // storage nodes that are in 
groups that should be UP + private Optional<Result> checkRedundancy(Set<Integer> retiredAndNotUpGroups, ClusterState clusterState) { + Set<Integer> indexesToCheck = new HashSet<>(); + retiredAndNotUpGroups.forEach(index -> getNodesInGroup(index).forEach(node -> indexesToCheck.add(node.index()))); + + for (var distributorNodeInfo : clusterInfo.getDistributorNodeInfos()) { + if (clusterState.getNodeState(distributorNodeInfo.getNode()).getState() != UP) continue; + + var r = checkRedundancySeenFromDistributor(distributorNodeInfo, indexesToCheck); + if (r.notAllowed()) + return Optional.of(r); } return Optional.empty(); } @@ -396,26 +416,56 @@ public class NodeStateChangeChecker { } private Result checkRedundancy(DistributorNodeInfo distributorNodeInfo, Node node) { - List<StorageNode> storageNodes = distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes(); - for (StorageNode storageNode : storageNodes) { - if (storageNode.getIndex() == node.getIndex()) { - Integer minReplication = storageNode.getMinCurrentReplicationFactorOrNull(); - // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). 
- if (minReplication != null && minReplication < requiredRedundancy) { - return disallow("Distributor " + distributorNodeInfo.getNodeIndex() - + " says storage node " + node.getIndex() - + " has buckets with redundancy as low as " - + storageNode.getMinCurrentReplicationFactorOrNull() - + ", but we require at least " + requiredRedundancy); - } else { - return allow(); - } + Integer minReplication = minReplication(distributorNodeInfo).get(node.getIndex()); + return verifyRedundancy(distributorNodeInfo, minReplication, node.getIndex()); + } + + private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) { + Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo)); + + Integer minReplication = null; + Integer minReplicationIndex = null; + for (var entry : replication.entrySet()) { + Integer value = entry.getValue(); + Integer nodeIndex = entry.getKey(); + if ( ! indexesToCheck.contains(nodeIndex)) continue; + if (minReplication == null || (value != null && value < minReplication)) { + minReplication = value; + if (minReplication == null) continue; + + minReplicationIndex = nodeIndex; + if (minReplication < requiredRedundancy) break; } } + return verifyRedundancy(distributorNodeInfo, minReplication, minReplicationIndex); + } + + private Result verifyRedundancy(DistributorNodeInfo distributorNodeInfo, Integer minReplication, Integer minReplicationIndex) { + // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system). 
+ if (minReplication != null && minReplication < requiredRedundancy) { + return disallow("Distributor " + distributorNodeInfo.getNodeIndex() + + " says storage node " + minReplicationIndex + + " has buckets with redundancy as low as " + + minReplication + ", but we require at least " + requiredRedundancy); + } + return allow(); } + // Replication per storage node index + private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) { + Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>(); + for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) { + var currentValue = replicationPerNodeIndex.get(storageNode.getIndex()); + Integer minReplicationFactor = storageNode.getMinCurrentReplicationFactorOrNull(); + if (currentValue == null || (minReplicationFactor != null && minReplicationFactor < currentValue)) + replicationPerNodeIndex.put(storageNode.getIndex(), minReplicationFactor); + } + + return replicationPerNodeIndex; + } + /** * We want to check with the distributors to verify that it is safe to take down the storage node. 
* @param node the node to be checked @@ -456,6 +506,16 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private Group groupForThisIndex(int groupIndex) { + return clusterInfo.getAllNodeInfos().stream() + .map(NodeInfo::getGroup) + .filter(Objects::nonNull) + .filter(Group::isLeafGroup) + .filter(group -> group.getIndex() == groupIndex) + .findFirst() + .orElseThrow(); + } + // groups with at least one node with the same state & description private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) { return clusterInfo.getAllNodeInfos().stream() @@ -485,6 +545,10 @@ public class NodeStateChangeChecker { .collect(Collectors.toSet()); } + private List<ConfiguredNode> getNodesInGroup(int groupIndex) { + return groupForThisIndex(groupIndex).getNodes(); + } + public static class Result { public enum Action { diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 7b20fcf694a..b73ee86251f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.log.LogSetup; import com.yahoo.vdslib.distribution.ConfiguredNode; import com.yahoo.vdslib.distribution.Distribution; import com.yahoo.vdslib.state.ClusterState; @@ -275,25 +276,8 @@ public class NodeStateChangeCheckerTest { assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason()); } - // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, - // try to set storage node 4 in group 2 to maintenance - /* WIP - { - ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion)); - setStorageNodeWantedState(cluster, 0, UP, ""); - setStorageNodeWantedState(cluster, 1, UP, ""); - int nodeIndex = 4; - Node node = new Node(STORAGE, nodeIndex); - Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); - assertFalse(result.settingWantedStateIsAllowed(), result.toString()); - assertFalse(result.wantedStateAlreadySet()); - assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason()); - } - - */ - // 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description - // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed + // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed { ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion)); setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description @@ -305,6 +289,29 @@ public class NodeStateChangeCheckerTest { assertFalse(result.isAlreadySet()); } + // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance, + // try to set storage node 
4 in group 2 to maintenance + { + setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator"); + setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again + setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again + ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion)); + + // Set bucket in sync to 1 for node 2 in group 1 + var distributorHostInfo = createDistributorHostInfo(1, 2, 1); + cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo)); + + int nodeIndex = 2; + Node node = new Node(STORAGE, nodeIndex); + Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.allowed(), result.toString()); + assertFalse(result.isAlreadySet()); + assertEquals("Distributor 0 says storage node 0 has buckets with redundancy as low as 1, but we require at least 4", result.reason()); + } + } @ParameterizedTest diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index d5a1b832aba..e9eb15592a2 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -118,29 +118,15 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"vekterli", "havardpe"}) default boolean enableConditionalPutRemoveWriteRepair() { return false; } @ModelFeatureFlag(owners = {"mortent", "olaa"}) default boolean enableDataplaneProxy() { return false; } 
@ModelFeatureFlag(owners = {"baldersheim"}) default boolean enableNestedMultivalueGrouping() { return false; } + @ModelFeatureFlag(owners = {"jonmv"}) default boolean useReconfigurableDispatcher() { return false; } - //Below are all flags that must be kept until 7 is out of the door + // Below are all flags that must be kept until 7 is out of the door and implementations and/or default flag values are in sync with what is defined here. @ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean ignoreThreadStackSizes() { return false; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default boolean useThreePhaseUpdates() { return true; } @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipCommunicationManagerThread() { return true; } @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipMbusRequestThread() { return true; } @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipMbusReplyThread() { return true; } @ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean useQrserverServiceName() { return true; } @ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean avoidRenamingSummaryFeatures() { return false; } - @ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean experimentalSdParsing() { return true; } // TODO: Remove after June 2022 - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean enableBitVectors() { return true; } - @ModelFeatureFlag(owners = {"bjorncs"}, removeAfter="7.last") default boolean enableServerOcspStapling() { return true; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int defaultPoolNumThreads() { return 1; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int availableProcessors() { return 1; } - @ModelFeatureFlag(owners = {"vekterli", "geirst"}, 
removeAfter="7.last") default boolean unorderedMergeChaining() { return true; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default String mergeThrottlingPolicy() { return "STATIC"; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsDecrementFactor() { return 1.2; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsBackoff() { return 0.95; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default int persistenceThrottlingWindowSize() { return -1; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsResizeRate() { return 3; } - @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default boolean persistenceThrottlingOfMergeFeedOps() { return true; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int maxConcurrentMergesPerNode() { return 16; } - @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int maxMergeQueueSize() { return 100; } } /** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! 
*/ diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java index ea4988f3029..41bbf5e1b6a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java @@ -101,9 +101,9 @@ public class NodesSpecification { this.hasCountAttribute = hasCountAttribute; } - private static NodesSpecification create(boolean dedicated, boolean canFail, Version version, - ModelElement nodesElement, Optional<DockerImage> dockerImageRepo, - Optional<CloudAccount> cloudAccount) { + static NodesSpecification create(boolean dedicated, boolean canFail, Version version, + ModelElement nodesElement, Optional<DockerImage> dockerImageRepo, + Optional<CloudAccount> cloudAccount) { var resolvedElement = resolveElement(nodesElement); var combinedId = findCombinedId(nodesElement, resolvedElement); var resourceConstraints = toResourceConstraints(resolvedElement); @@ -126,8 +126,13 @@ public class NodesSpecification { var nodes = rangeFrom(nodesElement, "count"); var groups = rangeFrom(nodesElement, "groups"); var groupSize = rangeFrom(nodesElement, "group-size"); - int defaultMaxGroups = groupSize.isEmpty() ? 1 : nodes.to().orElse(1); // Don't constrain the number of groups if group size is set - var min = new ClusterResources(nodes.from().orElse(1), groups.from().orElse(1), nodeResources(nodesElement).getFirst()); + + // Find the tightest possible limits for groups to avoid falsely concluding we are autoscaling + // when only specifying group size + int defaultMinGroups = nodes.from().orElse(1) / groupSize.to().orElse(nodes.from().orElse(1)); + int defaultMaxGroups = groupSize.isEmpty() ? 
1 : nodes.to().orElse(1) / groupSize.from().orElse(1); + + var min = new ClusterResources(nodes.from().orElse(1), groups.from().orElse(defaultMinGroups), nodeResources(nodesElement).getFirst()); var max = new ClusterResources(nodes.to().orElse(1), groups.to().orElse(defaultMaxGroups), nodeResources(nodesElement).getSecond()); return new ResourceConstraints(min, max, groupSize); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java index 5fa893e9599..ae60ed77a7a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java @@ -2,8 +2,6 @@ package com.yahoo.vespa.model.container.http.ssl; import com.yahoo.jdisc.http.ConnectorConfig; -import com.yahoo.jdisc.http.ssl.impl.CloudSslContextProvider; -import com.yahoo.jdisc.http.ssl.impl.ConfiguredSslContextFactoryProvider; import java.util.Optional; @@ -17,8 +15,6 @@ import static com.yahoo.jdisc.http.ConnectorConfig.Ssl.ClientAuth; */ public class CloudSslProvider extends SslProvider { public static final String COMPONENT_ID_PREFIX = "configured-ssl-provider@"; - public static final String MTLSONLY_COMPONENT_CLASS = ConfiguredSslContextFactoryProvider.class.getName(); - public static final String TOKEN_COMPONENT_CLASS = CloudSslContextProvider.class.getName(); private final String privateKey; private final String certificate; @@ -26,8 +22,9 @@ public class CloudSslProvider extends SslProvider { private final String caCertificate; private final ClientAuth.Enum clientAuthentication; - public CloudSslProvider(String servername, String privateKey, String certificate, String caCertificatePath, String caCertificate, ClientAuth.Enum clientAuthentication, boolean enableTokenSupport) { - super(COMPONENT_ID_PREFIX, servername, 
componentClass(enableTokenSupport), null); + public CloudSslProvider(String servername, String privateKey, String certificate, String caCertificatePath, + String caCertificate, ClientAuth.Enum clientAuthentication) { + super(COMPONENT_ID_PREFIX, servername, "com.yahoo.jdisc.http.ssl.impl.CloudSslContextProvider", null); this.privateKey = privateKey; this.certificate = certificate; this.caCertificatePath = caCertificatePath; @@ -35,10 +32,6 @@ public class CloudSslProvider extends SslProvider { this.clientAuthentication = clientAuthentication; } - private static String componentClass(boolean enableTokenSupport) { - return enableTokenSupport ? TOKEN_COMPONENT_CLASS : MTLSONLY_COMPONENT_CLASS; - } - @Override public void amendConnectorConfig(ConnectorConfig.Builder builder) { builder.ssl.enabled(true); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java index 5bf348e5bb5..4f11611541d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.http.ssl; import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.jdisc.http.ConnectorConfig; -import com.yahoo.jdisc.http.ConnectorConfig.Ssl.ClientAuth; import com.yahoo.security.tls.TlsContext; import com.yahoo.vespa.model.container.http.ConnectorFactory; @@ -18,96 +17,71 @@ import java.util.List; */ public class HostedSslConnectorFactory extends ConnectorFactory { - private static final List<String> INSECURE_WHITELISTED_PATHS = List.of("/status.html"); - private static final String DEFAULT_HOSTED_TRUSTSTORE = "/opt/yahoo/share/ssl/certs/athenz_certificate_bundle.pem"; - - private final boolean enforceClientAuth; - private final boolean 
enforceHandshakeClientAuth; - private final Collection<String> tlsCiphersOverride; + boolean requireTlsClientAuthDuringTlsHandshake; + private final List<String> tlsCiphersOverride; private final boolean enableProxyProtocolMixedMode; private final Duration endpointConnectionTtl; - /** - * Create connector factory that uses a certificate provided by the config-model / configserver and default hosted Vespa truststore. - */ - public static HostedSslConnectorFactory withProvidedCertificate( - String serverName, EndpointCertificateSecrets endpointCertificateSecrets, boolean enforceHandshakeClientAuth, - Collection<String> tlsCiphersOverride, boolean enableProxyProtocolMixedMode, int port, - Duration endpointConnectionTtl, boolean enableTokenSupport) { - CloudSslProvider sslProvider = createConfiguredDirectSslProvider( - serverName, endpointCertificateSecrets, DEFAULT_HOSTED_TRUSTSTORE, /*tlsCaCertificates*/null, enforceHandshakeClientAuth, enableTokenSupport); - return new HostedSslConnectorFactory(sslProvider, false, enforceHandshakeClientAuth, tlsCiphersOverride, - enableProxyProtocolMixedMode, port, endpointConnectionTtl); - } - - /** - * Create connector factory that uses a certificate provided by the config-model / configserver and a truststore configured by the application. 
- */ - public static HostedSslConnectorFactory withProvidedCertificateAndTruststore( - String serverName, EndpointCertificateSecrets endpointCertificateSecrets, String tlsCaCertificates, - Collection<String> tlsCiphersOverride, boolean enableProxyProtocolMixedMode, int port, - Duration endpointConnectionTtl, boolean enableTokenSupport) { - CloudSslProvider sslProvider = createConfiguredDirectSslProvider( - serverName, endpointCertificateSecrets, /*tlsCaCertificatesPath*/null, tlsCaCertificates, false, enableTokenSupport); - return new HostedSslConnectorFactory(sslProvider, true, false, tlsCiphersOverride, enableProxyProtocolMixedMode, - port, endpointConnectionTtl); - } - - /** - * Create connector factory that uses the default certificate and truststore provided by Vespa (through Vespa-global TLS configuration). - */ - public static HostedSslConnectorFactory withDefaultCertificateAndTruststore(String serverName, Collection<String> tlsCiphersOverride, - boolean enableProxyProtocolMixedMode, int port, - Duration endpointConnectionTtl) { - return new HostedSslConnectorFactory(new DefaultSslProvider(serverName), true, false, tlsCiphersOverride, - enableProxyProtocolMixedMode, port, endpointConnectionTtl); - } + public static Builder builder(String name, int listenPort) { return new Builder(name, listenPort); } - private HostedSslConnectorFactory(SslProvider sslProvider, boolean enforceClientAuth, - boolean enforceHandshakeClientAuth, Collection<String> tlsCiphersOverride, - boolean enableProxyProtocolMixedMode, int port, Duration endpointConnectionTtl) { - super(new Builder("tls"+port, port).sslProvider(sslProvider)); - this.enforceClientAuth = enforceClientAuth; - this.enforceHandshakeClientAuth = enforceHandshakeClientAuth; - this.tlsCiphersOverride = tlsCiphersOverride; - this.enableProxyProtocolMixedMode = enableProxyProtocolMixedMode; - this.endpointConnectionTtl = endpointConnectionTtl; + private HostedSslConnectorFactory(Builder builder) { + super(new 
ConnectorFactory.Builder("tls"+builder.port, builder.port).sslProvider(createSslProvider(builder))); + this.requireTlsClientAuthDuringTlsHandshake = builder.requireTlsClientAuthDuringTlsHandshake; + this.tlsCiphersOverride = List.copyOf(builder.tlsCiphersOverride); + this.enableProxyProtocolMixedMode = builder.enableProxyProtocolMixedMode; + this.endpointConnectionTtl = builder.endpointConnectionTtl; } - private static CloudSslProvider createConfiguredDirectSslProvider( - String serverName, EndpointCertificateSecrets endpointCertificateSecrets, String tlsCaCertificatesPath, String tlsCaCertificates, boolean enforceHandshakeClientAuth, boolean enableTokenSupport) { - var clientAuthentication = enforceHandshakeClientAuth ? ClientAuth.Enum.NEED_AUTH : ClientAuth.Enum.WANT_AUTH; + private static SslProvider createSslProvider(Builder builder) { + if (builder.endpointCertificate == null) return new DefaultSslProvider(builder.name); + var clientAuthentication = builder.requireTlsClientAuthDuringTlsHandshake + ? ConnectorConfig.Ssl.ClientAuth.Enum.NEED_AUTH : ConnectorConfig.Ssl.ClientAuth.Enum.WANT_AUTH; return new CloudSslProvider( - serverName, - endpointCertificateSecrets.key(), - endpointCertificateSecrets.certificate(), - tlsCaCertificatesPath, - tlsCaCertificates, - clientAuthentication, - enableTokenSupport); + builder.name, builder.endpointCertificate.key(), builder.endpointCertificate.certificate(), + builder.tlsCaCertificatesPath, builder.tlsCaCertificatesPem, clientAuthentication); } @Override public void getConfig(ConnectorConfig.Builder connectorBuilder) { super.getConfig(connectorBuilder); - if (! enforceHandshakeClientAuth) { - connectorBuilder - .tlsClientAuthEnforcer(new ConnectorConfig.TlsClientAuthEnforcer.Builder() - .pathWhitelist(INSECURE_WHITELISTED_PATHS) - .enable(enforceClientAuth)); + if (! 
requireTlsClientAuthDuringTlsHandshake) { + connectorBuilder.tlsClientAuthEnforcer( + new ConnectorConfig.TlsClientAuthEnforcer.Builder() + .pathWhitelist(List.of("/status.html")).enable(true)); } // Disables TLSv1.3 as it causes some browsers to prompt user for client certificate (when connector has 'want' auth) connectorBuilder.ssl.enabledProtocols(List.of("TLSv1.2")); - if (!tlsCiphersOverride.isEmpty()) { connectorBuilder.ssl.enabledCipherSuites(tlsCiphersOverride.stream().sorted().toList()); } else { connectorBuilder.ssl.enabledCipherSuites(TlsContext.ALLOWED_CIPHER_SUITES.stream().sorted().toList()); } - connectorBuilder .proxyProtocol(new ConnectorConfig.ProxyProtocol.Builder().enabled(true).mixedMode(enableProxyProtocolMixedMode)) .idleTimeout(Duration.ofSeconds(30).toSeconds()) .maxConnectionLife(endpointConnectionTtl != null ? endpointConnectionTtl.toSeconds() : 0); } + + public static class Builder { + final String name; + final int port; + boolean requireTlsClientAuthDuringTlsHandshake; + List<String> tlsCiphersOverride; + boolean enableProxyProtocolMixedMode; + Duration endpointConnectionTtl; + EndpointCertificateSecrets endpointCertificate; + String tlsCaCertificatesPem; + String tlsCaCertificatesPath; + + private Builder(String name, int port) { this.name = name; this.port = port; } + public Builder requireTlsClientAuthDuringTlsHandshake(boolean enable) {this.requireTlsClientAuthDuringTlsHandshake = enable; return this; } + public Builder endpointConnectionTtl(Duration ttl) { endpointConnectionTtl = ttl; return this; } + public Builder tlsCiphersOverride(Collection<String> ciphers) { tlsCiphersOverride = List.copyOf(ciphers); return this; } + public Builder proxyProtocolMixedMode(boolean enable) { enableProxyProtocolMixedMode = enable; return this; } + public Builder endpointCertificate(EndpointCertificateSecrets cert) { this.endpointCertificate = cert; return this; } + public Builder tlsCaCertificatesPath(String path) { this.tlsCaCertificatesPath = 
path; return this; } + public Builder tlsCaCertificatesPem(String pem) { this.tlsCaCertificatesPem = pem; return this; } + + public HostedSslConnectorFactory build() { return new HostedSslConnectorFactory(this); } + } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index 414d4c817c7..f0296d49472 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -6,6 +6,8 @@ import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; import com.yahoo.search.config.IndexInfoConfig; import com.yahoo.search.config.SchemaInfoConfig; +import com.yahoo.search.dispatch.Dispatcher; +import com.yahoo.search.dispatch.ReconfigurableDispatcher; import com.yahoo.search.pagetemplates.PageTemplatesConfig; import com.yahoo.search.query.profile.config.QueryProfilesConfig; import com.yahoo.search.ranking.RankProfilesEvaluatorFactory; @@ -49,6 +51,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private final List<SearchCluster> searchClusters = new LinkedList<>(); private final Collection<String> schemasWithGlobalPhase; private final boolean globalPhase; + private final boolean useReconfigurableDispatcher; private QueryProfiles queryProfiles; private SemanticRules semanticRules; @@ -57,6 +60,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); + this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); this.owningCluster = cluster; @@ 
-81,16 +85,17 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> /** Adds a Dispatcher component to the owning container cluster for each search cluster */ private void initializeDispatchers(Collection<SearchCluster> searchClusters) { + Class<? extends Dispatcher> dispatcherClass = useReconfigurableDispatcher ? ReconfigurableDispatcher.class : Dispatcher.class; for (SearchCluster searchCluster : searchClusters) { if (searchCluster instanceof IndexedSearchCluster indexed) { - var dispatcher = new DispatcherComponent(indexed); + var dispatcher = new DispatcherComponent(indexed, dispatcherClass); owningCluster.addComponent(dispatcher); } if (globalPhase) { for (var documentDb : searchCluster.getDocumentDbs()) { - if (!schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; + if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); - if (! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + if ( ! 
owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java index f9a3a1f1990..fe2df8101bd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.model.container.search; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.osgi.provider.model.ComponentModel; +import com.yahoo.search.dispatch.Dispatcher; import com.yahoo.vespa.config.search.DispatchConfig; import com.yahoo.vespa.config.search.DispatchNodesConfig; import com.yahoo.vespa.model.container.component.Component; @@ -22,15 +23,15 @@ public class DispatcherComponent extends Component<TreeConfigProducer<?>, Compon private final IndexedSearchCluster indexedSearchCluster; - public DispatcherComponent(IndexedSearchCluster indexedSearchCluster) { - super(toComponentModel(indexedSearchCluster.getClusterName())); + public DispatcherComponent(IndexedSearchCluster indexedSearchCluster, Class<? extends Dispatcher> clazz) { + super(toComponentModel(indexedSearchCluster.getClusterName(), clazz)); this.indexedSearchCluster = indexedSearchCluster; } - private static ComponentModel toComponentModel(String clusterName) { + private static ComponentModel toComponentModel(String clusterName, Class<? extends Dispatcher> clazz) { String dispatcherComponentId = "dispatcher." 
+ clusterName; // used by ClusterSearcher return new ComponentModel(dispatcherComponentId, - com.yahoo.search.dispatch.Dispatcher.class.getName(), + clazz.getName(), PlatformBundles.SEARCH_AND_DOCPROC_BUNDLE); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 00feb0a1c76..3318138ebd7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -16,7 +16,6 @@ import com.yahoo.config.model.ConfigModelContext; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ConfigServerSpec; import com.yahoo.config.model.api.ContainerEndpoint; -import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.config.model.api.TenantSecretStore; import com.yahoo.config.model.application.provider.IncludeDirs; import com.yahoo.config.model.builder.xml.ConfigModelBuilder; @@ -109,7 +108,6 @@ import java.io.IOException; import java.io.Reader; import java.net.URI; import java.security.cert.X509Certificate; -import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -600,31 +598,35 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { .ifPresent(accessControl -> accessControl.configureDefaultHostedConnector(cluster.getHttp())); ; } - private void addAdditionalHostedConnector(DeployState deployState, ApplicationContainerCluster cluster) { + private void addAdditionalHostedConnector(DeployState state, ApplicationContainerCluster cluster) { JettyHttpServer server = cluster.getHttp().getHttpServer().get(); String serverName = server.getComponentId().getName(); // If the deployment contains certificate/private key reference, setup TLS port - HostedSslConnectorFactory 
connectorFactory; - Collection<String> tlsCiphersOverride = deployState.getProperties().tlsCiphersOverride(); - boolean proxyProtocolMixedMode = deployState.getProperties().featureFlags().enableProxyProtocolMixedMode(); - Duration endpointConnectionTtl = deployState.getProperties().endpointConnectionTtl(); - var port = getDataplanePort(deployState); - if (deployState.endpointCertificateSecrets().isPresent()) { - boolean authorizeClient = deployState.zone().system().isPublic(); + var builder = HostedSslConnectorFactory.builder(serverName, getDataplanePort(state)) + .proxyProtocolMixedMode(state.getProperties().featureFlags().enableProxyProtocolMixedMode()) + .tlsCiphersOverride(state.getProperties().tlsCiphersOverride()) + .endpointConnectionTtl(state.getProperties().endpointConnectionTtl()); + var endpointCert = state.endpointCertificateSecrets().orElse(null); + if (endpointCert != null) { + builder.endpointCertificate(endpointCert); + boolean isPublic = state.zone().system().isPublic(); List<X509Certificate> clientCertificates = getClientCertificates(cluster); - if (authorizeClient && clientCertificates.isEmpty()) { - throw new IllegalArgumentException("Client certificate authority security/clients.pem is missing - " + - "see: https://cloud.vespa.ai/en/security/guide#data-plane"); + if (isPublic) { + if (clientCertificates.isEmpty()) + throw new IllegalArgumentException("Client certificate authority security/clients.pem is missing - " + + "see: https://cloud.vespa.ai/en/security/guide#data-plane"); + builder.tlsCaCertificatesPem(X509CertificateUtils.toPem(clientCertificates)); + } else { + builder.tlsCaCertificatesPath("/opt/yahoo/share/ssl/certs/athenz_certificate_bundle.pem"); } - EndpointCertificateSecrets endpointCertificateSecrets = deployState.endpointCertificateSecrets().get(); - - boolean enforceHandshakeClientAuth = cluster.getHttp().getAccessControl() - .map(accessControl -> accessControl.clientAuthentication) - .map(clientAuth -> clientAuth == 
AccessControl.ClientAuthentication.need) - .orElse(false); + builder.requireTlsClientAuthDuringTlsHandshake( + cluster.getHttp().getAccessControl() + .map(accessControl -> accessControl.clientAuthentication) + .map(clientAuth -> clientAuth == AccessControl.ClientAuthentication.need) + .orElse(false)); - boolean enableTokenSupport = deployState.featureFlags().enableDataplaneProxy() + boolean enableTokenSupport = state.featureFlags().enableDataplaneProxy() && cluster.getClients().stream().anyMatch(c -> !c.tokens().isEmpty()); // Set up component to generate proxy cert if token support is enabled @@ -633,24 +635,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { cluster.addSimpleComponent(DataplaneProxyService.class); var dataplaneProxy = new DataplaneProxy( - getDataplanePort(deployState), - endpointCertificateSecrets.certificate(), - endpointCertificateSecrets.key()); + getDataplanePort(state), + endpointCert.certificate(), + endpointCert.key()); cluster.addComponent(dataplaneProxy); } - - connectorFactory = authorizeClient - ? 
HostedSslConnectorFactory.withProvidedCertificateAndTruststore( - serverName, endpointCertificateSecrets, X509CertificateUtils.toPem(clientCertificates), - tlsCiphersOverride, proxyProtocolMixedMode, port, endpointConnectionTtl, enableTokenSupport) - : HostedSslConnectorFactory.withProvidedCertificate( - serverName, endpointCertificateSecrets, enforceHandshakeClientAuth, tlsCiphersOverride, - proxyProtocolMixedMode, port, endpointConnectionTtl, enableTokenSupport); - } else { - connectorFactory = HostedSslConnectorFactory.withDefaultCertificateAndTruststore( - serverName, tlsCiphersOverride, proxyProtocolMixedMode, port, - endpointConnectionTtl); } + var connectorFactory = builder.build(); cluster.getHttp().getAccessControl().ifPresent(accessControl -> accessControl.configureHostedConnector(connectorFactory)); server.addConnector(connectorFactory); } diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc index bb63dcd73ff..bb0e39a41ab 100644 --- a/config-model/src/main/resources/schema/content.rnc +++ b/config-model/src/main/resources/schema/content.rnc @@ -82,7 +82,7 @@ ClusterControllerTuning = element cluster-controller { element stable-state-period { xsd:string { pattern = "([0-9\.]+)\s*([a-z]+)?" } }? & element min-distributor-up-ratio { xsd:double }? & element min-storage-up-ratio { xsd:double }? & - element groups-allowed-down-ratio { xsd:double }? + element groups-allowed-down-ratio { xsd:double { minInclusive = "0" maxInclusive = "1" } }? } DispatchTuning = element dispatch { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java new file mode 100644 index 00000000000..2a03a9307ca --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java @@ -0,0 +1,104 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.builder.xml.dom; + +import com.yahoo.text.XML; +import org.junit.jupiter.api.Test; +import org.w3c.dom.Document; +import com.yahoo.component.Version; + +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author bratseth + */ +public class NodesSpecificationTest { + + @Test + void noExplicitGroupLimits() { + var spec = nodesSpecification("<nodes count='30'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals( 1, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals( 1, spec.maxResources().groups()); // no grouping by default -> implicit max groups is 1 + assertTrue(spec.groupSize().from().isEmpty()); + assertTrue(spec.groupSize().to().isEmpty()); + } + + @Test + void testGroupSize1() { + var spec = nodesSpecification("<nodes count='30' group-size='1'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals(30, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals(30, spec.maxResources().groups()); + assertEquals(1, spec.groupSize().from().getAsInt()); + assertEquals(1, spec.groupSize().to().getAsInt()); + } + + @Test + void testGroupSize3() { + var spec = nodesSpecification("<nodes count='30' group-size='3'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals(10, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals(10, spec.maxResources().groups()); + assertEquals( 3, spec.groupSize().from().getAsInt()); + assertEquals( 3, spec.groupSize().to().getAsInt()); + } + + @Test + void testVariableGroupSize1() { + var spec = nodesSpecification("<nodes count='30' group-size='[15, 30]'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals( 1, spec.minResources().groups()); 
+ assertEquals(30, spec.maxResources().nodes()); + assertEquals( 2, spec.maxResources().groups()); + assertEquals(15, spec.groupSize().from().getAsInt()); + assertEquals(30, spec.groupSize().to().getAsInt()); + } + + @Test + void testVariableGroupSize2() { + var spec = nodesSpecification("<nodes count='30' group-size='[6, 10]'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals( 3, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals( 5, spec.maxResources().groups()); + assertEquals( 6, spec.groupSize().from().getAsInt()); + assertEquals(10, spec.groupSize().to().getAsInt()); + } + + @Test + void testGroupSizeLowerBound() { + var spec = nodesSpecification("<nodes count='30' group-size='[6, ]'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals( 1, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals( 5, spec.maxResources().groups()); + assertEquals( 6, spec.groupSize().from().getAsInt()); + assertTrue(spec.groupSize().to().isEmpty()); + } + + @Test + void testGroupSizeUpperBound() { + var spec = nodesSpecification("<nodes count='30' group-size='[, 10]'/>"); + assertEquals(30, spec.minResources().nodes()); + assertEquals( 3, spec.minResources().groups()); + assertEquals(30, spec.maxResources().nodes()); + assertEquals( 30, spec.maxResources().groups()); + assertTrue(spec.groupSize().from().isEmpty()); + assertEquals(10, spec.groupSize().to().getAsInt()); + } + + private NodesSpecification nodesSpecification(String nodesElement) { + Document nodesXml = XML.getDocument(nodesElement); + return NodesSpecification.create(false, false, Version.emptyVersion, + new ModelElement(nodesXml.getDocumentElement()), + Optional.empty(), Optional.empty()); + + } + +} diff --git a/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java b/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java index 78ef17f613a..6cd344466e4 100644 --- 
a/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java +++ b/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java @@ -2,9 +2,9 @@ package com.yahoo.config.codegen; import com.yahoo.config.codegen.LeafCNode.FileLeaf; +import com.yahoo.config.codegen.LeafCNode.ModelLeaf; import com.yahoo.config.codegen.LeafCNode.PathLeaf; import com.yahoo.config.codegen.LeafCNode.UrlLeaf; -import com.yahoo.config.codegen.LeafCNode.ModelLeaf; import java.util.ArrayList; import java.util.List; @@ -41,7 +41,7 @@ public class BuilderGenerator { private static String getDeclaration(InnerCNode node) { String getInterfaces = (node.getParent() == null) ? "implements ConfigInstance.Builder" : "implements ConfigBuilder"; - return "public static class Builder " + getInterfaces + " {"; + return "public static final class Builder " + getInterfaces + " {"; } private static String getSpecialRootBuilderCode(InnerCNode node) { diff --git a/configgen/src/test/resources/allfeatures.reference b/configgen/src/test/resources/allfeatures.reference index 8a681048f65..b7a79f663e7 100644 --- a/configgen/src/test/resources/allfeatures.reference +++ b/configgen/src/test/resources/allfeatures.reference @@ -99,7 +99,7 @@ public final class AllfeaturesConfig extends ConfigInstance { void getConfig(Builder builder); } - public static class Builder implements ConfigInstance.Builder { + public static final class Builder implements ConfigInstance.Builder { private Set<String> __uninitialized = new HashSet<String>(Arrays.asList( "boolVal", "intVal", @@ -1345,7 +1345,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class Basic_struct extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); private String foo = null; @@ -1432,7 +1432,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ 
public final static class Struct_of_struct extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); public Inner0.Builder inner0 = new Inner0.Builder(); @@ -1529,7 +1529,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class Inner0 extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); private String name = null; @@ -1616,7 +1616,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class Inner1 extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); private String name = null; @@ -1703,7 +1703,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class MyArray extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(Arrays.asList( "refVal" )); @@ -1939,7 +1939,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class AnotherArray extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); private Integer foo = null; @@ -2013,7 +2013,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class MyMap extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new 
HashSet<String>(Arrays.asList( "refVal" )); @@ -2249,7 +2249,7 @@ public final class AllfeaturesConfig extends ConfigInstance { */ public final static class AnotherArray extends InnerNode { - public static class Builder implements ConfigBuilder { + public static final class Builder implements ConfigBuilder { private Set<String> __uninitialized = new HashSet<String>(); private Integer foo = null; diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index a7a26343edf..dac881cf5ee 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -209,6 +209,7 @@ public class ModelContextImpl implements ModelContext { private final boolean enableConditionalPutRemoveWriteRepair; private final boolean enableDataplaneProxy; private final boolean enableNestedMultivalueGrouping; + private final boolean useReconfigurableDispatcher; public FeatureFlags(FlagSource source, ApplicationId appId, Version version) { this.defaultTermwiseLimit = flagValue(source, appId, version, Flags.DEFAULT_TERM_WISE_LIMIT); @@ -259,6 +260,7 @@ public class ModelContextImpl implements ModelContext { this.enableConditionalPutRemoveWriteRepair = flagValue(source, appId, version, Flags.ENABLE_CONDITIONAL_PUT_REMOVE_WRITE_REPAIR); this.enableDataplaneProxy = flagValue(source, appId, version, Flags.ENABLE_DATAPLANE_PROXY); this.enableNestedMultivalueGrouping = flagValue(source, appId, version, Flags.ENABLE_NESTED_MULTIVALUE_GROUPING); + this.useReconfigurableDispatcher = flagValue(source, appId, version, Flags.USE_RECONFIGURABLE_DISPATCHER); } @Override public int heapSizePercentage() { return heapPercentage; } @@ -317,6 +319,7 @@ public class ModelContextImpl implements ModelContext { @Override public boolean 
enableConditionalPutRemoveWriteRepair() { return enableConditionalPutRemoveWriteRepair; } @Override public boolean enableDataplaneProxy() { return enableDataplaneProxy; } @Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; } + @Override public boolean useReconfigurableDispatcher() { return useReconfigurableDispatcher; } private static <V> V flagValue(FlagSource source, ApplicationId appId, Version vespaVersion, UnboundFlag<? extends V, ?, ?> flag) { return flag.bindTo(source) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java index a8dc48e6c14..ef41512f979 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java @@ -46,7 +46,9 @@ public class DataplaneTokenSerializer { return new DataplaneToken.Version( inspector.field(FINGERPRINT_FIELD).asString(), inspector.field(CHECKACCESSHASH_FIELD).asString(), - expirationStr.equals("<none>") ? Optional.empty() : Optional.of(Instant.parse(expirationStr))); + expirationStr.equals("<none>") ? Optional.empty() + : (expirationStr.isBlank() + ? 
Optional.of(Instant.EPOCH) : Optional.of(Instant.parse(expirationStr)))); } public static Slime toSlime(List<DataplaneToken> dataplaneTokens) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java index 2c898b8bf7d..342ea7b2297 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java @@ -9,9 +9,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.TenantName; import com.yahoo.container.jdisc.HttpRequest; - import com.yahoo.security.X509CertificateUtils; - import com.yahoo.slime.Cursor; import com.yahoo.slime.Injector; import com.yahoo.slime.ObjectInserter; @@ -29,7 +27,6 @@ import java.security.cert.X509Certificate; import java.time.Duration; import java.util.List; import java.util.OptionalInt; -; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; diff --git a/container-core/abi-spec.json b/container-core/abi-spec.json index 572d18b02f3..757afeb64e2 100644 --- a/container-core/abi-spec.json +++ b/container-core/abi-spec.json @@ -310,7 +310,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1032,7 +1033,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1091,7 +1093,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1128,7 +1131,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1174,7 +1178,8 @@ 
"com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1205,7 +1210,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1240,7 +1246,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1329,7 +1336,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1366,7 +1374,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1768,7 +1777,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1806,7 +1816,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1855,7 +1866,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1884,7 +1896,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1915,7 +1928,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1946,7 +1960,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1977,7 +1992,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java b/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java index 
0e381860e98..3cf316db5c2 100644 --- a/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java +++ b/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java @@ -17,9 +17,7 @@ import com.yahoo.documentapi.VisitorParameters; import com.yahoo.documentapi.VisitorSession; import com.yahoo.documentapi.messagebus.MessageBusDocumentAccess; import com.yahoo.documentapi.messagebus.MessageBusParams; -import com.yahoo.documentapi.messagebus.protocol.DocumentProtocolPoliciesConfig; import com.yahoo.messagebus.MessagebusConfig; -import com.yahoo.vespa.config.content.DistributionConfig; import com.yahoo.yolean.concurrent.Memoized; import java.util.logging.Level; diff --git a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java index 52b372638c8..3471627e887 100644 --- a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java +++ b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java @@ -12,7 +12,7 @@ import java.util.Set; /** * A component which tracks the up/down status of any clusters which should influence * the up down status of this container itself, as well as the separate fact (from config) - * that such clusters are present. This is a separate fact because we might know we have clusters configured + * that such clusters are present. This is a separate fact because we might know we have clusters configured, * but we don't have positive information that they are up yet, and in this case we should be down. 
* * This is a separate component which has <b>no dependencies</b> such that the status tracked in this diff --git a/container-disc/abi-spec.json b/container-disc/abi-spec.json index 75246a77e03..92f21af0cde 100644 --- a/container-disc/abi-spec.json +++ b/container-disc/abi-spec.json @@ -68,7 +68,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -105,7 +106,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -135,7 +137,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java b/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java index 16dedd0765d..ab5080b8f3f 100644 --- a/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java +++ b/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java @@ -1,12 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.container.jdisc.messagebus; -import com.yahoo.component.annotation.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.component.annotation.Inject; import com.yahoo.container.jdisc.ContainerMbusConfig; import com.yahoo.document.DocumentTypeManager; import com.yahoo.documentapi.messagebus.protocol.DocumentProtocol; -import com.yahoo.documentapi.messagebus.protocol.DocumentProtocolPoliciesConfig; import com.yahoo.jdisc.ReferencedResource; import com.yahoo.jdisc.References; import com.yahoo.jdisc.ResourceReference; @@ -25,7 +24,6 @@ import com.yahoo.messagebus.network.NetworkMultiplexer; import com.yahoo.messagebus.shared.SharedIntermediateSession; import com.yahoo.messagebus.shared.SharedMessageBus; import com.yahoo.messagebus.shared.SharedSourceSession; -import com.yahoo.vespa.config.content.DistributionConfig; import com.yahoo.yolean.concurrent.Memoized; import java.util.HashMap; @@ -65,25 +63,18 @@ public final class SessionCache extends AbstractComponent { @Inject public SessionCache(NetworkMultiplexerProvider nets, ContainerMbusConfig containerMbusConfig, DocumentTypeManager documentTypeManager, - MessagebusConfig messagebusConfig, - DocumentProtocolPoliciesConfig policiesConfig, - DistributionConfig distributionConfig) { - this(nets::net, containerMbusConfig, documentTypeManager, - messagebusConfig, policiesConfig, distributionConfig); + MessagebusConfig messagebusConfig) { + this(nets::net, containerMbusConfig, documentTypeManager, messagebusConfig); } public SessionCache(Supplier<NetworkMultiplexer> net, ContainerMbusConfig containerMbusConfig, DocumentTypeManager documentTypeManager, - MessagebusConfig messagebusConfig, - DocumentProtocolPoliciesConfig policiesConfig, - DistributionConfig distributionConfig) { + MessagebusConfig messagebusConfig) { this(net, containerMbusConfig, messagebusConfig, - new DocumentProtocol(documentTypeManager, - policiesConfig, - distributionConfig)); + new 
DocumentProtocol(documentTypeManager)); } public SessionCache(Supplier<NetworkMultiplexer> net, ContainerMbusConfig containerMbusConfig, diff --git a/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java b/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java index e0cd9ca6dde..e41ce539b4a 100644 --- a/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java +++ b/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java @@ -39,9 +39,7 @@ public class MbusClientProviderTest { SessionCache cache = new SessionCache(() -> NetworkMultiplexer.dedicated(new NullNetwork()), new ContainerMbusConfig.Builder().build(), new DocumentTypeManager(new DocumentmanagerConfig.Builder().build()), - new MessagebusConfig.Builder().build(), - new DocumentProtocolPoliciesConfig.Builder().build(), - new DistributionConfig.Builder().build()); + new MessagebusConfig.Builder().build()); MbusClientProvider p = new MbusClientProvider(cache, config); assertNotNull(p.get()); p.deconstruct(); diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index c41c1c79149..0f440957dfd 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -4456,7 +4456,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -4696,7 +4697,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -4758,7 +4760,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -4784,7 +4787,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -7186,7 +7190,8 @@ 
"com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -7212,7 +7217,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java index fd8110e1173..d1377b8d373 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java @@ -82,7 +82,7 @@ public abstract class BaseNodeMonitor<T> { /** Thread-safely changes the state of this node if required */ protected abstract void setWorking(boolean working,String explanation); - /** Returns whether or not this is monitoring an internal node. Default is false. */ + /** Returns whether this is monitoring an internal node. Default is false. 
*/ public boolean isInternal() { return internal; } } diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java index 0b627e91bc5..332bf4ea2c4 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java @@ -66,7 +66,7 @@ public class ClusterMonitor<T> { * </ul> * * @param node the object representing the node - * @param internal whether or not this node is internal to this cluster + * @param internal whether this node is internal to this cluster */ public void add(T node, boolean internal) { nodeMonitors.put(node, new TrafficNodeMonitor<>(node, configuration, internal)); @@ -96,11 +96,10 @@ public class ClusterMonitor<T> { * Ping all nodes which needs pinging to discover state changes */ public void ping(Executor executor) { - for (Iterator<BaseNodeMonitor<T>> i = nodeMonitorIterator(); i.hasNext() && !closed.get(); ) { - BaseNodeMonitor<T> monitor= i.next(); - nodeManager.ping(this, monitor.getNode(), executor); // Cause call to failed or responded + for (var monitor : nodeMonitors()) { + if (closed.get()) return; // Do nothing to change state if close has started. + nodeManager.ping(this, monitor.getNode(), executor); } - if (closed.get()) return; // Do nothing to change state if close has started. 
nodeManager.pingIterationCompleted(); } @@ -143,7 +142,7 @@ public class ClusterMonitor<T> { // for all pings when there are no problems (important because it ensures that // any thread local connections are reused) 2) a new thread will be started to execute // new pings when a ping is not responding - ExecutorService pingExecutor=Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory("search.ping")); + ExecutorService pingExecutor = Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory("search.ping")); while (!closed.get()) { try { log.finest("Activating ping"); @@ -165,7 +164,9 @@ public class ClusterMonitor<T> { } pingExecutor.shutdown(); try { - pingExecutor.awaitTermination(10, TimeUnit.SECONDS); + if ( ! pingExecutor.awaitTermination(10, TimeUnit.SECONDS)) { + log.warning("Timeout waiting for ping executor to terminate"); + } } catch (InterruptedException e) { } log.info("Stopped cluster monitor thread " + getName()); } diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java index 4af6757db8c..1cf36d75fc5 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java @@ -48,7 +48,7 @@ public abstract class ClusterSearcher<T> extends PingableSearcher implements Nod * * @param id the id of this searcher * @param connections the connections of the cluster - * @param internal whether or not this cluster is internal (part of the same installation) + * @param internal whether this cluster is internal (part of the same installation) */ public ClusterSearcher(ComponentId id, List<T> connections, boolean internal) { this(id, connections, new Hasher<>(), internal); diff --git a/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java 
b/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java index 1f6602053d9..f8f8c0d888d 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java @@ -22,7 +22,7 @@ public class MonitorConfiguration { /** * Returns the number of milliseconds to attempt to service a request - * (at different nodes) before giving up. Default is 5000 ms. + * (at different nodes) before giving up. See {@link #requestTimeout}. */ public long getRequestTimeout() { return requestTimeout; } diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java index 11475b6a0ca..108e7e3e34b 100644 --- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java +++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java @@ -23,7 +23,7 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> { this.configuration = configuration; } - /** Whether or not this has ever responded successfully */ + /** Whether this has ever responded successfully */ private boolean atStartUp = true; public T getNode() { return node; } @@ -55,7 +55,7 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> { respondedAt = now(); succeededAt = respondedAt; - setWorking(true,"Responds correctly"); + setWorking(true, "Responds correctly"); } /** diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java index 77496114df1..c6fef88fa2d 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java @@ -2,7 +2,6 @@ package com.yahoo.search.dispatch; import java.io.Closeable; -import 
java.time.Duration; import java.util.function.BiConsumer; /** @@ -21,8 +20,8 @@ public abstract class CloseableInvoker implements Closeable { private RequestDuration duration; public void teardown(BiConsumer<Boolean, RequestDuration> teardown) { - this.teardown = teardown; - this.duration = new RequestDuration(); + this.teardown = this.teardown == null ? teardown : this.teardown.andThen(teardown); + this.duration = this.duration == null ? new RequestDuration() : this.duration; } protected void setFinalStatus(boolean success) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index db7e80a95e5..6f6b0fc2b79 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -1,9 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.dispatch; -import com.yahoo.component.annotation.Inject; import com.yahoo.component.AbstractComponent; import com.yahoo.component.ComponentId; +import com.yahoo.component.annotation.Inject; import com.yahoo.compress.Compressor; import com.yahoo.container.handler.VipStatus; import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; @@ -12,13 +12,14 @@ import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.cluster.ClusterMonitor; import com.yahoo.search.dispatch.SearchPath.InvalidSearchPathException; +import com.yahoo.search.dispatch.rpc.RpcConnectionPool; import com.yahoo.search.dispatch.rpc.RpcInvokerFactory; import com.yahoo.search.dispatch.rpc.RpcPingFactory; import com.yahoo.search.dispatch.rpc.RpcResourcePool; import com.yahoo.search.dispatch.searchcluster.Group; -import com.yahoo.search.dispatch.searchcluster.SearchGroups; import com.yahoo.search.dispatch.searchcluster.Node; import com.yahoo.search.dispatch.searchcluster.SearchCluster; +import com.yahoo.search.dispatch.searchcluster.SearchGroups; import com.yahoo.search.query.profile.types.FieldDescription; import com.yahoo.search.query.profile.types.FieldType; import com.yahoo.search.query.profile.types.QueryProfileType; @@ -32,6 +33,7 @@ import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; /** * A dispatcher communicates with search nodes to perform queries and fill hits. @@ -54,19 +56,43 @@ public class Dispatcher extends AbstractComponent { /** If set will control computation of how many hits will be fetched from each partition.*/ public static final CompoundName topKProbability = CompoundName.from(DISPATCH + "." 
+ TOP_K_PROBABILITY); + private final InvokerFactoryFactory invokerFactories; private final DispatchConfig dispatchConfig; - private final RpcResourcePool rpcResourcePool; + private final RpcConnectionPool rpcResourcePool; private final SearchCluster searchCluster; - private final ClusterMonitor<Node> clusterMonitor; private volatile VolatileItems volatileItems; private static class VolatileItems { + final LoadBalancer loadBalancer; final InvokerFactory invokerFactory; - VolatileItems(LoadBalancer loadBalancer, InvokerFactory invokerFactory) { + final ClusterMonitor<Node> clusterMonitor; + final AtomicInteger inflight = new AtomicInteger(1); // Initial reference. + Runnable cleanup = () -> { }; + + VolatileItems(LoadBalancer loadBalancer, InvokerFactory invokerFactory, ClusterMonitor<Node> clusterMonitor) { this.loadBalancer = loadBalancer; this.invokerFactory = invokerFactory; + this.clusterMonitor = clusterMonitor; + } + + private void countDown() { + if (inflight.decrementAndGet() == 0) cleanup.run(); + } + + private class Ref implements AutoCloseable { + boolean handedOff = false; + { inflight.incrementAndGet(); } + VolatileItems get() { return VolatileItems.this; } + /** Hands off the reference to the given invoker, which will decrement the counter when closed. */ + <T extends CloseableInvoker> T register(T invoker) { + invoker.teardown((__, ___) -> countDown()); + handedOff = true; + return invoker; + } + @Override public void close() { if ( ! 
handedOff) countDown(); } } + } private static final QueryProfileType argumentType; @@ -81,34 +107,105 @@ public class Dispatcher extends AbstractComponent { public static QueryProfileType getArgumentType() { return argumentType; } + interface InvokerFactoryFactory { + InvokerFactory create(RpcConnectionPool rpcConnectionPool, SearchGroups searchGroups, DispatchConfig dispatchConfig); + } + @Inject - public Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, - DispatchNodesConfig nodesConfig, VipStatus vipStatus) { - this.dispatchConfig = dispatchConfig; - rpcResourcePool = new RpcResourcePool(dispatchConfig, nodesConfig); - searchCluster = new SearchCluster(clusterId.stringValue(), dispatchConfig.minActivedocsPercentage(), - toNodes(nodesConfig), vipStatus, new RpcPingFactory(rpcResourcePool)); - clusterMonitor = new ClusterMonitor<>(searchCluster, true); - volatileItems = update(null); + public Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, DispatchNodesConfig nodesConfig, VipStatus vipStatus) { + this(clusterId, dispatchConfig, new RpcResourcePool(dispatchConfig, nodesConfig), nodesConfig, vipStatus, RpcInvokerFactory::new); initialWarmup(dispatchConfig.warmuptime()); } - /* For simple mocking in tests. 
Beware that searchCluster is shutdown on in deconstruct() */ - Dispatcher(ClusterMonitor<Node> clusterMonitor, SearchCluster searchCluster, - DispatchConfig dispatchConfig, InvokerFactory invokerFactory) { + Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool, + DispatchNodesConfig nodesConfig, VipStatus vipStatus, InvokerFactoryFactory invokerFactories) { + this(dispatchConfig, rpcConnectionPool, + new SearchCluster(clusterId.stringValue(), dispatchConfig.minActivedocsPercentage(), + toNodes(nodesConfig), vipStatus, new RpcPingFactory(rpcConnectionPool)), + invokerFactories); + } + + Dispatcher(DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool, + SearchCluster searchCluster, InvokerFactoryFactory invokerFactories) { + this(dispatchConfig, rpcConnectionPool, searchCluster, new ClusterMonitor<>(searchCluster, false), invokerFactories); + this.volatileItems.clusterMonitor.start(); // Populate nodes to monitor before starting it. + } + + Dispatcher(DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool, + SearchCluster searchCluster, ClusterMonitor<Node> clusterMonitor, InvokerFactoryFactory invokerFactories) { this.dispatchConfig = dispatchConfig; - this.rpcResourcePool = null; + this.rpcResourcePool = rpcConnectionPool; this.searchCluster = searchCluster; - this.clusterMonitor = clusterMonitor; - this.volatileItems = update(invokerFactory); + this.invokerFactories = invokerFactories; + this.volatileItems = update(clusterMonitor); + searchCluster.addMonitoring(clusterMonitor); } - private VolatileItems update(InvokerFactory invokerFactory) { + /* For simple mocking in tests. 
Beware that searchCluster is shutdown in deconstruct() */ + Dispatcher(ClusterMonitor<Node> clusterMonitor, SearchCluster searchCluster, + DispatchConfig dispatchConfig, InvokerFactory invokerFactory) { + this(dispatchConfig, null, searchCluster, clusterMonitor, (__, ___, ____) -> invokerFactory); + } + + /** Returns the snapshot of volatile items that need to be kept together, incrementing its reference counter. */ + private VolatileItems.Ref volatileItems() { + return volatileItems.new Ref(); + } + + /** + * This is called whenever we have new config for backend nodes. + * Normally, we'd want to handle partial failure of the component graph, by reinstating the old state; + * however, in this case, such a failure would be local to this container, and we instead want to keep + * the newest config, as that is what most accurately represents the actual backend. + * + * The flow of reconfiguration is: + * 1. The volatile snapshot of disposable items is replaced with a new one that only references updated nodes. + * 2. Dependencies of the items in 1., which must be configured, are updated, yielding a list of resources to close. + * 3. When inflight operations against the old snapshot are done, all obsolete resources are cleaned up. + * + * Ownership details: + * 1. The RPC resource pool is owned by the dispatcher, and is updated on node set changes; + * it contains the means by which the container talks to backend nodes, so cleanup must be delayed until safe. + * 2. The invocation factory is owned by the volatile snapshot, and is swapped atomically with it; + * it is used by the dispatcher to create ephemeral invokers, which must complete before cleanup (above) can happen. + * 3. The load balancer is owned by the volatile snapshot, and is swapped atomically with it; + * it is used internally by the dispatcher to select search nodes for queries, and is discarded with its snapshot. + * 4. 
The cluster monitor is a subordinate to the search cluster, and does whatever that tells it to, at any time; + * it is technically owned by the volatile snapshot, but mostly to show it is swapped together with that. + * 5. The search cluster is owned by the dispatcher, and is updated on node set changes; + * its responsibility is to keep track of the state of the backend, and to provide a view of it to the dispatcher, + * as well as keep the container vip status updated accordingly; it should therefore preserve as much as possible + * of its state across reconfigurations: with new node config, it will immediately forget obsolete nodes, and set + * coverage information as if the new nodes have zero documents, before even checking their status; this is fine + * under the assumption that this is the common case, i.e., new nodes have no documents yet. + */ + void updateWithNewConfig(DispatchNodesConfig nodesConfig) { + try (var items = volatileItems()) { // Marking a reference to the old snapshot, which we want to have cleaned up. + items.get().countDown(); // Decrement for its initial creation reference, so it may reach 0. + + // Let the RPC pool know about the new nodes, and set up the delayed cleanup that we need to do. + Collection<? extends AutoCloseable> connectionPoolsToClose = rpcResourcePool.updateNodes(nodesConfig); + items.get().cleanup = () -> { + for (AutoCloseable pool : connectionPoolsToClose) { + try { pool.close(); } catch (Exception ignored) { } + } + }; + + // Update the nodes the search cluster keeps track of, and what nodes are monitored. + ClusterMonitor<Node> newMonitor = searchCluster.updateNodes(toNodes(nodesConfig), dispatchConfig.minActivedocsPercentage()); + + // Update the snapshot to use the new nodes set in the search cluster; the RPC pool is ready for this. + this.volatileItems = update(newMonitor); + + // Wait for the old cluster monitor to die; it may be pinging nodes we want to shut down RPC connections to. 
+ items.get().clusterMonitor.shutdown(); + } // Close the old snapshot, which may trigger the RPC cleanup now, or when the last invoker is closed, by a search thread. + } + + private VolatileItems update(ClusterMonitor<Node> clusterMonitor) { var items = new VolatileItems(new LoadBalancer(searchCluster.groupList().groups(), toLoadBalancerPolicy(dispatchConfig.distributionPolicy())), - (invokerFactory == null) - ? new RpcInvokerFactory(rpcResourcePool, searchCluster.groupList(), dispatchConfig) - : invokerFactory); - searchCluster.addMonitoring(clusterMonitor); + invokerFactories.create(rpcResourcePool, searchCluster.groupList(), dispatchConfig), + clusterMonitor); return items; } @@ -158,27 +255,30 @@ public class Dispatcher extends AbstractComponent { @Override public void deconstruct() { // The clustermonitor must be shutdown first as it uses the invokerfactory through the searchCluster. - clusterMonitor.shutdown(); + volatileItems.clusterMonitor.shutdown(); if (rpcResourcePool != null) { rpcResourcePool.close(); } } public FillInvoker getFillInvoker(Result result, VespaBackEndSearcher searcher) { - return volatileItems.invokerFactory.createFillInvoker(searcher, result); + try (var items = volatileItems()) { // Take a snapshot, and release it when we're done. 
+ return items.register(items.get().invokerFactory.createFillInvoker(searcher, result)); + } } public SearchInvoker getSearchInvoker(Query query, VespaBackEndSearcher searcher) { - VolatileItems items = volatileItems; // Take a snapshot - int maxHitsPerNode = dispatchConfig.maxHitsPerNode(); - SearchInvoker invoker = getSearchPathInvoker(query, searcher, searchCluster.groupList(), items.invokerFactory, maxHitsPerNode) - .orElseGet(() -> getInternalInvoker(query, searcher, searchCluster, items.loadBalancer, items.invokerFactory, maxHitsPerNode)); - - if (query.properties().getBoolean(com.yahoo.search.query.Model.ESTIMATE)) { - query.setHits(0); - query.setOffset(0); + try (var items = volatileItems()) { // Take a snapshot, and release it when we're done. + int maxHitsPerNode = dispatchConfig.maxHitsPerNode(); + SearchInvoker invoker = getSearchPathInvoker(query, searcher, searchCluster.groupList(), items.get().invokerFactory, maxHitsPerNode) + .orElseGet(() -> getInternalInvoker(query, searcher, searchCluster, items.get().loadBalancer, items.get().invokerFactory, maxHitsPerNode)); + + if (query.properties().getBoolean(com.yahoo.search.query.Model.ESTIMATE)) { + query.setHits(0); + query.setOffset(0); + } + return items.register(invoker); } - return invoker; } /** Builds an invoker based on searchpath */ diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java new file mode 100644 index 00000000000..625a8bcb6da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java @@ -0,0 +1,37 @@ +package com.yahoo.search.dispatch; + +import com.yahoo.component.ComponentId; +import com.yahoo.config.subscription.ConfigSubscriber; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.messagebus.network.rpc.SlobrokConfigSubscriber; +import com.yahoo.vespa.config.search.DispatchConfig; +import 
com.yahoo.vespa.config.search.DispatchNodesConfig; +import com.yahoo.yolean.UncheckedInterruptedException; + +import java.util.Objects; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +import static java.util.Objects.requireNonNull; + +/** + * @author jonmv + */ +public class ReconfigurableDispatcher extends Dispatcher { + + private final ConfigSubscriber subscriber; + + public ReconfigurableDispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, VipStatus vipStatus) { + super(clusterId, dispatchConfig, new DispatchNodesConfig.Builder().build(), vipStatus); + this.subscriber = new ConfigSubscriber(); + this.subscriber.subscribe(this::updateWithNewConfig, DispatchNodesConfig.class, clusterId.stringValue()); + } + + @Override + public void deconstruct() { + subscriber.close(); + super.deconstruct(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java index 1206277a103..6b134dc23a6 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java @@ -5,7 +5,7 @@ import java.time.Duration; import java.time.Instant; /** - * Contains start and and time. Exposes a duration, and lets you measure the time difference between 2 requests. + * Contains start and end time. Exposes a duration, and lets you measure the time difference between 2 requests. * It does use System.nanoTime to get a steady clock. 
* * @author baldersheim diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java index 22ed8b6d9fa..6c1f666835c 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java @@ -12,7 +12,7 @@ import java.util.Optional; * * @author bratseth */ -interface Client { +public interface Client { /** Creates a connection to a particular node in this */ NodeConnection createConnection(String hostname, int port); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java index fd8e0e4f81a..a93ddb0b360 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java @@ -1,11 +1,27 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.dispatch.rpc; +import com.yahoo.vespa.config.search.DispatchNodesConfig; + +import java.util.Collection; +import java.util.List; + /** * Interface for getting a connection given a node id. * * @author balderersheim */ -public interface RpcConnectionPool { +public interface RpcConnectionPool extends AutoCloseable { + + /** Returns a connection to the given node id. */ Client.NodeConnection getConnection(int nodeId); + + + /** Will return a list of items that need a delayed close when updating node set. */ + default Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) { return List.of(); } + + /** Shuts down all connections in the pool, and the underlying RPC client. 
*/ + @Override + void close(); + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java index 154002c4f77..b6228994ac8 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java @@ -33,7 +33,7 @@ public class RpcInvokerFactory extends InvokerFactory { super(cluster, dispatchConfig); this.rpcResourcePool = rpcResourcePool; this.compressor = new CompressService(); - decodeType = convert(dispatchConfig.summaryDecodePolicy()); + this.decodeType = convert(dispatchConfig.summaryDecodePolicy()); } @Override diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java index 53dc54f7bc5..a59097e5fff 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java @@ -37,7 +37,7 @@ public class RpcPing implements Pinger, Client.ResponseReceiver { this.clusterMonitor = clusterMonitor; this.pingSequenceId = node.createPingSequenceId(); this.pongHandler = pongHandler; - this. 
compressor = compressor; + this.compressor = compressor; } @Override diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java index 63530a7f650..d1f22514481 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java @@ -3,8 +3,10 @@ package com.yahoo.search.dispatch.rpc; import com.yahoo.search.dispatch.FillInvoker; import com.yahoo.search.dispatch.rpc.Client.NodeConnection; +import com.yahoo.search.dispatch.rpc.RpcClient.RpcNodeConnection; import com.yahoo.vespa.config.search.DispatchConfig; import com.yahoo.vespa.config.search.DispatchNodesConfig; +import com.yahoo.vespa.config.search.DispatchNodesConfig.Node; import java.util.ArrayList; import java.util.Collection; @@ -19,7 +21,7 @@ import java.util.concurrent.ThreadLocalRandom; * * @author ollivir */ -public class RpcResourcePool implements RpcConnectionPool, AutoCloseable { +public class RpcResourcePool implements RpcConnectionPool { /** Connections to the search nodes this talks to, indexed by node id ("partid") */ private volatile Map<Integer, NodeConnectionPool> nodeConnectionPools = Map.of(); @@ -35,46 +37,35 @@ public class RpcResourcePool implements RpcConnectionPool, AutoCloseable { } public RpcResourcePool(DispatchConfig dispatchConfig, DispatchNodesConfig nodesConfig) { - super(); rpcClient = new RpcClient("dispatch-client", dispatchConfig.numJrtTransportThreads()); numConnections = dispatchConfig.numJrtConnectionsPerNode(); - updateNodes(nodesConfig).forEach(item -> { - try { - item.close(); - } catch (Exception e) {} + updateNodes(nodesConfig).forEach(pool -> { + try { pool.close(); } catch (Exception ignored) { } // Shouldn't throw. 
}); } - /** Will return a list of items that need a delayed close */ - public Collection<AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) { - List<AutoCloseable> toClose = new ArrayList<>(); - var builder = new HashMap<Integer, NodeConnectionPool>(); + @Override + public Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) { + Map<Integer, NodeConnectionPool> currentPools = new HashMap<>(nodeConnectionPools); + Map<Integer, NodeConnectionPool> nextPools = new HashMap<>(); // Who can be reused - for (var node : nodesConfig.node()) { - var prev = nodeConnectionPools.get(node.key()); - NodeConnection nc = prev != null ? prev.nextConnection() : null; - if (nc instanceof RpcClient.RpcNodeConnection rpcNodeConnection - && rpcNodeConnection.getPort() == node.port() - && rpcNodeConnection.getHostname().equals(node.host())) + for (Node node : nodesConfig.node()) { + if ( currentPools.containsKey(node.key()) + && currentPools.get(node.key()).nextConnection() instanceof RpcNodeConnection rpcNodeConnection + && rpcNodeConnection.getPort() == node.port() + && rpcNodeConnection.getHostname().equals(node.host())) { - builder.put(node.key(), prev); + nextPools.put(node.key(), currentPools.remove(node.key())); } else { - var connections = new ArrayList<NodeConnection>(numConnections); + ArrayList<NodeConnection> connections = new ArrayList<>(numConnections); for (int i = 0; i < numConnections; i++) { connections.add(rpcClient.createConnection(node.host(), node.port())); } - builder.put(node.key(), new NodeConnectionPool(connections)); + nextPools.put(node.key(), new NodeConnectionPool(connections)); } } - // Who are not needed any more - nodeConnectionPools.forEach((key, pool) -> { - var survivor = builder.get(key); - if (survivor == null || pool != survivor) { - toClose.add(pool); - } - }); - this.nodeConnectionPools = Map.copyOf(builder); - return toClose; + this.nodeConnectionPools = Map.copyOf(nextPools); + return currentPools.values(); } 
@Override diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index 121c12335f5..c8af5cea5aa 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.dispatch.searchcluster; +import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; @@ -51,7 +52,7 @@ public class Group { /** * Returns whether this group has sufficient active documents - * (compared to other groups) that is should receive traffic + * (compared to other groups) that should receive traffic */ public boolean hasSufficientCoverage() { return hasSufficientCoverage; @@ -66,14 +67,16 @@ public class Group { } public void aggregateNodeValues() { - long activeDocs = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(Node::getActiveDocuments).sum(); + List<Node> workingNodes = new ArrayList<>(nodes); + workingNodes.removeIf(node -> node.isWorking() != Boolean.TRUE); + long activeDocs = workingNodes.stream().mapToLong(Node::getActiveDocuments).sum(); activeDocuments = activeDocs; - targetActiveDocuments = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(Node::getTargetActiveDocuments).sum(); + targetActiveDocuments = workingNodes.stream().mapToLong(Node::getTargetActiveDocuments).sum(); isBlockingWrites = nodes.stream().anyMatch(Node::isBlockingWrites); - int numWorkingNodes = workingNodes(); + int numWorkingNodes = workingNodes.size(); if (numWorkingNodes > 0) { long average = activeDocs / numWorkingNodes; - long skew = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(node -> Math.abs(node.getActiveDocuments() - 
average)).sum(); + long skew = workingNodes.stream().mapToLong(node -> Math.abs(node.getActiveDocuments() - average)).sum(); boolean balanced = skew <= activeDocs * maxContentSkew; if (balanced != isBalanced) { if (!isSparse()) diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 9c65cb3d4c0..3c8950f1f7f 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -6,15 +6,18 @@ import com.yahoo.net.HostName; import com.yahoo.prelude.Pong; import com.yahoo.search.cluster.ClusterMonitor; import com.yahoo.search.cluster.NodeManager; +import com.yahoo.yolean.UncheckedInterruptedException; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.Executor; import java.util.logging.Logger; -import java.util.stream.Collectors; + +import static java.util.stream.Collectors.groupingBy; /** * A model of a search cluster we might want to dispatch queries to. 
@@ -28,7 +31,7 @@ public class SearchCluster implements NodeManager<Node> { private final String clusterId; private final VipStatus vipStatus; private final PingFactory pingFactory; - private final SearchGroupsImpl groups; + private volatile SearchGroupsImpl groups; private volatile long nextLogTime = 0; /** @@ -45,6 +48,7 @@ public class SearchCluster implements NodeManager<Node> { VipStatus vipStatus, PingFactory pingFactory) { this(clusterId, toGroups(nodes, minActivedocsPercentage), vipStatus, pingFactory); } + public SearchCluster(String clusterId, SearchGroupsImpl groups, VipStatus vipStatus, PingFactory pingFactory) { this.clusterId = clusterId; this.vipStatus = vipStatus; @@ -55,13 +59,28 @@ public class SearchCluster implements NodeManager<Node> { @Override public String name() { return clusterId; } - public VipStatus getVipStatus() { return vipStatus; } + + /** Sets the new nodes to monitor to be the new nodes, but keep any existing node instances which equal the new ones. */ + public ClusterMonitor<Node> updateNodes(Collection<Node> newNodes, double minActivedocsPercentage) { + Collection<Node> retainedNodes = groups.nodes(); + Collection<Node> currentNodes = new HashSet<>(newNodes); + retainedNodes.retainAll(currentNodes); // Throw away all old nodes which are not in the new set. + currentNodes.removeIf(retainedNodes::contains); // Throw away all new nodes for which we have more information in an old object. + Collection<Node> addedNodes = List.copyOf(currentNodes); + currentNodes.addAll(retainedNodes); // Keep the old nodes that were replaced in the new set. 
+ SearchGroupsImpl groups = toGroups(currentNodes, minActivedocsPercentage); + ClusterMonitor<Node> monitor = new ClusterMonitor<>(this, false); + for (Node node : groups.nodes()) monitor.add(node, true); + monitor.start(); + try { while (addedNodes.stream().anyMatch(node -> node.isWorking() == null)) { Thread.sleep(1); } } + catch (InterruptedException e) { throw new UncheckedInterruptedException(e, true); } + pingIterationCompleted(groups); + this.groups = groups; + return monitor; + } public void addMonitoring(ClusterMonitor<Node> clusterMonitor) { - for (var group : groups()) { - for (var node : group.nodes()) - clusterMonitor.add(node, true); - } + for (Node node : groups.nodes()) clusterMonitor.add(node, true); } private static Node findLocalCorpusDispatchTarget(String selfHostname, SearchGroups groups) { @@ -86,14 +105,14 @@ public class SearchCluster implements NodeManager<Node> { private static SearchGroupsImpl toGroups(Collection<Node> nodes, double minActivedocsPercentage) { Map<Integer, Group> groups = new HashMap<>(); - for (Map.Entry<Integer, List<Node>> group : nodes.stream().collect(Collectors.groupingBy(Node::group)).entrySet()) { - Group g = new Group(group.getKey(), group.getValue()); - groups.put(group.getKey(), g); - } + nodes.stream().collect(groupingBy(Node::group)).forEach((groupId, groupNodes) -> { + groups.put(groupId, new Group(groupId, groupNodes)); + }); return new SearchGroupsImpl(Map.copyOf(groups), minActivedocsPercentage); } public SearchGroups groupList() { return groups; } + public Group group(int id) { return groups.get(id); } private Collection<Group> groups() { return groups.groups(); } @@ -107,14 +126,14 @@ public class SearchCluster implements NodeManager<Node> { * or empty if we should not dispatch directly. 
*/ public Optional<Node> localCorpusDispatchTarget() { - if ( localCorpusDispatchTarget == null) return Optional.empty(); + if (localCorpusDispatchTarget == null) return Optional.empty(); // Only use direct dispatch if the local group has sufficient coverage Group localSearchGroup = groups.get(localCorpusDispatchTarget.group()); if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty(); // Only use direct dispatch if the local search node is not down - if ( localCorpusDispatchTarget.isWorking() == Boolean.FALSE) return Optional.empty(); + if (localCorpusDispatchTarget.isWorking() == Boolean.FALSE) return Optional.empty(); return Optional.of(localCorpusDispatchTarget); } @@ -176,7 +195,7 @@ public class SearchCluster implements NodeManager<Node> { return groups().stream().allMatch(group -> group.nodes().stream().allMatch(node -> node.isWorking() != null)); } - public long nonWorkingNodeCount() { + long nonWorkingNodeCount() { return groups().stream().flatMap(group -> group.nodes().stream()).filter(node -> node.isWorking() == Boolean.FALSE).count(); } @@ -194,13 +213,13 @@ public class SearchCluster implements NodeManager<Node> { /** Used by the cluster monitor to manage node status */ @Override - public void ping(ClusterMonitor clusterMonitor, Node node, Executor executor) { + public void ping(ClusterMonitor<Node> clusterMonitor, Node node, Executor executor) { Pinger pinger = pingFactory.createPinger(node, clusterMonitor, new PongCallback(node, clusterMonitor)); pinger.ping(); } - private void pingIterationCompletedSingleGroup() { - Group group = groups().iterator().next(); + private void pingIterationCompletedSingleGroup(SearchGroupsImpl groups) { + Group group = groups.groups().iterator().next(); group.aggregateNodeValues(); // With just one group sufficient coverage may not be the same as full coverage, as the // group will always be marked sufficient for use. 
@@ -209,10 +228,10 @@ public class SearchCluster implements NodeManager<Node> { trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments()); } - private void pingIterationCompletedMultipleGroups() { - groups().forEach(Group::aggregateNodeValues); + private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) { + groups.groups().forEach(Group::aggregateNodeValues); long medianDocuments = groups.medianDocumentsPerGroup(); - for (Group group : groups()) { + for (Group group : groups.groups()) { boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments); updateSufficientCoverage(group, sufficientCoverage); trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); @@ -226,20 +245,20 @@ public class SearchCluster implements NodeManager<Node> { */ @Override public void pingIterationCompleted() { + pingIterationCompleted(groups); + } + + private void pingIterationCompleted(SearchGroupsImpl groups) { if (groups.size() == 1) { - pingIterationCompletedSingleGroup(); + pingIterationCompletedSingleGroup(groups); } else { - pingIterationCompletedMultipleGroups(); + pingIterationCompletedMultipleGroups(groups); } } - - /** * Calculate whether a subset of nodes in a group has enough coverage */ - - private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. 
boolean changed = group.fullCoverageStatusChanged(fullCoverage); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java index b041ba28db9..5727931281a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java @@ -1,8 +1,16 @@ package com.yahoo.search.dispatch.searchcluster; +import com.yahoo.stream.CustomCollectors; + import java.util.Collection; +import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.Set; +import static java.util.Comparator.comparingInt; +import static java.util.stream.Collectors.toCollection; +import static java.util.stream.Collectors.toSet; + /** * Simple interface for groups and their nodes in the content cluster * @author baldersheim @@ -14,6 +22,11 @@ public interface SearchGroups { default boolean isEmpty() { return size() == 0; } + default Set<Node> nodes() { + return groups().stream().flatMap(group -> group.nodes().stream()) + .sorted(comparingInt(Node::key)) + .collect(toCollection(LinkedHashSet::new)); + } int size(); boolean isPartialGroupCoverageSufficient(Collection<Node> nodes); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java index 514f0de4fec..3c5dbe9927a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java @@ -3,10 +3,8 @@ package com.yahoo.search.dispatch.searchcluster; import com.google.common.math.Quantiles; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.Set; -import 
java.util.stream.Collectors; public class SearchGroupsImpl implements SearchGroups { @@ -42,4 +40,5 @@ public class SearchGroupsImpl implements SearchGroups { double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray(); return (long) Quantiles.median().computeInPlace(activeDocuments); } + } diff --git a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj index 39ea6435393..a5953964b39 100644 --- a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj +++ b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj @@ -274,7 +274,8 @@ TermProduction nonphraseTermProduction() : TermProduction termProduction; } { - ( termProduction = referenceTermProduction() | + ( LOOKAHEAD(2) + termProduction = referenceTermProduction() | termProduction = literalTermProduction() ) { return termProduction; @@ -314,8 +315,10 @@ ReferenceTermProduction referenceTermProduction() : { String reference; boolean produceAll = false; + String comparisonPrefix = ""; } { + ( comparisonPrefix = comparisonPrefix() )? <LEFTSQUAREBRACKET> reference = referenceIdentifier() (<STAR> { produceAll = true; })? @@ -326,10 +329,17 @@ ReferenceTermProduction referenceTermProduction() : LiteralTermProduction literalTermProduction() : { String literal; + String comparisonPrefix = ""; } { - literal = identifier() - { return new LiteralTermProduction(literal); } + ( ( comparisonPrefix = comparisonPrefix() )? 
literal = identifier() ) + { return new LiteralTermProduction(comparisonPrefix + literal); } +} + +String comparisonPrefix() : {} +{ + <SMALLER> { return "<"; } | + <LARGER> { return ">"; } } TermType termType() : diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java new file mode 100644 index 00000000000..2cdbfbdb3fb --- /dev/null +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java @@ -0,0 +1,17 @@ +package com.yahoo.prelude.semantics.test; + +import org.junit.jupiter.api.Test; + +public class RangesTestCase extends RuleBaseAbstractTestCase { + + public RangesTestCase() { + super("ranges.sr"); + } + + @Test + void testPrice() { + assertSemantics("AND shoes price:<5000", + "shoes under 5000"); + } + +} diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr new file mode 100644 index 00000000000..3b0120fd18a --- /dev/null +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr @@ -0,0 +1,2 @@ +under 5000 -> price:<5000; +over [...] -> price:>[...]; diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java index 2603f89b546..1278afe3759 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java @@ -1,27 +1,51 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.search.dispatch; +import com.yahoo.compress.CompressionType; +import com.yahoo.prelude.Pong; import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.cluster.ClusterMonitor; +import com.yahoo.search.dispatch.Dispatcher.InvokerFactoryFactory; +import com.yahoo.search.dispatch.rpc.Client.NodeConnection; +import com.yahoo.search.dispatch.rpc.Client.ResponseReceiver; +import com.yahoo.search.dispatch.rpc.RpcConnectionPool; import com.yahoo.search.dispatch.searchcluster.MockSearchCluster; -import com.yahoo.search.dispatch.searchcluster.SearchGroups; import com.yahoo.search.dispatch.searchcluster.Node; import com.yahoo.search.dispatch.searchcluster.PingFactory; import com.yahoo.search.dispatch.searchcluster.Pinger; import com.yahoo.search.dispatch.searchcluster.PongHandler; import com.yahoo.search.dispatch.searchcluster.SearchCluster; +import com.yahoo.search.dispatch.searchcluster.SearchGroups; +import com.yahoo.search.searchchain.Execution; import com.yahoo.vespa.config.search.DispatchConfig; +import com.yahoo.vespa.config.search.DispatchNodesConfig; import org.junit.jupiter.api.Test; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.Phaser; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import static com.yahoo.search.dispatch.searchcluster.MockSearchCluster.createDispatchConfig; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -149,6 +173,172 @@ public class DispatcherTest { dispatcher.deconstruct(); } + @Test + void testRpcResourceShutdownOnReconfiguration() throws InterruptedException, ExecutionException, IOException { + // Ping factory lets us tick each ping, so we may delay shutdown, due to monitor thread RPC usage. + Map<Integer, Phaser> pingPhasers = new ConcurrentHashMap<>(); + pingPhasers.put(0, new Phaser(2)); + pingPhasers.put(1, new Phaser(2)); + pingPhasers.put(2, new Phaser(2)); + + PingFactory pingFactory = (node, monitor, pongHandler) -> () -> { + pingPhasers.get(node.key()).arriveAndAwaitAdvance(); + pongHandler.handle(new Pong(2, 2)); + pingPhasers.get(node.key()).arriveAndAwaitAdvance(); + }; + + // Search cluster uses the ping factory, and zero nodes initially, later configured with two nodes. + SearchCluster cluster = new MockSearchCluster("cid", 0, 1, pingFactory); + + // Dummy RPC layer where we manually tick responses for each node. + // When a response is let go, we verify the RPC resource is not yet closed. + // This is signalled by terminating its phaser, which is done by the dispatcher in delayed cleanup. + // We verify in the end that all connections have been shut down, prior to shutting down the RPC pool proper. + Map<Integer, Boolean > rpcResources = new HashMap<>(); + AtomicLong cleanupThreadId = new AtomicLong(); + AtomicInteger nodeIdOfSearcher0 = new AtomicInteger(-1); + RpcConnectionPool rpcPool = new RpcConnectionPool() { + // Returns a connection that lets us advance the searcher when we want to, as well as tracking which threads do what. 
+ @Override public NodeConnection getConnection(int nodeId) { + nodeIdOfSearcher0.set(nodeId); + return new NodeConnection() { + @Override public void request(String rpcMethod, CompressionType compression, int uncompressedLength, byte[] compressedPayload, ResponseReceiver responseReceiver, double timeoutSeconds) { + assertTrue(rpcResources.get(nodeId)); + } + @Override public void close() { + assertFalse(rpcResources.remove(nodeId)); + } + }; + } + // Verifies cleanup is done by the expected thread, by ID, and cleans up the "RPC connection" (phaser). + @Override public Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig config) { + for (DispatchNodesConfig.Node node : config.node()) + rpcResources.putIfAbsent(node.key(), true); + return rpcResources.keySet().stream() + .filter(key -> config.node().stream().noneMatch(node -> node.key() == key)) + .map(key -> (AutoCloseable) () -> { + assertTrue(rpcResources.put(key, false)); + cleanupThreadId.set(Thread.currentThread().getId()); + getConnection(key).close(); + }) + .toList(); + }; + // In the end, we have reconfigured down to 0 nodes, and no resources should be left running after cleanup. + @Override public void close() { + assertEquals(Map.of(), rpcResources); + } + }; + + // This factory just forwards search to the dummy RPC layer above, nothing more. 
+ InvokerFactoryFactory invokerFactories = (rpcConnectionPool, searchGroups, dispatchConfig) -> new InvokerFactory(searchGroups, dispatchConfig) { + @Override protected Optional<SearchInvoker> createNodeSearchInvoker(VespaBackEndSearcher searcher, Query query, int maxHits, Node node) { + return Optional.of(new SearchInvoker(Optional.of(node)) { + @Override protected Object sendSearchRequest(Query query, Object context) { + rpcPool.getConnection(node.key()).request(null, null, 0, null, null, 0); + return null; + }; + @Override protected InvokerResult getSearchResult(Execution execution) { + return new InvokerResult(new Result(new Query())); + } + @Override protected void release() { } + }); + }; + @Override public FillInvoker createFillInvoker(VespaBackEndSearcher searcher, Result result) { + return new FillInvoker() { + @Override protected void getFillResults(Result result, String summaryClass) { fail(); } + @Override protected void sendFillRequest(Result result, String summaryClass) { fail(); } + @Override protected void release() { fail(); } + }; + } + }; + + Dispatcher dispatcher = new Dispatcher(dispatchConfig, rpcPool, cluster, invokerFactories); + ExecutorService executor = Executors.newFixedThreadPool(1); + + // Set two groups with a single node each. The first cluster-monitor has nothing to do, and is shut down immediately. + // There are also no invokers, so the whole reconfiguration completes once the new cluster monitor has seen all nodes. + Future<?> reconfiguration = executor.submit(() -> { + dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder() + .node(new DispatchNodesConfig.Node.Builder().key(0).group(0).port(123).host("host0")) + .node(new DispatchNodesConfig.Node.Builder().key(1).group(1).port(123).host("host1")) + .build()); + }); + + // Let pings return, to allow the search cluster to reconfigure. 
+ pingPhasers.get(0).arriveAndAwaitAdvance(); + pingPhasers.get(0).arriveAndAwaitAdvance(); + pingPhasers.get(1).arriveAndAwaitAdvance(); + pingPhasers.get(1).arriveAndAwaitAdvance(); + // We need to wait for the cluster to have at least one group, lest dispatch will fail below. + reconfiguration.get(); + assertNotEquals(cleanupThreadId.get(), Thread.currentThread().getId()); + assertEquals(1, cluster.group(0).workingNodes()); + assertEquals(1, cluster.group(1).workingNodes()); + + Node node0 = cluster.group(0).nodes().get(0); // Node0 will be replaced. + Node node1 = cluster.group(1).nodes().get(0); // Node1 will be retained. + + // Start some searches, one against each group, since we have a round-robin policy. + SearchInvoker search0 = dispatcher.getSearchInvoker(new Query(), null); + search0.search(new Query(), null); + // Unknown whether the first or second search hits node0, so we must track that. + int offset = nodeIdOfSearcher0.get(); + SearchInvoker search1 = dispatcher.getSearchInvoker(new Query(), null); + search1.search(new Query(), null); + + // Wait for the current cluster monitor to be mid-ping-round. + pingPhasers.get(0).arriveAndAwaitAdvance(); + + // Then reconfigure the dispatcher with new nodes, replacing node0 with node2. + reconfiguration = executor.submit(() -> { + dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder() + .node(new DispatchNodesConfig.Node.Builder().key(2).group(0).port(123).host("host2")) + .node(new DispatchNodesConfig.Node.Builder().key(1).group(1).port(123).host("host1")) + .build()); + }); + // Reconfiguration starts, but groups are only updated once the search cluster has knowledge about all of them. + pingPhasers.get(1).arriveAndAwaitAdvance(); + pingPhasers.get(1).arriveAndAwaitAdvance(); + pingPhasers.get(2).arriveAndAwaitAdvance(); + // Cluster has not yet updated its group reference. + assertEquals(1, cluster.group(0).workingNodes()); // Node0 is still working. 
+ assertSame(node0, cluster.group(0).nodes().get(0)); + pingPhasers.get(2).arriveAndAwaitAdvance(); + + // Old cluster monitor is waiting for that ping to complete before it can shut down, and let reconfiguration complete. + pingPhasers.get(0).arriveAndAwaitAdvance(); + reconfiguration.get(); + Node node2 = cluster.group(0).nodes().get(0); + assertNotSame(node0, node2); + assertSame(node1, cluster.group(1).nodes().get(0)); + + // Next search should hit group0 again, this time on node2. + SearchInvoker search2 = dispatcher.getSearchInvoker(new Query(), null); + search2.search(new Query(), null); + + // Searches against nodes 1 and 2 complete. + (offset == 0 ? search0 : search1).close(); + search2.close(); + + // We're still waiting for search against node0 to complete, before we can shut down its RPC connection. + assertEquals(Set.of(0, 1, 2), rpcResources.keySet()); + (offset == 0 ? search1 : search0).close(); + // Thread for search 0 should have closed the RPC pool now. + assertEquals(Set.of(1, 2), rpcResources.keySet()); + assertEquals(cleanupThreadId.get(), Thread.currentThread().getId()); + + // Finally, reconfigure down to 0 nodes. 
+ reconfiguration = executor.submit(() -> { + cleanupThreadId.set(Thread.currentThread().getId()); + dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder().build()); + }); + pingPhasers.get(1).forceTermination(); + pingPhasers.get(2).forceTermination(); + reconfiguration.get(); + assertNotEquals(cleanupThreadId.get(), Thread.currentThread().getId()); + dispatcher.deconstruct(); + } + interface FactoryStep { boolean returnInvoker(List<Node> nodes, boolean acceptIncompleteCoverage); } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java index 5fb5b465c69..cd0791a3881 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java @@ -15,7 +15,11 @@ import java.util.Map; public class MockSearchCluster extends SearchCluster { public MockSearchCluster(String clusterId, int groups, int nodesPerGroup) { - super(clusterId, buildGroupListForTest(groups, nodesPerGroup, 88.0), null, null); + this(clusterId, groups, nodesPerGroup, null); + } + + public MockSearchCluster(String clusterId, int groups, int nodesPerGroup, PingFactory pingFactory) { + super(clusterId, buildGroupListForTest(groups, nodesPerGroup, 88.0), null, pingFactory); } @Override diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index 51256ec496e..bfe1aed1084 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -13,11 +13,18 @@ import org.junit.jupiter.api.Test; import java.util.ArrayList; import 
java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.toMap; import static org.junit.jupiter.api.Assertions.*; /** @@ -31,7 +38,7 @@ public class SearchClusterTest { final int nodesPerGroup; final VipStatus vipStatus; final SearchCluster searchCluster; - final ClusterMonitor clusterMonitor; + final ClusterMonitor<Node> clusterMonitor; final List<AtomicInteger> numDocsPerNode; List<AtomicInteger> pingCounts; @@ -57,7 +64,7 @@ public class SearchClusterTest { } searchCluster = new SearchCluster(clusterId, 100.0, nodes, vipStatus, new Factory(nodesPerGroup, numDocsPerNode, pingCounts)); - clusterMonitor = new ClusterMonitor(searchCluster, false); + clusterMonitor = new ClusterMonitor<>(searchCluster, false); searchCluster.addMonitoring(clusterMonitor); } @@ -376,4 +383,37 @@ public class SearchClusterTest { assertTrue(group.isBalanced()); } + @Test + void requireThatPreciselyTheRetainedNodesAreKeptWhenNodesAreUpdated() { + try (State state = new State("query", 2, IntStream.range(0, 6).mapToObj(i -> "node-" + i).toList())) { + List<Node> referenceNodes = List.of(new Node(0, "node-0", 0), + new Node(1, "node-1", 0), + new Node(0, "node-2", 1), + new Node(1, "node-3", 1), + new Node(0, "node-4", 2), + new Node(1, "node-5", 2)); + SearchGroups oldGroups = state.searchCluster.groupList(); + assertEquals(Set.copyOf(referenceNodes), oldGroups.nodes()); + + List<Node> updatedNodes = List.of(new Node(0, "node-1", 0), // Swap node-0 and node-1 + new Node(1, "node-0", 0), // Swap node-1 and node-0 + new Node(0, "node-4", 1), // Swap node-2 and node-4 + new Node(1, "node-3", 
1), + new Node(0, "node-2", 2), // Swap node-4 and node-2 + new Node(1, "node-6", 2)); // Replace node-6 + state.searchCluster.updateNodes(updatedNodes, 100.0); + SearchGroups newGroups = state.searchCluster.groupList(); + assertEquals(Set.copyOf(updatedNodes), newGroups.nodes()); + + Map<Node, Node> oldNodesByIdentity = newGroups.nodes().stream().collect(toMap(identity(), identity())); + Map<Node, Node> newNodesByIdentity = newGroups.nodes().stream().collect(toMap(identity(), identity())); + assertSame(updatedNodes.get(0), newNodesByIdentity.get(updatedNodes.get(0))); + assertSame(updatedNodes.get(1), newNodesByIdentity.get(updatedNodes.get(1))); + assertSame(updatedNodes.get(2), newNodesByIdentity.get(updatedNodes.get(2))); + assertSame(oldNodesByIdentity.get(referenceNodes.get(3)), newNodesByIdentity.get(updatedNodes.get(3))); + assertSame(updatedNodes.get(4), newNodesByIdentity.get(updatedNodes.get(4))); + assertSame(updatedNodes.get(5), newNodesByIdentity.get(updatedNodes.get(5))); + } + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java index 5df183d9abb..fbdab67869a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java @@ -59,7 +59,9 @@ public class DataplaneTokenSerializer { Instant creationTime = SlimeUtils.instant(versionCursor.field(creationTimeField)); String author = versionCursor.field(authorField).asString(); String expirationStr = versionCursor.field(expirationField).asString(); - Optional<Instant> expiration = expirationStr.equals("<none>") ? Optional.empty() : Optional.of(Instant.parse(expirationStr)); + Optional<Instant> expiration = expirationStr.equals("<none>") ? 
Optional.empty() + : (expirationStr.isBlank() + ? Optional.of(Instant.EPOCH) : Optional.of(Instant.parse(expirationStr))); return new DataplaneTokenVersions.Version(fingerPrint, checkAccessHash, creationTime, expiration, author); }) .toList(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 99ad75d0ec4..4824ccc576a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -979,6 +979,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { fingerprintObject.setString("fingerprint", tokenVersion.fingerPrint().value()); fingerprintObject.setString("created", tokenVersion.creationTime().toString()); fingerprintObject.setString("author", tokenVersion.author()); + fingerprintObject.setString("expiration", tokenVersion.expiration().map(Instant::toString).orElse("<none>")); } } return new SlimeJsonResponse(slime); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java index b6ac65467ac..3cd9d586350 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java @@ -8,7 +8,6 @@ import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.TenantName; import com.yahoo.restapi.RestApiException; -import com.yahoo.vespa.flags.Flags; 
import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.ControllerTester; @@ -42,7 +41,6 @@ import static com.yahoo.application.container.handler.Request.Method.DELETE; import static com.yahoo.application.container.handler.Request.Method.GET; import static com.yahoo.application.container.handler.Request.Method.POST; import static com.yahoo.application.container.handler.Request.Method.PUT; -import static com.yahoo.vespa.hosted.controller.restapi.application.ApplicationApiTest.createApplicationSubmissionData; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -476,7 +474,7 @@ public class ApplicationApiCloudTest extends ControllerContainerCloudTest { (response) -> Assertions.assertThat(new String(response.getBody(), UTF_8)).matches(Pattern.compile(regexGenerateToken)), 200); - String regexListTokens = "\\{\"tokens\":\\[\\{\"id\":\"myTokenId\",\"versions\":\\[\\{\"fingerprint\":\".*\",\"created\":\".*\",\"author\":\"user@test\"}]}]}"; + String regexListTokens = "\\{\"tokens\":\\[\\{\"id\":\"myTokenId\",\"versions\":\\[\\{\"fingerprint\":\".*\",\"created\":\".*\",\"author\":\"user@test\",\"expiration\":\".*\"}]}]}"; tester.assertResponse(request("/application/v4/tenant/scoober/token", GET) .roles(Role.developer(tenantName)), (response) -> Assertions.assertThat(new String(response.getBody(), UTF_8)).matches(Pattern.compile(regexListTokens)), diff --git a/documentapi/abi-spec.json b/documentapi/abi-spec.json index 7662258037c..0252da8a4d1 100644 --- a/documentapi/abi-spec.json +++ b/documentapi/abi-spec.json @@ -1888,7 +1888,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -1914,7 +1915,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + 
"final" ], "methods" : [ "public void <init>()", @@ -1936,7 +1938,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -2095,7 +2098,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -2136,7 +2140,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h index 5c0d9e3317d..49f88edb2f9 100644 --- a/eval/src/vespa/eval/eval/value_type.h +++ b/eval/src/vespa/eval/eval/value_type.h @@ -25,13 +25,13 @@ public: : name(name_in), size(npos) {} Dimension(const vespalib::string &name_in, size_type size_in) noexcept : name(name_in), size(size_in) {} - bool operator==(const Dimension &rhs) const noexcept { + bool operator==(const Dimension &rhs) const { return ((name == rhs.name) && (size == rhs.size)); } - bool operator!=(const Dimension &rhs) const noexcept { return !(*this == rhs); } - bool is_mapped() const noexcept { return (size == npos); } - bool is_indexed() const noexcept { return (size != npos); } - bool is_trivial() const noexcept { return (size == 1); } + bool operator!=(const Dimension &rhs) const { return !(*this == rhs); } + bool is_mapped() const { return (size == npos); } + bool is_indexed() const { return (size != npos); } + bool is_trivial() const { return (size == 1); } }; private: @@ -39,10 +39,10 @@ private: CellType _cell_type; std::vector<Dimension> _dimensions; - ValueType() noexcept + ValueType() : _error(true), _cell_type(CellType::DOUBLE), _dimensions() {} - ValueType(CellType cell_type_in, std::vector<Dimension> &&dimensions_in) noexcept + ValueType(CellType cell_type_in, std::vector<Dimension> &&dimensions_in) : _error(false), _cell_type(cell_type_in), _dimensions(std::move(dimensions_in)) {} static ValueType 
error_if(bool has_error, ValueType else_type); @@ -57,7 +57,7 @@ public: CellMeta cell_meta() const { return {_cell_type, is_double()}; } bool is_error() const { return _error; } bool is_double() const; - bool has_dimensions() const noexcept { return !_dimensions.empty(); } + bool has_dimensions() const { return !_dimensions.empty(); } bool is_sparse() const; bool is_dense() const; bool is_mixed() const; @@ -70,12 +70,12 @@ public: std::vector<Dimension> mapped_dimensions() const; size_t dimension_index(const vespalib::string &name) const; std::vector<vespalib::string> dimension_names() const; - bool operator==(const ValueType &rhs) const noexcept { + bool operator==(const ValueType &rhs) const { return ((_error == rhs._error) && (_cell_type == rhs._cell_type) && (_dimensions == rhs._dimensions)); } - bool operator!=(const ValueType &rhs) const noexcept { return !(*this == rhs); } + bool operator!=(const ValueType &rhs) const { return !(*this == rhs); } ValueType map() const; ValueType reduce(const std::vector<vespalib::string> &dimensions_in) const; diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index c3788a20ddc..0dd0c885eee 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -49,8 +49,8 @@ public class Flags { private static volatile TreeMap<FlagId, FlagDefinition> flags = new TreeMap<>(); public static final UnboundBooleanFlag IPV6_IN_GCP = defineFeatureFlag( - "ipv6-in-gcp", false, - List.of("hakonhall"), "2023-05-15", "2023-07-15", + "ipv6-in-gcp", true, + List.of("hakonhall"), "2023-05-15", "2023-08-01", "Provision GCP hosts with external IPv6 addresses", "Takes effect on the next host provisioning"); @@ -429,6 +429,13 @@ public class Flags { "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); + public static final UnboundBooleanFlag USE_RECONFIGURABLE_DISPATCHER = defineFeatureFlag( + 
"use-reconfigurable-dispatcher", false, + List.of("jonmv"), "2023-07-14", "2023-10-01", + "Whether to set up a ReconfigurableDispatcher with config self-sub for backend nodes", + "Takes effect at redeployment", + ZONE_ID, APPLICATION_ID); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, String createdAt, String expiresAt, String description, diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index f99c0e32a36..348fd97e869 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -362,6 +362,12 @@ public class PermanentFlags { "Takes effect on next redeployment", APPLICATION_ID); + public static final UnboundBooleanFlag AUTOSCALING = defineFeatureFlag( + "autoscaling", true, + "Whether to enable autoscaling", + "Takes effect immediately", + APPLICATION_ID); + private PermanentFlags() {} private static UnboundBooleanFlag defineFeatureFlag( diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java index 554c1d924a2..2dc80fc9d2b 100644 --- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java +++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java @@ -119,12 +119,7 @@ public class CloudDataPlaneFilter extends JsonSecurityRequestFilterBase { var tokenVersion = TokenVersion.of( token.id(), token.fingerprints().get(version), token.checkAccessHashes().get(version), token.expirations().get(version)); - var expiration = tokenVersion.expiration().orElse(null); - if (expiration != null && 
now.isAfter(expiration)) - log.fine(() -> "Ignoring expired version %s of token '%s' (expiration=%s)".formatted( - tokenVersion.fingerprint(), tokenVersion.id(), expiration)); - else - tokens.put(tokenVersion.accessHash(), tokenVersion); + tokens.put(tokenVersion.accessHash(), tokenVersion); } } // Add reverse proxy certificate as required certificate for client definition diff --git a/linguistics-components/abi-spec.json b/linguistics-components/abi-spec.json index 4b713afba83..a2b5a98344f 100644 --- a/linguistics-components/abi-spec.json +++ b/linguistics-components/abi-spec.json @@ -22,7 +22,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -50,7 +51,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -196,7 +198,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -223,7 +226,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git a/messagebus/abi-spec.json b/messagebus/abi-spec.json index acedccf80ba..15a24f82f75 100644 --- a/messagebus/abi-spec.json +++ b/messagebus/abi-spec.json @@ -410,7 +410,8 @@ "com.yahoo.config.ConfigInstance$Builder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -451,7 +452,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -476,7 +478,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", @@ -515,7 +518,8 @@ "com.yahoo.config.ConfigBuilder" ], "attributes" : [ - "public" + "public", + "final" ], "methods" : [ "public void <init>()", diff --git 
a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java index 1c61b65f77b..9936b4612c5 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java @@ -51,6 +51,7 @@ public enum ConfigServerMetrics implements VespaMetrics { NODES_NON_ACTIVE("nodes.nonActive", Unit.NODE, "The number of non-active nodes in a cluster"), NODES_NON_ACTIVE_FRACTION("nodes.nonActiveFraction", Unit.NODE, "The fraction of non-active nodes vs total nodes in a cluster"), NODES_EXCLUSIVE_SWITCH_FRACTION("nodes.exclusiveSwitchFraction", Unit.FRACTION, "The fraction of nodes in a cluster on exclusive network switches"), + NODES_EMPTY_EXCLUSIVE("nodes.emptyExclusive", Unit.NODE, "The number of exclusive hosts that do not have any nodes allocated to them"), CLUSTER_COST("cluster.cost", Unit.DOLLAR_PER_HOUR, "The cost of the nodes allocated to a certain cluster, in $/hr"), CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index 2287b768dee..e586e6277d5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -92,6 +92,7 @@ public class AllocationOptimizer { .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts .divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment .scaled(current.realResources().nodeResources()); + // Combine the scaled resource values computed here // with the currently configured non-scaled values, given in the limits, if 
any var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index c75a5ca0b26..289025f9d21 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -168,7 +168,6 @@ public class ClusterModel { } public static Duration minScalingDuration(ClusterSpec clusterSpec) { - if (clusterSpec.isStateful()) return Duration.ofHours(6); return Duration.ofMinutes(5); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 856d6e07156..92f86325cf7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -7,6 +7,9 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -34,6 +37,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; private final Deployer deployer; private final Metric metric; + private final BooleanFlag enabledFlag; public AutoscalingMaintainer(NodeRepository nodeRepository, Deployer deployer, @@ -43,6 +47,7 @@ 
public class AutoscalingMaintainer extends NodeRepositoryMaintainer { this.autoscaler = new Autoscaler(nodeRepository); this.deployer = deployer; this.metric = metric; + this.enabledFlag = PermanentFlags.AUTOSCALING.bindTo(nodeRepository.flagSource()); } @Override @@ -53,6 +58,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { int attempts = 0; int failures = 0; for (var applicationNodes : activeNodesByApplication().entrySet()) { + boolean enabled = enabledFlag.with(FetchVector.Dimension.APPLICATION_ID, + applicationNodes.getKey().serializedForm()).value(); + if (!enabled) continue; for (var clusterNodes : nodesByCluster(applicationNodes.getValue()).entrySet()) { attempts++; if ( ! autoscale(applicationNodes.getKey(), clusterNodes.getKey())) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index 8213286639c..8a9a29f58c6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningThrottler; import java.time.Duration; import java.time.Instant; @@ -57,6 +58,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { private final HostProvisioner hostProvisioner; private final ListFlag<ClusterCapacity> preprovisionCapacityFlag; + private final ProvisioningThrottler throttler; HostCapacityMaintainer(NodeRepository nodeRepository, Duration 
interval, @@ -66,6 +68,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { super(nodeRepository, interval, metric); this.hostProvisioner = hostProvisioner; this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource); + this.throttler = new ProvisioningThrottler(nodeRepository, metric); } @Override @@ -203,19 +206,23 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType()); nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(), toNodeResources(clusterCapacityDeficit), - clusterType.map(ClusterSpec.Type::from))); + clusterType.map(ClusterSpec.Type::from), + nodeList)); } } - private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType) { + private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType, NodeList allNodes) { try { + if (throttler.throttle(allNodes, Agent.HostCapacityMaintainer)) { + throw new NodeAllocationException("Host provisioning is being throttled", true); + } Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); - List<Node> hosts = new ArrayList<>(); HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), osVersion, HostSharing.shared, clusterType, Optional.empty(), nodeRepository().zone().cloud().account(), false); + List<Node> hosts = new ArrayList<>(); hostProvisioner.provisionHosts(request, provisionedHosts -> { hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(Duration.ZERO)).toList()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 3b846351b36..15913fec5ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -21,11 +21,13 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.ClusterId; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.persistence.CacheStats; import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; +import java.time.Instant; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -64,7 +66,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { @Override public double maintain() { // Sort by hostname to get deterministic metric reporting order (and hopefully avoid changes - // to metric reporting time so we get double reporting or no reporting within a minute) + // to metric reporting time, so we get double reporting or no reporting within a minute) NodeList nodes = nodeRepository().nodes().list().sortedBy(Comparator.comparing(Node::hostname)); ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot(); @@ -79,6 +81,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { updateRepairTicketMetrics(nodes); updateAllocationMetrics(nodes); updateClusterMetrics(nodes); + updateEmptyExclusiveHosts(nodes); return 1.0; } @@ -386,6 +389,19 @@ public class MetricsReporter extends NodeRepositoryMaintainer { .forEach((status, number) -> metric.set(ConfigServerMetrics.HOSTED_VESPA_BREAKFIXED_HOSTS.baseName(), number, getContext(Map.of("status", status)))); } + private void 
updateEmptyExclusiveHosts(NodeList nodes) { + Instant now = nodeRepository().clock().instant(); + Duration minActivePeriod = Duration.ofMinutes(10); + int emptyHosts = nodes.parents().state(State.active) + .matching(node -> (node.type() != NodeType.host && node.type().isHost()) || + node.exclusiveToApplicationId().isPresent()) + .matching(host -> host.history().hasEventBefore(History.Event.Type.activated, + now.minus(minActivePeriod))) + .matching(host -> nodes.childrenOf(host).state(State.active).isEmpty()) + .size(); + metric.set(ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), emptyHosts, null); + } + static Map<String, String> dimensions(ApplicationId application, ClusterSpec.Id cluster) { Map<String, String> dimensions = new HashMap<>(dimensions(application)); dimensions.put("clusterid", cluster.value()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index a16290361fb..585a7f341b5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer { /** * Called when a node should be moved to the failed state: Do that if it seems safe, * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host). - * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken. + * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken. 
*/ private void failActive(FailingNode failing) { Optional<Deployment> deployment = deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5)); if (deployment.isEmpty()) return; + boolean redeploy = false; // If the active node that we are trying to fail is of type host, we need to successfully fail all // the children nodes running on it before we fail the host. Failing a child node in a dynamically // provisioned zone may require provisioning new hosts that require the host application lock to be held, // so we must release ours before failing the children. - List<FailingNode> activeChildrenToFail = new ArrayList<>(); - boolean redeploy = false; - try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though - // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail - if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner))) - return; - if (lock.node().state() == Node.State.failed) - return; - if (!Objects.equals(failing.node().state(), lock.node().state())) - return; - failing = new FailingNode(lock.node(), failing.reason); - - String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); - for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) { - if (failingTenantNode.state() == Node.State.active) { - activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure)); - } else if (failingTenantNode.state() != Node.State.failed) { - nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure); + if (failing.node.type().isHost()) { + List<FailingNode> activeChildrenToFail = new ArrayList<>(); + try (var lock = 
nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) { + failing = shouldFail(lock.parent().node(), failing); + if (failing == null) return; + + String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); + for (var failingTenantNode : lock.children()) { + if (failingTenantNode.node().state() == Node.State.active) { + activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure)); + } else if (failingTenantNode.node().state() != Node.State.failed) { + nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure); + } + } + + if (activeChildrenToFail.isEmpty()) { + log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); + markWantToFail(failing.node(), true, lock.parent()); + redeploy = true; } } + // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, + // so failActive() may take a long time to complete, but the remaining children should be fast. + activeChildrenToFail.forEach(this::failActive); + } + else { + try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) { + failing = shouldFail(lock.node(), failing); + if (failing == null) return; - if (activeChildrenToFail.isEmpty()) { log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); markWantToFail(failing.node(), true, lock); redeploy = true; @@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer { // Redeploy to replace failing node if (redeploy) { redeploy(deployment.get(), failing); - return; } + } - // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, - // so failActive() may take a long time to complete, but the remaining children should be fast. 
- activeChildrenToFail.forEach(this::failActive); - + // Returns an updated FailingNode if we should still fail the node, otherwise null + private static FailingNode shouldFail(Node fresh, FailingNode stale) { + // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail + if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner))) + return null; + if (fresh.state() == Node.State.failed) + return null; + if (!Objects.equals(stale.node.state(), fresh.state())) + return null; + return new FailingNode(fresh, stale.reason); } private void redeploy(Deployment deployment, FailingNode failing) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index bf046c09899..1ae9b00d794 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { } boolean redeploy = false; List<String> nodesToDeactivate = new ArrayList<>(); - try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recusrive lock for right order + try (var lock = nodeRepository().applications().lock(application)) { NodeList activeNodes = nodeRepository().nodes().list(Node.State.active); Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application) .retired() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java index 92faacbca23..c388273b1a6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java @@ -258,12 +258,10 @@ public class CuratorDb { Pair<Integer, Node> cached = cachedNodes.getIfPresent(path); if (cached != null && cached.getFirst().equals(stat)) return cached.getSecond(); cachedNodes.invalidate(path); - try { - return cachedNodes.get(path, () -> new Pair<>(stat, read(path, nodeSerializer::fromJson).get())).getSecond(); - } - catch (ExecutionException e) { - throw new UncheckedExecutionException(e.getCause()); - } + Optional<Node> node = session.getData(path).filter(data -> data.length > 0).map(nodeSerializer::fromJson); + if (node.isEmpty()) return null; + cachedNodes.put(path, new Pair<>(stat, node.get())); + return node.get(); }); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 43b8cd08989..bcc63a6704a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -16,6 +16,7 @@ import com.yahoo.config.provision.ProvisionLock; import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; +import com.yahoo.jdisc.Metric; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -61,7 +62,8 @@ public class NodeRepositoryProvisioner implements Provisioner { @Inject public NodeRepositoryProvisioner(NodeRepository nodeRepository, Zone zone, - ProvisionServiceProvider provisionServiceProvider) { + ProvisionServiceProvider provisionServiceProvider, + Metric metric) { this.nodeRepository = nodeRepository; this.allocationOptimizer = new 
AllocationOptimizer(nodeRepository); this.capacityPolicies = new CapacityPolicies(nodeRepository); @@ -71,7 +73,8 @@ public class NodeRepositoryProvisioner implements Provisioner { this.nodeResourceLimits = new NodeResourceLimits(nodeRepository); this.preparer = new Preparer(nodeRepository, provisionServiceProvider.getHostProvisioner(), - loadBalancerProvisioner); + loadBalancerProvisioner, + metric); this.activator = new Activator(nodeRepository, loadBalancerProvisioner); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 8975dda8e60..79b1bccbbde 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -7,6 +7,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeAllocationException; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Metric; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; @@ -36,11 +37,13 @@ public class Preparer { private final NodeRepository nodeRepository; private final Optional<HostProvisioner> hostProvisioner; private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner; + private final ProvisioningThrottler throttler; - public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { + public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner, Metric metric) { this.nodeRepository = nodeRepository; this.hostProvisioner = hostProvisioner; this.loadBalancerProvisioner = 
loadBalancerProvisioner; + this.throttler = new ProvisioningThrottler(nodeRepository, metric); } /** @@ -110,6 +113,9 @@ public class Preparer { Optional.of(cluster.id()), requested.cloudAccount(), deficit.dueToFlavorUpgrade()); + if (throttler.throttle(allNodes, Agent.system)) { + throw new NodeAllocationException("Host provisioning is being throttled", true); + } hostProvisioner.get().provisionHosts(request, whenProvisioned); } catch (NodeAllocationException e) { // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java new file mode 100644 index 00000000000..b08e7dbccb0 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.History; + +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; +import java.util.logging.Logger; + +/** + * Throttles provisioning of new hosts in dynamically provisioned zones. 
+ * + * @author mpolden + */ +public class ProvisioningThrottler { + + /** Metric that indicates whether throttling is active where 1 means active and 0 means inactive */ + private static final String throttlingActiveMetric = "throttledHostProvisioning"; + + private static final Logger LOG = Logger.getLogger(ProvisioningThrottler.class.getName()); + + private static final int MIN_SIZE = 100; + private static final int MAX_GROWTH = 200; + private static final double MAX_GROWTH_RATE = 0.4; + + private final NodeRepository nodeRepository; + private final Metric metric; + + public ProvisioningThrottler(NodeRepository nodeRepository, Metric metric) { + this.nodeRepository = Objects.requireNonNull(nodeRepository); + this.metric = Objects.requireNonNull(metric); + } + + private Duration window() { + return nodeRepository.zone().system().isCd() ? Duration.ofHours(2) : Duration.ofHours(8); + } + + /** Returns whether provisioning should be throttled at given instant */ + public boolean throttle(NodeList allNodes, Agent agent) { + Duration window = window(); + Instant startOfWindow = nodeRepository.clock().instant().minus(window); + NodeList hosts = allNodes.hosts(); + int existingHosts = hosts.not().state(Node.State.deprovisioned).size(); + int provisionedRecently = hosts.matching(host -> host.history().hasEventAfter(History.Event.Type.provisioned, startOfWindow)) + .size(); + boolean throttle = throttle(provisionedRecently, existingHosts, window, agent); + metric.set(throttlingActiveMetric, throttle ? 
1 : 0, null); + return throttle; + } + + static boolean throttle(int recent, int total, Duration window, Agent agent) { + if (total < MIN_SIZE && recent < MAX_GROWTH) return false; // Allow burst in small zones + int maxGrowth = Math.min(MAX_GROWTH, (int) (total * MAX_GROWTH_RATE)); + boolean throttle = recent > maxGrowth; + if (throttle) { + LOG.warning(String.format("Throttling provisioning of new hosts by %s: %d hosts have been provisioned " + + "in the past %s, which exceeds growth limit of %d", agent, + recent, window, maxGrowth)); + } + return throttle; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java index 65abcbef698..7ded74b7451 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ProvisionLogger; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; +import java.util.logging.Logger; /** * A logger which remembers all messages logged in addition to writing them to standard out. 
@@ -14,18 +15,20 @@ import java.util.logging.Level; */ public class InMemoryProvisionLogger implements ProvisionLogger { + private static final Logger LOG = Logger.getLogger(InMemoryProvisionLogger.class.getName()); + private final List<String> systemLog = new ArrayList<>(); private final List<String> applicationLog = new ArrayList<>(); @Override public void log(Level level, String message) { - System.out.println("ProvisionLogger system " + level + ": " + message); + LOG.info("ProvisionLogger system " + level + ": " + message); systemLog.add(level + ": " + message); } @Override public void logApplicationPackage(Level level, String message) { - System.out.println("ProvisionLogger application " + level + ": " + message); + LOG.info("ProvisionLogger application " + level + ": " + message); applicationLog.add(level + ": " + message); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index 40460e70861..26478d2b566 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -25,6 +25,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provision.ZoneEndpoint; import com.yahoo.config.provision.ZoneEndpoint.AccessType; import com.yahoo.config.provision.ZoneEndpoint.AllowedUrn; +import com.yahoo.jdisc.test.MockMetric; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.curator.mock.MockCurator; @@ -104,7 +105,7 @@ public class MockNodeRepository extends NodeRepository { } private void populate() { - NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider()); + NodeRepositoryProvisioner provisioner = new 
NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider(), new MockMetric()); List<Node> nodes = new ArrayList<>(); // Regular nodes diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index bd31c7578b9..47206265c68 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -43,10 +43,10 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max)) .build(); fixture.tester.clock().advance(Duration.ofDays(2)); - fixture.loader().applyLoad(new Load(0.17f, 0.17, 0.12), 1, true, true, 100); + fixture.loader().applyLoad(new Load(0.8f, 0.17, 0.12), 1, true, true, 100); var result = fixture.autoscale(); assertTrue(result.resources().isEmpty()); - assertNotEquals(Autoscaling.Status.insufficient, result.status()); + assertEquals(Autoscaling.Status.insufficient, result.status()); fixture.tester.clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(0.08f, 0.17, 0.12), 1, true, true, 100); @@ -65,7 +65,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.7f, 10); var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", - 9, 1, 3.6, 8.5, 360.9, + 9, 1, 2.8, 6.8, 288.7, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); @@ -87,7 +87,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(7)); fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 8, 1, 1.0, 8.3, 338.4, + 6, 1, 1.1, 8.8, 346.8, fixture.autoscale()); } @@ -169,6 +169,7 @@ public class AutoscalingTest { @Test public void test_only_autoscaling_up_quickly() { var fixture = 
DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(12)); // Fixture sets last completion to be 1 day into the past fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling up (only) since resource usage is too high", 8, 1, 7.1, 9.3, 75.4, @@ -179,6 +180,7 @@ public class AutoscalingTest { @Test public void test_scale_in_both_directions_when_ok_to_scale_down() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(12)); // Fixture sets last completion to be 1 day into the past fixture.tester.clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling cpu and disk up and memory down", @@ -189,6 +191,7 @@ public class AutoscalingTest { @Test public void test_scale_in_both_directions_when_ok_to_scale_down_exclusive() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester.clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling cpu and disk up, memory follows", @@ -199,33 +202,36 @@ public class AutoscalingTest { @Test public void test_autoscaling_uses_peak() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.loader().applyCpuLoad(0.01, 100); fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 8, 1, 4.3, 9.3, 36.2, + 8, 1, 4.3, 7.4, 29.0, fixture.autoscale()); } @Test public void test_autoscaling_uses_peak_exclusive() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).build(); + fixture.setScalingDuration(Duration.ofHours(9)); 
fixture.loader().applyCpuLoad(0.01, 100); fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 9, 1, 4, 16.0, 150, + 9, 1, 4, 8.0, 100, fixture.autoscale()); } @Test public void test_autoscaling_uses_peak_preprovisioned() { var fixture = DynamicProvisioningTester.fixture().hostCount(15).build(); + fixture.setScalingDuration(Duration.ofHours(9)); fixture.loader().applyCpuLoad(0.01, 100); fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); - fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 8, 1, 4.3, 9.7, 42.9, + fixture.tester().assertResources("Scaling up cpu since peak resource usage is too high", + 8, 1, 4.3, 7.7, 34.3, fixture.autoscale()); } @@ -278,6 +284,7 @@ public class AutoscalingTest { .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == slow)); fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.loader().applyCpuLoad(0.25, 120); // Changing min and max from slow to any @@ -329,6 +336,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(1)); fixture.loader().applyLoad(new Load(0.25, 0.95, 0.95), 120); fixture.tester().assertResources("Scaling up to limit since resource usage is too high", @@ -384,6 +392,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)) .build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.4, 240); fixture.tester().assertResources("Scaling cpu up", @@ -402,9 +411,9 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max, IntRange.of(2, 3), false, true, 
Optional.empty(), ClusterInfo.empty())) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().applyCpuLoad(0.4, 240); + fixture.loader().applyCpuLoad(0.6, 240); fixture.tester().assertResources("Scaling cpu up", - 12, 6, 2.8, 4.2, 27.5, + 12, 6, 3.0, 4.2, 27.5, fixture.autoscale()); } @@ -446,6 +455,7 @@ public class AutoscalingTest { public void suggestions_ignores_limits() { ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).capacity(Capacity.from(min, min)).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", @@ -457,6 +467,7 @@ public class AutoscalingTest { public void suggestions_ignores_limits_exclusive() { ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1)); var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).capacity(Capacity.from(min, min)).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", @@ -504,6 +515,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)) .build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(0.5, 0.8, 0.1), 120); fixture.tester().assertResources("Suggesting resources where disk is 3x memory (this is a content cluster)", @@ -524,6 +536,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)) .build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); 
fixture.loader().applyLoad(new Load(0.5, 0.8, 0.1), 120); fixture.tester().assertResources("Suggesting resources where disk is 3x memory (this is a content cluster)", @@ -547,7 +560,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", - 10, 5, 7.7, 41.5, 124.6, + 8, 4, 7.4, 41.5, 124.6, fixture.autoscale()); } @@ -562,6 +575,7 @@ public class AutoscalingTest { .capacity(Capacity.from(min, max, IntRange.of(1), false, true, Optional.empty(), ClusterInfo.empty())) .build(); fixture.tester().clock().advance(Duration.ofDays(2)); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", 7, 7, 9.4, 78.6, 235.8, @@ -578,6 +592,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)) .build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120); fixture.tester().clock().advance(timePassed.negated()); @@ -608,7 +623,7 @@ public class AutoscalingTest { } @Test - public void test_autoscaling_group_size() { + public void test_autoscaling_groupsize() { var min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1)); var now = new ClusterResources(6, 2, new NodeResources(10, 100, 100, 1)); var max = new ClusterResources(30, 30, new NodeResources(100, 100, 1000, 1)); @@ -634,6 +649,7 @@ public class AutoscalingTest { .initialResources(Optional.of(now)) .capacity(Capacity.from(min, max)) .build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(0.16, 0.02, 0.5), 120); 
fixture.tester().assertResources("Scaling down memory", @@ -644,10 +660,10 @@ public class AutoscalingTest { @Test public void scaling_down_only_after_delay() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); - fixture.loader().applyCpuLoad(0.02, 120); + fixture.loader().applyCpuLoad(0.02, 5); assertTrue("Too soon after initial deployment", fixture.autoscale().resources().isEmpty()); fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1)); - fixture.loader().applyCpuLoad(0.02, 120); + fixture.loader().applyCpuLoad(0.02, 5); fixture.tester().assertResources("Scaling down since enough time has passed", 3, 1, 1.0, 23.6, 101.4, fixture.autoscale()); @@ -663,20 +679,20 @@ public class AutoscalingTest { .build(); fixture.tester.clock().advance(Duration.ofDays(1)); - fixture.loader().applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.5, 120); // (no read share stored) fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default", - 5, 1, 3, 100, 100, + 7, 1, 3, 100, 100, fixture.autoscale()); - fixture.loader().applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.5, 120); fixture.storeReadShare(0.25, 0.5); fixture.tester().assertResources("Half of global share is the same as the default assumption used above", - 5, 1, 3, 100, 100, + 7, 1, 3, 100, 100, fixture.autoscale()); fixture.tester.clock().advance(Duration.ofDays(1)); - fixture.loader().applyCpuLoad(0.25, 120); + fixture.loader().applyCpuLoad(0.5, 120); fixture.storeReadShare(0.5, 0.5); fixture.tester().assertResources("Advice to scale down since we don't need room for bcp", 4, 1, 3, 100, 100, @@ -686,6 +702,7 @@ public class AutoscalingTest { @Test public void test_autoscaling_considers_growth_rate() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.tester().clock().advance(Duration.ofDays(2)); Duration timeAdded = 
fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t -> 0.0); @@ -719,9 +736,9 @@ public class AutoscalingTest { @Test public void test_autoscaling_weights_growth_rate_by_confidence() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(6)); double scalingFactor = 1.0/6000; // To make the average query rate low - fixture.setScalingDuration(Duration.ofMinutes(60)); fixture.tester().clock().advance(Duration.ofDays(2)); Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> scalingFactor * (100.0 + (t < 50 ? t * t * t : 155000 - (t - 49) * (t - 49) * (t - 49))), @@ -736,6 +753,7 @@ public class AutoscalingTest { @Test public void test_autoscaling_considers_query_vs_write_rate() { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); + fixture.setScalingDuration(Duration.ofHours(6)); fixture.loader().addCpuMeasurements(0.4, 220); @@ -877,13 +895,13 @@ public class AutoscalingTest { fixture.currentResources().advertisedResources()); fixture.tester().deploy(fixture.applicationId(), clusterSpec(false), fixture.capacity()); - fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 100); + fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 5); fixture.tester().assertResources("Exclusive nodes makes no difference here", 2, 1, 4, 8, 100.0, fixture.autoscale()); fixture.tester().deploy(fixture.applicationId(), clusterSpec(true), fixture.capacity()); - fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 100); + fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 5); fixture.tester().assertResources("Reverts to the initial resources", 2, 1, 4, 8, 100, fixture.currentResources().advertisedResources()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 
29a7aff3e6a..637932681ee 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 3.0, 7.4, 29.0, + 8, 1, 2.9, 7.4, 29.0, fixture.autoscale()); // Mostly local @@ -216,7 +216,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration3.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 90.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 2.3, 7.4, 29.0, + 8, 1, 2.2, 7.4, 29.0, fixture.autoscale()); // Local only @@ -226,7 +226,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration4.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 2.2, 7.4, 29.0, + 8, 1, 2.1, 7.4, 29.0, fixture.autoscale()); // No group info, should be the same as the above @@ -236,7 +236,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration5.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 2.2, 7.4, 29.0, + 8, 1, 2.1, 7.4, 29.0, fixture.autoscale()); // 40 query rate, no group info (for reference to the below) @@ -266,7 +266,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration8.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 1.9, 7.4, 
29.0, + 8, 1, 1.8, 7.4, 29.0, fixture.autoscale()); } @@ -288,16 +288,18 @@ public class AutoscalingUsingBcpGroupInfoTest { // External load is measured to zero -> 0 fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().addCpuMeasurements(0.7f, 10); + var duration = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration.negated()); fixture.loader().addQueryRateMeasurements(10, i -> 0.0); assertEquals(new Autoscaling.Metrics(0, 1.0, 0), fixture.autoscale().metrics()); // External load fixture.tester().clock().advance(Duration.ofDays(2)); - fixture.loader().addCpuMeasurements(0.7f, 10); + duration = fixture.loader().addCpuMeasurements(0.7f, 10); + fixture.tester().clock().advance(duration.negated()); fixture.loader().addQueryRateMeasurements(10, i -> 110.0); - assertEquals(new Autoscaling.Metrics(110, 1.1, 0.05), + assertEquals(new Autoscaling.Metrics(110, 1.0, 0.05), round(fixture.autoscale().metrics())); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index 53d753e2850..3091f82143d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -26,7 +26,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import com.yahoo.vespa.hosted.provision.autoscale.Load; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; -import com.yahoo.vespa.hosted.provision.node.ClusterId; import com.yahoo.vespa.hosted.provision.node.Generation; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; @@ -133,13 +132,14 @@ public class MetricsReporterTest { 
expectedMetrics.put("suspendedSeconds", 123L); expectedMetrics.put("numberOfServices", 0L); - expectedMetrics.put("cache.nodeObject.hitRate", 0.5555555555555556D); + expectedMetrics.put("cache.nodeObject.hitRate", 5D/7D); expectedMetrics.put("cache.nodeObject.evictionCount", 0L); expectedMetrics.put("cache.nodeObject.size", 2L); expectedMetrics.put("cache.curator.hitRate", 3D/5D); expectedMetrics.put("cache.curator.evictionCount", 0L); expectedMetrics.put("cache.curator.size", 2L); + expectedMetrics.put("nodes.emptyExclusive", 0); nodeRepository.nodes().list(); tester.clock().setInstant(Instant.ofEpochSecond(124)); @@ -278,7 +278,6 @@ public class MetricsReporterTest { assertEquals(4, getMetric("nodes.active", metric, dimensions)); assertEquals(0, getMetric("nodes.nonActive", metric, dimensions)); - Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(), "clusterid", clusterSpec.id().value()); assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions)); @@ -341,6 +340,34 @@ public class MetricsReporterTest { assertEquals(1D, getMetric("nodes.exclusiveSwitchFraction", metric, MetricsReporter.dimensions(app, spec2.id())).doubleValue(), Double.MIN_VALUE); } + @Test + public void empty_exclusive_hosts() { + ProvisioningTester tester = new ProvisioningTester.Builder().build(); + ApplicationId app = ApplicationId.from("t1", "a1", "default"); + TestMetric metric = new TestMetric(); + MetricsReporter metricsReporter = metricsReporter(metric, tester); + NodeResources resources = new NodeResources(8, 32, 100, 10); + List<Node> hosts = tester.makeReadyNodes(4, resources, NodeType.host, 5); + tester.activateTenantHosts(); + tester.patchNodes(hosts, (host) -> host.withExclusiveToApplicationId(app)); + + // Hosts are not considered empty until enough time passes + metricsReporter.maintain(); + assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue()); + tester.clock().advance(Duration.ofMinutes(10)); + 
metricsReporter.maintain(); + assertEquals(hosts.size(), metric.values.get("nodes.emptyExclusive").intValue()); + + // Deploy application + ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("c1")).vespaVersion("1").build(); + Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources)); + tester.deploy(app, spec, capacity); + + // Host are now in use + metricsReporter.maintain(); + assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue()); + } + private Number getMetric(String name, TestMetric metric, Map<String, String> dimensions) { List<TestMetric.TestContext> metrics = metric.context.get(name).stream() .filter(ctx -> ctx.properties.entrySet().containsAll(dimensions.entrySet())) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java index 7763459dd92..79644206918 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java @@ -10,6 +10,7 @@ import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Provisioner; import com.yahoo.config.provision.Zone; +import com.yahoo.jdisc.test.MockMetric; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -60,7 +61,7 @@ public class InfraDeployerImplTest { private final NodeRepositoryTester tester = new NodeRepositoryTester(); private final NodeRepository nodeRepository = tester.nodeRepository(); - private final Provisioner provisioner = spy(new NodeRepositoryProvisioner(nodeRepository, Zone.defaultZone(), new EmptyProvisionServiceProvider())); + private final Provisioner 
provisioner = spy(new NodeRepositoryProvisioner(nodeRepository, Zone.defaultZone(), new EmptyProvisionServiceProvider(), new MockMetric())); private final InfrastructureVersions infrastructureVersions = nodeRepository.infrastructureVersions(); private final DuperModelInfraApi duperModelInfraApi = mock(DuperModelInfraApi.class); private final InfraDeployerImpl infraDeployer; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index a3a90d58c2c..bca48b19ccf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -22,12 +22,12 @@ import com.yahoo.config.provision.NodeResources.DiskSpeed; import com.yahoo.config.provision.NodeResources.StorageType; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.ProvisionLock; -import com.yahoo.config.provision.ProvisionLogger; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.jdisc.test.MockMetric; import com.yahoo.test.ManualClock; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.applicationmodel.InfrastructureApplication; @@ -73,7 +73,6 @@ import java.util.UUID; import java.util.function.Function; import java.util.function.Predicate; import java.util.function.UnaryOperator; -import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.config.provision.NodeResources.StorageType.local; @@ -131,7 +130,7 @@ public class ProvisioningTester { true, spareCount, 1000); - this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, 
provisionServiceProvider); + this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, provisionServiceProvider, new MockMetric()); this.capacityPolicies = new CapacityPolicies(nodeRepository); this.provisionLogger = new InMemoryProvisionLogger(); this.loadBalancerService = loadBalancerService; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java new file mode 100644 index 00000000000..f38b4732ed7 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java @@ -0,0 +1,30 @@ +package com.yahoo.vespa.hosted.provision.provisioning; + +import com.yahoo.vespa.hosted.provision.node.Agent; +import org.junit.jupiter.api.Test; + +import java.time.Duration; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static com.yahoo.vespa.hosted.provision.provisioning.ProvisioningThrottler.throttle; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author mpolden + */ +class ProvisioningThrottlerTest { + + @Test + void throttling() { + Agent agent = Agent.system; + Duration window = Duration.ofHours(1); + assertFalse(throttle(199, 99, window, agent)); + assertTrue(throttle(200, 99, window, agent)); + assertFalse(throttle(40, 100, window, agent)); + assertTrue(throttle(41, 100, window, agent)); + assertTrue(throttle(100, 100, window, agent)); + assertFalse(throttle(200, 2100, window, agent)); + assertTrue(throttle(201, 2100, window, agent)); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json index 42925b797d7..05a62ff944d 100644 --- 
a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json @@ -94,7 +94,7 @@ "at" : 123 } ], - "scalingDuration": 21600000 + "scalingDuration": 300000 } } } diff --git a/screwdriver.yaml b/screwdriver.yaml index 79a1569633f..19374a436d5 100644 --- a/screwdriver.yaml +++ b/screwdriver.yaml @@ -91,7 +91,7 @@ jobs: screwdriver.cd/cpu: 7 screwdriver.cd/ram: 16 screwdriver.cd/disk: HIGH - screwdriver.cd/timeout: 120 + screwdriver.cd/timeout: 150 screwdriver.cd/dockerEnabled: true screwdriver.cd/dockerCpu: TURBO screwdriver.cd/dockerRam: HIGH diff --git a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp index b7f5731ddf4..3a7e2a706cb 100644 --- a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp +++ b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp @@ -15,6 +15,7 @@ #include <vector> #include <limits> #include <unistd.h> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP("vespa-gen-testdocs"); @@ -550,7 +551,7 @@ DocumentGenerator::generate(uint32_t docMin, uint32_t docIdLimit, bool headers, bool json) { string fullName(prependBaseDir(baseDir, feedFileName)); - FastOS_File::Delete(fullName.c_str()); + std::filesystem::remove(std::filesystem::path(fullName)); Fast_BufferedFile f(new FastOS_File); f.WriteOpen(fullName.c_str()); if (json) { diff --git a/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp b/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp index db2675a7779..1fc5c40a47a 100644 --- a/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp +++ b/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp @@ -15,9 +15,9 @@ #include <vespa/searchcore/proton/test/documentdb_config_builder.h> 
#include <vespa/searchcore/proton/test/transport_helper.h> #include <vespa/searchsummary/config/config-juniperrc.h> -#include <vespa/vespalib/io/fileutil.h> #include <vespa/config-bucketspaces.h> #include <vespa/vespalib/testkit/test_kit.h> +#include <filesystem> using namespace cloud::config::filedistribution; @@ -165,7 +165,7 @@ TEST_FF("requireThatConfigCanBeSerializedAndDeserialized", Transport(), Document TEST_FF("requireThatConfigCanBeLoadedWithoutExtraConfigsDataFile", Transport(), DocumentDBConfig::SP(makeBaseConfigSnapshot(f1.transport()))) { saveBaseConfigSnapshot(f1.transport(), *f2, 70); - EXPECT_FALSE(vespalib::unlink("out/config-70/extraconfigs.dat")); + EXPECT_FALSE(std::filesystem::remove(std::filesystem::path("out/config-70/extraconfigs.dat"))); DocumentDBConfig::SP esnap(makeEmptyConfigSnapshot()); { FileConfigManager cm(f1.transport(), "out", myId, "dummy"); diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp index 6e9d4be97aa..f88e89db25e 100644 --- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp @@ -21,11 +21,11 @@ #include <vespa/searchlib/queryeval/simpleresult.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/vespalib/gtest/gtest.h> -#include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/test/insertion_operators.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/threadstackexecutor.h> +#include <filesystem> #include <thread> #include <vespa/log/log.h> @@ -525,7 +525,7 @@ TEST(DocumentMetaStoreTest, gids_can_be_saved_and_loaded) EXPECT_EQ(numLids + 1, dms2.getNumDocs()); EXPECT_EQ(numLids - (3 - i), dms2.getNumUsedLids()); } - vespalib::unlink("documentmetastore2.dat"); + 
std::filesystem::remove(std::filesystem::path("documentmetastore2.dat")); } TEST(DocumentMetaStoreTest, bucket_used_bits_are_lbounded_at_load_time) @@ -551,7 +551,7 @@ TEST(DocumentMetaStoreTest, bucket_used_bits_are_lbounded_at_load_time) BucketId expected_bucket(storage::spi::BucketLimits::MinUsedBits, gid.convertToBucketId().getRawId()); assertGid(gid, lid, dms2, expected_bucket, Timestamp(1000)); - vespalib::unlink("documentmetastore2.dat"); + std::filesystem::remove(std::filesystem::path("documentmetastore2.dat")); } TEST(DocumentMetaStore, stats_are_updated) @@ -1915,8 +1915,8 @@ TEST(DocumentMetaStoreTest, document_sizes_are_saved) assertSize(dms4, 1, 1); assertSize(dms4, 2, 1); assertSize(dms4, 3, 1); - vespalib::unlink("documentmetastore3.dat"); - vespalib::unlink("documentmetastore4.dat"); + std::filesystem::remove(std::filesystem::path("documentmetastore3.dat")); + std::filesystem::remove(std::filesystem::path("documentmetastore4.dat")); } namespace { diff --git a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp index 35583ea46da..e8926a957b7 100644 --- a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp +++ b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp @@ -120,7 +120,7 @@ void Test::requireThatIndexesInUseAreNotRemoved() { void Test::requireThatInvalidFlushIndexesAreRemoved() { createIndexes(); - FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete(); + std::filesystem::remove(std::filesystem::path(index_dir + "/index.flush.4/serial.dat")); DiskIndexes disk_indexes; DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); @@ -131,7 +131,7 @@ void Test::requireThatInvalidFlushIndexesAreRemoved() { void Test::requireThatInvalidFusionIndexesAreRemoved() { createIndexes(); - FastOS_File((index_dir + "/index.fusion.2/serial.dat").c_str()).Delete(); + std::filesystem::remove(std::filesystem::path(index_dir + 
"/index.fusion.2/serial.dat")); DiskIndexes disk_indexes; DiskIndexCleaner::clean(index_dir, disk_indexes); vector<string> indexes = readIndexes(); @@ -144,7 +144,7 @@ void Test::requireThatInvalidFusionIndexesAreRemoved() { void Test::requireThatRemoveDontTouchNewIndexes() { createIndexes(); - FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete(); + std::filesystem::remove(std::filesystem::path(index_dir + "/index.flush.4/serial.dat")); DiskIndexes disk_indexes; DiskIndexCleaner::removeOldIndexes(index_dir, disk_indexes); vector<string> indexes = readIndexes(); diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp index 2f6ebcd967f..a7209ea8897 100644 --- a/searchcore/src/tests/proton/index/indexmanager_test.cpp +++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp @@ -728,7 +728,7 @@ TEST_F(IndexManagerTest, require_that_serial_number_is_read_on_load) void crippleFusion(uint32_t fusionId) { vespalib::asciistream ost; ost << index_dir << "/index.flush." 
<< fusionId << "/serial.dat"; - FastOS_File(ost.str().data()).Delete(); + std::filesystem::remove(std::filesystem::path(ost.str())); } TEST_F(IndexManagerTest, require_that_failed_fusion_is_retried) diff --git a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt index 7bb1c9b878c..d4de8e578bd 100644 --- a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt @@ -29,6 +29,4 @@ vespa_add_library(searchcore_pcommon STATIC statusreport.cpp DEPENDS searchcore_proton_metrics - EXTERNAL_DEPENDS - ${VESPA_STDCXX_FS_LIB} ) diff --git a/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp b/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp index e25eb5c422c..c893bb6fe2b 100644 --- a/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp +++ b/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp @@ -5,7 +5,6 @@ #include <vespa/config/print/fileconfigwriter.h> #include <vespa/config/subscription/configsubscriber.hpp> #include <vespa/fastos/file.h> -#include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/time.h> #include <vespa/vespalib/util/resource_limits.h> #include <vespa/vespalib/util/size_literals.h> @@ -107,7 +106,7 @@ double measureDiskWriteSpeed(const vespalib::string &path, double elapsed = vespalib::to_s(after - before); diskWriteSpeed = diskWriteLen / elapsed / 1_Mi; } - vespalib::unlink(fileName); + std::filesystem::remove(std::filesystem::path(fileName)); return diskWriteSpeed; } diff --git a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt index 0f36ab4e834..f5544ed1b15 100644 --- a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt @@ -126,6 +126,4 @@ vespa_add_library(searchcore_server STATIC 
searchcore_summaryengine searchcore_reference configdefinitions - EXTERNAL_DEPENDS - ${VESPA_STDCXX_FS_LIB} ) diff --git a/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp b/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp index 704364dfb9b..51d6938b13b 100644 --- a/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp +++ b/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp @@ -39,7 +39,7 @@ bool isValidIndex(const string &index_dir) { } void invalidateIndex(const string &index_dir) { - vespalib::unlink(index_dir + "/serial.dat"); + std::filesystem::remove(std::filesystem::path(index_dir + "/serial.dat")); vespalib::File::sync(index_dir); } diff --git a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp index 97afce79861..a5f796cf48e 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp @@ -25,6 +25,7 @@ using search::index::SchemaUtil; using search::SerialNum; using vespalib::IllegalStateException; using vespalib::FileHeader; +using std::filesystem::path; namespace searchcorespi::index { @@ -64,8 +65,9 @@ IndexWriteUtilities::writeSerialNum(SerialNum serialNum, vespalib::File::sync(dir); if (ok) { - FastOS_File renameFile(tmpFileName.c_str()); - ok &= renameFile.Rename(fileName.c_str()); + std::error_code ec; + std::filesystem::rename(path(tmpFileName), path(fileName), ec); + ok = !ec; } if (!ok) { std::ostringstream msg; @@ -84,26 +86,15 @@ IndexWriteUtilities::copySerialNumFile(const vespalib::string &sourceDir, vespalib::string tmpDest = dest + ".tmp"; std::error_code ec; - std::filesystem::copy_file(std::filesystem::path(source), std::filesystem::path(tmpDest), ec); + std::filesystem::copy_file(path(source), path(tmpDest), ec); if (ec) { LOG(error, "Unable to copy file '%s'", source.c_str()); return false; } - FastOS_File 
file(tmpDest.c_str()); - if (!file.OpenReadWrite()) { - LOG(error, "Unable to open '%s' for fsync", tmpDest.c_str()); - return false; - } - if (!file.Sync()) { - LOG(error, "Unable to fsync '%s'", tmpDest.c_str()); - return false; - } - if (!file.Close()) { - LOG(error, "Unable to close '%s'", tmpDest.c_str()); - return false; - } + vespalib::File::sync(tmpDest); vespalib::File::sync(destDir); - if (!file.Rename(dest.c_str())) { + std::filesystem::rename(path(tmpDest), path(dest), ec); + if (ec) { LOG(error, "Unable to rename file '%s' to '%s'", tmpDest.c_str(), dest.c_str()); return false; } @@ -159,7 +150,7 @@ IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, } vespalib::string schemaTmpName = schemaName + ".tmp"; vespalib::string schemaOrigName = schemaName + ".orig"; - vespalib::unlink(schemaTmpName); + std::filesystem::remove(path(schemaTmpName)); if (!newSchema->saveToFile(schemaTmpName)) { LOG(error, "Could not save schema to '%s'", schemaTmpName.c_str()); diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp index c072f722677..82c1839e63b 100644 --- a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp +++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp @@ -10,7 +10,7 @@ #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/vespalib/data/databuffer.h> -#include <vespa/fastos/file.h> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP("attributefilewriter_test"); @@ -24,7 +24,7 @@ namespace { vespalib::string testFileName("test.dat"); vespalib::string hello("Hello world"); -void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); } +void removeTestFile() { std::filesystem::remove(std::filesystem::path(testFileName)); } struct Fixture { TuneFileAttributes 
_tuneFileAttributes; diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp index 5fa8889a01d..6e622c840b6 100644 --- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -53,9 +53,15 @@ struct BitVectorTest StringAttribute & asString(AttributePtr &v); FloatingPointAttribute & asFloat(AttributePtr &v); - AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter); + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableOnlyBitVector, + bool filter); - void addDocs(const AttributePtr &v, size_t sz); + void + addDocs(const AttributePtr &v, size_t sz); template <typename VectorType> void populate(VectorType &v, uint32_t low, uint32_t high, bool set); @@ -63,16 +69,22 @@ struct BitVectorTest template <typename VectorType> void populateAll(VectorType &v, uint32_t low, uint32_t high, bool set); - void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, bool prefix); + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); template <typename V> - vespalib::string getSearchStr(); + vespalib::string + getSearchStr(); template <typename V, typename T> - SearchContextPtr getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); template <typename V> - SearchContextPtr getSearch(const V & vec, bool useBitVector); + SearchContextPtr + getSearch(const V & vec, bool useBitVector); void checkSearch(AttributePtr v, @@ -95,7 +107,10 @@ struct BitVectorTest template <typename VectorType, typename BufferType> void - test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter); + 
test(BasicType bt, CollectionType ct, const vespalib::string &pref, + bool fastSearch, + bool enableOnlyBitVector, + bool filter); template <typename VectorType, typename BufferType> void @@ -180,7 +195,8 @@ BitVectorTest::getSearchStr<StringAttribute>() template <typename V, typename T> SearchContextPtr -BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, bool useBitVector) +BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, + bool useBitVector) { std::vector<char> query; vespalib::asciistream ss; @@ -195,7 +211,8 @@ BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, bool useBitVe template <> SearchContextPtr -BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, bool useBitVector) +BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, + bool useBitVector) { return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector); } @@ -203,23 +220,32 @@ BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, bool useBi template <> SearchContextPtr BitVectorTest:: -getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, bool useBitVector) +getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, + bool useBitVector) { - return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, useBitVector); + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, + useBitVector); } template <> SearchContextPtr -BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, bool useBitVector) +BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, + bool useBitVector) { - return getSearch<StringAttribute, const vespalib::string &>(v, "foo", false, useBitVector); + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false, useBitVector); } BitVectorTest::AttributePtr -BitVectorTest::make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter) +BitVectorTest::make(Config cfg, + const 
vespalib::string &pref, + bool fastSearch, + bool enableOnlyBitVector, + bool filter) { cfg.setFastSearch(fastSearch); + cfg.setEnableOnlyBitVector(enableOnlyBitVector); cfg.setIsFilter(filter); AttributePtr v = AttributeFactory::createAttribute(pref, cfg); return v; @@ -241,9 +267,11 @@ BitVectorTest::addDocs(const AttributePtr &v, size_t sz) template <> void -BitVectorTest::populate(IntegerAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populate(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; i+= 5) { + for(size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -260,9 +288,11 @@ BitVectorTest::populate(IntegerAttribute &v, uint32_t low, uint32_t high, bool s template <> void -BitVectorTest::populate(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populate(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; i+= 5) { + for(size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -279,9 +309,11 @@ BitVectorTest::populate(FloatingPointAttribute &v, uint32_t low, uint32_t high, template <> void -BitVectorTest::populate(StringAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populate(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; i+= 5) { + for(size_t i(low), m(high); i < m; i+= 5) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -297,9 +329,11 @@ BitVectorTest::populate(StringAttribute &v, uint32_t low, uint32_t high, bool se template <> void -BitVectorTest::populateAll(IntegerAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populateAll(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; ++i) { + for(size_t i(low), m(high); i < m; ++i) { if (!set) { 
v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -317,9 +351,11 @@ BitVectorTest::populateAll(IntegerAttribute &v, uint32_t low, uint32_t high, boo template <> void -BitVectorTest::populateAll(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populateAll(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; ++i) { + for(size_t i(low), m(high); i < m; ++i) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -337,9 +373,11 @@ BitVectorTest::populateAll(FloatingPointAttribute &v, uint32_t low, uint32_t hig template <> void -BitVectorTest::populateAll(StringAttribute &v, uint32_t low, uint32_t high, bool set) +BitVectorTest::populateAll(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) { - for (size_t i(low), m(high); i < m; ++i) { + for(size_t i(low), m(high); i < m; ++i) { if (!set) { v.clearDoc(i); } else if (v.hasMultiValue()) { @@ -378,7 +416,8 @@ BitVectorTest::checkSearch(AttributePtr v, assert(!checkStride || (docId % 5) == 2u); sb->unpack(docId); EXPECT_EQUAL(md.getDocId(), docId); - if (v->getCollectionType() == CollectionType::SINGLE || !weights) { + if (v->getCollectionType() == CollectionType::SINGLE || + !weights) { EXPECT_EQUAL(1, md.getWeight()); } else if (v->getCollectionType() == CollectionType::ARRAY) { EXPECT_EQUAL(2, md.getWeight()); @@ -417,10 +456,15 @@ BitVectorTest::checkSearch(AttributePtr v, template <typename VectorType, typename BufferType> void -BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter) +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref, + bool fastSearch, + bool enableOnlyBitVector, + bool filter) { Config cfg(bt, ct); - AttributePtr v = make(cfg, pref, fastSearch, filter); + AttributePtr v = make(cfg, pref, fastSearch, enableOnlyBitVector, filter); addDocs(v, 1024); auto &tv = as<VectorType>(v); populate(tv, 2, 1023, 
true); @@ -428,7 +472,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre SearchContextPtr sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true); sc = getSearch<VectorType>(tv, false); - checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); const search::IDocumentWeightAttribute *dwa = v->asDocumentWeightAttribute(); if (dwa != nullptr) { search::IDocumentWeightAttribute::LookupResult lres = @@ -437,8 +481,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre using SI = search::queryeval::SearchIterator; TermFieldMatchData md; SI::UP dwsi(new DWSI(md, *dwa, lres)); - if (!filter) { - TEST_DO(checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true)); + if (!enableOnlyBitVector) { + checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true); } else { dwsi->initRange(1, v->getCommittedDocIdLimit()); EXPECT_TRUE(dwsi->isAtEnd()); @@ -446,13 +490,13 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre } populate(tv, 2, 973, false); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 977, 1022, 10, !filter, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector &&!filter, true); populate(tv, 2, 973, true); sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true); addDocs(v, 15000); sc = getSearch<VectorType>(tv, true); - checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true); populateAll(tv, 10, 15000, true); sc = getSearch<VectorType>(tv, true); checkSearch(v, std::move(sc), 2, 14999, 14992, !fastSearch && !filter, false); @@ -464,65 +508,85 @@ void BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref) { 
LOG(info, "test run, pref is %s", pref.c_str()); - test<VectorType, BufferType>(bt, ct, pref, false, false); - test<VectorType, BufferType>(bt, ct, pref, false, true); - test<VectorType, BufferType>(bt, ct, pref, true, false); - test<VectorType, BufferType>(bt, ct, pref, true, true); + test<VectorType, BufferType>(bt, ct, pref, false, false, false); + test<VectorType, BufferType>(bt, ct, pref, false, false, true); + test<VectorType, BufferType>(bt, ct, pref, true, false, false); + test<VectorType, BufferType>(bt, ct, pref, true, false, true); + test<VectorType, BufferType>(bt, ct, pref, true, true, false); + test<VectorType, BufferType>(bt, ct, pref, true, true, true); } TEST_F("Test bitvectors with single value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::SINGLE, "int32_sv"); + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); } TEST_F("Test bitvectors with array value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::ARRAY, "int32_a"); + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); } TEST_F("Test bitvectors with weighted set value int32", BitVectorTest) { f.template test<IntegerAttribute, - IntegerAttribute::WeightedInt>(BasicType::INT32, CollectionType::WSET, "int32_sv"); + IntegerAttribute::WeightedInt>(BasicType::INT32, + CollectionType::WSET, + "int32_sv"); } TEST_F("Test bitvectors with single value double", BitVectorTest) { f.template test<FloatingPointAttribute, - double>(BasicType::DOUBLE, CollectionType::SINGLE, "double_sv"); + double>(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); } TEST_F("Test bitvectors with array value double", BitVectorTest) { f.template test<FloatingPointAttribute, - double>(BasicType::DOUBLE, CollectionType::ARRAY, "double_a"); + double>(BasicType::DOUBLE, + CollectionType::ARRAY, + 
"double_a"); } TEST_F("Test bitvectors with weighted set value double", BitVectorTest) { f.template test<FloatingPointAttribute, - FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, CollectionType::WSET, "double_ws"); + FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); } TEST_F("Test bitvectors with single value string", BitVectorTest) { f.template test<StringAttribute, - vespalib::string>(BasicType::STRING, CollectionType::SINGLE, "string_sv"); + vespalib::string>(BasicType::STRING, + CollectionType::SINGLE, + "string_sv"); } TEST_F("Test bitvectors with array value string", BitVectorTest) { f.template test<StringAttribute, - vespalib::string>(BasicType::STRING, CollectionType::ARRAY, "string_a"); + vespalib::string>(BasicType::STRING, + CollectionType::ARRAY, + "string_a"); } TEST_F("Test bitvectors with weighted set value string", BitVectorTest) { f.template test<StringAttribute, - StringAttribute::WeightedString>(BasicType::STRING, CollectionType::WSET, "string_ws"); + StringAttribute::WeightedString>(BasicType::STRING, + CollectionType::WSET, + "string_ws"); } @@ -569,4 +633,5 @@ TEST("Test that bitvector iterators adheres to SearchIterator requirements") { } } + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp index 79ef6e42bb2..c5d70109015 100644 --- a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp +++ b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp @@ -123,7 +123,7 @@ void hammerAttribute(AttributePtr &v, DocIdRange range, uint32_t count) Config compactAddressSpaceAttributeConfig(bool enableAddressSpaceCompact) { Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setCompactionStrategy({ 1.0f, (enableAddressSpaceCompact ? 0.2f : 1.0f) }); + cfg.setCompactionStrategy({ 1.0, (enableAddressSpaceCompact ? 
0.2 : 1.0) }); return cfg; } diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 227dbfadbc0..57029f92111 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -4,7 +4,9 @@ #include <vespa/searchcommon/attribute/status.h> #include <vespa/searchlib/attribute/postingstore.h> #include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/vespalib/btree/btreenodeallocator.hpp> #include <vespa/vespalib/btree/btreerootbase.hpp> +#include <vespa/vespalib/btree/btreeroot.hpp> #include <vespa/searchlib/attribute/postingstore.hpp> #include <vespa/vespalib/datastore/buffer_type.hpp> #include <vespa/vespalib/gtest/gtest.h> @@ -40,7 +42,7 @@ std::ostream& operator<<(std::ostream& os, const PostingStoreSetup setup) Config make_config(PostingStoreSetup param) { Config cfg; - cfg.setIsFilter(param.enable_only_bitvector); + cfg.setEnableOnlyBitVector(param.enable_only_bitvector); return cfg; } @@ -210,7 +212,8 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length) EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1)); EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2)); auto usage_after = store.getMemoryUsage(); - if ((sequence_length < huge_sequence_length) || !_config.getIsFilter()) { + if (sequence_length < huge_sequence_length || + !_config.getEnableOnlyBitVector()) { EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes()); } else { EXPECT_EQ(usage_before.deadBytes(), usage_after.deadBytes()); diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp index 25de1105973..00e2a82d24e 100644 --- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp +++ 
b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -471,7 +471,7 @@ PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::ve } EXPECT_EQ(doc, docEnd); } else { - EXPECT_TRUE(has_bitvector && vec.getIsFilter()); + EXPECT_TRUE(has_bitvector && vec.getEnableOnlyBitVector()); numHits = postingList.getBitVectorEntry(find_result.second)->_bv->reader().countTrueBits(); } if (has_bitvector) { @@ -612,21 +612,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sint32", cfg); testPostingList<Int32PostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("aint32", cfg); testPostingList<Int32ArrayPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsint32", cfg); testPostingList<Int32WsetPostingListAttribute>(ptr1, numDocs, values); } @@ -640,21 +640,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sfloat", cfg); testPostingList<FloatPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); cfg.setFastSearch(true); - 
cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("afloat", cfg); testPostingList<FloatArrayPostingListAttribute>(ptr1, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsfloat", cfg); testPostingList<FloatWsetPostingListAttribute>(ptr1, numDocs, values); } @@ -674,21 +674,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n { Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("sstr", cfg); testPostingList<StringPostingListAttribute>(ptr1, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("astr", cfg); testPostingList<StringArrayPostingListAttribute>(ptr1, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::WSET)); cfg.setFastSearch(true); - cfg.setIsFilter(enable_only_bitvector); + cfg.setEnableOnlyBitVector(enable_only_bitvector); AttributePtr ptr1 = create_attribute("wsstr", cfg); testPostingList<StringWsetPostingListAttribute>(ptr1, numDocs, charValues); } diff --git a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp index e356187a19f..07b64864d9a 100644 --- a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp +++ b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp @@ -13,9 +13,9 @@ #include 
<vespa/searchlib/test/mock_gid_to_lid_mapping.h> #include <vespa/searchcommon/attribute/config.h> #include <vespa/vespalib/gtest/gtest.h> -#include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/test/insertion_operators.h> #include <cinttypes> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP("reference_attribute_test"); @@ -312,8 +312,8 @@ TEST_F(ReferenceAttributeTest, attribute_can_be_saved_and_loaded) assertRef(doc1, 1); assertRef(doc2, 2); assertRef(doc1, 4); - EXPECT_TRUE(vespalib::unlink("test.dat")); - EXPECT_TRUE(vespalib::unlink("test.udat")); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.dat"))); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.udat"))); } TEST_F(ReferenceAttributeTest, update_uses_gid_mapper_to_set_target_lid) @@ -399,8 +399,8 @@ TEST_F(ReferenceAttributeTest, populateTargetLids_uses_gid_mapper_to_update_lid_ save(); load(); checkPopulateTargetLids(*this); - EXPECT_TRUE(vespalib::unlink("test.dat")); - EXPECT_TRUE(vespalib::unlink("test.udat")); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.dat"))); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.udat"))); } TEST_F(ReferenceAttributeTest, populateTargetLids_handles_removes) diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 9cfd5946dbb..2f3684874ee 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -510,8 +510,8 @@ TEST("require that single weighted set turns filter on filter fields") { SimpleStringTerm node("foo", "", 0, Weight(1)); Result result = do_search(attribute_manager, node, strict); EXPECT_EQUAL(3u, result.est_hits); - EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos); - 
EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos); + EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos); ASSERT_EQUAL(3u, result.hits.size()); EXPECT_FALSE(result.est_empty); EXPECT_EQUAL(20u, result.hits[0].docid); diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index ca8eaa176a4..8acb39853e9 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -383,12 +383,6 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire fic.dump(ib); ib.close(); - vespalib::string tsName = dump2dir + "/.teststamp"; - using FileKit = search::FileKit; - ASSERT_TRUE(FileKit::createStamp(tsName)); - ASSERT_TRUE(FileKit::hasStamp(tsName)); - ASSERT_TRUE(FileKit::removeStamp(tsName)); - ASSERT_FALSE(FileKit::hasStamp(tsName)); vespalib::ThreadStackExecutor executor(4); do { diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index 5370eff78cf..1d7bd9b8504 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -204,7 +204,7 @@ TEST("test that DirectIOPadding works accordng to spec") { EXPECT_EQUAL(1u, padAfter); EXPECT_TRUE(file.Close()); - FastOS_File::Delete(file.GetFileName()); + std::filesystem::remove(std::filesystem::path(file.GetFileName())); } #endif @@ -748,23 +748,6 @@ TEST("testWriteRead") { std::filesystem::remove_all(std::filesystem::path("empty")); } -TEST("requireThatSyncTokenIsUpdatedAfterFlush") { -#if 0 - std::string file = "sync.dat"; - FastOS_File::Delete(file.c_str()); - { - vespalib::DataBuffer buf; - 
SimpleDataStore store(file); - EXPECT_EQUAL(0u, store.lastSyncToken()); - makeData(buf, 10); - store.write(0, buf, 10); - store.flush(4); - EXPECT_EQUAL(4u, store.lastSyncToken()); - } - FastOS_File::Delete(file.c_str()); -#endif -} - TEST("requireThatFlushTimeIsAvailableAfterFlush") { DirectoryHandler testDir("flushtime"); vespalib::system_time before(vespalib::system_clock::now()); @@ -1022,7 +1005,7 @@ TEST_F("require that lid space can be increased after being compacted and then s TEST_F("require that there is control of static memory usage", Fixture) { vespalib::MemoryUsage usage = f.store.getMemoryUsage(); - EXPECT_EQUAL(520u + sizeof(LogDataStore::NameIdSet) + sizeof(std::mutex), sizeof(LogDataStore)); + EXPECT_EQUAL(536u + sizeof(LogDataStore::NameIdSet) + sizeof(std::mutex), sizeof(LogDataStore)); EXPECT_EQUAL(74108u, usage.allocatedBytes()); EXPECT_EQUAL(384u, usage.usedBytes()); } diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index dc64c3328e4..c22d3b3abb8 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -67,6 +67,7 @@ using search::attribute::WeightedEnumContent; using search::attribute::test::AttributeBuilder; using search::common::GeoLocation; using search::common::GeoLocationSpec; +using vespalib::eval::ValueType; using AttributePtr = AttributeVector::SP; using AVC = search::attribute::Config; @@ -391,6 +392,14 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env) avs.push_back(AttributeFactory::createAttribute("sbool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 14 avs.push_back(AttributeFactory::createAttribute("sebool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 15 avs.push_back(AttributeFactory::createAttribute("sdouble", AVC(AVBT::DOUBLE, AVCT::SINGLE))); // 16 + { + AVC cfg(AVBT::TENSOR, AVCT::SINGLE); + cfg.setTensorType(ValueType::from_spec("tensor(x[2])")); + avs.push_back(AttributeFactory::createAttribute("tensor", 
cfg)); + } + avs.push_back(AttributeFactory::createAttribute("predicate", AVC(AVBT::PREDICATE, AVCT::SINGLE))); // 18 + avs.push_back(AttributeFactory::createAttribute("reference", AVC(AVBT::REFERENCE, AVCT::SINGLE))); // 19 + avs.push_back(AttributeFactory::createAttribute("raw", AVC(AVBT::RAW, AVCT::SINGLE))); // 20 // simulate a unique only attribute as specified in sd AVC cfg(AVBT::INT32, AVCT::SINGLE); @@ -417,7 +426,11 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env) .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sdouble") .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sbyte") .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sbool") - .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool"); + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw"); } for (const auto & attr : avs) { @@ -1499,6 +1512,10 @@ Test::testMatch() ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "tensor"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "predicate"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "reference"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "raw"); 
FtIndexEnvironment idx_env; idx_env.getBuilder() @@ -1507,7 +1524,11 @@ Test::testMatch() .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz") .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint") .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint") - .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw"); StringList params, in, out; FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight")); diff --git a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp index dc9c68c4539..986848c39b7 100644 --- a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp +++ b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp @@ -31,6 +31,7 @@ TEST_F("test default attribute config", Fixture) EXPECT_EQUAL(CollectionType::Type::SINGLE, f._config.collectionType().type()); EXPECT_TRUE(!f._config.fastSearch()); + EXPECT_TRUE(!f._config.getEnableOnlyBitVector()); EXPECT_TRUE(!f._config.getIsFilter()); EXPECT_TRUE(!f._config.fastAccess()); EXPECT_TRUE(f._config.tensorType().is_error()); @@ -42,6 +43,7 @@ TEST_F("test integer weightedset attribute config", EXPECT_EQUAL(BasicType::Type::INT32, f._config.basicType().type()); EXPECT_EQUAL(CollectionType::Type::WSET, f._config.collectionType().type()); EXPECT_TRUE(!f._config.fastSearch()); + EXPECT_TRUE(!f._config.getEnableOnlyBitVector()); EXPECT_TRUE(!f._config.getIsFilter()); EXPECT_TRUE(!f._config.fastAccess()); 
EXPECT_TRUE(f._config.tensorType().is_error()); diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h index 407348fea92..46387dd2738 100644 --- a/searchlib/src/vespa/searchcommon/attribute/basictype.h +++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h @@ -9,7 +9,7 @@ namespace search::attribute { class BasicType { public: - enum Type : uint8_t { + enum Type { NONE = 0, STRING = 1, BOOL = 2, @@ -28,33 +28,33 @@ class BasicType MAX_TYPE }; - explicit BasicType(int t) noexcept : _type(Type(t)) { } - explicit BasicType(unsigned int t) noexcept : _type(Type(t)) { } - BasicType(Type t) noexcept : _type(t) { } + explicit BasicType(int t) : _type(Type(t)) { } + explicit BasicType(unsigned int t) : _type(Type(t)) { } + BasicType(Type t) : _type(t) { } explicit BasicType(const vespalib::string & t) : _type(asType(t)) { } - Type type() const noexcept { return _type; } - const char * asString() const noexcept { return asString(_type); } - size_t fixedSize() const noexcept { return fixedSize(_type); } - static BasicType fromType(bool) noexcept { return BOOL; } - static BasicType fromType(int8_t) noexcept { return INT8; } - static BasicType fromType(int16_t) noexcept { return INT16; } - static BasicType fromType(int32_t) noexcept { return INT32; } - static BasicType fromType(int64_t) noexcept { return INT64; } - static BasicType fromType(float) noexcept { return FLOAT; } - static BasicType fromType(double) noexcept { return DOUBLE; } - bool operator==(const BasicType &b) const noexcept { return _type == b._type; } - bool operator!=(const BasicType &b) const noexcept { return _type != b._type; } + Type type() const { return _type; } + const char * asString() const { return asString(_type); } + size_t fixedSize() const { return fixedSize(_type); } + static BasicType fromType(bool) { return BOOL; } + static BasicType fromType(int8_t) { return INT8; } + static BasicType fromType(int16_t) { return INT16; 
} + static BasicType fromType(int32_t) { return INT32; } + static BasicType fromType(int64_t) { return INT64; } + static BasicType fromType(float) { return FLOAT; } + static BasicType fromType(double) { return DOUBLE; } + bool operator==(const BasicType &b) const { return _type == b._type; } + bool operator!=(const BasicType &b) const { return _type != b._type; } private: - static const char * asString(Type t) noexcept { return _typeTable[t]._name; } - static size_t fixedSize(Type t) noexcept { return _typeTable[t]._fixedSize; } + static const char * asString(Type t) { return _typeTable[t]._name; } + static size_t fixedSize(Type t) { return _typeTable[t]._fixedSize; } static Type asType(const vespalib::string & t); Type _type; struct TypeInfo { - Type _type; + Type _type; unsigned int _fixedSize; const char * _name; }; diff --git a/searchlib/src/vespa/searchcommon/attribute/collectiontype.h b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h index 05fad8cbc64..35cb7612ed0 100644 --- a/searchlib/src/vespa/searchcommon/attribute/collectiontype.h +++ b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h @@ -9,7 +9,7 @@ namespace search::attribute { class CollectionType { public: - enum Type : uint8_t { + enum Type { /** * Single value type with one value stored for each document. 
**/ @@ -26,30 +26,32 @@ class CollectionType MAX_TYPE }; - CollectionType(Type t = SINGLE, bool remove = false, bool create = false) noexcept - : _type(t), - _removeIfZero(remove), - _createIfNonExistant(create) - { } + CollectionType(Type t = SINGLE, bool remove = false, bool create = false) : + _type(t), + _removeIfZero(remove), + _createIfNonExistant(create) + { + } explicit - CollectionType(const vespalib::string & t, bool remove = false, bool create = false) - : _type(asType(t)), - _removeIfZero(remove), - _createIfNonExistant(create) - { } + CollectionType(const vespalib::string & t, bool remove = false, bool create = false) : + _type(asType(t)), + _removeIfZero(remove), + _createIfNonExistant(create) + { + } - Type type() const noexcept { return _type; } - bool isMultiValue() const noexcept { return _type != SINGLE; } - bool isWeightedSet() const noexcept { return _type == WSET; } - bool isArray() const noexcept { return _type == ARRAY; } - bool removeIfZero() const noexcept { return _removeIfZero; } - bool createIfNonExistant() const noexcept { return _createIfNonExistant; } - const char * asString() const noexcept { return asString(_type); } - void removeIfZero(bool newValue) noexcept { _removeIfZero = newValue; } - void createIfNonExistant(bool newValue) noexcept { _createIfNonExistant = newValue; } - bool operator!=(const CollectionType &b) const noexcept { return !(operator==(b)); } - bool operator==(const CollectionType &b) const noexcept { + Type type() const { return _type; } + bool isMultiValue() const { return _type != SINGLE; } + bool isWeightedSet() const { return _type == WSET; } + bool isArray() const { return _type == ARRAY; } + bool removeIfZero() const { return _removeIfZero; } + bool createIfNonExistant() const { return _createIfNonExistant; } + const char * asString() const { return asString(_type); } + void removeIfZero(bool newValue) { _removeIfZero = newValue; } + void createIfNonExistant(bool newValue) { _createIfNonExistant = 
newValue; } + bool operator!=(const CollectionType &b) const { return !(operator==(b)); } + bool operator==(const CollectionType &b) const { return _type == b._type && _removeIfZero == b._removeIfZero && _createIfNonExistant == b._createIfNonExistant; @@ -61,12 +63,12 @@ class CollectionType const char * _name; }; - static const char * asString(Type t) noexcept { return _typeTable[t]._name; } + static const char * asString(Type t) { return _typeTable[t]._name; } static Type asType(const vespalib::string &t); - Type _type : 4; - bool _removeIfZero : 1; - bool _createIfNonExistant : 1; + Type _type; + bool _removeIfZero; + bool _createIfNonExistant; static const TypeInfo _typeTable[MAX_TYPE]; }; diff --git a/searchlib/src/vespa/searchcommon/attribute/config.cpp b/searchlib/src/vespa/searchcommon/attribute/config.cpp index 7c302a10731..91495025dee 100644 --- a/searchlib/src/vespa/searchcommon/attribute/config.cpp +++ b/searchlib/src/vespa/searchcommon/attribute/config.cpp @@ -19,18 +19,19 @@ Config::Config(BasicType bt, CollectionType ct, bool fastSearch_) noexcept : _basicType(bt), _type(ct), _fastSearch(fastSearch_), + _enableOnlyBitVector(false), _isFilter(false), _fastAccess(false), _mutable(false), _paged(false), - _distance_metric(DistanceMetric::Euclidean), + _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY), _match(Match::UNCASED), _dictionary(), - _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY), _growStrategy(), _compactionStrategy(), _predicateParams(), _tensorType(vespalib::eval::ValueType::error_type()), + _distance_metric(DistanceMetric::Euclidean), _hnsw_index_params() { } @@ -42,11 +43,12 @@ Config & Config::operator = (Config &&) noexcept = default; Config::~Config() = default; bool -Config::operator==(const Config &b) const noexcept +Config::operator==(const Config &b) const { return _basicType == b._basicType && _type == b._type && _fastSearch == b._fastSearch && + _enableOnlyBitVector == b._enableOnlyBitVector && _isFilter == b._isFilter && _fastAccess == 
b._fastAccess && _mutable == b._mutable && diff --git a/searchlib/src/vespa/searchcommon/attribute/config.h b/searchlib/src/vespa/searchcommon/attribute/config.h index 17c762267cc..32cac7ec9d6 100644 --- a/searchlib/src/vespa/searchcommon/attribute/config.h +++ b/searchlib/src/vespa/searchcommon/attribute/config.h @@ -21,7 +21,7 @@ namespace search::attribute { */ class Config { public: - enum class Match : uint8_t { CASED, UNCASED }; + enum class Match { CASED, UNCASED }; using CompactionStrategy = vespalib::datastore::CompactionStrategy; Config() noexcept; Config(BasicType bt) noexcept : Config(bt, CollectionType::SINGLE) { } @@ -33,27 +33,29 @@ public: Config & operator = (Config &&) noexcept; ~Config(); - BasicType basicType() const noexcept { return _basicType; } - CollectionType collectionType() const noexcept { return _type; } - bool fastSearch() const noexcept { return _fastSearch; } - bool paged() const noexcept { return _paged; } - const PredicateParams &predicateParams() const noexcept { return _predicateParams; } - const vespalib::eval::ValueType & tensorType() const noexcept { return _tensorType; } - DistanceMetric distance_metric() const noexcept { return _distance_metric; } + BasicType basicType() const { return _basicType; } + CollectionType collectionType() const { return _type; } + bool fastSearch() const { return _fastSearch; } + bool paged() const { return _paged; } + const PredicateParams &predicateParams() const { return _predicateParams; } + const vespalib::eval::ValueType & tensorType() const { return _tensorType; } + DistanceMetric distance_metric() const { return _distance_metric; } const std::optional<HnswIndexParams>& hnsw_index_params() const { return _hnsw_index_params; } /** * Check if attribute posting list can consist of only a bitvector with * no corresponding btree. 
*/ - bool getIsFilter() const noexcept { return _isFilter; } - bool isMutable() const noexcept { return _mutable; } + bool getEnableOnlyBitVector() const { return _enableOnlyBitVector; } + + bool getIsFilter() const { return _isFilter; } + bool isMutable() const { return _mutable; } /** * Check if this attribute should be fast accessible at all times. * If so, attribute is kept in memory also for non-searchable documents. */ - bool fastAccess() const noexcept { return _fastAccess; } + bool fastAccess() const { return _fastAccess; } const GrowStrategy & getGrowStrategy() const { return _growStrategy; } const CompactionStrategy &getCompactionStrategy() const { return _compactionStrategy; } @@ -81,6 +83,14 @@ public: * document frequency goes down, since recreated btree representation * will then have lost weight information. */ + Config & setEnableOnlyBitVector(bool enableOnlyBitVector) { + _enableOnlyBitVector = enableOnlyBitVector; + return *this; + } + + /** + * Hide weight information when searching in attributes. 
+ */ Config & setIsFilter(bool isFilter) { _isFilter = isFilter; return *this; } Config & setMutable(bool isMutable) { _mutable = isMutable; return *this; } Config & setPaged(bool paged_in) { _paged = paged_in; return *this; } @@ -92,28 +102,29 @@ public: } Config & set_dictionary_config(const DictionaryConfig & cfg) { _dictionary = cfg; return *this; } Config & set_match(Match match) { _match = match; return *this; } - bool operator!=(const Config &b) const noexcept { return !(operator==(b)); } - bool operator==(const Config &b) const noexcept ; + bool operator!=(const Config &b) const { return !(operator==(b)); } + bool operator==(const Config &b) const; - uint64_t getMaxUnCommittedMemory() const noexcept { return _maxUnCommittedMemory; } + uint64_t getMaxUnCommittedMemory() const { return _maxUnCommittedMemory; } Config & setMaxUnCommittedMemory(uint64_t value) { _maxUnCommittedMemory = value; return *this; } private: BasicType _basicType; CollectionType _type; - bool _fastSearch : 1; - bool _isFilter : 1; - bool _fastAccess : 1; - bool _mutable : 1; - bool _paged : 1; - DistanceMetric _distance_metric; + bool _fastSearch; + bool _enableOnlyBitVector; + bool _isFilter; + bool _fastAccess; + bool _mutable; + bool _paged; + uint64_t _maxUnCommittedMemory; Match _match; DictionaryConfig _dictionary; - uint64_t _maxUnCommittedMemory; GrowStrategy _growStrategy; CompactionStrategy _compactionStrategy; PredicateParams _predicateParams; vespalib::eval::ValueType _tensorType; + DistanceMetric _distance_metric; std::optional<HnswIndexParams> _hnsw_index_params; }; diff --git a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h index 35f5fb4fe6b..9f9f45810b9 100644 --- a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h +++ b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h @@ -2,10 +2,8 @@ #pragma once -#include <cstdint> - namespace search::attribute { -enum DistanceMetric : 
uint8_t { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct }; +enum class DistanceMetric { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct }; } diff --git a/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h index 205a75c188f..d81eb9c5d3c 100644 --- a/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h +++ b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h @@ -10,23 +10,24 @@ namespace search::attribute { * Persistent parameters for predicate attributes. */ class PersistentPredicateParams { + uint32_t _arity; int64_t _lower_bound; int64_t _upper_bound; - uint32_t _arity; public: - PersistentPredicateParams() noexcept - : _lower_bound(std::numeric_limits<int64_t>::min()), - _upper_bound(std::numeric_limits<int64_t>::max()), - _arity(8) - { } - uint32_t arity() const noexcept { return _arity; } - int64_t lower_bound() const noexcept { return _lower_bound; } - int64_t upper_bound() const noexcept { return _upper_bound; } - void setArity(uint32_t v) noexcept { _arity = v; } - void setBounds(int64_t lower, int64_t upper) noexcept { _lower_bound = lower; _upper_bound = upper; } + PersistentPredicateParams() + : _arity(8), + _lower_bound(std::numeric_limits<int64_t>::min()), + _upper_bound(std::numeric_limits<int64_t>::max()) + { + } + uint32_t arity() const { return _arity; } + int64_t lower_bound() const { return _lower_bound; } + int64_t upper_bound() const { return _upper_bound; } + void setArity(uint32_t v) { _arity = v; } + void setBounds(int64_t lower, int64_t upper) { _lower_bound = lower; _upper_bound = upper; } - bool operator==(const PersistentPredicateParams &rhs) const noexcept { + bool operator==(const PersistentPredicateParams &rhs) const { return ((_arity == rhs._arity) && (_lower_bound == rhs._lower_bound) && (_upper_bound == 
rhs._upper_bound)); diff --git a/searchlib/src/vespa/searchcommon/attribute/predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h index 7e9258ab5db..133b7331689 100644 --- a/searchlib/src/vespa/searchcommon/attribute/predicate_params.h +++ b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h @@ -11,16 +11,17 @@ namespace search::attribute { */ class PredicateParams : public PersistentPredicateParams { - float _dense_posting_list_threshold; + double _dense_posting_list_threshold; public: - PredicateParams() noexcept + PredicateParams() : PersistentPredicateParams(), _dense_posting_list_threshold(0.4) - { } + { + } - float dense_posting_list_threshold() const noexcept { return _dense_posting_list_threshold; } - void setDensePostingListThreshold(float v) noexcept { _dense_posting_list_threshold = v; } - bool operator==(const PredicateParams &rhs) const noexcept { + double dense_posting_list_threshold() const { return _dense_posting_list_threshold; } + void setDensePostingListThreshold(double v) { _dense_posting_list_threshold = v; } + bool operator==(const PredicateParams &rhs) const { return (PersistentPredicateParams::operator==(rhs) && (_dense_posting_list_threshold == rhs._dense_posting_list_threshold)); } diff --git a/searchlib/src/vespa/searchcommon/common/dictionary_config.h b/searchlib/src/vespa/searchcommon/common/dictionary_config.h index f504439c5a3..f51341ad799 100644 --- a/searchlib/src/vespa/searchcommon/common/dictionary_config.h +++ b/searchlib/src/vespa/searchcommon/common/dictionary_config.h @@ -3,7 +3,6 @@ #pragma once #include <iosfwd> -#include <cstdint> namespace search { @@ -12,8 +11,8 @@ namespace search { */ class DictionaryConfig { public: - enum class Type : uint8_t { BTREE, HASH, BTREE_AND_HASH }; - enum class Match : uint8_t { CASED, UNCASED }; + enum class Type { BTREE, HASH, BTREE_AND_HASH }; + enum class Match { CASED, UNCASED }; DictionaryConfig() noexcept : _type(Type::BTREE), 
_match(Match::UNCASED) {} DictionaryConfig(Type type) noexcept : _type(type), _match(Match::UNCASED) {} DictionaryConfig(Type type, Match match) noexcept : _type(type), _match(match) {} @@ -21,8 +20,8 @@ public: Match getMatch() const { return _match; } bool operator == (const DictionaryConfig & b) const { return (_type == b._type) && (_match == b._match); } private: - Type _type : 4; - Match _match : 4; + Type _type; + Match _match; }; std::ostream& operator<<(std::ostream& os, const DictionaryConfig & cfg); diff --git a/searchlib/src/vespa/searchcommon/common/growstrategy.h b/searchlib/src/vespa/searchcommon/common/growstrategy.h index 86750eafbfc..8766989ded0 100644 --- a/searchlib/src/vespa/searchcommon/common/growstrategy.h +++ b/searchlib/src/vespa/searchcommon/common/growstrategy.h @@ -23,17 +23,17 @@ public: { } - static GrowStrategy make(uint32_t docsInitialCapacity, float docsGrowFactor, uint32_t docsGrowDelta) noexcept { + static GrowStrategy make(uint32_t docsInitialCapacity, float docsGrowFactor, uint32_t docsGrowDelta) { return {docsInitialCapacity, docsGrowFactor, docsGrowDelta, 0, 0.2}; } - float getMultiValueAllocGrowFactor() const noexcept { return _multiValueAllocGrowFactor; } + float getMultiValueAllocGrowFactor() const { return _multiValueAllocGrowFactor; } - bool operator==(const GrowStrategy & rhs) const noexcept { + bool operator==(const GrowStrategy & rhs) const { return vespalib::GrowStrategy::operator==(rhs) && (_multiValueAllocGrowFactor == rhs._multiValueAllocGrowFactor); } - bool operator!=(const GrowStrategy & rhs) const noexcept { + bool operator!=(const GrowStrategy & rhs) const { return !(operator==(rhs)); } }; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 399c0266ec9..ba791444dea 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ 
b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -130,10 +130,26 @@ private: public: AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute, - const string &query_stack, const SearchContextParams ¶ms); + const string &query_stack, const SearchContextParams ¶ms) + : AttributeFieldBlueprint(field, attribute, QueryTermDecoder::decodeTerm(query_stack), params) + { } AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute, - QueryTermSimple::UP term, const SearchContextParams ¶ms); - ~AttributeFieldBlueprint() override; + QueryTermSimple::UP term, const SearchContextParams ¶ms) + : SimpleLeafBlueprint(field), + _attr(attribute), + _query_term(term->getTermString()), + _search_context(attribute.createSearchContext(std::move(term), params)), + _type(OTHER) + { + uint32_t estHits = _search_context->approximateHits(); + HitEstimate estimate(estHits, estHits == 0); + setEstimate(estimate); + if (attribute.isFloatingPointType()) { + _type = FLOAT; + } else if (attribute.isIntegerType()) { + _type = INT; + } + } SearchIteratorUP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override { assert(tfmda.size() == 1); @@ -165,30 +181,6 @@ public: bool getRange(vespalib::string &from, vespalib::string &to) const override; }; -AttributeFieldBlueprint::~AttributeFieldBlueprint() = default; - -AttributeFieldBlueprint::AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute, - const string &query_stack, const SearchContextParams ¶ms) - : AttributeFieldBlueprint(field, attribute, QueryTermDecoder::decodeTerm(query_stack), params) -{ } -AttributeFieldBlueprint::AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute, - QueryTermSimple::UP term, const SearchContextParams ¶ms) - : SimpleLeafBlueprint(field), - _attr(attribute), - _query_term(term->getTermString()), - _search_context(attribute.createSearchContext(std::move(term), 
params)), - _type(OTHER) -{ - uint32_t estHits = _search_context->approximateHits(); - HitEstimate estimate(estHits, estHits == 0); - setEstimate(estimate); - if (attribute.isFloatingPointType()) { - _type = FLOAT; - } else if (attribute.isIntegerType()) { - _type = INT; - } -} - vespalib::string get_type(const IAttributeVector& attr) { @@ -637,11 +629,7 @@ public: return bitvector_iterator; } } - if (_attr.has_weight_iterator(_dict_entry.posting_idx)) { - return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); - } else { - return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict); - } + return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); } SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override { diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index d606daaa3e0..f4ab447ed51 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -125,8 +125,7 @@ bool AttributeVector::hasArrayType() const { return _config->collectionType().is bool AttributeVector::getIsFilter() const { return _config->getIsFilter(); } bool AttributeVector::getIsFastSearch() const { return _config->fastSearch(); } bool AttributeVector::isMutable() const { return _config->isMutable(); } -attribute::BasicType::Type AttributeVector::getBasicType() const { return _config->basicType().type(); } -attribute::CollectionType::Type AttributeVector::getCollectionType() const { return _config->collectionType().type(); } +bool AttributeVector::getEnableOnlyBitVector() const { return _config->getEnableOnlyBitVector(); } bool AttributeVector::isEnumerated(const vespalib::GenericHeader &header) diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h 
b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 68dfe52643f..e3a7fdeb2c3 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -288,6 +288,7 @@ public: bool getIsFilter() const override final; bool getIsFastSearch() const override final; bool isMutable() const; + bool getEnableOnlyBitVector() const; const Config &getConfig() const noexcept { return *_config; } void update_config(const Config& cfg); @@ -319,8 +320,8 @@ public: AddressSpaceUsage getAddressSpaceUsage() const; - BasicType::Type getBasicType() const override final; - CollectionType::Type getCollectionType() const override final; + BasicType::Type getBasicType() const override final { return getInternalBasicType().type(); } + CollectionType::Type getCollectionType() const override final { return getInternalCollectionType().type(); } uint32_t getCommittedDocIdLimit() const override final { return _committedDocIdLimit.load(std::memory_order_acquire); } bool isImported() const override; diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index 4e300fe3800..7f04efd940b 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -104,6 +104,7 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) Config retval(bType, cType); PredicateParams predicateParams; retval.setFastSearch(cfg.fastsearch); + retval.setEnableOnlyBitVector(cfg.enableonlybitvector); retval.setIsFilter(cfg.enableonlybitvector); retval.setFastAccess(cfg.fastaccess); retval.setMutable(cfg.ismutable); diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h index d6499708b76..be36bcd185a 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h +++ 
b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h @@ -43,7 +43,6 @@ struct IDocumentWeightAttribute virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0; virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const = 0; virtual DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const = 0; - virtual bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept = 0; virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0; virtual ~IDocumentWeightAttribute() = default; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h index f45ba3c8773..71c50ccb270 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -41,7 +41,6 @@ private: void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; - bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; }; DocumentWeightAttributeAdapter _document_weight_attribute_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index 89ef0a7d8a0..1009fa2fb5f 100644 --- 
a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -146,17 +146,12 @@ MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::make_bi } template <typename B, typename M> -bool -MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept -{ - return self.getPostingList().has_btree(idx); -} - -template <typename B, typename M> const IDocumentWeightAttribute * MultiValueNumericPostingAttribute<B, M>::asDocumentWeightAttribute() const { - if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) { + if (this->hasWeightedSetType() && + this->getBasicType() == AttributeVector::BasicType::INT64 && + !this->getConfig().getIsFilter()) { return &_document_weight_attribute_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index 5c4d97660f6..b25c31a7dea 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -39,7 +39,6 @@ private: void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override; DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; - bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; }; DocumentWeightAttributeAdapter _document_weight_attribute_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 
3042a9d0bb9..19840b5a474 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -159,13 +159,6 @@ MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::create( } template <typename B, typename M> -bool -MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept -{ - return self.getPostingList().has_btree(idx); -} - -template <typename B, typename M> std::unique_ptr<queryeval::SearchIterator> MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const { @@ -176,7 +169,9 @@ template <typename B, typename T> const IDocumentWeightAttribute * MultiValueStringPostingAttributeT<B, T>::asDocumentWeightAttribute() const { - if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::STRING)) { + if (this->hasWeightedSetType() && + this->getBasicType() == AttributeVector::BasicType::STRING && + !this->getConfig().getIsFilter()) { return &_document_weight_attribute_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 725491c4702..d32d8cde7ea 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -154,7 +154,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) DocIt postings; vespalib::ConstArrayRef<Posting> array = _merger.getArray(); postings.set(&array[0], &array[array.size()]); - if (_postingList.isFilter()) { + if (_postingList._isFilter) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, 
postings); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings); @@ -182,7 +182,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) DocIt postings; const Posting *array = postingList.getKeyDataEntry(_pidx, clusterSize); postings.set(array, array + clusterSize); - if (postingList.isFilter()) { + if (postingList._isFilter) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, postings); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings); @@ -191,7 +191,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) typename PostingList::BTreeType::FrozenView frozen(_frozenRoot, postingList.getAllocator()); using DocIt = typename PostingList::ConstIterator; - if (_postingList.isFilter()) { + if (_postingList._isFilter) { return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, frozen.getRoot(), frozen.getAllocator()); } else { return std::make_unique<AttributePostingListIteratorT<DocIt>> (_baseSearchCtx, _hasWeight, matchData, frozen.getRoot(), frozen.getAllocator()); diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 09af15e35d5..2703201b292 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -19,7 +19,9 @@ using vespalib::btree::BTreeNoLeafData; using vespalib::datastore::EntryRefFilter; PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config) - : _bvSize(64u), + : _enableOnlyBitVector(config.getEnableOnlyBitVector()), + _isFilter(config.getIsFilter()), + _bvSize(64u), _bvCapacity(128u), _minBvDocFreq(64), _maxBvDocFreq(std::numeric_limits<uint32_t>::max()), @@ -27,9 +29,9 @@ 
PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s _dictionary(dictionary), _status(status), _bvExtraBytes(0), - _compaction_spec(), - _isFilter(config.getIsFilter()) -{ } + _compaction_spec() +{ +} PostingStoreBase2::~PostingStoreBase2() = default; @@ -58,7 +60,8 @@ PostingStoreBase2::resizeBitVectors(uint32_t newSize, uint32_t newCapacity) template <typename DataT> -PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status, const Config &config) +PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status, + const Config &config) : Parent(false), PostingStoreBase2(dictionary, status, config), _bvType(1, 1024u, RefType::offsetSize()) @@ -182,7 +185,8 @@ PostingStore<DataT>::applyNew(EntryRef &ref, AddIter a, AddIter ae) template <typename DataT> void -PostingStore<DataT>::makeDegradedTree(EntryRef &ref, const BitVector &bv) +PostingStore<DataT>::makeDegradedTree(EntryRef &ref, + const BitVector &bv) { assert(!ref.valid()); BTreeTypeRefPair tPair(allocBTree()); @@ -260,7 +264,7 @@ PostingStore<DataT>::makeBitVector(EntryRef &ref) assert(bv.countTrueBits() == expDocFreq); BitVectorRefPair bPair(allocBitVector()); BitVectorEntry *bve = bPair.data; - if (isFilter()) { + if (_enableOnlyBitVector) { BTreeType *tree = getWTreeEntry(iRef); tree->clear(_allocator); _store.hold_entry(ref); @@ -297,7 +301,7 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae) assert(bv.countTrueBits() == expDocFreq); BitVectorRefPair bPair(allocBitVector()); BitVectorEntry *bve = bPair.data; - if (!isFilter()) { + if (!_enableOnlyBitVector) { applyNewTree(bve->_tree, aOrg, ae, CompareT()); } bve->_bv = bvsp; @@ -311,7 +315,11 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae) template <typename DataT> void -PostingStore<DataT>::apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, RemoveIter re) +PostingStore<DataT>::apply(BitVector &bv, + AddIter a, 
+ AddIter ae, + RemoveIter r, + RemoveIter re) { while (a != ae || r != re) { if (r != re && (a == ae || *r < a->_key)) { @@ -337,7 +345,11 @@ PostingStore<DataT>::apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, R template <typename DataT> void -PostingStore<DataT>::apply(EntryRef &ref, AddIter a, AddIter ae, RemoveIter r, RemoveIter re) +PostingStore<DataT>::apply(EntryRef &ref, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re) { if (!ref.valid()) { // No old data @@ -494,9 +506,11 @@ PostingStore<DataT>::beginFrozen(const EntryRef ref) const return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc); } + template <typename DataT> void -PostingStore<DataT>::beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const +PostingStore<DataT>::beginFrozen(const EntryRef ref, + std::vector<ConstIterator> &where) const { if (!ref.valid()) { where.emplace_back(); @@ -728,7 +742,8 @@ PostingStore<DataT>::compact_worst_buffers(CompactionSpec compaction_spec, const filter.add_buffers(_bvType.get_active_buffers()); } _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) - { return move(refs); }, filter); + { return move(refs); }, + filter); compacting_buffers->finish(); } diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index 8c6ed3d9497..57f523acefe 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -27,7 +27,7 @@ public: std::shared_ptr<GrowableBitVector> _bv; // bitvector public: - BitVectorEntry() noexcept + BitVectorEntry() : _tree(), _bv() { } @@ -36,22 +36,25 @@ public: class PostingStoreBase2 { +public: + bool _enableOnlyBitVector; + bool _isFilter; protected: - static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; uint32_t _bvSize; uint32_t _bvCapacity; +public: uint32_t _minBvDocFreq; // Less than this ==> destroy bv uint32_t _maxBvDocFreq; // Greater 
than or equal to this ==> create bv - std::set<uint32_t> _bvs; // Current bitvectors - IEnumStoreDictionary& _dictionary; - Status &_status; - uint64_t _bvExtraBytes; +protected: + std::set<uint32_t> _bvs; // Current bitvectors + IEnumStoreDictionary& _dictionary; + Status &_status; + uint64_t _bvExtraBytes; PostingStoreCompactionSpec _compaction_spec; -private: - bool _isFilter; + + static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; public: - bool isFilter() const noexcept { return _isFilter; } PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config); virtual ~PostingStoreBase2(); bool resizeBitVectors(uint32_t newSize, uint32_t newCapacity); @@ -108,7 +111,7 @@ public: bool removeSparseBitVectors() override; void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs); - static bool isBitVector(uint32_t typeId) noexcept { return typeId == BUFFERTYPE_BITVECTOR; } + static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; } void applyNew(EntryRef &ref, AddIter a, AddIter ae); @@ -183,9 +186,6 @@ public: BitVectorEntry *getWBitVectorEntry(RefType ref) { return _store.template getEntry<BitVectorEntry>(ref); } - bool has_btree(const EntryRef ref) const noexcept { - return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter(); - } std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const; diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp index 6e56f5477c2..17a0e6256d4 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp @@ -41,7 +41,7 @@ StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default; StringSearchHelper::~StringSearchHelper() = default; bool 
-StringSearchHelper::isMatch(const char *src) const noexcept { +StringSearchHelper::isMatch(const char *src) const { if (__builtin_expect(isRegex(), false)) { return getRegex().valid() && getRegex().partial_match(std::string_view(src)); } diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h index 0c52692ee04..7bfcf0e4292 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h @@ -22,7 +22,7 @@ public: StringSearchHelper(const StringSearchHelper &) = delete; StringSearchHelper & operator =(const StringSearchHelper &) = delete; ~StringSearchHelper(); - bool isMatch(const char *src) const noexcept; + bool isMatch(const char *src) const; bool isPrefix() const noexcept { return _isPrefix; } bool isRegex() const noexcept { return _isRegex; } bool isCased() const noexcept { return _isCased; } diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp index 60d04d7e3ad..42c9ed4e1c6 100644 --- a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp +++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp @@ -4,8 +4,9 @@ #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/guard.h> -#include <cassert> #include <algorithm> +#include <cassert> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP(".indexmetainfo"); @@ -305,7 +306,7 @@ IndexMetaInfo::save(const vespalib::string &baseName) { vespalib::string fileName = makeFileName(baseName); vespalib::string newName = fileName + ".new"; - vespalib::unlink(newName); + std::filesystem::remove(std::filesystem::path(newName)); vespalib::FilePointer f(fopen(newName.c_str(), "w")); if (!f.valid()) { LOG(warning, "could not open file for writing: %s", newName.c_str()); diff --git 
a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp index 4ae0ce2621a..f4d129bfc58 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -22,7 +22,7 @@ readHeader(vespalib::FileHeader &h, const vespalib::string &name) { Fast_BufferedFile file(32_Ki); - file.OpenReadOnly(name.c_str()); + file.ReadOpenExisting(name.c_str()); h.readFile(file); } @@ -58,8 +58,7 @@ BitVectorFileWrite::open(const vespalib::string &name, if (tuneFileWrite.getWantDirectIO()) { _datFile->EnableDirectIO(); } - // XXX no checking for success: - _datFile->OpenWriteOnly(datname.c_str()); + _datFile->WriteOpen(datname.c_str()); if (_datHeaderLen == 0) { assert(_numKeys == 0); diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp index 6913c03262c..0caf89a0730 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -20,7 +20,7 @@ void readHeader(vespalib::FileHeader &h, const vespalib::string &name) { Fast_BufferedFile file(32_Ki); - file.OpenReadOnly(name.c_str()); + file.ReadOpenExisting(name.c_str()); h.readFile(file); } @@ -66,8 +66,7 @@ BitVectorIdxFileWrite::open(const vespalib::string &name, _idxFile->EnableDirectIO(); } - // XXX no checking for success: - _idxFile->OpenWriteOnly(idxname.c_str()); + _idxFile->WriteOpen(idxname.c_str()); if (_idxHeaderLen == 0) { assert(_numKeys == 0); diff --git a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp index 24d790afe74..fb1fe98aa88 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp @@ -482,10 +482,6 @@ FieldMerger::merge_field_start() return; } - if (FileKit::hasStamp(_field_dir + 
"/.mergeocc_done")) { - _state = State::MERGE_DONE; - return; - } std::filesystem::create_directory(std::filesystem::path(_field_dir)); LOG(debug, "merge_field for field %s dir %s", _field_name.c_str(), _field_dir.c_str()); @@ -507,10 +503,6 @@ FieldMerger::merge_field_finish() merge_postings_failed(); return; } - if (!FileKit::createStamp(_field_dir + "/.mergeocc_done")) { - _failed = true; - return; - } vespalib::File::sync(_field_dir); if (!clean_tmp_dirs()) { diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp index 7f6f5c4ed15..6d849532931 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp @@ -5,8 +5,9 @@ #include "extposocc.h" #include "pagedict4file.h" #include <vespa/vespalib/util/error.h> -#include <vespa/log/log.h> +#include <filesystem> +#include <vespa/log/log.h> LOG_SETUP(".diskindex.fieldwriter"); using search::index::FieldLengthInfo; @@ -184,7 +185,7 @@ FieldWriter::remove(const vespalib::string &prefix) { for (const char **j = termOccNames; *j != nullptr; ++j) { vespalib::string tmpName = prefix + *j; - FastOS_File::Delete(tmpName.c_str()); + std::filesystem::remove(std::filesystem::path(tmpName)); } } diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp index 159af42635f..98541dba646 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ -15,6 +15,7 @@ #include <vespa/vespalib/util/arrayqueue.hpp> #include <vespa/vespalib/util/array.hpp> #include <vespa/fastos/file.h> +#include <filesystem> #include <future> #include <vespa/log/log.h> @@ -141,23 +142,14 @@ verifyOrAssert(const TmpChunkMetaV & v) } } -vespalib::string eraseErrorMsg(const vespalib::string & fileName, int error) { - return make_string("Error erasing file '%s'. 
Error is '%s'", - fileName.c_str(), getErrorString(error).c_str()); -} - } void FileChunk::erase() { _file.reset(); - if (!FastOS_File::Delete(_idxFileName.c_str()) && (errno != ENOENT)) { - throw std::runtime_error(eraseErrorMsg(_idxFileName, errno)); - } - if (!FastOS_File::Delete(_dataFileName.c_str()) && (errno != ENOENT)) { - throw std::runtime_error(eraseErrorMsg(_dataFileName, errno)); - } + std::filesystem::remove(std::filesystem::path(_idxFileName)); + std::filesystem::remove(std::filesystem::path(_dataFileName)); } size_t @@ -569,18 +561,14 @@ void FileChunk::eraseIdxFile(const vespalib::string & name) { vespalib::string fileName(createIdxFileName(name)); - if ( ! FastOS_File::Delete(fileName.c_str())) { - throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str())); - } + std::filesystem::remove(std::filesystem::path(fileName)); } void FileChunk::eraseDatFile(const vespalib::string & name) { vespalib::string fileName(createDatFileName(name)); - if ( ! FastOS_File::Delete(fileName.c_str())) { - throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str())); - } + std::filesystem::remove(std::filesystem::path(fileName)); } diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.cpp b/searchlib/src/vespa/searchlib/features/matchfeature.cpp index 53a7ce6e108..7a2148510d4 100644 --- a/searchlib/src/vespa/searchlib/features/matchfeature.cpp +++ b/searchlib/src/vespa/searchlib/features/matchfeature.cpp @@ -10,9 +10,34 @@ using namespace search::fef; using CollectionType = FieldInfo::CollectionType; +using DataType = FieldInfo::DataType; namespace search::features { +namespace { + +auto attribute_match_data_types = ParameterDataTypeSet::normalTypeSet(); + +bool matchable_field(const FieldInfo& info) +{ + auto field_type = info.type(); + if (field_type != FieldType::INDEX && field_type != FieldType::ATTRIBUTE) { + return false; + } + auto data_type = info.get_data_type(); + if (data_type == DataType::TENSOR || 
data_type == DataType::RAW) { + // not matchable + return false; + } + if (field_type == FieldType::ATTRIBUTE && !attribute_match_data_types.allowedType(data_type)) { + // bad data type for attributeMatch feature + return false; + } + return true; +} + +} + MatchExecutor::MatchExecutor(const MatchParams & params) : FeatureExecutor(), _params(params) @@ -67,30 +92,28 @@ MatchBlueprint::setup(const IIndexEnvironment & env, { for (uint32_t i = 0; i < env.getNumFields(); ++i) { const FieldInfo * info = env.getField(i); - if (info->get_data_type() == FieldInfo::DataType::TENSOR) { - // not matchable + if (!matchable_field(*info)) { continue; } - if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) { - _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name())); - if (info->type() == FieldType::INDEX) { - if (info->collection() == CollectionType::SINGLE) { - defineInput("fieldMatch(" + info->name() + ")"); - } else { - defineInput("elementCompleteness(" + info->name() + ")"); - } - } else if (info->type() == FieldType::ATTRIBUTE) { - defineInput("attributeMatch(" + info->name() + ")"); + _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name())); + if (info->type() == FieldType::INDEX) { + if (info->collection() == CollectionType::SINGLE) { + defineInput("fieldMatch(" + info->name() + ")"); + } else { + defineInput("elementCompleteness(" + info->name() + ")"); } + } else if (info->type() == FieldType::ATTRIBUTE) { + defineInput("attributeMatch(" + info->name() + ")"); } } describeOutput("score", "Normalized sum over all matched fields"); describeOutput("totalWeight", "Sum of rank weights for all matched fields"); for (uint32_t i = 0; i < env.getNumFields(); ++i) { const FieldInfo * info = env.getField(i); - if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) { - describeOutput("weight." 
+ info->name(), "The rank weight value for field '" + info->name() + "'"); + if (!matchable_field(*info)) { + continue; } + describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'"); } return true; } diff --git a/searchlib/src/vespa/searchlib/fef/properties.cpp b/searchlib/src/vespa/searchlib/fef/properties.cpp index 2cc4e50b593..6f334630dc5 100644 --- a/searchlib/src/vespa/searchlib/fef/properties.cpp +++ b/searchlib/src/vespa/searchlib/fef/properties.cpp @@ -11,7 +11,7 @@ const Property::Value Property::_emptyValue; const Property::Values Property::_emptyValues; const Property::Value & -Property::getAt(uint32_t idx) const noexcept +Property::getAt(uint32_t idx) const { if (idx < (*_values).size()) { return (*_values)[idx]; @@ -22,7 +22,7 @@ Property::getAt(uint32_t idx) const noexcept //----------------------------------------------------------------------------- uint32_t -Properties::rawHash(const void *buf, uint32_t len) noexcept +Properties::rawHash(const void *buf, uint32_t len) { uint32_t res = 0; unsigned const char *pt = (unsigned const char *) buf; @@ -33,7 +33,7 @@ Properties::rawHash(const void *buf, uint32_t len) noexcept return res; } -Properties::Properties() noexcept +Properties::Properties() : _numValues(0), _data() { @@ -59,7 +59,7 @@ Properties::add(vespalib::stringref key, vespalib::stringref value) } uint32_t -Properties::count(vespalib::stringref key) const noexcept +Properties::count(vespalib::stringref key) const { if (!key.empty()) { auto node = _data.find(key); @@ -112,14 +112,14 @@ Properties::clear() } bool -Properties::operator==(const Properties &rhs) const noexcept +Properties::operator==(const Properties &rhs) const { return (_numValues == rhs._numValues && _data == rhs._data); } uint32_t -Properties::hashCode() const noexcept +Properties::hashCode() const { uint32_t hash = numKeys() + numValues(); for (const auto& elem : _data) { @@ -159,7 +159,7 @@ 
Properties::visitNamespace(vespalib::stringref ns, } Property -Properties::lookup(vespalib::stringref key) const noexcept +Properties::lookup(vespalib::stringref key) const { if (key.empty()) { return Property(); @@ -172,7 +172,7 @@ Properties::lookup(vespalib::stringref key) const noexcept } Property Properties::lookup(vespalib::stringref namespace1, - vespalib::stringref key) const noexcept + vespalib::stringref key) const { if (namespace1.empty() || key.empty()) { return Property(); @@ -184,7 +184,7 @@ Property Properties::lookup(vespalib::stringref namespace1, Property Properties::lookup(vespalib::stringref namespace1, vespalib::stringref namespace2, - vespalib::stringref key) const noexcept + vespalib::stringref key) const { if (namespace1.empty() || namespace2.empty() || key.empty()) { return Property(); @@ -197,7 +197,7 @@ Property Properties::lookup(vespalib::stringref namespace1, Property Properties::lookup(vespalib::stringref namespace1, vespalib::stringref namespace2, vespalib::stringref namespace3, - vespalib::stringref key) const noexcept + vespalib::stringref key) const { if (namespace1.empty() || namespace2.empty() || namespace3.empty() || key.empty()) { return Property(); @@ -207,7 +207,7 @@ Property Properties::lookup(vespalib::stringref namespace1, return lookup(fullKey); } -void Properties::swap(Properties & rhs) noexcept +void Properties::swap(Properties & rhs) { _data.swap(rhs._data); std::swap(_numValues, rhs._numValues); diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h index 80e8c70939c..a6ae83b0339 100644 --- a/searchlib/src/vespa/searchlib/fef/properties.h +++ b/searchlib/src/vespa/searchlib/fef/properties.h @@ -37,7 +37,7 @@ private: * * @param values the values for this property **/ - Property(const Values &values) noexcept : _values(&values) { } + Property(const Values &values) : _values(&values) { } public: /** @@ -46,14 +46,14 @@ public: * object on the stack in the 
application, and will also be used * by the @ref Properties class when a lookup gives no results. **/ - Property() noexcept : _values(&_emptyValues) { } + Property() : _values(&_emptyValues) { } /** * Check if we found what we were looking for or not. * * @return true if the key we looked up had at least one value **/ - bool found() const noexcept { + bool found() const { return !(*_values).empty(); } @@ -63,7 +63,7 @@ public: * * @return first value for the looked up key, or "" **/ - const Value &get() const noexcept { + const Value &get() const { if ((*_values).empty()) { return _emptyValue; } @@ -78,7 +78,7 @@ public: * @return first value for the looked up key, or fallBack * @param fallBack value to return if no values were found **/ - const Value & get(const Value &fallBack) const noexcept { + const Value & get(const Value &fallBack) const { if ((*_values).empty()) { return fallBack; } @@ -90,7 +90,7 @@ public: * * @return number of values for this property **/ - uint32_t size() const noexcept { return (*_values).size(); } + uint32_t size() const { return (*_values).size(); } /** * Obtain a specific value for the looked up key. @@ -98,7 +98,7 @@ public: * @return the requested value, or "" if idx was out of bounds * @param idx the index of the value we want to access **/ - const Value &getAt(uint32_t idx) const noexcept; + const Value &getAt(uint32_t idx) const; }; //----------------------------------------------------------------------------- @@ -127,7 +127,7 @@ public: /** * Virtual destructor to allow safe subclassing. 
**/ - virtual ~IPropertiesVisitor() = default; + virtual ~IPropertiesVisitor() {} }; //----------------------------------------------------------------------------- @@ -156,7 +156,7 @@ private: * @param buf data pointer * @param len data length **/ - static uint32_t rawHash(const void *buf, uint32_t len) noexcept; + static uint32_t rawHash(const void *buf, uint32_t len); public: using UP = std::unique_ptr<Properties>; @@ -164,7 +164,7 @@ public: /** * Create an empty properties object. **/ - Properties() noexcept; + Properties(); Properties(Properties &&) noexcept = default; Properties & operator=(Properties &&) noexcept = default; Properties(const Properties &); @@ -192,7 +192,7 @@ public: * @return number of values for the given key * @param key the key **/ - uint32_t count(vespalib::stringref key) const noexcept; + uint32_t count(vespalib::stringref key) const; /** * Remove all values for the given key. @@ -226,14 +226,14 @@ public: * * @return number of keys **/ - uint32_t numKeys() const noexcept { return _data.size(); } + uint32_t numKeys() const { return _data.size(); } /** * Obtain the total number of values stored in this object. 
* * @return number of values **/ - uint32_t numValues() const noexcept { return _numValues; } + uint32_t numValues() const { return _numValues; } /** * Check if rhs contains the same key/value pairs as this @@ -242,14 +242,14 @@ public: * * @return true if we are equal to rhs **/ - bool operator==(const Properties &rhs) const noexcept; + bool operator==(const Properties &rhs) const; /** * Calculate a hash code for this object * * @return hash code for this object **/ - uint32_t hashCode() const noexcept; + uint32_t hashCode() const; /** * Visit all key/value pairs @@ -275,7 +275,7 @@ public: * @return object encapsulating lookup result * @param key the key to look up **/ - Property lookup(vespalib::stringref key) const noexcept; + Property lookup(vespalib::stringref key) const; /** * Look up a key inside a namespace using the proposed namespace @@ -289,7 +289,7 @@ public: * @param key the key to look up **/ Property lookup(vespalib::stringref namespace1, - vespalib::stringref key) const noexcept; + vespalib::stringref key) const; /** * Look up a key inside a namespace using the proposed namespace @@ -305,7 +305,7 @@ public: **/ Property lookup(vespalib::stringref namespace1, vespalib::stringref namespace2, - vespalib::stringref key) const noexcept; + vespalib::stringref key) const; /** * Look up a key inside a namespace using the proposed namespace @@ -323,13 +323,13 @@ public: Property lookup(vespalib::stringref namespace1, vespalib::stringref namespace2, vespalib::stringref namespace3, - vespalib::stringref key) const noexcept; + vespalib::stringref key) const; - void swap(Properties & rhs) noexcept ; + void swap(Properties & rhs); }; inline void -swap(Properties & a, Properties & b) noexcept +swap(Properties & a, Properties & b) { a.swap(b); } diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp index 3f6085ef7ff..488c58e3119 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp 
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -87,7 +87,7 @@ Blueprint::sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit) return { uint32_t(std::min(sum, uint64_t(limit))), empty }; } -Blueprint::State::State() noexcept +Blueprint::State::State() : _fields(), _estimateHits(0), _tree_size(1), @@ -97,13 +97,13 @@ Blueprint::State::State() noexcept _cost_tier(COST_TIER_NORMAL) {} -Blueprint::State::State(FieldSpecBase field) noexcept +Blueprint::State::State(FieldSpecBase field) : State() { _fields.add(field); } -Blueprint::State::State(FieldSpecBaseList fields_in) noexcept +Blueprint::State::State(FieldSpecBaseList fields_in) : _fields(std::move(fields_in)), _estimateHits(0), _tree_size(1), @@ -116,7 +116,7 @@ Blueprint::State::State(FieldSpecBaseList fields_in) noexcept Blueprint::State::~State() = default; -Blueprint::Blueprint() noexcept +Blueprint::Blueprint() : _parent(0), _sourceId(0xffffffff), _docid_limit(0), @@ -383,7 +383,7 @@ StateCache::notifyChange() { IntermediateBlueprint::~IntermediateBlueprint() = default; void -IntermediateBlueprint::setDocIdLimit(uint32_t limit) noexcept +IntermediateBlueprint::setDocIdLimit(uint32_t limit) { Blueprint::setDocIdLimit(limit); for (Blueprint::UP &child : _children) { @@ -576,7 +576,7 @@ IntermediateBlueprint::createSearch(fef::MatchData &md, bool strict) const return createIntermediateSearch(std::move(subSearches), strict, md); } -IntermediateBlueprint::IntermediateBlueprint() noexcept = default; +IntermediateBlueprint::IntermediateBlueprint() = default; IntermediateBlueprint & IntermediateBlueprint::addChild(Blueprint::UP child) @@ -737,6 +737,13 @@ LeafBlueprint::optimize(Blueprint* &self) } void +LeafBlueprint::setEstimate(HitEstimate est) +{ + _state.estimate(est); + notifyChange(); +} + +void LeafBlueprint::set_cost_tier(uint32_t value) { assert(value < 0x100); @@ -745,6 +752,13 @@ LeafBlueprint::set_cost_tier(uint32_t value) } void +LeafBlueprint::set_allow_termwise_eval(bool 
value) +{ + _state.allow_termwise_eval(value); + notifyChange(); +} + +void LeafBlueprint::set_want_global_filter(bool value) { _state.want_global_filter(value); diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 8d230b6ec01..dc7a0992d82 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -48,11 +48,11 @@ public: uint32_t estHits; bool empty; - HitEstimate() noexcept : estHits(0), empty(true) {} - HitEstimate(uint32_t estHits_, bool empty_) noexcept + HitEstimate() : estHits(0), empty(true) {} + HitEstimate(uint32_t estHits_, bool empty_) : estHits(estHits_), empty(empty_) {} - bool operator < (const HitEstimate &other) const noexcept { + bool operator < (const HitEstimate &other) const { if (empty == other.empty) { return (estHits < other.estHits); } else { @@ -77,21 +77,21 @@ public: static constexpr uint8_t COST_TIER_EXPENSIVE = 2; static constexpr uint8_t COST_TIER_MAX = 255; - State() noexcept; - State(FieldSpecBase field) noexcept; - State(FieldSpecBaseList fields_in) noexcept; + State(); + State(FieldSpecBase field); + State(FieldSpecBaseList fields_in); State(const State &rhs) = delete; State(State &&rhs) noexcept = default; State &operator=(const State &rhs) = delete; State &operator=(State &&rhs) noexcept = default; ~State(); - bool isTermLike() const noexcept { return !_fields.empty(); } - const FieldSpecBaseList &fields() const noexcept { return _fields; } + bool isTermLike() const { return !_fields.empty(); } + const FieldSpecBaseList &fields() const { return _fields; } - size_t numFields() const noexcept { return _fields.size(); } - const FieldSpecBase &field(size_t idx) const noexcept { return _fields[idx]; } - const FieldSpecBase *lookupField(uint32_t fieldId) const noexcept { + size_t numFields() const { return _fields.size(); } + const FieldSpecBase &field(size_t idx) const { return _fields[idx]; } + 
const FieldSpecBase *lookupField(uint32_t fieldId) const { for (const FieldSpecBase & field : _fields) { if (field.getFieldId() == fieldId) { return &field; @@ -100,27 +100,27 @@ public: return nullptr; } - void estimate(HitEstimate est) noexcept { + void estimate(HitEstimate est) { _estimateHits = est.estHits; _estimateEmpty = est.empty; } - HitEstimate estimate() const noexcept { return HitEstimate(_estimateHits, _estimateEmpty); } - double hit_ratio(uint32_t docid_limit) const noexcept { + HitEstimate estimate() const { return HitEstimate(_estimateHits, _estimateEmpty); } + double hit_ratio(uint32_t docid_limit) const { uint32_t total_hits = _estimateHits; uint32_t total_docs = std::max(total_hits, docid_limit); return (total_docs == 0) ? 0.0 : double(total_hits) / double(total_docs); } - void tree_size(uint32_t value) noexcept { + void tree_size(uint32_t value) { assert(value < 0x100000); _tree_size = value; } - uint32_t tree_size() const noexcept { return _tree_size; } - void allow_termwise_eval(bool value) noexcept { _allow_termwise_eval = value; } - bool allow_termwise_eval() const noexcept { return _allow_termwise_eval; } - void want_global_filter(bool value) noexcept { _want_global_filter = value; } - bool want_global_filter() const noexcept { return _want_global_filter; } - void cost_tier(uint8_t value) noexcept { _cost_tier = value; } - uint8_t cost_tier() const noexcept { return _cost_tier; } + uint32_t tree_size() const { return _tree_size; } + void allow_termwise_eval(bool value) { _allow_termwise_eval = value; } + bool allow_termwise_eval() const { return _allow_termwise_eval; } + void want_global_filter(bool value) { _want_global_filter = value; } + bool want_global_filter() const { return _want_global_filter; } + void cost_tier(uint8_t value) { _cost_tier = value; } + uint8_t cost_tier() const { return _cost_tier; } }; // utility that just takes maximum estimate @@ -137,7 +137,7 @@ public: // utility to get the greater estimate to sort first, 
higher tiers last struct TieredGreaterEstimate { - bool operator () (const auto &a, const auto &b) const noexcept { + bool operator () (const auto &a, const auto &b) const { const auto &lhs = a->getState(); const auto &rhs = b->getState(); if (lhs.cost_tier() != rhs.cost_tier()) { @@ -149,7 +149,7 @@ public: // utility to get the lesser estimate to sort first, higher tiers last struct TieredLessEstimate { - bool operator () (const auto &a, const auto &b) const noexcept { + bool operator () (const auto &a, const auto &b) const { const auto &lhs = a->getState(); const auto &rhs = b->getState(); if (lhs.cost_tier() != rhs.cost_tier()) { @@ -189,20 +189,20 @@ public: // hit that isn't certain to be a match). enum class FilterConstraint { UPPER_BOUND, LOWER_BOUND }; - Blueprint() noexcept; + Blueprint(); Blueprint(const Blueprint &) = delete; Blueprint &operator=(const Blueprint &) = delete; virtual ~Blueprint(); - void setParent(Blueprint *parent) noexcept { _parent = parent; } - Blueprint *getParent() const noexcept { return _parent; } + void setParent(Blueprint *parent) { _parent = parent; } + Blueprint *getParent() const { return _parent; } bool has_parent() const { return (_parent != nullptr); } - Blueprint &setSourceId(uint32_t sourceId) noexcept { _sourceId = sourceId; return *this; } - uint32_t getSourceId() const noexcept { return _sourceId; } + Blueprint &setSourceId(uint32_t sourceId) { _sourceId = sourceId; return *this; } + uint32_t getSourceId() const { return _sourceId; } - virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; } - uint32_t get_docid_limit() const noexcept { return _docid_limit; } + virtual void setDocIdLimit(uint32_t limit) { _docid_limit = limit; } + uint32_t get_docid_limit() const { return _docid_limit; } static Blueprint::UP optimize(Blueprint::UP bp); virtual void optimize(Blueprint* &self) = 0; @@ -227,7 +227,7 @@ public: virtual const State &getState() const = 0; const Blueprint &root() const; - double 
hit_ratio() const noexcept { return getState().hit_ratio(_docid_limit); } + double hit_ratio() const { return getState().hit_ratio(_docid_limit); } virtual void fetchPostings(const ExecuteInfo &execInfo) = 0; virtual void freeze() = 0; @@ -319,10 +319,10 @@ protected: public: using IndexList = std::vector<size_t>; - IntermediateBlueprint() noexcept; + IntermediateBlueprint(); ~IntermediateBlueprint() override; - void setDocIdLimit(uint32_t limit) noexcept final; + void setDocIdLimit(uint32_t limit) final; void optimize(Blueprint* &self) final; void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override; @@ -360,30 +360,24 @@ private: State _state; protected: void optimize(Blueprint* &self) final; - void setEstimate(HitEstimate est) { - _state.estimate(est); - notifyChange(); - } + void setEstimate(HitEstimate est); void set_cost_tier(uint32_t value); - void set_allow_termwise_eval(bool value) { - _state.allow_termwise_eval(value); - notifyChange(); - } + void set_allow_termwise_eval(bool value); void set_want_global_filter(bool value); void set_tree_size(uint32_t value); - LeafBlueprint(bool allow_termwise_eval) noexcept + LeafBlueprint(bool allow_termwise_eval) : _state() { _state.allow_termwise_eval(allow_termwise_eval); } - LeafBlueprint(FieldSpecBase field, bool allow_termwise_eval) noexcept + LeafBlueprint(FieldSpecBase field, bool allow_termwise_eval) : _state(field) { _state.allow_termwise_eval(allow_termwise_eval); } - LeafBlueprint(FieldSpecBaseList fields, bool allow_termwise_eval) noexcept + LeafBlueprint(FieldSpecBaseList fields, bool allow_termwise_eval) : _state(std::move(fields)) { _state.allow_termwise_eval(allow_termwise_eval); @@ -392,7 +386,7 @@ protected: public: ~LeafBlueprint() override = default; const State &getState() const final { return _state; } - void setDocIdLimit(uint32_t limit) noexcept final { Blueprint::setDocIdLimit(limit); } + void setDocIdLimit(uint32_t limit) final { 
Blueprint::setDocIdLimit(limit); } void fetchPostings(const ExecuteInfo &execInfo) override; void freeze() final; SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override; @@ -403,15 +397,15 @@ public: // for leaf nodes representing a single term struct SimpleLeafBlueprint : LeafBlueprint { - explicit SimpleLeafBlueprint() noexcept : LeafBlueprint(true) {} - explicit SimpleLeafBlueprint(FieldSpecBase field) noexcept : LeafBlueprint(field, true) {} - explicit SimpleLeafBlueprint(FieldSpecBaseList fields) noexcept: LeafBlueprint(std::move(fields), true) {} + explicit SimpleLeafBlueprint() : LeafBlueprint(true) {} + explicit SimpleLeafBlueprint(FieldSpecBase field) : LeafBlueprint(field, true) {} + explicit SimpleLeafBlueprint(FieldSpecBaseList fields) : LeafBlueprint(std::move(fields), true) {} }; // for leaf nodes representing more complex structures like wand/phrase struct ComplexLeafBlueprint : LeafBlueprint { - explicit ComplexLeafBlueprint(FieldSpecBase field) noexcept : LeafBlueprint(field, false) {} - explicit ComplexLeafBlueprint(FieldSpecBaseList fields) noexcept : LeafBlueprint(std::move(fields), false) {} + explicit ComplexLeafBlueprint(FieldSpecBase field) : LeafBlueprint(field, false) {} + explicit ComplexLeafBlueprint(FieldSpecBaseList fields) : LeafBlueprint(std::move(fields), false) {} }; //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp index cd1ddd5b92e..121591723e2 100644 --- a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp @@ -5,9 +5,9 @@ namespace search::queryeval { -FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) noexcept - : _fieldId(fieldId | (isFilter_ ? 
0x1000000u : 0)), - _handle(handle) +FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) : + _fieldId(fieldId | (isFilter_ ? 0x1000000u : 0)), + _handle(handle) { assert(fieldId < 0x1000000); // Can be represented by 24 bits } diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.h b/searchlib/src/vespa/searchlib/queryeval/field_spec.h index c4cd1ac2de8..fd925fdf4ff 100644 --- a/searchlib/src/vespa/searchlib/queryeval/field_spec.h +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.h @@ -19,15 +19,15 @@ namespace search::queryeval { class FieldSpecBase { public: - FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false) noexcept; + FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false); // resolve where to put match information for this term/field combination fef::TermFieldMatchData *resolve(fef::MatchData &md) const; const fef::TermFieldMatchData *resolve(const fef::MatchData &md) const; - uint32_t getFieldId() const noexcept { return _fieldId & 0xffffff; } - fef::TermFieldHandle getHandle() const noexcept { return _handle; } + uint32_t getFieldId() const { return _fieldId & 0xffffff; } + fef::TermFieldHandle getHandle() const { return _handle; } /// a filter produces less detailed match data - bool isFilter() const noexcept { return _fieldId & 0x1000000; } + bool isFilter() const { return _fieldId & 0x1000000; } private: uint32_t _fieldId; // field id in ranking framework fef::TermFieldHandle _handle; // handle used when exposing match data to ranking framework @@ -40,13 +40,13 @@ class FieldSpec : public FieldSpecBase { public: FieldSpec(const vespalib::string & name, uint32_t fieldId, - fef::TermFieldHandle handle, bool isFilter_ = false) noexcept + fef::TermFieldHandle handle, bool isFilter_ = false) : FieldSpecBase(fieldId, handle, isFilter_), _name(name) {} ~FieldSpec(); - const vespalib::string & getName() const noexcept { return _name; } + const 
vespalib::string & getName() const { return _name; } private: vespalib::string _name; // field name }; @@ -61,7 +61,7 @@ private: List _list; public: - FieldSpecBaseList() noexcept = default; + FieldSpecBaseList() = default; FieldSpecBaseList(FieldSpecBaseList &&) noexcept = default; FieldSpecBaseList & operator=(FieldSpecBaseList &&) noexcept = default; FieldSpecBaseList(const FieldSpecBaseList &) = default; @@ -69,15 +69,15 @@ public: ~FieldSpecBaseList(); void reserve(size_t sz) { _list.reserve(sz); } using const_iterator = const FieldSpecBase *; - FieldSpecBaseList &add(const FieldSpecBase &spec) noexcept { + FieldSpecBaseList &add(const FieldSpecBase &spec) { _list.push_back(spec); return *this; } - bool empty() const noexcept { return _list.empty(); } - size_t size() const noexcept { return _list.size(); } - const_iterator begin() const noexcept { return _list.begin(); } - const_iterator end() const noexcept { return _list.end(); } - const FieldSpecBase &operator[](size_t i) const noexcept { return _list[i]; } + bool empty() const { return _list.empty(); } + size_t size() const { return _list.size(); } + const_iterator begin() const { return _list.begin(); } + const_iterator end() const { return _list.end(); } + const FieldSpecBase &operator[](size_t i) const { return _list[i]; } }; /** @@ -89,7 +89,7 @@ private: vespalib::SmallVector<FieldSpec, 1> _list; public: - FieldSpecList() noexcept = default; + FieldSpecList() = default; FieldSpecList(FieldSpecList &&) noexcept = delete; FieldSpecList & operator=(FieldSpecList &&) noexcept = delete; FieldSpecList(const FieldSpecList &) noexcept = delete; @@ -99,9 +99,9 @@ public: _list.push_back(spec); return *this; } - bool empty() const noexcept { return _list.empty(); } - size_t size() const noexcept { return _list.size(); } - const FieldSpec &operator[](size_t i) const noexcept { return _list[i]; } + bool empty() const { return _list.empty(); } + size_t size() const { return _list.size(); } + const FieldSpec 
&operator[](size_t i) const { return _list[i]; } void clear() { _list.clear(); } }; diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp index 0cb72202811..aaf361b65ca 100644 --- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp @@ -6,6 +6,7 @@ #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/fastlib/io/bufferedfile.h> #include <cassert> +#include <filesystem> #include <vespa/log/log.h> LOG_SETUP(".transactionlog.domainpart"); @@ -371,7 +372,7 @@ DomainPart::erase(SerialNum to) bool retval(true); if (to > get_range_to()) { close(); - _transLog->Delete(); + std::filesystem::remove(std::filesystem::path(_fileName)); } else { auto range_from = get_range_from(); if (to > range_from) { diff --git a/searchlib/src/vespa/searchlib/util/filekit.cpp b/searchlib/src/vespa/searchlib/util/filekit.cpp index 07eab9bb2be..4012ef00dae 100644 --- a/searchlib/src/vespa/searchlib/util/filekit.cpp +++ b/searchlib/src/vespa/searchlib/util/filekit.cpp @@ -9,87 +9,6 @@ LOG_SETUP(".filekit"); namespace search { -using vespalib::getLastErrorString; - -bool -FileKit::createStamp(const vespalib::string &name) -{ - FastOS_File stamp; - FastOS_StatInfo statInfo; - bool statres; - - statres = FastOS_File::Stat(name.c_str(), &statInfo); - - if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) { - LOG(error, "FATAL: Could not check stamp file %s: %s", - name.c_str(), getLastErrorString().c_str()); - return false; - } - if (statres && statInfo._size > 0) { - LOG(error, "FATAL: Stamp file not empty: %s", name.c_str()); - return false; - } - - if (!stamp.OpenWriteOnlyTruncate(name.c_str())) { - LOG(error, "FATAL: Could not create stamp file %s: %s", - name.c_str(), getLastErrorString().c_str()); - return false; - } - return true; -} - - -bool -FileKit::hasStamp(const vespalib::string &name) -{ - 
FastOS_StatInfo statInfo; - bool statres; - - statres = FastOS_File::Stat(name.c_str(), &statInfo); - - if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) { - LOG(error, "FATAL: Could not check stamp file %s: %s", - name.c_str(), getLastErrorString().c_str()); - return false; - } - return statres; -} - - -bool -FileKit::removeStamp(const vespalib::string &name) -{ - FastOS_StatInfo statInfo; - bool deleteres; - bool statres; - - statres = FastOS_File::Stat(name.c_str(), &statInfo); - - if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) { - LOG(error, "FATAL: Could not check stamp file %s: %s", - name.c_str(), getLastErrorString().c_str()); - return false; - } - if (statres && statInfo._size > 0) { - LOG(error, "FATAL: Stamp file not empty: %s", name.c_str()); - return false; - } - - do { - deleteres = FastOS_File::Delete(name.c_str()); - //FIX! errno - } while (!deleteres && errno == EINTR); - - if (!deleteres && - FastOS_File::GetLastError() != FastOS_File::ERR_ENOENT) { - LOG(error, "FATAL: Could not remove stamp file %s: %s", - name.c_str(), getLastErrorString().c_str()); - return false; - } - return true; -} - - vespalib::system_time FileKit::getModificationTime(const vespalib::string &name) { diff --git a/searchlib/src/vespa/searchlib/util/filekit.h b/searchlib/src/vespa/searchlib/util/filekit.h index 8c994ff5866..dbd6d2e5a2e 100644 --- a/searchlib/src/vespa/searchlib/util/filekit.h +++ b/searchlib/src/vespa/searchlib/util/filekit.h @@ -10,10 +10,6 @@ namespace search { class FileKit { public: - static bool createStamp(const vespalib::string &name); - static bool hasStamp(const vespalib::string &name); - static bool removeStamp(const vespalib::string &name); - /** * Returns the modification time of the given file/directory, * or time stamp 0 if stating of file/directory fails. 
diff --git a/vespa-feed-client/pom.xml b/vespa-feed-client/pom.xml index 19130b52268..b7787d68881 100644 --- a/vespa-feed-client/pom.xml +++ b/vespa-feed-client/pom.xml @@ -25,11 +25,6 @@ <scope>compile</scope> </dependency> <dependency> - <groupId>org.apache.httpcomponents.client5</groupId> - <artifactId>httpclient5</artifactId> - <scope>compile</scope> - </dependency> - <dependency> <groupId>org.eclipse.jetty.http2</groupId> <artifactId>http2-http-client-transport</artifactId> <scope>compile</scope> @@ -51,11 +46,6 @@ <artifactId>junit-jupiter</artifactId> <scope>test</scope> </dependency> - <dependency> - <groupId>com.github.tomakehurst</groupId> - <artifactId>wiremock-jre8-standalone</artifactId> - <scope>test</scope> - </dependency> </dependencies> <build> @@ -80,21 +70,6 @@ <showDeprecation>true</showDeprecation> </configuration> </execution> - <execution> - <id>compile-java-9</id> - <phase>compile</phase> - <goals> - <goal>compile</goal> - </goals> - <configuration> - <release>9</release> - <compileSourceRoots> - <compileSourceRoot>${project.basedir}/src/main/java9</compileSourceRoot> - </compileSourceRoots> - <outputDirectory>${project.build.outputDirectory}/META-INF/versions/9</outputDirectory> - <showDeprecation>true</showDeprecation> - </configuration> - </execution> </executions> </plugin> <plugin> diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java deleted file mode 100644 index 96c65a6b165..00000000000 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package ai.vespa.feed.client.impl; - -import ai.vespa.feed.client.FeedClientBuilder.Compression; -import ai.vespa.feed.client.HttpResponse; -import org.apache.hc.client5.http.async.methods.SimpleHttpRequest; -import org.apache.hc.client5.http.async.methods.SimpleHttpResponse; -import org.apache.hc.client5.http.config.ConnectionConfig; -import org.apache.hc.client5.http.config.RequestConfig; -import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient; -import org.apache.hc.client5.http.impl.async.HttpAsyncClients; -import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder; -import org.apache.hc.core5.concurrent.FutureCallback; -import org.apache.hc.core5.http.ContentType; -import org.apache.hc.core5.http.Header; -import org.apache.hc.core5.http.HttpHeaders; -import org.apache.hc.core5.http.message.BasicHeader; -import org.apache.hc.core5.http2.config.H2Config; -import org.apache.hc.core5.net.URIAuthority; -import org.apache.hc.core5.reactor.IOReactorConfig; -import org.apache.hc.core5.util.Timeout; - -import javax.net.ssl.SSLContext; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.zip.GZIPOutputStream; - -import static ai.vespa.feed.client.FeedClientBuilder.Compression.auto; -import static ai.vespa.feed.client.FeedClientBuilder.Compression.gzip; -import static org.apache.hc.core5.http.ssl.TlsCiphers.excludeH2Blacklisted; -import static org.apache.hc.core5.http.ssl.TlsCiphers.excludeWeak; - -/** - * @author jonmv - */ -class ApacheCluster implements Cluster 
{ - - private final List<Endpoint> endpoints = new ArrayList<>(); - private final List<BasicHeader> defaultHeaders = Arrays.asList(new BasicHeader(HttpHeaders.USER_AGENT, String.format("vespa-feed-client/%s (Apache)", Vespa.VERSION)), - new BasicHeader("Vespa-Client-Version", Vespa.VERSION)); - private final Header gzipEncodingHeader = new BasicHeader(HttpHeaders.CONTENT_ENCODING, "gzip"); - private final Compression compression; - private int someNumber = 0; - - private final ExecutorService dispatchExecutor = Executors.newFixedThreadPool(8, t -> new Thread(t, "request-dispatch-thread")); - private final ScheduledExecutorService timeoutExecutor = Executors.newSingleThreadScheduledExecutor(t -> new Thread(t, "request-timeout-thread")); - - ApacheCluster(FeedClientBuilderImpl builder) throws IOException { - for (int i = 0; i < builder.connectionsPerEndpoint; i++) - for (URI endpoint : builder.endpoints) - endpoints.add(new Endpoint(createHttpClient(builder), endpoint)); - this.compression = builder.compression; - } - - @Override - public void dispatch(HttpRequest wrapped, CompletableFuture<HttpResponse> vessel) { - Endpoint leastBusy = endpoints.get(0); - int min = Integer.MAX_VALUE; - int start = ++someNumber % endpoints.size(); - for (int i = 0; i < endpoints.size(); i++) { - Endpoint endpoint = endpoints.get((i + start) % endpoints.size()); - int inflight = endpoint.inflight.get(); - if (inflight < min) { - leastBusy = endpoint; - min = inflight; - } - } - Endpoint endpoint = leastBusy; - endpoint.inflight.incrementAndGet(); - - dispatchExecutor.execute(() -> { - try { - SimpleHttpRequest request = new SimpleHttpRequest(wrapped.method(), wrapped.path()); - request.setScheme(endpoint.url.getScheme()); - request.setAuthority(new URIAuthority(endpoint.url.getHost(), portOf(endpoint.url))); - request.setConfig(RequestConfig.custom().setConnectionRequestTimeout(Timeout.DISABLED).build()); - defaultHeaders.forEach(request::setHeader); - wrapped.headers().forEach((name, 
value) -> request.setHeader(name, value.get())); - if (wrapped.body() != null) { - byte[] body = wrapped.body(); - if (compression == gzip || compression == auto && body.length > 512) { - request.setHeader(gzipEncodingHeader); - body = gzipped(body); - } - request.setBody(body, ContentType.APPLICATION_JSON); - } - - Future<?> future = endpoint.client.execute(request, - new FutureCallback<SimpleHttpResponse>() { - @Override public void completed(SimpleHttpResponse response) { vessel.complete(new ApacheHttpResponse(response)); } - @Override public void failed(Exception ex) { vessel.completeExceptionally(ex); } - @Override public void cancelled() { vessel.cancel(false); } - }); - // Manually schedule response timeout as the Apache HTTP/2 multiplexing client does not support response timeouts - long timeoutMillis = wrapped.timeout() == null ? 190_000 : wrapped.timeout().toMillis(); - Future<?> cancellation = timeoutExecutor.schedule( - () -> { - vessel.completeExceptionally( - new TimeoutException(String.format("Request timed out after %dms", timeoutMillis))); - future.cancel(true); - }, - timeoutMillis * 11 / 10 + 1_000, TimeUnit.MILLISECONDS); - vessel.whenComplete((__, ___) -> cancellation.cancel(true)); - } - catch (Throwable thrown) { - vessel.completeExceptionally(thrown); - } - vessel.whenComplete((__, ___) -> endpoint.inflight.decrementAndGet()); - }); - } - - private byte[] gzipped(byte[] content) throws IOException{ - ByteArrayOutputStream buffer = new ByteArrayOutputStream(1 << 10); - try (GZIPOutputStream zip = new GZIPOutputStream(buffer)) { - zip.write(content); - } - return buffer.toByteArray(); - } - - @Override - public void close() { - Throwable thrown = null; - dispatchExecutor.shutdownNow().forEach(Runnable::run); - for (Endpoint endpoint : endpoints) { - try { - endpoint.client.close(); - } - catch (Throwable t) { - if (thrown == null) thrown = t; - else thrown.addSuppressed(t); - } - } - timeoutExecutor.shutdownNow().forEach(Runnable::run); - if 
(thrown != null) throw new RuntimeException(thrown); - } - - - private static class Endpoint { - - private final CloseableHttpAsyncClient client; - private final AtomicInteger inflight = new AtomicInteger(0); - private final URI url; - - private Endpoint(CloseableHttpAsyncClient client, URI url) { - this.client = client; - this.url = url; - - this.client.start(); - } - - } - - private static CloseableHttpAsyncClient createHttpClient(FeedClientBuilderImpl builder) throws IOException { - SSLContext sslContext = builder.constructSslContext(); - String[] allowedCiphers = excludeH2Blacklisted(excludeWeak(sslContext.getSupportedSSLParameters().getCipherSuites())); - if (allowedCiphers.length == 0) - throw new IllegalStateException("No adequate SSL cipher suites supported by the JVM"); - - ClientTlsStrategyBuilder tlsStrategyBuilder = ClientTlsStrategyBuilder.create() - .setCiphers(allowedCiphers) - .setSslContext(sslContext); - if (builder.hostnameVerifier != null) - tlsStrategyBuilder.setHostnameVerifier(builder.hostnameVerifier); - - // Socket timeout must be longer than the longest feasible response timeout - Timeout socketTimeout = Timeout.ofMinutes(15); - - ConnectionConfig connCfg = ConnectionConfig.custom() - .setSocketTimeout(socketTimeout) - .setConnectTimeout(Timeout.ofSeconds(10)) - .build(); - - return HttpAsyncClients.customHttp2() - .setH2Config( - H2Config.custom() - .setMaxConcurrentStreams(builder.maxStreamsPerConnection) - .setCompressionEnabled(true) - .setPushEnabled(false) - .setInitialWindowSize(Integer.MAX_VALUE) - .build()) - .setIOReactorConfig( - IOReactorConfig.custom() - .setIoThreadCount(Math.max(Math.min(Runtime.getRuntime().availableProcessors(), 8), 2)) - .setTcpNoDelay(true) - .setSoTimeout(socketTimeout) - .build()) - .setTlsStrategy(tlsStrategyBuilder.build()) - .setDefaultConnectionConfig(connCfg) - .disableAutomaticRetries() - .disableRedirectHandling() - .disableCookieManagement() - .build(); - } - - private static int portOf(URI 
url) { - return url.getPort() == -1 ? url.getScheme().equals("http") ? 80 : 443 - : url.getPort(); - } - - private static class ApacheHttpResponse implements HttpResponse { - - private final SimpleHttpResponse wrapped; - - private ApacheHttpResponse(SimpleHttpResponse wrapped) { - this.wrapped = wrapped; - } - - @Override - public int code() { - return wrapped.getCode(); - } - - @Override - public byte[] body() { - return wrapped.getBodyBytes(); - } - - @Override - public String contentType() { - return wrapped.getContentType().getMimeType(); - } - - @Override - public String toString() { - return "HTTP response with code " + code() + - (body() != null ? " and body '" + wrapped.getBodyText() + "'" : ""); - } - - } - -} diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java index f228717eba5..40c5fda8ce3 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java @@ -55,8 +55,7 @@ class HttpFeedClient implements FeedClient { private final boolean speedTest; HttpFeedClient(FeedClientBuilderImpl builder) throws IOException { - this(builder, builder.dryrun ? - new DryrunCluster() : experimentalClientEnabled() ? new JettyCluster(builder) : new ApacheCluster(builder)); + this(builder, builder.dryrun ? 
new DryrunCluster() : new JettyCluster(builder)); } HttpFeedClient(FeedClientBuilderImpl builder, Cluster cluster) { @@ -315,13 +314,4 @@ class HttpFeedClient implements FeedClient { return query.toString(); } - private static boolean experimentalClientEnabled() { - String name = "VESPA_FEED_EXPERIMENTAL_CLIENT"; - return Optional.ofNullable(System.getenv(name)) - .map(Boolean::parseBoolean) - .orElse(Optional.ofNullable(System.getProperty(name)) - .map(Boolean::parseBoolean) - .orElse(true)); - } - } diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java index 1a125ebfbb5..cd7a4e6222e 100644 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java +++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java @@ -147,10 +147,11 @@ class JettyCluster implements Cluster { h2Client.setInitialStreamRecvWindow(initialWindow); ClientConnectionFactory.Info http2 = new ClientConnectionFactoryOverHTTP2.HTTP2(h2Client); HttpClientTransportDynamic transport = new HttpClientTransportDynamic(connector, http2); + int connectionsPerEndpoint = b.connectionsPerEndpoint; transport.setConnectionPoolFactory(dest -> { MultiplexConnectionPool pool = new MultiplexConnectionPool( - dest, Pool.StrategyType.RANDOM, b.connectionsPerEndpoint, false, dest, Integer.MAX_VALUE); - pool.preCreateConnections(b.connectionsPerEndpoint); + dest, Pool.StrategyType.RANDOM, connectionsPerEndpoint, false, dest, Integer.MAX_VALUE); + pool.preCreateConnections(connectionsPerEndpoint); return pool; }); HttpClient httpClient = new HttpClient(transport); @@ -186,9 +187,10 @@ class JettyCluster implements Cluster { new HttpProxy(address, false, new Origin.Protocol(Collections.singletonList("h2c"), false))); } Map<String, Supplier<String>> proxyHeadersCopy = new TreeMap<>(b.proxyRequestHeaders); + URI proxyUri = URI.create(endpointUri(b.proxy)); if 
(!proxyHeadersCopy.isEmpty()) { httpClient.getAuthenticationStore().addAuthenticationResult(new Authentication.Result() { - @Override public URI getURI() { return URI.create(endpointUri(b.proxy)); } + @Override public URI getURI() { return proxyUri; } @Override public void apply(Request r) { r.headers(hs -> proxyHeadersCopy.forEach((k, v) -> hs.add(k, v.get()))); } diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java deleted file mode 100644 index 5183ce61761..00000000000 --- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package ai.vespa.feed.client.impl; - -import org.apache.hc.core5.reactor.ssl.TlsDetails; - -import javax.net.ssl.SSLEngine; - -/** - * @author bjorncs - */ -public class TlsDetailsFactory { - private TlsDetailsFactory() {} - - public static TlsDetails create(SSLEngine e) { return new TlsDetails(e.getSession(), "h2"); /*h2 == HTTP2*/ } -} - diff --git a/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java b/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java deleted file mode 100644 index f9903d9943d..00000000000 --- a/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package ai.vespa.feed.client.impl; - -import org.apache.hc.core5.reactor.ssl.TlsDetails; - -import javax.net.ssl.SSLEngine; - -/** - * {@link SSLEngine#getApplicationProtocol()} is not available on all JDK8 versions - * (https://bugs.openjdk.org/browse/JDK-8051498) - * - * @author bjorncs - */ -public class TlsDetailsFactory { - private TlsDetailsFactory() {} - - public static TlsDetails create(SSLEngine e) { - return new TlsDetails(e.getSession(), e.getApplicationProtocol()); - } -} diff --git a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java deleted file mode 100644 index cf9a36f2aa8..00000000000 --- a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java +++ /dev/null @@ -1,77 +0,0 @@ -package ai.vespa.feed.client.impl; - -import ai.vespa.feed.client.FeedClientBuilder.Compression; -import ai.vespa.feed.client.HttpResponse; -import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.net.URI; -import java.time.Duration; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.zip.GZIPOutputStream; - -import static com.github.tomakehurst.wiremock.client.WireMock.any; -import static com.github.tomakehurst.wiremock.client.WireMock.anyRequestedFor; -import static com.github.tomakehurst.wiremock.client.WireMock.anyUrl; -import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; -import static com.github.tomakehurst.wiremock.client.WireMock.okJson; -import static 
com.github.tomakehurst.wiremock.client.WireMock.postRequestedFor; -import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.jupiter.api.Assertions.assertEquals; - -class ApacheClusterTest { - - @RegisterExtension - final WireMockExtension server = new WireMockExtension(); - - @Test - void testClient() throws Exception { - for (Compression compression : Compression.values()) { - try (ApacheCluster cluster = new ApacheCluster(new FeedClientBuilderImpl(List.of(URI.create("http://localhost:" + server.port()))) - .setCompression(compression))) { - server.stubFor(any(anyUrl())) - .setResponse(okJson("{}").build()); - - CompletableFuture<HttpResponse> vessel = new CompletableFuture<>(); - cluster.dispatch(new HttpRequest("POST", - "/path", - Map.of("name1", () -> "value1", - "name2", () -> "value2"), - "content".getBytes(UTF_8), - Duration.ofSeconds(10)), - vessel); - - AutoCloseable verifyResponse = () -> { - HttpResponse response = vessel.get(15, TimeUnit.SECONDS); - assertEquals("{}", new String(response.body(), UTF_8)); - assertEquals(200, response.code()); - }; - AutoCloseable verifyServer = () -> { - server.verify(1, anyRequestedFor(anyUrl())); - RequestPatternBuilder expected = postRequestedFor(urlEqualTo("/path")).withHeader("name1", equalTo("value1")) - .withHeader("name2", equalTo("value2")) - .withHeader("Content-Type", equalTo("application/json; charset=UTF-8")) - .withRequestBody(equalTo("content")); - expected = switch (compression) { - case auto, none -> expected.withoutHeader("Content-Encoding"); - case gzip -> expected.withHeader("Content-Encoding", equalTo("gzip")); - }; - server.verify(1, expected); - server.resetRequests(); - }; - try (verifyServer; verifyResponse) { } - } - } - } - -} diff --git a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java 
b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java deleted file mode 100644 index ef61213889b..00000000000 --- a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package ai.vespa.feed.client.impl; - -import com.github.tomakehurst.wiremock.WireMockServer; -import com.github.tomakehurst.wiremock.core.Options; -import com.github.tomakehurst.wiremock.core.WireMockConfiguration; -import org.junit.jupiter.api.extension.AfterEachCallback; -import org.junit.jupiter.api.extension.BeforeEachCallback; -import org.junit.jupiter.api.extension.ExtensionContext; - -/** - * Allows wiremock to be used as a JUnit 5 extension, like - * <pre> - * - * @RegisterExtension - * WireMockExtension mockServer1 = new WireMockExtension(); - * </pre> - */ -public class WireMockExtension extends WireMockServer implements BeforeEachCallback, AfterEachCallback { - - public WireMockExtension() { - this(WireMockConfiguration.options() - .dynamicPort() - .dynamicHttpsPort()); - } - - public WireMockExtension(Options options) { - super(options); - } - - @Override - public void beforeEach(ExtensionContext extensionContext) { - start(); - } - - @Override - public void afterEach(ExtensionContext extensionContext) { - stop(); - resetAll(); - } - -} diff --git a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java index 1641bd7802f..a2ac86309d9 100644 --- a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java +++ b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java @@ -336,15 +336,15 @@ public class TestReport { StackTraceElement[] stack = thrown.getStackTrace(); int i = 0; - int previousNativeFrame = -1; + int firstReflectFrame = -1; int cutoff = 0; 
boolean rootedInTestFramework = false; while (++i < stack.length) { rootedInTestFramework |= testFrameworkRootClass.equals(stack[i].getClassName()); - if (stack[i].isNativeMethod()) - previousNativeFrame = i; // Native method invokes the first user test frame. - if (rootedInTestFramework && previousNativeFrame > 0) { - cutoff = previousNativeFrame; + if (firstReflectFrame == -1 && stack[i].getClassName().startsWith("jdk.internal.reflect.")) + firstReflectFrame = i; // jdk.internal.reflect class invokes the first user test frame, on both jdk 17 and 21. + if (rootedInTestFramework && firstReflectFrame > 0) { + cutoff = firstReflectFrame; break; } boolean isDynamicTestInvocation = "org.junit.jupiter.engine.descriptor.DynamicTestTestDescriptor".equals(stack[i].getClassName()); diff --git a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java index 9bb0e7ff955..cf2a1700f28 100644 --- a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java +++ b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java @@ -122,6 +122,7 @@ public class VespaCliTestRunner implements TestRunner { // The CI environment variables tells Vespa CLI to omit certain warnings that do not apply to CI environments builder.environment().put("CI", "true"); builder.environment().put("VESPA_CLI_CLOUD_CI", "true"); + builder.environment().put("VESPA_CLI_CLOUD_SYSTEM", config.system().value()); builder.environment().put("VESPA_CLI_HOME", ensureDirectoryForVespaCli("cli-home").toString()); builder.environment().put("VESPA_CLI_CACHE_DIR", ensureDirectoryForVespaCli("cli-cache").toString()); builder.environment().put("VESPA_CLI_ENDPOINTS", toEndpointsConfig(config)); diff --git a/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java 
b/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java index fcd0553566f..288442eaf7d 100644 --- a/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java +++ b/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java @@ -42,6 +42,7 @@ class VespaCliTestRunnerTest { assertTrue(builder.environment().containsKey("VESPA_CLI_CLOUD_CI")); assertTrue(builder.environment().containsKey("VESPA_CLI_HOME")); assertTrue(builder.environment().containsKey("VESPA_CLI_CACHE_DIR")); + assertEquals("publiccd", builder.environment().get("VESPA_CLI_CLOUD_SYSTEM")); assertEquals("{\"endpoints\":[{\"cluster\":\"default\",\"url\":\"https://dev.endpoint:443/\"}]}", builder.environment().get("VESPA_CLI_ENDPOINTS")); assertEquals(artifacts.resolve("key").toAbsolutePath().toString(), @@ -72,6 +73,7 @@ class VespaCliTestRunnerTest { assertTrue(builder.environment().containsKey("VESPA_CLI_CLOUD_CI")); assertTrue(builder.environment().containsKey("VESPA_CLI_HOME")); assertTrue(builder.environment().containsKey("VESPA_CLI_CACHE_DIR")); + assertEquals("cd", builder.environment().get("VESPA_CLI_CLOUD_SYSTEM")); assertEquals("{\"endpoints\":[{\"cluster\":\"default\",\"url\":\"https://dev.endpoint:443/\"}]}", builder.environment().get("VESPA_CLI_ENDPOINTS")); assertEquals("/opt/vespa/var/vespa/sia/key", diff --git a/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp index 8aa9b943419..edf38122202 100644 --- a/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp +++ b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp @@ -1,17 +1,26 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/fastlib/io/bufferedfile.h> #include <vespa/vespalib/testkit/test_kit.h> +#include <filesystem> +namespace { + +void remove_testfiles() +{ + std::filesystem::remove(std::filesystem::path("testfile1")); + std::filesystem::remove(std::filesystem::path("testfile2")); + std::filesystem::remove(std::filesystem::path("testfile3")); + std::filesystem::remove(std::filesystem::path("testfile4")); + std::filesystem::remove(std::filesystem::path("testfile5")); +} + +} TEST("main") { int value = 0; FastOS_StatInfo statInfo; - FastOS_File::Delete("testfile1"); - FastOS_File::Delete("testfile2"); - FastOS_File::Delete("testfile3"); - FastOS_File::Delete("testfile4"); - FastOS_File::Delete("testfile5"); + remove_testfiles(); Fast_BufferedFile bufFile(4096); @@ -84,11 +93,7 @@ TEST("main") { } printf (" -- SUCCESS\n\n"); - FastOS_File::Delete("testfile1"); - FastOS_File::Delete("testfile2"); - FastOS_File::Delete("testfile3"); - FastOS_File::Delete("testfile4"); - FastOS_File::Delete("testfile5"); + remove_testfiles(); printf ("All tests OK for bufferedfiletest\n"); printf (" -- SUCCESS\n\n"); diff --git a/vespalib/src/tests/fastos/file_test.cpp b/vespalib/src/tests/fastos/file_test.cpp index 6b58a4a1fd8..ecb35df5d26 100644 --- a/vespalib/src/tests/fastos/file_test.cpp +++ b/vespalib/src/tests/fastos/file_test.cpp @@ -165,7 +165,7 @@ TEST(FileTest, WriteOnlyTest) { ASSERT_EQ(myFile->GetPosition(), 0); EXPECT_LT(myFile->Read(dummyData, 6), 0); EXPECT_TRUE(myFile->Close()); - EXPECT_TRUE(myFile->Delete()); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path(woFilename))); } TEST(FileTest, ReadWriteTest) { @@ -188,7 +188,7 @@ TEST(FileTest, ReadWriteTest) { EXPECT_EQ(myFile->Read(dummyData2, 6), 0); EXPECT_EQ(myFile->GetPosition(), 6); EXPECT_TRUE(myFile->Close()); - EXPECT_TRUE(myFile->Delete()); + EXPECT_TRUE(std::filesystem::remove(std::filesystem::path(rwFilename))); } TEST(FileTest, ScanDirectoryTest) { diff --git 
a/vespalib/src/tests/fileheader/fileheader_test.cpp b/vespalib/src/tests/fileheader/fileheader_test.cpp index 21e374e4f62..911c6ef7cfe 100644 --- a/vespalib/src/tests/fileheader/fileheader_test.cpp +++ b/vespalib/src/tests/fileheader/fileheader_test.cpp @@ -4,9 +4,16 @@ #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/fastos/file.h> +#include <filesystem> using namespace vespalib; +namespace { + +vespalib::string fileheader_tmp("fileheader.tmp"); + +} + class Test : public vespalib::TestApp { private: void testTag(); @@ -337,7 +344,7 @@ Test::testFileReader() { { FastOS_File file; - ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp")); + ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str())); uint8_t buf[256]; for (uint32_t i = 0; i < 256; ++i) { @@ -347,7 +354,7 @@ Test::testFileReader() } { FastOS_File file; - ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp")); + ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str())); FileHeader::FileReader reader(file); char buf[7]; @@ -362,7 +369,7 @@ Test::testFileReader() EXPECT_EQUAL(256u, sum); ASSERT_TRUE(file.Close()); - file.Delete(); + std::filesystem::remove(std::filesystem::path(fileheader_tmp)); } } @@ -371,7 +378,7 @@ Test::testFileWriter() { { FastOS_File file; - ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp")); + ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str())); FileHeader::FileWriter writer(file); uint32_t sum = 0; @@ -388,7 +395,7 @@ Test::testFileWriter() } { FastOS_File file; - ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp")); + ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str())); uint8_t buf[256]; EXPECT_EQUAL(256, file.Read(buf, 256)); @@ -397,7 +404,7 @@ Test::testFileWriter() } ASSERT_TRUE(file.Close()); - file.Delete(); + std::filesystem::remove(std::filesystem::path(fileheader_tmp)); } } @@ -412,13 +419,13 @@ Test::testFileHeader() header.putTag(FileHeader::Tag("baz", "666999")); FastOS_File file; - 
ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp")); + ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str())); len = header.writeFile(file); EXPECT_EQUAL(len, header.getSize()); } { FastOS_File file; - ASSERT_TRUE(file.OpenReadWrite("fileheader.tmp")); + ASSERT_TRUE(file.OpenReadWrite(fileheader_tmp.c_str())); FileHeader header; EXPECT_EQUAL(len, header.readFile(file)); @@ -441,11 +448,11 @@ Test::testFileHeader() FileHeader header; FastOS_File file; - ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp")); + ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str())); EXPECT_EQUAL(len, header.readFile(file)); EXPECT_EQUAL(len, header.getSize()); ASSERT_TRUE(file.Close()); - file.Delete(); + std::filesystem::remove(std::filesystem::path(fileheader_tmp)); EXPECT_TRUE(header.hasTag("foo")); EXPECT_EQUAL(9.6, header.getTag("foo").asFloat()); @@ -571,12 +578,12 @@ Test::testRewriteErrors() { FastOS_File file; - ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp")); + ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str())); EXPECT_EQUAL(len, header.writeFile(file)); } { FastOS_File file; - ASSERT_TRUE(file.OpenReadWrite("fileheader.tmp")); + ASSERT_TRUE(file.OpenReadWrite(fileheader_tmp.c_str())); header.putTag(FileHeader::Tag("baz", "cox")); EXPECT_TRUE(len != header.getSize()); try { diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt index bc48e775711..00a89d0a604 100644 --- a/vespalib/src/tests/fuzzy/CMakeLists.txt +++ b/vespalib/src/tests/fuzzy/CMakeLists.txt @@ -16,3 +16,12 @@ vespa_add_executable(vespalib_levenshtein_distance_test_app TEST GTest::GTest ) vespa_add_test(NAME vespalib_levenshtein_distance_test_app COMMAND vespalib_levenshtein_distance_test_app) + +vespa_add_executable(vespalib_levenshtein_dfa_test_app TEST + SOURCES + levenshtein_dfa_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_levenshtein_dfa_test_app COMMAND vespalib_levenshtein_dfa_test_app) diff --git 
a/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp new file mode 100644 index 00000000000..6966fd0b703 --- /dev/null +++ b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp @@ -0,0 +1,507 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/fuzzy/levenshtein_dfa.h> +#include <vespa/vespalib/fuzzy/dfa_stepping_base.h> +#include <vespa/vespalib/fuzzy/unicode_utils.h> +#include <vespa/vespalib/fuzzy/levenshtein_distance.h> // For benchmarking purposes +#include <vespa/vespalib/util/benchmark_timer.h> +#include <charconv> +#include <concepts> +#include <filesystem> +#include <fstream> +#include <string> +#include <string_view> +#include <gtest/gtest.h> + +using namespace ::testing; +using namespace vespalib::fuzzy; +namespace fs = std::filesystem; + +static std::string benchmark_dictionary; + +struct LevenshteinDfaTest : TestWithParam<LevenshteinDfa::DfaType> { + + static LevenshteinDfa::DfaType dfa_type() noexcept { return GetParam(); } + + static std::optional<uint32_t> calculate(std::string_view left, std::string_view right, uint32_t threshold) { + auto dfa_lhs = LevenshteinDfa::build(left, threshold, dfa_type()); + auto maybe_match_lhs = dfa_lhs.match(right, nullptr); + + auto dfa_rhs = LevenshteinDfa::build(right, threshold, dfa_type()); + auto maybe_match_rhs = dfa_rhs.match(left, nullptr); + + EXPECT_EQ(maybe_match_lhs.matches(), maybe_match_rhs.matches()); + if (maybe_match_lhs.matches()) { + EXPECT_EQ(maybe_match_lhs.edits(), maybe_match_rhs.edits()); + return {maybe_match_lhs.edits()}; + } + return std::nullopt; + } + + static std::optional<uint32_t> calculate(std::u8string_view left, std::u8string_view right, uint32_t threshold) { + std::string_view lhs_ch(reinterpret_cast<const char*>(left.data()), left.size()); + std::string_view rhs_ch(reinterpret_cast<const char*>(right.data()), right.size()); + return calculate(lhs_ch, 
rhs_ch, threshold); + } + +}; + +INSTANTIATE_TEST_SUITE_P(AllDfaTypes, + LevenshteinDfaTest, + Values(LevenshteinDfa::DfaType::Explicit, + LevenshteinDfa::DfaType::Implicit), + PrintToStringParamName()); + +// Same as existing non-DFA Levenshtein tests, but with some added instantiations +// for smaller max distances. +TEST_P(LevenshteinDfaTest, edge_cases_have_correct_edit_distance) { + EXPECT_EQ(calculate("abc", "abc", 2), std::optional{0}); + for (auto max : {1, 2}) { + EXPECT_EQ(calculate("abc", "ab1", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "1bc", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "a1c", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "ab", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "abcd", max), std::optional{1}) << max; + EXPECT_EQ(calculate("a", "", max), std::optional{1}) << max; + } + EXPECT_EQ(calculate("bc", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("ab", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("cd", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("ad", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "a12", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "123", 2), std::nullopt); + EXPECT_EQ(calculate("ab", "", 1), std::nullopt); + EXPECT_EQ(calculate("ab", "", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "", 2), std::nullopt); + EXPECT_EQ(calculate("abc", "123", 2), std::nullopt); +} + +TEST_P(LevenshteinDfaTest, distance_is_in_utf32_code_point_space) { + // Each hiragana/katakana/kanji corresponds to multiple (3) UTF-8 chars but a single UTF-32 code point. 
+ EXPECT_EQ(calculate(u8"猫", u8"猫", 2), std::optional{0}); + EXPECT_EQ(calculate(u8"猫", u8"犬", 2), std::optional{1}); + EXPECT_EQ(calculate(u8"猫と犬", u8"犬と猫", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"猫は好き", u8"犬が好き", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラオケ", 2), std::optional{0}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラoケ", 2), std::optional{1}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カraオケ", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"kaラオケ", u8"カラオケ", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラoke", 2), std::nullopt); +} + +void test_dfa_successor(const LevenshteinDfa& dfa, std::string_view source, std::string_view expected_successor) { + std::string successor; + auto m = dfa.match(source, &successor); + if (m.matches()) { + FAIL() << "Expected '" << source << "' to emit a successor, but it " + << "matched with " << static_cast<uint32_t>(m.edits()) + << " edits (of max " << static_cast<uint32_t>(m.max_edits()) << " edits)"; + } + EXPECT_EQ(successor, expected_successor); + EXPECT_TRUE(dfa.match(successor, nullptr).matches()); +} + +TEST_P(LevenshteinDfaTest, can_generate_successors_to_mismatching_source_strings) { + auto dfa = LevenshteinDfa::build("food", 1, dfa_type()); + + test_dfa_successor(dfa, "", "\x01""food"); + test_dfa_successor(dfa, "faa", "faod"); + test_dfa_successor(dfa, "fooooo", "foop"); + test_dfa_successor(dfa, "ooof", "pfood"); + test_dfa_successor(dfa, "fo", "fo\x01""d"); + test_dfa_successor(dfa, "oo", "ood"); + test_dfa_successor(dfa, "ooo", "oood"); + test_dfa_successor(dfa, "foh", "fohd"); + test_dfa_successor(dfa, "foho", "fohod"); + test_dfa_successor(dfa, "foxx", "foyd"); + test_dfa_successor(dfa, "xfa", "xfood"); + test_dfa_successor(dfa, "gg", "good"); + test_dfa_successor(dfa, "gp", "hfood"); + test_dfa_successor(dfa, "ep", "f\x01""od"); + test_dfa_successor(dfa, "hfoodz", "hood"); + test_dfa_successor(dfa, "aooodz", "bfood"); + + // Also works with Unicode + // 2 chars + 
test_dfa_successor(dfa, "\xc3\x86""x", // "Æx" + "\xc3\x87""food"); // "Çfood" + // 3 chars + test_dfa_successor(dfa, "\xe7\x8c\xab""\xe3\x81\xaf", // "猫は" + "\xe7\x8c\xac""food"); // "猬food" + // 4 chars + test_dfa_successor(dfa, "\xf0\x9f\xa4\xa9""abc", // <starry eyed emoji>abc + "\xf0\x9f\xa4\xa9""food"); // <starry eyed emoji>food + + // Note that as a general rule, emojis are fickle beasts to deal with since a single + // emoji often takes up multiple code points, which we consider separate characters + // but a user sees as a single actual rendered glyph. + // Multi-code point character edit distance support is left as an exercise for the reader :D +} + +TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_max_unicode_code_point_input) { + auto dfa = LevenshteinDfa::build("food", 1, dfa_type()); + // The successor string must be lexicographically larger than the input string. + // In the presence of a wildcard output edge we handle this by increase the input + // character by 1 and encoding it back as UTF-8. + // It is possible (though arguably very unlikely) that the input character is + // U+10FFFF, which is the maximum valid Unicode character. We have to ensure that + // we can encode U+10FFFF + 1, even though it's technically outside the valid range. + // Luckily, UTF-8 can technically (there's that word again) encode up to U+1FFFFF, + // so the resulting string is byte-wise greater, and that's what matters since we + // don't guarantee that the successor string is _valid_ UTF-8. + // This problem does not happen with the target string, as it's an invalid character + // and will be replaced with the Unicode replacement char before we ever see it. 
+ test_dfa_successor(dfa, "\xf4\x8f\xbf\xbf""xyz", // U+10FFFF + "\xf4\x90\x80\x80""food");// U+10FFFF+1 +} + +TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_empty_target) { + auto dfa = LevenshteinDfa::build("", 1, dfa_type()); + test_dfa_successor(dfa, "aa", "b"); + test_dfa_successor(dfa, "b\x01", "c"); + test_dfa_successor(dfa, "vespa", "w"); +} + +// We should normally be able to rely on higher-level components to ensure we +// only receive valid UTF-8, but make sure we don't choke on it if we do get it. +TEST_P(LevenshteinDfaTest, malformed_utf8_is_replaced_with_placeholder_char) { + // 0xff is not a valid encoding and is implicitly converted to U+FFFD, + // which is the standard Unicode replacement character. + EXPECT_EQ(calculate("\xff", "a", 2), std::optional{1}); + EXPECT_EQ(calculate("\xff\xff", "a", 2), std::optional{2}); + EXPECT_EQ(calculate("a", "\xff", 2), std::optional{1}); + EXPECT_EQ(calculate("a", "\xff\xff\xff", 2), std::nullopt); + EXPECT_EQ(calculate("\xff", "\xef\xbf\xbd"/*U+FFFD*/, 2), std::optional{0}); +} + +TEST_P(LevenshteinDfaTest, unsupported_max_edits_value_throws) { + EXPECT_THROW((void)LevenshteinDfa::build("abc", 0, dfa_type()), std::invalid_argument); + EXPECT_THROW((void)LevenshteinDfa::build("abc", 3, dfa_type()), std::invalid_argument); +} + +// Turn integer v into its bitwise string representation with the MSB as the leftmost character. +template <std::unsigned_integral T> +std::string bits_to_str(T v) { + constexpr const uint8_t n_bits = sizeof(T) * 8; + std::string ret(n_bits, '0'); + for (uint8_t bit = 0; bit < n_bits; ++bit) { + if (v & (1 << bit)) { + ret[n_bits - bit - 1] = '1'; + } + } + return ret; +} + +using DfaTypeAndMaxEdits = std::tuple<LevenshteinDfa::DfaType, uint32_t>; + +struct LevenshteinDfaSuccessorTest : TestWithParam<DfaTypeAndMaxEdits> { + // Print test suffix as e.g. "/Explicit_1" instead of just a GTest-chosen number. 
+ static std::string stringify_params(const TestParamInfo<ParamType>& info) { + std::ostringstream ss; + ss << std::get<0>(info.param) << '_' << std::get<1>(info.param); + return ss.str(); + } +}; + +INSTANTIATE_TEST_SUITE_P(SupportedMaxEdits, + LevenshteinDfaSuccessorTest, + Combine(Values(LevenshteinDfa::DfaType::Explicit, + LevenshteinDfa::DfaType::Implicit), + Values(1, 2)), + LevenshteinDfaSuccessorTest::stringify_params); + +/** + * Exhaustively test successor generation by matching all target and source strings + * in {0,1}^8 against each other. Since we generate bit strings identical to the + * bit patterns of the underlying counter(s), any string at index `i+1` will compare + * lexicographically greater than the one at `i`. We use this to test that we never + * miss a valid match that comes between a mismatch and its generated successor. + * + * For each mismatch we note the successor it emitted. Verify that each subsequent + * match() invocation for a source string < the successor results in a mismatch. + * + * We test this for both max edit distance 1 and 2. Despite being an exhaustive test, + * this completes in a few dozen milliseconds even with ASan instrumentation. + * + * Inspired by approach used by Lucene DFA exhaustive testing. 
+ */ +TEST_P(LevenshteinDfaSuccessorTest, exhaustive_successor_test) { + const auto [dfa_type, max_edits] = GetParam(); + for (uint32_t i = 0; i < 256; ++i) { + const auto target = bits_to_str(static_cast<uint8_t>(i)); + auto target_dfa = LevenshteinDfa::build(target, max_edits, dfa_type); + std::string skip_to, successor; + for (uint32_t j = 0; j < 256; ++j) { + const auto source = bits_to_str(static_cast<uint8_t>(j)); + auto maybe_match = target_dfa.match(source, &successor); + if (maybe_match.matches() && !skip_to.empty()) { + ASSERT_GE(source, skip_to); + } else if (!maybe_match.matches()) { + ASSERT_FALSE(successor.empty()) << source; + ASSERT_GE(successor, skip_to) << source; + ASSERT_GT(successor, source) << source; + skip_to = successor; + } + } + } +} + +namespace { + +template <uint8_t MaxEdits> +void explore(const DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>& stepper, + const typename DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>::StateType& in_state) +{ + ASSERT_EQ(stepper.can_match(stepper.step(in_state, WILDCARD)), + stepper.can_wildcard_step(in_state)); + if (!stepper.can_match(in_state)) { + return; // reached the end of the line + } + // DFS-explore all matching transitions, as well as one non-matching transition + auto t = stepper.transitions(in_state); + for (uint32_t c: t.u32_chars()) { + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, c))); + } + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, WILDCARD))); +} + +} // anon ns + +using StateStepperTypes = Types< + DfaSteppingBase<FixedMaxEditDistanceTraits<1>>, + DfaSteppingBase<FixedMaxEditDistanceTraits<2>> +>; + +template <typename SteppingBase> +struct LevenshteinSparseStateTest : Test {}; + +TYPED_TEST_SUITE(LevenshteinSparseStateTest, StateStepperTypes); + +// "Meta-test" for checking that the `can_wildcard_step` predicate function is +// functionally equivalent to evaluating `can_match(stepper.step(in_state, WILDCARD))` 
+TYPED_TEST(LevenshteinSparseStateTest, wildcard_step_predcate_is_equivalent_to_step_with_can_match) { + for (const char* target : {"", "a", "ab", "abc", "abcdef", "aaaaa"}) { + auto u32_target = utf8_string_to_utf32(target); + TypeParam stepper(u32_target); + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.start())); + } +} + +template <typename T> +void do_not_optimize_away(T&& t) noexcept { + asm volatile("" : : "m"(t) : "memory"); // Clobber the value to avoid losing it to compiler optimizations +} + +enum class BenchmarkType { + DfaExplicit, + DfaImplicit, + Legacy +}; + +const char* to_s(BenchmarkType t) noexcept { + // Note: need underscores since this is used as part of GTest-generated test instance names + switch (t) { + case BenchmarkType::DfaExplicit: return "DFA_explicit"; + case BenchmarkType::DfaImplicit: return "DFA_implicit"; + case BenchmarkType::Legacy: return "legacy"; + } + abort(); +} + +[[nodiscard]] bool benchmarking_enabled() noexcept { + return !benchmark_dictionary.empty(); +} + +[[nodiscard]] std::vector<uint32_t> string_lengths() { + return {2, 8, 16, 64, 256, 1024, 1024*16, 1024*64}; +} + +struct LevenshteinBenchmarkTest : TestWithParam<BenchmarkType> { + + static std::string stringify_params(const TestParamInfo<ParamType>& info) { + return to_s(info.param); + } + + void SetUp() override { + if (!benchmarking_enabled()) { + GTEST_SKIP() << "benchmarking not enabled"; + } + } + + static BenchmarkType benchmark_type() noexcept { return GetParam(); } + + static const std::vector<std::string>& load_dictionary_once() { + static auto sorted_lines = read_and_sort_all_lines(fs::path(benchmark_dictionary)); + return sorted_lines; + } + + static std::vector<std::string> read_and_sort_all_lines(const fs::path& file_path) { + std::ifstream ifs(file_path); + if (!ifs.is_open()) { + throw std::invalid_argument("File does not exist"); + } + std::vector<std::string> lines; + std::string line; + while (std::getline(ifs, line)) { + 
lines.emplace_back(line); + } + std::sort(lines.begin(), lines.end()); + return lines; + } +}; + +INSTANTIATE_TEST_SUITE_P(AllDfaTypes, + LevenshteinBenchmarkTest, + Values(BenchmarkType::DfaExplicit, + BenchmarkType::DfaImplicit, + BenchmarkType::Legacy), + LevenshteinBenchmarkTest::stringify_params); + +// ("abc", 1) => "a" +// ("abc", 3) => "abc" +// ("abc", 7) => "abcabca" +// ... and so on. +std::string repeated_string(std::string_view str, uint32_t sz) { + uint32_t chunks = sz / str.size(); + std::string ret; + ret.reserve(sz); + for (uint32_t i = 0; i < chunks; ++i) { + ret += str; + } + uint32_t rem = sz % str.size(); + ret += str.substr(0, rem); + return ret; +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_worst_case_matching_excluding_setup_time) { + using vespalib::BenchmarkTimer; + const auto type = benchmark_type(); + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : string_lengths()) { + // Use same string as both source and target. This is the worst case in that the entire + // string must be matched and any sparse representation is always maximally filled since + // we never expend any edits via mismatches. + // Also ensure that we have multiple out-edges per node (i.e. don't just repeat "AAA" etc.). + std::string str = repeated_string("abcde", sz); + double min_time_s; + if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) { + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + min_time_s = BenchmarkTimer::benchmark([&] { + auto res = dfa.match(str, nullptr); // not benchmarking successor generation + do_not_optimize_away(res); + }, 1.0); + } else { + min_time_s = BenchmarkTimer::benchmark([&] { + auto str_u32 = utf8_string_to_utf32(str); // Must be done per term, so included in benchmark body + auto res = vespalib::LevenshteinDistance::calculate(str_u32, str_u32, k); + do_not_optimize_away(res); + }, 1.0); + } + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +TEST(LevenshteinExplicitDfaBenchmarkTest, benchmark_explicit_dfa_construction) { + if (!benchmarking_enabled()) { + GTEST_SKIP() << "benchmarking not enabled"; + } + using vespalib::BenchmarkTimer; + for (uint8_t k : {1, 2}) { + for (uint32_t sz : string_lengths()) { + std::string str = repeated_string("abcde", sz); + double min_time_s = BenchmarkTimer::benchmark([&] { + auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit); + do_not_optimize_away(dfa); + }, 2.0); + auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit); + size_t mem_usage = dfa.memory_usage(); + fprintf(stderr, "k=%u, sz=%u: \t%g us \t%zu bytes\n", k, sz, min_time_s * 1000000.0, mem_usage); + } + } +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_brute_force_dictionary_scan) { + using vespalib::BenchmarkTimer; + const auto type = benchmark_type(); + const auto dict = load_dictionary_once(); + std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64}; + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : target_lengths) { + std::string str = repeated_string("abcde", sz); + double min_time_s; + if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) { + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + min_time_s = BenchmarkTimer::benchmark([&] { + for (const auto& line : dict) { + auto res = dfa.match(line, nullptr); + do_not_optimize_away(res); + } + }, 2.0); + } else { + min_time_s = BenchmarkTimer::benchmark([&] { + auto target_u32 = utf8_string_to_utf32(str); + for (const auto& line : dict) { + auto line_u32 = utf8_string_to_utf32(line); + auto res = vespalib::LevenshteinDistance::calculate(line_u32, target_u32, k); + do_not_optimize_away(res); + } + }, 2.0); + } + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_skipping_dictionary_scan) { + const auto type = benchmark_type(); + if (type == BenchmarkType::Legacy) { + GTEST_SKIP() << "Skipping not supported for legacy implementation"; + } + using vespalib::BenchmarkTimer; + const auto dict = load_dictionary_once(); + std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64}; + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : target_lengths) { + std::string str = repeated_string("abcde", sz); + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + double min_time_s = BenchmarkTimer::benchmark([&] { + auto iter = dict.cbegin(); + auto end = dict.cend(); + std::string successor; + while (iter != end) { + auto maybe_match = dfa.match(*iter, &successor); + if (maybe_match.matches()) { + ++iter; + } else { + iter = std::lower_bound(iter, end, successor); + } + } + }, 2.0); + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +// TODO: +// - explicit successor generation benchmark + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + benchmark_dictionary = argv[1]; + if (!fs::exists(fs::path(benchmark_dictionary))) { + fprintf(stderr, "Benchmark dictionary file '%s' does not exist\n", benchmark_dictionary.c_str()); + return 1; + } + } + return RUN_ALL_TESTS(); +} diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp index aecf08edf6b..60c8a4a40ef 100644 --- a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp @@ -287,7 +287,7 @@ Fast_BufferedFile::ReadOpenExisting(const char *name) bool ok = Close(); ok &= _file->OpenReadOnlyExisting(true, name); if (!ok) { - fprintf(stderr, "ERROR opening %s for read: %s", + fprintf(stderr, "ERROR opening %s for read: %s\n", _file->GetFileName(), getLastErrorString().c_str()); assert(ok); } @@ -304,7 +304,7 @@ Fast_BufferedFile::ReadOpen(const char *name) bool ok = Close(); ok &= _file->OpenReadOnly(name); if (!ok) { - fprintf(stderr, "ERROR opening %s for read: %s", + fprintf(stderr, "ERROR opening %s for read: %s\n", _file->GetFileName(), getLastErrorString().c_str()); assert(ok); } @@ -324,7 +324,7 @@ Fast_BufferedFile::WriteOpen(const char *name) bool ok = Close(); ok &= _file->OpenWriteOnly(name); if (!ok) { - fprintf(stderr, "ERROR opening %s for write: %s", + 
fprintf(stderr, "ERROR opening %s for write: %s\n", _file->GetFileName(), getLastErrorString().c_str()); assert(ok); } @@ -431,11 +431,6 @@ bool Fast_BufferedFile::Open(unsigned int openFlags, const char * name) return ok; } -bool Fast_BufferedFile::Delete() -{ - return _file->Delete(); -} - void Fast_BufferedFile::alignEndForDirectIO() { while( (_bufi - buf())%MIN_ALIGNMENT ) { diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.h b/vespalib/src/vespa/fastlib/io/bufferedfile.h index 2a5e0ec7535..8faada61f9d 100644 --- a/vespalib/src/vespa/fastlib/io/bufferedfile.h +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.h @@ -217,7 +217,6 @@ public: * Just forwarded to the real file to support FastOS_FileInterface. */ bool Open(unsigned int, const char*) override; - bool Delete() override; void alignEndForDirectIO(); }; diff --git a/vespalib/src/vespa/fastos/file.cpp b/vespalib/src/vespa/fastos/file.cpp index fdbacb570b4..c8cce99b169 100644 --- a/vespalib/src/vespa/fastos/file.cpp +++ b/vespalib/src/vespa/fastos/file.cpp @@ -335,16 +335,6 @@ FastOS_FileInterface::getLastErrorString() return FastOS_File::getErrorString(err); } -bool FastOS_FileInterface::Rename (const char *newFileName) -{ - bool rc=false; - if (FastOS_File::Rename(GetFileName(), newFileName)) { - SetFileName(newFileName); - rc = true; - } - return rc; -} - void FastOS_FileInterface::dropFromCache() const { } diff --git a/vespalib/src/vespa/fastos/file.h b/vespalib/src/vespa/fastos/file.h index 1a637726e45..84b94df5acc 100644 --- a/vespalib/src/vespa/fastos/file.h +++ b/vespalib/src/vespa/fastos/file.h @@ -20,9 +20,6 @@ constexpr int FASTOS_FILE_OPEN_WRITE = (1<<1); constexpr int FASTOS_FILE_OPEN_EXISTING = (1<<2); constexpr int FASTOS_FILE_OPEN_CREATE = (1<<3); constexpr int FASTOS_FILE_OPEN_TRUNCATE = (1<<4); -constexpr int FASTOS_FILE_OPEN_STDOUT = (2<<5); -constexpr int FASTOS_FILE_OPEN_STDERR = (3<<5); -constexpr int FASTOS_FILE_OPEN_STDFLAGS = (3<<5); constexpr int FASTOS_FILE_OPEN_DIRECTIO = 
(1<<7); constexpr int FASTOS_FILE_OPEN_SYNCWRITES = (1<<9); // synchronous writes @@ -338,24 +335,6 @@ public: int64_t getSize() const { return const_cast<FastOS_FileInterface *>(this)->GetSize(); } /** - * Delete the file. This method requires that the file is - * currently not opened. - * @return Boolean success/failure - */ - virtual bool Delete() = 0; - - /** - * Rename/move a file or directory. This method requires that - * the file is currently not opened. A move operation is - * supported as long as the source and destination reside - * on the same volume/device. - * The method fails if the destination already exists. - * @param newFileName New file name - * @return Boolean success/failure - */ - virtual bool Rename (const char *newFileName); - - /** * Force completion of pending disk writes (flush cache). */ [[nodiscard]] virtual bool Sync() = 0; diff --git a/vespalib/src/vespa/fastos/linux_file.cpp b/vespalib/src/vespa/fastos/linux_file.cpp index 6fb29782957..dabe84a8f30 100644 --- a/vespalib/src/vespa/fastos/linux_file.cpp +++ b/vespalib/src/vespa/fastos/linux_file.cpp @@ -372,9 +372,6 @@ FastOS_Linux_File::Open(unsigned int openFlags, const char *filename) bool rc; _cachedSize = -1; _filePointer = -1; - if (_directIOEnabled && (_openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) { - _directIOEnabled = false; - } if (_syncWritesEnabled) { openFlags |= FASTOS_FILE_OPEN_SYNCWRITES; } diff --git a/vespalib/src/vespa/fastos/unix_file.cpp b/vespalib/src/vespa/fastos/unix_file.cpp index 802e85d7609..952d9820306 100644 --- a/vespalib/src/vespa/fastos/unix_file.cpp +++ b/vespalib/src/vespa/fastos/unix_file.cpp @@ -207,80 +207,50 @@ FastOS_UNIX_File::Open(unsigned int openFlags, const char *filename) bool rc = false; assert(_filedes == -1); - if ((openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) { - FILE *file; - - switch(openFlags & FASTOS_FILE_OPEN_STDFLAGS) { + if (filename != nullptr) { + SetFileName(filename); + } + unsigned int accessFlags = 
CalcAccessFlags(openFlags); - case FASTOS_FILE_OPEN_STDOUT: - file = stdout; - SetFileName("stdout"); - break; + _filedes = open(_filename.c_str(), accessFlags, 0664); - case FASTOS_FILE_OPEN_STDERR: - file = stderr; - SetFileName("stderr"); - break; + rc = (_filedes != -1); - default: - fprintf(stderr, "Invalid open-flags %08X\n", openFlags); - abort(); - } - -#ifdef __linux__ - _filedes = file->_fileno; -#else - _filedes = fileno(file); -#endif + if (rc) { _openFlags = openFlags; - rc = true; - } else { - if (filename != nullptr) { - SetFileName(filename); - } - unsigned int accessFlags = CalcAccessFlags(openFlags); - - _filedes = open(_filename.c_str(), accessFlags, 0664); - - rc = (_filedes != -1); - - if (rc) { - _openFlags = openFlags; - if (_mmapEnabled) { - int64_t filesize = GetSize(); - auto mlen = static_cast<size_t>(filesize); - if ((static_cast<int64_t>(mlen) == filesize) && (mlen > 0)) { - void *mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | _mmapFlags, _filedes, 0); - if (mbase == MAP_FAILED) { - mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | (_mmapFlags & ALWAYS_SUPPORTED_MMAP_FLAGS), _filedes, 0); - } - if (mbase != MAP_FAILED) { + if (_mmapEnabled) { + int64_t filesize = GetSize(); + auto mlen = static_cast<size_t>(filesize); + if ((static_cast<int64_t>(mlen) == filesize) && (mlen > 0)) { + void *mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | _mmapFlags, _filedes, 0); + if (mbase == MAP_FAILED) { + mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | (_mmapFlags & ALWAYS_SUPPORTED_MMAP_FLAGS), _filedes, 0); + } + if (mbase != MAP_FAILED) { #ifdef __linux__ - int fadviseOptions = getFAdviseOptions(); - int eCode(0); - if (POSIX_FADV_RANDOM == fadviseOptions) { - eCode = posix_madvise(mbase, mlen, POSIX_MADV_RANDOM); - } else if (POSIX_FADV_SEQUENTIAL == fadviseOptions) { - eCode = posix_madvise(mbase, mlen, POSIX_MADV_SEQUENTIAL); - } - if (eCode != 0) { - fprintf(stderr, "Failed: posix_madvise(%p, %ld, %d) = %d\n", mbase, mlen, 
fadviseOptions, eCode); - } -#endif - _mmapbase = mbase; - _mmaplen = mlen; - } else { - close(_filedes); - _filedes = -1; - std::ostringstream os; - os << "mmap of file '" << GetFileName() << "' with flags '" << std::hex << (MAP_SHARED | _mmapFlags) << std::dec - << "' failed with error :'" << getErrorString(GetLastOSError()) << "'"; - throw std::runtime_error(os.str()); + int fadviseOptions = getFAdviseOptions(); + int eCode(0); + if (POSIX_FADV_RANDOM == fadviseOptions) { + eCode = posix_madvise(mbase, mlen, POSIX_MADV_RANDOM); + } else if (POSIX_FADV_SEQUENTIAL == fadviseOptions) { + eCode = posix_madvise(mbase, mlen, POSIX_MADV_SEQUENTIAL); + } + if (eCode != 0) { + fprintf(stderr, "Failed: posix_madvise(%p, %ld, %d) = %d\n", mbase, mlen, fadviseOptions, eCode); } +#endif + _mmapbase = mbase; + _mmaplen = mlen; + } else { + close(_filedes); + _filedes = -1; + std::ostringstream os; + os << "mmap of file '" << GetFileName() << "' with flags '" << std::hex << (MAP_SHARED | _mmapFlags) << std::dec + << "' failed with error :'" << getErrorString(GetLastOSError()) << "'"; + throw std::runtime_error(os.str()); } } } - } return rc; @@ -300,13 +270,9 @@ FastOS_UNIX_File::Close() bool ok = true; if (_filedes >= 0) { - if ((_openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) { - ok = true; - } else { - do { - ok = (close(_filedes) == 0); - } while (!ok && errno == EINTR); - } + do { + ok = (close(_filedes) == 0); + } while (!ok && errno == EINTR); if (_mmapbase != nullptr) { madvise(_mmapbase, _mmaplen, MADV_DONTNEED); @@ -341,35 +307,6 @@ FastOS_UNIX_File::GetSize() return fileSize; } -bool -FastOS_UNIX_File::Delete(const char *name) -{ - return (unlink(name) == 0); -} - - -bool -FastOS_UNIX_File::Delete() -{ - assert( ! IsOpened()); - - return (unlink(_filename.c_str()) == 0); -} - -bool FastOS_UNIX_File::Rename (const char *currentFileName, const char *newFileName) -{ - bool rc = false; - - // Enforce documentation. If the destination file exists, - // fail Rename. 
- FastOS_StatInfo statInfo{}; - if (!FastOS_File::Stat(newFileName, &statInfo)) { - rc = (rename(currentFileName, newFileName) == 0); - } else { - errno = EEXIST; - } - return rc; -} bool FastOS_UNIX_File::Sync() diff --git a/vespalib/src/vespa/fastos/unix_file.h b/vespalib/src/vespa/fastos/unix_file.h index 3d1f6b9db3f..120b1d35add 100644 --- a/vespalib/src/vespa/fastos/unix_file.h +++ b/vespalib/src/vespa/fastos/unix_file.h @@ -30,13 +30,7 @@ protected: bool _mmapEnabled; static unsigned int CalcAccessFlags(unsigned int openFlags); - public: - static bool Rename (const char *currentFileName, const char *newFileName); - bool Rename (const char *newFileName) override { - return FastOS_FileInterface::Rename(newFileName); - } - static bool Stat(const char *filename, FastOS_StatInfo *statInfo); static std::string getCurrentDirectory(); @@ -82,12 +76,10 @@ public: bool SetPosition(int64_t desiredPosition) override; int64_t GetPosition() override; int64_t GetSize() override; - bool Delete() override; [[nodiscard]] bool Sync() override; bool SetSize(int64_t newSize) override; void dropFromCache() const override; - static bool Delete(const char *filename); static int GetLastOSError(); static Error TranslateError(const int osError); static std::string getErrorString(const int osError); diff --git a/vespalib/src/vespa/vespalib/datastore/bufferstate.h b/vespalib/src/vespa/vespalib/datastore/bufferstate.h index 3de821928b8..01439586f5b 100644 --- a/vespalib/src/vespa/vespalib/datastore/bufferstate.h +++ b/vespalib/src/vespa/vespalib/datastore/bufferstate.h @@ -134,21 +134,21 @@ public: class BufferAndMeta { public: - BufferAndMeta() noexcept : BufferAndMeta(nullptr, nullptr, 0, 0) { } + BufferAndMeta() : BufferAndMeta(nullptr, nullptr, 0, 0) { } std::atomic<void*>& get_atomic_buffer() noexcept { return _buffer; } void* get_buffer_relaxed() noexcept { return _buffer.load(std::memory_order_relaxed); } const void* get_buffer_acquire() const noexcept { return 
_buffer.load(std::memory_order_acquire); } - uint32_t getTypeId() const noexcept { return _typeId; } - uint32_t get_array_size() const noexcept { return _array_size; } - BufferState * get_state_relaxed() noexcept { return _state.load(std::memory_order_relaxed); } + uint32_t getTypeId() const { return _typeId; } + uint32_t get_array_size() const { return _array_size; } + BufferState * get_state_relaxed() { return _state.load(std::memory_order_relaxed); } const BufferState * get_state_acquire() const { return _state.load(std::memory_order_acquire); } uint32_t get_entry_size() const noexcept { return _entry_size; } - void setTypeId(uint32_t typeId) noexcept { _typeId = typeId; } - void set_array_size(uint32_t arraySize) noexcept { _array_size = arraySize; } + void setTypeId(uint32_t typeId) { _typeId = typeId; } + void set_array_size(uint32_t arraySize) { _array_size = arraySize; } void set_entry_size(uint32_t entry_size) noexcept { _entry_size = entry_size; } - void set_state(BufferState * state) noexcept { _state.store(state, std::memory_order_release); } + void set_state(BufferState * state) { _state.store(state, std::memory_order_release); } private: - BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize) noexcept + BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize) : _buffer(buffer), _state(state), _typeId(typeId), diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp index eea49e80135..4eb4ff16864 100644 --- a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp +++ b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp @@ -10,19 +10,19 @@ namespace vespalib::datastore { bool -CompactionStrategy::should_compact_memory(const MemoryUsage& memory_usage) const noexcept +CompactionStrategy::should_compact_memory(const MemoryUsage& memory_usage) const { return 
should_compact_memory(memory_usage.usedBytes(), memory_usage.deadBytes()); } bool -CompactionStrategy::should_compact_address_space(const AddressSpace& address_space) const noexcept +CompactionStrategy::should_compact_address_space(const AddressSpace& address_space) const { return should_compact_address_space(address_space.used(), address_space.dead()); } CompactionSpec -CompactionStrategy::should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const noexcept +CompactionStrategy::should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const { return CompactionSpec(should_compact_memory(memory_usage), should_compact_address_space(address_space)); } @@ -36,7 +36,7 @@ std::ostream& operator<<(std::ostream& os, const CompactionStrategy& compaction_ } CompactionStrategy -CompactionStrategy::make_compact_all_active_buffers_strategy() noexcept +CompactionStrategy::make_compact_all_active_buffers_strategy() { return CompactionStrategy(0.0, 0.0, std::numeric_limits<uint32_t>::max(), 1.0); } diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h index c0c1857deae..f78e123e5de 100644 --- a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h +++ b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h @@ -25,15 +25,15 @@ public: static constexpr size_t DEAD_BYTES_SLACK = 0x10000u; static constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u; private: - float _maxDeadBytesRatio; // Max ratio of dead bytes before compaction - float _maxDeadAddressSpaceRatio; // Max ratio of dead address space before compaction - float _active_buffers_ratio; // Ratio of active buffers to compact for each reason (memory usage, address space usage) + double _maxDeadBytesRatio; // Max ratio of dead bytes before compaction + double _maxDeadAddressSpaceRatio; // Max ratio of dead address space before compaction uint32_t _max_buffers; // Max number of 
buffers to compact for each reason (memory usage, address space usage) - bool should_compact_memory(size_t used_bytes, size_t dead_bytes) const noexcept { + double _active_buffers_ratio; // Ratio of active buffers to compact for each reason (memory usage, address space usage) + bool should_compact_memory(size_t used_bytes, size_t dead_bytes) const { return ((dead_bytes >= DEAD_BYTES_SLACK) && (dead_bytes > used_bytes * getMaxDeadBytesRatio())); } - bool should_compact_address_space(size_t used_address_space, size_t dead_address_space) const noexcept { + bool should_compact_address_space(size_t used_address_space, size_t dead_address_space) const { return ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) && (dead_address_space > used_address_space * getMaxDeadAddressSpaceRatio())); } @@ -41,37 +41,40 @@ public: CompactionStrategy() noexcept : _maxDeadBytesRatio(0.05), _maxDeadAddressSpaceRatio(0.2), - _active_buffers_ratio(0.1), - _max_buffers(1) - { } - CompactionStrategy(float maxDeadBytesRatio, float maxDeadAddressSpaceRatio) noexcept + _max_buffers(1), + _active_buffers_ratio(0.1) + { + } + CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio) noexcept : _maxDeadBytesRatio(maxDeadBytesRatio), _maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio), - _active_buffers_ratio(0.1), - _max_buffers(1) - { } - CompactionStrategy(float maxDeadBytesRatio, float maxDeadAddressSpaceRatio, uint32_t max_buffers, float active_buffers_ratio) noexcept + _max_buffers(1), + _active_buffers_ratio(0.1) + { + } + CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio, uint32_t max_buffers, double active_buffers_ratio) noexcept : _maxDeadBytesRatio(maxDeadBytesRatio), _maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio), - _active_buffers_ratio(active_buffers_ratio), - _max_buffers(max_buffers) - { } - float getMaxDeadBytesRatio() const noexcept { return _maxDeadBytesRatio; } - float getMaxDeadAddressSpaceRatio() const noexcept { return 
_maxDeadAddressSpaceRatio; } + _max_buffers(max_buffers), + _active_buffers_ratio(active_buffers_ratio) + { + } + double getMaxDeadBytesRatio() const { return _maxDeadBytesRatio; } + double getMaxDeadAddressSpaceRatio() const { return _maxDeadAddressSpaceRatio; } uint32_t get_max_buffers() const noexcept { return _max_buffers; } - float get_active_buffers_ratio() const noexcept { return _active_buffers_ratio; } - bool operator==(const CompactionStrategy & rhs) const noexcept { + double get_active_buffers_ratio() const noexcept { return _active_buffers_ratio; } + bool operator==(const CompactionStrategy & rhs) const { return (_maxDeadBytesRatio == rhs._maxDeadBytesRatio) && (_maxDeadAddressSpaceRatio == rhs._maxDeadAddressSpaceRatio) && (_max_buffers == rhs._max_buffers) && (_active_buffers_ratio == rhs._active_buffers_ratio); } - bool operator!=(const CompactionStrategy & rhs) const noexcept { return !(operator==(rhs)); } + bool operator!=(const CompactionStrategy & rhs) const { return !(operator==(rhs)); } - bool should_compact_memory(const MemoryUsage& memory_usage) const noexcept; - bool should_compact_address_space(const AddressSpace& address_space) const noexcept; - CompactionSpec should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const noexcept; - static CompactionStrategy make_compact_all_active_buffers_strategy() noexcept; + bool should_compact_memory(const MemoryUsage& memory_usage) const; + bool should_compact_address_space(const AddressSpace& address_space) const; + CompactionSpec should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const; + static CompactionStrategy make_compact_all_active_buffers_strategy(); }; std::ostream& operator<<(std::ostream& os, const CompactionStrategy& compaction_strategy); diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h index b91d6c7cfa6..dbcdbeb12b9 100644 --- 
a/vespalib/src/vespa/vespalib/datastore/datastorebase.h +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h @@ -72,9 +72,9 @@ public: /** * Get the primary buffer id for the given type id. */ - uint32_t primary_buffer_id(uint32_t typeId) const noexcept { return _primary_buffer_ids[typeId]; } + uint32_t primary_buffer_id(uint32_t typeId) const { return _primary_buffer_ids[typeId]; } BufferState &getBufferState(uint32_t buffer_id) noexcept; - const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const noexcept { return _buffers[buffer_id]; } + const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const { return _buffers[buffer_id]; } uint32_t getMaxNumBuffers() const noexcept { return _buffers.size(); } uint32_t get_bufferid_limit_acquire() const noexcept { return _bufferIdLimit.load(std::memory_order_acquire); } uint32_t get_bufferid_limit_relaxed() noexcept { return _bufferIdLimit.load(std::memory_order_relaxed); } diff --git a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt index 1d770163e06..bdbb03bcfee 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt @@ -1,8 +1,12 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(vespalib_vespalib_fuzzy OBJECT - SOURCES + SOURCES + explicit_levenshtein_dfa.cpp fuzzy_matcher.cpp + implicit_levenshtein_dfa.cpp + levenshtein_dfa.cpp levenshtein_distance.cpp - DEPENDS - ) + unicode_utils.cpp + DEPENDS +) diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h new file mode 100644 index 00000000000..c445c60cc01 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <concepts> +#include <cstdint> + +namespace vespalib::fuzzy { + +// Concept that all DFA matcher implementations must satisfy +template <typename T> +concept DfaMatcher = requires(T a) { + typename T::StateType; + typename T::StateParamType; + typename T::EdgeType; + + // Initial (starting) state of the DFA + { a.start() } -> std::same_as<typename T::StateType>; + + // Whether a given state constitutes a string match within the maximum number of edits + { a.is_match(typename T::StateType{}) } -> std::same_as<bool>; + + // Whether a given state _may_ result in a match, either in the given state or in the + // future if the remaining string input is within the max edit distance + { a.can_match(typename T::StateType{}) } -> std::same_as<bool>; + + // Whether the given state is a valid state. Used for invariant checking. + { a.valid_state(typename T::StateType{}) } -> std::same_as<bool>; + + // Iff the given state represents a terminal matching state, returns the number of + // edits required to reach the state. Otherwise, returns max edits + 1. + { a.match_edit_distance(typename T::StateType{}) } -> std::same_as<uint8_t>; + + // Returns the state that is the result of matching the single logical Levenshtein + // matrix row represented by the given state with the input u32 character value. + { a.match_input(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::StateType>; + + // Returns the state that is the result of matching the single logical Levenshtein + // matrix row represented by the given state with a sentinel character that cannot + // match any character in the target string (i.e. is always a mismatch). + { a.match_wildcard(typename T::StateType{}) } -> std::same_as<typename T::StateType>; + + // Whether there exists an out edge from the given state that can accept a + // _higher_ UTF-32 code point value (character) than the input u32 value. Such an + // edge _may_ be a wildcard edge, which accepts any character. 
+ { a.has_higher_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>; + + // Whether there exists an out edge from the given state whose u32 character value + // _exactly_ matches the input u32 value. + { a.has_exact_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>; + + // Returns the out edge `e` from the given state that satisfies _both_: + // 1. higher than the given u32 value + // 2. no other out edges are lower than `e` + // Only called in a context where the caller already knows that such an edge must exist. + { a.lowest_higher_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::EdgeType>; + + // Returns the out edge from the given state that has the lowest character value + { a.smallest_explicit_out_edge(typename T::StateType{}) } -> std::same_as<typename T::EdgeType>; + + // Whether the given edge is a valid edge. Used for invariant checking. + { a.valid_edge(typename T::EdgeType{}) } -> std::same_as<bool>; + + // For a given edge, returns the UTF-32 code point value the edge represents + { a.edge_to_u32char(typename T::EdgeType{}) } -> std::same_as<uint32_t>; + + // Returns the state that is the result of following the given edge from the given state. + { a.edge_to_state(typename T::StateType{}, typename T::EdgeType{}) } -> std::same_as<typename T::StateType>; +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h new file mode 100644 index 00000000000..7e7881c5a14 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h @@ -0,0 +1,299 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "sparse_state.h" +#include <span> + +namespace vespalib::fuzzy { + +template <typename Traits> +struct DfaSteppingBase { + using StateType = Traits::StateType; + using TransitionsType = Traits::TransitionsType; + + std::span<const uint32_t> _u32_str; // TODO std::u32string_view + + DfaSteppingBase(std::span<const uint32_t> str) noexcept + : _u32_str(str) + { + } + + [[nodiscard]] static constexpr uint8_t max_edits() noexcept { + return Traits::max_edits(); + } + + /** + * Returns the initial state of the DFA. This represents the first row in the + * Levenshtein matrix. + */ + [[nodiscard]] StateType start() const { + StateType ret; + const auto j = std::min(static_cast<uint32_t>(max_edits()), + static_cast<uint32_t>(_u32_str.size())); // e.g. the empty string as target + for (uint32_t i = 0; i <= j; ++i) { + ret.append(i, i); + } + return ret; + } + + /** + * DFA stepping function that takes an input (sparse) state and a 32-bit character value + * (does not have to be valid UTF-32, but usually is) and generates a resulting state + * that represents applying the Levenshtein algorithm on a particular matrix row using + * the provided source string character. + * + * The returned state only includes elements where the edit distance (cost) is within + * the maximum number of edits. All other elements are implicitly beyond the max + * edit distance. It doesn't matter _how_ far beyond they are, since we have a fixed + * maximum to consider. + * + * Stepping a non-matching state S (can_match(S) == false) results in another non- + * matching state. + * + * As an example, this is a visualization of stepping through all source characters of + * the string "fxod" when matching the target string "food" with max edits k=1. + * Note: the actual internal representation is logical <column#, cost> tuples, but + * rendering as a matrix makes things easier to understand. Elements _not_ part of the + * state are rendered as '-'. 
+ * + * f o o d + * start(): [0 1 - - -] + * 'f': [1 0 1 - -] + * 'x': [- 1 1 - -] + * 'o': [- - 1 1 -] + * 'd': [- - - - 1] + * + * In this case, the resulting edit distance is 1, with one substitution 'x' -> 'o'. + * + * If we pull out our trusty pen & paper and do the full matrix calculations, we see + * that the above is equivalent to the full matrix with all costs > k pruned away: + * + * f o o d + * [0 1 2 3 4] + * f [1 0 1 2 3] + * x [2 1 1 2 3] + * o [3 2 1 1 2] + * d [4 3 2 2 1] + * + * Since we're working on sparse states, stepping requires a bit of manual edge case + * handling when compared to a dense representation. + * + * We first have to handle the case where our state includes the 0th matrix column. + * In an explicit Levenshtein matrix of target string length n, source string length m, + * the first column is always the values [0, m], increasing with 1 per row (the first + * _row_ is handled by start()). + * + * To mirror this, if our sparse state includes column 0 we have to increment it by 1, + * unless doing so would bring the cost beyond our max number of edits, in which case + * we don't bother including the column in the new state at all. These correspond to + * the start() -> 'f' -> 'x' transitions in the example above. + * + * What remains is then to do the actual Levenshtein insert/delete/substitute formula + * for matching positions in the matrix. Let d represent the logical (full) Levenshtein + * distance matrix and cell d[i, j] be the minimum number of edits between source string + * character at i+1 and target string character at j+1: + * + * Insertion cost: d[i, j-1] + 1 + * Deletion cost: d[i-1, j] + 1 + * Substitution cost: d[i-1, j-1] + (s[i-1] == t[j-1] ? 
0 : 1) + * + * d[i, j] = min(Insertion cost, Deletion cost, Substitution cost) + * + * We have to turn this slightly on its head, as instead of going through a matrix row + * and "pulling" values from the previous row, we have to go through a state representing + * the previous row and "push" new values instead (iff these values are within max edits). + * This also means we compute costs for indexes offset by 1 from the source state index + * (can be visualized as the element one down diagonally to the right). + * + * Insertion considers the current row only, i.e. the state being generated. We always + * work left to right in column order, so we can check if the last element (if any) + * in our _new_ sparse state is equal to the index of our source state element. If not, + * we know that it was beyond max edits. Max edits + 1 is inherently beyond max edits + * and need not be included. + * + * Deletion considers the cell directly above our own, which is part of the input state + * if it exists. Since we're computing the costs of cells at index + 1, we know that the + * only way for this cell to be present in the state is if the _next_ element of our + * input state exists and has an index equal to index + 1. If so, the deletion cost is + * the cost recorded for this element + 1. + * + * Substitution considers the cell diagonally up to the left. This very conveniently + * happens to be the input state cell we're currently working from, so it's therefore + * always present. + * + * Example stepping with c='x', max edits k=1: + * + * ====== Initially ====== + * + * f o o d + * state_in: [1 0 1 - -] (0:1, 1:0, 2:1) + * out: [] () + * + * We have a 0th column in state_in, but incrementing it results in 2 > k, so not + * appended to out. 
+ * + * ====== State (0:1), computing for index 1 ====== + * + * Insertion: out state is empty (no cell to our left), so implicit insertion cost + * is > k + * Deletion: state_in[1] is (1:0), which means it represents the cell just above + * index 1. Deletion cost is therefore 0+1 = 1 + * Substitution: (t[0] = 'f') != (c = 'x'), so substitution cost is 1+1 = 2 + * + * Min cost is 1, which is <= k. Appending to output. + * + * out: [- 1] (1:1) + * + * ====== State (1:0), computing for index 2 ====== + * + * Insertion: last element in out has index 1 (cell to our immediate left) with cost + * 1, so insertion cost is 1+1 = 2 + * Deletion: state_in[2] is (2:1), which means it represents the cell just above + * index 2. Deletion cost is therefore 1+1 = 2 + * Substitution: (t[1] = 'o') != (c = 'x'), so substitution cost is 0+1 = 1 + * + * Min cost is 1, which is <= k. Appending to output. + * + * out: [- 1 1] (1:1, 2:1) + * + * ====== State (2:1), computing for index 3 ====== + * + * Insertion: last element in out has index 2 (cell to our immediate left) with cost + * 1, so insertion cost is 1+1 = 2 + * Deletion: state_in[3] does not exist, so implicit deletion cost is > k + * Substitution: (t[2] = 'o') != (c = 'x'), so substitution cost is 1+1 = 2 + * + * Min cost is 2, which is > k. Not appending to output. 
+ * + * Resulting output state (right-padded for clarity): + * + * [- 1 1 - -] (1:1, 2:1) + * + */ + [[nodiscard]] StateType step(const StateType& state_in, uint32_t c) const { + if (state_in.empty()) { + return state_in; // A non-matching state can only step to another equally non-matching state + } + StateType new_state; + if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) { + new_state.append(0, state_in.cost(0) + 1); + } + for (uint32_t i = 0; i < state_in.size(); ++i) { + const auto idx = state_in.index(i); + if (idx == _u32_str.size()) [[unlikely]] { + break; // Can't process beyond matrix width + } + const uint8_t sub_cost = (_u32_str[idx] == c) ? 0 : 1; + // For our Levenshtein insert/delete/sub ops, we know that if a particular index is _not_ + // in the sparse state, its implicit distance is beyond the max edits, and need not be + // considered. + auto dist = state_in.cost(i) + sub_cost; // (Substitution) + if (!new_state.empty() && (new_state.last_index() == idx)) { // (Insertion) anything to our immediate left? + dist = std::min(dist, new_state.last_cost() + 1); + } + if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) { // (Deletion) anything immediately above? + dist = std::min(dist, state_in.cost(i + 1) + 1); + } + if (dist <= max_edits()) { + new_state.append(idx + 1, dist); + } + } + return new_state; + } + + /** + * Simplified version of step() which does not assemble a new state, but only checks + * whether _any_ mismatching character can be substituted in and still result in a + * potentially matching state. This is the case if the resulting state would contain + * _at least one_ entry (recalling that we only retain entries that are within the + * max number of edits). + * + * Consider using this directly instead of `can_match(step(state, WILDCARD))`, + * which has the exact same semantics, but requires computing the full (sparse) + * state before checking if it has any element at all. 
can_wildcard_step() just + * jumps straight to the last part. + */ + [[nodiscard]] bool can_wildcard_step(const StateType& state_in) const noexcept { + if (state_in.empty()) { + return false; // by definition + } + if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) { + return true; + } + for (uint32_t i = 0; i < state_in.size(); ++i) { + const auto idx = state_in.index(i); + if (idx == _u32_str.size()) [[unlikely]] { + break; + } + const uint8_t sub_cost = 1; // by definition + auto dist = state_in.cost(i) + sub_cost; + // Insertion only looks at the entries already computed in the current row + // and always increases the cost by 1. Since we always bail out immediately if + // there would have been at least one entry within max edits, we transitively + // know that since we have not bailed out yet there is no way we can get here + // and have insertion actually yield a match. So skip computing it entirely. + if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) { + dist = std::min(dist, state_in.cost(i + 1) + 1); + } + if (dist <= max_edits()) { + return true; + } + } + return false; + } + + /** + * Checks if the given state represents a terminal state within the max number of edits + */ + [[nodiscard]] bool is_match(const StateType& state) const noexcept { + // If the last index is equal to the string's length, it means we were able to match + // the entire string and still be within the max edit distance. + return (!state.empty() && state.last_index() == static_cast<uint32_t>(_u32_str.size())); + } + + /** + * Iff the input state represents a terminal matching state, returns the number of + * edits required to reach the state. Otherwise, returns max edits + 1. 
+ */ + [[nodiscard]] uint8_t match_edit_distance(const StateType& state) const noexcept { + if (!is_match(state)) { + return max_edits() + 1; + } + return state.last_cost(); + } + + /** + * Returns whether the given state _may_ end up matching the target string, + * depending on the remaining source string characters. + * + * Note: is_match(s) => can_match(s) is true, but + * can_match(s) => is_match(s) is false + */ + [[nodiscard]] bool can_match(const StateType& state) const noexcept { + // The presence of any entries at all indicates that we may still potentially match + // the target string if the remaining input is within the maximum number of edits. + return !state.empty(); + } + + /** + * All valid character transitions from this state are those that are reachable + * within the max edit distance. + */ + TransitionsType transitions(const StateType& state) const { + TransitionsType t; + for (size_t i = 0; i < state.size(); ++i) { + const auto idx = state.index(i); + if (idx < _u32_str.size()) [[likely]] { + t.add_char(_u32_str[idx]); + } + } + // We must ensure transitions are in increasing character order, so that the + // lowest possible higher char than any candidate char can be found with a + // simple "first-fit" linear scan. 
+ t.sort(); + return t; + } + +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg new file mode 100644 index 00000000000..0974e1d161f --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg @@ -0,0 +1,286 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><!-- Generated by graphviz version 2.40.1 (20161225.0304) + --><!-- Title: levenshtein_dfa Pages: 1 --><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="333pt" height="488pt" viewBox="0.00 0.00 333.00 488.00"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 484)"> +<title>levenshtein_dfa</title> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-484 329,-484 329,4 -4,4"/> +<!-- 0 --> +<g id="node1" class="node"> +<title>0</title> +<ellipse fill="none" stroke="#000000" cx="211" cy="-462" rx="18" ry="18"/> +<text text-anchor="middle" x="211" y="-457.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">0</text> +</g> +<!-- 1 --> +<g id="node2" class="node"> +<title>1</title> +<ellipse fill="none" stroke="#000000" cx="157" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="157" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">1</text> +</g> +<!-- 0->1 --> +<g id="edge1" class="edge"> +<title>0->1</title> +<path fill="none" stroke="#000000" d="M201.5939,-446.5322C193.3592,-432.9906 181.2571,-413.0894 171.7374,-397.4348"/> +<polygon fill="#000000" stroke="#000000" points="174.6658,-395.5142 166.4795,-388.7885 168.6849,-399.1513 174.6658,-395.5142"/> +<text text-anchor="middle" x="189.9453" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 2 --> +<g id="node3" class="node"> +<title>2</title> +<ellipse fill="none" 
stroke="#000000" cx="211" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="211" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">2</text> +</g> +<!-- 0->2 --> +<g id="edge2" class="edge"> +<title>0->2</title> +<path fill="none" stroke="#000000" d="M211,-443.6006C211,-431.4949 211,-415.4076 211,-401.6674"/> +<polygon fill="#000000" stroke="#000000" points="214.5001,-401.272 211,-391.272 207.5001,-401.2721 214.5001,-401.272"/> +<text text-anchor="middle" x="214.8913" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 3 --> +<g id="node4" class="node"> +<title>3</title> +<ellipse fill="none" stroke="#000000" cx="265" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="265" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">3</text> +</g> +<!-- 0->3 --> +<g id="edge3" class="edge"> +<title>0->3</title> +<path fill="none" stroke="#000000" d="M220.4061,-446.5322C228.6408,-432.9906 240.7429,-413.0894 250.2626,-397.4348"/> +<polygon fill="#000000" stroke="#000000" points="253.3151,-399.1513 255.5205,-388.7885 247.3342,-395.5142 253.3151,-399.1513"/> +<text text-anchor="middle" x="244.7223" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 4 --> +<g id="node5" class="node"> +<title>4</title> +<ellipse fill="none" stroke="#000000" cx="157" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="157" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">4</text> +</g> +<!-- 1->4 --> +<g id="edge4" class="edge"> +<title>1->4</title> +<path fill="none" stroke="#000000" d="M157,-354.8006C157,-342.6949 157,-326.6076 157,-312.8674"/> +<polygon fill="#000000" stroke="#000000" points="160.5001,-312.472 157,-302.472 153.5001,-312.4721 160.5001,-312.472"/> +<text text-anchor="middle" x="158.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" 
font-size="14.00" fill="#000000">f</text> +</g> +<!-- 1->4 --> +<g id="edge6" class="edge"> +<title>1->4</title> +<path fill="none" stroke="#000000" d="M156.1231,-354.8902C155.8898,-349.221 155.6708,-342.9535 155.5554,-337.2 155.4057,-329.7348 155.4057,-327.8652 155.5554,-320.4 155.6041,-317.9728 155.6712,-315.454 155.7501,-312.9273"/> +<polygon fill="#000000" stroke="#000000" points="159.2558,-312.8309 156.1231,-302.7098 152.2605,-312.5755 159.2558,-312.8309"/> +<text text-anchor="middle" x="158.7223" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 5 --> +<g id="node6" class="node"> +<title>5</title> +<ellipse fill="none" stroke="#000000" cx="103" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="103" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">5</text> +</g> +<!-- 1->5 --> +<g id="edge5" class="edge"> +<title>1->5</title> +<path fill="none" stroke="#000000" d="M147.5939,-357.7322C139.3592,-344.1906 127.2571,-324.2894 117.7374,-308.6348"/> +<polygon fill="#000000" stroke="#000000" points="120.6658,-306.7142 112.4795,-299.9885 114.6849,-310.3513 120.6658,-306.7142"/> +<text text-anchor="middle" x="138.8913" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 6 --> +<g id="node7" class="node"> +<title>6</title> +<ellipse fill="none" stroke="#000000" cx="307" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="307" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">6</text> +</g> +<!-- 2->6 --> +<g id="edge7" class="edge"> +<title>2->6</title> +<path fill="none" stroke="#000000" d="M224.3484,-360.8527C240.3968,-346.0079 267.511,-320.9274 286.2858,-303.5607"/> +<polygon fill="#000000" stroke="#000000" points="288.8006,-306.0022 293.765,-296.6424 284.0473,-300.8635 288.8006,-306.0022"/> +<text text-anchor="middle" x="268.9453" y="-324.6" 
font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 7 --> +<g id="node8" class="node"> +<title>7</title> +<ellipse fill="none" stroke="#000000" cx="190" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="190" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">7</text> +</g> +<!-- 2->7 --> +<g id="edge8" class="edge"> +<title>2->7</title> +<path fill="none" stroke="#000000" d="M208.8708,-355.1934C205.2075,-324.212 197.6865,-260.606 193.3267,-223.7346"/> +<polygon fill="#000000" stroke="#000000" points="196.7849,-223.1741 192.1348,-213.6543 189.8333,-223.9961 196.7849,-223.1741"/> +<text text-anchor="middle" x="205.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 3->6 --> +<g id="edge9" class="edge"> +<title>3->6</title> +<path fill="none" stroke="#000000" d="M272.7034,-356.9127C278.881,-343.8515 287.6665,-325.2765 294.7989,-310.1966"/> +<polygon fill="#000000" stroke="#000000" points="298.0914,-311.4211 299.2032,-300.8848 291.7635,-308.4281 298.0914,-311.4211"/> +<text text-anchor="middle" x="290.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 8 --> +<g id="node9" class="node"> +<title>8</title> +<ellipse fill="none" stroke="#000000" cx="263" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="263" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">8</text> +</g> +<!-- 3->8 --> +<g id="edge10" class="edge"> +<title>3->8</title> +<path fill="none" stroke="#000000" d="M264.7972,-355.1934C264.4483,-324.212 263.732,-260.606 263.3168,-223.7346"/> +<polygon fill="#000000" stroke="#000000" points="266.8158,-223.6142 263.2033,-213.6543 259.8162,-223.6931 266.8158,-223.6142"/> +<text text-anchor="middle" x="267.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 
4->7 --> +<g id="edge11" class="edge"> +<title>4->7</title> +<path fill="none" stroke="#000000" d="M163.3627,-267.2785C168.1106,-254.5023 174.6869,-236.8062 180.1171,-222.194"/> +<polygon fill="#000000" stroke="#000000" points="183.4541,-223.2619 183.6568,-212.669 176.8925,-220.8234 183.4541,-223.2619"/> +<text text-anchor="middle" x="180.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 5->7 --> +<g id="edge14" class="edge"> +<title>5->7</title> +<path fill="none" stroke="#000000" d="M115.8371,-271.2973C130.1833,-256.6543 153.5826,-232.7709 170.2601,-215.7483"/> +<polygon fill="#000000" stroke="#000000" points="172.8948,-218.0603 177.393,-208.4678 167.8946,-213.1615 172.8948,-218.0603"/> +<text text-anchor="middle" x="157.7223" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 9 --> +<g id="node10" class="node"> +<title>9</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="60" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="60" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">9(1)</text> +</g> +<!-- 5->9 --> +<g id="edge12" class="edge"> +<title>5->9</title> +<path fill="none" stroke="#000000" d="M95.1131,-268.1127C88.7885,-255.0515 79.7938,-236.4765 72.4916,-221.3966"/> +<polygon fill="#000000" stroke="#000000" points="75.4909,-219.5597 67.9825,-212.0848 69.1907,-222.6105 75.4909,-219.5597"/> +<text text-anchor="middle" x="89.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 10 --> +<g id="node11" class="node"> +<title>10</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="126" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="126" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">10(1)</text> +</g> +<!-- 5->10 --> +<g id="edge13" class="edge"> +<title>5->10</title> +<path fill="none" 
stroke="#000000" d="M107.5441,-266.856C110.7887,-254.3287 115.2168,-237.2326 118.9222,-222.9264"/> +<polygon fill="#000000" stroke="#000000" points="122.3462,-223.6657 121.4654,-213.1076 115.5698,-221.9105 122.3462,-223.6657"/> +<text text-anchor="middle" x="120.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 6->8 --> +<g id="edge15" class="edge"> +<title>6->8</title> +<path fill="none" stroke="#000000" d="M298.9297,-268.1127C292.4172,-254.9694 283.1382,-236.2426 275.6413,-221.1125"/> +<polygon fill="#000000" stroke="#000000" points="278.5951,-219.1904 271.0191,-211.784 272.3229,-222.2984 278.5951,-219.1904"/> +<text text-anchor="middle" x="291.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 11 --> +<g id="node12" class="node"> +<title>11</title> +<ellipse fill="none" stroke="#000000" cx="170" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="170" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">11</text> +</g> +<!-- 7->11 --> +<g id="edge16" class="edge"> +<title>7->11</title> +<path fill="none" stroke="#000000" d="M185.9527,-177.63C183.1694,-165.2722 179.4189,-148.6197 176.2537,-134.5662"/> +<polygon fill="#000000" stroke="#000000" points="179.5848,-133.4268 173.973,-124.4402 172.7558,-134.9649 179.5848,-133.4268"/> +<text text-anchor="middle" x="184.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 12 --> +<g id="node13" class="node"> +<title>12</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="113" cy="-18" rx="18" ry="18"/> +<text text-anchor="middle" x="113" y="-13.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">12(1)</text> +</g> +<!-- 7->12 --> +<g id="edge17" class="edge"> +<title>7->12</title> +<path fill="none" stroke="#000000" d="M195.7305,-178.1814C201.8942,-156.377 209.2729,-118.3097 
197,-88.8 185.9488,-62.2279 158.7426,-42.3831 138.2647,-30.5667"/> +<polygon fill="#000000" stroke="#000000" points="139.8619,-27.4509 129.4116,-25.7072 136.4936,-33.5872 139.8619,-27.4509"/> +<text text-anchor="middle" x="206.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 8->11 --> +<g id="edge18" class="edge"> +<title>8->11</title> +<path fill="none" stroke="#000000" d="M249.6754,-182.8771C234.3441,-168.2382 208.9734,-144.0133 190.9836,-126.8359"/> +<polygon fill="#000000" stroke="#000000" points="192.9495,-123.8738 183.2999,-119.4993 188.1154,-128.9366 192.9495,-123.8738"/> +<text text-anchor="middle" x="227.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 13 --> +<g id="node14" class="node"> +<title>13</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="18" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="18" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">13(1)</text> +</g> +<!-- 9->13 --> +<g id="edge19" class="edge"> +<title>9->13</title> +<path fill="none" stroke="#000000" d="M44.2356,-186.804C34.7839,-180.5699 23.5878,-171.2526 18.2174,-159.6 14.7103,-151.9903 13.6799,-143.0834 13.8048,-134.7757"/> +<polygon fill="#000000" stroke="#000000" points="17.3023,-134.9283 14.4812,-124.716 10.3181,-134.4586 17.3023,-134.9283"/> +<text text-anchor="middle" x="22.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 9->13 --> +<g id="edge21" class="edge"> +<title>9->13</title> +<path fill="none" stroke="#000000" d="M52.2966,-179.3127C46.119,-166.2515 37.3335,-147.6765 30.2011,-132.5966"/> +<polygon fill="#000000" stroke="#000000" points="33.2365,-130.8281 25.7968,-123.2848 26.9086,-133.8211 33.2365,-130.8281"/> +<text text-anchor="middle" x="45.7223" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" 
fill="#000000">*</text> +</g> +<!-- 14 --> +<g id="node15" class="node"> +<title>14</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="72" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="72" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">14(0)</text> +</g> +<!-- 9->14 --> +<g id="edge20" class="edge"> +<title>9->14</title> +<path fill="none" stroke="#000000" d="M62.4284,-177.63C64.0874,-165.353 66.3193,-148.8372 68.2105,-134.8421"/> +<polygon fill="#000000" stroke="#000000" points="71.7044,-135.1219 69.5752,-124.7433 64.7675,-134.1845 71.7044,-135.1219"/> +<text text-anchor="middle" x="71.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 10->11 --> +<g id="edge22" class="edge"> +<title>10->11</title> +<path fill="none" stroke="#000000" d="M134.0703,-179.3127C140.5828,-166.1694 149.8618,-147.4426 157.3587,-132.3125"/> +<polygon fill="#000000" stroke="#000000" points="160.6771,-133.4984 161.9809,-122.984 154.4049,-130.3904 160.6771,-133.4984"/> +<text text-anchor="middle" x="155.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 10->12 --> +<g id="edge23" class="edge"> +<title>10->12</title> +<path fill="none" stroke="#000000" d="M124.6819,-177.5934C122.4142,-146.612 117.7583,-83.006 115.0594,-46.1346"/> +<polygon fill="#000000" stroke="#000000" points="118.5423,-45.772 114.3215,-36.0543 111.561,-46.2831 118.5423,-45.772"/> +<text text-anchor="middle" x="124.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 11->12 --> +<g id="edge24" class="edge"> +<title>11->12</title> +<path fill="none" stroke="#000000" d="M161.5849,-90.7772C155.8346,-80.1448 147.859,-65.9928 140,-54 137.0775,-49.5402 133.7942,-44.8997 130.5502,-40.4917"/> +<polygon fill="#000000" stroke="#000000" points="133.2767,-38.294 124.4657,-32.4105 127.6846,-42.5045 
133.2767,-38.294"/> +<text text-anchor="middle" x="153.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 13->12 --> +<g id="edge25" class="edge"> +<title>13->12</title> +<path fill="none" stroke="#000000" d="M29.0516,-92.0851C37.7254,-81.0086 50.4346,-65.7741 63.2174,-54 71.136,-46.7062 80.5495,-39.5675 89.0481,-33.5954"/> +<polygon fill="#000000" stroke="#000000" points="91.192,-36.3695 97.4722,-27.8367 87.2416,-30.5907 91.192,-36.3695"/> +<text text-anchor="middle" x="67.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 14->12 --> +<g id="edge26" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#000000" d="M79.7118,-90.0974C85.7248,-77.0741 94.1818,-58.7574 101.0669,-43.8455"/> +<polygon fill="#000000" stroke="#000000" points="104.3085,-45.1739 105.3228,-34.6277 97.9532,-42.2395 104.3085,-45.1739"/> +<text text-anchor="middle" x="100.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 14->12 --> +<g id="edge27" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#000000" d="M71.9481,-88.4225C72.5604,-77.8966 74.4493,-64.6966 79.5554,-54 82.4978,-47.8362 86.8709,-42.0067 91.4921,-36.9052"/> +<polygon fill="#000000" stroke="#000000" points="94.2341,-39.1107 98.7881,-29.5445 89.2625,-34.1828 94.2341,-39.1107"/> +<text text-anchor="middle" x="82.7223" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +</g> +</svg>
\ No newline at end of file diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp new file mode 100644 index 00000000000..f78de5cc082 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp @@ -0,0 +1,11 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "explicit_levenshtein_dfa.hpp" + +namespace vespalib::fuzzy { + +template class ExplicitLevenshteinDfaImpl<1>; +template class ExplicitLevenshteinDfaImpl<2>; +template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>; +template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h new file mode 100644 index 00000000000..49baad21530 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h @@ -0,0 +1,147 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "dfa_stepping_base.h" +#include "levenshtein_dfa.h" +#include "sparse_state.h" +#include "unicode_utils.h" +#include <vector> + +namespace vespalib::fuzzy { + +// A doomed state is one that cannot possibly match the target string +constexpr const uint32_t DOOMED = UINT32_MAX; + +template <uint8_t MaxEdits> +struct DfaNode { + static constexpr uint8_t MaxCharOutEdges = diag(MaxEdits); // Not counting wildcard edge + + struct Edge { + uint32_t u32ch; + uint32_t node; + }; + + std::array<Edge, MaxCharOutEdges> match_out_edges_buf; + uint32_t wildcard_edge_to = DOOMED; + uint8_t num_match_out_edges = 0; + uint8_t edits = UINT8_MAX; + + [[nodiscard]] bool has_wildcard_edge() const noexcept { + return wildcard_edge_to != DOOMED; + } + + [[nodiscard]] uint32_t wildcard_edge_to_or_doomed() const noexcept { + return wildcard_edge_to; + } + + [[nodiscard]] std::span<const Edge> match_out_edges() const noexcept { + return std::span(match_out_edges_buf.begin(), num_match_out_edges); + } + + [[nodiscard]] uint32_t match_or_doomed(uint32_t ch) const noexcept { + // Always prefer the exact matching edges + for (const auto& e : match_out_edges()) { + if (e.u32ch == ch) { + return e.node; + } + } + // Fallback to wildcard edge if possible (could be doomed) + return wildcard_edge_to; + } + + [[nodiscard]] bool has_exact_match(uint32_t ch) const noexcept { + for (const auto& e : match_out_edges()) { + if (e.u32ch == ch) { + return true; + } + } + return false; + } + + [[nodiscard]] size_t has_higher_out_edge(uint32_t ch) const noexcept { + if (has_wildcard_edge()) { + return true; // implicitly possible to substitute a higher out edge char + } + return lowest_higher_explicit_out_edge(ch) != nullptr; + } + + [[nodiscard]] const Edge* lowest_higher_explicit_out_edge(uint32_t ch) const noexcept { + // Important: these _must_ be sorted in increasing code point order + for (const auto& e : match_out_edges()) { + if (e.u32ch > ch) { + return &e; + } + } + return 
nullptr; + } + + void add_match_out_edge(uint32_t out_char, uint32_t out_node) noexcept { + assert(num_match_out_edges < MaxCharOutEdges); + match_out_edges_buf[num_match_out_edges] = Edge(out_char, out_node); + ++num_match_out_edges; + } + + void set_wildcard_out_edge(uint32_t out_node) noexcept { + assert(wildcard_edge_to == DOOMED); + wildcard_edge_to = out_node; + } +}; + +template <uint8_t MaxEdits> +class ExplicitLevenshteinDfaImpl final : public LevenshteinDfa::Impl { +public: + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + using DfaNodeType = DfaNode<MaxEdits>; + using MatchResult = LevenshteinDfa::MatchResult; +private: + std::vector<DfaNodeType> _nodes; +public: + ExplicitLevenshteinDfaImpl() noexcept = default; + ~ExplicitLevenshteinDfaImpl() override = default; + + static constexpr uint8_t max_edits() noexcept { return MaxEdits; } + + void ensure_node_array_large_enough_for_index(uint32_t node_index) { + if (node_index >= _nodes.size()) { + _nodes.resize(node_index + 1); + } + } + + void set_node_edit_distance(uint32_t node_index, uint8_t edits) { + _nodes[node_index].edits = edits; + } + + void add_outgoing_edge(uint32_t from_node_idx, uint32_t to_node_idx, uint32_t out_char) { + _nodes[from_node_idx].add_match_out_edge(out_char, to_node_idx); + } + + void set_wildcard_edge(uint32_t from_node_idx, uint32_t to_node_idx) { + _nodes[from_node_idx].set_wildcard_out_edge(to_node_idx); + } + + [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override; + + [[nodiscard]] size_t memory_usage() const noexcept override { + return sizeof(DfaNodeType) * _nodes.size(); + } + + void dump_as_graphviz(std::ostream& os) const override; +}; + +template <typename Traits> +class ExplicitLevenshteinDfaBuilder { + std::vector<uint32_t> _u32_str_buf; // TODO std::u32string +public: + explicit ExplicitLevenshteinDfaBuilder(std::string_view str) + : ExplicitLevenshteinDfaBuilder(utf8_string_to_utf32(str)) + {} + + explicit 
ExplicitLevenshteinDfaBuilder(std::vector<uint32_t> str) noexcept + : _u32_str_buf(std::move(str)) + {} + + [[nodiscard]] LevenshteinDfa build_dfa() const; +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp new file mode 100644 index 00000000000..0960219aff3 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp @@ -0,0 +1,228 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "explicit_levenshtein_dfa.h" +#include "match_algorithm.hpp" +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <iostream> +#include <span> +#include <queue> + +namespace vespalib::fuzzy { + +// DfaMatcher adapter for explicit DFA implementation +template <uint8_t MaxEdits> +struct ExplicitDfaMatcher { + using DfaNodeType = typename ExplicitLevenshteinDfaImpl<MaxEdits>::DfaNodeType; + using StateType = const DfaNodeType*; + using EdgeType = const DfaNodeType::Edge*; + + using StateParamType = const DfaNodeType*; + + const std::span<const DfaNodeType> _nodes; + + explicit ExplicitDfaMatcher(const std::span<const DfaNodeType> nodes) noexcept + : _nodes(nodes) + {} + + static constexpr uint8_t max_edits() noexcept { return MaxEdits; } + + StateType start() const noexcept { + return &_nodes[0]; + } + bool has_higher_out_edge(StateType node, uint32_t mch) const noexcept { + return node->has_higher_out_edge(mch); + } + StateType match_input(StateType node, uint32_t mch) const noexcept { + auto maybe_node_idx = node->match_or_doomed(mch); + return ((maybe_node_idx != DOOMED) ? 
&_nodes[maybe_node_idx] : nullptr); + } + bool is_match(StateType node) const noexcept { + return node->edits <= max_edits(); + } + bool can_match(StateType node) const noexcept { + return node != nullptr; + } + uint8_t match_edit_distance(StateType node) const noexcept { + return node->edits; + } + bool valid_state(StateType node) const noexcept { + return node != nullptr; + } + StateType match_wildcard(StateType node) const noexcept { + auto edge_to = node->wildcard_edge_to_or_doomed(); + return ((edge_to != DOOMED) ? &_nodes[edge_to] : nullptr); + } + bool has_exact_explicit_out_edge(StateType node, uint32_t ch) const noexcept { + return node->has_exact_match(ch); + } + EdgeType lowest_higher_explicit_out_edge(StateType node, uint32_t ch) const noexcept { + return node->lowest_higher_explicit_out_edge(ch); + } + EdgeType smallest_explicit_out_edge(StateType node) const noexcept { + // Out-edges are pre-ordered in increasing code point order, so the first + // element is always the smallest possible matching character. 
+ assert(!node->match_out_edges().empty()); + return &node->match_out_edges().front(); + } + bool valid_edge(EdgeType edge) const noexcept { + return edge != nullptr; + } + uint32_t edge_to_u32char(EdgeType edge) const noexcept { + return edge->u32ch; + } + StateType edge_to_state([[maybe_unused]] StateType node, EdgeType edge) const noexcept { + return &_nodes[edge->node]; + } +}; + +template <uint8_t MaxEdits> +LevenshteinDfa::MatchResult +ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str, std::string* successor_out) const { + ExplicitDfaMatcher<MaxEdits> matcher(_nodes); + return MatchAlgorithm<MaxEdits>::match(matcher, u8str, successor_out); +} + +template <uint8_t MaxEdits> +void ExplicitLevenshteinDfaImpl<MaxEdits>::dump_as_graphviz(std::ostream& os) const { + os << std::dec << "digraph levenshtein_dfa {\n"; + os << " fontname=\"Helvetica,Arial,sans-serif\"\n"; + os << " node [shape=circle, fontname=\"Helvetica,Arial,sans-serif\", fixedsize=true];\n"; + os << " edge [fontname=\"Helvetica,Arial,sans-serif\"];\n"; + for (size_t i = 0; i < _nodes.size(); ++i) { + const auto& node = _nodes[i]; + if (node.edits <= max_edits()) { + os << " " << i << " [label=\"" << i << "(" << static_cast<int>(node.edits) << ")\", style=\"filled\"];\n"; + } + for (const auto& edge : node.match_out_edges()) { + std::string as_utf8; + append_utf32_char_as_utf8(as_utf8, edge.u32ch); + os << " " << i << " -> " << edge.node << " [label=\"" << as_utf8 << "\"];\n"; + } + if (node.wildcard_edge_to != DOOMED) { + os << " " << i << " -> " << node.wildcard_edge_to << " [label=\"*\"];\n"; + } + } + os << "}\n"; +} + +namespace { + +template <typename StateType> +struct ExploreState { + using NodeIdAndExplored = std::pair<uint32_t, bool>; + using SparseExploredStates = vespalib::hash_map<StateType, NodeIdAndExplored, typename StateType::hash>; + + uint32_t state_counter; + SparseExploredStates explored_states; + + ExploreState(); + ~ExploreState(); + + [[nodiscard]] 
SparseExploredStates::iterator node_of(const StateType& state) { + auto maybe_explored = explored_states.find(state); + if (maybe_explored != explored_states.end()) { + return maybe_explored; + } + uint32_t this_node = state_counter; + assert(state_counter < UINT32_MAX); + ++state_counter; + return explored_states.insert(std::make_pair(state, std::make_pair(this_node, false))).first; // not yet explored; + } + + [[nodiscard]] bool already_explored(const SparseExploredStates::iterator& node) const noexcept { + return node->second.second; + } + + void tag_as_explored(SparseExploredStates::iterator& node) noexcept { + node->second.second = true; + } +}; + +template <typename StateType> +ExploreState<StateType>::ExploreState() + : state_counter(0), + explored_states() +{} + +template <typename StateType> +ExploreState<StateType>::~ExploreState() = default; + +template <typename Traits> +class ExplicitLevenshteinDfaBuilderImpl : public DfaSteppingBase<Traits> { + using Base = DfaSteppingBase<Traits>; + + using StateType = typename Base::StateType; + using TransitionsType = typename Base::TransitionsType; + + using Base::_u32_str; + using Base::max_edits; + using Base::start; + using Base::match_edit_distance; + using Base::step; + using Base::is_match; + using Base::can_match; + using Base::transitions; +public: + explicit ExplicitLevenshteinDfaBuilderImpl(std::span<const uint32_t> str) noexcept + : DfaSteppingBase<Traits>(str) + { + assert(str.size() < UINT32_MAX / max_out_edges_per_node()); + } + + [[nodiscard]] static constexpr uint8_t max_out_edges_per_node() noexcept { + // Max possible out transition characters (2k+1) + one wildcard edge. 
+ return diag(max_edits()) + 1; + } + + [[nodiscard]] LevenshteinDfa build_dfa() const; +}; + +template <typename Traits> +LevenshteinDfa ExplicitLevenshteinDfaBuilderImpl<Traits>::build_dfa() const { + auto dfa = std::make_unique<ExplicitLevenshteinDfaImpl<max_edits()>>(); + ExploreState<StateType> exp; + // Use BFS instead of DFS to ensure most node edges point to nodes that are allocated _after_ + // the parent node, which means the CPU can skip ahead instead of ping-ponging back and forth. + // This does _not_ always hold, such as if you have A->B and A->C->B (i.e. both parent and + // grandparent have a transition to the same state), in which case B may be allocated before C. + std::queue<StateType> to_explore; + to_explore.push(start()); + while (!to_explore.empty()) { + auto state = std::move(to_explore.front()); + to_explore.pop(); + auto this_node = exp.node_of(state); // note: invalidated by subsequent calls to node_of + if (exp.already_explored(this_node)) { + continue; + } + exp.tag_as_explored(this_node); + const auto this_node_idx = this_node->second.first; + dfa->ensure_node_array_large_enough_for_index(this_node_idx); + dfa->set_node_edit_distance(this_node_idx, match_edit_distance(state)); + auto t = transitions(state); + for (uint32_t out_c : t.u32_chars()) { + auto new_state = step(state, out_c); + auto out_node = exp.node_of(new_state); + dfa->add_outgoing_edge(this_node_idx, out_node->second.first, out_c); + to_explore.push(std::move(new_state)); + } + auto wildcard_state = step(state, WILDCARD); + if (can_match(wildcard_state)) { + auto out_node = exp.node_of(wildcard_state); + dfa->set_wildcard_edge(this_node_idx, out_node->second.first); + to_explore.push(std::move(wildcard_state)); + } // else: don't bother + } + return LevenshteinDfa(std::move(dfa)); +} + +} // anon ns + +template <typename Traits> +LevenshteinDfa ExplicitLevenshteinDfaBuilder<Traits>::build_dfa() const { + ExplicitLevenshteinDfaBuilderImpl<Traits> builder(_u32_str_buf); + 
return builder.build_dfa(); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp new file mode 100644 index 00000000000..8b9d2eddcac --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp @@ -0,0 +1,9 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "implicit_levenshtein_dfa.hpp" + +namespace vespalib::fuzzy { + +template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>; +template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h new file mode 100644 index 00000000000..0846b95d135 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "levenshtein_dfa.h" +#include "unicode_utils.h" +#include <vector> + +namespace vespalib::fuzzy { + +template <typename Traits> +class ImplicitLevenshteinDfa final : public LevenshteinDfa::Impl { + std::vector<uint32_t> _u32_str_buf; // TODO std::u32string +public: + using MatchResult = LevenshteinDfa::MatchResult; + + explicit ImplicitLevenshteinDfa(std::string_view str) + : ImplicitLevenshteinDfa(utf8_string_to_utf32(str)) + {} + + explicit ImplicitLevenshteinDfa(std::vector<uint32_t> str) noexcept + : _u32_str_buf(std::move(str)) + {} + + ~ImplicitLevenshteinDfa() override = default; + + [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override; + + [[nodiscard]] size_t memory_usage() const noexcept override { + return _u32_str_buf.size() * sizeof(uint32_t); + } + + void dump_as_graphviz(std::ostream& os) const override; +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp new file mode 100644 index 00000000000..4ee468e424b --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp @@ -0,0 +1,121 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "dfa_stepping_base.h" +#include "implicit_levenshtein_dfa.h" +#include "match_algorithm.hpp" +#include "sparse_state.h" +#include <cassert> +#include <stdexcept> + +namespace vespalib::fuzzy { + +// DfaMatcher adapter for implicit DFA implementation +template <typename Traits> +struct ImplicitDfaMatcher : public DfaSteppingBase<Traits> { + using Base = DfaSteppingBase<Traits>; + + using StateType = typename Base::StateType; + using EdgeType = uint32_t; // Just the raw u32 character value + + using StateParamType = const StateType&; + + using Base::_u32_str; + using Base::max_edits; + using Base::start; + using Base::match_edit_distance; + using Base::step; + using Base::can_wildcard_step; + using Base::is_match; + using Base::can_match; + + explicit ImplicitDfaMatcher(std::span<const uint32_t> u32_str) noexcept + : Base(u32_str) + {} + + // start, is_match, can_match, match_edit_distance are all provided by base type + + template <typename F> + bool has_any_char_matching(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) { + for (uint32_t i = 0; i < state.size(); ++i) { + const auto idx = state.index(i); + if ((idx < _u32_str.size()) && f(_u32_str[idx])) { + return true; + } + } + return false; + } + + template <typename F> + void for_each_char(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) { + for (uint32_t i = 0; i < state.size(); ++i) { + const auto idx = state.index(i); + if ((idx < _u32_str.size())) [[likely]] { + f(_u32_str[idx]); + } + } + } + + bool has_explicit_higher_out_edge(const StateType& state, uint32_t ch) const noexcept { + return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept { + return state_ch > ch; + }); + } + + bool has_higher_out_edge(const StateType& state, uint32_t mch) const noexcept { + return (has_explicit_higher_out_edge(state, mch) || can_wildcard_step(state)); + } + StateType match_input(const StateType& state, uint32_t mch) const noexcept { + return 
step(state, mch); + } + bool valid_state(const StateType& state) const noexcept { + return !state.empty(); + } + StateType match_wildcard(const StateType& state) const noexcept { + return step(state, WILDCARD); + } + bool has_exact_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept { + return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept { + return state_ch == ch; + }); + } + EdgeType lowest_higher_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept { + uint32_t min_ch = UINT32_MAX; + for_each_char(state, [ch, &min_ch](uint32_t state_ch) noexcept { + if ((state_ch > ch) && (state_ch < min_ch)) { + min_ch = state_ch; + } + }); + return min_ch; + } + EdgeType smallest_explicit_out_edge(const StateType& state) const noexcept { + uint32_t min_ch = UINT32_MAX; + for_each_char(state, [&min_ch](uint32_t state_ch) noexcept { + min_ch = std::min(min_ch, state_ch); + }); + return min_ch; + } + bool valid_edge(EdgeType edge) const noexcept { + return edge != UINT32_MAX; + } + uint32_t edge_to_u32char(EdgeType edge) const noexcept { + return edge; + } + StateType edge_to_state(const StateType& state, EdgeType edge) const noexcept { + return step(state, edge); + } +}; + +template <typename Traits> +LevenshteinDfa::MatchResult +ImplicitLevenshteinDfa<Traits>::match(std::string_view u8str, std::string* successor_out) const { + ImplicitDfaMatcher<Traits> matcher(_u32_str_buf); + return MatchAlgorithm<Traits::max_edits()>::match(matcher, u8str, successor_out); +} + +template <typename Traits> +void ImplicitLevenshteinDfa<Traits>::dump_as_graphviz(std::ostream&) const { + throw std::runtime_error("Graphviz output not available for implicit Levenshtein DFA"); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp new file mode 100644 index 00000000000..e75ef8365bf --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp @@ -0,0 +1,83 @@ +// 
Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "explicit_levenshtein_dfa.h" +#include "implicit_levenshtein_dfa.h" +#include "levenshtein_dfa.h" +#include <vespa/vespalib/util/stringfmt.h> +#include <memory> + +namespace vespalib::fuzzy { + +LevenshteinDfa::LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept + : _impl(std::move(impl)) +{} + +LevenshteinDfa::LevenshteinDfa(LevenshteinDfa&&) noexcept = default; +LevenshteinDfa& LevenshteinDfa::operator=(LevenshteinDfa&&) noexcept = default; + +LevenshteinDfa::~LevenshteinDfa() = default; + +LevenshteinDfa::MatchResult +LevenshteinDfa::match(std::string_view u8str, std::string* successor_out) const { + return _impl->match(u8str, successor_out); +} + +size_t LevenshteinDfa::memory_usage() const noexcept { + return _impl->memory_usage(); +} + +void LevenshteinDfa::dump_as_graphviz(std::ostream& out) const { + _impl->dump_as_graphviz(out); +} + +LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits, DfaType dfa_type) { + if (max_edits != 1 && max_edits != 2) { + throw std::invalid_argument(make_string("Levenshtein DFA max_edits must be in {1, 2}, was %u", max_edits)); + } + if (dfa_type == DfaType::Implicit) { + if (max_edits == 1) { + return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>>(target_string)); + } else { // max_edits == 2 + return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>>(target_string)); + } + } else { // DfaType::Explicit + if (max_edits == 1) { + return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>(target_string).build_dfa(); + } else { // max_edits == 2 + return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>(target_string).build_dfa(); + } + } + +} + +LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits) { + // TODO automatically select implementation based on 
target length/max edits? + // Suggestion: + // - Explicit DFA iff (k == 1 && |target| <= 256) || (k == 2 && |target| <= 64). + // - Implicit DFA otherwise. + // This keeps memory overhead < 64k and DFA construction time < 300 usec (measured on + // an M1 Pro; your mileage may vary etc). + // Ideally the implicit DFA would always be the fastest (or at least approximately as + // fast as the explicit DFA), but this is not yet the case. + return build(target_string, max_edits, DfaType::Implicit); +} + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos) { + if (mos.matches()) { + os << "match(" << static_cast<int>(mos.edits()) << " edits)"; + } else { + os << "mismatch"; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt) { + if (dt == LevenshteinDfa::DfaType::Implicit) { + os << "Implicit"; + } else { + assert(dt == LevenshteinDfa::DfaType::Explicit); + os << "Explicit"; + } + return os; +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h new file mode 100644 index 00000000000..a26ccbe87ee --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h @@ -0,0 +1,244 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <cstdint> +#include <iosfwd> +#include <memory> +#include <string> +#include <string_view> + +namespace vespalib::fuzzy { + +/** + * Levenshtein Deterministic Finite Automata (DFA) + * + * The Levenshtein distance (or edit distance) is the minimum number of edits (additions, + * deletions or substitutions) needed to transform a particular source string s to a + * particular target string t. + * + * Let m be the length of the source string and n be the length of the target string. + * + * The classic dynamic programming algorithm uses a n x m cost matrix and is therefore + * O(nm) in space and time. 
By observing that only 2 rows of the matrix are actually + * needed, this is commonly reduced to O(n) space complexity (still O(nm) time complexity). + * When the maximum number of allowed edits is constrained to k, some clever observations + * about the nature of the cost matrix allows for reducing the time complexity down to + * O(kn) (more specifically, O((2k+1) * n)). When k is fixed (e.g. k in {1, 2}), the + * time complexity simplifies down to O(n). + * + * This implements code for building and evaluating Levenshtein Deterministic Finite + * Automata, where the resulting DFA efficiently matches all possible source strings that + * can be transformed to the target string within k max edits. This allows for easy linear + * matching of strings. + * + * Inspiration: + * - http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata + * - https://julesjacobs.com/2015/06/17/disqus-levenshtein-simple-and-fast.html + * + * The latter in particular was a close inspiration for the sparse DFA state management. + * + * ====== Dictionary skipping via successor string generation ====== + * + * Scanning for edit distance matches frequently takes place against a sorted dictionary. + * When matching using a DFA, in the case where the source string does _not_ match, we can + * generate the _successor_ string; the next matching string that is lexicographically + * _greater_ than the source string. This string has the invariant that there are no + * possibly matching strings within k edits ordered after the source string but before + * the successor. + * + * This lets us do possibly massive leaps forward in the dictionary, turning a dictionary + * scan into a sublinear operation. + * + * Note that the implemented successor algorithm is slightly different from that described + * in the above blog post. 
The implemented algorithm requires no extra data structures + * beyond the DFA itself and the target string and tries to be extra clever with reducing + * the number of code point conversions required. + * + * ====== Unicode support ====== + * + * Matching and successor generation is fully Unicode-aware. All input strings are expected + * to be in UTF-8, and the generated successor is also encoded as UTF-8 (with some caveats; + * see the documentation for match()). + * + * Internally, matching is done on UTF-32 code points and the DFA itself is built around + * UTF-32. This is unlike Lucene, which converts a UTF-32 DFA to an equivalent UTF-8 DFA. + * + * ====== Memory usage ====== + * + * There is always a baseline DFA memory usage O(n) in the target string, as the + * underlying DFA needs to convert the input UTF-8 string to explicit UTF-32 chars. + * + * Aside from the baseline, memory usage depends on whether an explicit or implicit DFA + * is used. + * + * ------ Explicit DFA ------ + * + * The explicit DFA graph takes up quite a bit more memory than the original string + * representation (one reason is the use of UTF-32 characters under the hood). + * + * Expected upper bound memory usage for a string of length n with max edits k is + * + * (2k+1) * N(k) * n * W(k) + * + * where N(1) is expected to be 32 and N(2) is 48, W(1) is 1.34 and W(2) is 3.2 (empirically + * derived). + * + * Memory usage during building is higher due to keeping track of the set of generated + * states in a hash table, but still linear in input size. This extra memory is freed + * once building is complete. + * + * ------ Implicit DFA ------ + * + * Implicit DFAs have O(1) memory usage during evaluation, which all lives on the stack + * or in registers (this does not include the successor string, which is provided by the + * caller). + * + * Since the sparse state stepping is currently not as fast as explicit DFA node traversal, + * string matching is slower than with the explicit DFA. 
+ * + * ====== In short ====== + * + * - Immutable; build once, run many times. + * - Explicit DFA build time is amortized linear in target string size. + * - Implicit DFA build time is O(1) (aside from initial UTF-32 conversion) + * - Zero-allocation matching. + * - Matching takes in raw UTF-8 input, no need to pre-convert. + * - Streaming UTF-8 to UTF-32 conversion; fully unicode-aware (DFA uses UTF-32 code + * points internally). + * - If required, it's possible (but not currently implemented) to bake case + * insensitive matching semantics into the generated DFA itself. + * - Allows for dictionary forward-skipping via successor algorithm. + * - Amortized zero allocations for successor string building when reusing string + * between matches. + * - Successor string is generated in-place as UTF-8 and can be directly used as input + * to a byte-wise dictionary seek. + */ +class LevenshteinDfa { +public: + class MatchResult { + uint8_t _max_edits; + uint8_t _edits; + public: + constexpr MatchResult(uint8_t max_edits, uint8_t edits) noexcept + : _max_edits(max_edits), + _edits(edits) + {} + + static constexpr MatchResult make_match(uint8_t max_edits, uint8_t edits) noexcept { + return {max_edits, edits}; + } + + static constexpr MatchResult make_mismatch(uint8_t max_edits) noexcept { + return {max_edits, static_cast<uint8_t>(max_edits + 1)}; + } + + [[nodiscard]] constexpr bool matches() const noexcept { return _edits <= _max_edits; } + [[nodiscard]] constexpr uint8_t edits() const noexcept { return _edits; } + [[nodiscard]] constexpr uint8_t max_edits() const noexcept { return _max_edits; } + }; + + struct Impl { + virtual ~Impl() = default; + [[nodiscard]] virtual MatchResult match(std::string_view u8str, std::string* successor_out) const = 0; + [[nodiscard]] virtual size_t memory_usage() const noexcept = 0; + virtual void dump_as_graphviz(std::ostream& out) const = 0; + }; + +private: + std::unique_ptr<Impl> _impl; +public: + explicit 
LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept; + LevenshteinDfa(LevenshteinDfa&&) noexcept; + LevenshteinDfa& operator=(LevenshteinDfa&&) noexcept; + LevenshteinDfa(const LevenshteinDfa&) = delete; + LevenshteinDfa& operator=(const LevenshteinDfa&) = delete; + ~LevenshteinDfa(); + + /** + * Attempts to match the source string `source` with the target string this DFA was + * built with, emitting a successor string on mismatch if `successor_out` != nullptr. + * + * `source` must not contain any null UTF-8 chars. + * + * Match case: + * Iff `source` is _within_ the maximum edit distance, returns a MatchResult with + * matches() == true and edits() == the actual edit distance. If `successor_out` + * is not nullptr, the string pointed to is _not_ modified. + * + * Mismatch case: + * Iff `source` is _beyond_ the maximum edit distance, returns a MatchResult with + * matches() == false. + * + * Iff `successor_out` is not nullptr, the following holds: + * - `successor_out` is modified to contain the next (in byte-wise ordering) possible + * _matching_ string S so that there exists no other matching string S' that is + * greater than `source` but smaller than S. + * - `successor_out` contains UTF-8 bytes that are within what UTF-8 can legally + * encode in bitwise form, but the _code points_ they encode may not be valid. + * In particular, surrogate pair ranges and U+10FFFF+1 may be encoded, neither of + * which are valid UTF-8. + * + * It is expected that the consumer of `successor_out` is only interested in the + * memcmp()-ordering of strings and not whether they are technically valid Unicode. + * This should be the case for low-level dictionary data structures etc. 
+ * + * Memory allocation: + * This function does not directly or indirectly allocate any heap memory if either: + * + * - the input string is within the max edit distance, or + * - `successor_out` is nullptr, or + * - `successor_out` has sufficient capacity to hold the generated successor + * + * By reusing the successor string across many calls, this therefore amortizes memory + * allocations down to near zero per invocation. + */ + [[nodiscard]] MatchResult match(std::string_view source, std::string* successor_out) const; + + /** + * Returns how much memory is used by the underlying DFA representation, in bytes. + */ + [[nodiscard]] size_t memory_usage() const noexcept; + + enum class DfaType { + Implicit, + Explicit + }; + + /** + * Builds and returns a Levenshtein DFA that matches all strings within `max_edits` + * edits of `target_string`. The type of DFA returned is specified by dfa_type. + * + * `max_edits` must be in {1, 2}. Throws std::invalid_argument if outside range. + * + * `target_string` must not contain any null UTF-8 chars. + */ + [[nodiscard]] static LevenshteinDfa build(std::string_view target_string, + uint8_t max_edits, + DfaType dfa_type); + + /** + * Same as build() but currently always returns an implicit DFA. + */ + [[nodiscard]] static LevenshteinDfa build(std::string_view target_string, uint8_t max_edits); + + /** + * Dumps the DFA as a Graphviz graph in text format to the provided output stream. + * + * Note: Only supported for _explicit_ DFAs. Trying to call this function on an implicit + * DFA will throw a std::runtime_error, as there is no concrete underlying graph + * structure to dump. + * + * Note that only _matching_ state transitions are present in the DFA, and therefore only + * such transitions are present in the generated graph. 
Overall this makes the graph for + * longer strings much more manageable, as the number of out-edges from a particular depth + * in the graph depends on the max number of edits and not on the length of the string + * itself. Otherwise, you'd have a whole bunch of nodes with out-edges to the same terminal + * non-matching state node. + */ + void dump_as_graphviz(std::ostream& out) const; +}; + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos); +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt); + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp new file mode 100644 index 00000000000..206b69f8ebe --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp @@ -0,0 +1,291 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "dfa_matcher.h" +#include "levenshtein_dfa.h" +#include "unicode_utils.h" +#include <vespa/vespalib/text/utf8.h> +#include <cassert> +#include <concepts> + +namespace vespalib::fuzzy { + +/** + * Implementation of algorithm for linear-time k-max edits string matching and successor + * string generation over an abstract DFA representation. + * + * The implementation is agnostic to how the underlying DFA is implemented, but requires + * an appropriate adapter that satisfies the DfaMatcher concept contracts. + */ +template <uint8_t MaxEdits> +struct MatchAlgorithm { + using MatchResult = LevenshteinDfa::MatchResult; + + static constexpr uint8_t max_edits() noexcept { return MaxEdits; } + + /** + * Matches UTF-8 source string `source` against the target DFA, optionally generating + * the successor string iff the source string is not within the maximum number of edits + * of the target string. 
+ * + * The actual match loop is very simple: we try to match the DFA as far as we can + * before either consuming all input (source string) characters or ending up in a non- + * matching state before we have consumed all input. In the former case, we may be in + * a matching state (consider matching "foo" with the target string "food"; after + * consuming all input we'll be in a matching state with 1 edit). In the latter case, + * the input string cannot possibly match. + * + * If we end up in a matching state, all is well. We simply return a MatchResult with + * the number of edits the state represents. + * + * The interesting bit happens when the string does _not_ match and we are asked to provide a + * _successor_ string that _does_ match and is strictly greater in lexicographic order. + * + * We lean on some core invariants: + * + * - The m x n (|source| x |target|) Levenshtein matrix provides, for any m[i, j] with + * i in [1, m], j in [1, n], the _minimum possible_ number of edits that can transform + * the source string prefix of length `i` to the target string prefix of length `j`. + * This means there is no way of transforming said source prefix using _fewer_ edits. + * + * - Any given DFA state corresponds to a unique row in the Levenshtein matrix, thus + * transitively inheriting the invariants of the matrix row elements themselves, such + * as representing the minimum number of edits. + * + * We have two mismatch cases: + * + * 1. We've matched the entire source string without ending in an accepting state. + * + * This can only happen if the input is a (possibly edited) prefix of the target string. + * Any and all _longer_ strings with this prefix are inherently lexicographically greater, + * so we emit the smallest possible suffix that turns prefix || suffix into a matching + * string. + * + * See emit_smallest_matching_suffix() for details. + * + * 2. We've matched a prefix of the source string without ending in an accepting state. 
+ * + * This case is trickier than when the entire source string is a prefix, as we cannot + * just emit a suffix to the source to create a matching, lexicographically greater string. + * + * Consider source "foxx" and target "food". There exists no suffix S in "food" that can + * turn "foxx" || S into a matching string within k=1 edits. + * + * So we have to backtrack to somewhere. + * + * That "somewhere" is the state that maximizes the size of the source prefix while + * allowing us to emit a greater suffix. + * + * For each state we visit, we check if there exists at least one higher out edge than the + * one taken out from that state (this is possibly a wildcard edge). If one exists, we + * copy the state to `last_state_with_higher_out` and remember the state's source string + * prefix as well as the source string character that transitions us away from the state + * (this will be our candidate for building a greater suffix). + * + * When we fail to match the entire source string, we know that last_state_with_higher_out + * represents the last possible branching point (and therefore the longest prefix) where + * we can substitute in or insert a higher character, in turn creating a greater suffix. + * + * Proof by contradiction: let `last_state_with_higher_out` be S and assume there exists + * a state S' that has a greater source string prefix than S while still allowing for + * emitting a lexicographically greater suffix that is within max edits k. We terminate + * the match loop once can_match(X) is false for any state X, where X subsumes S by + * definition. For S' to exist, it must be possible for a transition to exist from X to + * a later state that can have a higher out edge. However, edit distance costs can + * never decrease, only stay constant (with matching substitutions) or increase (with + * insertions, deletions or non-matching substitutions), so it's impossible to follow + * an out-edge from X to any later potentially matching state. 
Thus, S' can not exist + * and we have a contradiction. + * + * Since we want to generate the smallest possible larger string that matches, we ideally + * want to emit a character that is +1 of the source character after the shared prefix. + * This is using the "higher out"-character we remembered earlier. We do this if we have + * a wildcard out edge (or if there exists an explicit out-edge for value char+1). + * Otherwise, we have to follow the highest explicitly present out-edge. + * + * Once we have emitted one single character that gets us lexicographically higher than + * the source string, we then emit the smallest possible suffix to this. This uses the + * same minimal suffix generation logic as mismatch case 1). + * + * See `backtrack_and_emit_greater_suffix()` for details. + * + * Example: + * (This is easiest to follow by looking at examples/food_dfa.svg) + * + * Source "foxx", target "food" and k=1: + * + * After matching "fo" with 0 edits we reach a state with out-edges {d, o, *}. This state + * has an implicitly higher out-edge (*) and we remember it and the char 'x' for later. + * Edge 'x' can only happen via *, so we take that path. + * + * After matching "fox" with 1 edit we reach a state with out-edges {d, o}. There is + * no out-edge for 'x' and the state is not a matching state, so we need to backtrack + * and generate a successor. + * + * We backtrack to the state representing "fo" and emit it as a successor prefix. We + * observe that this state has a wildcard out-edge and emit 'x'+1 == 'y' to the successor + * string and continue with emitting the smallest suffix. We now have a successor + * prefix of "foy", with which we reach the same logical state as we did with "fox" + * previously. The smallest out-edge here is 'd', so we take it. This leaves us in an + * accepting (matching) state, so suffix generation completes. 
+ * + * "foxx" -> "foyd" + * + * Note that it's possible for the prefix to be empty, which results in a successor + * that has nothing in common with the source altogether. + * Example: "gp" -> "hfood" (+1 char value case) + * + * Performance note: + * Both the input and successor output strings are in UTF-8 format. To avoid doing + * duplicate work, we keep track of the byte length of the string prefix that will be + * part of the successor and simply copy it verbatim instead of building the string + * from converted UTF-32 -> UTF-8 chars as we go. + * + * TODO we could probably also optimize the smallest suffix generation with this when + * we know we can no longer insert any smaller char substitutions and the only way + * to complete the string is to emit it verbatim. + * - To do this we'd need both the original UTF-8 target string as well as a + * secondary vector that maps u32 character index to the corresponding UTF-8 index. + * Both trivial to get as part of DFA initialization. + */ + template <DfaMatcher Matcher> + static MatchResult match(const Matcher& matcher, + std::string_view source, + std::string* successor_out) + { + using StateType = typename Matcher::StateType; + vespalib::Utf8Reader u8_reader(source.data(), source.size()); + uint32_t n_prefix_u8_bytes = 0; + uint32_t char_after_prefix = 0; + StateType last_state_with_higher_out = StateType{}; + + StateType state = matcher.start(); + while (u8_reader.hasMore()) { + const auto u8_pos_before_char = u8_reader.getPos(); + const uint32_t mch = u8_reader.getChar(); + if (successor_out && matcher.has_higher_out_edge(state, mch)) { + last_state_with_higher_out = state; + n_prefix_u8_bytes = u8_pos_before_char; + char_after_prefix = mch; + } + auto maybe_next = matcher.match_input(state, mch); + if (matcher.can_match(maybe_next)) { + state = maybe_next; + } else { + // Can never match; find the successor if requested + if (successor_out) { + *successor_out = source.substr(0, n_prefix_u8_bytes); + 
assert(matcher.valid_state(last_state_with_higher_out));
+                    backtrack_and_emit_greater_suffix(matcher, last_state_with_higher_out,
+                                                      char_after_prefix, *successor_out);
+                }
+                return MatchResult::make_mismatch(max_edits());
+            }
+        }
+        const auto edits = matcher.match_edit_distance(state);
+        if (edits <= max_edits()) {
+            return MatchResult::make_match(max_edits(), edits);
+        }
+        if (successor_out) {
+            *successor_out = source;
+            emit_smallest_matching_suffix(matcher, state, *successor_out);
+        }
+        return MatchResult::make_mismatch(max_edits());
+    }
+
+    /**
+     * Instantly backtrack to the last possible branching point in the DFA where we can
+     * choose some higher outgoing edge character value and still match the DFA. If the node
+     * has a wildcard edge, we can bump the input char by one and generate the smallest
+     * possible matching suffix to that. Otherwise, choose the smallest out edge that is
+     * greater than the input character at that location and _then_ emit the smallest
+     * matching suffix.
+     *
+     * precondition: `last_state_with_higher_out` has either a wildcard edge or a char match
+     * edge that compares greater than `input_at_branch`.
+     */
+    template <DfaMatcher Matcher>
+    static void backtrack_and_emit_greater_suffix(
+            const Matcher& matcher,
+            typename Matcher::StateParamType last_state_with_higher_out,
+            const uint32_t input_at_branch,
+            std::string& successor)
+    {
+        auto wildcard_state = matcher.match_wildcard(last_state_with_higher_out);
+        if (matcher.can_match(wildcard_state)) {
+            // `input_at_branch` may be U+10FFFF, with +1 being outside legal Unicode _code point_
+            // range but _within_ what UTF-8 can technically _encode_.
+            // We assume that successor-consumers do not care about anything except byte-wise
+            // ordering. This is similar to what RE2's PossibleMatchRange emits to represent a
+            // UTF-8 upper bound, so not without precedent.
+            // If the resulting character corresponds to an existing out-edge we _must_ take it
+            // instead of the wildcard edge, or we'll end up in the wrong state.
+            const auto next_char = input_at_branch + 1;
+            if (!matcher.has_exact_explicit_out_edge(last_state_with_higher_out, next_char)) {
+                append_utf32_char_as_utf8(successor, next_char);
+                emit_smallest_matching_suffix(matcher, wildcard_state, successor);
+                return;
+            } // else: handle exact match below (it will be found as the first higher out edge)
+        }
+        const auto first_highest_edge = matcher.lowest_higher_explicit_out_edge(last_state_with_higher_out, input_at_branch);
+        assert(matcher.valid_edge(first_highest_edge));
+        append_utf32_char_as_utf8(successor, matcher.edge_to_u32char(first_highest_edge));
+        emit_smallest_matching_suffix(matcher, matcher.edge_to_state(last_state_with_higher_out, first_highest_edge), successor);
+    }
+
+    /**
+     * The smallest possible suffix is generated by following the smallest out-edge per state,
+     * until we reach a state that is a match. It is possible that the smallest out edge is a
+     * "wildcard" edge (our terminology), which means that we can insert/substitute an arbitrary
+     * character and still have `can_match(resulting state)` be true. In this case we emit the
+     * smallest possible non-null UTF-8 character (0x01).
+     *
+     * Examples:
+     * (These are easiest to follow by looking at examples/food_dfa.svg)
+     *
+     * Source "fo", target "food" and k=1:
+     *
+     * After matching "fo" we have 1 edit to spare. The smallest valid, non-empty UTF-8 suffix
+     * to this string must necessarily begin with 0x01, so that's what we emit. The smallest
+     * edge we can follow from the resulting state is 'd', and that is an accepting (matching)
+     * state.
+     *
+     * "fo" -> "fo\x01d"
+     *
+     * Source "fx", target "food" and k=1:
+     *
+     * After matching "fx" we have no edits to spare.
The smallest character reachable from + * the state is 'o' (in fact, it is the only out edge available since we're down to zero + * available edits). The next state has an out-edge to 'd' and 'o', and we choose 'd' + * since it is smallest. This leaves us in an accepting (matching) state and we terminate + * the loop. + * + * "fx" -> "fxod" + */ + // TODO consider variant for only emitting _prefix of suffix_ to avoid having to generate + // the full string? Won't generate a matching string, but will be lexicographically greater. + template <DfaMatcher Matcher> + static void emit_smallest_matching_suffix( + const Matcher& matcher, + typename Matcher::StateParamType from, + std::string& str) + { + auto state = from; + while (!matcher.is_match(state)) { + // If we can take a wildcard path, emit the smallest possible valid UTF-8 character (0x01). + // Otherwise, find the smallest char that can eventually lead us to a match. + auto wildcard_state = matcher.match_wildcard(state); + if (matcher.can_match(wildcard_state)) { + str += '\x01'; + state = wildcard_state; + } else { + const auto smallest_out_edge = matcher.smallest_explicit_out_edge(state); + assert(matcher.valid_edge(smallest_out_edge)); + append_utf32_char_as_utf8(str, matcher.edge_to_u32char(smallest_out_edge)); + state = matcher.edge_to_state(state, smallest_out_edge); + } + } + } +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h new file mode 100644 index 00000000000..40cfa5e6409 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h @@ -0,0 +1,175 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstdint> +#include <ostream> +#include <span> +#include <xxh3.h> // TODO factor out? 
+ +namespace vespalib::fuzzy { + +// Sentinel U32 char for state stepping that cannot match any target string characters +constexpr const uint32_t WILDCARD = UINT32_MAX; + +/** + * diag(n) is the width of the diagonal of the cost matrix that can possibly be + * within k edits. This means that for a fixed k, it suffices to maintain state + * for up to and including diag(k) consecutive cells for any given matrix row. + */ +constexpr inline uint8_t diag(uint8_t k) noexcept { + return k*2 + 1; +} + +template <uint8_t MaxEdits> +struct FixedSparseState { +private: + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + std::array<uint32_t, diag(MaxEdits)> indices; + std::array<uint8_t, diag(MaxEdits)> costs; // elems are 1-1 with indices vector + uint8_t sz; +public: + constexpr FixedSparseState() noexcept : indices(), costs(), sz(0) {} + + [[nodiscard]] constexpr bool empty() const noexcept { + return (sz == 0); + } + + [[nodiscard]] constexpr uint32_t size() const noexcept { + return sz; + } + + [[nodiscard]] constexpr uint32_t index(uint32_t entry_idx) const noexcept { + return indices[entry_idx]; + } + + [[nodiscard]] constexpr uint8_t cost(uint32_t entry_idx) const noexcept { + return costs[entry_idx]; + } + + // Precondition: !empty() + [[nodiscard]] constexpr uint32_t last_index() const noexcept { + return indices[sz - 1]; + } + + // Precondition: !empty() + [[nodiscard]] constexpr uint8_t last_cost() const noexcept { + return costs[sz - 1]; + } + + void append(uint32_t index, uint8_t cost) noexcept { + assert(sz < diag(MaxEdits)); + indices[sz] = index; + costs[sz] = cost; + ++sz; + } + + constexpr bool operator==(const FixedSparseState& rhs) const noexcept { + if (sz != rhs.sz) { + return false; + } + return (std::equal(indices.begin(), indices.begin() + sz, rhs.indices.begin()) && + std::equal(costs.begin(), costs.begin() + sz, rhs.costs.begin())); + } + + struct hash { + size_t operator()(const FixedSparseState& s) const noexcept { + 
static_assert(std::is_same_v<uint32_t, std::decay_t<decltype(s.indices[0])>>); + static_assert(std::is_same_v<uint8_t, std::decay_t<decltype(s.costs[0])>>); + // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" + return (XXH3_64bits(s.indices.data(), s.sz * sizeof(uint32_t)) ^ + XXH3_64bits(s.costs.data(), s.sz)); +#pragma GCC diagnostic pop + } + }; +}; + +/** + * Prints sparse states as a single matrix row. Columns prior to any state index + * are printed explicitly as '-' characters to make states line up when printed. + * + * Example output for the state (2:1, 3:1): + * + * [-, -, 1, 1] + * + * Only meant as a debugging aid during development, as states with high indices + * will emit very large strings. + */ +template <uint8_t MaxEdits> [[maybe_unused]] +std::ostream& operator<<(std::ostream& os, const FixedSparseState<MaxEdits>& s) { + os << "["; + size_t last_idx = 0; + for (size_t i = 0; i < s.size(); ++i) { + if (i != 0) { + os << ", "; + } + for (size_t j = last_idx; j < s.indices[i]; ++j) { + os << "-, "; + } + last_idx = s.indices[i] + 1; + os << static_cast<uint32_t>(s.costs[i]); + } + os << "]"; + return os; +} + +template <uint8_t MaxEdits> +struct FixedMaxEditsTransitions { + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + std::array<uint32_t, diag(MaxEdits)> out_u32_chars; + uint8_t size; + + constexpr FixedMaxEditsTransitions() noexcept : out_u32_chars(), size(0) {} + + [[nodiscard]] constexpr bool has_char(uint32_t u32ch) const noexcept { + for (uint8_t i = 0; i < size; ++i) { + if (out_u32_chars[i] == u32ch) { + return true; + } + } + return false; + } + + void add_char(uint32_t u32ch) noexcept { + if (!has_char(u32ch)) { + assert(size < diag(MaxEdits)); + out_u32_chars[size] = u32ch; + ++size; + } + } + + constexpr std::span<const uint32_t> u32_chars() const noexcept { + return {out_u32_chars.begin(), out_u32_chars.begin() + size}; + } + + 
constexpr std::span<uint32_t> u32_chars() noexcept { + return {out_u32_chars.begin(), out_u32_chars.begin() + size}; + } + + void sort() noexcept { + // TODO use custom sorting networks for fixed array sizes <= 5? + // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" + std::sort(out_u32_chars.begin(), out_u32_chars.begin() + size); +#pragma GCC diagnostic pop + } +}; + +template <uint8_t MaxEdits> +struct FixedMaxEditDistanceTraits { + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + using StateType = FixedSparseState<MaxEdits>; + using TransitionsType = FixedMaxEditsTransitions<MaxEdits>; + constexpr static uint8_t max_edits() noexcept { + return MaxEdits; + } +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp new file mode 100644 index 00000000000..648be234562 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp @@ -0,0 +1,108 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "unicode_utils.h" +#include <vespa/vespalib/text/utf8.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <stdexcept> + +namespace vespalib::fuzzy { + +std::vector<uint32_t> utf8_string_to_utf32(std::string_view str) { + vespalib::stringref ch_str(str.data(), str.size()); + vespalib::Utf8Reader utf8_reader(ch_str); + std::vector<uint32_t> u32ret; + u32ret.reserve(str.size()); // Will over-allocate for all non-ASCII + while (utf8_reader.hasMore()) { + u32ret.emplace_back(utf8_reader.getChar()); + } + return u32ret; +} + +std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str) { + return utf8_string_to_utf32(std::string_view(reinterpret_cast<const char*>(u8str.data()), u8str.size())); +} + +[[noreturn]] void throw_bad_code_point(uint32_t codepoint) __attribute__((noinline)); +[[noreturn]] void throw_bad_code_point(uint32_t codepoint) { + throw std::invalid_argument(make_string("invalid UTF-32 codepoint: U+%04X (%u)", codepoint, codepoint)); +} + +namespace { + +/** + * Encodes a single UTF-32 `codepoint` to a 1-4 byte UTF-8 sequence. + * ` + * `u8buf` must point to a buffer with at least 4 writable bytes. + * + * Returns the number of bytes written. + * + * See comments on append_utf32_char_as_utf8() as to why this is not a generic UTF-8 + * encoding function that can be used in all possible scenarios. 
+ */ +[[nodiscard]] uint8_t encode_utf8_char(uint32_t codepoint, unsigned char* u8buf) { + constexpr const uint8_t low_6bits_mask = 0x3F; + + // Yanked and modified from utf8.cpp: + if (codepoint < 0x80) { + u8buf[0] = (char) codepoint; + return 1; + } else if (codepoint < 0x800) { + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + codepoint >>= 6; + char first5 = codepoint; + first5 |= 0xC0; + u8buf[0] = first5; + u8buf[1] = low6; + return 2; + } else if (codepoint < 0x10000) { + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + + codepoint >>= 6; + char mid6 = (codepoint & low_6bits_mask); + mid6 |= 0x80; + + codepoint >>= 6; + char first4 = codepoint; + first4 |= 0xE0; + + u8buf[0] = first4; + u8buf[1] = mid6; + u8buf[2] = low6; + return 3; + } else if (codepoint <= 0x110000) { // Explicitly _include_ U+10FFFF + 1! + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + + codepoint >>= 6; + char mid6 = (codepoint & low_6bits_mask); + mid6 |= 0x80; + + codepoint >>= 6; + char hi6 = (codepoint & low_6bits_mask); + hi6 |= 0x80; + + codepoint >>= 6; + char first3 = codepoint; + first3 |= 0xF0; + + u8buf[0] = first3; + u8buf[1] = hi6; + u8buf[2] = mid6; + u8buf[3] = low6; + return 4; + } else { + throw_bad_code_point(codepoint); + } +} + +} // anon ns + +// TODO optimize inlined in header for case where u32_char is < 0x80? +void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char) { + unsigned char u8buf[4]; + uint8_t u8bytes = encode_utf8_char(u32_char, u8buf); + out_str.append(reinterpret_cast<const char*>(u8buf), u8bytes); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h new file mode 100644 index 00000000000..8627b01ff6a --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <cstdint> +#include <string> +#include <string_view> +#include <vector> + +namespace vespalib::fuzzy { + +std::vector<uint32_t> utf8_string_to_utf32(std::string_view str); + +std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str); + +/** + * Encodes a single UTF-32 codepoint `u32_char` to a 1-4 byte UTF-8 sequence and + * appends it to `out_str.` + * + * Note that this will happily encode code points that aren't technically part of + * the valid UTF-8 range, but which will still be correct in memcmp() byte-wise + * ordering, which is the API contract we expose. + * + * In particular, this includes: + * - high/low surrogate ranges U+D800 through U+DFFF (surrogate pairs not allowed + * in UTF-8) + * - U+10FFFF + 1 (outside max code point range by one) + * + * ... So don't copy this function for use as a general UTF-8 emitter, as it is not + * _technically_ conformant! + */ +void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char); + +} diff --git a/vespalib/src/vespa/vespalib/text/lowercase.h b/vespalib/src/vespa/vespalib/text/lowercase.h index 5c4e3e34e07..dc081c6ba2d 100644 --- a/vespalib/src/vespa/vespalib/text/lowercase.h +++ b/vespalib/src/vespa/vespalib/text/lowercase.h @@ -43,9 +43,9 @@ public: * @param codepoint the character codepoint to be lowercased. * @return lowercase UCS-4 character (codepoint if no lowercasing is performed). 
**/ - static uint32_t convert(uint32_t codepoint) noexcept + static uint32_t convert(uint32_t codepoint) { - if (codepoint < 0x100) [[likely]] { + if (codepoint < 0x100) { return lowercase_0_block[codepoint]; } else if (codepoint < 0x600) { return lowercase_0_5_blocks[codepoint]; diff --git a/vespalib/src/vespa/vespalib/text/utf8.cpp b/vespalib/src/vespa/vespalib/text/utf8.cpp index c950f62985f..cae2bbae682 100644 --- a/vespalib/src/vespa/vespalib/text/utf8.cpp +++ b/vespalib/src/vespa/vespalib/text/utf8.cpp @@ -16,16 +16,18 @@ void Utf8::throwX(const char *msg, unsigned int number) throw IllegalArgumentException(what); } -uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept +uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) { if (_pos == size()) { // this shouldn't happen ... - LOG(warning, "last byte %02X of Utf8Reader block was incomplete UTF-8", firstbyte); + LOG(warning, "last byte %02X of Utf8Reader block was incomplete UTF-8", + firstbyte); return fallback; } assert(hasMore()); // should never fall out of range if (! Utf8::validFirstByte(firstbyte)) { - LOG(debug, "invalid first byte %02X in Utf8Reader data block", firstbyte); + LOG(debug, "invalid first byte %02X in Utf8Reader data block", + firstbyte); return fallback; } int need = Utf8::numContBytes(firstbyte); @@ -46,7 +48,8 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) // check > 0x7F ? return r; } else { - LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", contbyte); + LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", + contbyte); return fallback; } } @@ -66,7 +69,8 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) // check > 0x7FF ? 
return r; } else { - LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", contbyte1, contbyte2); + LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", + contbyte1, contbyte2); return fallback; } } @@ -91,10 +95,11 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) uint32_t -Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept +Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) { if (! Utf8::validFirstByte(firstbyte)) { - LOG(debug, "invalid first byte %02X in Utf8Reader data block", firstbyte); + LOG(debug, "invalid first byte %02X in Utf8Reader data block", + firstbyte); return fallback; } int need = Utf8::numContBytes(firstbyte); @@ -103,7 +108,8 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe if (need == 1) { if (_p[0] == 0) { - LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte); + LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", + firstbyte); return fallback; } unsigned char contbyte = _p[0]; @@ -113,14 +119,16 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe // check > 0x7F ? return r; } else { - LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", contbyte); + LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", + contbyte); return fallback; } } if (need == 2) { if (_p[0] == 0 || _p[1] == 0) { - LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte); + LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", + firstbyte); return fallback; } unsigned char contbyte1 = _p[0]; @@ -137,14 +145,16 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe // check > 0x7FF ? 
return r; } else { - LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", contbyte1, contbyte2); + LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", + contbyte1, contbyte2); return fallback; } } assert(need == 3); if (_p[0] == 0 || _p[1] == 0 || _p[2] == 0) { - LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte); + LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", + firstbyte); return fallback; } unsigned char contbyte1 = _p[0]; @@ -158,7 +168,8 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe // check > 0xFFFF? return decode4(firstbyte, contbyte1, contbyte2, contbyte3); } else { - LOG(debug, "invalid continuation bytes %02X/%02X/%02X in Utf8Reader data block", contbyte1, contbyte2, contbyte3); + LOG(debug, "invalid continuation bytes %02X/%02X/%02X in Utf8Reader data block", + contbyte1, contbyte2, contbyte3); return fallback; } } @@ -223,7 +234,7 @@ template class Utf8Writer<vespalib::string>; template class Utf8Writer<std::string>; template <typename T> -T Utf8::filter_invalid_sequences(const T& input) noexcept +T Utf8::filter_invalid_sequences(const T& input) { T retval; Utf8Reader reader(input.c_str(), input.size()); diff --git a/vespalib/src/vespa/vespalib/text/utf8.h b/vespalib/src/vespa/vespalib/text/utf8.h index 3367bd5b3d2..98e06ca5faf 100644 --- a/vespalib/src/vespa/vespalib/text/utf8.h +++ b/vespalib/src/vespa/vespalib/text/utf8.h @@ -34,14 +34,14 @@ public: * UTF-8 encoded surrogates are also considered invalid. **/ template <typename T> - static T filter_invalid_sequences(const T& input) noexcept; + static T filter_invalid_sequences(const T& input); /** * check if a byte is valid as the first byte of an UTF-8 character. 
* @param c the byte to be checked * @return true if a valid UTF-8 character can start with this byte **/ - static bool validFirstByte(unsigned char c) noexcept { + static bool validFirstByte(unsigned char c) { return (c < 0x80 || (c > 0xC1 && c < 0xF5)); } @@ -52,12 +52,12 @@ public: * @param c the first byte (must pass validFirstByte check) * @return 0, 1, 2, or 3 **/ - static int numContBytes(unsigned char c) noexcept { + static int numContBytes(unsigned char c) { if (c < 0x80) return 0; if (c > 0xC1 && c < 0xE0) return 1; if (c > 0xDF && c < 0xF0) return 2; if (c > 0xEF && c < 0xF5) return 3; - return -1; + throwX("invalid first byte of UTF8 sequence", c); } /** @@ -65,7 +65,7 @@ public: * @param c the byte to be checked * @return true if a valid UTF-8 character can contain this byte **/ - static bool validContByte(unsigned char c) noexcept { + static bool validContByte(unsigned char c) { return (c > 0x7F && c < 0xC0); } @@ -82,7 +82,8 @@ public: * @param contbyte second byte in this UTF-8 character * @return decoded UCS-4 codepoint in range [0, 0x7FF] **/ - static uint32_t decode2(unsigned char firstbyte, unsigned char contbyte) noexcept + static uint32_t decode2(unsigned char firstbyte, + unsigned char contbyte) { uint32_t r = (firstbyte & low_5bits_mask); r <<= 6; @@ -107,7 +108,7 @@ public: **/ static uint32_t decode3(unsigned char firstbyte, unsigned char contbyte1, - unsigned char contbyte2) noexcept + unsigned char contbyte2) { uint32_t r = (firstbyte & low_4bits_mask); r <<= 6; @@ -137,7 +138,7 @@ public: static uint32_t decode4(unsigned char firstbyte, unsigned char contbyte1, unsigned char contbyte2, - unsigned char contbyte3) noexcept + unsigned char contbyte3) { uint32_t r = (firstbyte & low_3bits_mask); r <<= 6; @@ -176,14 +177,14 @@ class Utf8Reader private: size_type _pos; - uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept; + uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback); public: /** * Construct a 
reader for the given block of data * @param input data to read UTF-8 from (can be read-only) **/ - Utf8Reader(stringref input) noexcept + Utf8Reader(stringref input) : stringref(input), _pos(0) {} @@ -192,7 +193,7 @@ public: * @param start pointer to the start of the block * @param sz size of the block in bytes **/ - Utf8Reader(const char *start, size_t sz) noexcept + Utf8Reader(const char *start, size_t sz) : stringref(start, sz), _pos(0) {} @@ -200,7 +201,7 @@ public: * check if the buffer has more data. * @return true if there is more data **/ - bool hasMore() const noexcept { return _pos < size(); } + bool hasMore() const { return _pos < size(); } /** * Decode the UTF-8 character at the current position. @@ -210,7 +211,7 @@ public: * @param fallback the value to return if invalid UTF-8 is found * @return a valid UCS-4 codepoint (or the fallback value) **/ - uint32_t getChar(uint32_t fallback) noexcept { + uint32_t getChar(uint32_t fallback) { unsigned char firstbyte = (*this)[_pos++]; // always steps at least 1 position if (firstbyte < 0x80) { return firstbyte; @@ -231,13 +232,13 @@ public: * * @return a valid UCS-4 codepoint **/ - uint32_t getChar() noexcept { return getChar(Utf8::REPLACEMENT_CHAR); } + uint32_t getChar() { return getChar(Utf8::REPLACEMENT_CHAR); } /** * obtain the current byte offset position * @return position in bytes **/ - size_type getPos() const noexcept { return _pos; } + size_type getPos() const { return _pos; } }; @@ -251,7 +252,7 @@ class Utf8ReaderForZTS { private: const char * &_p; - uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept; + uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback); public: /** @@ -264,7 +265,7 @@ public: * * @param start pointer to the start of the block **/ - Utf8ReaderForZTS(const char * &start) noexcept + Utf8ReaderForZTS(const char * &start) : _p(start) {} @@ -272,7 +273,7 @@ public: * check if the buffer has more data. 
* @return true if there is more data **/ - bool hasMore() const noexcept { + bool hasMore() const { return (*_p) != '\0'; } @@ -284,9 +285,9 @@ public: * @param fallback the value to return if invalid UTF-8 is found * @return a valid UCS-4 codepoint (or the fallback value) **/ - uint32_t getChar(uint32_t fallback) noexcept { + uint32_t getChar(uint32_t fallback) { unsigned char firstbyte = *_p++; // always steps at least 1 position - if (firstbyte < 0x80) [[likely]] { + if (firstbyte < 0x80) { return firstbyte; } else { return getComplexChar(firstbyte, fallback); @@ -305,7 +306,7 @@ public: * * @return a valid UCS-4 codepoint **/ - uint32_t getChar() noexcept{ return getChar(Utf8::REPLACEMENT_CHAR); } + uint32_t getChar() { return getChar(Utf8::REPLACEMENT_CHAR); } /** * count the number of UCS-4 characters will be returned when @@ -313,7 +314,7 @@ public: * "strlen" does not count the zero termination, but bytes * that aren't valid UTF-8 will count as one character each. **/ - static size_t countChars(const char *p) noexcept { + static size_t countChars(const char *p) { Utf8ReaderForZTS reader(p); size_t i; for (i = 0; reader.hasMore(); ++i) { @@ -339,7 +340,7 @@ public: * that the writer will append to. Must be writable * and must be kept alive while the writer is active. **/ - Utf8Writer(Target &target) noexcept : _target(target) {} + Utf8Writer(Target &target) : _target(target) {} /** * append the given character to the target string. 
diff --git a/vespalib/src/vespa/vespalib/util/alloc.cpp b/vespalib/src/vespa/vespalib/util/alloc.cpp index 2ba3bc252ae..204d80340aa 100644 --- a/vespalib/src/vespa/vespalib/util/alloc.cpp +++ b/vespalib/src/vespa/vespalib/util/alloc.cpp @@ -292,7 +292,7 @@ HeapAllocator::alloc(size_t sz) const { PtrAndSize HeapAllocator::salloc(size_t sz) { if (sz == 0) { - return PtrAndSize(); + return PtrAndSize(nullptr, sz); } void * ptr = malloc(sz); if (ptr == nullptr) { @@ -311,7 +311,7 @@ void HeapAllocator::sfree(PtrAndSize alloc) noexcept { PtrAndSize AlignedHeapAllocator::alloc(size_t sz) const { - if (!sz) { return PtrAndSize(); } + if (!sz) { return PtrAndSize(nullptr, 0); } void* ptr; int result = posix_memalign(&ptr, _alignment, sz); if (result != 0) { diff --git a/vespalib/src/vespa/vespalib/util/alloc.h b/vespalib/src/vespa/vespalib/util/alloc.h index dca4d633b43..a27bcca0b47 100644 --- a/vespalib/src/vespa/vespalib/util/alloc.h +++ b/vespalib/src/vespa/vespalib/util/alloc.h @@ -49,7 +49,7 @@ public: } return *this; } - Alloc() noexcept : _alloc(), _allocator(nullptr) { } + Alloc() noexcept : _alloc(nullptr, 0), _allocator(nullptr) { } ~Alloc() noexcept { reset(); } @@ -83,9 +83,10 @@ private: Alloc(const MemoryAllocator * allocator, size_t sz) noexcept : _alloc(allocator->alloc(sz)), _allocator(allocator) - { } + { + } Alloc(const MemoryAllocator * allocator) noexcept - : _alloc(), + : _alloc(nullptr, 0), _allocator(allocator) { } void clear() noexcept { diff --git a/vespalib/src/vespa/vespalib/util/growstrategy.h b/vespalib/src/vespa/vespalib/util/growstrategy.h index 643e3f03023..02e18e44925 100644 --- a/vespalib/src/vespa/vespalib/util/growstrategy.h +++ b/vespalib/src/vespa/vespalib/util/growstrategy.h @@ -4,15 +4,14 @@ #include <algorithm> #include <cstddef> -#include <cstdint> namespace vespalib { class GrowStrategy { private: - uint32_t _initialCapacity; - uint32_t _minimumCapacity; - uint32_t _growDelta; + size_t _initialCapacity; + size_t _minimumCapacity; 
+ size_t _growDelta; float _growFactor; public: GrowStrategy() noexcept @@ -34,7 +33,7 @@ public: void setInitialCapacity(size_t v) noexcept { _initialCapacity = v; } void setGrowDelta(size_t v) noexcept { _growDelta = v; } - size_t calc_new_size(size_t base_size) const noexcept { + size_t calc_new_size(size_t base_size) const { size_t delta = (base_size * getGrowFactor()) + getGrowDelta(); size_t new_size = base_size + std::max(delta, static_cast<size_t>(1)); return std::max(new_size, getMinimumCapacity()); diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp index 2c0d0f4339d..9ed4806385d 100644 --- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp +++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp @@ -46,7 +46,7 @@ PtrAndSize MmapFileAllocator::alloc(size_t sz) const { if (sz == 0) { - return PtrAndSize(); // empty allocation + return PtrAndSize(nullptr, 0); // empty allocation } sz = round_up_to_page_size(sz); uint64_t offset = alloc_area(sz); diff --git a/vespalib/src/vespa/vespalib/util/small_vector.h b/vespalib/src/vespa/vespalib/util/small_vector.h index ba166362d33..b47cb5903b9 100644 --- a/vespalib/src/vespa/vespalib/util/small_vector.h +++ b/vespalib/src/vespa/vespalib/util/small_vector.h @@ -216,7 +216,7 @@ public: template <typename T, size_t N, size_t M> bool operator==(const SmallVector<T,N> &a, - const SmallVector<T,M> &b) noexcept + const SmallVector<T,M> &b) { if (a.size() != b.size()) { return false; |