diff options
88 files changed, 2317 insertions, 317 deletions
diff --git a/client/go/.gitignore b/client/go/.gitignore new file mode 100644 index 00000000000..2275a3bddb7 --- /dev/null +++ b/client/go/.gitignore @@ -0,0 +1,3 @@ +bin/ +pkg/ +*.sum diff --git a/client/go/build.sh b/client/go/build.sh new file mode 100755 index 00000000000..9edc5df4002 --- /dev/null +++ b/client/go/build.sh @@ -0,0 +1,7 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# Execute from this directory to build the command-line client to bin/vespa +export GOPATH=`pwd` +cd "$GOPATH/src/" +go test ./... +go install diff --git a/client/go/src/cmd/command_tester.go b/client/go/src/cmd/command_tester.go new file mode 100644 index 00000000000..d7899c51436 --- /dev/null +++ b/client/go/src/cmd/command_tester.go @@ -0,0 +1,55 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// A helper for testing commands +// Author: bratseth + +package cmd + +import ( + "bytes" + "github.com/vespa-engine/vespa/utils" + "github.com/stretchr/testify/assert" + "io/ioutil" + "net/http" + "testing" + "time" +) + +func executeCommand(t *testing.T, client *mockHttpClient, args []string, moreArgs []string) (standardout string) { + utils.ActiveHttpClient = client + + // Reset - persistent flags in Cobra persists over tests + rootCmd.SetArgs([]string{"status", "-t", ""}) + rootCmd.Execute() + + b := bytes.NewBufferString("") + utils.Out = b + rootCmd.SetArgs(append(args, moreArgs...)) + rootCmd.Execute() + out, err := ioutil.ReadAll(b) + assert.Empty(t, err, "No error") + return string(out) +} + +type mockHttpClient struct { + // The HTTP status code that will be returned from the next invocation. Default: 200 + nextStatus int + + // The response body code that will be returned from the next invocation. Default: "" + nextBody string + + // A recording of the last HTTP request made through this + lastRequest *http.Request +} + +func (c *mockHttpClient) Do(request *http.Request, timeout time.Duration) (response *http.Response, error error) { + if c.nextStatus == 0 { + c.nextStatus = 200 + } + c.lastRequest = request + return &http.Response{ + StatusCode: c.nextStatus, + Body: ioutil.NopCloser(bytes.NewBufferString(c.nextBody)), + Header: make(http.Header), + }, + nil +} diff --git a/client/go/src/cmd/config.go b/client/go/src/cmd/config.go new file mode 100644 index 00000000000..647d13939b1 --- /dev/null +++ b/client/go/src/cmd/config.go @@ -0,0 +1,63 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa config command +// author: bratseth + +package cmd + +import ( + "github.com/spf13/cobra" + "github.com/spf13/viper" + "github.com/vespa-engine/vespa/utils" + "os" + "path/filepath" +) + +func init() { + rootCmd.AddCommand(configCmd) +} + +var configCmd = &cobra.Command{ + Use: "config", + Short: "Configure the Vespa command", + Long: `TODO`, + Run: func(cmd *cobra.Command, args []string) { + }, +} + +func readConfig() { + home, err := os.UserHomeDir() + configName := ".vespa" + configType := "yaml" + + cobra.CheckErr(err) + viper.AddConfigPath(home) + viper.SetConfigType(configType) + viper.SetConfigName(configName) + viper.AutomaticEnv() + + viper.ReadInConfig(); +} + +// WIP: Not used yet +func writeConfig() { + //viper.BindPFlag("container-target", rootCmd.PersistentFlags().Lookup("container-target")) + //viper.SetDefault("container-target", "http://127.0.0.1:8080") + + home, _ := os.UserHomeDir() + configName := ".vespa" + configType := "yaml" + + // Viper bug: WriteConfig() will not create the file if missing + configPath := filepath.Join(home, configName + "." + configType) + _, statErr := os.Stat(configPath) + if !os.IsExist(statErr) { + if _, createErr := os.Create(configPath); createErr != nil { + utils.Error("Warning: Can not remember flag parameters: " + createErr.Error()) + } + } + + writeErr := viper.WriteConfig() + if writeErr != nil { + utils.Error("Could not write config:", writeErr.Error()) + } +}
\ No newline at end of file diff --git a/client/go/src/cmd/deploy.go b/client/go/src/cmd/deploy.go new file mode 100644 index 00000000000..4caeef2b859 --- /dev/null +++ b/client/go/src/cmd/deploy.go @@ -0,0 +1,146 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa deploy command +// Author: bratseth + +package cmd + +import ( + "archive/zip" + "errors" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/utils" + "io" + "io/ioutil" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" +) + +func init() { + rootCmd.AddCommand(deployCmd) +} + +var deployCmd = &cobra.Command{ + Use: "deploy application-package-dir OR application.zip", + Short: "Deploys an application package", + Long: `TODO`, + Args: func(cmd *cobra.Command, args []string) error { + if len(args) > 1 { + return errors.New("Expected an application as the only argument") + } + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + if len(args) == 0 { + deploy("src/main/application") + } else { + deploy(args[0]) + } + }, +} + +func deploy(application string) { + if ! strings.HasSuffix(application, ".zip") { + tempZip, error := ioutil.TempFile("", "application.zip") + if error != nil { + utils.Error("Could not create a temporary zip file for the application package") + utils.Detail(error.Error()) + return + } + + error = zipDir(application, tempZip.Name()) + if (error != nil) { + utils.Error(error.Error()) + return + } + defer os.Remove(tempZip.Name()) + application = tempZip.Name() + } + + zipFileReader, zipFileError := os.Open(application) + if zipFileError != nil { + utils.Error("Could not open application package at " + application) + utils.Detail(zipFileError.Error()) + return + } + + url, _ := url.Parse(getTarget(deployContext).deploy + "/application/v2/tenant/default/prepareandactivate") + header := http.Header{} + header.Add("Content-Type", "application/zip") + request := &http.Request{ + URL: url, + Method: "POST", + Header: header, + Body: ioutil.NopCloser(zipFileReader), + } + serviceDescription := "Deploy service" + response := utils.HttpDo(request, time.Minute * 10, serviceDescription) + defer response.Body.Close() + if (response == nil) { + return + } else if response.StatusCode == 200 { + utils.Success("Success") + } else if response.StatusCode % 100 == 4 { + utils.Error("Invalid application package") + // TODO: Output error in body + } else { + utils.Error("Error from", strings.ToLower(serviceDescription), "at", request.URL.Host) + utils.Detail("Response status:", response.Status) + } +} + +func zipDir(dir string, destination string) error { + if filepath.IsAbs(dir) { + message := "Path must be relative, but '" + dir + "'" + return errors.New(message) + } + if ! utils.PathExists(dir) { + message := "'" + dir + "' should be an application package zip or dir, but does not exist" + return errors.New(message) + } + if ! utils.IsDirectory(dir) { + message := "'" + dir + "' should be an application package dir, but is a (non-zip) file" + return errors.New(message) + } + + file, err := os.Create(destination) + if err != nil { + message := "Could not create a temporary zip file for the application package: " + err.Error() + return errors.New(message) + } + defer file.Close() + + w := zip.NewWriter(file) + defer w.Close() + + walker := func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + zippath := strings.TrimPrefix(path, dir) + zipfile, err := w.Create(zippath) + if err != nil { + return err + } + + _, err = io.Copy(zipfile, file) + if err != nil { + return err + } + return nil + } + return filepath.Walk(dir, walker) +} + diff --git a/client/go/src/cmd/deploy_test.go b/client/go/src/cmd/deploy_test.go new file mode 100644 index 00000000000..d7f1353dd23 --- /dev/null +++ b/client/go/src/cmd/deploy_test.go @@ -0,0 +1,53 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// deploy command tests +// Author: bratseth + +package cmd + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestDeployZip(t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mSuccess\n", + executeCommand(t, client, []string{"deploy", "testdata/application.zip"}, []string{})) + assertDeployRequestMade("http://127.0.0.1:19071", client, t) +} + +func TestDeployZipWithURLTargetArgument(t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mSuccess\n", + executeCommand(t, client, []string{"deploy", "testdata/application.zip", "-t", "http://target:19071"}, []string{})) + assertDeployRequestMade("http://target:19071", client, t) +} + +func TestDeployZipWitLocalTargetArgument(t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mSuccess\n", + executeCommand(t, client, []string{"deploy", "testdata/application.zip", "-t", "local"}, []string{})) + assertDeployRequestMade("http://127.0.0.1:19071", client, t) +} + +func TestDeployDirectory(t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mSuccess\n", + executeCommand(t, client, []string{"deploy", "testdata/src/main/application"}, []string{})) + assertDeployRequestMade("http://127.0.0.1:19071", client, t) +} + +func assertDeployRequestMade(target string, client *mockHttpClient, t *testing.T) { + assert.Equal(t, target + "/application/v2/tenant/default/prepareandactivate", client.lastRequest.URL.String()) + assert.Equal(t, "application/zip", client.lastRequest.Header.Get("Content-Type")) + assert.Equal(t, "POST", client.lastRequest.Method) + var body = client.lastRequest.Body + assert.NotNil(t, body) + buf := make([]byte, 7) // Just check the first few bytes + body.Read(buf) + assert.Equal(t, "PK\x03\x04\x14\x00\b", string(buf)) +}
\ No newline at end of file diff --git a/client/go/src/cmd/document.go b/client/go/src/cmd/document.go new file mode 100644 index 00000000000..86471a6116f --- /dev/null +++ b/client/go/src/cmd/document.go @@ -0,0 +1,94 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa document command +// author: bratseth + +package cmd + +import ( + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/utils" + "io/ioutil" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +func init() { + rootCmd.AddCommand(documentCmd) + statusCmd.AddCommand(documentPutCmd) + statusCmd.AddCommand(documentGetCmd) +} + +var documentCmd = &cobra.Command{ + Use: "document mynamespace/mydocumenttype/myid document.json", + Short: "Issue document operations (put by default)", + Long: `TODO`, + // TODO: Check args + Run: func(cmd *cobra.Command, args []string) { + put(args[0], args[1]) + }, +} + +var documentPutCmd = &cobra.Command{ + Use: "put mynamespace/mydocumenttype/myid mydocument.json", + Short: "Puts the document in the given file", + Long: `TODO`, + // TODO: This crashes with the above + // TODO: Extract document id from the content + // TODO: Check args + Run: func(cmd *cobra.Command, args []string) { + put(args[0], args[1]) + }, +} + +var documentGetCmd = &cobra.Command{ + Use: "get documentId", + Short: "Gets a document", + Long: `TODO`, + // TODO: Check args + Run: func(cmd *cobra.Command, args []string) { + get(args[0]) + }, +} + +func get(documentId string) { + // TODO +} + +func put(documentId string, jsonFile string) { + url, _ := url.Parse(getTarget(documentContext).document + "/document/v1/" + documentId) + + header := http.Header{} + header.Add("Content-Type", "application/json") + + fileReader, fileError := os.Open(jsonFile) + if fileError != nil { + utils.Error("Could not open file at " + jsonFile) + utils.Detail(fileError.Error()) + return + } + + request := &http.Request{ + URL: url, + Method: "POST", + Header: header, + Body: ioutil.NopCloser(fileReader), + } + serviceDescription := "Container (document/v1 API)" + response := utils.HttpDo(request, time.Second * 60, serviceDescription) + defer response.Body.Close() + if (response == nil) { + return + } else if response.StatusCode == 200 { + utils.Success("Success") // TODO: Change to something including document id + } else if response.StatusCode % 100 == 4 { + utils.Error("Invalid document JSON") + utils.Detail(response.Status) + // TODO: Output error in body + } else { + utils.Error("Error from", strings.ToLower(serviceDescription), "at", request.URL.Host) + utils.Detail("Response status:", response.Status) + } +}
\ No newline at end of file diff --git a/client/go/src/cmd/document_test.go b/client/go/src/cmd/document_test.go new file mode 100644 index 00000000000..7e9af0ab921 --- /dev/null +++ b/client/go/src/cmd/document_test.go @@ -0,0 +1,30 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// document command tests +// Author: bratseth + +package cmd + +import ( + "github.com/stretchr/testify/assert" + "github.com/vespa-engine/vespa/utils" + "io/ioutil" + "testing" +) + +func TestDocumentPut(t *testing.T) { + assertDocumentPut("mynamespace/music/docid/1", "testdata/A-Head-Full-of-Dreams.json", t) +} + +func assertDocumentPut(documentId string, jsonFile string, t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mSuccess\n", + executeCommand(t, client, []string{"document", documentId, jsonFile}, []string{})) + target := getTarget(documentContext).document + assert.Equal(t, target + "/document/v1/" + documentId, client.lastRequest.URL.String()) + assert.Equal(t, "application/json", client.lastRequest.Header.Get("Content-Type")) + assert.Equal(t, "POST", client.lastRequest.Method) + + fileContent, _ := ioutil.ReadFile(jsonFile) + assert.Equal(t, string(fileContent), utils.ReaderToString(client.lastRequest.Body)) +} diff --git a/client/go/src/cmd/init.go b/client/go/src/cmd/init.go new file mode 100644 index 00000000000..f5b20115794 --- /dev/null +++ b/client/go/src/cmd/init.go @@ -0,0 +1,157 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa init command +// author: bratseth + +package cmd + +import ( + "archive/zip" + "errors" + "path/filepath" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/utils" + "io" + "io/ioutil" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +// Set this to test without downloading this file from github +var existingSampleAppsZip string + +func init() { + existingSampleAppsZip = "" + rootCmd.AddCommand(initCmd) +} + +var initCmd = &cobra.Command{ + // TODO: "application" and "list" subcommands? + Use: "init applicationName source", + Short: "Creates the files and directory structure for a new Vespa application", + Long: `TODO`, + Args: func(cmd *cobra.Command, args []string) error { + if len(args) != 2 { + return errors.New("vespa init requires a project name and source") + } + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + initApplication(args[0], args[1]) + }, +} + +func initApplication(name string, source string) { + zipFile := getSampleAppsZip() + if zipFile == nil { + return + } + if existingSampleAppsZip == "" { // Indicates we created a temp file now + defer os.Remove(zipFile.Name()) + } + + createErr := os.Mkdir(name, 0755) + if createErr != nil { + utils.Error("Could not create directory '" + name + "'") + utils.Detail(createErr.Error()) + return + } + + zipReader, zipOpenError := zip.OpenReader(zipFile.Name()) + if zipOpenError != nil { + utils.Error("Could not open sample apps zip '" + zipFile.Name() + "'") + utils.Detail(zipOpenError.Error()) + } + defer zipReader.Close() + + found := false + for _, f := range zipReader.File { + zipEntryPrefix := "sample-apps-master/" + source + "/" + if strings.HasPrefix(f.Name, zipEntryPrefix) { + found = true + copyError := copy(f, name, zipEntryPrefix) + if copyError != nil { + utils.Error("Could not copy zip entry '" + f.Name + "' to " + name) + utils.Detail(copyError.Error()) + return + } + } + } + if !found { + utils.Error("Could not find source application '" + source + "'") + } else { + utils.Success("Created " + name) + } +} + +func getSampleAppsZip() *os.File { + if existingSampleAppsZip != "" { + existing, openExistingError := os.Open(existingSampleAppsZip) + if openExistingError != nil { + utils.Error("Could not open existing sample apps zip file '" + existingSampleAppsZip + "'") + utils.Detail(openExistingError.Error()) + } + return existing + } + + // TODO: Cache it? + utils.Detail("Downloading sample apps ...") // TODO: Spawn thread to indicate progress + zipUrl, _ := url.Parse("https://github.com/vespa-engine/sample-apps/archive/refs/heads/master.zip") + request := &http.Request{ + URL: zipUrl, + Method: "GET", + } + response := utils.HttpDo(request, time.Minute * 60, "GitHub") + defer response.Body.Close() + if response.StatusCode != 200 { + utils.Error("Could not download sample apps from github") + utils.Detail(response.Status) + return nil + } + + destination, tempFileError := ioutil.TempFile("", "prefix") + if tempFileError != nil { + utils.Error("Could not create a temp file to hold sample apps") + utils.Detail(tempFileError.Error()) + } + // destination, _ := os.Create("./" + name + "/sample-apps.zip") + // defer destination.Close() + _, err := io.Copy(destination, response.Body) + if err != nil { + utils.Error("Could not download sample apps from GitHub") + utils.Detail(err.Error()) + return nil + } + return destination +} + +func copy(f *zip.File, destinationDir string, zipEntryPrefix string) error { + destinationPath := filepath.Join(destinationDir, filepath.FromSlash(strings.TrimPrefix(f.Name, zipEntryPrefix))) + if strings.HasSuffix(f.Name, "/") { + if f.Name != zipEntryPrefix { // root is already created + createError := os.Mkdir(destinationPath, 0755) + if createError != nil { + return createError + } + } + } else { + zipEntry, zipEntryOpenError := f.Open() + if zipEntryOpenError != nil { + return zipEntryOpenError + } + defer zipEntry.Close() + + destination, createError := os.Create(destinationPath) + if createError != nil { + return createError + } + + _, copyError := io.Copy(destination, zipEntry) + if copyError != nil { + return copyError + } + } + return nil +} diff --git a/client/go/src/cmd/init_test.go b/client/go/src/cmd/init_test.go new file mode 100644 index 00000000000..f0f832293a7 --- /dev/null +++ b/client/go/src/cmd/init_test.go @@ -0,0 +1,32 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// init command tests +// Author: bratseth + +package cmd + +import ( + "github.com/vespa-engine/vespa/utils" + "github.com/stretchr/testify/assert" + "os" + "testing" + "path/filepath" +) + +func TestInit(t *testing.T) { + assertCreated("mytestapp", "album-recommendation-selfhosted", t) +} + +func assertCreated(app string, sampleAppName string, t *testing.T) { + existingSampleAppsZip = "testdata/sample-apps-master.zip" + standardOut := executeCommand(t, &mockHttpClient{}, []string{"init", app, sampleAppName}, []string{}) + defer os.RemoveAll(app) + assert.Equal(t, "\x1b[32mCreated " + app + "\n", standardOut) + assert.True(t, utils.PathExists(filepath.Join(app, "README.md"))) + assert.True(t, utils.PathExists(filepath.Join(app, "src", "main", "application"))) + assert.True(t, utils.IsDirectory(filepath.Join(app, "src", "main", "application"))) + + servicesStat, _ := os.Stat(filepath.Join(app, "src", "main", "application", "services.xml")) + var servicesSize int64 + servicesSize = 2474 + assert.Equal(t, servicesSize, servicesStat.Size()) +} diff --git a/client/go/src/cmd/query.go b/client/go/src/cmd/query.go new file mode 100644 index 00000000000..2e33974a6b5 --- /dev/null +++ b/client/go/src/cmd/query.go @@ -0,0 +1,84 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa query command +// author: bratseth + +package cmd + +import ( + "bufio" + "errors" + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/utils" + "regexp" + "strings" + "net/url" +) + +func init() { + rootCmd.AddCommand(queryCmd) +} + +var queryCmd = &cobra.Command{ + Use: "query \"yql=select from sources * where title contains 'foo'\" hits=5", + Short: "Issue a query to Vespa", + Long: `TODO`, + Args: func(cmd *cobra.Command, args []string) error { + if len(args) < 1 { + return errors.New("vespa query requires at least one argument containing the query string") + } + return nil + }, + Run: func(cmd *cobra.Command, args []string) { + query(args[0]) + }, +} + +func query(argument string) { + if ! startsByParameter(argument) { // Default parameter + argument = "yql=" + argument + } + + argument = escapePayload(argument) + if argument == "" { + return + } + + path := "/search/?" + argument + response := utils.HttpGet(getTarget(queryContext).query, path, "Container") + if (response == nil) { + return + } + defer response.Body.Close() + + defer response.Body.Close() + if (response.StatusCode == 200) { + // TODO: Pretty-print body + scanner := bufio.NewScanner(response.Body) + for ;scanner.Scan(); { + utils.Print(scanner.Text()) + } + if err := scanner.Err(); err != nil { + utils.Error(err.Error()) + } + } else if response.StatusCode % 100 == 4 { + utils.Error("Invalid query (status ", response.Status, ")") + utils.Detail() + } else { + utils.Error("Request failed") + utils.Detail(response.Status) + } +} + +func startsByParameter(argument string) bool { + match, _ := regexp.MatchString("[a-zA-Z0-9_]+=", "peach") // TODO: Allow dot in parameters + return match +} + +func escapePayload(argument string) string { + equalsIndex := strings.Index(argument, "=") + if equalsIndex < 1 { + utils.Error("A query argument must be on the form parameter=value, but was '" + argument + "'") + return "" + } + return argument[0:equalsIndex] + "=" + url.QueryEscape(argument[equalsIndex + 1:len(argument)]) +} diff --git a/client/go/src/cmd/query_test.go b/client/go/src/cmd/query_test.go new file mode 100644 index 00000000000..2783bf34484 --- /dev/null +++ b/client/go/src/cmd/query_test.go @@ -0,0 +1,36 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// query command tests +// Author: bratseth + +package cmd + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestQuery(t *testing.T) { + assertQuery("?yql=select+from+sources+%2A+where+title+contains+%27foo%27", + "select from sources * where title contains 'foo'", t) +} + +func IgnoreTestQueryWithParameters(t *testing.T) { + assertQuery("?", "select from sources * where title contains 'foo'&hits=5", t) +} + +func IgnoreTestSimpleQueryMissingQuestionMark(t *testing.T) { + assertQuery("?", "query=select from sources * where title contains 'foo'", t) +} + +func IgnoreTestSimpleQueryMissingQuestionMarkAndQueryEquals(t *testing.T) { + assertQuery("?query=", "select from sources * where text contains 'foo'", t) +} + +func assertQuery(expectedQuery string, query string, t *testing.T) { + client := &mockHttpClient{ nextBody: "query result", } + assert.Equal(t, + "query result\n", + executeCommand(t, client, []string{"query", query},[]string{}), + "query output") + assert.Equal(t, getTarget(queryContext).query + "/search/" + expectedQuery, client.lastRequest.URL.String()) +} diff --git a/client/go/src/cmd/root.go b/client/go/src/cmd/root.go new file mode 100644 index 00000000000..8f33cb47c43 --- /dev/null +++ b/client/go/src/cmd/root.go @@ -0,0 +1,34 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Root Cobra command: vespa +// author: bratseth + +package cmd + +import ( + "github.com/spf13/cobra" +) + +var ( + // flags + // TODO: add timeout flag + // TODO: add flag to show http request made + targetArgument string + + rootCmd = &cobra.Command{ + Use: "vespa", + Short: "A command-line tool for working with Vespa instances", + Long: `TO +DO`, + } +) + +func init() { + cobra.OnInitialize(readConfig) + rootCmd.PersistentFlags().StringVarP(&targetArgument, "target", "t", "local", "The name or URL of the recipient of this command") +} + +// Execute executes the root command. +func Execute() error { + err := rootCmd.Execute() + return err +} diff --git a/client/go/src/cmd/status.go b/client/go/src/cmd/status.go new file mode 100644 index 00000000000..809b63623ae --- /dev/null +++ b/client/go/src/cmd/status.go @@ -0,0 +1,59 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// vespa status command +// author: bratseth + +package cmd + +import ( + "github.com/spf13/cobra" + "github.com/vespa-engine/vespa/utils" +) + +func init() { + rootCmd.AddCommand(statusCmd) + statusCmd.AddCommand(statusContainerCmd) + statusCmd.AddCommand(statusConfigServerCmd) +} + +var statusCmd = &cobra.Command{ + Use: "status", + Short: "Verifies that a vespa target is ready to use (container by default)", + Long: `TODO`, + Run: func(cmd *cobra.Command, args []string) { + status(getTarget(queryContext).query, "Container") + }, +} + +var statusContainerCmd = &cobra.Command{ + Use: "container", + Short: "Verifies that your Vespa container endpoint is ready [Default]", + Long: `TODO`, + Run: func(cmd *cobra.Command, args []string) { + status(getTarget(queryContext).query, "Container") + }, +} + +var statusConfigServerCmd = &cobra.Command{ + Use: "config-server", + Short: "Verifies that your Vespa config server endpoint is ready", + Long: `TODO`, + Run: func(cmd *cobra.Command, args []string) { + status(getTarget(deployContext).deploy, "Config server") + }, +} + +func status(target string, description string) { + path := "/ApplicationStatus" + response := utils.HttpGet(target, path, description) + if (response == nil) { + return + } + defer response.Body.Close() + + if response.StatusCode != 200 { + utils.Error(description, "at", target, "is not ready") + utils.Detail("Response status:", response.Status) + } else { + utils.Success(description, "at", target, "is ready") + } +} diff --git a/client/go/src/cmd/status_test.go b/client/go/src/cmd/status_test.go new file mode 100644 index 00000000000..70fb33c27de --- /dev/null +++ b/client/go/src/cmd/status_test.go @@ -0,0 +1,70 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// status command tests +// Author: bratseth + +package cmd + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestStatusConfigServerCommand(t *testing.T) { + assertConfigServerStatus("http://127.0.0.1:19071", []string{}, t) +} + +func TestStatusConfigServerCommandWithURLTarget(t *testing.T) { + assertConfigServerStatus("http://mydeploytarget", []string{"-t", "http://mydeploytarget"}, t) +} + +func TestStatusConfigServerCommandWithLocalTarget(t *testing.T) { + assertConfigServerStatus("http://127.0.0.1:19071", []string{"-t", "local"}, t) +} + +func TestStatusContainerCommand(t *testing.T) { + assertContainerStatus("http://127.0.0.1:8080", []string{}, t) +} + +func TestStatusContainerCommandWithUrlTarget(t *testing.T) { + assertContainerStatus("http://mycontainertarget", []string{"-t", "http://mycontainertarget"}, t) +} + +func TestStatusContainerCommandWithLocalTarget(t *testing.T) { + assertContainerStatus("http://127.0.0.1:8080", []string{"-t", "local"}, t) +} + +func TestStatusErrorResponse(t *testing.T) { + assertContainerError("http://127.0.0.1:8080", []string{}, t) +} + +func assertConfigServerStatus(target string, args []string, t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mConfig server at " + target + " is ready\n", + executeCommand(t, client, []string{"status", "config-server"}, args), + "vespa status config-server") + assert.Equal(t, target + "/ApplicationStatus", client.lastRequest.URL.String()) +} + +func assertContainerStatus(target string, args []string, t *testing.T) { + client := &mockHttpClient{} + assert.Equal(t, + "\x1b[32mContainer at " + target + " is ready\n", + executeCommand(t, client, []string{"status", "container"}, args), + "vespa status container") + assert.Equal(t, target + "/ApplicationStatus", client.lastRequest.URL.String()) + + assert.Equal(t, + "\x1b[32mContainer at " + target + " is ready\n", + executeCommand(t, client, []string{"status"}, args), + "vespa status (the default)") + assert.Equal(t, target + "/ApplicationStatus", client.lastRequest.URL.String()) +} + +func assertContainerError(target string, args []string, t *testing.T) { + client := &mockHttpClient{ nextStatus: 500,} + assert.Equal(t, + "\x1b[31mContainer at " + target + " is not ready\n\x1b[33mResponse status: \n", + executeCommand(t, client, []string{"status", "container"}, args), + "vespa status container") +}
\ No newline at end of file diff --git a/client/go/src/cmd/target.go b/client/go/src/cmd/target.go new file mode 100644 index 00000000000..1d56c39540a --- /dev/null +++ b/client/go/src/cmd/target.go @@ -0,0 +1,60 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Models a target for Vespa commands +// author: bratseth + +package cmd + +import ( + "github.com/vespa-engine/vespa/utils" + "strings" +) + +type target struct { + deploy string + query string + document string +} + +type context int32 +const ( + deployContext context = 0 + queryContext context = 1 + documentContext context = 2 +) + +func getTarget(targetContext context) *target { + if strings.HasPrefix(targetArgument, "http") { + // TODO: Add default ports if missing + switch targetContext { + case deployContext: + return &target{ + deploy: targetArgument, + } + case queryContext: + return &target{ + query: targetArgument, + } + case documentContext: + return &target{ + document: targetArgument, + } + } + } + + // Otherwise, target is a name + + if targetArgument == "" || targetArgument == "local" { + return &target{ + deploy: "http://127.0.0.1:19071", + query: "http://127.0.0.1:8080", + document: "http://127.0.0.1:8080", + } + } + + if targetArgument == "cloud" { + return nil // TODO + } + + utils.Error("Unknown target argument '" + targetArgument + ": Use 'local', 'cloud' or an URL") + return nil +}
\ No newline at end of file diff --git a/client/go/src/cmd/testdata/A-Head-Full-of-Dreams.json b/client/go/src/cmd/testdata/A-Head-Full-of-Dreams.json new file mode 100644 index 00000000000..b68872a961e --- /dev/null +++ b/client/go/src/cmd/testdata/A-Head-Full-of-Dreams.json @@ -0,0 +1,14 @@ +{ + "fields": { + "album": "A Head Full of Dreams", + "artist": "Coldplay", + "year": 2015, + "category_scores": { + "cells": [ + { "address" : { "cat" : "pop" }, "value": 1 }, + { "address" : { "cat" : "rock" }, "value": 0.2 }, + { "address" : { "cat" : "jazz" }, "value": 0 } + ] + } + } +} diff --git a/client/go/src/cmd/testdata/application.zip b/client/go/src/cmd/testdata/application.zip Binary files differnew file mode 100644 index 00000000000..b017db6472d --- /dev/null +++ b/client/go/src/cmd/testdata/application.zip diff --git a/client/go/src/cmd/testdata/sample-apps-master.zip b/client/go/src/cmd/testdata/sample-apps-master.zip Binary files differnew file mode 100644 index 00000000000..6ad49361072 --- /dev/null +++ b/client/go/src/cmd/testdata/sample-apps-master.zip diff --git a/client/go/src/cmd/testdata/src/main/application/hosts.xml b/client/go/src/cmd/testdata/src/main/application/hosts.xml new file mode 100644 index 00000000000..5dd3ed0dded --- /dev/null +++ b/client/go/src/cmd/testdata/src/main/application/hosts.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> +<hosts> + <host name="localhost"> + <alias>node1</alias> + </host> +</hosts> + diff --git a/client/go/src/cmd/testdata/src/main/application/schemas/msmarco.sd b/client/go/src/cmd/testdata/src/main/application/schemas/msmarco.sd new file mode 100644 index 00000000000..183e1a6421f --- /dev/null +++ b/client/go/src/cmd/testdata/src/main/application/schemas/msmarco.sd @@ -0,0 +1,299 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +schema msmarco { + document msmarco { + + field id type string { + indexing: summary | attribute + } + + field title type string { + indexing: index | summary + index: enable-bm25 + stemming: best + } + + field url type string { + indexing: index | summary + } + + field body type string { + indexing: index | summary + index: enable-bm25 + summary: dynamic + stemming: best + } + + field title_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field body_word2vec type tensor<float>(x[500]) { + indexing: attribute + } + + field title_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field body_gse type tensor<float>(x[512]) { + indexing: attribute + } + + field title_bert type tensor<float>(x[768]) { + indexing: attribute + } + + field body_bert type tensor<float>(x[768]) { + indexing: attribute + } + + } + + document-summary minimal { + summary id type string {} + } + + fieldset default { + fields: title, body + } + + rank-profile default { + first-phase { + expression: nativeRank(title, body) + } + } + + rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + } + } + + rank-profile word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile gse_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_word2vec_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_gse_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: bm25(title) + bm25(body) + dot_product_title() + dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_bm25_bert_title_body_all inherits default { + function dot_product_title() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: 0.9005951 * bm25(title) + 2.2043643 * bm25(body) + 0.13506432 * dot_product_title() + 0.5840874 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_and inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.12408562 * bm25(title) + 0.36673144 * bm25(body) + 6.2273498 * dot_product_title() + 5.671119 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile listwise_linear_bm25_gse_title_body_or inherits default { + function dot_product_title() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + first-phase { + expression: 0.7150663 * bm25(title) + 0.9480147 * bm25(body) + 1.560068 * dot_product_title() + 1.5062317 * dot_product_body() + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + rankingExpression(dot_product_title) + rankingExpression(dot_product_body) + } + } + + rank-profile pointwise_linear_bm25 inherits default { + first-phase { + expression: 0.22499913 * bm25(title) + 0.07596389 * bm25(body) + } + } + + rank-profile listwise_linear_bm25 inherits default { + first-phase { + expression: 0.13446581 * bm25(title) + 0.5716889 * bm25(body) + } + } + + rank-profile collect_rank_features_embeddings inherits default { + function dot_product_title_word2vec() { + expression: sum(query(tensor)*attribute(title_word2vec)) + } + function dot_product_body_word2vec() { + expression: sum(query(tensor)*attribute(body_word2vec)) + } + function dot_product_title_gse() { + expression: sum(query(tensor_gse)*attribute(title_gse)) + } + function dot_product_body_gse() { + expression: sum(query(tensor_gse)*attribute(body_gse)) + } + function dot_product_title_bert() { + expression: sum(query(tensor_bert)*attribute(title_bert)) + } + function dot_product_body_bert() { + expression: sum(query(tensor_bert)*attribute(body_bert)) + } + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + rankingExpression(dot_product_title_word2vec) + rankingExpression(dot_product_body_word2vec) + rankingExpression(dot_product_title_gse) + rankingExpression(dot_product_body_gse) + rankingExpression(dot_product_title_bert) + rankingExpression(dot_product_body_bert) + } + } + + rank-profile collect_rank_features inherits default { + first-phase { + expression: random + } + ignore-default-rank-features + rank-features { + bm25(title) + bm25(body) + nativeRank(title) + nativeRank(body) + } + } +} diff --git a/client/go/src/cmd/testdata/src/main/application/services.xml b/client/go/src/cmd/testdata/src/main/application/services.xml new file mode 100644 index 00000000000..766434798f0 --- /dev/null +++ b/client/go/src/cmd/testdata/src/main/application/services.xml @@ -0,0 +1,61 @@ +<?xml version='1.0' encoding='UTF-8'?> +<!-- Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> + +<services version="1.0"> + + <container id="text_search" version="1.0"> + <document-api/> + <search> + + <!-- Config for bolding in search result snippets --> + <config name="container.qr-searchers"> + <tag> + <bold> + <open><strong></open> + <close></strong></close> + </bold> + <separator>...</separator> + </tag> + </config> + + </search> + <document-processing/> + + <component id="com.yahoo.language.simple.SimpleLinguistics"/> + + <handler id="ai.vespa.example.text_search.site.SiteHandler" bundle="text-search"> + <binding>http://*/site/*</binding> + <binding>http://*/site</binding> + <config name="ai.vespa.example.text_search.site.site-handler"> + <vespaHostName>localhost</vespaHostName> + <vespaHostPort>8080</vespaHostPort> + </config> + </handler> + + <nodes jvmargs="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=*:8998"> + <node hostalias="node1" /> + </nodes> + + </container> + + <content id="msmarco" version="1.0"> + + <!-- Config for search result snippets --> + <config name="vespa.config.search.summary.juniperrc"> + <max_matches>2</max_matches> + <length>1000</length> + <surround_max>500</surround_max> + <min_length>300</min_length> + </config> + + <redundancy>2</redundancy> + <documents> + <document type='msmarco' mode="index"/> + <document-processing cluster="text_search"/> + </documents> + <nodes> + <node distribution-key='0' hostalias='node1'/> + </nodes> + </content> + +</services> diff --git a/client/go/src/go.mod b/client/go/src/go.mod new file mode 100644 index 00000000000..40499f09e04 --- /dev/null +++ b/client/go/src/go.mod @@ -0,0 +1,9 @@ +module github.com/vespa-engine/vespa + +go 1.16 + +require ( + github.com/spf13/cobra v1.2.1 + github.com/spf13/viper v1.8.1 + github.com/stretchr/testify v1.7.0 +) diff --git a/client/go/src/main.go b/client/go/src/main.go new file mode 100644 index 00000000000..3803ee357b2 --- /dev/null +++ b/client/go/src/main.go @@ -0,0 +1,13 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Cobra commands main file +// Author: bratseth + +package main + +import ( + "github.com/vespa-engine/vespa/cmd" +) + +func main() { + cmd.Execute() +} diff --git a/client/go/src/utils/http.go b/client/go/src/utils/http.go new file mode 100644 index 00000000000..977669a4da2 --- /dev/null +++ b/client/go/src/utils/http.go @@ -0,0 +1,56 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// A HTTP wrapper which handles some errors and provides a way to replace the HTTP client by a mock. +// Author: bratseth + +package utils + +import ( + "net/http" + "net/url" + "strings" + "time" +) + +// Set this to a mock HttpClient instead to unit test HTTP requests +var ActiveHttpClient = CreateClient(time.Second * 10) + +type HttpClient interface { + Do(request *http.Request, timeout time.Duration) (response *http.Response, error error) +} + +type defaultHttpClient struct { + client *http.Client +} + +func (c *defaultHttpClient) Do(request *http.Request, timeout time.Duration) (response *http.Response, error error) { + if c.client.Timeout != timeout { // Create a new client with the right timeout + c.client = &http.Client{Timeout: timeout,} + } + return c.client.Do(request) +} + +func CreateClient(timeout time.Duration) HttpClient { + return &defaultHttpClient{ + client: &http.Client{Timeout: timeout,}, + } +} + +// Convenience function for doing a HTTP GET +func HttpGet(host string, path string, description string) *http.Response { + url, urlError := url.Parse(host + path) + if urlError != nil { + Error("Invalid target url '" + host + path + "'") + return nil + } + return HttpDo(&http.Request{URL: url,}, time.Second * 10, description) +} + +func HttpDo(request *http.Request, timeout time.Duration, description string) *http.Response { + response, error := ActiveHttpClient.Do(request, timeout) + if error != nil { + Error("Could not connect to", strings.ToLower(description), "at", request.URL.Host) + Detail(error.Error()) + } + return response +} + diff --git a/client/go/src/utils/http_test.go b/client/go/src/utils/http_test.go new file mode 100644 index 00000000000..0383f5c9257 --- /dev/null +++ b/client/go/src/utils/http_test.go @@ -0,0 +1,46 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Basic testing of our HTTP client wrapper +// Author: bratseth + +package utils + +import ( + "bytes" + "github.com/stretchr/testify/assert" + "io/ioutil" + "net/http" + "testing" + "time" +) + +type mockHttpClient struct {} + +func (c mockHttpClient) Do(request *http.Request, timeout time.Duration) (response *http.Response, error error) { + var status int + var body string + if request.URL.String() == "http://host/okpath" { + status = 200 + body = "OK body" + } else { + status = 500 + body = "Unexpected url body" + } + + return &http.Response{ + StatusCode: status, + Header: make(http.Header), + Body: ioutil.NopCloser(bytes.NewBufferString(body)), + }, + nil +} + +func TestHttpRequest(t *testing.T) { + ActiveHttpClient = mockHttpClient{} + + response := HttpGet("http://host", "/okpath", "description") + assert.Equal(t, 200, response.StatusCode) + + response = HttpGet("http://host", "/otherpath", "description") + assert.Equal(t, 500, response.StatusCode) +} + diff --git a/client/go/src/utils/io.go b/client/go/src/utils/io.go new file mode 100644 index 00000000000..5ea4fbf7fa6 --- /dev/null +++ b/client/go/src/utils/io.go @@ -0,0 +1,31 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// File utilities. +// Author: bratseth + +package utils + +import ( + "errors" + "io" + "os" + "strings" +) + +// Returns true if the given path exists +func PathExists(path string) bool { + _, err := os.Stat(path) + return ! errors.Is(err, os.ErrNotExist) +} + +// Returns true is the given path points to an existing directory +func IsDirectory(path string) bool { + info, err := os.Stat(path) + return ! errors.Is(err, os.ErrNotExist) && info.IsDir() +} + +// Returns the content of a reader as a string +func ReaderToString(reader io.ReadCloser) string { + buffer := new(strings.Builder) + io.Copy(buffer, reader) + return buffer.String() +}
\ No newline at end of file diff --git a/client/go/src/utils/print.go b/client/go/src/utils/print.go new file mode 100644 index 00000000000..f13a2c45cd0 --- /dev/null +++ b/client/go/src/utils/print.go @@ -0,0 +1,49 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Print functions for color-coded text. +// Author: bratseth + +package utils + +import ( + "fmt" + "io" + "os" +) + +// Set this to have output written somewhere else than os.Stdout +var Out io.Writer + +func init() { + Out = os.Stdout +} + +// Prints in default color +func Print(messages ...string) { + print("", messages) +} + +// Prints in a color appropriate for errors +func Error(messages ...string) { + print("\033[31m", messages) +} + +// Prints in a color appropriate for success messages +func Success(messages ...string) { + print("\033[32m", messages) +} + +// Prints in a color appropriate for detail messages +func Detail(messages ...string) { + print("\033[33m", messages) +} + +func print(prefix string, messages []string) { + fmt.Fprint(Out, prefix) + for i := 0; i < len(messages); i++ { + fmt.Fprint(Out, messages[i]) + if (i < len(messages) - 1) { + fmt.Fprint(Out, " ") + } + } + fmt.Fprintln(Out, "") +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java index d22affaf5a3..8e0357c1c05 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java @@ -93,7 +93,8 @@ public class QuotaValidator extends Validator { private void throwIfBudgetExceeded(double spend, BigDecimal budget, SystemName systemName) { if (budget.doubleValue() < spend) { - throwBudgetException("Please free up some capacity! This deployment's quota use ($%.2f) exceeds reserved quota ($%.2f)!", spend, budget, systemName); + throw new IllegalArgumentException((systemName.equals(SystemName.Public) ? "" : systemName.value() + ": ") + + "Deployment would make your tenant exceed its quota and has been blocked! Please contact support to update your plan."); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ResourceLimits.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ResourceLimits.java index a12e183b409..cd9a0c89bf8 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/ResourceLimits.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ResourceLimits.java @@ -37,8 +37,7 @@ public class ResourceLimits implements FleetcontrollerConfig.Producer, ProtonCon // storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp builder.cluster_feed_block_limit.put("memory", memoryLimit.orElse(0.8)); builder.cluster_feed_block_limit.put("disk", diskLimit.orElse(0.8)); - builder.cluster_feed_block_limit.put("attribute-enum-store", 0.89); - builder.cluster_feed_block_limit.put("attribute-multi-value", 0.89); + builder.cluster_feed_block_limit.put("attribute-address-space", 0.89); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java index c764b5ab449..0997f29729f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java @@ -241,7 +241,8 @@ public class StorageGroup { int minNodesPerGroup = (int)Math.ceil((double)nodesSpec.minResources().nodes() / nodesSpec.minResources().groups()); if (minNodesPerGroup < redundancy) { // TODO: Fail on this on Vespa 8, and simplify - context.getDeployLogger().logApplicationPackage(Level.WARNING, + context.getDeployLogger() + .logApplicationPackage(Level.WARNING, "Cluster '" + clusterElement.stringAttribute("id") + "' " + "specifies redundancy " + redundancy + " but cannot be higher than " + "the minimum nodes per group, which is " + minNodesPerGroup); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java index e99a92b530a..a975a02e149 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java @@ -47,8 +47,7 @@ public class QuotaValidatorTest { tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("Please free up some capacity! This deployment's quota use ($-.--) exceeds reserved quota ($-.--)!", - ValidationTester.censorNumbers(e.getMessage())); + assertEquals("Deployment would make your tenant exceed its quota and has been blocked! Please contact support to update your plan.", e.getMessage()); } } @@ -59,8 +58,7 @@ public class QuotaValidatorTest { tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("publiccd: Please free up some capacity! This deployment's quota use ($-.--) exceeds reserved quota ($-.--)!", - ValidationTester.censorNumbers(e.getMessage())); + assertEquals("publiccd: Deployment would make your tenant exceed its quota and has been blocked! Please contact support to update your plan.", e.getMessage()); } } @@ -71,8 +69,7 @@ public class QuotaValidatorTest { tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("publiccd: Please free up some capacity! This deployment's quota use ($-.--) exceeds reserved quota ($-.--)!", - ValidationTester.censorNumbers(e.getMessage())); + assertEquals("publiccd: Deployment would make your tenant exceed its quota and has been blocked! Please contact support to update your plan.", e.getMessage()); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentSearchClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentSearchClusterTest.java index 46bd005deb6..b9a98dab432 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentSearchClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentSearchClusterTest.java @@ -99,7 +99,7 @@ public class ContentSearchClusterTest { private static void assertClusterControllerResourceLimits(double expDiskLimit, double expMemoryLimit, ContentCluster cluster) { var limits = getFleetcontrollerConfig(cluster).cluster_feed_block_limit(); - assertEquals(4, limits.size()); + assertEquals(3, limits.size()); assertEquals(expDiskLimit, limits.get("disk"), EPSILON); assertEquals(expMemoryLimit, limits.get("memory"), EPSILON); } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/FleetControllerClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/FleetControllerClusterTest.java index d17f2d36b9c..cc1b96dc588 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/FleetControllerClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/FleetControllerClusterTest.java @@ -124,11 +124,10 @@ public class FleetControllerClusterTest { private void assertLimits(double expDisk, double expMemory, FleetcontrollerConfig config) { var limits = config.cluster_feed_block_limit(); - assertEquals(4, limits.size()); + assertEquals(3, limits.size()); assertEquals(expDisk, limits.get("disk"), DELTA); assertEquals(expMemory, limits.get("memory"), DELTA); - assertEquals(0.89, limits.get("attribute-enum-store"), DELTA); - assertEquals(0.89, limits.get("attribute-multi-value"), DELTA); + assertEquals(0.89, limits.get("attribute-address-space"), DELTA); } private FleetcontrollerConfig getConfigForResourceLimitsTuning(Double diskLimit, Double memoryLimit) { diff --git a/container-disc/src/main/sh/vespa-start-container-daemon.sh b/container-disc/src/main/sh/vespa-start-container-daemon.sh index 223124c29d6..a0cbd9d9186 100755 --- a/container-disc/src/main/sh/vespa-start-container-daemon.sh +++ b/container-disc/src/main/sh/vespa-start-container-daemon.sh @@ -110,27 +110,8 @@ configure_cpu() { } configure_numactl() { - log_message debug "starting ${VESPA_SERVICE_NAME} for ${VESPA_CONFIG_ID}" - if numactl --interleave all true &> /dev/null; then - # We are allowed to use numactl - numnodes=$(numactl --hardware | - grep available | - awk '$3 == "nodes" { print $2 }') - if [ "$VESPA_AFFINITY_CPU_SOCKET" ] && - [ "$numnodes" -gt 1 ] - then - node=$(($VESPA_AFFINITY_CPU_SOCKET % $numnodes)) - log_message debug "with affinity to $VESPA_AFFINITY_CPU_SOCKET out of $numnodes cpu sockets" - numactlcmd="numactl --cpunodebind=$node --membind=$node" - else - log_message debug "with memory interleaving on all nodes" - numactlcmd="numactl --interleave all" - fi - else - log_message debug "without numactl (no permission or not available)" - numactlcmd="" - fi - log_message debug "numactlcmd: $numactlcmd" + numactlcmd=$(get_numa_ctl_cmd) + log_message debug "starting ${VESPA_SERVICE_NAME} for ${VESPA_CONFIG_ID} with numactl command : $numactlcmd" } configure_gcopts() { diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 4af337dcb67..4b2127d8a3a 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -12,6 +12,7 @@ vespa_define_module( TESTS src/tests/ann + src/tests/eval/addr_to_symbol src/tests/eval/aggr src/tests/eval/array_array_map src/tests/eval/cell_type_space diff --git a/eval/src/tests/eval/addr_to_symbol/CMakeLists.txt b/eval/src/tests/eval/addr_to_symbol/CMakeLists.txt new file mode 100644 index 00000000000..eddace54cfe --- /dev/null +++ b/eval/src/tests/eval/addr_to_symbol/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_addr_to_symbol_test_app TEST + SOURCES + addr_to_symbol_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_addr_to_symbol_test_app COMMAND eval_addr_to_symbol_test_app) diff --git a/eval/src/tests/eval/addr_to_symbol/addr_to_symbol_test.cpp b/eval/src/tests/eval/addr_to_symbol/addr_to_symbol_test.cpp new file mode 100644 index 00000000000..fade4bf961f --- /dev/null +++ b/eval/src/tests/eval/addr_to_symbol/addr_to_symbol_test.cpp @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/llvm/addr_to_symbol.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib::eval; + +TEST(AddrToSymbol, null_ptr) { + auto sym = addr_to_symbol(nullptr); + EXPECT_EQ(sym, "<nullptr>"); +} + +TEST(AddrToSymbol, global_symbol) { + auto sym = addr_to_symbol((const void *)addr_to_symbol); + fprintf(stderr, "global symbol: %s\n", sym.c_str()); + EXPECT_TRUE(sym.find("addr_to_symbol") < sym.size()); +} + +TEST(AddrToSymbol, local_symbol) { + auto sym = addr_to_symbol(get_addr_of_local_test_symbol()); + fprintf(stderr, "local symbol: %s\n", sym.c_str()); + EXPECT_TRUE(sym.find("my_local_test_symbol") < sym.size()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/llvm/CMakeLists.txt b/eval/src/vespa/eval/eval/llvm/CMakeLists.txt index 898d18c9e4c..a3ca410842a 100644 --- a/eval/src/vespa/eval/eval/llvm/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/llvm/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(eval_eval_llvm OBJECT SOURCES + addr_to_symbol.cpp compile_cache.cpp compiled_function.cpp deinline_forest.cpp diff --git a/eval/src/vespa/eval/eval/llvm/addr_to_symbol.cpp b/eval/src/vespa/eval/eval/llvm/addr_to_symbol.cpp new file mode 100644 index 00000000000..566ab931837 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/addr_to_symbol.cpp @@ -0,0 +1,57 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "addr_to_symbol.h" +#include <vespa/vespalib/util/classname.h> + +#include <dlfcn.h> +#include <llvm/Object/ObjectFile.h> + +using vespalib::demangle; +using llvm::object::ObjectFile; + +namespace vespalib::eval { + +namespace { + +void my_local_test_symbol() {} + +} // <unnamed> + +vespalib::string addr_to_symbol(const void *addr) { + if (addr == nullptr) { + return {"<nullptr>"}; + } + Dl_info info; + memset(&info, 0, sizeof(info)); + if (dladdr(addr, &info) == 0) { + // address not in any shared object + return {"<invalid>"}; + } + if (info.dli_sname != nullptr) { + // address of global symbol + return demangle(info.dli_sname); + } + // find addr offset into shared object + uint64_t offset = ((const char *)addr) - ((const char *)info.dli_fbase); + // use llvm to look up local symbols... + auto file = ObjectFile::createObjectFile(info.dli_fname); + if (!file) { + return {"<object_error>"}; + } + auto symbols = file.get().getBinary()->symbols(); + for (const auto &symbol: symbols) { + auto sym_name = symbol.getName(); + auto sym_addr = symbol.getAddress(); + if (sym_name && sym_addr && (*sym_addr == offset)) { + return demangle(sym_name->str().c_str()); + } + } + // could not resolve symbol + return {"<unknown>"}; +} + +const void *get_addr_of_local_test_symbol() { + return (const void *) my_local_test_symbol; +} + +} diff --git a/eval/src/vespa/eval/eval/llvm/addr_to_symbol.h b/eval/src/vespa/eval/eval/llvm/addr_to_symbol.h new file mode 100644 index 00000000000..29c83be7260 --- /dev/null +++ b/eval/src/vespa/eval/eval/llvm/addr_to_symbol.h @@ -0,0 +1,19 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace vespalib::eval { + +// Map an address to a symbolic name. +// Intended for function pointers. + +vespalib::string addr_to_symbol(const void *addr); + +// Return the address of a local symbol. +// Used for testing. + +const void *get_addr_of_local_test_symbol(); + +} diff --git a/persistence/src/vespa/persistence/spi/attribute_resource_usage.h b/persistence/src/vespa/persistence/spi/attribute_resource_usage.h index 02e0c248e3d..5cd8c93890f 100644 --- a/persistence/src/vespa/persistence/spi/attribute_resource_usage.h +++ b/persistence/src/vespa/persistence/spi/attribute_resource_usage.h @@ -13,7 +13,7 @@ namespace storage::spi { class AttributeResourceUsage { double _usage; - vespalib::string _name; // document_type.subdb.attribute + vespalib::string _name; // document_type.subdb.attribute.component public: AttributeResourceUsage(double usage, const vespalib::string& name) diff --git a/persistence/src/vespa/persistence/spi/resource_usage.cpp b/persistence/src/vespa/persistence/spi/resource_usage.cpp index 517a45a3d41..3dfc026c714 100644 --- a/persistence/src/vespa/persistence/spi/resource_usage.cpp +++ b/persistence/src/vespa/persistence/spi/resource_usage.cpp @@ -21,8 +21,7 @@ std::ostream& operator<<(std::ostream& out, const ResourceUsage& resource_usage) { out << "{disk_usage=" << resource_usage.get_disk_usage() << ", memory_usage=" << resource_usage.get_memory_usage() << - ", attribute_enum_store_usage=" << resource_usage.get_attribute_enum_store_usage() << - ", attribute_multivalue_usage=" << resource_usage.get_attribute_multivalue_usage() << "}"; + ", attribute_address_space_usage=" << resource_usage.get_attribute_address_space_usage() << "}"; return out; } diff --git a/persistence/src/vespa/persistence/spi/resource_usage.h b/persistence/src/vespa/persistence/spi/resource_usage.h index 921da8bbf06..82ff889c03e 100644 --- a/persistence/src/vespa/persistence/spi/resource_usage.h +++ b/persistence/src/vespa/persistence/spi/resource_usage.h @@ -14,21 +14,20 @@ class ResourceUsage { double _disk_usage; double _memory_usage; - AttributeResourceUsage _attribute_enum_store_usage; - AttributeResourceUsage _attribute_multivalue_usage; + AttributeResourceUsage _attribute_address_space_usage; public: - ResourceUsage(double disk_usage, double memory_usage, const AttributeResourceUsage &attribute_enum_store_usage, const AttributeResourceUsage &attribute_multivalue_usage) + ResourceUsage(double disk_usage, double memory_usage, + const AttributeResourceUsage &attribute_address_space_usage) : _disk_usage(disk_usage), _memory_usage(memory_usage), - _attribute_enum_store_usage(attribute_enum_store_usage), - _attribute_multivalue_usage(attribute_multivalue_usage) + _attribute_address_space_usage(attribute_address_space_usage) { } ResourceUsage(double disk_usage, double memory_usage) - : ResourceUsage(disk_usage, memory_usage, AttributeResourceUsage(), AttributeResourceUsage()) + : ResourceUsage(disk_usage, memory_usage, AttributeResourceUsage()) { } @@ -49,14 +48,12 @@ public: double get_disk_usage() const noexcept { return _disk_usage; } double get_memory_usage() const noexcept { return _memory_usage; } - const AttributeResourceUsage& get_attribute_enum_store_usage() const noexcept { return _attribute_enum_store_usage; } - const AttributeResourceUsage& get_attribute_multivalue_usage() const noexcept { return _attribute_multivalue_usage; } + const AttributeResourceUsage& get_attribute_address_space_usage() const noexcept { return _attribute_address_space_usage; } bool operator==(const ResourceUsage &rhs) const noexcept { return ((_disk_usage == rhs._disk_usage) && (_memory_usage == rhs._memory_usage) && - (_attribute_enum_store_usage == rhs._attribute_enum_store_usage) && - (_attribute_multivalue_usage == rhs._attribute_multivalue_usage)); + (_attribute_address_space_usage == rhs._attribute_address_space_usage)); } bool operator!=(const ResourceUsage &rhs) const noexcept { return !operator==(rhs); diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index dfcc7542e09..2d5eb8dbc4f 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -61,6 +61,7 @@ vespa_define_module( src/tests/proton/attribute/attribute_populator src/tests/proton/attribute/attribute_usage_filter src/tests/proton/attribute/attribute_usage_sampler_functor + src/tests/proton/attribute/attribute_usage_stats src/tests/proton/attribute/attributes_state_explorer src/tests/proton/attribute/document_field_extractor src/tests/proton/attribute/document_field_populator diff --git a/searchcore/src/tests/proton/attribute/attribute_usage_filter/attribute_usage_filter_test.cpp b/searchcore/src/tests/proton/attribute/attribute_usage_filter/attribute_usage_filter_test.cpp index a1c7b0a152d..18b9962003c 100644 --- a/searchcore/src/tests/proton/attribute/attribute_usage_filter/attribute_usage_filter_test.cpp +++ b/searchcore/src/tests/proton/attribute/attribute_usage_filter/attribute_usage_filter_test.cpp @@ -95,65 +95,36 @@ TEST_F("Check that default filter allows write", Fixture) TEST_F("Check that enum store limit can be reached", Fixture) { - f.filter.setConfig(Fixture::Config(0.8, 1.0)); + f.filter.setConfig(Fixture::Config(0.8)); MyAttributeStats stats; stats.triggerEnumStoreLimit(); f.setAttributeStats(stats); - f.testWrite("enumStoreLimitReached: { " + f.testWrite("addressSpaceLimitReached: { " "action: \"" "add more content nodes" "\", " "reason: \"" - "enum store address space used (0.9375) > limit (0.8)" + "max address space in attribute vector components used (0.9375) > limit (0.8)" "\", " - "enumStore: { used: 32212254720, dead: 0, limit: 34359738368}, " - "attributeName: \"enumeratedName\", subdb: \"ready\"}"); + "addressSpace: { used: 32212254720, dead: 0, limit: 34359738368}, " + "attributeName: \"enumeratedName\", componentName: \"enum-store\", subdb: \"ready\"}"); } TEST_F("Check that multivalue limit can be reached", Fixture) { - f.filter.setConfig(Fixture::Config(1.0, 0.8)); + f.filter.setConfig(Fixture::Config(0.8)); MyAttributeStats stats; stats.triggerMultiValueLimit(); f.setAttributeStats(stats); - f.testWrite("multiValueLimitReached: { " + f.testWrite("addressSpaceLimitReached: { " "action: \"" "add more content nodes" "\", " "reason: \"" - "multiValue address space used (0.992188) > limit (0.8)" + "max address space in attribute vector components used (0.992188) > limit (0.8)" "\", " - "multiValue: { used: 133169152, dead: 0, limit: 134217728}, " - "attributeName: \"multiValueName\", subdb: \"ready\"}"); -} - -TEST_F("Check that both enumstore limit and multivalue limit can be reached", - Fixture) -{ - f.filter.setConfig(Fixture::Config(0.8, 0.8)); - MyAttributeStats stats; - stats.triggerEnumStoreLimit(); - stats.triggerMultiValueLimit(); - f.setAttributeStats(stats); - f.testWrite("enumStoreLimitReached: { " - "action: \"" - "add more content nodes" - "\", " - "reason: \"" - "enum store address space used (0.9375) > limit (0.8)" - "\", " - "enumStore: { used: 32212254720, dead: 0, limit: 34359738368}, " - "attributeName: \"enumeratedName\", subdb: \"ready\"}" - ", " - "multiValueLimitReached: { " - "action: \"" - "add more content nodes" - "\", " - "reason: \"" - "multiValue address space used (0.992188) > limit (0.8)" - "\", " - "multiValue: { used: 133169152, dead: 0, limit: 134217728}, " - "attributeName: \"multiValueName\", subdb: \"ready\"}"); + "addressSpace: { used: 133169152, dead: 0, limit: 134217728}, " + "attributeName: \"multiValueName\", componentName: \"multi-value\", subdb: \"ready\"}"); } TEST_F("listener is updated when attribute stats change", Fixture) diff --git a/searchcore/src/tests/proton/attribute/attribute_usage_stats/CMakeLists.txt b/searchcore/src/tests/proton/attribute/attribute_usage_stats/CMakeLists.txt new file mode 100644 index 00000000000..b015ca2a168 --- /dev/null +++ b/searchcore/src/tests/proton/attribute/attribute_usage_stats/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchcore_attribute_usage_stats_test_app TEST + SOURCES + attribute_usage_stats_test.cpp + DEPENDS + searchcore_attribute + GTest::GTest +) +vespa_add_test(NAME searchcore_attribute_usage_stats_test_app COMMAND searchcore_attribute_usage_stats_test_app) diff --git a/searchcore/src/tests/proton/attribute/attribute_usage_stats/attribute_usage_stats_test.cpp b/searchcore/src/tests/proton/attribute/attribute_usage_stats/attribute_usage_stats_test.cpp new file mode 100644 index 00000000000..b85d3b43f5b --- /dev/null +++ b/searchcore/src/tests/proton/attribute/attribute_usage_stats/attribute_usage_stats_test.cpp @@ -0,0 +1,47 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchcore/proton/attribute/attribute_usage_stats.h> +#include <vespa/vespalib/gtest/gtest.h> + +using proton::AddressSpaceUsageStats; +using proton::AttributeUsageStats; +using search::AddressSpaceUsage; +using vespalib::AddressSpace; + +void +expect_max_usage(size_t used, const vespalib::string& attr_name, + const vespalib::string& comp_name, const vespalib::string& sub_name, + const AttributeUsageStats& stats) +{ + const auto& max = stats.max_address_space_usage(); + EXPECT_EQ(used, max.getUsage().used()); + EXPECT_EQ(attr_name, max.getAttributeName()); + EXPECT_EQ(comp_name, max.get_component_name()); + EXPECT_EQ(sub_name, max.getSubDbName()); +} + +TEST(AttributeUsageStatsTest, tracks_max_address_space_usage) +{ + AttributeUsageStats stats; + { + AddressSpaceUsage usage; + usage.set("comp1", AddressSpace(2, 0, 10)); + usage.set("comp2", AddressSpace(3, 0, 10)); + stats.merge(usage, "attr1", "sub1"); + expect_max_usage(3, "attr1", "comp2", "sub1", stats); + } + { + AddressSpaceUsage usage; + usage.set("comp3", AddressSpace(5, 0, 10)); + usage.set("comp4", AddressSpace(4, 0, 10)); + stats.merge(usage, "attr2", "sub2"); + expect_max_usage(5, "attr2", "comp3", "sub2", stats); + } + { + AddressSpaceUsage usage; + usage.set("comp5", AddressSpace(5, 0, 10)); + stats.merge(usage, "attr3", "sub2"); + expect_max_usage(5, "attr2", "comp3", "sub2", stats); + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchcore/src/tests/proton/persistenceengine/resource_usage_tracker/resource_usage_tracker_test.cpp b/searchcore/src/tests/proton/persistenceengine/resource_usage_tracker/resource_usage_tracker_test.cpp index 4144b87c61d..7e1cf89ccdf 100644 --- a/searchcore/src/tests/proton/persistenceengine/resource_usage_tracker/resource_usage_tracker_test.cpp +++ b/searchcore/src/tests/proton/persistenceengine/resource_usage_tracker/resource_usage_tracker_test.cpp @@ -1,11 +1,12 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/persistence/spi/resource_usage_listener.h> #include <vespa/persistence/spi/resource_usage.h> +#include <vespa/persistence/spi/resource_usage_listener.h> #include <vespa/searchcore/proton/attribute/attribute_usage_stats.h> #include <vespa/searchcore/proton/attribute/i_attribute_usage_listener.h> #include <vespa/searchcore/proton/persistenceengine/resource_usage_tracker.h> #include <vespa/searchcore/proton/test/disk_mem_usage_notifier.h> +#include <vespa/searchlib/attribute/address_space_components.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/idestructorcallback.h> #include <atomic> @@ -131,7 +132,7 @@ struct AttributeUsageStatsBuilder ~AttributeUsageStatsBuilder(); AttributeUsageStatsBuilder& reset() { stats = AttributeUsageStats(); return *this; } - AttributeUsageStatsBuilder& merge(const NamedAttribute& named_attribute, size_t used_enum_store, size_t used_multivalue); + AttributeUsageStatsBuilder& merge(const NamedAttribute& named_attribute, size_t used_address_space); AttributeUsageStats build() { return stats; } @@ -140,11 +141,11 @@ struct AttributeUsageStatsBuilder AttributeUsageStatsBuilder::~AttributeUsageStatsBuilder() = default; AttributeUsageStatsBuilder& -AttributeUsageStatsBuilder::merge(const NamedAttribute& named_attribute, size_t used_enum_store, size_t used_multivalue) +AttributeUsageStatsBuilder::merge(const NamedAttribute& named_attribute, size_t used_address_space) { - vespalib::AddressSpace enum_store_usage(used_enum_store, 0, usage_limit); - vespalib::AddressSpace multivalue_usage(used_multivalue, 0, usage_limit); - search::AddressSpaceUsage as_usage(enum_store_usage, multivalue_usage); + vespalib::AddressSpace address_space_usage(used_address_space, 0, usage_limit); + search::AddressSpaceUsage as_usage; + as_usage.set("comp", address_space_usage); stats.merge(as_usage, named_attribute.attribute, named_attribute.subdb); return *this; } @@ -153,11 +154,10 @@ double rel_usage(size_t usage) noexcept { return (double) usage / (double) usage_limit; } -ResourceUsage make_resource_usage(const vespalib::string& enum_store_name, size_t used_enum_store, const vespalib::string &multivalue_name, size_t used_multivalue) +ResourceUsage make_resource_usage(const vespalib::string& attr_name, size_t used_address_space) { - AttributeResourceUsage enum_store_usage(rel_usage(used_enum_store), enum_store_name); - AttributeResourceUsage multivalue_usage(rel_usage(used_multivalue), multivalue_name); - return ResourceUsage(0.0, 0.0, enum_store_usage, multivalue_usage); + AttributeResourceUsage address_space_usage(rel_usage(used_address_space), attr_name); + return ResourceUsage(0.0, 0.0, address_space_usage); } } @@ -170,24 +170,24 @@ TEST_F(ResourceUsageTrackerTest, aggregates_attribute_usage) auto aul2 = _tracker->make_attribute_usage_listener("doctype2"); AttributeUsageStatsBuilder b1; AttributeUsageStatsBuilder b2; - b1.merge(ready_a1, 10, 20).merge(ready_a2, 5, 30); - b2.merge(ready_a1, 15, 15); + b1.merge(ready_a1, 10).merge(ready_a2, 5); + b2.merge(ready_a1, 15); aul1->notify_attribute_usage(b1.build()); aul2->notify_attribute_usage(b2.build()); - EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1", 15, "doctype1.0.ready.a2", 30), get_usage()); - b1.merge(notready_a1, 5, 31); + EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1.comp", 15), get_usage()); + b1.merge(notready_a1, 16); aul1->notify_attribute_usage(b1.build()); - EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1", 15, "doctype1.2.notready.a1", 31), get_usage()); - b1.reset().merge(ready_a1, 10, 20).merge(ready_a2, 5, 30); + EXPECT_EQ(make_resource_usage("doctype1.2.notready.a1.comp", 16), get_usage()); + b1.reset().merge(ready_a1, 10).merge(ready_a2, 5); aul1->notify_attribute_usage(b1.build()); - EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1", 15, "doctype1.0.ready.a2", 30), get_usage()); + EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1.comp", 15), get_usage()); aul2.reset(); - EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1", 10, "doctype1.0.ready.a2", 30), get_usage()); + EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1.comp", 10), get_usage()); aul1.reset(); - EXPECT_EQ(make_resource_usage("", 0, "", 0), get_usage()); + EXPECT_EQ(make_resource_usage("", 0), get_usage()); aul2 = _tracker->make_attribute_usage_listener("doctype2"); aul2->notify_attribute_usage(b2.build()); - EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1", 15, "doctype2.0.ready.a1", 15), get_usage()); + EXPECT_EQ(make_resource_usage("doctype2.0.ready.a1.comp", 15), get_usage()); } TEST_F(ResourceUsageTrackerTest, can_skip_scan_when_aggregating_attributes) @@ -198,16 +198,16 @@ TEST_F(ResourceUsageTrackerTest, can_skip_scan_when_aggregating_attributes) auto aul2 = _tracker->make_attribute_usage_listener("doctype2"); AttributeUsageStatsBuilder b1; AttributeUsageStatsBuilder b2; - b1.merge(ready_a1, 20, 20).merge(ready_a2, 5, 30); - b2.merge(ready_a1, 15, 15); + b1.merge(ready_a1, 20).merge(ready_a2, 5); + b2.merge(ready_a1, 15); aul1->notify_attribute_usage(b1.build()); - EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1", 20, "doctype1.0.ready.a2", 30), get_usage()); + EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1.comp", 20), get_usage()); EXPECT_EQ(2u, get_update_count()); aul1->notify_attribute_usage(b1.build()); - EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1", 20, "doctype1.0.ready.a2", 30), get_usage()); + EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1.comp", 20), get_usage()); EXPECT_EQ(2u, get_update_count()); // usage for doctype1 has not changed aul2->notify_attribute_usage(b2.build()); - EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1", 20, "doctype1.0.ready.a2", 30), get_usage()); + EXPECT_EQ(make_resource_usage("doctype1.0.ready.a1.comp", 20), get_usage()); EXPECT_EQ(2u, get_update_count()); // usage for doctype2 is less than usage for doctype1 aul2.reset(); aul1.reset(); diff --git a/searchcore/src/vespa/searchcore/config/proton.def b/searchcore/src/vespa/searchcore/config/proton.def index 3a51176b0d8..daaab2b823a 100644 --- a/searchcore/src/vespa/searchcore/config/proton.def +++ b/searchcore/src/vespa/searchcore/config/proton.def @@ -431,12 +431,20 @@ visit.ignoremaxbytes bool default=true ## When set to 0 (default) we use 1 separate thread per document database. initialize.threads int default = 0 +## Portion of max address space used in components in attribute vectors +## before put and update operations in feed is blocked. +writefilter.attribute.address_space_limit double default = 0.9 + ## Portion of enumstore address space that can be used before put and update ## portion of feed is blocked. +## Deprecated -> Use address_space_limit +## TODO: remove this when enum store is removed from AttributeUsageStats writefilter.attribute.enumstorelimit double default = 0.9 ## Portion of attribute multivalue mapping address space that can be used ## before put and update portion of feed is blocked. +## Deprecated -> Use address_space_limit +## TODO: remove this when multi value is removed from AttributeUsageStats writefilter.attribute.multivaluelimit double default = 0.9 ## Portion of physical memory that can be resident memory in anonymous mapping diff --git a/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.cpp b/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.cpp index 1acfa64285c..b8cfe86323a 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.cpp @@ -8,6 +8,7 @@ namespace proton { AddressSpaceUsageStats::AddressSpaceUsageStats(const vespalib::AddressSpace & usage) : _usage(usage), _attributeName(), + _component_name(), _subDbName() { } @@ -17,11 +18,13 @@ AddressSpaceUsageStats::~AddressSpaceUsageStats() = default; void AddressSpaceUsageStats::merge(const vespalib::AddressSpace &usage, const vespalib::string &attributeName, + const vespalib::string &component_name, const vespalib::string &subDbName) { if (attributeName.empty() || usage.usage() > _usage.usage()) { _usage = usage; _attributeName = attributeName; + _component_name = component_name; _subDbName = subDbName; } } @@ -31,6 +34,7 @@ operator<<(std::ostream& out, const AddressSpaceUsageStats& rhs) { out << "{usage=" << rhs.getUsage() << ", attribute_name=" << rhs.getAttributeName() << + ", component_name=" << rhs.get_component_name() << ", subdb_name=" << rhs.getSubDbName() << "}"; return out; } diff --git a/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.h b/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.h index 9ed68693ec1..7edc9de5bad 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/address_space_usage_stats.h @@ -8,14 +8,14 @@ namespace proton { /** - * Class representing usage of a single address space (enum store or - * multi value) and the most largest attribute in that respect, relative - * to the limit. + * Class representing max address space usage (relative to the limit) + * among components in attributes vectors in all sub databases. */ class AddressSpaceUsageStats { vespalib::AddressSpace _usage; vespalib::string _attributeName; + vespalib::string _component_name; vespalib::string _subDbName; public: @@ -23,15 +23,18 @@ public: ~AddressSpaceUsageStats(); void merge(const vespalib::AddressSpace &usage, const vespalib::string &attributeName, + const vespalib::string &component_name, const vespalib::string &subDbName); const vespalib::AddressSpace &getUsage() const { return _usage; } const vespalib::string &getAttributeName() const { return _attributeName; } + const vespalib::string &get_component_name() const { return _component_name; } const vespalib::string &getSubDbName() const { return _subDbName; } bool operator==(const AddressSpaceUsageStats& rhs) const { return (_usage == rhs._usage) && (_attributeName == rhs._attributeName) && + (_component_name == rhs._component_name) && (_subDbName == rhs._subDbName); } }; diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.cpp index e19e7976227..807ad020df0 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.cpp @@ -14,38 +14,24 @@ void makeAddressSpaceMessage(std::ostream &os, os << "{ used: " << usage.getUsage().used() << ", dead: " << usage.getUsage().dead() << ", limit: " << - usage.getUsage().limit() << "}, attributeName: \"" << - usage.getAttributeName() << "\", subdb: \"" << - usage.getSubDbName() << "\"}"; + usage.getUsage().limit() << "}, " << + "attributeName: \"" << usage.getAttributeName() << "\", " << + "componentName: \"" << usage.get_component_name() << "\", " << + "subdb: \"" << usage.getSubDbName() << "\"}"; } -void makeEnumStoreMessage(std::ostream &os, - double used, double limit, - const AddressSpaceUsageStats &usage) +void make_error_message(std::ostream &os, + double used, double limit, + const AddressSpaceUsageStats &usage) { - os << "enumStoreLimitReached: { " + os << "addressSpaceLimitReached: { " "action: \"" "add more content nodes" "\", " "reason: \"" - "enum store address space used (" << used << ") > " + "max address space in attribute vector components used (" << used << ") > " "limit (" << limit << ")" - "\", enumStore: "; - makeAddressSpaceMessage(os, usage); -} - -void makeMultiValueMessage(std::ostream &os, - double used, double limit, - const AddressSpaceUsageStats &usage) -{ - os << "multiValueLimitReached: { " - "action: \"" - "add more content nodes" - "\", " - "reason: \"" - "multiValue address space used (" << used << ") > " - "limit (" << limit << ")" - "\", multiValue: "; + "\", addressSpace: "; makeAddressSpaceMessage(os, usage); } @@ -57,20 +43,11 @@ AttributeUsageFilter::recalcState(const Guard &guard) (void) guard; bool hasMessage = false; std::ostringstream message; - const AddressSpaceUsageStats &enumStoreUsage = _attributeStats.enumStoreUsage(); - double enumStoreUsed = enumStoreUsage.getUsage().usage(); - if (enumStoreUsed > _config._enumStoreLimit) { + const auto &max_usage = _attributeStats.max_address_space_usage(); + double used = max_usage.getUsage().usage(); + if (used > _config._address_space_limit) { hasMessage = true; - makeEnumStoreMessage(message, enumStoreUsed, _config._enumStoreLimit, enumStoreUsage); - } - const AddressSpaceUsageStats &multiValueUsage = _attributeStats.multiValueUsage(); - double multiValueUsed = multiValueUsage.getUsage().usage(); - if (multiValueUsed > _config._multiValueLimit) { - if (hasMessage) { - message << ", "; - } - hasMessage = true; - makeMultiValueMessage(message, multiValueUsed, _config._multiValueLimit, multiValueUsage); + make_error_message(message, used, _config._address_space_limit, max_usage); } if (hasMessage) { _state = State(false, message.str()); @@ -126,20 +103,6 @@ AttributeUsageFilter::set_listener(std::unique_ptr<IAttributeUsageListener> list _listener = std::move(listener); } -double -AttributeUsageFilter::getEnumStoreUsedRatio() const -{ - Guard guard(_lock); - return _attributeStats.enumStoreUsage().getUsage().usage(); -} - -double -AttributeUsageFilter::getMultiValueUsedRatio() const -{ - Guard guard(_lock); - return _attributeStats.multiValueUsage().getUsage().usage(); -} - bool AttributeUsageFilter::acceptWriteOperation() const { diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.h b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.h index 45035b40864..4c25bdfd8ca 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter.h @@ -42,8 +42,6 @@ public: AttributeUsageStats getAttributeUsageStats() const; void setConfig(Config config); void set_listener(std::unique_ptr<IAttributeUsageListener> listener); - double getEnumStoreUsedRatio() const; - double getMultiValueUsedRatio() const; bool acceptWriteOperation() const override; State getAcceptState() const override; }; diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter_config.h b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter_config.h index 66f85191a0e..501ece82970 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter_config.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_filter_config.h @@ -4,7 +4,7 @@ namespace proton { -/* +/** * Struct representing config for when to filter write operations * due to attribute resource usage (e.g. enum store and multivalue mapping). * If resource limit is reached then further writes are denied in @@ -14,23 +14,18 @@ namespace proton { */ struct AttributeUsageFilterConfig { - double _enumStoreLimit; - double _multiValueLimit; + double _address_space_limit; AttributeUsageFilterConfig() noexcept - : _enumStoreLimit(1.0), - _multiValueLimit(1.0) - { } + : _address_space_limit(1.0) + {} - AttributeUsageFilterConfig(double enumStoreLimit_in, - double multiValueLimit_in) noexcept - : _enumStoreLimit(enumStoreLimit_in), - _multiValueLimit(multiValueLimit_in) - { } + AttributeUsageFilterConfig(double address_space_limit) noexcept + : _address_space_limit(address_space_limit) + {} bool operator==(const AttributeUsageFilterConfig &rhs) const noexcept { - return ((_enumStoreLimit == rhs._enumStoreLimit) && - (_multiValueLimit == rhs._multiValueLimit)); + return (_address_space_limit == rhs._address_space_limit); } }; diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.cpp index e7d6713441d..f0ab56562a6 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.cpp @@ -4,28 +4,37 @@ #include <vespa/searchlib/attribute/address_space_components.h> #include <iostream> +using search::AddressSpaceComponents; + namespace proton { AttributeUsageStats::AttributeUsageStats() - : _enumStoreUsage(search::AddressSpaceComponents::default_enum_store_usage()), - _multiValueUsage(search::AddressSpaceComponents::default_multi_value_usage()) + : _enumStoreUsage(AddressSpaceComponents::default_enum_store_usage()), + _multiValueUsage(AddressSpaceComponents::default_multi_value_usage()), + _max_usage(vespalib::AddressSpace()) { } +AttributeUsageStats::~AttributeUsageStats() = default; + void AttributeUsageStats::merge(const search::AddressSpaceUsage &usage, const vespalib::string &attributeName, const vespalib::string &subDbName) { - _enumStoreUsage.merge(usage.enum_store_usage(), attributeName, subDbName); - _multiValueUsage.merge(usage.multi_value_usage(), attributeName, subDbName); + _enumStoreUsage.merge(usage.enum_store_usage(), attributeName, AddressSpaceComponents::enum_store, subDbName); + _multiValueUsage.merge(usage.multi_value_usage(), attributeName, AddressSpaceComponents::multi_value, subDbName); + for (const auto& entry : usage.get_all()) { + _max_usage.merge(entry.second, attributeName, entry.first, subDbName); + } } std::ostream& operator<<(std::ostream& out, const AttributeUsageStats& rhs) { out << "{enum_store=" << rhs.enumStoreUsage() << - ", multi_value=" << rhs.multiValueUsage() << "}"; + ", multi_value=" << rhs.multiValueUsage() << + ", max_address_space_usage=" << rhs.max_address_space_usage() << "}"; return out; } diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.h b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.h index 1eb6a9cc6be..762cc324f89 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.h +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_usage_stats.h @@ -16,19 +16,23 @@ class AttributeUsageStats { AddressSpaceUsageStats _enumStoreUsage; AddressSpaceUsageStats _multiValueUsage; + AddressSpaceUsageStats _max_usage; public: AttributeUsageStats(); + ~AttributeUsageStats(); void merge(const search::AddressSpaceUsage &usage, const vespalib::string &attributeName, const vespalib::string &subDbName); const AddressSpaceUsageStats& enumStoreUsage() const { return _enumStoreUsage; } const AddressSpaceUsageStats& multiValueUsage() const { return _multiValueUsage; } + const AddressSpaceUsageStats& max_address_space_usage() const { return _max_usage; } bool operator==(const AttributeUsageStats& rhs) const { return (_enumStoreUsage == rhs._enumStoreUsage) && - (_multiValueUsage == rhs._multiValueUsage); + (_multiValueUsage == rhs._multiValueUsage) && + (_max_usage == rhs._max_usage); } }; diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp index 713a03810b9..7bd07dba505 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.cpp @@ -91,11 +91,13 @@ DocumentDBTaggedMetrics::AttributeMetrics::AttributeMetrics(MetricSet *parent) DocumentDBTaggedMetrics::AttributeMetrics::~AttributeMetrics() = default; DocumentDBTaggedMetrics::AttributeMetrics::ResourceUsageMetrics::ResourceUsageMetrics(MetricSet *parent) - : MetricSet("resource_usage", {}, "Usage metrics for various attribute vector resources", parent), + : MetricSet("resource_usage", {}, "Metrics for various attribute vector resources usage", parent), enumStore("enum_store", {}, "The highest relative amount of enum store address space used among " "all enumerated attribute vectors in this document db (value in the range [0, 1])", this), multiValue("multi_value", {}, "The highest relative amount of multi-value address space used among " "all multi-value attribute vectors in this document db (value in the range [0, 1])", this), + address_space("address_space", {}, "The max relative address space used among " + "components in all attribute vectors in this document db (value in the range [0, 1])", this), feedingBlocked("feeding_blocked", {}, "Whether feeding is blocked due to attribute resource limits being reached (value is either 0 or 1)", this) { } diff --git a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h index 7f2e095a39b..8d225115c37 100644 --- a/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h +++ b/searchcore/src/vespa/searchcore/proton/metrics/documentdb_tagged_metrics.h @@ -88,6 +88,7 @@ struct DocumentDBTaggedMetrics : metrics::MetricSet { metrics::DoubleValueMetric enumStore; metrics::DoubleValueMetric multiValue; + metrics::DoubleValueMetric address_space; metrics::LongValueMetric feedingBlocked; ResourceUsageMetrics(metrics::MetricSet *parent); diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.cpp b/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.cpp index 9791b55f7b7..9f1c6e66b5c 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.cpp +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.cpp @@ -73,8 +73,7 @@ ResourceUsageTracker::ResourceUsageTracker(IDiskMemUsageNotifier& disk_mem_usage _listener(nullptr), _disk_mem_usage_notifier(disk_mem_usage_notifier), _attribute_usage(), - _attribute_enum_store_max_document_type(), - _attribute_multivalue_max_document_type() + _attribute_address_space_max_document_type() { _disk_mem_usage_notifier.addDiskMemUsageListener(this); } @@ -88,7 +87,7 @@ void ResourceUsageTracker::notifyDiskMemUsage(DiskMemUsageState state) { std::lock_guard guard(_lock); - _resource_usage = ResourceUsage(state.diskState().usage(), state.memoryState().usage(), _resource_usage.get_attribute_enum_store_usage(), _resource_usage.get_attribute_multivalue_usage()); + _resource_usage = ResourceUsage(state.diskState().usage(), state.memoryState().usage(), _resource_usage.get_attribute_address_space_usage()); if (_listener != nullptr) { _listener->update_resource_usage(_resource_usage); } @@ -125,6 +124,7 @@ namespace { bool same_usage(const AddressSpaceUsageStats &lhs, const AddressSpaceUsageStats &rhs) { return ((lhs.getUsage().usage() == rhs.getUsage().usage()) && + (lhs.get_component_name() == rhs.get_component_name()) && (lhs.getAttributeName() == rhs.getAttributeName()) && (lhs.getSubDbName() == rhs.getSubDbName())); } @@ -140,18 +140,15 @@ ResourceUsageTracker::notify_attribute_usage(const vespalib::string &document_ty { std::lock_guard guard(_lock); auto& old_usage = _attribute_usage[document_type]; - if (same_usage(old_usage.enumStoreUsage(), attribute_usage.enumStoreUsage()) && - same_usage(old_usage.multiValueUsage(), attribute_usage.multiValueUsage())) { + if (same_usage(old_usage.max_address_space_usage(), attribute_usage.max_address_space_usage())) { return; // usage for document type has not changed } old_usage = attribute_usage; - double enum_store_max = attribute_usage.enumStoreUsage().getUsage().usage(); - double multivalue_max = attribute_usage.multiValueUsage().getUsage().usage(); - double old_enum_store_max = _resource_usage.get_attribute_enum_store_usage().get_usage(); - double old_multivalue_max = _resource_usage.get_attribute_multivalue_usage().get_usage(); + double address_space_max = attribute_usage.max_address_space_usage().getUsage().usage(); + double old_address_space_max = _resource_usage.get_attribute_address_space_usage().get_usage(); - if (can_skip_scan(enum_store_max, old_enum_store_max, document_type == _attribute_enum_store_max_document_type) && - can_skip_scan(multivalue_max, old_multivalue_max, document_type == _attribute_multivalue_max_document_type)) { + if (can_skip_scan(address_space_max, old_address_space_max, + document_type == _attribute_address_space_max_document_type)) { return; // usage for document type is less than or equal to usage for other document types } if (scan_attribute_usage(false, guard) && _listener != nullptr) { @@ -168,7 +165,7 @@ class MaxAttributeUsage double _max_usage; vespalib::string get_name() const { - return *_document_type + "." + _max->getSubDbName() + "." + _max->getAttributeName(); + return *_document_type + "." + _max->getSubDbName() + "." + _max->getAttributeName() + "." + _max->get_component_name(); } public: @@ -203,23 +200,19 @@ public: bool ResourceUsageTracker::scan_attribute_usage(bool force_changed, std::lock_guard<std::mutex>&) { - MaxAttributeUsage enum_store_max; - MaxAttributeUsage multivalue_max; + MaxAttributeUsage address_space_max; for (const auto& kv : _attribute_usage) { - enum_store_max.sample(kv.first, kv.second.enumStoreUsage()); - multivalue_max.sample(kv.first, kv.second.multiValueUsage()); + address_space_max.sample(kv.first, kv.second.max_address_space_usage()); } ResourceUsage new_resource_usage(_resource_usage.get_disk_usage(), _resource_usage.get_memory_usage(), - enum_store_max.get_max_resource_usage(), - multivalue_max.get_max_resource_usage()); + address_space_max.get_max_resource_usage()); bool changed = (new_resource_usage != _resource_usage) || force_changed; if (changed) { _resource_usage = std::move(new_resource_usage); - _attribute_enum_store_max_document_type = enum_store_max.get_document_type(); - _attribute_multivalue_max_document_type = multivalue_max.get_document_type(); + _attribute_address_space_max_document_type = address_space_max.get_document_type(); } return changed; } diff --git a/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.h b/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.h index 879e60aeeac..40d8598a8da 100644 --- a/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.h +++ b/searchcore/src/vespa/searchcore/proton/persistenceengine/resource_usage_tracker.h @@ -31,8 +31,7 @@ class ResourceUsageTracker : public std::enable_shared_from_this<ResourceUsageTr storage::spi::IResourceUsageListener* _listener; IDiskMemUsageNotifier& _disk_mem_usage_notifier; vespalib::hash_map<vespalib::string, AttributeUsageStats> _attribute_usage; - vespalib::string _attribute_enum_store_max_document_type; - vespalib::string _attribute_multivalue_max_document_type; + vespalib::string _attribute_address_space_max_document_type; void remove_listener(); void remove_document_type(const vespalib::string &document_type); void notify_attribute_usage(const vespalib::string &document_type, const AttributeUsageStats &attribute_usage); diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp index 8b923c7a372..956e9ea198e 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdb_metrics_updater.cpp @@ -302,12 +302,14 @@ DocumentDBMetricsUpdater::updateMetrics(const metrics::MetricLockGuard & guard, void DocumentDBMetricsUpdater::updateAttributeResourceUsageMetrics(DocumentDBTaggedMetrics::AttributeMetrics &metrics) { - AttributeUsageStats attributeUsageStats = _writeFilter.getAttributeUsageStats(); + AttributeUsageStats stats = _writeFilter.getAttributeUsageStats(); bool feedBlocked = !_writeFilter.acceptWriteOperation(); - double enumStoreUsed = attributeUsageStats.enumStoreUsage().getUsage().usage(); - double multiValueUsed = attributeUsageStats.multiValueUsage().getUsage().usage(); + double enumStoreUsed = stats.enumStoreUsage().getUsage().usage(); + double multiValueUsed = stats.multiValueUsage().getUsage().usage(); + double address_space_used = stats.max_address_space_usage().getUsage().usage(); metrics.resourceUsage.enumStore.set(enumStoreUsed); metrics.resourceUsage.multiValue.set(multiValueUsed); + metrics.resourceUsage.address_space.set(address_space_used); metrics.resourceUsage.feedingBlocked.set(feedBlocked ? 1 : 0); } diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp index fb154081963..184bc339d98 100644 --- a/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/documentdbconfigmanager.cpp @@ -151,8 +151,7 @@ buildMaintenanceConfig(const BootstrapConfig::SP &bootstrapConfig, proton.lidspacecompaction.removeblockrate, isDocumentTypeGlobal), AttributeUsageFilterConfig( - proton.writefilter.attribute.enumstorelimit, - proton.writefilter.attribute.multivaluelimit), + proton.writefilter.attribute.addressSpaceLimit), vespalib::from_s(proton.writefilter.sampleinterval), BlockableMaintenanceJobConfig( proton.maintenancejobs.resourcelimitfactor, diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index a49816f1a52..084bdf90a14 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -36,6 +36,7 @@ LOG_SETUP("tensorattribute_test"); using document::WrongTensorTypeException; using search::AttributeGuard; using search::AttributeVector; +using search::CompactionStrategy; using search::attribute::DistanceMetric; using search::attribute::HnswIndexParams; using search::queryeval::GlobalFilter; @@ -199,11 +200,18 @@ public: void trim_hold_lists(generation_t first_used_gen) override { _trim_gen = first_used_gen; } + bool consider_compact(const CompactionStrategy&) override { + return false; + } + vespalib::MemoryUsage update_stat() override { + return vespalib::MemoryUsage(); + } vespalib::MemoryUsage memory_usage() const override { ++_memory_usage_cnt; return vespalib::MemoryUsage(); } void get_state(const vespalib::slime::Inserter&) const override {} + void shrink_lid_space(uint32_t) override { } std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override { if (_index_value != 0) { return std::make_unique<MockIndexSaver>(_index_value); diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 3f7ec140781..9e7f082e237 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -1,5 +1,6 @@ // Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchcommon/common/compaction_strategy.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/doc_vector_access.h> @@ -20,7 +21,7 @@ using namespace search::tensor; using namespace vespalib::slime; using vespalib::Slime; using search::BitVector; - +using search::CompactionStrategy; template <typename FloatType> class MyDocVectorAccess : public DocVectorAccess { @@ -42,6 +43,8 @@ public: ArrayRef ref(_vectors[docid]); return vespalib::eval::TypedCells(ref); } + + void clear() { _vectors.clear(); } }; struct LevelGenerator : public RandomLevelGenerator { @@ -111,6 +114,10 @@ public: MemoryUsage memory_usage() const { return index->memory_usage(); } + MemoryUsage commit_and_update_stat() { + commit(); + return index->update_stat(); + } void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) { EXPECT_EQ(exp_docid, index->get_entry_docid()); EXPECT_EQ(exp_level, index->get_entry_level()); @@ -166,6 +173,8 @@ public: docid, hit.docid, hit.distance, thr); } } + + FloatVectors& get_vectors() { return vectors; } }; @@ -533,6 +542,113 @@ TEST_F(HnswIndexTest, shrink_called_heuristic) EXPECT_TRUE(index->check_link_symmetry()); } +namespace { + +template <class ResultGraph> +ResultGraph +make_graph_helper(HnswIndex& index) +{ + using LevelArrayRef = HnswGraph::LevelArrayRef; + using LinkArrayRef = HnswGraph::LinkArrayRef; + auto& graph = index.get_graph(); + ResultGraph result(graph.size()); + assert(!graph.get_node_ref(0).valid()); + for (uint32_t doc_id = 1; doc_id < graph.size(); ++doc_id) { + auto& node = result[doc_id]; + auto node_ref = graph.get_node_ref(doc_id); + if constexpr (std::is_same_v<std::remove_reference_t<decltype(node)>, uint32_t>) { + node = node_ref.ref(); + } else { + LevelArrayRef level_array(graph.get_level_array(node_ref)); + for (uint32_t level = 0; level < level_array.size(); ++level) { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(node)>, std::vector<uint32_t>>) { + node.emplace_back(level_array[level].load_relaxed().ref()); + } else { + LinkArrayRef link_array(graph.get_link_array(level_array, level)); + node.emplace_back(std::vector<uint32_t>(link_array.begin(), link_array.end())); + } + } + } + } + return result; +} + +using LinkGraph = std::vector<std::vector<std::vector<uint32_t>>>; + +LinkGraph +make_link_graph(HnswIndex& index) +{ + return make_graph_helper<LinkGraph>(index); +} + +using LinkArrayRefGraph = std::vector<std::vector<uint32_t>>; + +LinkArrayRefGraph +make_link_array_refs(HnswIndex& index) +{ + return make_graph_helper<LinkArrayRefGraph>(index); +} + +using LevelArrayRefGraph = std::vector<uint32_t>; + +LevelArrayRefGraph +make_level_array_refs(HnswIndex& index) +{ + return make_graph_helper<LevelArrayRefGraph>(index); +} + +} + +TEST_F(HnswIndexTest, hnsw_graph_is_compacted) +{ + init(true); + get_vectors().clear(); + uint32_t doc_id = 1; + for (uint32_t x = 0; x < 100; ++x) { + for (uint32_t y = 0; y < 50; ++y) { + get_vectors().set(doc_id, { float(x), float(y) }); + ++doc_id; + } + } + uint32_t doc_id_end = doc_id; + for (doc_id = 1; doc_id < doc_id_end; ++doc_id) { + add_document(doc_id); + } + for (doc_id = 10; doc_id < doc_id_end; ++doc_id) { + remove_document(doc_id); + } + auto mem_1 = commit_and_update_stat(); + auto link_graph_1 = make_link_graph(*index); + auto link_array_refs_1 = make_link_array_refs(*index); + auto level_array_refs_1 = make_level_array_refs(*index); + // Normal compaction + EXPECT_TRUE(index->consider_compact(CompactionStrategy())); + auto mem_2 = commit_and_update_stat(); + EXPECT_LT(mem_2.usedBytes(), mem_1.usedBytes()); + for (uint32_t i = 0; i < 10; ++i) { + mem_1 = mem_2; + // Forced compaction to move things around + index->compact_link_arrays(true, false); + index->compact_level_arrays(true, false); + commit(); + index->update_stat(); + mem_2 = commit_and_update_stat(); + EXPECT_LE(mem_2.usedBytes(), mem_1.usedBytes()); + if (mem_2.usedBytes() == mem_1.usedBytes()) { + break; + } + } + auto link_graph_2 = make_link_graph(*index); + auto link_array_refs_2 = make_link_array_refs(*index); + auto level_array_refs_2 = make_level_array_refs(*index); + EXPECT_EQ(link_graph_1, link_graph_2); + EXPECT_NE(link_array_refs_1, link_array_refs_2); + EXPECT_NE(level_array_refs_1, level_array_refs_2); + index->shrink_lid_space(10); + auto mem_3 = commit_and_update_stat(); + EXPECT_LT(mem_3.usedBytes(), mem_2.usedBytes()); +} + TEST(LevelGeneratorTest, gives_various_levels) { InvLogLevelGenerator generator(4); diff --git a/searchlib/src/vespa/searchlib/attribute/address_space_usage.h b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h index 8698fa6d9ef..9a92bb5d858 100644 --- a/searchlib/src/vespa/searchlib/attribute/address_space_usage.h +++ b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h @@ -24,6 +24,7 @@ public: const vespalib::AddressSpace& multi_value_usage); void set(const vespalib::string& component, const vespalib::AddressSpace& usage); vespalib::AddressSpace get(const vespalib::string& component) const; + const AddressSpaceMap& get_all() const { return _map; } vespalib::AddressSpace enum_store_usage() const; vespalib::AddressSpace multi_value_usage() const; }; diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 62a1072de48..1639f5f8113 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -400,6 +400,18 @@ DenseTensorAttribute::getVersion() const } void +DenseTensorAttribute::onCommit() +{ + TensorAttribute::onCommit(); + if (_index) { + if (_index->consider_compact(getConfig().getCompactionStrategy())) { + incGeneration(); + updateStat(true); + } + } +} + +void DenseTensorAttribute::onGenerationChange(generation_t next_gen) { // TODO: Change onGenerationChange() to send current generation instead of next generation. @@ -430,6 +442,15 @@ DenseTensorAttribute::get_state(const vespalib::slime::Inserter& inserter) const } } +void +DenseTensorAttribute::onShrinkLidSpace() +{ + TensorAttribute::onShrinkLidSpace(); + if (_index) { + _index->shrink_lid_space(getCommittedDocIdLimit()); + } +} + vespalib::eval::TypedCells DenseTensorAttribute::get_vector(uint32_t docid) const { diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index 752db849b68..8899b1e4bd1 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -43,9 +43,11 @@ public: std::unique_ptr<AttributeSaver> onInitSave(vespalib::stringref fileName) override; void compactWorst() override; uint32_t getVersion() const override; + void onCommit() override; void onGenerationChange(generation_t next_gen) override; void removeOldGenerations(generation_t first_used_gen) override; void get_state(const vespalib::slime::Inserter& inserter) const override; + void onShrinkLidSpace() override; // Implements DocVectorAccess vespalib::eval::TypedCells get_vector(uint32_t docid) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index b1545d587b8..f58dbcafbec 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -9,6 +9,7 @@ namespace search::tensor { HnswGraph::HnswGraph() : node_refs(), + node_refs_size(1u), nodes(HnswIndex::make_default_node_store_config()), links(HnswIndex::make_default_link_store_config()), entry_docid_and_level() @@ -30,6 +31,9 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) vespalib::Array<AtomicEntryRef> levels(num_levels, AtomicEntryRef()); auto node_ref = nodes.add(levels); node_refs[docid].store_release(node_ref); + if (docid >= node_refs_size.load(std::memory_order_relaxed)) { + node_refs_size.store(docid + 1, std::memory_order_release); + } return node_ref; } @@ -47,6 +51,19 @@ HnswGraph::remove_node_for_document(uint32_t docid) auto old_links_ref = levels[i].load_acquire(); links.remove(old_links_ref); } + if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) { + trim_node_refs_size(); + } +} + +void +HnswGraph::trim_node_refs_size() +{ + uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1; + while (check_doc_id > 0u && !node_refs[check_doc_id].load_relaxed().valid()) { + --check_doc_id; + } + node_refs_size.store(check_doc_id + 1, std::memory_order_release); } void diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 4d07f74f8e3..57826088ca5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -40,6 +40,7 @@ struct HnswGraph { using LinkArrayRef = LinkStore::ConstArrayRef; NodeRefVector node_refs; + std::atomic<uint32_t> node_refs_size; NodeStore nodes; LinkStore links; @@ -52,6 +53,8 @@ struct HnswGraph { void remove_node_for_document(uint32_t docid); + void trim_node_refs_size(); + NodeRef get_node_ref(uint32_t docid) const { return node_refs[docid].load_acquire(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 8183a7caf3d..ca5f522457a 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -5,6 +5,7 @@ #include "hnsw_index_loader.h" #include "hnsw_index_saver.h" #include "random_level_generator.h" +#include <vespa/searchcommon/common/compaction_strategy.h> #include <vespa/searchlib/util/state_explorer_utils.h> #include <vespa/vespalib/data/slime/cursor.h> #include <vespa/vespalib/data/slime/inserter.h> @@ -228,7 +229,7 @@ HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level, const search::BitVector *filter) const { NearestPriQ candidates; - uint32_t doc_id_limit = _graph.node_refs.size(); + uint32_t doc_id_limit = _graph.node_refs_size.load(std::memory_order_acquire); if (filter) { doc_id_limit = std::min(filter->size(), doc_id_limit); } @@ -253,9 +254,11 @@ HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, } candidates.pop(); for (uint32_t neighbor_docid : _graph.get_link_array(cand.node_ref, level)) { + if (neighbor_docid >= doc_id_limit) { + continue; + } auto neighbor_ref = _graph.get_node_ref(neighbor_docid); if ((! neighbor_ref.valid()) - || (neighbor_docid >= doc_id_limit) || visited.is_marked(neighbor_docid)) { continue; @@ -282,7 +285,12 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan _vectors(vectors), _distance_func(std::move(distance_func)), _level_generator(std::move(level_generator)), - _cfg(cfg) + _cfg(cfg), + _visited_set_pool(), + _cached_level_arrays_memory_usage(), + _cached_level_arrays_address_space_usage(0, 0, (1ull << 32)), + _cached_link_arrays_memory_usage(), + _cached_link_arrays_address_space_usage(0, 0, (1ull << 32)) { assert(_distance_func); } @@ -472,6 +480,93 @@ HnswIndex::trim_hold_lists(generation_t first_used_gen) _graph.links.trimHoldLists(first_used_gen); } +void +HnswIndex::compact_level_arrays(bool compact_memory, bool compact_address_space) +{ + auto context = _graph.nodes.compactWorst(compact_memory, compact_address_space); + uint32_t doc_id_limit = _graph.node_refs.size(); + vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], doc_id_limit); + context->compact(refs); +} + +void +HnswIndex::compact_link_arrays(bool compact_memory, bool compact_address_space) +{ + auto context = _graph.links.compactWorst(compact_memory, compact_address_space); + uint32_t doc_id_limit = _graph.node_refs.size(); + for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) { + EntryRef level_ref = _graph.node_refs[doc_id].load_relaxed(); + if (level_ref.valid()) { + vespalib::ArrayRef<AtomicEntryRef> refs(_graph.nodes.get_writable(level_ref)); + context->compact(refs); + } + } +} + +namespace { + +bool +consider_compact_arrays(const CompactionStrategy& compaction_strategy, vespalib::MemoryUsage& memory_usage, vespalib::AddressSpace& address_space_usage, std::function<void(bool,bool)> compact_arrays) +{ + size_t used_bytes = memory_usage.usedBytes(); + size_t dead_bytes = memory_usage.deadBytes(); + bool compact_memory = compaction_strategy.should_compact_memory(used_bytes, dead_bytes); + size_t used_address_space = address_space_usage.used(); + size_t dead_address_space = address_space_usage.dead(); + bool compact_address_space = compaction_strategy.should_compact_address_space(used_address_space, dead_address_space); + if (compact_memory || compact_address_space) { + compact_arrays(compact_memory, compact_address_space); + return true; + } + return false; +} + +} + +bool +HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy) +{ + return consider_compact_arrays(compaction_strategy, _cached_level_arrays_memory_usage, _cached_level_arrays_address_space_usage, + [this](bool compact_memory, bool compact_address_space) + { compact_level_arrays(compact_memory, compact_address_space); }); +} + +bool +HnswIndex::consider_compact_link_arrays(const CompactionStrategy& compaction_strategy) +{ + return consider_compact_arrays(compaction_strategy, _cached_link_arrays_memory_usage, _cached_link_arrays_address_space_usage, + [this](bool compact_memory, bool compact_address_space) + { compact_link_arrays(compact_memory, compact_address_space); }); +} + +bool +HnswIndex::consider_compact(const CompactionStrategy& compaction_strategy) +{ + bool result = false; + if (consider_compact_level_arrays(compaction_strategy)) { + result = true; + } + if (consider_compact_link_arrays(compaction_strategy)) { + result = true; + } + return result; +} + +vespalib::MemoryUsage +HnswIndex::update_stat() +{ + vespalib::MemoryUsage result; + result.merge(_graph.node_refs.getMemoryUsage()); + _cached_level_arrays_memory_usage = _graph.nodes.getMemoryUsage(); + _cached_level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage(); + result.merge(_cached_level_arrays_memory_usage); + _cached_link_arrays_memory_usage = _graph.links.getMemoryUsage(); + _cached_link_arrays_address_space_usage = _graph.links.addressSpaceUsage(); + result.merge(_cached_link_arrays_memory_usage); + result.merge(_visited_set_pool.memory_usage()); + return result; +} + vespalib::MemoryUsage HnswIndex::memory_usage() const { @@ -526,6 +621,18 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const _cfg.neighbors_to_explore_at_construction()); } +void +HnswIndex::shrink_lid_space(uint32_t doc_id_limit) +{ + assert(doc_id_limit >= 1u); + assert(doc_id_limit >= _graph.node_refs_size.load(std::memory_order_relaxed)); + uint32_t old_doc_id_limit = _graph.node_refs.size(); + if (doc_id_limit >= old_doc_id_limit) { + return; + } + _graph.node_refs.shrink(doc_id_limit); +} + std::unique_ptr<NearestNeighborIndexSaver> HnswIndex::make_saver() const { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index ef0f38c2263..6f6d213d6cc 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -80,6 +80,10 @@ protected: RandomLevelGenerator::UP _level_generator; Config _cfg; mutable vespalib::ReusableSetPool _visited_set_pool; + vespalib::MemoryUsage _cached_level_arrays_memory_usage; + vespalib::AddressSpace _cached_level_arrays_address_space_usage; + vespalib::MemoryUsage _cached_link_arrays_memory_usage; + vespalib::AddressSpace _cached_link_arrays_address_space_usage; uint32_t max_links_for_level(uint32_t level) const; void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) { @@ -161,8 +165,15 @@ public: void remove_document(uint32_t docid) override; void transfer_hold_lists(generation_t current_gen) override; void trim_hold_lists(generation_t first_used_gen) override; + void compact_level_arrays(bool compact_memory, bool compact_addreess_space); + void compact_link_arrays(bool compact_memory, bool compact_address_space); + bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy); + bool consider_compact_link_arrays(const CompactionStrategy& compaction_strategy); + bool consider_compact(const CompactionStrategy& compaction_strategy) override; + vespalib::MemoryUsage update_stat() override; vespalib::MemoryUsage memory_usage() const override; void get_state(const vespalib::slime::Inserter& inserter) const override; + void shrink_lid_space(uint32_t doc_id_limit) override; std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override; bool load(const fileutil::LoadedBuffer& buf) override; @@ -184,6 +195,7 @@ public: void set_node(uint32_t docid, const HnswNode &node); bool check_link_symmetry() const; std::pair<uint32_t, bool> count_reachable_nodes() const; + HnswGraph& get_graph() { return _graph; } static vespalib::datastore::ArrayStoreConfig make_default_node_store_config(); static vespalib::datastore::ArrayStoreConfig make_default_link_store_config(); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp index ac98b28d105..c0aec9ff91a 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp @@ -38,7 +38,9 @@ HnswIndexLoader::load(const fileutil::LoadedBuffer& buf) } } if (_failed) return false; - _graph.node_refs.ensure_size(num_nodes); + _graph.node_refs.ensure_size(std::max(num_nodes, 1u)); + _graph.node_refs_size.store(std::max(num_nodes, 1u), std::memory_order_release); + _graph.trim_node_refs_size(); auto entry_node_ref = _graph.get_node_ref(entry_docid); _graph.set_entry_node({entry_docid, entry_node_ref, entry_level}); return true; diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index fd37cf80720..0122738e173 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -14,7 +14,10 @@ namespace vespalib::slime { struct Inserter; } namespace search::fileutil { class LoadedBuffer; } -namespace search { class BitVector; } +namespace search { +class BitVector; +class CompactionStrategy; +} namespace search::tensor { @@ -59,8 +62,11 @@ public: virtual void remove_document(uint32_t docid) = 0; virtual void transfer_hold_lists(generation_t current_gen) = 0; virtual void trim_hold_lists(generation_t first_used_gen) = 0; + virtual bool consider_compact(const CompactionStrategy& compaction_strategy) = 0; + virtual vespalib::MemoryUsage update_stat() = 0; virtual vespalib::MemoryUsage memory_usage() const = 0; virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0; + virtual void shrink_lid_space(uint32_t doc_id_limit) = 0; /** * Creates a saver that is used to save the index to binary form. diff --git a/slobrok/src/vespa/slobrok/server/cmd.cpp b/slobrok/src/vespa/slobrok/server/cmd.cpp index 60060b9868f..b809f655a9d 100644 --- a/slobrok/src/vespa/slobrok/server/cmd.cpp +++ b/slobrok/src/vespa/slobrok/server/cmd.cpp @@ -55,7 +55,7 @@ ScriptCommand::makeRegRpcSrvCmd(SBEnv &env, } ScriptCommand -ScriptCommand::makeRemRemCmd(SBEnv &env, const std::string & name, const std::string &spec) +ScriptCommand::makeIgnoreCmd(SBEnv &env, const std::string & name, const std::string &spec) { auto data = std::make_unique<ScriptData>(env, name, spec, nullptr); data->_state = ScriptData::XCH_IGNORE; @@ -72,7 +72,7 @@ ScriptCommand::doRequest() void cleanupReservation(ScriptData & data) { - RpcServerMap &map = data.env._rpcsrvmap; + RpcServerMap &map = data.env.rpcServerMap(); const ReservedName *rsvp = map.getReservation(data.name.c_str()); if (rsvp != nullptr && rsvp->isLocal) { map.removeReservation(data.name.c_str()); @@ -88,13 +88,14 @@ ScriptCommand::doneHandler(OkState result) ScriptData & data = *dataUP; const char *name_p = data.name.c_str(); const char *spec_p = data.spec.c_str(); - ExchangeManager &xch = data.env._exchanger; - RpcServerManager &rsm = data.env._rpcsrvmanager; + ExchangeManager &xch = data.env.exchangeManager(); + RpcServerManager &rsm = data.env.rpcServerManager(); if (result.failed()) { - LOG(warning, "failed [%s->%s] in state %d: %s", - name_p, spec_p, data._state, result.errorMsg.c_str()); - cleanupReservation(data); + LOG(warning, "failed [%s->%s] in state %d: %s", name_p, spec_p, data._state, result.errorMsg.c_str()); + if (data._state != ScriptData::XCH_IGNORE) { + cleanupReservation(data); + } // XXX should handle different state errors differently? if (data.registerRequest != nullptr) { data.registerRequest->SetError(FRTE_RPC_METHOD_FAILED, result.errorMsg.c_str()); @@ -124,14 +125,13 @@ ScriptCommand::doneHandler(OkState result) data._state = ScriptData::RDC_INVAL; // all OK data.registerRequest->Return(); - goto alldone; + cleanupReservation(data); + return; } else if (data._state == ScriptData::XCH_IGNORE) { - goto alldone; + return; } // no other state should be possible LOG_ABORT("should not be reached"); - alldone: - cleanupReservation(data); } //----------------------------------------------------------------------------- diff --git a/slobrok/src/vespa/slobrok/server/cmd.h b/slobrok/src/vespa/slobrok/server/cmd.h index 02ae16e457b..d790ae93f5c 100644 --- a/slobrok/src/vespa/slobrok/server/cmd.h +++ b/slobrok/src/vespa/slobrok/server/cmd.h @@ -25,7 +25,7 @@ public: ~ScriptCommand(); static ScriptCommand makeRegRpcSrvCmd(SBEnv &env, const std::string &name, const std::string &spec, FRT_RPCRequest *req); - static ScriptCommand makeRemRemCmd(SBEnv &env, const std::string &name, const std::string &spec); + static ScriptCommand makeIgnoreCmd(SBEnv &env, const std::string &name, const std::string &spec); void doneHandler(OkState result); void doRequest(); diff --git a/slobrok/src/vespa/slobrok/server/exchange_manager.cpp b/slobrok/src/vespa/slobrok/server/exchange_manager.cpp index 632c823a2c1..87892c59f21 100644 --- a/slobrok/src/vespa/slobrok/server/exchange_manager.cpp +++ b/slobrok/src/vespa/slobrok/server/exchange_manager.cpp @@ -15,7 +15,7 @@ namespace slobrok { ExchangeManager::ExchangeManager(SBEnv &env, RpcServerMap &rpcsrvmap) : _partners(), _env(env), - _rpcsrvmanager(env._rpcsrvmanager), + _rpcsrvmanager(env.rpcServerManager()), _rpcsrvmap(rpcsrvmap) { } @@ -68,7 +68,7 @@ void ExchangeManager::forwardRemove(const std::string & name, const std::string & spec) { WorkPackage *package = new WorkPackage(WorkPackage::OP_REMOVE, *this, - ScriptCommand::makeRemRemCmd(_env, name, spec)); + ScriptCommand::makeIgnoreCmd(_env, name, spec)); for (const auto & entry : _partners) { package->addItem(entry.second.get()); } diff --git a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp b/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp index dfcdc0ef9d1..0f0679e35ea 100644 --- a/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp +++ b/slobrok/src/vespa/slobrok/server/rpc_server_manager.cpp @@ -17,8 +17,8 @@ namespace slobrok { RpcServerManager::RpcServerManager(SBEnv &sbenv) : FNET_Task(sbenv.getScheduler()), - _rpcsrvmap(sbenv._rpcsrvmap), - _exchanger(sbenv._exchanger), + _rpcsrvmap(sbenv.rpcServerMap()), + _exchanger(sbenv.exchangeManager()), _env(sbenv), _addManageds(), _deleteList() diff --git a/slobrok/src/vespa/slobrok/server/rpchooks.cpp b/slobrok/src/vespa/slobrok/server/rpchooks.cpp index 2fefce1d474..062ec148bfb 100644 --- a/slobrok/src/vespa/slobrok/server/rpchooks.cpp +++ b/slobrok/src/vespa/slobrok/server/rpchooks.cpp @@ -234,7 +234,12 @@ RPCHooks::rpc_registerRpcServer(FRT_RPCRequest *req) LOG(debug, "RPC: invoked registerRpcServer(%s,%s)", dName, dSpec); _cnts.registerReqs++; - + { + // TODO: run only this path, and complete the request instead of ignoring + auto script = ScriptCommand::makeIgnoreCmd(_env, dName, dSpec); + ServiceMapping mapping{dName, dSpec}; + _env.localMonitorMap().addLocal(mapping, std::make_unique<ScriptCommand>(std::move(script))); + } // is this already OK? if (_rpcsrvmanager.alreadyManaged(dName, dSpec)) { LOG(debug, "registerRpcServer(%s,%s) OK, already managed", diff --git a/slobrok/src/vespa/slobrok/server/sbenv.cpp b/slobrok/src/vespa/slobrok/server/sbenv.cpp index d8a3e62f44e..1f54716c29c 100644 --- a/slobrok/src/vespa/slobrok/server/sbenv.cpp +++ b/slobrok/src/vespa/slobrok/server/sbenv.cpp @@ -119,7 +119,8 @@ SBEnv::SBEnv(const ConfigShim &shim) _rpcsrvmap() { srandom(time(nullptr) ^ getpid()); - _localMonitorSubscription = MapSubscription::subscribe(_rpcsrvmap.proxy(), _localRpcMonitorMap); + // note: feedback loop between these two: + _localMonitorSubscription = MapSubscription::subscribe(_consensusMap, _localRpcMonitorMap); _consensusSubscription = MapSubscription::subscribe(_localRpcMonitorMap.dispatcher(), _consensusMap); // TODO: use consensus as source here: _globalHistorySubscription = MapSubscription::subscribe(_rpcsrvmap.proxy(), _globalVisibleHistory); diff --git a/slobrok/src/vespa/slobrok/server/sbenv.h b/slobrok/src/vespa/slobrok/server/sbenv.h index 576e5d4b30c..44b7305814c 100644 --- a/slobrok/src/vespa/slobrok/server/sbenv.h +++ b/slobrok/src/vespa/slobrok/server/sbenv.h @@ -61,6 +61,14 @@ private: UnionServiceMap _consensusMap; ServiceMapHistory _globalVisibleHistory; + RpcServerManager _rpcsrvmanager; + ExchangeManager _exchanger; + RpcServerMap _rpcsrvmap; + + std::unique_ptr<MapSubscription> _localMonitorSubscription; + std::unique_ptr<MapSubscription> _consensusSubscription; + std::unique_ptr<MapSubscription> _globalHistorySubscription; + public: explicit SBEnv(const ConfigShim &shim); ~SBEnv(); @@ -73,14 +81,18 @@ public: void suspend(); void resume(); - RpcServerManager _rpcsrvmanager; - ExchangeManager _exchanger; - RpcServerMap _rpcsrvmap; + RpcServerManager& rpcServerManager() { return _rpcsrvmanager; } + ExchangeManager& exchangeManager() { return _exchanger; } + RpcServerMap& rpcServerMap() { return _rpcsrvmap; } ServiceMapHistory& globalHistory() { return _globalVisibleHistory; } + LocalRpcMonitorMap& localMonitorMap() { + return _localRpcMonitorMap; + } + ServiceMapHistory& localHistory() { return _localRpcMonitorMap.history(); } @@ -100,11 +112,6 @@ public: OkState removePeer(const std::string& name, const std::string &spec); void countFailedHeartbeat() { _rpcHooks.countFailedHeartbeat(); } - -private: - std::unique_ptr<MapSubscription> _localMonitorSubscription; - std::unique_ptr<MapSubscription> _consensusSubscription; - std::unique_ptr<MapSubscription> _globalHistorySubscription; }; } // namespace slobrok diff --git a/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp b/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp index e5ba04f789b..0b984eabea8 100644 --- a/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp +++ b/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp @@ -30,8 +30,7 @@ get_attribute_usage_element(const vespalib::Slime& root, const vespalib::string& return AttributeResourceUsage(usage, name.make_string()); } -const vespalib::string attr_es_name("doctype.subdb.esattr"); -const vespalib::string attr_mv_name("doctype.subdb.mvattr"); +const vespalib::string attr_name("doctype.subdb.attr.enum-store"); } @@ -43,12 +42,12 @@ struct ServiceLayerHostInfoReporterTest : ::testing::Test { ServiceLayerHostInfoReporterTest(); ~ServiceLayerHostInfoReporterTest(); - void notify(double disk_usage, double memory_usage, const AttributeResourceUsage &attribute_enum_store_usage, const AttributeResourceUsage &attribute_multivalue_usage) { + void notify(double disk_usage, double memory_usage, const AttributeResourceUsage &attribute_address_space_usage) { auto& listener = static_cast<spi::IResourceUsageListener&>(_reporter); - listener.update_resource_usage(ResourceUsage(disk_usage, memory_usage, attribute_enum_store_usage, attribute_multivalue_usage)); + listener.update_resource_usage(ResourceUsage(disk_usage, memory_usage, attribute_address_space_usage)); } void notify(double disk_usage, double memory_usage) { - notify(disk_usage, memory_usage, {0.0, ""}, {0.0, ""}); + notify(disk_usage, memory_usage, {0.0, ""}); } void set_noise_level(double level) { _reporter.set_noise_level(level); @@ -60,7 +59,8 @@ struct ServiceLayerHostInfoReporterTest : ::testing::Test { ResourceUsage get_slime_usage() { vespalib::Slime root; util::reporterToSlime(_reporter, root); - return ResourceUsage(get_usage_element(root, "disk"), get_usage_element(root, "memory"), get_attribute_usage_element(root, "attribute-enum-store"), get_attribute_usage_element(root, "attribute-multi-value")); + return ResourceUsage(get_usage_element(root, "disk"), get_usage_element(root, "memory"), + get_attribute_usage_element(root, "attribute-address-space")); } }; @@ -97,12 +97,9 @@ TEST_F(ServiceLayerHostInfoReporterTest, request_almost_immediate_node_state_as_ EXPECT_EQ(3, requested_almost_immediate_replies()); EXPECT_EQ(ResourceUsage(0.8, 0.7), get_old_usage()); EXPECT_EQ(ResourceUsage(0.7999, 0.6999), get_usage()); - notify(0.8, 0.7, {0.1, attr_es_name}, {}); - EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_es_name}, {}), get_old_usage()); - EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_es_name}, {}), get_usage()); - notify(0.8, 0.7, {0.1, attr_es_name}, {0.2, attr_mv_name}); - EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_es_name}, {0.2, attr_mv_name}), get_old_usage()); - EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_es_name}, {0.2, attr_mv_name}), get_usage()); + notify(0.8, 0.7, {0.1, attr_name}); + EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_name}), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.8, 0.7, {0.1, attr_name}), get_usage()); } TEST_F(ServiceLayerHostInfoReporterTest, can_set_noise_level) @@ -125,25 +122,14 @@ TEST_F(ServiceLayerHostInfoReporterTest, can_set_noise_level) } TEST_F(ServiceLayerHostInfoReporterTest, - first_valid_attribute_enum_store_sample_triggers_immediate_node_state_when_below_noise_level) + first_valid_attribute_address_space_sample_triggers_immediate_node_state_when_below_noise_level) { set_noise_level(0.02); constexpr double usage_below_noise_level = 0.019; - notify(0.0, 0.0, {usage_below_noise_level, attr_es_name}, {}); + notify(0.0, 0.0, {usage_below_noise_level, attr_name}); EXPECT_EQ(1, requested_almost_immediate_replies()); - EXPECT_EQ(ResourceUsage(0.0, 0.0, {usage_below_noise_level, attr_es_name}, {}), get_old_usage()); - EXPECT_EQ(ResourceUsage(0.0, 0.0, {usage_below_noise_level, attr_es_name}, {}), get_usage()); -} - -TEST_F(ServiceLayerHostInfoReporterTest, - first_valid_attribute_multi_value_sample_triggers_immediate_node_state_when_below_noise_level) -{ - set_noise_level(0.02); - constexpr double usage_below_noise_level = 0.019; - notify(0.0, 0.0, {}, {usage_below_noise_level, attr_mv_name}); - EXPECT_EQ(1, requested_almost_immediate_replies()); - EXPECT_EQ(ResourceUsage(0.0, 0.0, {}, {usage_below_noise_level, attr_mv_name}), get_old_usage()); - EXPECT_EQ(ResourceUsage(0.0, 0.0, {}, {usage_below_noise_level, attr_mv_name}), get_usage()); + EXPECT_EQ(ResourceUsage(0.0, 0.0, {usage_below_noise_level, attr_name}), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.0, 0.0, {usage_below_noise_level, attr_name}), get_usage()); } TEST_F(ServiceLayerHostInfoReporterTest, json_report_generated) @@ -151,8 +137,8 @@ TEST_F(ServiceLayerHostInfoReporterTest, json_report_generated) EXPECT_EQ(ResourceUsage(0.0, 0.0), get_slime_usage()); notify(0.5, 0.4); EXPECT_EQ(ResourceUsage(0.5, 0.4), get_slime_usage()); - notify(0.5, 0.4, {0.3, attr_es_name}, {0.2, attr_mv_name}); - EXPECT_EQ(ResourceUsage(0.5, 0.4, {0.3, attr_es_name}, {0.2, attr_mv_name}), get_slime_usage()); + notify(0.5, 0.4, {0.3, attr_name}); + EXPECT_EQ(ResourceUsage(0.5, 0.4, {0.3, attr_name}), get_slime_usage()); } } diff --git a/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp index 97244582f50..67668d8ea55 100644 --- a/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp @@ -18,8 +18,7 @@ namespace { const vespalib::string memory_label("memory"); const vespalib::string disk_label("disk"); -const vespalib::string attribute_enum_store_label("attribute-enum-store"); -const vespalib::string attribute_multi_value_label("attribute-multi-value"); +const vespalib::string attribute_address_space_label("attribute-address-space"); void write_usage(vespalib::JsonStream& output, const vespalib::string &label, double value) { @@ -40,18 +39,13 @@ bool want_immediate_report(const spi::ResourceUsage& old_usage, const spi::Resou { auto disk_usage_diff = fabs(new_usage.get_disk_usage() - old_usage.get_disk_usage()); auto memory_usage_diff = fabs(new_usage.get_memory_usage() - old_usage.get_memory_usage()); - auto enum_store_diff = fabs(new_usage.get_attribute_enum_store_usage().get_usage() - old_usage.get_attribute_enum_store_usage().get_usage()); - auto multivalue_diff = fabs(new_usage.get_attribute_multivalue_usage().get_usage() - old_usage.get_attribute_multivalue_usage().get_usage()); - bool enum_store_got_valid = !old_usage.get_attribute_enum_store_usage().valid() && - new_usage.get_attribute_enum_store_usage().valid(); - bool multivalue_got_valid = !old_usage.get_attribute_multivalue_usage().valid() && - new_usage.get_attribute_multivalue_usage().valid(); + auto address_space_diff = fabs(new_usage.get_attribute_address_space_usage().get_usage() - old_usage.get_attribute_address_space_usage().get_usage()); + bool address_space_got_valid = !old_usage.get_attribute_address_space_usage().valid() && + new_usage.get_attribute_address_space_usage().valid(); return ((disk_usage_diff > noise_level) || (memory_usage_diff > noise_level) || - (enum_store_diff > noise_level) || - (multivalue_diff > noise_level) || - enum_store_got_valid || - multivalue_got_valid); + (address_space_diff > noise_level) || + address_space_got_valid); } } @@ -121,8 +115,7 @@ ServiceLayerHostInfoReporter::report(vespalib::JsonStream& output) LOG(debug, "report(): usage=%s", to_string(usage).c_str()); write_usage(output, memory_label, usage.get_memory_usage()); write_usage(output, disk_label, usage.get_disk_usage()); - write_attribute_usage(output, attribute_enum_store_label, usage.get_attribute_enum_store_usage()); - write_attribute_usage(output, attribute_multi_value_label, usage.get_attribute_multivalue_usage()); + write_attribute_usage(output, attribute_address_space_label, usage.get_attribute_address_space_usage()); } output << End(); output << End(); diff --git a/vespabase/src/common-env.sh b/vespabase/src/common-env.sh index eb60154071c..d5f8381432f 100755 --- a/vespabase/src/common-env.sh +++ b/vespabase/src/common-env.sh @@ -292,3 +292,30 @@ log_debug_message () { log_warning_message () { log_message "warning" "$*" 1>&2 } + +get_numa_ctl_cmd () { + if ! type numactl &> /dev/null; then + echo "FATAL: Could not find required program numactl." + exit 1 + fi + + numnodes=$(numactl --hardware 2>/dev/null | + grep available | + awk '$3 == "nodes" { print $2 }') + + if [ -n "$numanodes" ]; then + # We are allowed to use numactl and have NUMA nodes + if [ "$VESPA_AFFINITY_CPU_SOCKET" ] && + [ "$numnodes" -gt 1 ] + then + node=$(($VESPA_AFFINITY_CPU_SOCKET % $numnodes)) + numactlcmd="numactl --cpunodebind=$node --membind=$node" + else + numactlcmd="numactl --interleave all" + fi + else + numactlcmd="" + fi + + echo $numactlcmd +} diff --git a/vespabase/src/start-cbinaries.sh b/vespabase/src/start-cbinaries.sh index f17829aa081..1ef45a71dec 100755 --- a/vespabase/src/start-cbinaries.sh +++ b/vespabase/src/start-cbinaries.sh @@ -163,29 +163,13 @@ configure_vespa_malloc () { fi } -configure_numa_ctl () { - numactl="" - if numactl --interleave all true &> /dev/null; then - # We are allowed to use numactl - numactl="numactl --interleave all" - if [ "$VESPA_AFFINITY_CPU_SOCKET" ]; then - numcpu=`numactl --hardware 2>/dev/null | grep available | cut -d' ' -f2` - if [ "$numcpu" ] && [ "$numcpu" -gt 1 ]; then - log_debug_message "Starting $0 with affinity $VESPA_AFFINITY_CPU_SOCKET out of $numcpu" - node=$(($VESPA_AFFINITY_CPU_SOCKET % $numcpu)) - numactl="numactl --cpunodebind=$node --membind=$node" - fi - fi - fi -} - configure_valgrind configure_huge_pages configure_use_madvise configure_vespa_malloc if $no_valgrind ; then - configure_numa_ctl + numactl=$(get_numa_ctl_cmd) ulimit -c unlimited log_debug_message "Starting $0 with : " \ $numactl env LD_PRELOAD=$LD_PRELOAD $0-bin "$@" diff --git a/vespalib/src/tests/datastore/array_store/array_store_test.cpp b/vespalib/src/tests/datastore/array_store/array_store_test.cpp index 562ecaaecfa..0de9b83935f 100644 --- a/vespalib/src/tests/datastore/array_store/array_store_test.cpp +++ b/vespalib/src/tests/datastore/array_store/array_store_test.cpp @@ -18,8 +18,15 @@ using generation_t = vespalib::GenerationHandler::generation_t; using MemStats = vespalib::datastore::test::MemStats; using BufferStats = vespalib::datastore::test::BufferStats; +namespace { + constexpr float ALLOC_GROW_FACTOR = 0.2; +EntryRef as_entry_ref(const EntryRef& ref) noexcept { return ref; } +EntryRef as_entry_ref(const AtomicEntryRef& ref) noexcept { return ref.load_relaxed(); } + +} + template <typename EntryT, typename RefT = EntryRefT<19> > struct Fixture { @@ -115,19 +122,20 @@ struct Fixture store.transferHoldLists(generation++); store.trimHoldLists(generation); } + template <typename TestedRefType> void compactWorst(bool compactMemory, bool compactAddressSpace) { ICompactionContext::UP ctx = store.compactWorst(compactMemory, compactAddressSpace); - std::vector<EntryRef> refs; + std::vector<TestedRefType> refs; for (auto itr = refStore.begin(); itr != refStore.end(); ++itr) { - refs.push_back(itr->first); + refs.emplace_back(itr->first); } - std::vector<EntryRef> compactedRefs = refs; - ctx->compact(ArrayRef<EntryRef>(compactedRefs)); + std::vector<TestedRefType> compactedRefs = refs; + ctx->compact(ArrayRef<TestedRefType>(compactedRefs)); ReferenceStore compactedRefStore; for (size_t i = 0; i < refs.size(); ++i) { - ASSERT_EQUAL(0u, compactedRefStore.count(compactedRefs[i])); - ASSERT_EQUAL(1u, refStore.count(refs[i])); - compactedRefStore.insert(std::make_pair(compactedRefs[i], refStore[refs[i]])); + ASSERT_EQUAL(0u, compactedRefStore.count(as_entry_ref(compactedRefs[i]))); + ASSERT_EQUAL(1u, refStore.count(as_entry_ref(refs[i]))); + compactedRefStore.insert(std::make_pair(as_entry_ref(compactedRefs[i]), refStore[as_entry_ref(refs[i])])); } refStore = compactedRefStore; } @@ -252,7 +260,11 @@ TEST_F("require that new underlying buffer is allocated when current is full", S TEST_DO(f.assertStoreContent()); } -TEST_F("require that the buffer with most dead space is compacted", NumberFixture(2)) +namespace { + +template <typename TestedRefType> +void +test_compaction(NumberFixture &f) { EntryRef size1Ref = f.add({1}); EntryRef size2Ref = f.add({2,2}); @@ -267,7 +279,7 @@ TEST_F("require that the buffer with most dead space is compacted", NumberFixtur uint32_t size3BufferId = f.getBufferId(size3Ref); EXPECT_EQUAL(3u, f.refStore.size()); - f.compactWorst(true, false); + f.compactWorst<TestedRefType>(true, false); EXPECT_EQUAL(3u, f.refStore.size()); f.assertStoreContent(); @@ -281,6 +293,18 @@ TEST_F("require that the buffer with most dead space is compacted", NumberFixtur EXPECT_TRUE(f.store.bufferState(size2Ref).isFree()); } +} + +TEST_F("require that the buffer with most dead space is compacted (EntryRef vector)", NumberFixture(2)) +{ + test_compaction<EntryRef>(f); +} + +TEST_F("require that the buffer with most dead space is compacted (AtomicEntryRef vector)", NumberFixture(2)) +{ + test_compaction<AtomicEntryRef>(f); +} + namespace { void testCompaction(NumberFixture &f, bool compactMemory, bool compactAddressSpace) @@ -300,7 +324,7 @@ void testCompaction(NumberFixture &f, bool compactMemory, bool compactAddressSpa uint32_t size3BufferId = f.getBufferId(size3Ref); EXPECT_EQUAL(3u, f.refStore.size()); - f.compactWorst(compactMemory, compactAddressSpace); + f.compactWorst<EntryRef>(compactMemory, compactAddressSpace); EXPECT_EQUAL(3u, f.refStore.size()); f.assertStoreContent(); diff --git a/vespalib/src/vespa/vespalib/datastore/array_store.hpp b/vespalib/src/vespa/vespalib/datastore/array_store.hpp index 5409c21594c..29db72c2ed3 100644 --- a/vespalib/src/vespa/vespalib/datastore/array_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/array_store.hpp @@ -157,6 +157,20 @@ public: } } } + void compact(vespalib::ArrayRef<AtomicEntryRef> refs) override { + if (!_bufferIdsToCompact.empty()) { + for (auto &ref : refs) { + if (ref.load_relaxed().valid()) { + RefT internalRef(ref.load_relaxed()); + if (compactingBuffer(internalRef.bufferId())) { + EntryRef newRef = _store.add(_store.get(ref.load_relaxed())); + std::atomic_thread_fence(std::memory_order_release); + ref.store_release(newRef); + } + } + } + } + } }; } diff --git a/vespalib/src/vespa/vespalib/datastore/i_compaction_context.h b/vespalib/src/vespa/vespalib/datastore/i_compaction_context.h index 72b473adf4a..291d751082a 100644 --- a/vespalib/src/vespa/vespalib/datastore/i_compaction_context.h +++ b/vespalib/src/vespa/vespalib/datastore/i_compaction_context.h @@ -16,6 +16,7 @@ struct ICompactionContext { using UP = std::unique_ptr<ICompactionContext>; virtual ~ICompactionContext() {} virtual void compact(vespalib::ArrayRef<EntryRef> refs) = 0; + virtual void compact(vespalib::ArrayRef<AtomicEntryRef> refs) = 0; }; } |