aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--application/abi-spec.json12
-rw-r--r--build_settings.cmake4
-rw-r--r--client/go/internal/cli/auth/zts/zts.go15
-rw-r--r--client/go/internal/cli/auth/zts/zts_test.go8
-rw-r--r--client/go/internal/cli/cmd/cert.go16
-rw-r--r--client/go/internal/cli/cmd/config.go70
-rw-r--r--client/go/internal/cli/cmd/config_test.go6
-rw-r--r--client/go/internal/cli/cmd/prod.go37
-rw-r--r--client/go/internal/vespa/deploy.go16
-rw-r--r--client/go/internal/vespa/deploy_test.go35
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java230
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java43
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java18
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java15
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java13
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java110
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java11
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java9
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java61
-rw-r--r--config-model/src/main/resources/schema/content.rnc2
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java104
-rw-r--r--configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java4
-rw-r--r--configgen/src/test/resources/allfeatures.reference18
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java3
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java4
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java3
-rw-r--r--container-core/abi-spec.json48
-rw-r--r--container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java2
-rw-r--r--container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java2
-rw-r--r--container-disc/abi-spec.json9
-rw-r--r--container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java19
-rw-r--r--container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java4
-rw-r--r--container-search/abi-spec.json18
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java15
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java4
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java5
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java170
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java37
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java18
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java47
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java71
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java3
-rw-r--r--container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj16
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java17
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr2
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java194
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java6
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java44
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java1
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java4
-rw-r--r--documentapi/abi-spec.json15
-rw-r--r--eval/src/vespa/eval/eval/value_type.h20
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java11
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java6
-rw-r--r--jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java7
-rw-r--r--linguistics-components/abi-spec.json12
-rw-r--r--messagebus/abi-spec.json12
-rw-r--r--metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java18
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java65
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java7
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java69
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java7
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java3
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java62
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java18
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java33
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java3
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java5
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java30
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json2
-rw-r--r--screwdriver.yaml2
-rw-r--r--searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp3
-rw-r--r--searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp4
-rw-r--r--searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp10
-rw-r--r--searchcore/src/tests/proton/index/diskindexcleaner_test.cpp6
-rw-r--r--searchcore/src/tests/proton/index/indexmanager_test.cpp2
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt2
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt2
-rw-r--r--searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp2
-rw-r--r--searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp27
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp4
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp159
-rw-r--r--searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp2
-rw-r--r--searchlib/src/tests/attribute/posting_store/posting_store_test.cpp7
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp20
-rw-r--r--searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp10
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp4
-rw-r--r--searchlib/src/tests/diskindex/fusion/fusion_test.cpp6
-rw-r--r--searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp21
-rw-r--r--searchlib/src/tests/features/prod_features.cpp25
-rw-r--r--searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp2
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/basictype.h38
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/collectiontype.h54
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/config.cpp8
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/config.h53
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/distance_metric.h4
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h25
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/predicate_params.h13
-rw-r--r--searchlib/src/vespa/searchcommon/common/dictionary_config.h9
-rw-r--r--searchlib/src/vespa/searchcommon/common/growstrategy.h8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributevector.h5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/configconverter.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.h26
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_search_helper.h2
-rw-r--r--searchlib/src/vespa/searchlib/common/indexmetainfo.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/field_merger.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/docstore/filechunk.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/features/matchfeature.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/fef/properties.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/fef/properties.h42
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h96
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/field_spec.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/field_spec.h34
-rw-r--r--searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/util/filekit.cpp81
-rw-r--r--searchlib/src/vespa/searchlib/util/filekit.h4
-rw-r--r--vespa-feed-client/pom.xml25
-rw-r--r--vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java243
-rw-r--r--vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java12
-rw-r--r--vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java8
-rw-r--r--vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java16
-rw-r--r--vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java20
-rw-r--r--vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java77
-rw-r--r--vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java42
-rw-r--r--vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java10
-rw-r--r--vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java1
-rw-r--r--vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java2
-rw-r--r--vespalib/src/tests/fastlib/io/bufferedfiletest.cpp25
-rw-r--r--vespalib/src/tests/fastos/file_test.cpp4
-rw-r--r--vespalib/src/tests/fileheader/fileheader_test.cpp31
-rw-r--r--vespalib/src/tests/fuzzy/CMakeLists.txt9
-rw-r--r--vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp507
-rw-r--r--vespalib/src/vespa/fastlib/io/bufferedfile.cpp11
-rw-r--r--vespalib/src/vespa/fastlib/io/bufferedfile.h1
-rw-r--r--vespalib/src/vespa/fastos/file.cpp10
-rw-r--r--vespalib/src/vespa/fastos/file.h21
-rw-r--r--vespalib/src/vespa/fastos/linux_file.cpp3
-rw-r--r--vespalib/src/vespa/fastos/unix_file.cpp139
-rw-r--r--vespalib/src/vespa/fastos/unix_file.h8
-rw-r--r--vespalib/src/vespa/vespalib/datastore/bufferstate.h16
-rw-r--r--vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp8
-rw-r--r--vespalib/src/vespa/vespalib/datastore/compaction_strategy.h53
-rw-r--r--vespalib/src/vespa/vespalib/datastore/datastorebase.h4
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt10
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h70
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h299
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg286
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp11
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h147
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp228
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp9
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h35
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp121
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp83
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h244
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp291
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/sparse_state.h175
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp108
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h33
-rw-r--r--vespalib/src/vespa/vespalib/text/lowercase.h4
-rw-r--r--vespalib/src/vespa/vespalib/text/utf8.cpp39
-rw-r--r--vespalib/src/vespa/vespalib/text/utf8.h47
-rw-r--r--vespalib/src/vespa/vespalib/util/alloc.cpp4
-rw-r--r--vespalib/src/vespa/vespalib/util/alloc.h7
-rw-r--r--vespalib/src/vespa/vespalib/util/growstrategy.h9
-rw-r--r--vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp2
-rw-r--r--vespalib/src/vespa/vespalib/util/small_vector.h2
200 files changed, 4924 insertions, 1873 deletions
diff --git a/application/abi-spec.json b/application/abi-spec.json
index c95039b4c1f..95a9d2a524a 100644
--- a/application/abi-spec.json
+++ b/application/abi-spec.json
@@ -89,7 +89,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -128,7 +129,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -159,7 +161,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -190,7 +193,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/build_settings.cmake b/build_settings.cmake
index 63535062c9b..d0bb50360da 100644
--- a/build_settings.cmake
+++ b/build_settings.cmake
@@ -54,20 +54,16 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" ST
if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
set(VESPA_ATOMIC_LIB "")
set(VESPA_GCC_LIB "")
- set(VESPA_STDCXX_FS_LIB "")
else()
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0)
set(VESPA_GCC_LIB "gcc")
- set(VESPA_STDCXX_FS_LIB "stdc++fs")
else()
set(VESPA_GCC_LIB "")
- set(VESPA_STDCXX_FS_LIB "")
endif()
endif()
else()
set(CXX_SPECIFIC_WARN_OPTS "-Wnoexcept -Wsuggest-override -Wnon-virtual-dtor -Wformat-security -Wmismatched-tags")
set(VESPA_GCC_LIB "gcc")
- set(VESPA_STDCXX_FS_LIB "stdc++fs")
endif()
# Detect uring shared library.
diff --git a/client/go/internal/cli/auth/zts/zts.go b/client/go/internal/cli/auth/zts/zts.go
index 0f73ea5912d..19ea6e48b0c 100644
--- a/client/go/internal/cli/auth/zts/zts.go
+++ b/client/go/internal/cli/auth/zts/zts.go
@@ -80,18 +80,21 @@ func (c *Client) AccessToken() (Token, error) {
return Token{}, err
}
defer response.Body.Close()
-
+ b, err := io.ReadAll(response.Body)
+ if err != nil {
+ return Token{}, err
+ }
if response.StatusCode != http.StatusOK {
- return Token{}, fmt.Errorf("zts: got status %d from %s", response.StatusCode, c.tokenURL.String())
+ body := string(b)
+ if body == "" {
+ body = "no body"
+ }
+ return Token{}, fmt.Errorf("zts: got status %d (%s) from %s", response.StatusCode, body, c.tokenURL.String())
}
var ztsResponse struct {
AccessToken string `json:"access_token"`
ExpirySecs int `json:"expires_in"`
}
- b, err := io.ReadAll(response.Body)
- if err != nil {
- return Token{}, err
- }
if err := json.Unmarshal(b, &ztsResponse); err != nil {
return Token{}, err
}
diff --git a/client/go/internal/cli/auth/zts/zts_test.go b/client/go/internal/cli/auth/zts/zts_test.go
index 15c60ed46d7..ad1ed66f460 100644
--- a/client/go/internal/cli/auth/zts/zts_test.go
+++ b/client/go/internal/cli/auth/zts/zts_test.go
@@ -46,6 +46,14 @@ func TestAccessToken(t *testing.T) {
}
expiresAt = clock.now().Add(30 * time.Minute)
assertToken(t, Token{Value: "bar", ExpiresAt: expiresAt}, token)
+
+ // Request body is included in error
+ httpClient.NextResponseString(503, "broken!")
+ _, err = client.AccessToken()
+ want := "zts: got status 503 (broken!) from http://example.com/zts/v1/oauth2/token"
+ if got := err.Error(); got != want {
+ t.Errorf("got err=%q, want %q", got, want)
+ }
}
func assertToken(t *testing.T, want, got Token) {
diff --git a/client/go/internal/cli/cmd/cert.go b/client/go/internal/cli/cmd/cert.go
index ccfce5eb7bb..f7320e37626 100644
--- a/client/go/internal/cli/cmd/cert.go
+++ b/client/go/internal/cli/cmd/cert.go
@@ -114,11 +114,11 @@ func doCert(cli *CLI, overwriteCertificate, skipApplicationPackage bool, args []
if !overwriteCertificate {
hint := "Use -f flag to force overwriting"
- if util.PathExists(privateKeyFile) {
- return errHint(fmt.Errorf("private key %s already exists", color.CyanString(privateKeyFile)), hint)
+ if util.PathExists(privateKeyFile.path) {
+ return errHint(fmt.Errorf("private key %s already exists", color.CyanString(privateKeyFile.path)), hint)
}
- if util.PathExists(certificateFile) {
- return errHint(fmt.Errorf("certificate %s already exists", color.CyanString(certificateFile)), hint)
+ if util.PathExists(certificateFile.path) {
+ return errHint(fmt.Errorf("certificate %s already exists", color.CyanString(certificateFile.path)), hint)
}
}
@@ -126,14 +126,14 @@ func doCert(cli *CLI, overwriteCertificate, skipApplicationPackage bool, args []
if err != nil {
return err
}
- if err := keyPair.WriteCertificateFile(certificateFile, overwriteCertificate); err != nil {
+ if err := keyPair.WriteCertificateFile(certificateFile.path, overwriteCertificate); err != nil {
return fmt.Errorf("could not write certificate: %w", err)
}
- if err := keyPair.WritePrivateKeyFile(privateKeyFile, overwriteCertificate); err != nil {
+ if err := keyPair.WritePrivateKeyFile(privateKeyFile.path, overwriteCertificate); err != nil {
return fmt.Errorf("could not write private key: %w", err)
}
- cli.printSuccess("Certificate written to ", color.CyanString(certificateFile))
- cli.printSuccess("Private key written to ", color.CyanString(privateKeyFile))
+ cli.printSuccess("Certificate written to ", color.CyanString(certificateFile.path))
+ cli.printSuccess("Private key written to ", color.CyanString(privateKeyFile.path))
if !skipApplicationPackage {
return doCertAdd(cli, overwriteCertificate, args)
}
diff --git a/client/go/internal/cli/cmd/config.go b/client/go/internal/cli/cmd/config.go
index eb79a2004c4..0a03686dd33 100644
--- a/client/go/internal/cli/cmd/config.go
+++ b/client/go/internal/cli/cmd/config.go
@@ -384,24 +384,43 @@ func (c *Config) caCertificatePath() string {
return c.environment["VESPA_CLI_DATA_PLANE_CA_CERT_FILE"]
}
-func (c *Config) certificatePath(app vespa.ApplicationID, targetType string) (string, error) {
- if override, ok := c.environment["VESPA_CLI_DATA_PLANE_CERT_FILE"]; ok {
- return override, nil
- }
- if targetType == vespa.TargetHosted {
- return athenzPath("cert")
- }
- return c.applicationFilePath(app, "data-plane-public-cert.pem")
+type credentialsFile struct {
+ path string
+ optional bool
}
-func (c *Config) privateKeyPath(app vespa.ApplicationID, targetType string) (string, error) {
- if override, ok := c.environment["VESPA_CLI_DATA_PLANE_KEY_FILE"]; ok {
- return override, nil
+func (c *Config) credentialsFile(app vespa.ApplicationID, targetType string, cert bool) (credentialsFile, error) {
+ envVar := "VESPA_CLI_DATA_PLANE_CERT_FILE"
+ athenzFile := "cert"
+ applicationFile := "data-plane-public-cert.pem"
+ if !cert {
+ envVar = "VESPA_CLI_DATA_PLANE_KEY_FILE"
+ athenzFile = "key"
+ applicationFile = "data-plane-private-key.pem"
+ }
+ if override, ok := c.environment[envVar]; ok {
+ return credentialsFile{override, false}, nil
}
if targetType == vespa.TargetHosted {
- return athenzPath("key")
+ path, err := athenzPath(athenzFile)
+ if err != nil {
+ return credentialsFile{}, err
+ }
+ return credentialsFile{path, false}, nil
}
- return c.applicationFilePath(app, "data-plane-private-key.pem")
+ path, err := c.applicationFilePath(app, applicationFile)
+ if err != nil {
+ return credentialsFile{}, err
+ }
+ return credentialsFile{path, true}, nil
+}
+
+func (c *Config) certificatePath(app vespa.ApplicationID, targetType string) (credentialsFile, error) {
+ return c.credentialsFile(app, targetType, true)
+}
+
+func (c *Config) privateKeyPath(app vespa.ApplicationID, targetType string) (credentialsFile, error) {
+ return c.credentialsFile(app, targetType, false)
}
func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (vespa.TLSOptions, error) {
@@ -413,16 +432,13 @@ func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (ves
// CA certificate
if caCertOk {
options.CACertificate = []byte(caCertText)
- } else {
- caCertFile := c.caCertificatePath()
- if caCertFile != "" {
- b, err := os.ReadFile(caCertFile)
- if err != nil {
- return options, err
- }
- options.CACertificate = b
- options.CACertificateFile = caCertFile
+ } else if caCertFile := c.caCertificatePath(); caCertFile != "" {
+ b, err := os.ReadFile(caCertFile)
+ if err != nil {
+ return options, err
}
+ options.CACertificate = b
+ options.CACertificateFile = caCertFile
}
// Certificate and private key
if certOk && keyOk {
@@ -440,15 +456,17 @@ func (c *Config) readTLSOptions(app vespa.ApplicationID, targetType string) (ves
if err != nil {
return vespa.TLSOptions{}, err
}
- kp, err := tls.LoadX509KeyPair(certFile, keyFile)
+ kp, err := tls.LoadX509KeyPair(certFile.path, keyFile.path)
+ allowMissing := os.IsNotExist(err) && keyFile.optional && certFile.optional
if err == nil {
options.KeyPair = []tls.Certificate{kp}
- options.PrivateKeyFile = keyFile
- options.CertificateFile = certFile
- } else if err != nil && !os.IsNotExist(err) {
+ options.PrivateKeyFile = keyFile.path
+ options.CertificateFile = certFile.path
+ } else if err != nil && !allowMissing {
return vespa.TLSOptions{}, err
}
}
+ // If we found a key pair, parse it and check expiry
if options.KeyPair != nil {
cert, err := x509.ParseCertificate(options.KeyPair[0].Certificate[0])
if err != nil {
diff --git a/client/go/internal/cli/cmd/config_test.go b/client/go/internal/cli/cmd/config_test.go
index 14a3cf7cbbc..b00be38d021 100644
--- a/client/go/internal/cli/cmd/config_test.go
+++ b/client/go/internal/cli/cmd/config_test.go
@@ -253,6 +253,12 @@ func TestConfigReadTLSOptions(t *testing.T) {
PrivateKeyFile: defaultKeyFile,
},
)
+
+ // Key pair files specified through environment are required
+ nonExistentFile := filepath.Join(homeDir, "non-existent-file")
+ cli, _, _ := newTestCLI(t, "VESPA_CLI_DATA_PLANE_CERT_FILE="+nonExistentFile, "VESPA_CLI_DATA_PLANE_KEY_FILE="+nonExistentFile)
+ _, err := cli.config.readTLSOptions(app, vespa.TargetLocal)
+ assert.True(t, os.IsNotExist(err))
}
func TestConfigTargetResolving(t *testing.T) {
diff --git a/client/go/internal/cli/cmd/prod.go b/client/go/internal/cli/cmd/prod.go
index 14fbae68b17..3b37197340f 100644
--- a/client/go/internal/cli/cmd/prod.go
+++ b/client/go/internal/cli/cmd/prod.go
@@ -102,8 +102,17 @@ https://cloud.vespa.ai/en/reference/deployment`,
}
}
+type prodDeployOptions struct {
+ copyCert bool
+ risk int
+ commit string
+ description string
+ authorEmail string
+ sourceURL string
+}
+
func newProdDeployCmd(cli *CLI) *cobra.Command {
- copyCert := false
+ var options prodDeployOptions
cmd := &cobra.Command{
Use: "deploy",
Aliases: []string{"submit"}, // TODO: Remove in Vespa 9
@@ -118,7 +127,9 @@ services.xml.
For more information about production deployments in Vespa Cloud see:
https://cloud.vespa.ai/en/production-deployment
-https://cloud.vespa.ai/en/automated-deployments`,
+https://cloud.vespa.ai/en/automated-deployments
+https://cloud.vespa.ai/en/reference/vespa-cloud-api#submission-properties
+`,
DisableAutoGenTag: true,
SilenceUsage: true,
Example: `$ mvn package # when adding custom Java components
@@ -142,21 +153,33 @@ $ vespa prod deploy`,
if err := verifyTests(cli, pkg); err != nil {
return err
}
- opts := vespa.DeploymentOptions{ApplicationPackage: pkg, Target: target}
- if err := maybeCopyCertificate(copyCert, true, cli, target, pkg); err != nil {
+ if err := maybeCopyCertificate(options.copyCert, true, cli, target, pkg); err != nil {
return err
}
- if err := vespa.Submit(opts); err != nil {
+ deployment := vespa.DeploymentOptions{ApplicationPackage: pkg, Target: target}
+ submission := vespa.Submission{
+ Risk: options.risk,
+ Commit: options.commit,
+ Description: options.description,
+ AuthorEmail: options.authorEmail,
+ SourceURL: options.sourceURL,
+ }
+ if err := vespa.Submit(deployment, submission); err != nil {
return fmt.Errorf("could not deploy application: %w", err)
} else {
cli.printSuccess("Deployed ", color.CyanString(pkg.Path))
log.Printf("See %s for deployment progress\n", color.CyanString(fmt.Sprintf("%s/tenant/%s/application/%s/prod/deployment",
- opts.Target.Deployment().System.ConsoleURL, opts.Target.Deployment().Application.Tenant, opts.Target.Deployment().Application.Application)))
+ deployment.Target.Deployment().System.ConsoleURL, deployment.Target.Deployment().Application.Tenant, deployment.Target.Deployment().Application.Application)))
}
return nil
},
}
- cmd.Flags().BoolVarP(&copyCert, "add-cert", "A", false, `Copy certificate of the configured application to the current application package`)
+ cmd.Flags().BoolVarP(&options.copyCert, "add-cert", "A", false, "Copy certificate of the configured application to the current application package (default false)")
+ cmd.Flags().IntVarP(&options.risk, "risk", "", 0, "The risk score of source code being deployed. 0 to ignore (default 0)")
+ cmd.Flags().StringVarP(&options.commit, "commit", "", "", "Identifier of the source code being deployed. For example a commit hash")
+ cmd.Flags().StringVarP(&options.description, "description", "", "", "Description of the source code being deployed. For example a git commit message")
+ cmd.Flags().StringVarP(&options.authorEmail, "author-email", "", "", "Email of the author of the commit being deployed")
+ cmd.Flags().StringVarP(&options.sourceURL, "source-url", "", "", "URL which points to the source code being deployed. For example the build job running the submission")
return cmd
}
diff --git a/client/go/internal/vespa/deploy.go b/client/go/internal/vespa/deploy.go
index d04b8ba631c..ae4d4678d66 100644
--- a/client/go/internal/vespa/deploy.go
+++ b/client/go/internal/vespa/deploy.go
@@ -51,6 +51,14 @@ type DeploymentOptions struct {
Version version.Version
}
+type Submission struct {
+ Risk int `json:"risk,omitempty"`
+ Commit string `json:"commit,omitempty"`
+ Description string `json:"description,omitempty"`
+ AuthorEmail string `json:"authorEmail,omitempty"`
+ SourceURL string `json:"sourceUrl,omitempty"`
+}
+
type LogLinePrepareResponse struct {
Time int64
Level string
@@ -247,7 +255,7 @@ func copyToPart(dst *multipart.Writer, src io.Reader, fieldname, filename string
return nil
}
-func Submit(opts DeploymentOptions) error {
+func Submit(opts DeploymentOptions, submission Submission) error {
if !opts.Target.IsCloud() {
return fmt.Errorf("%s: deploy is unsupported by %s target", opts, opts.Target.Type())
}
@@ -261,7 +269,11 @@ func Submit(opts DeploymentOptions) error {
}
var body bytes.Buffer
writer := multipart.NewWriter(&body)
- if err := copyToPart(writer, strings.NewReader("{}"), "submitOptions", ""); err != nil {
+ submitOptions, err := json.Marshal(submission)
+ if err != nil {
+ return err
+ }
+ if err := copyToPart(writer, bytes.NewReader(submitOptions), "submitOptions", ""); err != nil {
return err
}
applicationZip, err := opts.ApplicationPackage.zipReader(false)
diff --git a/client/go/internal/vespa/deploy_test.go b/client/go/internal/vespa/deploy_test.go
index ddb500d26e3..39a9f2bcdf2 100644
--- a/client/go/internal/vespa/deploy_test.go
+++ b/client/go/internal/vespa/deploy_test.go
@@ -69,6 +69,41 @@ func TestDeployCloud(t *testing.T) {
assert.Equal(t, string(values["deployOptions"]), `{"vespaVersion":"1.2.3"}`)
}
+func TestSubmit(t *testing.T) {
+ httpClient := mock.HTTPClient{}
+ target := createCloudTarget(t, "http://vespacloud", io.Discard)
+ cloudTarget, ok := target.(*cloudTarget)
+ require.True(t, ok)
+ cloudTarget.httpClient = &httpClient
+ appDir, _ := mock.ApplicationPackageDir(t, false, true)
+ opts := DeploymentOptions{
+ Target: target,
+ ApplicationPackage: ApplicationPackage{Path: appDir},
+ }
+ httpClient.NextResponseString(200, "ok")
+ require.Nil(t, Submit(opts, Submission{}))
+ require.Nil(t, httpClient.LastRequest.ParseMultipartForm(1<<20))
+ assert.Equal(t, "{}", httpClient.LastRequest.FormValue("submitOptions"))
+ f, err := httpClient.LastRequest.MultipartForm.File["applicationZip"][0].Open()
+ require.Nil(t, err)
+ defer f.Close()
+ contents := make([]byte, 5)
+ f.Read(contents)
+ assert.Equal(t, "PK\x03\x04\x14", string(contents))
+
+ require.Nil(t, Submit(opts, Submission{
+ Risk: 1,
+ Commit: "sha",
+ Description: "broken garbage",
+ AuthorEmail: "foo@example.com",
+ SourceURL: "https://github.com/foo/repo",
+ }))
+ require.Nil(t, httpClient.LastRequest.ParseMultipartForm(1<<20))
+ assert.Equal(t,
+ "{\"risk\":1,\"commit\":\"sha\",\"description\":\"broken garbage\",\"authorEmail\":\"foo@example.com\",\"sourceUrl\":\"https://github.com/foo/repo\"}",
+ httpClient.LastRequest.FormValue("submitOptions"))
+}
+
func TestApplicationFromString(t *testing.T) {
app, err := ApplicationFromString("t1.a1.i1")
assert.Nil(t, err)
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index c9f5cfeb9c8..8453fb3450c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -13,9 +13,13 @@ import com.yahoo.vdslib.state.State;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
import com.yahoo.vespa.clustercontroller.core.hostinfo.StorageNode;
import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -59,7 +63,7 @@ public class NodeStateChangeChecker {
this.clusterInfo = cluster.clusterInfo();
this.inMoratorium = inMoratorium;
this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown();
- if ( ! groupVisiting.isHierarchical() && maxNumberOfGroupsAllowedToBeDown > 1)
+ if ( ! isGroupedSetup() && maxNumberOfGroupsAllowedToBeDown > 1)
throw new IllegalArgumentException("Cannot have both 1 group and maxNumberOfGroupsAllowedToBeDown > 1");
}
@@ -153,16 +157,22 @@ public class NodeStateChangeChecker {
if (result.notAllowed())
return result;
- if (maxNumberOfGroupsAllowedToBeDown == -1) {
- result = checkIfAnotherNodeInAnotherGroupHasWantedState(nodeInfo);
+ if (isGroupedSetup()) {
+ if (maxNumberOfGroupsAllowedToBeDown == -1) {
+ result = checkIfAnotherNodeInAnotherGroupHasWantedState(nodeInfo);
+ if (result.notAllowed())
+ return result;
+ if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription))
+ return allow();
+ } else {
+ var optionalResult = checkIfOtherNodesHaveWantedState(nodeInfo, newDescription, clusterState);
+ if (optionalResult.isPresent())
+ return optionalResult.get();
+ }
+ } else {
+ result = otherNodeHasWantedState(nodeInfo);
if (result.notAllowed())
return result;
- if (anotherNodeInGroupAlreadyAllowed(nodeInfo, newDescription))
- return allow();
- } else {
- var optionalResult = checkIfOtherNodesHaveWantedState(nodeInfo, newDescription, clusterState);
- if (optionalResult.isPresent())
- return optionalResult.get();
}
if (nodeIsDown(clusterState, nodeInfo)) {
@@ -185,6 +195,10 @@ public class NodeStateChangeChecker {
return allow();
}
+ private boolean isGroupedSetup() {
+ return groupVisiting.isHierarchical();
+ }
+
/** Refuse to override whatever an operator or unknown entity is doing. */
private static Result checkIfStateSetWithDifferentDescription(NodeInfo nodeInfo, String newDescription) {
State oldWantedState = nodeInfo.getUserWantedState().getState();
@@ -196,82 +210,88 @@ public class NodeStateChangeChecker {
}
/**
- * Returns a disallow-result if there is another node (in another group, if hierarchical)
- * that has a wanted state != UP. We disallow more than 1 suspended node/group at a time.
+ * Returns a disallow-result if there is another node in another group
+ * that has a wanted state != UP. We disallow more than 1 suspended group at a time.
*/
private Result checkIfAnotherNodeInAnotherGroupHasWantedState(StorageNodeInfo nodeInfo) {
- if (groupVisiting.isHierarchical()) {
- SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>();
-
- groupVisiting.visit(group -> {
- if (!groupContainsNode(group, nodeInfo.getNode())) {
- Result result = otherNodeInGroupHasWantedState(group);
- if (result.notAllowed()) {
- anotherNodeHasWantedState.set(result);
- // Have found a node that is suspended, halt the visiting
- return false;
- }
+ SettableOptional<Result> anotherNodeHasWantedState = new SettableOptional<>();
+ groupVisiting.visit(group -> {
+ if (! groupContainsNode(group, nodeInfo.getNode())) {
+ Result result = otherNodeInGroupHasWantedState(group);
+ if (result.notAllowed()) {
+ anotherNodeHasWantedState.set(result);
+ // Have found a node that is suspended, halt the visiting
+ return false;
}
+ }
- return true;
- });
+ return true;
+ });
- return anotherNodeHasWantedState.asOptional().orElseGet(Result::allow);
- } else {
- // Returns a disallow-result if there is another node with a wanted state
- return otherNodeHasWantedState(nodeInfo);
- }
+ return anotherNodeHasWantedState.asOptional().orElseGet(Result::allow);
}
/**
* Returns an optional Result, where return value is:
- * For flat setup: Return Optional.of(disallowed) if wanted state is set on some node, else Optional.empty
- * For hierarchical setup: No wanted state for other nodes, return Optional.empty
- * Wanted state for nodes/groups are not UP:
- * if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed)
- * else: if node is in group with nodes already down: return Optional.of(allowed), else Optional.of(disallowed)
+ * - No wanted state for other nodes, return Optional.empty
+ * - Wanted state for nodes/groups are not UP:
+ * - if less than maxNumberOfGroupsAllowedToBeDown: return Optional.of(allowed)
+ *    - else: if node is in group with nodes already down: return Optional.of(allowed), else Optional.of(disallowed)
*/
private Optional<Result> checkIfOtherNodesHaveWantedState(StorageNodeInfo nodeInfo, String newDescription, ClusterState clusterState) {
Node node = nodeInfo.getNode();
- if (groupVisiting.isHierarchical()) {
- Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp();
- if (groupsWithNodesWantedStateNotUp.size() == 0) {
- log.log(FINE, "groupsWithNodesWantedStateNotUp=0");
- return Optional.empty();
- }
+ Set<Integer> groupsWithNodesWantedStateNotUp = groupsWithUserWantedStateNotUp();
+ if (groupsWithNodesWantedStateNotUp.size() == 0) {
+ log.log(FINE, "groupsWithNodesWantedStateNotUp=0");
+ return Optional.empty();
+ }
- Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription);
- if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) {
- log.log(FINE, "Node is in group with same state and description, allow");
- return Optional.of(allow());
- }
- // There are groups with nodes not up, but with another description, probably operator set
- if (groupsWithSameStateAndDescription.size() == 0) {
- return Optional.of(disallow("Wanted state already set for another node in groups: " +
- sortSetIntoList(groupsWithNodesWantedStateNotUp)));
- }
+ Set<Integer> groupsWithSameStateAndDescription = groupsWithSameStateAndDescription(MAINTENANCE, newDescription);
+ if (aGroupContainsNode(groupsWithSameStateAndDescription, node)) {
+ log.log(FINE, "Node is in group with same state and description, allow");
+ return Optional.of(allow());
+ }
+ // There are groups with nodes not up, but with another description, probably operator set
+ if (groupsWithSameStateAndDescription.size() == 0) {
+ return Optional.of(disallow("Wanted state already set for another node in groups: " +
+ sortSetIntoList(groupsWithNodesWantedStateNotUp)));
+ }
- Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState);
- int numberOfGroupsToConsider = retiredAndNotUpGroups.size();
- // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will
- // not increase if we allow node to go down
- if (aGroupContainsNode(retiredAndNotUpGroups, node)) {
- numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1;
- }
- if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) {
- log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups);
- return Optional.of(allow());
- }
+ Set<Integer> retiredAndNotUpGroups = groupsWithNotRetiredAndNotUp(clusterState);
+ int numberOfGroupsToConsider = retiredAndNotUpGroups.size();
+ // Subtract one group if node is in a group with nodes already retired or not up, since number of such groups will
+ // not increase if we allow node to go down
+ if (aGroupContainsNode(retiredAndNotUpGroups, node)) {
+ numberOfGroupsToConsider = retiredAndNotUpGroups.size() - 1;
+ }
- return Optional.of(disallow(String.format("At most %d groups can have wanted state: %s",
- maxNumberOfGroupsAllowedToBeDown,
- sortSetIntoList(retiredAndNotUpGroups))));
- } else {
- // Return a disallow-result if there is another node with a wanted state
- var otherNodeHasWantedState = otherNodeHasWantedState(nodeInfo);
- if (otherNodeHasWantedState.notAllowed())
- return Optional.of(otherNodeHasWantedState);
+ var result = checkRedundancy(retiredAndNotUpGroups, clusterState);
+ if (result.isPresent() && result.get().notAllowed())
+ return result;
+
+ if (numberOfGroupsToConsider < maxNumberOfGroupsAllowedToBeDown) {
+ log.log(FINE, "Allow, retiredAndNotUpGroups=" + retiredAndNotUpGroups);
+ return Optional.of(allow());
+ }
+
+ return Optional.of(disallow(String.format("At most %d groups can have wanted state: %s",
+ maxNumberOfGroupsAllowedToBeDown,
+ sortSetIntoList(retiredAndNotUpGroups))));
+ }
+
+ // Checks bucket redundancy for the storage nodes in the given (retired or not-up) groups,
+ // as seen from every distributor that is UP in the cluster state
+ private Optional<Result> checkRedundancy(Set<Integer> retiredAndNotUpGroups, ClusterState clusterState) {
+ Set<Integer> indexesToCheck = new HashSet<>();
+ retiredAndNotUpGroups.forEach(index -> getNodesInGroup(index).forEach(node -> indexesToCheck.add(node.index())));
+
+ for (var distributorNodeInfo : clusterInfo.getDistributorNodeInfos()) {
+ if (clusterState.getNodeState(distributorNodeInfo.getNode()).getState() != UP) continue;
+
+ var r = checkRedundancySeenFromDistributor(distributorNodeInfo, indexesToCheck);
+ if (r.notAllowed())
+ return Optional.of(r);
}
return Optional.empty();
}
@@ -396,26 +416,56 @@ public class NodeStateChangeChecker {
}
private Result checkRedundancy(DistributorNodeInfo distributorNodeInfo, Node node) {
- List<StorageNode> storageNodes = distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes();
- for (StorageNode storageNode : storageNodes) {
- if (storageNode.getIndex() == node.getIndex()) {
- Integer minReplication = storageNode.getMinCurrentReplicationFactorOrNull();
- // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system).
- if (minReplication != null && minReplication < requiredRedundancy) {
- return disallow("Distributor " + distributorNodeInfo.getNodeIndex()
- + " says storage node " + node.getIndex()
- + " has buckets with redundancy as low as "
- + storageNode.getMinCurrentReplicationFactorOrNull()
- + ", but we require at least " + requiredRedundancy);
- } else {
- return allow();
- }
+ Integer minReplication = minReplication(distributorNodeInfo).get(node.getIndex());
+ return verifyRedundancy(distributorNodeInfo, minReplication, node.getIndex());
+ }
+
+ private Result checkRedundancySeenFromDistributor(DistributorNodeInfo distributorNodeInfo, Set<Integer> indexesToCheck) {
+ Map<Integer, Integer> replication = new LinkedHashMap<>(minReplication(distributorNodeInfo));
+
+ Integer minReplication = null;
+ Integer minReplicationIndex = null;
+ for (var entry : replication.entrySet()) {
+ Integer value = entry.getValue();
+ Integer nodeIndex = entry.getKey();
+ if ( ! indexesToCheck.contains(nodeIndex)) continue;
+ if (minReplication == null || (value != null && value < minReplication)) {
+ minReplication = value;
+ if (minReplication == null) continue;
+
+ minReplicationIndex = nodeIndex;
+ if (minReplication < requiredRedundancy) break;
}
}
+ return verifyRedundancy(distributorNodeInfo, minReplication, minReplicationIndex);
+ }
+
+ private Result verifyRedundancy(DistributorNodeInfo distributorNodeInfo, Integer minReplication, Integer minReplicationIndex) {
+ // Why test on != null? Missing min-replication is OK (indicate empty/few buckets on system).
+ if (minReplication != null && minReplication < requiredRedundancy) {
+ return disallow("Distributor " + distributorNodeInfo.getNodeIndex()
+ + " says storage node " + minReplicationIndex
+ + " has buckets with redundancy as low as "
+ + minReplication + ", but we require at least " + requiredRedundancy);
+ }
+
return allow();
}
+ // Minimum reported current replication factor per storage node index (null means no data, which is OK)
+ private Map<Integer, Integer> minReplication(DistributorNodeInfo distributorNodeInfo) {
+ Map<Integer, Integer> replicationPerNodeIndex = new HashMap<>();
+ for (StorageNode storageNode : distributorNodeInfo.getHostInfo().getDistributor().getStorageNodes()) {
+ var currentValue = replicationPerNodeIndex.get(storageNode.getIndex());
+ Integer minReplicationFactor = storageNode.getMinCurrentReplicationFactorOrNull();
+ if (currentValue == null || (minReplicationFactor != null && minReplicationFactor < currentValue))
+ replicationPerNodeIndex.put(storageNode.getIndex(), minReplicationFactor);
+ }
+
+ return replicationPerNodeIndex;
+ }
+
/**
* We want to check with the distributors to verify that it is safe to take down the storage node.
* @param node the node to be checked
@@ -456,6 +506,16 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private Group groupForThisIndex(int groupIndex) {
+ return clusterInfo.getAllNodeInfos().stream()
+ .map(NodeInfo::getGroup)
+ .filter(Objects::nonNull)
+ .filter(Group::isLeafGroup)
+ .filter(group -> group.getIndex() == groupIndex)
+ .findFirst()
+ .orElseThrow();
+ }
+
// groups with at least one node with the same state & description
private Set<Integer> groupsWithSameStateAndDescription(State state, String newDescription) {
return clusterInfo.getAllNodeInfos().stream()
@@ -485,6 +545,10 @@ public class NodeStateChangeChecker {
.collect(Collectors.toSet());
}
+ private List<ConfiguredNode> getNodesInGroup(int groupIndex) {
+ return groupForThisIndex(groupIndex).getNodes();
+ }
+
public static class Result {
public enum Action {
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 7b20fcf694a..b73ee86251f 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.log.LogSetup;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.state.ClusterState;
@@ -275,25 +276,8 @@ public class NodeStateChangeCheckerTest {
assertEquals("At most 2 groups can have wanted state: [0, 1]", result.reason());
}
- // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance,
- // try to set storage node 4 in group 2 to maintenance
- /* WIP
- {
- ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .2.s:m .3.s:m", currentClusterStateVersion));
- setStorageNodeWantedState(cluster, 0, UP, "");
- setStorageNodeWantedState(cluster, 1, UP, "");
- int nodeIndex = 4;
- Node node = new Node(STORAGE, nodeIndex);
- Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
- assertFalse(result.settingWantedStateIsAllowed(), result.toString());
- assertFalse(result.wantedStateAlreadySet());
- assertEquals("At most 2 groups can have wanted state: [0, 1]", result.getReason());
- }
-
- */
-
// 2 nodes in group 0 in maintenance, storage node 3 in group 1 is in maintenance with another description
- // (set in maintenance by operator), try to set storage node 3 in group 1 to maintenance, should bew allowed
+ // (set in maintenance by operator), try to set storage node 2 in group 1 to maintenance, should be allowed
{
ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m .3.s:m", currentClusterStateVersion));
setStorageNodeWantedState(cluster, 3, MAINTENANCE, "Maintenance, set by operator"); // Set to another description
@@ -305,6 +289,29 @@ public class NodeStateChangeCheckerTest {
assertFalse(result.isAlreadySet());
}
+ // 2 nodes in group 0 up again but buckets not in sync and 2 nodes in group 1 in maintenance,
+ // try to set storage node 4 in group 2 to maintenance
+ {
+ setStorageNodeWantedState(cluster, 0, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 1, MAINTENANCE, "Orchestrator");
+ setStorageNodeWantedState(cluster, 2, UP, ""); // Set up again
+ setStorageNodeWantedState(cluster, 3, UP, ""); // Set up again
+ ClusterState clusterState = clusterState(String.format("version:%d distributor:8 storage:8 .0.s:m .1.s:m", currentClusterStateVersion));
+
+ // Set bucket in sync to 1 for node 2 in group 1
+ var distributorHostInfo = createDistributorHostInfo(1, 2, 1);
+ cluster.clusterInfo().getDistributorNodeInfo(0).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(1).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+ cluster.clusterInfo().getDistributorNodeInfo(2).setHostInfo(HostInfo.createHostInfo(distributorHostInfo));
+
+ int nodeIndex = 2;
+ Node node = new Node(STORAGE, nodeIndex);
+ Result result = nodeStateChangeChecker.evaluateTransition(node, clusterState, SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE);
+ assertFalse(result.allowed(), result.toString());
+ assertFalse(result.isAlreadySet());
+ assertEquals("Distributor 0 says storage node 0 has buckets with redundancy as low as 1, but we require at least 4", result.reason());
+ }
+
}
@ParameterizedTest
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index d5a1b832aba..e9eb15592a2 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -118,29 +118,15 @@ public interface ModelContext {
@ModelFeatureFlag(owners = {"vekterli", "havardpe"}) default boolean enableConditionalPutRemoveWriteRepair() { return false; }
@ModelFeatureFlag(owners = {"mortent", "olaa"}) default boolean enableDataplaneProxy() { return false; }
@ModelFeatureFlag(owners = {"baldersheim"}) default boolean enableNestedMultivalueGrouping() { return false; }
+ @ModelFeatureFlag(owners = {"jonmv"}) default boolean useReconfigurableDispatcher() { return false; }
- //Below are all flags that must be kept until 7 is out of the door
+ // Below are all flags that must be kept until 7 is out of the door and implementations and/or default flag values are in sync with what is defined here.
@ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean ignoreThreadStackSizes() { return false; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default boolean useThreePhaseUpdates() { return true; }
@ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipCommunicationManagerThread() { return true; }
@ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipMbusRequestThread() { return true; }
@ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean skipMbusReplyThread() { return true; }
@ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean useQrserverServiceName() { return true; }
@ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean avoidRenamingSummaryFeatures() { return false; }
- @ModelFeatureFlag(owners = {"arnej"}, removeAfter="7.last") default boolean experimentalSdParsing() { return true; } // TODO: Remove after June 2022
- @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default boolean enableBitVectors() { return true; }
- @ModelFeatureFlag(owners = {"bjorncs"}, removeAfter="7.last") default boolean enableServerOcspStapling() { return true; }
- @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int defaultPoolNumThreads() { return 1; }
- @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int availableProcessors() { return 1; }
- @ModelFeatureFlag(owners = {"vekterli", "geirst"}, removeAfter="7.last") default boolean unorderedMergeChaining() { return true; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default String mergeThrottlingPolicy() { return "STATIC"; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsDecrementFactor() { return 1.2; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsBackoff() { return 0.95; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default int persistenceThrottlingWindowSize() { return -1; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default double persistenceThrottlingWsResizeRate() { return 3; }
- @ModelFeatureFlag(owners = {"vekterli"}, removeAfter="7.last") default boolean persistenceThrottlingOfMergeFeedOps() { return true; }
- @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int maxConcurrentMergesPerNode() { return 16; }
- @ModelFeatureFlag(owners = {"baldersheim"}, removeAfter="7.last") default int maxMergeQueueSize() { return 100; }
}
/** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! */
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java
index ea4988f3029..41bbf5e1b6a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecification.java
@@ -101,9 +101,9 @@ public class NodesSpecification {
this.hasCountAttribute = hasCountAttribute;
}
- private static NodesSpecification create(boolean dedicated, boolean canFail, Version version,
- ModelElement nodesElement, Optional<DockerImage> dockerImageRepo,
- Optional<CloudAccount> cloudAccount) {
+ static NodesSpecification create(boolean dedicated, boolean canFail, Version version,
+ ModelElement nodesElement, Optional<DockerImage> dockerImageRepo,
+ Optional<CloudAccount> cloudAccount) {
var resolvedElement = resolveElement(nodesElement);
var combinedId = findCombinedId(nodesElement, resolvedElement);
var resourceConstraints = toResourceConstraints(resolvedElement);
@@ -126,8 +126,13 @@ public class NodesSpecification {
var nodes = rangeFrom(nodesElement, "count");
var groups = rangeFrom(nodesElement, "groups");
var groupSize = rangeFrom(nodesElement, "group-size");
- int defaultMaxGroups = groupSize.isEmpty() ? 1 : nodes.to().orElse(1); // Don't constrain the number of groups if group size is set
- var min = new ClusterResources(nodes.from().orElse(1), groups.from().orElse(1), nodeResources(nodesElement).getFirst());
+
+ // Find the tightest possible limits for groups to avoid falsely concluding we are autoscaling
+ // when only specifying group size
+ int defaultMinGroups = nodes.from().orElse(1) / groupSize.to().orElse(nodes.from().orElse(1));
+ int defaultMaxGroups = groupSize.isEmpty() ? 1 : nodes.to().orElse(1) / groupSize.from().orElse(1);
+
+ var min = new ClusterResources(nodes.from().orElse(1), groups.from().orElse(defaultMinGroups), nodeResources(nodesElement).getFirst());
var max = new ClusterResources(nodes.to().orElse(1), groups.to().orElse(defaultMaxGroups), nodeResources(nodesElement).getSecond());
return new ResourceConstraints(min, max, groupSize);
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java
index 5fa893e9599..ae60ed77a7a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/CloudSslProvider.java
@@ -2,8 +2,6 @@
package com.yahoo.vespa.model.container.http.ssl;
import com.yahoo.jdisc.http.ConnectorConfig;
-import com.yahoo.jdisc.http.ssl.impl.CloudSslContextProvider;
-import com.yahoo.jdisc.http.ssl.impl.ConfiguredSslContextFactoryProvider;
import java.util.Optional;
@@ -17,8 +15,6 @@ import static com.yahoo.jdisc.http.ConnectorConfig.Ssl.ClientAuth;
*/
public class CloudSslProvider extends SslProvider {
public static final String COMPONENT_ID_PREFIX = "configured-ssl-provider@";
- public static final String MTLSONLY_COMPONENT_CLASS = ConfiguredSslContextFactoryProvider.class.getName();
- public static final String TOKEN_COMPONENT_CLASS = CloudSslContextProvider.class.getName();
private final String privateKey;
private final String certificate;
@@ -26,8 +22,9 @@ public class CloudSslProvider extends SslProvider {
private final String caCertificate;
private final ClientAuth.Enum clientAuthentication;
- public CloudSslProvider(String servername, String privateKey, String certificate, String caCertificatePath, String caCertificate, ClientAuth.Enum clientAuthentication, boolean enableTokenSupport) {
- super(COMPONENT_ID_PREFIX, servername, componentClass(enableTokenSupport), null);
+ public CloudSslProvider(String servername, String privateKey, String certificate, String caCertificatePath,
+ String caCertificate, ClientAuth.Enum clientAuthentication) {
+ super(COMPONENT_ID_PREFIX, servername, "com.yahoo.jdisc.http.ssl.impl.CloudSslContextProvider", null);
this.privateKey = privateKey;
this.certificate = certificate;
this.caCertificatePath = caCertificatePath;
@@ -35,10 +32,6 @@ public class CloudSslProvider extends SslProvider {
this.clientAuthentication = clientAuthentication;
}
- private static String componentClass(boolean enableTokenSupport) {
- return enableTokenSupport ? TOKEN_COMPONENT_CLASS : MTLSONLY_COMPONENT_CLASS;
- }
-
@Override
public void amendConnectorConfig(ConnectorConfig.Builder builder) {
builder.ssl.enabled(true);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java
index 5bf348e5bb5..4f11611541d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/http/ssl/HostedSslConnectorFactory.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.http.ssl;
import com.yahoo.config.model.api.EndpointCertificateSecrets;
import com.yahoo.jdisc.http.ConnectorConfig;
-import com.yahoo.jdisc.http.ConnectorConfig.Ssl.ClientAuth;
import com.yahoo.security.tls.TlsContext;
import com.yahoo.vespa.model.container.http.ConnectorFactory;
@@ -18,96 +17,71 @@ import java.util.List;
*/
public class HostedSslConnectorFactory extends ConnectorFactory {
- private static final List<String> INSECURE_WHITELISTED_PATHS = List.of("/status.html");
- private static final String DEFAULT_HOSTED_TRUSTSTORE = "/opt/yahoo/share/ssl/certs/athenz_certificate_bundle.pem";
-
- private final boolean enforceClientAuth;
- private final boolean enforceHandshakeClientAuth;
- private final Collection<String> tlsCiphersOverride;
+ boolean requireTlsClientAuthDuringTlsHandshake;
+ private final List<String> tlsCiphersOverride;
private final boolean enableProxyProtocolMixedMode;
private final Duration endpointConnectionTtl;
- /**
- * Create connector factory that uses a certificate provided by the config-model / configserver and default hosted Vespa truststore.
- */
- public static HostedSslConnectorFactory withProvidedCertificate(
- String serverName, EndpointCertificateSecrets endpointCertificateSecrets, boolean enforceHandshakeClientAuth,
- Collection<String> tlsCiphersOverride, boolean enableProxyProtocolMixedMode, int port,
- Duration endpointConnectionTtl, boolean enableTokenSupport) {
- CloudSslProvider sslProvider = createConfiguredDirectSslProvider(
- serverName, endpointCertificateSecrets, DEFAULT_HOSTED_TRUSTSTORE, /*tlsCaCertificates*/null, enforceHandshakeClientAuth, enableTokenSupport);
- return new HostedSslConnectorFactory(sslProvider, false, enforceHandshakeClientAuth, tlsCiphersOverride,
- enableProxyProtocolMixedMode, port, endpointConnectionTtl);
- }
-
- /**
- * Create connector factory that uses a certificate provided by the config-model / configserver and a truststore configured by the application.
- */
- public static HostedSslConnectorFactory withProvidedCertificateAndTruststore(
- String serverName, EndpointCertificateSecrets endpointCertificateSecrets, String tlsCaCertificates,
- Collection<String> tlsCiphersOverride, boolean enableProxyProtocolMixedMode, int port,
- Duration endpointConnectionTtl, boolean enableTokenSupport) {
- CloudSslProvider sslProvider = createConfiguredDirectSslProvider(
- serverName, endpointCertificateSecrets, /*tlsCaCertificatesPath*/null, tlsCaCertificates, false, enableTokenSupport);
- return new HostedSslConnectorFactory(sslProvider, true, false, tlsCiphersOverride, enableProxyProtocolMixedMode,
- port, endpointConnectionTtl);
- }
-
- /**
- * Create connector factory that uses the default certificate and truststore provided by Vespa (through Vespa-global TLS configuration).
- */
- public static HostedSslConnectorFactory withDefaultCertificateAndTruststore(String serverName, Collection<String> tlsCiphersOverride,
- boolean enableProxyProtocolMixedMode, int port,
- Duration endpointConnectionTtl) {
- return new HostedSslConnectorFactory(new DefaultSslProvider(serverName), true, false, tlsCiphersOverride,
- enableProxyProtocolMixedMode, port, endpointConnectionTtl);
- }
+ public static Builder builder(String name, int listenPort) { return new Builder(name, listenPort); }
- private HostedSslConnectorFactory(SslProvider sslProvider, boolean enforceClientAuth,
- boolean enforceHandshakeClientAuth, Collection<String> tlsCiphersOverride,
- boolean enableProxyProtocolMixedMode, int port, Duration endpointConnectionTtl) {
- super(new Builder("tls"+port, port).sslProvider(sslProvider));
- this.enforceClientAuth = enforceClientAuth;
- this.enforceHandshakeClientAuth = enforceHandshakeClientAuth;
- this.tlsCiphersOverride = tlsCiphersOverride;
- this.enableProxyProtocolMixedMode = enableProxyProtocolMixedMode;
- this.endpointConnectionTtl = endpointConnectionTtl;
+ private HostedSslConnectorFactory(Builder builder) {
+ super(new ConnectorFactory.Builder("tls"+builder.port, builder.port).sslProvider(createSslProvider(builder)));
+ this.requireTlsClientAuthDuringTlsHandshake = builder.requireTlsClientAuthDuringTlsHandshake;
+ this.tlsCiphersOverride = List.copyOf(builder.tlsCiphersOverride);
+ this.enableProxyProtocolMixedMode = builder.enableProxyProtocolMixedMode;
+ this.endpointConnectionTtl = builder.endpointConnectionTtl;
}
- private static CloudSslProvider createConfiguredDirectSslProvider(
- String serverName, EndpointCertificateSecrets endpointCertificateSecrets, String tlsCaCertificatesPath, String tlsCaCertificates, boolean enforceHandshakeClientAuth, boolean enableTokenSupport) {
- var clientAuthentication = enforceHandshakeClientAuth ? ClientAuth.Enum.NEED_AUTH : ClientAuth.Enum.WANT_AUTH;
+ private static SslProvider createSslProvider(Builder builder) {
+ if (builder.endpointCertificate == null) return new DefaultSslProvider(builder.name);
+ var clientAuthentication = builder.requireTlsClientAuthDuringTlsHandshake
+ ? ConnectorConfig.Ssl.ClientAuth.Enum.NEED_AUTH : ConnectorConfig.Ssl.ClientAuth.Enum.WANT_AUTH;
return new CloudSslProvider(
- serverName,
- endpointCertificateSecrets.key(),
- endpointCertificateSecrets.certificate(),
- tlsCaCertificatesPath,
- tlsCaCertificates,
- clientAuthentication,
- enableTokenSupport);
+ builder.name, builder.endpointCertificate.key(), builder.endpointCertificate.certificate(),
+ builder.tlsCaCertificatesPath, builder.tlsCaCertificatesPem, clientAuthentication);
}
@Override
public void getConfig(ConnectorConfig.Builder connectorBuilder) {
super.getConfig(connectorBuilder);
- if (! enforceHandshakeClientAuth) {
- connectorBuilder
- .tlsClientAuthEnforcer(new ConnectorConfig.TlsClientAuthEnforcer.Builder()
- .pathWhitelist(INSECURE_WHITELISTED_PATHS)
- .enable(enforceClientAuth));
+ if (! requireTlsClientAuthDuringTlsHandshake) {
+ connectorBuilder.tlsClientAuthEnforcer(
+ new ConnectorConfig.TlsClientAuthEnforcer.Builder()
+ .pathWhitelist(List.of("/status.html")).enable(true));
}
// Disables TLSv1.3 as it causes some browsers to prompt user for client certificate (when connector has 'want' auth)
connectorBuilder.ssl.enabledProtocols(List.of("TLSv1.2"));
-
if (!tlsCiphersOverride.isEmpty()) {
connectorBuilder.ssl.enabledCipherSuites(tlsCiphersOverride.stream().sorted().toList());
} else {
connectorBuilder.ssl.enabledCipherSuites(TlsContext.ALLOWED_CIPHER_SUITES.stream().sorted().toList());
}
-
connectorBuilder
.proxyProtocol(new ConnectorConfig.ProxyProtocol.Builder().enabled(true).mixedMode(enableProxyProtocolMixedMode))
.idleTimeout(Duration.ofSeconds(30).toSeconds())
.maxConnectionLife(endpointConnectionTtl != null ? endpointConnectionTtl.toSeconds() : 0);
}
+
+ public static class Builder {
+ final String name;
+ final int port;
+ boolean requireTlsClientAuthDuringTlsHandshake;
+ List<String> tlsCiphersOverride;
+ boolean enableProxyProtocolMixedMode;
+ Duration endpointConnectionTtl;
+ EndpointCertificateSecrets endpointCertificate;
+ String tlsCaCertificatesPem;
+ String tlsCaCertificatesPath;
+
+ private Builder(String name, int port) { this.name = name; this.port = port; }
+ public Builder requireTlsClientAuthDuringTlsHandshake(boolean enable) {this.requireTlsClientAuthDuringTlsHandshake = enable; return this; }
+ public Builder endpointConnectionTtl(Duration ttl) { endpointConnectionTtl = ttl; return this; }
+ public Builder tlsCiphersOverride(Collection<String> ciphers) { tlsCiphersOverride = List.copyOf(ciphers); return this; }
+ public Builder proxyProtocolMixedMode(boolean enable) { enableProxyProtocolMixedMode = enable; return this; }
+ public Builder endpointCertificate(EndpointCertificateSecrets cert) { this.endpointCertificate = cert; return this; }
+ public Builder tlsCaCertificatesPath(String path) { this.tlsCaCertificatesPath = path; return this; }
+ public Builder tlsCaCertificatesPem(String pem) { this.tlsCaCertificatesPem = pem; return this; }
+
+ public HostedSslConnectorFactory build() { return new HostedSslConnectorFactory(this); }
+ }
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index 414d4c817c7..f0296d49472 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -6,6 +6,8 @@ import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.semantics.SemanticRulesConfig;
import com.yahoo.search.config.IndexInfoConfig;
import com.yahoo.search.config.SchemaInfoConfig;
+import com.yahoo.search.dispatch.Dispatcher;
+import com.yahoo.search.dispatch.ReconfigurableDispatcher;
import com.yahoo.search.pagetemplates.PageTemplatesConfig;
import com.yahoo.search.query.profile.config.QueryProfilesConfig;
import com.yahoo.search.ranking.RankProfilesEvaluatorFactory;
@@ -49,6 +51,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private final List<SearchCluster> searchClusters = new LinkedList<>();
private final Collection<String> schemasWithGlobalPhase;
private final boolean globalPhase;
+ private final boolean useReconfigurableDispatcher;
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
@@ -57,6 +60,7 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
this.globalPhase = deployState.featureFlags().enableGlobalPhase();
+ this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher();
this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState);
this.owningCluster = cluster;
@@ -81,16 +85,17 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
/** Adds a Dispatcher component to the owning container cluster for each search cluster */
private void initializeDispatchers(Collection<SearchCluster> searchClusters) {
+ Class<? extends Dispatcher> dispatcherClass = useReconfigurableDispatcher ? ReconfigurableDispatcher.class : Dispatcher.class;
for (SearchCluster searchCluster : searchClusters) {
if (searchCluster instanceof IndexedSearchCluster indexed) {
- var dispatcher = new DispatcherComponent(indexed);
+ var dispatcher = new DispatcherComponent(indexed, dispatcherClass);
owningCluster.addComponent(dispatcher);
}
if (globalPhase) {
for (var documentDb : searchCluster.getDocumentDbs()) {
- if (!schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
+ if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
var factory = new RankProfilesEvaluatorComponent(documentDb);
- if (! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
+ if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java
index f9a3a1f1990..fe2df8101bd 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/DispatcherComponent.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.model.container.search;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.osgi.provider.model.ComponentModel;
+import com.yahoo.search.dispatch.Dispatcher;
import com.yahoo.vespa.config.search.DispatchConfig;
import com.yahoo.vespa.config.search.DispatchNodesConfig;
import com.yahoo.vespa.model.container.component.Component;
@@ -22,15 +23,15 @@ public class DispatcherComponent extends Component<TreeConfigProducer<?>, Compon
private final IndexedSearchCluster indexedSearchCluster;
- public DispatcherComponent(IndexedSearchCluster indexedSearchCluster) {
- super(toComponentModel(indexedSearchCluster.getClusterName()));
+ public DispatcherComponent(IndexedSearchCluster indexedSearchCluster, Class<? extends Dispatcher> clazz) {
+ super(toComponentModel(indexedSearchCluster.getClusterName(), clazz));
this.indexedSearchCluster = indexedSearchCluster;
}
- private static ComponentModel toComponentModel(String clusterName) {
+ private static ComponentModel toComponentModel(String clusterName, Class<? extends Dispatcher> clazz) {
String dispatcherComponentId = "dispatcher." + clusterName; // used by ClusterSearcher
return new ComponentModel(dispatcherComponentId,
- com.yahoo.search.dispatch.Dispatcher.class.getName(),
+ clazz.getName(),
PlatformBundles.SEARCH_AND_DOCPROC_BUNDLE);
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 00feb0a1c76..3318138ebd7 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -16,7 +16,6 @@ import com.yahoo.config.model.ConfigModelContext;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ConfigServerSpec;
import com.yahoo.config.model.api.ContainerEndpoint;
-import com.yahoo.config.model.api.EndpointCertificateSecrets;
import com.yahoo.config.model.api.TenantSecretStore;
import com.yahoo.config.model.application.provider.IncludeDirs;
import com.yahoo.config.model.builder.xml.ConfigModelBuilder;
@@ -109,7 +108,6 @@ import java.io.IOException;
import java.io.Reader;
import java.net.URI;
import java.security.cert.X509Certificate;
-import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -600,31 +598,35 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
.ifPresent(accessControl -> accessControl.configureDefaultHostedConnector(cluster.getHttp())); ;
}
- private void addAdditionalHostedConnector(DeployState deployState, ApplicationContainerCluster cluster) {
+ private void addAdditionalHostedConnector(DeployState state, ApplicationContainerCluster cluster) {
JettyHttpServer server = cluster.getHttp().getHttpServer().get();
String serverName = server.getComponentId().getName();
// If the deployment contains certificate/private key reference, setup TLS port
- HostedSslConnectorFactory connectorFactory;
- Collection<String> tlsCiphersOverride = deployState.getProperties().tlsCiphersOverride();
- boolean proxyProtocolMixedMode = deployState.getProperties().featureFlags().enableProxyProtocolMixedMode();
- Duration endpointConnectionTtl = deployState.getProperties().endpointConnectionTtl();
- var port = getDataplanePort(deployState);
- if (deployState.endpointCertificateSecrets().isPresent()) {
- boolean authorizeClient = deployState.zone().system().isPublic();
+ var builder = HostedSslConnectorFactory.builder(serverName, getDataplanePort(state))
+ .proxyProtocolMixedMode(state.getProperties().featureFlags().enableProxyProtocolMixedMode())
+ .tlsCiphersOverride(state.getProperties().tlsCiphersOverride())
+ .endpointConnectionTtl(state.getProperties().endpointConnectionTtl());
+ var endpointCert = state.endpointCertificateSecrets().orElse(null);
+ if (endpointCert != null) {
+ builder.endpointCertificate(endpointCert);
+ boolean isPublic = state.zone().system().isPublic();
List<X509Certificate> clientCertificates = getClientCertificates(cluster);
- if (authorizeClient && clientCertificates.isEmpty()) {
- throw new IllegalArgumentException("Client certificate authority security/clients.pem is missing - " +
- "see: https://cloud.vespa.ai/en/security/guide#data-plane");
+ if (isPublic) {
+ if (clientCertificates.isEmpty())
+ throw new IllegalArgumentException("Client certificate authority security/clients.pem is missing - " +
+ "see: https://cloud.vespa.ai/en/security/guide#data-plane");
+ builder.tlsCaCertificatesPem(X509CertificateUtils.toPem(clientCertificates));
+ } else {
+ builder.tlsCaCertificatesPath("/opt/yahoo/share/ssl/certs/athenz_certificate_bundle.pem");
}
- EndpointCertificateSecrets endpointCertificateSecrets = deployState.endpointCertificateSecrets().get();
-
- boolean enforceHandshakeClientAuth = cluster.getHttp().getAccessControl()
- .map(accessControl -> accessControl.clientAuthentication)
- .map(clientAuth -> clientAuth == AccessControl.ClientAuthentication.need)
- .orElse(false);
+ builder.requireTlsClientAuthDuringTlsHandshake(
+ cluster.getHttp().getAccessControl()
+ .map(accessControl -> accessControl.clientAuthentication)
+ .map(clientAuth -> clientAuth == AccessControl.ClientAuthentication.need)
+ .orElse(false));
- boolean enableTokenSupport = deployState.featureFlags().enableDataplaneProxy()
+ boolean enableTokenSupport = state.featureFlags().enableDataplaneProxy()
&& cluster.getClients().stream().anyMatch(c -> !c.tokens().isEmpty());
// Set up component to generate proxy cert if token support is enabled
@@ -633,24 +635,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
cluster.addSimpleComponent(DataplaneProxyService.class);
var dataplaneProxy = new DataplaneProxy(
- getDataplanePort(deployState),
- endpointCertificateSecrets.certificate(),
- endpointCertificateSecrets.key());
+ getDataplanePort(state),
+ endpointCert.certificate(),
+ endpointCert.key());
cluster.addComponent(dataplaneProxy);
}
-
- connectorFactory = authorizeClient
- ? HostedSslConnectorFactory.withProvidedCertificateAndTruststore(
- serverName, endpointCertificateSecrets, X509CertificateUtils.toPem(clientCertificates),
- tlsCiphersOverride, proxyProtocolMixedMode, port, endpointConnectionTtl, enableTokenSupport)
- : HostedSslConnectorFactory.withProvidedCertificate(
- serverName, endpointCertificateSecrets, enforceHandshakeClientAuth, tlsCiphersOverride,
- proxyProtocolMixedMode, port, endpointConnectionTtl, enableTokenSupport);
- } else {
- connectorFactory = HostedSslConnectorFactory.withDefaultCertificateAndTruststore(
- serverName, tlsCiphersOverride, proxyProtocolMixedMode, port,
- endpointConnectionTtl);
}
+ var connectorFactory = builder.build();
cluster.getHttp().getAccessControl().ifPresent(accessControl -> accessControl.configureHostedConnector(connectorFactory));
server.addConnector(connectorFactory);
}
diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc
index bb63dcd73ff..bb0e39a41ab 100644
--- a/config-model/src/main/resources/schema/content.rnc
+++ b/config-model/src/main/resources/schema/content.rnc
@@ -82,7 +82,7 @@ ClusterControllerTuning = element cluster-controller {
element stable-state-period { xsd:string { pattern = "([0-9\.]+)\s*([a-z]+)?" } }? &
element min-distributor-up-ratio { xsd:double }? &
element min-storage-up-ratio { xsd:double }? &
- element groups-allowed-down-ratio { xsd:double }?
+ element groups-allowed-down-ratio { xsd:double { minInclusive = "0" maxInclusive = "1" } }?
}
DispatchTuning = element dispatch {
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java
new file mode 100644
index 00000000000..2a03a9307ca
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/NodesSpecificationTest.java
@@ -0,0 +1,104 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.builder.xml.dom;
+
+import com.yahoo.text.XML;
+import org.junit.jupiter.api.Test;
+import org.w3c.dom.Document;
+import com.yahoo.component.Version;
+
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * @author bratseth
+ */
+public class NodesSpecificationTest {
+
+ @Test
+ void noExplicitGroupLimits() {
+ var spec = nodesSpecification("<nodes count='30'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals( 1, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals( 1, spec.maxResources().groups()); // no grouping by default -> implicit max groups is 1
+ assertTrue(spec.groupSize().from().isEmpty());
+ assertTrue(spec.groupSize().to().isEmpty());
+ }
+
+ @Test
+ void testGroupSize1() {
+ var spec = nodesSpecification("<nodes count='30' group-size='1'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals(30, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals(30, spec.maxResources().groups());
+ assertEquals(1, spec.groupSize().from().getAsInt());
+ assertEquals(1, spec.groupSize().to().getAsInt());
+ }
+
+ @Test
+ void testGroupSize3() {
+ var spec = nodesSpecification("<nodes count='30' group-size='3'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals(10, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals(10, spec.maxResources().groups());
+ assertEquals( 3, spec.groupSize().from().getAsInt());
+ assertEquals( 3, spec.groupSize().to().getAsInt());
+ }
+
+ @Test
+ void testVariableGroupSize1() {
+ var spec = nodesSpecification("<nodes count='30' group-size='[15, 30]'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals( 1, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals( 2, spec.maxResources().groups());
+ assertEquals(15, spec.groupSize().from().getAsInt());
+ assertEquals(30, spec.groupSize().to().getAsInt());
+ }
+
+ @Test
+ void testVariableGroupSize2() {
+ var spec = nodesSpecification("<nodes count='30' group-size='[6, 10]'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals( 3, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals( 5, spec.maxResources().groups());
+ assertEquals( 6, spec.groupSize().from().getAsInt());
+ assertEquals(10, spec.groupSize().to().getAsInt());
+ }
+
+ @Test
+ void testGroupSizeLowerBound() {
+ var spec = nodesSpecification("<nodes count='30' group-size='[6, ]'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals( 1, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals( 5, spec.maxResources().groups());
+ assertEquals( 6, spec.groupSize().from().getAsInt());
+ assertTrue(spec.groupSize().to().isEmpty());
+ }
+
+ @Test
+ void testGroupSizeUpperBound() {
+ var spec = nodesSpecification("<nodes count='30' group-size='[, 10]'/>");
+ assertEquals(30, spec.minResources().nodes());
+ assertEquals( 3, spec.minResources().groups());
+ assertEquals(30, spec.maxResources().nodes());
+ assertEquals( 30, spec.maxResources().groups());
+ assertTrue(spec.groupSize().from().isEmpty());
+ assertEquals(10, spec.groupSize().to().getAsInt());
+ }
+
+ private NodesSpecification nodesSpecification(String nodesElement) {
+ Document nodesXml = XML.getDocument(nodesElement);
+ return NodesSpecification.create(false, false, Version.emptyVersion,
+ new ModelElement(nodesXml.getDocumentElement()),
+ Optional.empty(), Optional.empty());
+
+ }
+
+}
diff --git a/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java b/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java
index 78ef17f613a..6cd344466e4 100644
--- a/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java
+++ b/configgen/src/main/java/com/yahoo/config/codegen/BuilderGenerator.java
@@ -2,9 +2,9 @@
package com.yahoo.config.codegen;
import com.yahoo.config.codegen.LeafCNode.FileLeaf;
+import com.yahoo.config.codegen.LeafCNode.ModelLeaf;
import com.yahoo.config.codegen.LeafCNode.PathLeaf;
import com.yahoo.config.codegen.LeafCNode.UrlLeaf;
-import com.yahoo.config.codegen.LeafCNode.ModelLeaf;
import java.util.ArrayList;
import java.util.List;
@@ -41,7 +41,7 @@ public class BuilderGenerator {
private static String getDeclaration(InnerCNode node) {
String getInterfaces = (node.getParent() == null) ? "implements ConfigInstance.Builder" : "implements ConfigBuilder";
- return "public static class Builder " + getInterfaces + " {";
+ return "public static final class Builder " + getInterfaces + " {";
}
private static String getSpecialRootBuilderCode(InnerCNode node) {
diff --git a/configgen/src/test/resources/allfeatures.reference b/configgen/src/test/resources/allfeatures.reference
index 8a681048f65..b7a79f663e7 100644
--- a/configgen/src/test/resources/allfeatures.reference
+++ b/configgen/src/test/resources/allfeatures.reference
@@ -99,7 +99,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
void getConfig(Builder builder);
}
- public static class Builder implements ConfigInstance.Builder {
+ public static final class Builder implements ConfigInstance.Builder {
private Set<String> __uninitialized = new HashSet<String>(Arrays.asList(
"boolVal",
"intVal",
@@ -1345,7 +1345,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class Basic_struct extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
private String foo = null;
@@ -1432,7 +1432,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class Struct_of_struct extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
public Inner0.Builder inner0 = new Inner0.Builder();
@@ -1529,7 +1529,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class Inner0 extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
private String name = null;
@@ -1616,7 +1616,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class Inner1 extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
private String name = null;
@@ -1703,7 +1703,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class MyArray extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>(Arrays.asList(
"refVal"
));
@@ -1939,7 +1939,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class AnotherArray extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
private Integer foo = null;
@@ -2013,7 +2013,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class MyMap extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>(Arrays.asList(
"refVal"
));
@@ -2249,7 +2249,7 @@ public final class AllfeaturesConfig extends ConfigInstance {
*/
public final static class AnotherArray extends InnerNode {
- public static class Builder implements ConfigBuilder {
+ public static final class Builder implements ConfigBuilder {
private Set<String> __uninitialized = new HashSet<String>();
private Integer foo = null;
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index a7a26343edf..dac881cf5ee 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -209,6 +209,7 @@ public class ModelContextImpl implements ModelContext {
private final boolean enableConditionalPutRemoveWriteRepair;
private final boolean enableDataplaneProxy;
private final boolean enableNestedMultivalueGrouping;
+ private final boolean useReconfigurableDispatcher;
public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
this.defaultTermwiseLimit = flagValue(source, appId, version, Flags.DEFAULT_TERM_WISE_LIMIT);
@@ -259,6 +260,7 @@ public class ModelContextImpl implements ModelContext {
this.enableConditionalPutRemoveWriteRepair = flagValue(source, appId, version, Flags.ENABLE_CONDITIONAL_PUT_REMOVE_WRITE_REPAIR);
this.enableDataplaneProxy = flagValue(source, appId, version, Flags.ENABLE_DATAPLANE_PROXY);
this.enableNestedMultivalueGrouping = flagValue(source, appId, version, Flags.ENABLE_NESTED_MULTIVALUE_GROUPING);
+ this.useReconfigurableDispatcher = flagValue(source, appId, version, Flags.USE_RECONFIGURABLE_DISPATCHER);
}
@Override public int heapSizePercentage() { return heapPercentage; }
@@ -317,6 +319,7 @@ public class ModelContextImpl implements ModelContext {
@Override public boolean enableConditionalPutRemoveWriteRepair() { return enableConditionalPutRemoveWriteRepair; }
@Override public boolean enableDataplaneProxy() { return enableDataplaneProxy; }
@Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; }
+ @Override public boolean useReconfigurableDispatcher() { return useReconfigurableDispatcher; }
private static <V> V flagValue(FlagSource source, ApplicationId appId, Version vespaVersion, UnboundFlag<? extends V, ?, ?> flag) {
return flag.bindTo(source)
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java
index a8dc48e6c14..ef41512f979 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/tenant/DataplaneTokenSerializer.java
@@ -46,7 +46,9 @@ public class DataplaneTokenSerializer {
return new DataplaneToken.Version(
inspector.field(FINGERPRINT_FIELD).asString(),
inspector.field(CHECKACCESSHASH_FIELD).asString(),
- expirationStr.equals("<none>") ? Optional.empty() : Optional.of(Instant.parse(expirationStr)));
+ expirationStr.equals("<none>") ? Optional.empty()
+ : (expirationStr.isBlank()
+ ? Optional.of(Instant.EPOCH) : Optional.of(Instant.parse(expirationStr))));
}
public static Slime toSlime(List<DataplaneToken> dataplaneTokens) {
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java
index 2c898b8bf7d..342ea7b2297 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/session/PrepareParamsTest.java
@@ -9,9 +9,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.TenantName;
import com.yahoo.container.jdisc.HttpRequest;
-
import com.yahoo.security.X509CertificateUtils;
-
import com.yahoo.slime.Cursor;
import com.yahoo.slime.Injector;
import com.yahoo.slime.ObjectInserter;
@@ -29,7 +27,6 @@ import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.List;
import java.util.OptionalInt;
-;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
diff --git a/container-core/abi-spec.json b/container-core/abi-spec.json
index 572d18b02f3..757afeb64e2 100644
--- a/container-core/abi-spec.json
+++ b/container-core/abi-spec.json
@@ -310,7 +310,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1032,7 +1033,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1091,7 +1093,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1128,7 +1131,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1174,7 +1178,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1205,7 +1210,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1240,7 +1246,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1329,7 +1336,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1366,7 +1374,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1768,7 +1777,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1806,7 +1816,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1855,7 +1866,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1884,7 +1896,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1915,7 +1928,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1946,7 +1960,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1977,7 +1992,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java b/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java
index 0e381860e98..3cf316db5c2 100644
--- a/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java
+++ b/container-core/src/main/java/com/yahoo/container/core/documentapi/VespaDocumentAccess.java
@@ -17,9 +17,7 @@ import com.yahoo.documentapi.VisitorParameters;
import com.yahoo.documentapi.VisitorSession;
import com.yahoo.documentapi.messagebus.MessageBusDocumentAccess;
import com.yahoo.documentapi.messagebus.MessageBusParams;
-import com.yahoo.documentapi.messagebus.protocol.DocumentProtocolPoliciesConfig;
import com.yahoo.messagebus.MessagebusConfig;
-import com.yahoo.vespa.config.content.DistributionConfig;
import com.yahoo.yolean.concurrent.Memoized;
import java.util.logging.Level;
diff --git a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java
index 52b372638c8..3471627e887 100644
--- a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java
+++ b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java
@@ -12,7 +12,7 @@ import java.util.Set;
/**
* A component which tracks the up/down status of any clusters which should influence
* the up down status of this container itself, as well as the separate fact (from config)
- * that such clusters are present. This is a separate fact because we might know we have clusters configured
+ * that such clusters are present. This is a separate fact because we might know we have clusters configured,
* but we don't have positive information that they are up yet, and in this case we should be down.
*
* This is a separate component which has <b>no dependencies</b> such that the status tracked in this
diff --git a/container-disc/abi-spec.json b/container-disc/abi-spec.json
index 75246a77e03..92f21af0cde 100644
--- a/container-disc/abi-spec.json
+++ b/container-disc/abi-spec.json
@@ -68,7 +68,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -105,7 +106,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -135,7 +137,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java b/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java
index 16dedd0765d..ab5080b8f3f 100644
--- a/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java
+++ b/container-messagebus/src/main/java/com/yahoo/container/jdisc/messagebus/SessionCache.java
@@ -1,12 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.container.jdisc.messagebus;
-import com.yahoo.component.annotation.Inject;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.component.annotation.Inject;
import com.yahoo.container.jdisc.ContainerMbusConfig;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.documentapi.messagebus.protocol.DocumentProtocol;
-import com.yahoo.documentapi.messagebus.protocol.DocumentProtocolPoliciesConfig;
import com.yahoo.jdisc.ReferencedResource;
import com.yahoo.jdisc.References;
import com.yahoo.jdisc.ResourceReference;
@@ -25,7 +24,6 @@ import com.yahoo.messagebus.network.NetworkMultiplexer;
import com.yahoo.messagebus.shared.SharedIntermediateSession;
import com.yahoo.messagebus.shared.SharedMessageBus;
import com.yahoo.messagebus.shared.SharedSourceSession;
-import com.yahoo.vespa.config.content.DistributionConfig;
import com.yahoo.yolean.concurrent.Memoized;
import java.util.HashMap;
@@ -65,25 +63,18 @@ public final class SessionCache extends AbstractComponent {
@Inject
public SessionCache(NetworkMultiplexerProvider nets, ContainerMbusConfig containerMbusConfig,
DocumentTypeManager documentTypeManager,
- MessagebusConfig messagebusConfig,
- DocumentProtocolPoliciesConfig policiesConfig,
- DistributionConfig distributionConfig) {
- this(nets::net, containerMbusConfig, documentTypeManager,
- messagebusConfig, policiesConfig, distributionConfig);
+ MessagebusConfig messagebusConfig) {
+ this(nets::net, containerMbusConfig, documentTypeManager, messagebusConfig);
}
public SessionCache(Supplier<NetworkMultiplexer> net, ContainerMbusConfig containerMbusConfig,
DocumentTypeManager documentTypeManager,
- MessagebusConfig messagebusConfig,
- DocumentProtocolPoliciesConfig policiesConfig,
- DistributionConfig distributionConfig) {
+ MessagebusConfig messagebusConfig) {
this(net,
containerMbusConfig,
messagebusConfig,
- new DocumentProtocol(documentTypeManager,
- policiesConfig,
- distributionConfig));
+ new DocumentProtocol(documentTypeManager));
}
public SessionCache(Supplier<NetworkMultiplexer> net, ContainerMbusConfig containerMbusConfig,
diff --git a/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java b/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java
index e0cd9ca6dde..e41ce539b4a 100644
--- a/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java
+++ b/container-messagebus/src/test/java/com/yahoo/container/jdisc/messagebus/MbusClientProviderTest.java
@@ -39,9 +39,7 @@ public class MbusClientProviderTest {
SessionCache cache = new SessionCache(() -> NetworkMultiplexer.dedicated(new NullNetwork()),
new ContainerMbusConfig.Builder().build(),
new DocumentTypeManager(new DocumentmanagerConfig.Builder().build()),
- new MessagebusConfig.Builder().build(),
- new DocumentProtocolPoliciesConfig.Builder().build(),
- new DistributionConfig.Builder().build());
+ new MessagebusConfig.Builder().build());
MbusClientProvider p = new MbusClientProvider(cache, config);
assertNotNull(p.get());
p.deconstruct();
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index c41c1c79149..0f440957dfd 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -4456,7 +4456,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -4696,7 +4697,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -4758,7 +4760,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -4784,7 +4787,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -7186,7 +7190,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -7212,7 +7217,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
index fd8110e1173..d1377b8d373 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/BaseNodeMonitor.java
@@ -82,7 +82,7 @@ public abstract class BaseNodeMonitor<T> {
/** Thread-safely changes the state of this node if required */
protected abstract void setWorking(boolean working,String explanation);
- /** Returns whether or not this is monitoring an internal node. Default is false. */
+ /** Returns whether this is monitoring an internal node. Default is false. */
public boolean isInternal() { return internal; }
}
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
index 0b627e91bc5..332bf4ea2c4 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterMonitor.java
@@ -66,7 +66,7 @@ public class ClusterMonitor<T> {
* </ul>
*
* @param node the object representing the node
- * @param internal whether or not this node is internal to this cluster
+ * @param internal whether this node is internal to this cluster
*/
public void add(T node, boolean internal) {
nodeMonitors.put(node, new TrafficNodeMonitor<>(node, configuration, internal));
@@ -96,11 +96,10 @@ public class ClusterMonitor<T> {
* Ping all nodes which needs pinging to discover state changes
*/
public void ping(Executor executor) {
- for (Iterator<BaseNodeMonitor<T>> i = nodeMonitorIterator(); i.hasNext() && !closed.get(); ) {
- BaseNodeMonitor<T> monitor= i.next();
- nodeManager.ping(this, monitor.getNode(), executor); // Cause call to failed or responded
+ for (var monitor : nodeMonitors()) {
+ if (closed.get()) return; // Do nothing to change state if close has started.
+ nodeManager.ping(this, monitor.getNode(), executor);
}
- if (closed.get()) return; // Do nothing to change state if close has started.
nodeManager.pingIterationCompleted();
}
@@ -143,7 +142,7 @@ public class ClusterMonitor<T> {
// for all pings when there are no problems (important because it ensures that
// any thread local connections are reused) 2) a new thread will be started to execute
// new pings when a ping is not responding
- ExecutorService pingExecutor=Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory("search.ping"));
+ ExecutorService pingExecutor = Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory("search.ping"));
while (!closed.get()) {
try {
log.finest("Activating ping");
@@ -165,7 +164,9 @@ public class ClusterMonitor<T> {
}
pingExecutor.shutdown();
try {
- pingExecutor.awaitTermination(10, TimeUnit.SECONDS);
+ if ( ! pingExecutor.awaitTermination(10, TimeUnit.SECONDS)) {
+ log.warning("Timeout waiting for ping executor to terminate");
+ }
} catch (InterruptedException e) { }
log.info("Stopped cluster monitor thread " + getName());
}
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java
index 4af6757db8c..1cf36d75fc5 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/ClusterSearcher.java
@@ -48,7 +48,7 @@ public abstract class ClusterSearcher<T> extends PingableSearcher implements Nod
*
* @param id the id of this searcher
* @param connections the connections of the cluster
- * @param internal whether or not this cluster is internal (part of the same installation)
+ * @param internal whether this cluster is internal (part of the same installation)
*/
public ClusterSearcher(ComponentId id, List<T> connections, boolean internal) {
this(id, connections, new Hasher<>(), internal);
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java b/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java
index 1f6602053d9..f8f8c0d888d 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/MonitorConfiguration.java
@@ -22,7 +22,7 @@ public class MonitorConfiguration {
/**
* Returns the number of milliseconds to attempt to service a request
- * (at different nodes) before giving up. Default is 5000 ms.
+ * (at different nodes) before giving up. See {@link #requestTimeout}.
*/
public long getRequestTimeout() { return requestTimeout; }
diff --git a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
index 11475b6a0ca..108e7e3e34b 100644
--- a/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
+++ b/container-search/src/main/java/com/yahoo/search/cluster/TrafficNodeMonitor.java
@@ -23,7 +23,7 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> {
this.configuration = configuration;
}
- /** Whether or not this has ever responded successfully */
+ /** Whether this has ever responded successfully */
private boolean atStartUp = true;
public T getNode() { return node; }
@@ -55,7 +55,7 @@ public class TrafficNodeMonitor<T> extends BaseNodeMonitor<T> {
respondedAt = now();
succeededAt = respondedAt;
- setWorking(true,"Responds correctly");
+ setWorking(true, "Responds correctly");
}
/**
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java
index 77496114df1..c6fef88fa2d 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java
@@ -2,7 +2,6 @@
package com.yahoo.search.dispatch;
import java.io.Closeable;
-import java.time.Duration;
import java.util.function.BiConsumer;
/**
@@ -21,8 +20,8 @@ public abstract class CloseableInvoker implements Closeable {
private RequestDuration duration;
public void teardown(BiConsumer<Boolean, RequestDuration> teardown) {
- this.teardown = teardown;
- this.duration = new RequestDuration();
+ this.teardown = this.teardown == null ? teardown : this.teardown.andThen(teardown);
+ this.duration = this.duration == null ? new RequestDuration() : this.duration;
}
protected void setFinalStatus(boolean success) {
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
index db7e80a95e5..6f6b0fc2b79 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
@@ -1,9 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.dispatch;
-import com.yahoo.component.annotation.Inject;
import com.yahoo.component.AbstractComponent;
import com.yahoo.component.ComponentId;
+import com.yahoo.component.annotation.Inject;
import com.yahoo.compress.Compressor;
import com.yahoo.container.handler.VipStatus;
import com.yahoo.prelude.fastsearch.VespaBackEndSearcher;
@@ -12,13 +12,14 @@ import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.dispatch.SearchPath.InvalidSearchPathException;
+import com.yahoo.search.dispatch.rpc.RpcConnectionPool;
import com.yahoo.search.dispatch.rpc.RpcInvokerFactory;
import com.yahoo.search.dispatch.rpc.RpcPingFactory;
import com.yahoo.search.dispatch.rpc.RpcResourcePool;
import com.yahoo.search.dispatch.searchcluster.Group;
-import com.yahoo.search.dispatch.searchcluster.SearchGroups;
import com.yahoo.search.dispatch.searchcluster.Node;
import com.yahoo.search.dispatch.searchcluster.SearchCluster;
+import com.yahoo.search.dispatch.searchcluster.SearchGroups;
import com.yahoo.search.query.profile.types.FieldDescription;
import com.yahoo.search.query.profile.types.FieldType;
import com.yahoo.search.query.profile.types.QueryProfileType;
@@ -32,6 +33,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* A dispatcher communicates with search nodes to perform queries and fill hits.
@@ -54,19 +56,43 @@ public class Dispatcher extends AbstractComponent {
/** If set will control computation of how many hits will be fetched from each partition.*/
public static final CompoundName topKProbability = CompoundName.from(DISPATCH + "." + TOP_K_PROBABILITY);
+ private final InvokerFactoryFactory invokerFactories;
private final DispatchConfig dispatchConfig;
- private final RpcResourcePool rpcResourcePool;
+ private final RpcConnectionPool rpcResourcePool;
private final SearchCluster searchCluster;
- private final ClusterMonitor<Node> clusterMonitor;
private volatile VolatileItems volatileItems;
private static class VolatileItems {
+
final LoadBalancer loadBalancer;
final InvokerFactory invokerFactory;
- VolatileItems(LoadBalancer loadBalancer, InvokerFactory invokerFactory) {
+ final ClusterMonitor<Node> clusterMonitor;
+ final AtomicInteger inflight = new AtomicInteger(1); // Initial reference.
+ Runnable cleanup = () -> { };
+
+ VolatileItems(LoadBalancer loadBalancer, InvokerFactory invokerFactory, ClusterMonitor<Node> clusterMonitor) {
this.loadBalancer = loadBalancer;
this.invokerFactory = invokerFactory;
+ this.clusterMonitor = clusterMonitor;
+ }
+
+ private void countDown() {
+ if (inflight.decrementAndGet() == 0) cleanup.run();
+ }
+
+ private class Ref implements AutoCloseable {
+ boolean handedOff = false;
+ { inflight.incrementAndGet(); }
+ VolatileItems get() { return VolatileItems.this; }
+ /** Hands off the reference to the given invoker, which will decrement the counter when closed. */
+ <T extends CloseableInvoker> T register(T invoker) {
+ invoker.teardown((__, ___) -> countDown());
+ handedOff = true;
+ return invoker;
+ }
+ @Override public void close() { if ( ! handedOff) countDown(); }
}
+
}
private static final QueryProfileType argumentType;
@@ -81,34 +107,105 @@ public class Dispatcher extends AbstractComponent {
public static QueryProfileType getArgumentType() { return argumentType; }
+ interface InvokerFactoryFactory {
+ InvokerFactory create(RpcConnectionPool rpcConnectionPool, SearchGroups searchGroups, DispatchConfig dispatchConfig);
+ }
+
@Inject
- public Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig,
- DispatchNodesConfig nodesConfig, VipStatus vipStatus) {
- this.dispatchConfig = dispatchConfig;
- rpcResourcePool = new RpcResourcePool(dispatchConfig, nodesConfig);
- searchCluster = new SearchCluster(clusterId.stringValue(), dispatchConfig.minActivedocsPercentage(),
- toNodes(nodesConfig), vipStatus, new RpcPingFactory(rpcResourcePool));
- clusterMonitor = new ClusterMonitor<>(searchCluster, true);
- volatileItems = update(null);
+ public Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, DispatchNodesConfig nodesConfig, VipStatus vipStatus) {
+ this(clusterId, dispatchConfig, new RpcResourcePool(dispatchConfig, nodesConfig), nodesConfig, vipStatus, RpcInvokerFactory::new);
initialWarmup(dispatchConfig.warmuptime());
}
- /* For simple mocking in tests. Beware that searchCluster is shutdown on in deconstruct() */
- Dispatcher(ClusterMonitor<Node> clusterMonitor, SearchCluster searchCluster,
- DispatchConfig dispatchConfig, InvokerFactory invokerFactory) {
+ Dispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool,
+ DispatchNodesConfig nodesConfig, VipStatus vipStatus, InvokerFactoryFactory invokerFactories) {
+ this(dispatchConfig, rpcConnectionPool,
+ new SearchCluster(clusterId.stringValue(), dispatchConfig.minActivedocsPercentage(),
+ toNodes(nodesConfig), vipStatus, new RpcPingFactory(rpcConnectionPool)),
+ invokerFactories);
+ }
+
+ Dispatcher(DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool,
+ SearchCluster searchCluster, InvokerFactoryFactory invokerFactories) {
+ this(dispatchConfig, rpcConnectionPool, searchCluster, new ClusterMonitor<>(searchCluster, false), invokerFactories);
+ this.volatileItems.clusterMonitor.start(); // Populate nodes to monitor before starting it.
+ }
+
+ Dispatcher(DispatchConfig dispatchConfig, RpcConnectionPool rpcConnectionPool,
+ SearchCluster searchCluster, ClusterMonitor<Node> clusterMonitor, InvokerFactoryFactory invokerFactories) {
this.dispatchConfig = dispatchConfig;
- this.rpcResourcePool = null;
+ this.rpcResourcePool = rpcConnectionPool;
this.searchCluster = searchCluster;
- this.clusterMonitor = clusterMonitor;
- this.volatileItems = update(invokerFactory);
+ this.invokerFactories = invokerFactories;
+ this.volatileItems = update(clusterMonitor);
+ searchCluster.addMonitoring(clusterMonitor);
}
- private VolatileItems update(InvokerFactory invokerFactory) {
+ /* For simple mocking in tests. Beware that searchCluster is shutdown in deconstruct() */
+ Dispatcher(ClusterMonitor<Node> clusterMonitor, SearchCluster searchCluster,
+ DispatchConfig dispatchConfig, InvokerFactory invokerFactory) {
+ this(dispatchConfig, null, searchCluster, clusterMonitor, (__, ___, ____) -> invokerFactory);
+ }
+
+ /** Returns the snapshot of volatile items that need to be kept together, incrementing its reference counter. */
+ private VolatileItems.Ref volatileItems() {
+ return volatileItems.new Ref();
+ }
+
+ /**
+ * This is called whenever we have new config for backend nodes.
+ * Normally, we'd want to handle partial failure of the component graph, by reinstating the old state;
+ * however, in this case, such a failure would be local to this container, and we instead want to keep
+ * the newest config, as that is what most accurately represents the actual backend.
+ *
+ * The flow of reconfiguration is:
+ * 1. The volatile snapshot of disposable items is replaced with a new one that only references updated nodes.
+ * 2. Dependencies of the items in 1., which must be configured, are updated, yielding a list of resources to close.
+ * 3. When inflight operations against the old snapshot are done, all obsolete resources are cleaned up.
+ *
+ * Ownership details:
+ * 1. The RPC resource pool is owned by the dispatcher, and is updated on node set changes;
+ * it contains the means by which the container talks to backend nodes, so cleanup must be delayed until safe.
+ * 2. The invocation factory is owned by the volatile snapshot, and is swapped atomically with it;
+ * it is used by the dispatcher to create ephemeral invokers, which must complete before cleanup (above) can happen.
+ * 3. The load balancer is owned by the volatile snapshot, and is swapped atomically with it;
+ * it is used internally by the dispatcher to select search nodes for queries, and is discarded with its snapshot.
+ * 4. The cluster monitor is a subordinate to the search cluster, and does whatever that tells it to, at any time;
+ * it is technically owned by the volatile snapshot, but mostly to show it is swapped together with that.
+ * 5. The search cluster is owned by the dispatcher, and is updated on node set changes;
+ * its responsibility is to keep track of the state of the backend, and to provide a view of it to the dispatcher,
+ * as well as keep the container vip status updated accordingly; it should therefore preserve as much as possible
+ * of its state across reconfigurations: with new node config, it will immediately forget obsolete nodes, and set
+ * coverage information as if the new nodes have zero documents, before even checking their status; this is fine
+ * under the assumption that this is the common case, i.e., new nodes have no documents yet.
+ */
+ void updateWithNewConfig(DispatchNodesConfig nodesConfig) {
+ try (var items = volatileItems()) { // Marking a reference to the old snapshot, which we want to have cleaned up.
+ items.get().countDown(); // Decrement for its initial creation reference, so it may reach 0.
+
+ // Let the RPC pool know about the new nodes, and set up the delayed cleanup that we need to do.
+ Collection<? extends AutoCloseable> connectionPoolsToClose = rpcResourcePool.updateNodes(nodesConfig);
+ items.get().cleanup = () -> {
+ for (AutoCloseable pool : connectionPoolsToClose) {
+ try { pool.close(); } catch (Exception ignored) { }
+ }
+ };
+
+ // Update the nodes the search cluster keeps track of, and what nodes are monitored.
+ ClusterMonitor<Node> newMonitor = searchCluster.updateNodes(toNodes(nodesConfig), dispatchConfig.minActivedocsPercentage());
+
+ // Update the snapshot to use the new nodes set in the search cluster; the RPC pool is ready for this.
+ this.volatileItems = update(newMonitor);
+
+ // Wait for the old cluster monitor to die; it may be pinging nodes we want to shut down RPC connections to.
+ items.get().clusterMonitor.shutdown();
+ } // Close the old snapshot, which may trigger the RPC cleanup now, or when the last invoker is closed, by a search thread.
+ }
+
+ private VolatileItems update(ClusterMonitor<Node> clusterMonitor) {
var items = new VolatileItems(new LoadBalancer(searchCluster.groupList().groups(), toLoadBalancerPolicy(dispatchConfig.distributionPolicy())),
- (invokerFactory == null)
- ? new RpcInvokerFactory(rpcResourcePool, searchCluster.groupList(), dispatchConfig)
- : invokerFactory);
- searchCluster.addMonitoring(clusterMonitor);
+ invokerFactories.create(rpcResourcePool, searchCluster.groupList(), dispatchConfig),
+ clusterMonitor);
return items;
}
@@ -158,27 +255,30 @@ public class Dispatcher extends AbstractComponent {
@Override
public void deconstruct() {
// The clustermonitor must be shutdown first as it uses the invokerfactory through the searchCluster.
- clusterMonitor.shutdown();
+ volatileItems.clusterMonitor.shutdown();
if (rpcResourcePool != null) {
rpcResourcePool.close();
}
}
public FillInvoker getFillInvoker(Result result, VespaBackEndSearcher searcher) {
- return volatileItems.invokerFactory.createFillInvoker(searcher, result);
+ try (var items = volatileItems()) { // Take a snapshot, and release it when we're done.
+ return items.register(items.get().invokerFactory.createFillInvoker(searcher, result));
+ }
}
public SearchInvoker getSearchInvoker(Query query, VespaBackEndSearcher searcher) {
- VolatileItems items = volatileItems; // Take a snapshot
- int maxHitsPerNode = dispatchConfig.maxHitsPerNode();
- SearchInvoker invoker = getSearchPathInvoker(query, searcher, searchCluster.groupList(), items.invokerFactory, maxHitsPerNode)
- .orElseGet(() -> getInternalInvoker(query, searcher, searchCluster, items.loadBalancer, items.invokerFactory, maxHitsPerNode));
-
- if (query.properties().getBoolean(com.yahoo.search.query.Model.ESTIMATE)) {
- query.setHits(0);
- query.setOffset(0);
+ try (var items = volatileItems()) { // Take a snapshot, and release it when we're done.
+ int maxHitsPerNode = dispatchConfig.maxHitsPerNode();
+ SearchInvoker invoker = getSearchPathInvoker(query, searcher, searchCluster.groupList(), items.get().invokerFactory, maxHitsPerNode)
+ .orElseGet(() -> getInternalInvoker(query, searcher, searchCluster, items.get().loadBalancer, items.get().invokerFactory, maxHitsPerNode));
+
+ if (query.properties().getBoolean(com.yahoo.search.query.Model.ESTIMATE)) {
+ query.setHits(0);
+ query.setOffset(0);
+ }
+ return items.register(invoker);
}
- return invoker;
}
/** Builds an invoker based on searchpath */
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java
new file mode 100644
index 00000000000..625a8bcb6da
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/ReconfigurableDispatcher.java
@@ -0,0 +1,37 @@
+package com.yahoo.search.dispatch;
+
+import com.yahoo.component.ComponentId;
+import com.yahoo.config.subscription.ConfigSubscriber;
+import com.yahoo.container.handler.VipStatus;
+import com.yahoo.messagebus.network.rpc.SlobrokConfigSubscriber;
+import com.yahoo.vespa.config.search.DispatchConfig;
+import com.yahoo.vespa.config.search.DispatchNodesConfig;
+import com.yahoo.yolean.UncheckedInterruptedException;
+
+import java.util.Objects;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * @author jonmv
+ */
+public class ReconfigurableDispatcher extends Dispatcher {
+
+ private final ConfigSubscriber subscriber;
+
+ public ReconfigurableDispatcher(ComponentId clusterId, DispatchConfig dispatchConfig, VipStatus vipStatus) {
+ super(clusterId, dispatchConfig, new DispatchNodesConfig.Builder().build(), vipStatus);
+ this.subscriber = new ConfigSubscriber();
+ this.subscriber.subscribe(this::updateWithNewConfig, DispatchNodesConfig.class, clusterId.stringValue());
+ }
+
+ @Override
+ public void deconstruct() {
+ subscriber.close();
+ super.deconstruct();
+ }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java
index 1206277a103..6b134dc23a6 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java
@@ -5,7 +5,7 @@ import java.time.Duration;
import java.time.Instant;
/**
- * Contains start and and time. Exposes a duration, and lets you measure the time difference between 2 requests.
+ * Contains start and end time. Exposes a duration, and lets you measure the time difference between 2 requests.
* It does use System.nanoTime to get a steady clock.
*
* @author baldersheim
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java
index 22ed8b6d9fa..6c1f666835c 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/Client.java
@@ -12,7 +12,7 @@ import java.util.Optional;
*
* @author bratseth
*/
-interface Client {
+public interface Client {
/** Creates a connection to a particular node in this */
NodeConnection createConnection(String hostname, int port);
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java
index fd8e0e4f81a..a93ddb0b360 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcConnectionPool.java
@@ -1,11 +1,27 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.dispatch.rpc;
+import com.yahoo.vespa.config.search.DispatchNodesConfig;
+
+import java.util.Collection;
+import java.util.List;
+
/**
* Interface for getting a connection given a node id.
*
* @author balderersheim
*/
-public interface RpcConnectionPool {
+public interface RpcConnectionPool extends AutoCloseable {
+
+ /** Returns a connection to the given node id. */
Client.NodeConnection getConnection(int nodeId);
+
+
+ /** Returns the items that must be closed (with delay) after the node set has been updated. */
+ default Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) { return List.of(); }
+
+ /** Shuts down all connections in the pool, and the underlying RPC client. */
+ @Override
+ void close();
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java
index 154002c4f77..b6228994ac8 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcInvokerFactory.java
@@ -33,7 +33,7 @@ public class RpcInvokerFactory extends InvokerFactory {
super(cluster, dispatchConfig);
this.rpcResourcePool = rpcResourcePool;
this.compressor = new CompressService();
- decodeType = convert(dispatchConfig.summaryDecodePolicy());
+ this.decodeType = convert(dispatchConfig.summaryDecodePolicy());
}
@Override
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java
index 53dc54f7bc5..a59097e5fff 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcPing.java
@@ -37,7 +37,7 @@ public class RpcPing implements Pinger, Client.ResponseReceiver {
this.clusterMonitor = clusterMonitor;
this.pingSequenceId = node.createPingSequenceId();
this.pongHandler = pongHandler;
- this. compressor = compressor;
+ this.compressor = compressor;
}
@Override
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java
index 63530a7f650..d1f22514481 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcResourcePool.java
@@ -3,8 +3,10 @@ package com.yahoo.search.dispatch.rpc;
import com.yahoo.search.dispatch.FillInvoker;
import com.yahoo.search.dispatch.rpc.Client.NodeConnection;
+import com.yahoo.search.dispatch.rpc.RpcClient.RpcNodeConnection;
import com.yahoo.vespa.config.search.DispatchConfig;
import com.yahoo.vespa.config.search.DispatchNodesConfig;
+import com.yahoo.vespa.config.search.DispatchNodesConfig.Node;
import java.util.ArrayList;
import java.util.Collection;
@@ -19,7 +21,7 @@ import java.util.concurrent.ThreadLocalRandom;
*
* @author ollivir
*/
-public class RpcResourcePool implements RpcConnectionPool, AutoCloseable {
+public class RpcResourcePool implements RpcConnectionPool {
/** Connections to the search nodes this talks to, indexed by node id ("partid") */
private volatile Map<Integer, NodeConnectionPool> nodeConnectionPools = Map.of();
@@ -35,46 +37,35 @@ public class RpcResourcePool implements RpcConnectionPool, AutoCloseable {
}
public RpcResourcePool(DispatchConfig dispatchConfig, DispatchNodesConfig nodesConfig) {
- super();
rpcClient = new RpcClient("dispatch-client", dispatchConfig.numJrtTransportThreads());
numConnections = dispatchConfig.numJrtConnectionsPerNode();
- updateNodes(nodesConfig).forEach(item -> {
- try {
- item.close();
- } catch (Exception e) {}
+ updateNodes(nodesConfig).forEach(pool -> {
+ try { pool.close(); } catch (Exception ignored) { } // Shouldn't throw.
});
}
- /** Will return a list of items that need a delayed close */
- public Collection<AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) {
- List<AutoCloseable> toClose = new ArrayList<>();
- var builder = new HashMap<Integer, NodeConnectionPool>();
+ @Override
+ public Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig nodesConfig) {
+ Map<Integer, NodeConnectionPool> currentPools = new HashMap<>(nodeConnectionPools);
+ Map<Integer, NodeConnectionPool> nextPools = new HashMap<>();
// Who can be reused
- for (var node : nodesConfig.node()) {
- var prev = nodeConnectionPools.get(node.key());
- NodeConnection nc = prev != null ? prev.nextConnection() : null;
- if (nc instanceof RpcClient.RpcNodeConnection rpcNodeConnection
- && rpcNodeConnection.getPort() == node.port()
- && rpcNodeConnection.getHostname().equals(node.host()))
+ for (Node node : nodesConfig.node()) {
+ if ( currentPools.containsKey(node.key())
+ && currentPools.get(node.key()).nextConnection() instanceof RpcNodeConnection rpcNodeConnection
+ && rpcNodeConnection.getPort() == node.port()
+ && rpcNodeConnection.getHostname().equals(node.host()))
{
- builder.put(node.key(), prev);
+ nextPools.put(node.key(), currentPools.remove(node.key()));
} else {
- var connections = new ArrayList<NodeConnection>(numConnections);
+ ArrayList<NodeConnection> connections = new ArrayList<>(numConnections);
for (int i = 0; i < numConnections; i++) {
connections.add(rpcClient.createConnection(node.host(), node.port()));
}
- builder.put(node.key(), new NodeConnectionPool(connections));
+ nextPools.put(node.key(), new NodeConnectionPool(connections));
}
}
- // Who are not needed any more
- nodeConnectionPools.forEach((key, pool) -> {
- var survivor = builder.get(key);
- if (survivor == null || pool != survivor) {
- toClose.add(pool);
- }
- });
- this.nodeConnectionPools = Map.copyOf(builder);
- return toClose;
+ this.nodeConnectionPools = Map.copyOf(nextPools);
+ return currentPools.values();
}
@Override
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index 121c12335f5..c8af5cea5aa 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.dispatch.searchcluster;
+import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
@@ -51,7 +52,7 @@ public class Group {
/**
* Returns whether this group has sufficient active documents
- * (compared to other groups) that is should receive traffic
+ * (compared to other groups) that it should receive traffic
*/
public boolean hasSufficientCoverage() {
return hasSufficientCoverage;
@@ -66,14 +67,16 @@ public class Group {
}
public void aggregateNodeValues() {
- long activeDocs = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(Node::getActiveDocuments).sum();
+ List<Node> workingNodes = new ArrayList<>(nodes);
+ workingNodes.removeIf(node -> node.isWorking() != Boolean.TRUE);
+ long activeDocs = workingNodes.stream().mapToLong(Node::getActiveDocuments).sum();
activeDocuments = activeDocs;
- targetActiveDocuments = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(Node::getTargetActiveDocuments).sum();
+ targetActiveDocuments = workingNodes.stream().mapToLong(Node::getTargetActiveDocuments).sum();
isBlockingWrites = nodes.stream().anyMatch(Node::isBlockingWrites);
- int numWorkingNodes = workingNodes();
+ int numWorkingNodes = workingNodes.size();
if (numWorkingNodes > 0) {
long average = activeDocs / numWorkingNodes;
- long skew = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(node -> Math.abs(node.getActiveDocuments() - average)).sum();
+ long skew = workingNodes.stream().mapToLong(node -> Math.abs(node.getActiveDocuments() - average)).sum();
boolean balanced = skew <= activeDocs * maxContentSkew;
if (balanced != isBalanced) {
if (!isSparse())
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 9c65cb3d4c0..3c8950f1f7f 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -6,15 +6,18 @@ import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.cluster.NodeManager;
+import com.yahoo.yolean.UncheckedInterruptedException;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Executor;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
+
+import static java.util.stream.Collectors.groupingBy;
/**
* A model of a search cluster we might want to dispatch queries to.
@@ -28,7 +31,7 @@ public class SearchCluster implements NodeManager<Node> {
private final String clusterId;
private final VipStatus vipStatus;
private final PingFactory pingFactory;
- private final SearchGroupsImpl groups;
+ private volatile SearchGroupsImpl groups;
private volatile long nextLogTime = 0;
/**
@@ -45,6 +48,7 @@ public class SearchCluster implements NodeManager<Node> {
VipStatus vipStatus, PingFactory pingFactory) {
this(clusterId, toGroups(nodes, minActivedocsPercentage), vipStatus, pingFactory);
}
+
public SearchCluster(String clusterId, SearchGroupsImpl groups, VipStatus vipStatus, PingFactory pingFactory) {
this.clusterId = clusterId;
this.vipStatus = vipStatus;
@@ -55,13 +59,28 @@ public class SearchCluster implements NodeManager<Node> {
@Override
public String name() { return clusterId; }
- public VipStatus getVipStatus() { return vipStatus; }
+
+ /** Sets the nodes to monitor to the given new nodes, but keeps any existing node instances which equal the new ones. */
+ public ClusterMonitor<Node> updateNodes(Collection<Node> newNodes, double minActivedocsPercentage) {
+ Collection<Node> retainedNodes = groups.nodes();
+ Collection<Node> currentNodes = new HashSet<>(newNodes);
+ retainedNodes.retainAll(currentNodes); // Throw away all old nodes which are not in the new set.
+ currentNodes.removeIf(retainedNodes::contains); // Throw away all new nodes for which we have more information in an old object.
+ Collection<Node> addedNodes = List.copyOf(currentNodes);
+ currentNodes.addAll(retainedNodes); // Add the retained old instances back, in place of their equal new ones.
+ SearchGroupsImpl groups = toGroups(currentNodes, minActivedocsPercentage);
+ ClusterMonitor<Node> monitor = new ClusterMonitor<>(this, false);
+ for (Node node : groups.nodes()) monitor.add(node, true);
+ monitor.start();
+ try { while (addedNodes.stream().anyMatch(node -> node.isWorking() == null)) { Thread.sleep(1); } }
+ catch (InterruptedException e) { throw new UncheckedInterruptedException(e, true); }
+ pingIterationCompleted(groups);
+ this.groups = groups;
+ return monitor;
+ }
public void addMonitoring(ClusterMonitor<Node> clusterMonitor) {
- for (var group : groups()) {
- for (var node : group.nodes())
- clusterMonitor.add(node, true);
- }
+ for (Node node : groups.nodes()) clusterMonitor.add(node, true);
}
private static Node findLocalCorpusDispatchTarget(String selfHostname, SearchGroups groups) {
@@ -86,14 +105,14 @@ public class SearchCluster implements NodeManager<Node> {
private static SearchGroupsImpl toGroups(Collection<Node> nodes, double minActivedocsPercentage) {
Map<Integer, Group> groups = new HashMap<>();
- for (Map.Entry<Integer, List<Node>> group : nodes.stream().collect(Collectors.groupingBy(Node::group)).entrySet()) {
- Group g = new Group(group.getKey(), group.getValue());
- groups.put(group.getKey(), g);
- }
+ nodes.stream().collect(groupingBy(Node::group)).forEach((groupId, groupNodes) -> {
+ groups.put(groupId, new Group(groupId, groupNodes));
+ });
return new SearchGroupsImpl(Map.copyOf(groups), minActivedocsPercentage);
}
public SearchGroups groupList() { return groups; }
+
public Group group(int id) { return groups.get(id); }
private Collection<Group> groups() { return groups.groups(); }
@@ -107,14 +126,14 @@ public class SearchCluster implements NodeManager<Node> {
* or empty if we should not dispatch directly.
*/
public Optional<Node> localCorpusDispatchTarget() {
- if ( localCorpusDispatchTarget == null) return Optional.empty();
+ if (localCorpusDispatchTarget == null) return Optional.empty();
// Only use direct dispatch if the local group has sufficient coverage
Group localSearchGroup = groups.get(localCorpusDispatchTarget.group());
if ( ! localSearchGroup.hasSufficientCoverage()) return Optional.empty();
// Only use direct dispatch if the local search node is not down
- if ( localCorpusDispatchTarget.isWorking() == Boolean.FALSE) return Optional.empty();
+ if (localCorpusDispatchTarget.isWorking() == Boolean.FALSE) return Optional.empty();
return Optional.of(localCorpusDispatchTarget);
}
@@ -176,7 +195,7 @@ public class SearchCluster implements NodeManager<Node> {
return groups().stream().allMatch(group -> group.nodes().stream().allMatch(node -> node.isWorking() != null));
}
- public long nonWorkingNodeCount() {
+ long nonWorkingNodeCount() {
return groups().stream().flatMap(group -> group.nodes().stream()).filter(node -> node.isWorking() == Boolean.FALSE).count();
}
@@ -194,13 +213,13 @@ public class SearchCluster implements NodeManager<Node> {
/** Used by the cluster monitor to manage node status */
@Override
- public void ping(ClusterMonitor clusterMonitor, Node node, Executor executor) {
+ public void ping(ClusterMonitor<Node> clusterMonitor, Node node, Executor executor) {
Pinger pinger = pingFactory.createPinger(node, clusterMonitor, new PongCallback(node, clusterMonitor));
pinger.ping();
}
- private void pingIterationCompletedSingleGroup() {
- Group group = groups().iterator().next();
+ private void pingIterationCompletedSingleGroup(SearchGroupsImpl groups) {
+ Group group = groups.groups().iterator().next();
group.aggregateNodeValues();
// With just one group sufficient coverage may not be the same as full coverage, as the
// group will always be marked sufficient for use.
@@ -209,10 +228,10 @@ public class SearchCluster implements NodeManager<Node> {
trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments());
}
- private void pingIterationCompletedMultipleGroups() {
- groups().forEach(Group::aggregateNodeValues);
+ private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) {
+ groups.groups().forEach(Group::aggregateNodeValues);
long medianDocuments = groups.medianDocumentsPerGroup();
- for (Group group : groups()) {
+ for (Group group : groups.groups()) {
boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments);
updateSufficientCoverage(group, sufficientCoverage);
trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments);
@@ -226,20 +245,20 @@ public class SearchCluster implements NodeManager<Node> {
*/
@Override
public void pingIterationCompleted() {
+ pingIterationCompleted(groups);
+ }
+
+ private void pingIterationCompleted(SearchGroupsImpl groups) {
if (groups.size() == 1) {
- pingIterationCompletedSingleGroup();
+ pingIterationCompletedSingleGroup(groups);
} else {
- pingIterationCompletedMultipleGroups();
+ pingIterationCompletedMultipleGroups(groups);
}
}
-
-
/**
* Calculate whether a subset of nodes in a group has enough coverage
*/
-
-
private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) {
if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about.
boolean changed = group.fullCoverageStatusChanged(fullCoverage);
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
index b041ba28db9..5727931281a 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
@@ -1,8 +1,16 @@
package com.yahoo.search.dispatch.searchcluster;
+import com.yahoo.stream.CustomCollectors;
+
import java.util.Collection;
+import java.util.Comparator;
+import java.util.LinkedHashSet;
import java.util.Set;
+import static java.util.Comparator.comparingInt;
+import static java.util.stream.Collectors.toCollection;
+import static java.util.stream.Collectors.toSet;
+
/**
* Simple interface for groups and their nodes in the content cluster
* @author baldersheim
@@ -14,6 +22,11 @@ public interface SearchGroups {
default boolean isEmpty() {
return size() == 0;
}
+ default Set<Node> nodes() {
+ return groups().stream().flatMap(group -> group.nodes().stream())
+ .sorted(comparingInt(Node::key))
+ .collect(toCollection(LinkedHashSet::new));
+ }
int size();
boolean isPartialGroupCoverageSufficient(Collection<Node> nodes);
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
index 514f0de4fec..3c5dbe9927a 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
@@ -3,10 +3,8 @@ package com.yahoo.search.dispatch.searchcluster;
import com.google.common.math.Quantiles;
import java.util.Collection;
-import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.stream.Collectors;
public class SearchGroupsImpl implements SearchGroups {
@@ -42,4 +40,5 @@ public class SearchGroupsImpl implements SearchGroups {
double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray();
return (long) Quantiles.median().computeInPlace(activeDocuments);
}
+
}
diff --git a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj
index 39ea6435393..a5953964b39 100644
--- a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj
+++ b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj
@@ -274,7 +274,8 @@ TermProduction nonphraseTermProduction() :
TermProduction termProduction;
}
{
- ( termProduction = referenceTermProduction() |
+ ( LOOKAHEAD(2)
+ termProduction = referenceTermProduction() |
termProduction = literalTermProduction() )
{
return termProduction;
@@ -314,8 +315,10 @@ ReferenceTermProduction referenceTermProduction() :
{
String reference;
boolean produceAll = false;
+ String comparisonPrefix = "";
}
{
+ ( comparisonPrefix = comparisonPrefix() )?
<LEFTSQUAREBRACKET>
reference = referenceIdentifier()
(<STAR> { produceAll = true; })?
@@ -326,10 +329,17 @@ ReferenceTermProduction referenceTermProduction() :
LiteralTermProduction literalTermProduction() :
{
String literal;
+ String comparisonPrefix = "";
}
{
- literal = identifier()
- { return new LiteralTermProduction(literal); }
+ ( ( comparisonPrefix = comparisonPrefix() )? literal = identifier() )
+ { return new LiteralTermProduction(comparisonPrefix + literal); }
+}
+
+String comparisonPrefix() : {}
+{
+ <SMALLER> { return "<"; } |
+ <LARGER> { return ">"; }
}
TermType termType() :
diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java
new file mode 100644
index 00000000000..2cdbfbdb3fb
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RangesTestCase.java
@@ -0,0 +1,17 @@
+package com.yahoo.prelude.semantics.test;
+
+import org.junit.jupiter.api.Test;
+
+public class RangesTestCase extends RuleBaseAbstractTestCase { // Tests comparison-prefix (<, >) term productions in semantic rule bases.
+
+    public RangesTestCase() {
+        super("ranges.sr"); // Rule base containing "under 5000 -> price:<5000;"
+    }
+
+    @Test
+    void testPrice() {
+        assertSemantics("AND shoes price:<5000",
+                        "shoes under 5000");
+    }
+
+}
diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr
new file mode 100644
index 00000000000..3b0120fd18a
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/ranges.sr
@@ -0,0 +1,2 @@
+under 5000 -> price:<5000;
+over [...] -> price:>[...];
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
index 2603f89b546..1278afe3759 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java
@@ -1,27 +1,51 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.dispatch;
+import com.yahoo.compress.CompressionType;
+import com.yahoo.prelude.Pong;
import com.yahoo.prelude.fastsearch.VespaBackEndSearcher;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.cluster.ClusterMonitor;
+import com.yahoo.search.dispatch.Dispatcher.InvokerFactoryFactory;
+import com.yahoo.search.dispatch.rpc.Client.NodeConnection;
+import com.yahoo.search.dispatch.rpc.Client.ResponseReceiver;
+import com.yahoo.search.dispatch.rpc.RpcConnectionPool;
import com.yahoo.search.dispatch.searchcluster.MockSearchCluster;
-import com.yahoo.search.dispatch.searchcluster.SearchGroups;
import com.yahoo.search.dispatch.searchcluster.Node;
import com.yahoo.search.dispatch.searchcluster.PingFactory;
import com.yahoo.search.dispatch.searchcluster.Pinger;
import com.yahoo.search.dispatch.searchcluster.PongHandler;
import com.yahoo.search.dispatch.searchcluster.SearchCluster;
+import com.yahoo.search.dispatch.searchcluster.SearchGroups;
+import com.yahoo.search.searchchain.Execution;
import com.yahoo.vespa.config.search.DispatchConfig;
+import com.yahoo.vespa.config.search.DispatchNodesConfig;
import org.junit.jupiter.api.Test;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.Phaser;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
import static com.yahoo.search.dispatch.searchcluster.MockSearchCluster.createDispatchConfig;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
@@ -149,6 +173,172 @@ public class DispatcherTest {
dispatcher.deconstruct();
}
+ @Test
+ void testRpcResourceShutdownOnReconfiguration() throws InterruptedException, ExecutionException, IOException {
+ // Ping factory lets us tick each ping, so we may delay shutdown, due to monitor thread RPC usage.
+ Map<Integer, Phaser> pingPhasers = new ConcurrentHashMap<>();
+ pingPhasers.put(0, new Phaser(2));
+ pingPhasers.put(1, new Phaser(2));
+ pingPhasers.put(2, new Phaser(2));
+
+ PingFactory pingFactory = (node, monitor, pongHandler) -> () -> {
+ pingPhasers.get(node.key()).arriveAndAwaitAdvance();
+ pongHandler.handle(new Pong(2, 2));
+ pingPhasers.get(node.key()).arriveAndAwaitAdvance();
+ };
+
+ // Search cluster uses the ping factory, and zero nodes initially, later configured with two nodes.
+ SearchCluster cluster = new MockSearchCluster("cid", 0, 1, pingFactory);
+
+ // Dummy RPC layer where we manually tick responses for each node.
+ // When a response is let go, we verify the RPC resource is not yet closed.
+ // This is signalled by terminating its phaser, which is done by the dispatcher in delayed cleanup.
+ // We verify in the end that all connections have been shut down, prior to shutting down the RPC pool proper.
+ Map<Integer, Boolean > rpcResources = new HashMap<>();
+ AtomicLong cleanupThreadId = new AtomicLong();
+ AtomicInteger nodeIdOfSearcher0 = new AtomicInteger(-1);
+ RpcConnectionPool rpcPool = new RpcConnectionPool() {
+ // Returns a connection that lets us advance the searcher when we want to, as well as tracking which threads do what.
+ @Override public NodeConnection getConnection(int nodeId) {
+ nodeIdOfSearcher0.set(nodeId);
+ return new NodeConnection() {
+ @Override public void request(String rpcMethod, CompressionType compression, int uncompressedLength, byte[] compressedPayload, ResponseReceiver responseReceiver, double timeoutSeconds) {
+ assertTrue(rpcResources.get(nodeId));
+ }
+ @Override public void close() {
+ assertFalse(rpcResources.remove(nodeId));
+ }
+ };
+ }
+ // Verifies cleanup is done by the expected thread, by ID, and cleans up the "RPC connection" (phaser).
+ @Override public Collection<? extends AutoCloseable> updateNodes(DispatchNodesConfig config) {
+ for (DispatchNodesConfig.Node node : config.node())
+ rpcResources.putIfAbsent(node.key(), true);
+ return rpcResources.keySet().stream()
+ .filter(key -> config.node().stream().noneMatch(node -> node.key() == key))
+ .map(key -> (AutoCloseable) () -> {
+ assertTrue(rpcResources.put(key, false));
+ cleanupThreadId.set(Thread.currentThread().getId());
+ getConnection(key).close();
+ })
+ .toList();
+ };
+ // In the end, we have reconfigured down to 0 nodes, and no resources should be left running after cleanup.
+ @Override public void close() {
+ assertEquals(Map.of(), rpcResources);
+ }
+ };
+
+ // This factory just forwards search to the dummy RPC layer above, nothing more.
+ InvokerFactoryFactory invokerFactories = (rpcConnectionPool, searchGroups, dispatchConfig) -> new InvokerFactory(searchGroups, dispatchConfig) {
+ @Override protected Optional<SearchInvoker> createNodeSearchInvoker(VespaBackEndSearcher searcher, Query query, int maxHits, Node node) {
+ return Optional.of(new SearchInvoker(Optional.of(node)) {
+ @Override protected Object sendSearchRequest(Query query, Object context) {
+ rpcPool.getConnection(node.key()).request(null, null, 0, null, null, 0);
+ return null;
+ };
+ @Override protected InvokerResult getSearchResult(Execution execution) {
+ return new InvokerResult(new Result(new Query()));
+ }
+ @Override protected void release() { }
+ });
+ };
+ @Override public FillInvoker createFillInvoker(VespaBackEndSearcher searcher, Result result) {
+ return new FillInvoker() {
+ @Override protected void getFillResults(Result result, String summaryClass) { fail(); }
+ @Override protected void sendFillRequest(Result result, String summaryClass) { fail(); }
+ @Override protected void release() { fail(); }
+ };
+ }
+ };
+
+ Dispatcher dispatcher = new Dispatcher(dispatchConfig, rpcPool, cluster, invokerFactories);
+ ExecutorService executor = Executors.newFixedThreadPool(1);
+
+ // Set two groups with a single node each. The first cluster-monitor has nothing to do, and is shut down immediately.
+ // There are also no invokers, so the whole reconfiguration completes once the new cluster monitor has seen all nodes.
+ Future<?> reconfiguration = executor.submit(() -> {
+ dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder()
+ .node(new DispatchNodesConfig.Node.Builder().key(0).group(0).port(123).host("host0"))
+ .node(new DispatchNodesConfig.Node.Builder().key(1).group(1).port(123).host("host1"))
+ .build());
+ });
+
+ // Let pings return, to allow the search cluster to reconfigure.
+ pingPhasers.get(0).arriveAndAwaitAdvance();
+ pingPhasers.get(0).arriveAndAwaitAdvance();
+ pingPhasers.get(1).arriveAndAwaitAdvance();
+ pingPhasers.get(1).arriveAndAwaitAdvance();
+ // We need to wait for the cluster to have at least one group, lest dispatch will fail below.
+ reconfiguration.get();
+ assertNotEquals(cleanupThreadId.get(), Thread.currentThread().getId());
+ assertEquals(1, cluster.group(0).workingNodes());
+ assertEquals(1, cluster.group(1).workingNodes());
+
+ Node node0 = cluster.group(0).nodes().get(0); // Node0 will be replaced.
+ Node node1 = cluster.group(1).nodes().get(0); // Node1 will be retained.
+
+ // Start some searches, one against each group, since we have a round-robin policy.
+ SearchInvoker search0 = dispatcher.getSearchInvoker(new Query(), null);
+ search0.search(new Query(), null);
+ // Unknown whether the first or second search hits node0, so we must track that.
+ int offset = nodeIdOfSearcher0.get();
+ SearchInvoker search1 = dispatcher.getSearchInvoker(new Query(), null);
+ search1.search(new Query(), null);
+
+ // Wait for the current cluster monitor to be mid-ping-round.
+ pingPhasers.get(0).arriveAndAwaitAdvance();
+
+ // Then reconfigure the dispatcher with new nodes, replacing node0 with node2.
+ reconfiguration = executor.submit(() -> {
+ dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder()
+ .node(new DispatchNodesConfig.Node.Builder().key(2).group(0).port(123).host("host2"))
+ .node(new DispatchNodesConfig.Node.Builder().key(1).group(1).port(123).host("host1"))
+ .build());
+ });
+ // Reconfiguration starts, but groups are only updated once the search cluster has knowledge about all of them.
+ pingPhasers.get(1).arriveAndAwaitAdvance();
+ pingPhasers.get(1).arriveAndAwaitAdvance();
+ pingPhasers.get(2).arriveAndAwaitAdvance();
+ // Cluster has not yet updated its group reference.
+ assertEquals(1, cluster.group(0).workingNodes()); // Node0 is still working.
+ assertSame(node0, cluster.group(0).nodes().get(0));
+ pingPhasers.get(2).arriveAndAwaitAdvance();
+
+ // Old cluster monitor is waiting for that ping to complete before it can shut down, and let reconfiguration complete.
+ pingPhasers.get(0).arriveAndAwaitAdvance();
+ reconfiguration.get();
+ Node node2 = cluster.group(0).nodes().get(0);
+ assertNotSame(node0, node2);
+ assertSame(node1, cluster.group(1).nodes().get(0));
+
+ // Next search should hit group0 again, this time on node2.
+ SearchInvoker search2 = dispatcher.getSearchInvoker(new Query(), null);
+ search2.search(new Query(), null);
+
+ // Searches against nodes 1 and 2 complete.
+ (offset == 0 ? search0 : search1).close();
+ search2.close();
+
+ // We're still waiting for search against node0 to complete, before we can shut down its RPC connection.
+ assertEquals(Set.of(0, 1, 2), rpcResources.keySet());
+ (offset == 0 ? search1 : search0).close();
+ // Thread for search 0 should have closed the RPC pool now.
+ assertEquals(Set.of(1, 2), rpcResources.keySet());
+ assertEquals(cleanupThreadId.get(), Thread.currentThread().getId());
+
+ // Finally, reconfigure down to 0 nodes.
+ reconfiguration = executor.submit(() -> {
+ cleanupThreadId.set(Thread.currentThread().getId());
+ dispatcher.updateWithNewConfig(new DispatchNodesConfig.Builder().build());
+ });
+ pingPhasers.get(1).forceTermination();
+ pingPhasers.get(2).forceTermination();
+ reconfiguration.get();
+ assertNotEquals(cleanupThreadId.get(), Thread.currentThread().getId());
+ dispatcher.deconstruct();
+ }
+
interface FactoryStep {
boolean returnInvoker(List<Node> nodes, boolean acceptIncompleteCoverage);
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java
index 5fb5b465c69..cd0791a3881 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/MockSearchCluster.java
@@ -15,7 +15,11 @@ import java.util.Map;
public class MockSearchCluster extends SearchCluster {
public MockSearchCluster(String clusterId, int groups, int nodesPerGroup) {
- super(clusterId, buildGroupListForTest(groups, nodesPerGroup, 88.0), null, null);
+ this(clusterId, groups, nodesPerGroup, null);
+ }
+
+ public MockSearchCluster(String clusterId, int groups, int nodesPerGroup, PingFactory pingFactory) {
+ super(clusterId, buildGroupListForTest(groups, nodesPerGroup, 88.0), null, pingFactory);
}
@Override
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index 51256ec496e..bfe1aed1084 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -13,11 +13,18 @@ import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import static java.util.function.Function.identity;
+import static java.util.stream.Collectors.toMap;
import static org.junit.jupiter.api.Assertions.*;
/**
@@ -31,7 +38,7 @@ public class SearchClusterTest {
final int nodesPerGroup;
final VipStatus vipStatus;
final SearchCluster searchCluster;
- final ClusterMonitor clusterMonitor;
+ final ClusterMonitor<Node> clusterMonitor;
final List<AtomicInteger> numDocsPerNode;
List<AtomicInteger> pingCounts;
@@ -57,7 +64,7 @@ public class SearchClusterTest {
}
searchCluster = new SearchCluster(clusterId, 100.0, nodes,
vipStatus, new Factory(nodesPerGroup, numDocsPerNode, pingCounts));
- clusterMonitor = new ClusterMonitor(searchCluster, false);
+ clusterMonitor = new ClusterMonitor<>(searchCluster, false);
searchCluster.addMonitoring(clusterMonitor);
}
@@ -376,4 +383,37 @@ public class SearchClusterTest {
assertTrue(group.isBalanced());
}
+    @Test
+    void requireThatPreciselyTheRetainedNodesAreKeptWhenNodesAreUpdated() {
+        try (State state = new State("query", 2, IntStream.range(0, 6).mapToObj(i -> "node-" + i).toList())) {
+            List<Node> referenceNodes = List.of(new Node(0, "node-0", 0),
+                                                new Node(1, "node-1", 0),
+                                                new Node(0, "node-2", 1),
+                                                new Node(1, "node-3", 1),
+                                                new Node(0, "node-4", 2),
+                                                new Node(1, "node-5", 2));
+            SearchGroups oldGroups = state.searchCluster.groupList();
+            assertEquals(Set.copyOf(referenceNodes), oldGroups.nodes());
+
+            List<Node> updatedNodes = List.of(new Node(0, "node-1", 0), // Swap node-0 and node-1
+                                              new Node(1, "node-0", 0), // Swap node-1 and node-0
+                                              new Node(0, "node-4", 1), // Swap node-2 and node-4
+                                              new Node(1, "node-3", 1),
+                                              new Node(0, "node-2", 2), // Swap node-4 and node-2
+                                              new Node(1, "node-6", 2)); // Replace node-5 with node-6
+            state.searchCluster.updateNodes(updatedNodes, 100.0);
+            SearchGroups newGroups = state.searchCluster.groupList();
+            assertEquals(Set.copyOf(updatedNodes), newGroups.nodes());
+
+            Map<Node, Node> oldNodesByIdentity = oldGroups.nodes().stream().collect(toMap(identity(), identity())); // Pre-update instances (oldGroups is unaffected by updateNodes).
+            Map<Node, Node> newNodesByIdentity = newGroups.nodes().stream().collect(toMap(identity(), identity())); // Post-update instances, to check which were retained.
+            assertSame(updatedNodes.get(0), newNodesByIdentity.get(updatedNodes.get(0)));
+            assertSame(updatedNodes.get(1), newNodesByIdentity.get(updatedNodes.get(1)));
+            assertSame(updatedNodes.get(2), newNodesByIdentity.get(updatedNodes.get(2)));
+            assertSame(oldNodesByIdentity.get(referenceNodes.get(3)), newNodesByIdentity.get(updatedNodes.get(3)));
+            assertSame(updatedNodes.get(4), newNodesByIdentity.get(updatedNodes.get(4)));
+            assertSame(updatedNodes.get(5), newNodesByIdentity.get(updatedNodes.get(5)));
+        }
+    }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java
index 5df183d9abb..fbdab67869a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/DataplaneTokenSerializer.java
@@ -59,7 +59,9 @@ public class DataplaneTokenSerializer {
Instant creationTime = SlimeUtils.instant(versionCursor.field(creationTimeField));
String author = versionCursor.field(authorField).asString();
String expirationStr = versionCursor.field(expirationField).asString();
- Optional<Instant> expiration = expirationStr.equals("<none>") ? Optional.empty() : Optional.of(Instant.parse(expirationStr));
+ Optional<Instant> expiration = expirationStr.equals("<none>") ? Optional.empty()
+ : (expirationStr.isBlank()
+ ? Optional.of(Instant.EPOCH) : Optional.of(Instant.parse(expirationStr)));
return new DataplaneTokenVersions.Version(fingerPrint, checkAccessHash, creationTime, expiration, author);
})
.toList();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index 99ad75d0ec4..4824ccc576a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -979,6 +979,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
fingerprintObject.setString("fingerprint", tokenVersion.fingerPrint().value());
fingerprintObject.setString("created", tokenVersion.creationTime().toString());
fingerprintObject.setString("author", tokenVersion.author());
+ fingerprintObject.setString("expiration", tokenVersion.expiration().map(Instant::toString).orElse("<none>"));
}
}
return new SlimeJsonResponse(slime);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java
index b6ac65467ac..3cd9d586350 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiCloudTest.java
@@ -8,7 +8,6 @@ import com.yahoo.config.provision.ApplicationName;
import com.yahoo.config.provision.InstanceName;
import com.yahoo.config.provision.TenantName;
import com.yahoo.restapi.RestApiException;
-import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.controller.ControllerTester;
@@ -42,7 +41,6 @@ import static com.yahoo.application.container.handler.Request.Method.DELETE;
import static com.yahoo.application.container.handler.Request.Method.GET;
import static com.yahoo.application.container.handler.Request.Method.POST;
import static com.yahoo.application.container.handler.Request.Method.PUT;
-import static com.yahoo.vespa.hosted.controller.restapi.application.ApplicationApiTest.createApplicationSubmissionData;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -476,7 +474,7 @@ public class ApplicationApiCloudTest extends ControllerContainerCloudTest {
(response) -> Assertions.assertThat(new String(response.getBody(), UTF_8)).matches(Pattern.compile(regexGenerateToken)),
200);
- String regexListTokens = "\\{\"tokens\":\\[\\{\"id\":\"myTokenId\",\"versions\":\\[\\{\"fingerprint\":\".*\",\"created\":\".*\",\"author\":\"user@test\"}]}]}";
+ String regexListTokens = "\\{\"tokens\":\\[\\{\"id\":\"myTokenId\",\"versions\":\\[\\{\"fingerprint\":\".*\",\"created\":\".*\",\"author\":\"user@test\",\"expiration\":\".*\"}]}]}";
tester.assertResponse(request("/application/v4/tenant/scoober/token", GET)
.roles(Role.developer(tenantName)),
(response) -> Assertions.assertThat(new String(response.getBody(), UTF_8)).matches(Pattern.compile(regexListTokens)),
diff --git a/documentapi/abi-spec.json b/documentapi/abi-spec.json
index 7662258037c..0252da8a4d1 100644
--- a/documentapi/abi-spec.json
+++ b/documentapi/abi-spec.json
@@ -1888,7 +1888,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1914,7 +1915,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -1936,7 +1938,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -2095,7 +2098,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -2136,7 +2140,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h
index 5c0d9e3317d..49f88edb2f9 100644
--- a/eval/src/vespa/eval/eval/value_type.h
+++ b/eval/src/vespa/eval/eval/value_type.h
@@ -25,13 +25,13 @@ public:
: name(name_in), size(npos) {}
Dimension(const vespalib::string &name_in, size_type size_in) noexcept
: name(name_in), size(size_in) {}
- bool operator==(const Dimension &rhs) const noexcept {
+ bool operator==(const Dimension &rhs) const {
return ((name == rhs.name) && (size == rhs.size));
}
- bool operator!=(const Dimension &rhs) const noexcept { return !(*this == rhs); }
- bool is_mapped() const noexcept { return (size == npos); }
- bool is_indexed() const noexcept { return (size != npos); }
- bool is_trivial() const noexcept { return (size == 1); }
+ bool operator!=(const Dimension &rhs) const { return !(*this == rhs); }
+ bool is_mapped() const { return (size == npos); }
+ bool is_indexed() const { return (size != npos); }
+ bool is_trivial() const { return (size == 1); }
};
private:
@@ -39,10 +39,10 @@ private:
CellType _cell_type;
std::vector<Dimension> _dimensions;
- ValueType() noexcept
+ ValueType()
: _error(true), _cell_type(CellType::DOUBLE), _dimensions() {}
- ValueType(CellType cell_type_in, std::vector<Dimension> &&dimensions_in) noexcept
+ ValueType(CellType cell_type_in, std::vector<Dimension> &&dimensions_in)
: _error(false), _cell_type(cell_type_in), _dimensions(std::move(dimensions_in)) {}
static ValueType error_if(bool has_error, ValueType else_type);
@@ -57,7 +57,7 @@ public:
CellMeta cell_meta() const { return {_cell_type, is_double()}; }
bool is_error() const { return _error; }
bool is_double() const;
- bool has_dimensions() const noexcept { return !_dimensions.empty(); }
+ bool has_dimensions() const { return !_dimensions.empty(); }
bool is_sparse() const;
bool is_dense() const;
bool is_mixed() const;
@@ -70,12 +70,12 @@ public:
std::vector<Dimension> mapped_dimensions() const;
size_t dimension_index(const vespalib::string &name) const;
std::vector<vespalib::string> dimension_names() const;
- bool operator==(const ValueType &rhs) const noexcept {
+ bool operator==(const ValueType &rhs) const {
return ((_error == rhs._error) &&
(_cell_type == rhs._cell_type) &&
(_dimensions == rhs._dimensions));
}
- bool operator!=(const ValueType &rhs) const noexcept { return !(*this == rhs); }
+ bool operator!=(const ValueType &rhs) const { return !(*this == rhs); }
ValueType map() const;
ValueType reduce(const std::vector<vespalib::string> &dimensions_in) const;
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index c3788a20ddc..0dd0c885eee 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -49,8 +49,8 @@ public class Flags {
private static volatile TreeMap<FlagId, FlagDefinition> flags = new TreeMap<>();
public static final UnboundBooleanFlag IPV6_IN_GCP = defineFeatureFlag(
- "ipv6-in-gcp", false,
- List.of("hakonhall"), "2023-05-15", "2023-07-15",
+ "ipv6-in-gcp", true,
+ List.of("hakonhall"), "2023-05-15", "2023-08-01",
"Provision GCP hosts with external IPv6 addresses",
"Takes effect on the next host provisioning");
@@ -429,6 +429,13 @@ public class Flags {
"Takes effect at redeployment",
ZONE_ID, APPLICATION_ID);
+ public static final UnboundBooleanFlag USE_RECONFIGURABLE_DISPATCHER = defineFeatureFlag(
+ "use-reconfigurable-dispatcher", false,
+ List.of("jonmv"), "2023-07-14", "2023-10-01",
+ "Whether to set up a ReconfigurableDispatcher with config self-sub for backend nodes",
+ "Takes effect at redeployment",
+ ZONE_ID, APPLICATION_ID);
+
/** WARNING: public for testing: All flags should be defined in {@link Flags}. */
public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners,
String createdAt, String expiresAt, String description,
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
index f99c0e32a36..348fd97e869 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
@@ -362,6 +362,12 @@ public class PermanentFlags {
"Takes effect on next redeployment",
APPLICATION_ID);
+ public static final UnboundBooleanFlag AUTOSCALING = defineFeatureFlag(
+ "autoscaling", true,
+ "Whether to enable autoscaling",
+ "Takes effect immediately",
+ APPLICATION_ID);
+
private PermanentFlags() {}
private static UnboundBooleanFlag defineFeatureFlag(
diff --git a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java
index 554c1d924a2..2dc80fc9d2b 100644
--- a/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java
+++ b/jdisc-security-filters/src/main/java/com/yahoo/jdisc/http/filter/security/cloud/CloudDataPlaneFilter.java
@@ -119,12 +119,7 @@ public class CloudDataPlaneFilter extends JsonSecurityRequestFilterBase {
var tokenVersion = TokenVersion.of(
token.id(), token.fingerprints().get(version), token.checkAccessHashes().get(version),
token.expirations().get(version));
- var expiration = tokenVersion.expiration().orElse(null);
- if (expiration != null && now.isAfter(expiration))
- log.fine(() -> "Ignoring expired version %s of token '%s' (expiration=%s)".formatted(
- tokenVersion.fingerprint(), tokenVersion.id(), expiration));
- else
- tokens.put(tokenVersion.accessHash(), tokenVersion);
+ tokens.put(tokenVersion.accessHash(), tokenVersion);
}
}
// Add reverse proxy certificate as required certificate for client definition
diff --git a/linguistics-components/abi-spec.json b/linguistics-components/abi-spec.json
index 4b713afba83..a2b5a98344f 100644
--- a/linguistics-components/abi-spec.json
+++ b/linguistics-components/abi-spec.json
@@ -22,7 +22,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -50,7 +51,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -196,7 +198,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -223,7 +226,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/messagebus/abi-spec.json b/messagebus/abi-spec.json
index acedccf80ba..15a24f82f75 100644
--- a/messagebus/abi-spec.json
+++ b/messagebus/abi-spec.json
@@ -410,7 +410,8 @@
"com.yahoo.config.ConfigInstance$Builder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -451,7 +452,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -476,7 +478,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
@@ -515,7 +518,8 @@
"com.yahoo.config.ConfigBuilder"
],
"attributes" : [
- "public"
+ "public",
+ "final"
],
"methods" : [
"public void <init>()",
diff --git a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
index 1c61b65f77b..9936b4612c5 100644
--- a/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
@@ -51,6 +51,7 @@ public enum ConfigServerMetrics implements VespaMetrics {
NODES_NON_ACTIVE("nodes.nonActive", Unit.NODE, "The number of non-active nodes in a cluster"),
NODES_NON_ACTIVE_FRACTION("nodes.nonActiveFraction", Unit.NODE, "The fraction of non-active nodes vs total nodes in a cluster"),
NODES_EXCLUSIVE_SWITCH_FRACTION("nodes.exclusiveSwitchFraction", Unit.FRACTION, "The fraction of nodes in a cluster on exclusive network switches"),
+ NODES_EMPTY_EXCLUSIVE("nodes.emptyExclusive", Unit.NODE, "The number of exclusive hosts that do not have any nodes allocated to them"),
CLUSTER_COST("cluster.cost", Unit.DOLLAR_PER_HOUR, "The cost of the nodes allocated to a certain cluster, in $/hr"),
CLUSTER_LOAD_IDEAL_CPU("cluster.load.ideal.cpu", Unit.FRACTION, "The ideal cpu load of a certain cluster"),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 2287b768dee..e586e6277d5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -92,6 +92,7 @@ public class AllocationOptimizer {
.multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts
.divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment
.scaled(current.realResources().nodeResources());
+
// Combine the scaled resource values computed here
// with the currently configured non-scaled values, given in the limits, if any
var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified()
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index c75a5ca0b26..289025f9d21 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -168,7 +168,6 @@ public class ClusterModel {
}
public static Duration minScalingDuration(ClusterSpec clusterSpec) {
- if (clusterSpec.isStateful()) return Duration.ofHours(6);
return Duration.ofMinutes(5);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 856d6e07156..92f86325cf7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -7,6 +7,9 @@ import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.flags.BooleanFlag;
+import com.yahoo.vespa.flags.FetchVector;
+import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -34,6 +37,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
private final Autoscaler autoscaler;
private final Deployer deployer;
private final Metric metric;
+ private final BooleanFlag enabledFlag;
public AutoscalingMaintainer(NodeRepository nodeRepository,
Deployer deployer,
@@ -43,6 +47,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
this.autoscaler = new Autoscaler(nodeRepository);
this.deployer = deployer;
this.metric = metric;
+ this.enabledFlag = PermanentFlags.AUTOSCALING.bindTo(nodeRepository.flagSource());
}
@Override
@@ -53,6 +58,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
int attempts = 0;
int failures = 0;
for (var applicationNodes : activeNodesByApplication().entrySet()) {
+ boolean enabled = enabledFlag.with(FetchVector.Dimension.APPLICATION_ID,
+ applicationNodes.getKey().serializedForm()).value();
+ if (!enabled) continue;
for (var clusterNodes : nodesByCluster(applicationNodes.getValue()).entrySet()) {
attempts++;
if ( ! autoscale(applicationNodes.getKey(), clusterNodes.getKey()))
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
index 8213286639c..8a9a29f58c6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java
@@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing
import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate;
import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer;
import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningThrottler;
import java.time.Duration;
import java.time.Instant;
@@ -57,6 +58,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
private final HostProvisioner hostProvisioner;
private final ListFlag<ClusterCapacity> preprovisionCapacityFlag;
+ private final ProvisioningThrottler throttler;
HostCapacityMaintainer(NodeRepository nodeRepository,
Duration interval,
@@ -66,6 +68,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
super(nodeRepository, interval, metric);
this.hostProvisioner = hostProvisioner;
this.preprovisionCapacityFlag = PermanentFlags.PREPROVISION_CAPACITY.bindTo(flagSource);
+ this.throttler = new ProvisioningThrottler(nodeRepository, metric);
}
@Override
@@ -203,19 +206,23 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer {
var clusterType = Optional.ofNullable(clusterCapacityDeficit.clusterType());
nodesPlusProvisioned.addAll(provisionHosts(clusterCapacityDeficit.count(),
toNodeResources(clusterCapacityDeficit),
- clusterType.map(ClusterSpec.Type::from)));
+ clusterType.map(ClusterSpec.Type::from),
+ nodeList));
}
}
- private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType) {
+ private List<Node> provisionHosts(int count, NodeResources nodeResources, Optional<ClusterSpec.Type> clusterType, NodeList allNodes) {
try {
+ if (throttler.throttle(allNodes, Agent.HostCapacityMaintainer)) {
+ throw new NodeAllocationException("Host provisioning is being throttled", true);
+ }
Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count);
- List<Node> hosts = new ArrayList<>();
HostProvisionRequest request = new HostProvisionRequest(provisionIndices, NodeType.host, nodeResources,
ApplicationId.defaultId(), osVersion,
HostSharing.shared, clusterType, Optional.empty(),
nodeRepository().zone().cloud().account(), false);
+ List<Node> hosts = new ArrayList<>();
hostProvisioner.provisionHosts(request,
provisionedHosts -> {
hosts.addAll(provisionedHosts.stream().map(host -> host.generateHost(Duration.ZERO)).toList());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3b846351b36..15913fec5ed 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -21,11 +21,13 @@ import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.ClusterId;
+import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.hosted.provision.persistence.CacheStats;
import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.time.Duration;
+import java.time.Instant;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -64,7 +66,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
@Override
public double maintain() {
// Sort by hostname to get deterministic metric reporting order (and hopefully avoid changes
- // to metric reporting time so we get double reporting or no reporting within a minute)
+ // to metric reporting time, which could cause double reporting or no reporting within a minute)
NodeList nodes = nodeRepository().nodes().list().sortedBy(Comparator.comparing(Node::hostname));
ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
@@ -79,6 +81,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateRepairTicketMetrics(nodes);
updateAllocationMetrics(nodes);
updateClusterMetrics(nodes);
+ updateEmptyExclusiveHosts(nodes);
return 1.0;
}
@@ -386,6 +389,19 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
.forEach((status, number) -> metric.set(ConfigServerMetrics.HOSTED_VESPA_BREAKFIXED_HOSTS.baseName(), number, getContext(Map.of("status", status))));
}
+ private void updateEmptyExclusiveHosts(NodeList nodes) {
+ Instant now = nodeRepository().clock().instant();
+ Duration minActivePeriod = Duration.ofMinutes(10);
+ int emptyHosts = nodes.parents().state(State.active)
+ .matching(node -> (node.type() != NodeType.host && node.type().isHost()) ||
+ node.exclusiveToApplicationId().isPresent())
+ .matching(host -> host.history().hasEventBefore(History.Event.Type.activated,
+ now.minus(minActivePeriod)))
+ .matching(host -> nodes.childrenOf(host).state(State.active).isEmpty())
+ .size();
+ metric.set(ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), emptyHosts, null);
+ }
+
static Map<String, String> dimensions(ApplicationId application, ClusterSpec.Id cluster) {
Map<String, String> dimensions = new HashMap<>(dimensions(application));
dimensions.put("clusterid", cluster.value());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index a16290361fb..585a7f341b5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer {
/**
* Called when a node should be moved to the failed state: Do that if it seems safe,
* which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
- * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
+ * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken.
*/
private void failActive(FailingNode failing) {
Optional<Deployment> deployment =
deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5));
if (deployment.isEmpty()) return;
+ boolean redeploy = false;
// If the active node that we are trying to fail is of type host, we need to successfully fail all
// the children nodes running on it before we fail the host. Failing a child node in a dynamically
// provisioned zone may require provisioning new hosts that require the host application lock to be held,
// so we must release ours before failing the children.
- List<FailingNode> activeChildrenToFail = new ArrayList<>();
- boolean redeploy = false;
- try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though
- // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
- if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner)))
- return;
- if (lock.node().state() == Node.State.failed)
- return;
- if (!Objects.equals(failing.node().state(), lock.node().state()))
- return;
- failing = new FailingNode(lock.node(), failing.reason);
-
- String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
- for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) {
- if (failingTenantNode.state() == Node.State.active) {
- activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure));
- } else if (failingTenantNode.state() != Node.State.failed) {
- nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure);
+ if (failing.node.type().isHost()) {
+ List<FailingNode> activeChildrenToFail = new ArrayList<>();
+ try (var lock = nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) {
+ failing = shouldFail(lock.parent().node(), failing);
+ if (failing == null) return;
+
+ String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
+ for (var failingTenantNode : lock.children()) {
+ if (failingTenantNode.node().state() == Node.State.active) {
+ activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure));
+ } else if (failingTenantNode.node().state() != Node.State.failed) {
+ nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure);
+ }
+ }
+
+ if (activeChildrenToFail.isEmpty()) {
+ log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
+ markWantToFail(failing.node(), true, lock.parent());
+ redeploy = true;
}
}
+ // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
+ // so failActive() may take a long time to complete, but the remaining children should be fast.
+ activeChildrenToFail.forEach(this::failActive);
+ }
+ else {
+ try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) {
+ failing = shouldFail(lock.node(), failing);
+ if (failing == null) return;
- if (activeChildrenToFail.isEmpty()) {
log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
markWantToFail(failing.node(), true, lock);
redeploy = true;
@@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Redeploy to replace failing node
if (redeploy) {
redeploy(deployment.get(), failing);
- return;
}
+ }
- // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
- // so failActive() may take a long time to complete, but the remaining children should be fast.
- activeChildrenToFail.forEach(this::failActive);
-
+ // Returns an updated FailingNode if we should still fail the node, otherwise null
+ private static FailingNode shouldFail(Node fresh, FailingNode stale) {
+ // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
+ if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner)))
+ return null;
+ if (fresh.state() == Node.State.failed)
+ return null;
+ if (!Objects.equals(stale.node.state(), fresh.state()))
+ return null;
+ return new FailingNode(fresh, stale.reason);
}
private void redeploy(Deployment deployment, FailingNode failing) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index bf046c09899..1ae9b00d794 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
boolean redeploy = false;
List<String> nodesToDeactivate = new ArrayList<>();
- try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recusrive lock for right order
+ try (var lock = nodeRepository().applications().lock(application)) {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application)
.retired()
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
index 92faacbca23..c388273b1a6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
@@ -258,12 +258,10 @@ public class CuratorDb {
Pair<Integer, Node> cached = cachedNodes.getIfPresent(path);
if (cached != null && cached.getFirst().equals(stat)) return cached.getSecond();
cachedNodes.invalidate(path);
- try {
- return cachedNodes.get(path, () -> new Pair<>(stat, read(path, nodeSerializer::fromJson).get())).getSecond();
- }
- catch (ExecutionException e) {
- throw new UncheckedExecutionException(e.getCause());
- }
+ Optional<Node> node = session.getData(path).filter(data -> data.length > 0).map(nodeSerializer::fromJson);
+ if (node.isEmpty()) return null;
+ cachedNodes.put(path, new Pair<>(stat, node.get()));
+ return node.get();
});
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 43b8cd08989..bcc63a6704a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -16,6 +16,7 @@ import com.yahoo.config.provision.ProvisionLock;
import com.yahoo.config.provision.ProvisionLogger;
import com.yahoo.config.provision.Provisioner;
import com.yahoo.config.provision.Zone;
+import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -61,7 +62,8 @@ public class NodeRepositoryProvisioner implements Provisioner {
@Inject
public NodeRepositoryProvisioner(NodeRepository nodeRepository,
Zone zone,
- ProvisionServiceProvider provisionServiceProvider) {
+ ProvisionServiceProvider provisionServiceProvider,
+ Metric metric) {
this.nodeRepository = nodeRepository;
this.allocationOptimizer = new AllocationOptimizer(nodeRepository);
this.capacityPolicies = new CapacityPolicies(nodeRepository);
@@ -71,7 +73,8 @@ public class NodeRepositoryProvisioner implements Provisioner {
this.nodeResourceLimits = new NodeResourceLimits(nodeRepository);
this.preparer = new Preparer(nodeRepository,
provisionServiceProvider.getHostProvisioner(),
- loadBalancerProvisioner);
+ loadBalancerProvisioner,
+ metric);
this.activator = new Activator(nodeRepository, loadBalancerProvisioner);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
index 8975dda8e60..79b1bccbbde 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
@@ -7,6 +7,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
@@ -36,11 +37,13 @@ public class Preparer {
private final NodeRepository nodeRepository;
private final Optional<HostProvisioner> hostProvisioner;
private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner;
+ private final ProvisioningThrottler throttler;
- public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner) {
+ public Preparer(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner, Metric metric) {
this.nodeRepository = nodeRepository;
this.hostProvisioner = hostProvisioner;
this.loadBalancerProvisioner = loadBalancerProvisioner;
+ this.throttler = new ProvisioningThrottler(nodeRepository, metric);
}
/**
@@ -110,6 +113,9 @@ public class Preparer {
Optional.of(cluster.id()),
requested.cloudAccount(),
deficit.dueToFlavorUpgrade());
+ if (throttler.throttle(allNodes, Agent.system)) {
+ throw new NodeAllocationException("Host provisioning is being throttled", true);
+ }
hostProvisioner.get().provisionHosts(request, whenProvisioned);
} catch (NodeAllocationException e) {
// Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java
new file mode 100644
index 00000000000..b08e7dbccb0
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottler.java
@@ -0,0 +1,69 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.provisioning;
+
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.History;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Objects;
+import java.util.logging.Logger;
+
+/**
+ * Throttles provisioning of new hosts in dynamically provisioned zones.
+ *
+ * @author mpolden
+ */
+public class ProvisioningThrottler {
+
+ /** Metric that indicates whether throttling is active where 1 means active and 0 means inactive */
+ private static final String throttlingActiveMetric = "throttledHostProvisioning";
+
+ private static final Logger LOG = Logger.getLogger(ProvisioningThrottler.class.getName());
+
+ private static final int MIN_SIZE = 100;
+ private static final int MAX_GROWTH = 200;
+ private static final double MAX_GROWTH_RATE = 0.4;
+
+ private final NodeRepository nodeRepository;
+ private final Metric metric;
+
+ public ProvisioningThrottler(NodeRepository nodeRepository, Metric metric) {
+ this.nodeRepository = Objects.requireNonNull(nodeRepository);
+ this.metric = Objects.requireNonNull(metric);
+ }
+
+ private Duration window() {
+ return nodeRepository.zone().system().isCd() ? Duration.ofHours(2) : Duration.ofHours(8);
+ }
+
+ /** Returns whether provisioning should be throttled at given instant */
+ public boolean throttle(NodeList allNodes, Agent agent) {
+ Duration window = window();
+ Instant startOfWindow = nodeRepository.clock().instant().minus(window);
+ NodeList hosts = allNodes.hosts();
+ int existingHosts = hosts.not().state(Node.State.deprovisioned).size();
+ int provisionedRecently = hosts.matching(host -> host.history().hasEventAfter(History.Event.Type.provisioned, startOfWindow))
+ .size();
+ boolean throttle = throttle(provisionedRecently, existingHosts, window, agent);
+ metric.set(throttlingActiveMetric, throttle ? 1 : 0, null);
+ return throttle;
+ }
+
+ static boolean throttle(int recent, int total, Duration window, Agent agent) {
+ if (total < MIN_SIZE && recent < MAX_GROWTH) return false; // Allow burst in small zones
+ int maxGrowth = Math.min(MAX_GROWTH, (int) (total * MAX_GROWTH_RATE));
+ boolean throttle = recent > maxGrowth;
+ if (throttle) {
+ LOG.warning(String.format("Throttling provisioning of new hosts by %s: %d hosts have been provisioned " +
+ "in the past %s, which exceeds growth limit of %d", agent,
+ recent, window, maxGrowth));
+ }
+ return throttle;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java
index 65abcbef698..7ded74b7451 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/InMemoryProvisionLogger.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ProvisionLogger;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
+import java.util.logging.Logger;
/**
* A logger which remembers all messages logged in addition to writing them to standard out.
@@ -14,18 +15,20 @@ import java.util.logging.Level;
*/
public class InMemoryProvisionLogger implements ProvisionLogger {
+ private static final Logger LOG = Logger.getLogger(InMemoryProvisionLogger.class.getName());
+
private final List<String> systemLog = new ArrayList<>();
private final List<String> applicationLog = new ArrayList<>();
@Override
public void log(Level level, String message) {
- System.out.println("ProvisionLogger system " + level + ": " + message);
+ LOG.info("ProvisionLogger system " + level + ": " + message);
systemLog.add(level + ": " + message);
}
@Override
public void logApplicationPackage(Level level, String message) {
- System.out.println("ProvisionLogger application " + level + ": " + message);
+ LOG.info("ProvisionLogger application " + level + ": " + message);
applicationLog.add(level + ": " + message);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
index 40460e70861..26478d2b566 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
@@ -25,6 +25,7 @@ import com.yahoo.config.provision.Zone;
import com.yahoo.config.provision.ZoneEndpoint;
import com.yahoo.config.provision.ZoneEndpoint.AccessType;
import com.yahoo.config.provision.ZoneEndpoint.AllowedUrn;
+import com.yahoo.jdisc.test.MockMetric;
import com.yahoo.transaction.Mutex;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.mock.MockCurator;
@@ -104,7 +105,7 @@ public class MockNodeRepository extends NodeRepository {
}
private void populate() {
- NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider());
+ NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, Zone.defaultZone(), new MockProvisionServiceProvider(), new MockMetric());
List<Node> nodes = new ArrayList<>();
// Regular nodes
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index bd31c7578b9..47206265c68 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -43,10 +43,10 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max))
.build();
fixture.tester.clock().advance(Duration.ofDays(2));
- fixture.loader().applyLoad(new Load(0.17f, 0.17, 0.12), 1, true, true, 100);
+ fixture.loader().applyLoad(new Load(0.8f, 0.17, 0.12), 1, true, true, 100);
var result = fixture.autoscale();
assertTrue(result.resources().isEmpty());
- assertNotEquals(Autoscaling.Status.insufficient, result.status());
+ assertEquals(Autoscaling.Status.insufficient, result.status());
fixture.tester.clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(0.08f, 0.17, 0.12), 1, true, true, 100);
@@ -65,7 +65,7 @@ public class AutoscalingTest {
fixture.loader().applyCpuLoad(0.7f, 10);
var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high",
- 9, 1, 3.6, 8.5, 360.9,
+ 9, 1, 2.8, 6.8, 288.7,
fixture.autoscale());
fixture.deploy(Capacity.from(scaledResources));
@@ -87,7 +87,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(7));
fixture.loader().applyCpuLoad(0.1f, 10);
fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly",
- 8, 1, 1.0, 8.3, 338.4,
+ 6, 1, 1.1, 8.8, 346.8,
fixture.autoscale());
}
@@ -169,6 +169,7 @@ public class AutoscalingTest {
@Test
public void test_only_autoscaling_up_quickly() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(12)); // Fixture sets last completion to be 1 day into the past
fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10);
fixture.tester().assertResources("Scaling up (only) since resource usage is too high",
8, 1, 7.1, 9.3, 75.4,
@@ -179,6 +180,7 @@ public class AutoscalingTest {
@Test
public void test_scale_in_both_directions_when_ok_to_scale_down() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(12)); // Fixture sets last completion to be 1 day into the past
fixture.tester.clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10);
fixture.tester().assertResources("Scaling cpu and disk up and memory down",
@@ -189,6 +191,7 @@ public class AutoscalingTest {
@Test
public void test_scale_in_both_directions_when_ok_to_scale_down_exclusive() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester.clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10);
fixture.tester().assertResources("Scaling cpu and disk up, memory follows",
@@ -199,33 +202,36 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_uses_peak() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.loader().applyCpuLoad(0.01, 100);
fixture.loader().applyCpuLoad(0.70, 1);
fixture.loader().applyCpuLoad(0.01, 100);
fixture.tester().assertResources("Scaling up since peak resource usage is too high",
- 8, 1, 4.3, 9.3, 36.2,
+ 8, 1, 4.3, 7.4, 29.0,
fixture.autoscale());
}
@Test
public void test_autoscaling_uses_peak_exclusive() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).build();
+ fixture.setScalingDuration(Duration.ofHours(9));
fixture.loader().applyCpuLoad(0.01, 100);
fixture.loader().applyCpuLoad(0.70, 1);
fixture.loader().applyCpuLoad(0.01, 100);
fixture.tester().assertResources("Scaling up since peak resource usage is too high",
- 9, 1, 4, 16.0, 150,
+ 9, 1, 4, 8.0, 100,
fixture.autoscale());
}
@Test
public void test_autoscaling_uses_peak_preprovisioned() {
var fixture = DynamicProvisioningTester.fixture().hostCount(15).build();
+ fixture.setScalingDuration(Duration.ofHours(9));
fixture.loader().applyCpuLoad(0.01, 100);
fixture.loader().applyCpuLoad(0.70, 1);
fixture.loader().applyCpuLoad(0.01, 100);
- fixture.tester().assertResources("Scaling up since peak resource usage is too high",
- 8, 1, 4.3, 9.7, 42.9,
+ fixture.tester().assertResources("Scaling up cpu since peak resource usage is too high",
+ 8, 1, 4.3, 7.7, 34.3,
fixture.autoscale());
}
@@ -278,6 +284,7 @@ public class AutoscalingTest {
.allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == slow));
fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.loader().applyCpuLoad(0.25, 120);
// Changing min and max from slow to any
@@ -329,6 +336,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max)).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(1));
fixture.loader().applyLoad(new Load(0.25, 0.95, 0.95), 120);
fixture.tester().assertResources("Scaling up to limit since resource usage is too high",
@@ -384,6 +392,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyCpuLoad(0.4, 240);
fixture.tester().assertResources("Scaling cpu up",
@@ -402,9 +411,9 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max, IntRange.of(2, 3), false, true, Optional.empty(), ClusterInfo.empty()))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.loader().applyCpuLoad(0.4, 240);
+ fixture.loader().applyCpuLoad(0.6, 240);
fixture.tester().assertResources("Scaling cpu up",
- 12, 6, 2.8, 4.2, 27.5,
+ 12, 6, 3.0, 4.2, 27.5,
fixture.autoscale());
}
@@ -446,6 +455,7 @@ public class AutoscalingTest {
public void suggestions_ignores_limits() {
ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1));
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).capacity(Capacity.from(min, min)).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyCpuLoad(1.0, 120);
fixture.tester().assertResources("Suggesting above capacity limit",
@@ -457,6 +467,7 @@ public class AutoscalingTest {
public void suggestions_ignores_limits_exclusive() {
ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1));
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(false).capacity(Capacity.from(min, min)).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyCpuLoad(1.0, 120);
fixture.tester().assertResources("Suggesting above capacity limit",
@@ -504,6 +515,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(0.5, 0.8, 0.1), 120);
fixture.tester().assertResources("Suggesting resources where disk is 3x memory (this is a content cluster)",
@@ -524,6 +536,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(0.5, 0.8, 0.1), 120);
fixture.tester().assertResources("Suggesting resources where disk is 3x memory (this is a content cluster)",
@@ -547,7 +560,7 @@ public class AutoscalingTest {
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyCpuLoad(0.9, 120);
fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time",
- 10, 5, 7.7, 41.5, 124.6,
+ 8, 4, 7.4, 41.5, 124.6,
fixture.autoscale());
}
@@ -562,6 +575,7 @@ public class AutoscalingTest {
.capacity(Capacity.from(min, max, IntRange.of(1), false, true, Optional.empty(), ClusterInfo.empty()))
.build();
fixture.tester().clock().advance(Duration.ofDays(2));
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.loader().applyCpuLoad(0.9, 120);
fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time",
7, 7, 9.4, 78.6, 235.8,
@@ -578,6 +592,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
Duration timePassed = fixture.loader().addCpuMeasurements(0.25, 120);
fixture.tester().clock().advance(timePassed.negated());
@@ -608,7 +623,7 @@ public class AutoscalingTest {
}
@Test
- public void test_autoscaling_group_size() {
+ public void test_autoscaling_groupsize() {
var min = new ClusterResources( 2, 2, new NodeResources(1, 1, 1, 1));
var now = new ClusterResources(6, 2, new NodeResources(10, 100, 100, 1));
var max = new ClusterResources(30, 30, new NodeResources(100, 100, 1000, 1));
@@ -634,6 +649,7 @@ public class AutoscalingTest {
.initialResources(Optional.of(now))
.capacity(Capacity.from(min, max))
.build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
fixture.loader().applyLoad(new Load(0.16, 0.02, 0.5), 120);
fixture.tester().assertResources("Scaling down memory",
@@ -644,10 +660,10 @@ public class AutoscalingTest {
@Test
public void scaling_down_only_after_delay() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
- fixture.loader().applyCpuLoad(0.02, 120);
+ fixture.loader().applyCpuLoad(0.02, 5);
assertTrue("Too soon after initial deployment", fixture.autoscale().resources().isEmpty());
fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1));
- fixture.loader().applyCpuLoad(0.02, 120);
+ fixture.loader().applyCpuLoad(0.02, 5);
fixture.tester().assertResources("Scaling down since enough time has passed",
3, 1, 1.0, 23.6, 101.4,
fixture.autoscale());
@@ -663,20 +679,20 @@ public class AutoscalingTest {
.build();
fixture.tester.clock().advance(Duration.ofDays(1));
- fixture.loader().applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.5, 120);
// (no read share stored)
fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default",
- 5, 1, 3, 100, 100,
+ 7, 1, 3, 100, 100,
fixture.autoscale());
- fixture.loader().applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.5, 120);
fixture.storeReadShare(0.25, 0.5);
fixture.tester().assertResources("Half of global share is the same as the default assumption used above",
- 5, 1, 3, 100, 100,
+ 7, 1, 3, 100, 100,
fixture.autoscale());
fixture.tester.clock().advance(Duration.ofDays(1));
- fixture.loader().applyCpuLoad(0.25, 120);
+ fixture.loader().applyCpuLoad(0.5, 120);
fixture.storeReadShare(0.5, 0.5);
fixture.tester().assertResources("Advice to scale down since we don't need room for bcp",
4, 1, 3, 100, 100,
@@ -686,6 +702,7 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_considers_growth_rate() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.tester().clock().advance(Duration.ofDays(2));
Duration timeAdded = fixture.loader().addLoadMeasurements(100, t -> t == 0 ? 200.0 : 100.0, t -> 0.0);
@@ -719,9 +736,9 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_weights_growth_rate_by_confidence() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
double scalingFactor = 1.0/6000; // To make the average query rate low
- fixture.setScalingDuration(Duration.ofMinutes(60));
fixture.tester().clock().advance(Duration.ofDays(2));
Duration timeAdded = fixture.loader().addLoadMeasurements(100,
t -> scalingFactor * (100.0 + (t < 50 ? t * t * t : 155000 - (t - 49) * (t - 49) * (t - 49))),
@@ -736,6 +753,7 @@ public class AutoscalingTest {
@Test
public void test_autoscaling_considers_query_vs_write_rate() {
var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build();
+ fixture.setScalingDuration(Duration.ofHours(6));
fixture.loader().addCpuMeasurements(0.4, 220);
@@ -877,13 +895,13 @@ public class AutoscalingTest {
fixture.currentResources().advertisedResources());
fixture.tester().deploy(fixture.applicationId(), clusterSpec(false), fixture.capacity());
- fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 100);
+ fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 5);
fixture.tester().assertResources("Exclusive nodes makes no difference here",
2, 1, 4, 8, 100.0,
fixture.autoscale());
fixture.tester().deploy(fixture.applicationId(), clusterSpec(true), fixture.capacity());
- fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 100);
+ fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 5);
fixture.tester().assertResources("Reverts to the initial resources",
2, 1, 4, 8, 100,
fixture.currentResources().advertisedResources());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
index 29a7aff3e6a..637932681ee 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java
@@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration2.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 50.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 3.0, 7.4, 29.0,
+ 8, 1, 2.9, 7.4, 29.0,
fixture.autoscale());
// Mostly local
@@ -216,7 +216,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration3.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 90.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 2.3, 7.4, 29.0,
+ 8, 1, 2.2, 7.4, 29.0,
fixture.autoscale());
// Local only
@@ -226,7 +226,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration4.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 2.2, 7.4, 29.0,
+ 8, 1, 2.1, 7.4, 29.0,
fixture.autoscale());
// No group info, should be the same as the above
@@ -236,7 +236,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration5.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 100.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 2.2, 7.4, 29.0,
+ 8, 1, 2.1, 7.4, 29.0,
fixture.autoscale());
// 40 query rate, no group info (for reference to the below)
@@ -266,7 +266,7 @@ public class AutoscalingUsingBcpGroupInfoTest {
fixture.tester().clock().advance(duration8.negated());
fixture.loader().addQueryRateMeasurements(10, __ -> 40.0);
fixture.tester().assertResources("Scaling up cpu using bcp group cpu info",
- 8, 1, 1.9, 7.4, 29.0,
+ 8, 1, 1.8, 7.4, 29.0,
fixture.autoscale());
}
@@ -288,16 +288,18 @@ public class AutoscalingUsingBcpGroupInfoTest {
// External load is measured to zero -> 0
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.loader().addCpuMeasurements(0.7f, 10);
+ var duration = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration.negated());
fixture.loader().addQueryRateMeasurements(10, i -> 0.0);
assertEquals(new Autoscaling.Metrics(0, 1.0, 0),
fixture.autoscale().metrics());
// External load
fixture.tester().clock().advance(Duration.ofDays(2));
- fixture.loader().addCpuMeasurements(0.7f, 10);
+ duration = fixture.loader().addCpuMeasurements(0.7f, 10);
+ fixture.tester().clock().advance(duration.negated());
fixture.loader().addQueryRateMeasurements(10, i -> 110.0);
- assertEquals(new Autoscaling.Metrics(110, 1.1, 0.05),
+ assertEquals(new Autoscaling.Metrics(110, 1.0, 0.05),
round(fixture.autoscale().metrics()));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 53d753e2850..3091f82143d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -26,7 +26,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
import com.yahoo.vespa.hosted.provision.autoscale.Load;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
-import com.yahoo.vespa.hosted.provision.node.ClusterId;
import com.yahoo.vespa.hosted.provision.node.Generation;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
@@ -133,13 +132,14 @@ public class MetricsReporterTest {
expectedMetrics.put("suspendedSeconds", 123L);
expectedMetrics.put("numberOfServices", 0L);
- expectedMetrics.put("cache.nodeObject.hitRate", 0.5555555555555556D);
+ expectedMetrics.put("cache.nodeObject.hitRate", 5D/7D);
expectedMetrics.put("cache.nodeObject.evictionCount", 0L);
expectedMetrics.put("cache.nodeObject.size", 2L);
expectedMetrics.put("cache.curator.hitRate", 3D/5D);
expectedMetrics.put("cache.curator.evictionCount", 0L);
expectedMetrics.put("cache.curator.size", 2L);
+ expectedMetrics.put("nodes.emptyExclusive", 0);
nodeRepository.nodes().list();
tester.clock().setInstant(Instant.ofEpochSecond(124));
@@ -278,7 +278,6 @@ public class MetricsReporterTest {
assertEquals(4, getMetric("nodes.active", metric, dimensions));
assertEquals(0, getMetric("nodes.nonActive", metric, dimensions));
-
Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(),
"clusterid", clusterSpec.id().value());
assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions));
@@ -341,6 +340,34 @@ public class MetricsReporterTest {
assertEquals(1D, getMetric("nodes.exclusiveSwitchFraction", metric, MetricsReporter.dimensions(app, spec2.id())).doubleValue(), Double.MIN_VALUE);
}
+ @Test
+ public void empty_exclusive_hosts() {
+ ProvisioningTester tester = new ProvisioningTester.Builder().build();
+ ApplicationId app = ApplicationId.from("t1", "a1", "default");
+ TestMetric metric = new TestMetric();
+ MetricsReporter metricsReporter = metricsReporter(metric, tester);
+ NodeResources resources = new NodeResources(8, 32, 100, 10);
+ List<Node> hosts = tester.makeReadyNodes(4, resources, NodeType.host, 5);
+ tester.activateTenantHosts();
+ tester.patchNodes(hosts, (host) -> host.withExclusiveToApplicationId(app));
+
+ // Hosts are not considered empty until enough time passes
+ metricsReporter.maintain();
+ assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+ tester.clock().advance(Duration.ofMinutes(10));
+ metricsReporter.maintain();
+ assertEquals(hosts.size(), metric.values.get("nodes.emptyExclusive").intValue());
+
+ // Deploy application
+ ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("c1")).vespaVersion("1").build();
+ Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources));
+ tester.deploy(app, spec, capacity);
+
+ // Host are now in use
+ metricsReporter.maintain();
+ assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+ }
+
private Number getMetric(String name, TestMetric metric, Map<String, String> dimensions) {
List<TestMetric.TestContext> metrics = metric.context.get(name).stream()
.filter(ctx -> ctx.properties.entrySet().containsAll(dimensions.entrySet()))
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
index 7763459dd92..79644206918 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
@@ -10,6 +10,7 @@ import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.Provisioner;
import com.yahoo.config.provision.Zone;
+import com.yahoo.jdisc.test.MockMetric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -60,7 +61,7 @@ public class InfraDeployerImplTest {
private final NodeRepositoryTester tester = new NodeRepositoryTester();
private final NodeRepository nodeRepository = tester.nodeRepository();
- private final Provisioner provisioner = spy(new NodeRepositoryProvisioner(nodeRepository, Zone.defaultZone(), new EmptyProvisionServiceProvider()));
+ private final Provisioner provisioner = spy(new NodeRepositoryProvisioner(nodeRepository, Zone.defaultZone(), new EmptyProvisionServiceProvider(), new MockMetric()));
private final InfrastructureVersions infrastructureVersions = nodeRepository.infrastructureVersions();
private final DuperModelInfraApi duperModelInfraApi = mock(DuperModelInfraApi.class);
private final InfraDeployerImpl infraDeployer;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index a3a90d58c2c..bca48b19ccf 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -22,12 +22,12 @@ import com.yahoo.config.provision.NodeResources.DiskSpeed;
import com.yahoo.config.provision.NodeResources.StorageType;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.ProvisionLock;
-import com.yahoo.config.provision.ProvisionLogger;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.jdisc.test.MockMetric;
import com.yahoo.test.ManualClock;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.applicationmodel.InfrastructureApplication;
@@ -73,7 +73,6 @@ import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
-import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.yahoo.config.provision.NodeResources.StorageType.local;
@@ -131,7 +130,7 @@ public class ProvisioningTester {
true,
spareCount,
1000);
- this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, provisionServiceProvider);
+ this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, provisionServiceProvider, new MockMetric());
this.capacityPolicies = new CapacityPolicies(nodeRepository);
this.provisionLogger = new InMemoryProvisionLogger();
this.loadBalancerService = loadBalancerService;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java
new file mode 100644
index 00000000000..f38b4732ed7
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningThrottlerTest.java
@@ -0,0 +1,30 @@
+package com.yahoo.vespa.hosted.provision.provisioning;
+
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import org.junit.jupiter.api.Test;
+
+import java.time.Duration;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static com.yahoo.vespa.hosted.provision.provisioning.ProvisioningThrottler.throttle;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * @author mpolden
+ */
+class ProvisioningThrottlerTest {
+
+ @Test
+ void throttling() {
+ Agent agent = Agent.system;
+ Duration window = Duration.ofHours(1);
+ assertFalse(throttle(199, 99, window, agent));
+ assertTrue(throttle(200, 99, window, agent));
+ assertFalse(throttle(40, 100, window, agent));
+ assertTrue(throttle(41, 100, window, agent));
+ assertTrue(throttle(100, 100, window, agent));
+ assertFalse(throttle(200, 2100, window, agent));
+ assertTrue(throttle(201, 2100, window, agent));
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
index 42925b797d7..05a62ff944d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json
@@ -94,7 +94,7 @@
"at" : 123
}
],
- "scalingDuration": 21600000
+ "scalingDuration": 300000
}
}
}
diff --git a/screwdriver.yaml b/screwdriver.yaml
index 79a1569633f..19374a436d5 100644
--- a/screwdriver.yaml
+++ b/screwdriver.yaml
@@ -91,7 +91,7 @@ jobs:
screwdriver.cd/cpu: 7
screwdriver.cd/ram: 16
screwdriver.cd/disk: HIGH
- screwdriver.cd/timeout: 120
+ screwdriver.cd/timeout: 150
screwdriver.cd/dockerEnabled: true
screwdriver.cd/dockerCpu: TURBO
screwdriver.cd/dockerRam: HIGH
diff --git a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
index b7f5731ddf4..3a7e2a706cb 100644
--- a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
+++ b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
@@ -15,6 +15,7 @@
#include <vector>
#include <limits>
#include <unistd.h>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP("vespa-gen-testdocs");
@@ -550,7 +551,7 @@ DocumentGenerator::generate(uint32_t docMin, uint32_t docIdLimit,
bool headers, bool json)
{
string fullName(prependBaseDir(baseDir, feedFileName));
- FastOS_File::Delete(fullName.c_str());
+ std::filesystem::remove(std::filesystem::path(fullName));
Fast_BufferedFile f(new FastOS_File);
f.WriteOpen(fullName.c_str());
if (json) {
diff --git a/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp b/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp
index db2675a7779..1fc5c40a47a 100644
--- a/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/fileconfigmanager/fileconfigmanager_test.cpp
@@ -15,9 +15,9 @@
#include <vespa/searchcore/proton/test/documentdb_config_builder.h>
#include <vespa/searchcore/proton/test/transport_helper.h>
#include <vespa/searchsummary/config/config-juniperrc.h>
-#include <vespa/vespalib/io/fileutil.h>
#include <vespa/config-bucketspaces.h>
#include <vespa/vespalib/testkit/test_kit.h>
+#include <filesystem>
using namespace cloud::config::filedistribution;
@@ -165,7 +165,7 @@ TEST_FF("requireThatConfigCanBeSerializedAndDeserialized", Transport(), Document
TEST_FF("requireThatConfigCanBeLoadedWithoutExtraConfigsDataFile", Transport(), DocumentDBConfig::SP(makeBaseConfigSnapshot(f1.transport())))
{
saveBaseConfigSnapshot(f1.transport(), *f2, 70);
- EXPECT_FALSE(vespalib::unlink("out/config-70/extraconfigs.dat"));
+ EXPECT_FALSE(std::filesystem::remove(std::filesystem::path("out/config-70/extraconfigs.dat")));
DocumentDBConfig::SP esnap(makeEmptyConfigSnapshot());
{
FileConfigManager cm(f1.transport(), "out", myId, "dummy");
diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
index 6e9d4be97aa..f88e89db25e 100644
--- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
+++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp
@@ -21,11 +21,11 @@
#include <vespa/searchlib/queryeval/simpleresult.h>
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/vespalib/gtest/gtest.h>
-#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/test/insertion_operators.h>
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
+#include <filesystem>
#include <thread>
#include <vespa/log/log.h>
@@ -525,7 +525,7 @@ TEST(DocumentMetaStoreTest, gids_can_be_saved_and_loaded)
EXPECT_EQ(numLids + 1, dms2.getNumDocs());
EXPECT_EQ(numLids - (3 - i), dms2.getNumUsedLids());
}
- vespalib::unlink("documentmetastore2.dat");
+ std::filesystem::remove(std::filesystem::path("documentmetastore2.dat"));
}
TEST(DocumentMetaStoreTest, bucket_used_bits_are_lbounded_at_load_time)
@@ -551,7 +551,7 @@ TEST(DocumentMetaStoreTest, bucket_used_bits_are_lbounded_at_load_time)
BucketId expected_bucket(storage::spi::BucketLimits::MinUsedBits, gid.convertToBucketId().getRawId());
assertGid(gid, lid, dms2, expected_bucket, Timestamp(1000));
- vespalib::unlink("documentmetastore2.dat");
+ std::filesystem::remove(std::filesystem::path("documentmetastore2.dat"));
}
TEST(DocumentMetaStore, stats_are_updated)
@@ -1915,8 +1915,8 @@ TEST(DocumentMetaStoreTest, document_sizes_are_saved)
assertSize(dms4, 1, 1);
assertSize(dms4, 2, 1);
assertSize(dms4, 3, 1);
- vespalib::unlink("documentmetastore3.dat");
- vespalib::unlink("documentmetastore4.dat");
+ std::filesystem::remove(std::filesystem::path("documentmetastore3.dat"));
+ std::filesystem::remove(std::filesystem::path("documentmetastore4.dat"));
}
namespace {
diff --git a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
index 35583ea46da..e8926a957b7 100644
--- a/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
+++ b/searchcore/src/tests/proton/index/diskindexcleaner_test.cpp
@@ -120,7 +120,7 @@ void Test::requireThatIndexesInUseAreNotRemoved() {
void Test::requireThatInvalidFlushIndexesAreRemoved() {
createIndexes();
- FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete();
+ std::filesystem::remove(std::filesystem::path(index_dir + "/index.flush.4/serial.dat"));
DiskIndexes disk_indexes;
DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
@@ -131,7 +131,7 @@ void Test::requireThatInvalidFlushIndexesAreRemoved() {
void Test::requireThatInvalidFusionIndexesAreRemoved() {
createIndexes();
- FastOS_File((index_dir + "/index.fusion.2/serial.dat").c_str()).Delete();
+ std::filesystem::remove(std::filesystem::path(index_dir + "/index.fusion.2/serial.dat"));
DiskIndexes disk_indexes;
DiskIndexCleaner::clean(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
@@ -144,7 +144,7 @@ void Test::requireThatInvalidFusionIndexesAreRemoved() {
void Test::requireThatRemoveDontTouchNewIndexes() {
createIndexes();
- FastOS_File((index_dir + "/index.flush.4/serial.dat").c_str()).Delete();
+ std::filesystem::remove(std::filesystem::path(index_dir + "/index.flush.4/serial.dat"));
DiskIndexes disk_indexes;
DiskIndexCleaner::removeOldIndexes(index_dir, disk_indexes);
vector<string> indexes = readIndexes();
diff --git a/searchcore/src/tests/proton/index/indexmanager_test.cpp b/searchcore/src/tests/proton/index/indexmanager_test.cpp
index 2f6ebcd967f..a7209ea8897 100644
--- a/searchcore/src/tests/proton/index/indexmanager_test.cpp
+++ b/searchcore/src/tests/proton/index/indexmanager_test.cpp
@@ -728,7 +728,7 @@ TEST_F(IndexManagerTest, require_that_serial_number_is_read_on_load)
void crippleFusion(uint32_t fusionId) {
vespalib::asciistream ost;
ost << index_dir << "/index.flush." << fusionId << "/serial.dat";
- FastOS_File(ost.str().data()).Delete();
+ std::filesystem::remove(std::filesystem::path(ost.str()));
}
TEST_F(IndexManagerTest, require_that_failed_fusion_is_retried)
diff --git a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
index 7bb1c9b878c..d4de8e578bd 100644
--- a/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/common/CMakeLists.txt
@@ -29,6 +29,4 @@ vespa_add_library(searchcore_pcommon STATIC
statusreport.cpp
DEPENDS
searchcore_proton_metrics
- EXTERNAL_DEPENDS
- ${VESPA_STDCXX_FS_LIB}
)
diff --git a/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp b/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp
index e25eb5c422c..c893bb6fe2b 100644
--- a/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/common/hw_info_sampler.cpp
@@ -5,7 +5,6 @@
#include <vespa/config/print/fileconfigwriter.h>
#include <vespa/config/subscription/configsubscriber.hpp>
#include <vespa/fastos/file.h>
-#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/util/time.h>
#include <vespa/vespalib/util/resource_limits.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -107,7 +106,7 @@ double measureDiskWriteSpeed(const vespalib::string &path,
double elapsed = vespalib::to_s(after - before);
diskWriteSpeed = diskWriteLen / elapsed / 1_Mi;
}
- vespalib::unlink(fileName);
+ std::filesystem::remove(std::filesystem::path(fileName));
return diskWriteSpeed;
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
index 0f36ab4e834..f5544ed1b15 100644
--- a/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/server/CMakeLists.txt
@@ -126,6 +126,4 @@ vespa_add_library(searchcore_server STATIC
searchcore_summaryengine
searchcore_reference
configdefinitions
- EXTERNAL_DEPENDS
- ${VESPA_STDCXX_FS_LIB}
)
diff --git a/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp b/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp
index 704364dfb9b..51d6938b13b 100644
--- a/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp
+++ b/searchcore/src/vespa/searchcorespi/index/diskindexcleaner.cpp
@@ -39,7 +39,7 @@ bool isValidIndex(const string &index_dir) {
}
void invalidateIndex(const string &index_dir) {
- vespalib::unlink(index_dir + "/serial.dat");
+ std::filesystem::remove(std::filesystem::path(index_dir + "/serial.dat"));
vespalib::File::sync(index_dir);
}
diff --git a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp
index 97afce79861..a5f796cf48e 100644
--- a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp
+++ b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp
@@ -25,6 +25,7 @@ using search::index::SchemaUtil;
using search::SerialNum;
using vespalib::IllegalStateException;
using vespalib::FileHeader;
+using std::filesystem::path;
namespace searchcorespi::index {
@@ -64,8 +65,9 @@ IndexWriteUtilities::writeSerialNum(SerialNum serialNum,
vespalib::File::sync(dir);
if (ok) {
- FastOS_File renameFile(tmpFileName.c_str());
- ok &= renameFile.Rename(fileName.c_str());
+ std::error_code ec;
+ std::filesystem::rename(path(tmpFileName), path(fileName), ec);
+ ok = !ec;
}
if (!ok) {
std::ostringstream msg;
@@ -84,26 +86,15 @@ IndexWriteUtilities::copySerialNumFile(const vespalib::string &sourceDir,
vespalib::string tmpDest = dest + ".tmp";
std::error_code ec;
- std::filesystem::copy_file(std::filesystem::path(source), std::filesystem::path(tmpDest), ec);
+ std::filesystem::copy_file(path(source), path(tmpDest), ec);
if (ec) {
LOG(error, "Unable to copy file '%s'", source.c_str());
return false;
}
- FastOS_File file(tmpDest.c_str());
- if (!file.OpenReadWrite()) {
- LOG(error, "Unable to open '%s' for fsync", tmpDest.c_str());
- return false;
- }
- if (!file.Sync()) {
- LOG(error, "Unable to fsync '%s'", tmpDest.c_str());
- return false;
- }
- if (!file.Close()) {
- LOG(error, "Unable to close '%s'", tmpDest.c_str());
- return false;
- }
+ vespalib::File::sync(tmpDest);
vespalib::File::sync(destDir);
- if (!file.Rename(dest.c_str())) {
+ std::filesystem::rename(path(tmpDest), path(dest), ec);
+ if (ec) {
LOG(error, "Unable to rename file '%s' to '%s'", tmpDest.c_str(), dest.c_str());
return false;
}
@@ -159,7 +150,7 @@ IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir,
}
vespalib::string schemaTmpName = schemaName + ".tmp";
vespalib::string schemaOrigName = schemaName + ".orig";
- vespalib::unlink(schemaTmpName);
+ std::filesystem::remove(path(schemaTmpName));
if (!newSchema->saveToFile(schemaTmpName)) {
LOG(error, "Could not save schema to '%s'",
schemaTmpName.c_str());
diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
index c072f722677..82c1839e63b 100644
--- a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
+++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
@@ -10,7 +10,7 @@
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/vespalib/data/databuffer.h>
-#include <vespa/fastos/file.h>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP("attributefilewriter_test");
@@ -24,7 +24,7 @@ namespace {
vespalib::string testFileName("test.dat");
vespalib::string hello("Hello world");
-void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); }
+void removeTestFile() { std::filesystem::remove(std::filesystem::path(testFileName)); }
struct Fixture {
TuneFileAttributes _tuneFileAttributes;
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
index 5fa8889a01d..6e622c840b6 100644
--- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -53,9 +53,15 @@ struct BitVectorTest
StringAttribute & asString(AttributePtr &v);
FloatingPointAttribute & asFloat(AttributePtr &v);
- AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter);
+ AttributePtr
+ make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableOnlyBitVector,
+ bool filter);
- void addDocs(const AttributePtr &v, size_t sz);
+ void
+ addDocs(const AttributePtr &v, size_t sz);
template <typename VectorType>
void populate(VectorType &v, uint32_t low, uint32_t high, bool set);
@@ -63,16 +69,22 @@ struct BitVectorTest
template <typename VectorType>
void populateAll(VectorType &v, uint32_t low, uint32_t high, bool set);
- void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, bool prefix);
+ void
+ buildTermQuery(std::vector<char> & buffer,
+ const vespalib::string & index,
+ const vespalib::string & term, bool prefix);
template <typename V>
- vespalib::string getSearchStr();
+ vespalib::string
+ getSearchStr();
template <typename V, typename T>
- SearchContextPtr getSearch(const V & vec, const T & term, bool prefix, bool useBitVector);
+ SearchContextPtr
+ getSearch(const V & vec, const T & term, bool prefix, bool useBitVector);
template <typename V>
- SearchContextPtr getSearch(const V & vec, bool useBitVector);
+ SearchContextPtr
+ getSearch(const V & vec, bool useBitVector);
void
checkSearch(AttributePtr v,
@@ -95,7 +107,10 @@ struct BitVectorTest
template <typename VectorType, typename BufferType>
void
- test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter);
+ test(BasicType bt, CollectionType ct, const vespalib::string &pref,
+ bool fastSearch,
+ bool enableOnlyBitVector,
+ bool filter);
template <typename VectorType, typename BufferType>
void
@@ -180,7 +195,8 @@ BitVectorTest::getSearchStr<StringAttribute>()
template <typename V, typename T>
SearchContextPtr
-BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, bool useBitVector)
+BitVectorTest::getSearch(const V &vec, const T &term, bool prefix,
+ bool useBitVector)
{
std::vector<char> query;
vespalib::asciistream ss;
@@ -195,7 +211,8 @@ BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, bool useBitVe
template <>
SearchContextPtr
-BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, bool useBitVector)
+BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v,
+ bool useBitVector)
{
return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector);
}
@@ -203,23 +220,32 @@ BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, bool useBi
template <>
SearchContextPtr
BitVectorTest::
-getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, bool useBitVector)
+getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v,
+ bool useBitVector)
{
- return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, useBitVector);
+ return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false,
+ useBitVector);
}
template <>
SearchContextPtr
-BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, bool useBitVector)
+BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v,
+ bool useBitVector)
{
- return getSearch<StringAttribute, const vespalib::string &>(v, "foo", false, useBitVector);
+ return getSearch<StringAttribute, const vespalib::string &>
+ (v, "foo", false, useBitVector);
}
BitVectorTest::AttributePtr
-BitVectorTest::make(Config cfg, const vespalib::string &pref, bool fastSearch, bool filter)
+BitVectorTest::make(Config cfg,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableOnlyBitVector,
+ bool filter)
{
cfg.setFastSearch(fastSearch);
+ cfg.setEnableOnlyBitVector(enableOnlyBitVector);
cfg.setIsFilter(filter);
AttributePtr v = AttributeFactory::createAttribute(pref, cfg);
return v;
@@ -241,9 +267,11 @@ BitVectorTest::addDocs(const AttributePtr &v, size_t sz)
template <>
void
-BitVectorTest::populate(IntegerAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populate(IntegerAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; i+= 5) {
+ for(size_t i(low), m(high); i < m; i+= 5) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -260,9 +288,11 @@ BitVectorTest::populate(IntegerAttribute &v, uint32_t low, uint32_t high, bool s
template <>
void
-BitVectorTest::populate(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populate(FloatingPointAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; i+= 5) {
+ for(size_t i(low), m(high); i < m; i+= 5) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -279,9 +309,11 @@ BitVectorTest::populate(FloatingPointAttribute &v, uint32_t low, uint32_t high,
template <>
void
-BitVectorTest::populate(StringAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populate(StringAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; i+= 5) {
+ for(size_t i(low), m(high); i < m; i+= 5) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -297,9 +329,11 @@ BitVectorTest::populate(StringAttribute &v, uint32_t low, uint32_t high, bool se
template <>
void
-BitVectorTest::populateAll(IntegerAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populateAll(IntegerAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; ++i) {
+ for(size_t i(low), m(high); i < m; ++i) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -317,9 +351,11 @@ BitVectorTest::populateAll(IntegerAttribute &v, uint32_t low, uint32_t high, boo
template <>
void
-BitVectorTest::populateAll(FloatingPointAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populateAll(FloatingPointAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; ++i) {
+ for(size_t i(low), m(high); i < m; ++i) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -337,9 +373,11 @@ BitVectorTest::populateAll(FloatingPointAttribute &v, uint32_t low, uint32_t hig
template <>
void
-BitVectorTest::populateAll(StringAttribute &v, uint32_t low, uint32_t high, bool set)
+BitVectorTest::populateAll(StringAttribute &v,
+ uint32_t low, uint32_t high,
+ bool set)
{
- for (size_t i(low), m(high); i < m; ++i) {
+ for(size_t i(low), m(high); i < m; ++i) {
if (!set) {
v.clearDoc(i);
} else if (v.hasMultiValue()) {
@@ -378,7 +416,8 @@ BitVectorTest::checkSearch(AttributePtr v,
assert(!checkStride || (docId % 5) == 2u);
sb->unpack(docId);
EXPECT_EQUAL(md.getDocId(), docId);
- if (v->getCollectionType() == CollectionType::SINGLE || !weights) {
+ if (v->getCollectionType() == CollectionType::SINGLE ||
+ !weights) {
EXPECT_EQUAL(1, md.getWeight());
} else if (v->getCollectionType() == CollectionType::ARRAY) {
EXPECT_EQUAL(2, md.getWeight());
@@ -417,10 +456,15 @@ BitVectorTest::checkSearch(AttributePtr v,
template <typename VectorType, typename BufferType>
void
-BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref, bool fastSearch, bool filter)
+BitVectorTest::test(BasicType bt,
+ CollectionType ct,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableOnlyBitVector,
+ bool filter)
{
Config cfg(bt, ct);
- AttributePtr v = make(cfg, pref, fastSearch, filter);
+ AttributePtr v = make(cfg, pref, fastSearch, enableOnlyBitVector, filter);
addDocs(v, 1024);
auto &tv = as<VectorType>(v);
populate(tv, 2, 1023, true);
@@ -428,7 +472,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
SearchContextPtr sc = getSearch<VectorType>(tv, true);
checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true);
sc = getSearch<VectorType>(tv, false);
- checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true);
const search::IDocumentWeightAttribute *dwa = v->asDocumentWeightAttribute();
if (dwa != nullptr) {
search::IDocumentWeightAttribute::LookupResult lres =
@@ -437,8 +481,8 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
using SI = search::queryeval::SearchIterator;
TermFieldMatchData md;
SI::UP dwsi(new DWSI(md, *dwa, lres));
- if (!filter) {
- TEST_DO(checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true));
+ if (!enableOnlyBitVector) {
+ checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true);
} else {
dwsi->initRange(1, v->getCommittedDocIdLimit());
EXPECT_TRUE(dwsi->isAtEnd());
@@ -446,13 +490,13 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
}
populate(tv, 2, 973, false);
sc = getSearch<VectorType>(tv, true);
- checkSearch(v, std::move(sc), 977, 1022, 10, !filter, true);
+ checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector && !filter, true);
populate(tv, 2, 973, true);
sc = getSearch<VectorType>(tv, true);
checkSearch(v, std::move(sc), 2, 1022, 205, !fastSearch && !filter, true);
addDocs(v, 15000);
sc = getSearch<VectorType>(tv, true);
- checkSearch(v, std::move(sc), 2, 1022, 205, !filter, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && !filter, true);
populateAll(tv, 10, 15000, true);
sc = getSearch<VectorType>(tv, true);
checkSearch(v, std::move(sc), 2, 14999, 14992, !fastSearch && !filter, false);
@@ -464,65 +508,85 @@ void
BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref)
{
LOG(info, "test run, pref is %s", pref.c_str());
- test<VectorType, BufferType>(bt, ct, pref, false, false);
- test<VectorType, BufferType>(bt, ct, pref, false, true);
- test<VectorType, BufferType>(bt, ct, pref, true, false);
- test<VectorType, BufferType>(bt, ct, pref, true, true);
+ test<VectorType, BufferType>(bt, ct, pref, false, false, false);
+ test<VectorType, BufferType>(bt, ct, pref, false, false, true);
+ test<VectorType, BufferType>(bt, ct, pref, true, false, false);
+ test<VectorType, BufferType>(bt, ct, pref, true, false, true);
+ test<VectorType, BufferType>(bt, ct, pref, true, true, false);
+ test<VectorType, BufferType>(bt, ct, pref, true, true, true);
}
TEST_F("Test bitvectors with single value int32", BitVectorTest)
{
f.template test<IntegerAttribute,
- IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::SINGLE, "int32_sv");
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::SINGLE,
+ "int32_sv");
}
TEST_F("Test bitvectors with array value int32", BitVectorTest)
{
f.template test<IntegerAttribute,
- IntegerAttribute::largeint_t>(BasicType::INT32, CollectionType::ARRAY, "int32_a");
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::ARRAY,
+ "int32_a");
}
TEST_F("Test bitvectors with weighted set value int32", BitVectorTest)
{
f.template test<IntegerAttribute,
- IntegerAttribute::WeightedInt>(BasicType::INT32, CollectionType::WSET, "int32_sv");
+ IntegerAttribute::WeightedInt>(BasicType::INT32,
+ CollectionType::WSET,
+ "int32_sv");
}
TEST_F("Test bitvectors with single value double", BitVectorTest)
{
f.template test<FloatingPointAttribute,
- double>(BasicType::DOUBLE, CollectionType::SINGLE, "double_sv");
+ double>(BasicType::DOUBLE,
+ CollectionType::SINGLE,
+ "double_sv");
}
TEST_F("Test bitvectors with array value double", BitVectorTest)
{
f.template test<FloatingPointAttribute,
- double>(BasicType::DOUBLE, CollectionType::ARRAY, "double_a");
+ double>(BasicType::DOUBLE,
+ CollectionType::ARRAY,
+ "double_a");
}
TEST_F("Test bitvectors with weighted set value double", BitVectorTest)
{
f.template test<FloatingPointAttribute,
- FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, CollectionType::WSET, "double_ws");
+ FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE,
+ CollectionType::WSET,
+ "double_ws");
}
TEST_F("Test bitvectors with single value string", BitVectorTest)
{
f.template test<StringAttribute,
- vespalib::string>(BasicType::STRING, CollectionType::SINGLE, "string_sv");
+ vespalib::string>(BasicType::STRING,
+ CollectionType::SINGLE,
+ "string_sv");
}
TEST_F("Test bitvectors with array value string", BitVectorTest)
{
f.template test<StringAttribute,
- vespalib::string>(BasicType::STRING, CollectionType::ARRAY, "string_a");
+ vespalib::string>(BasicType::STRING,
+ CollectionType::ARRAY,
+ "string_a");
}
TEST_F("Test bitvectors with weighted set value string", BitVectorTest)
{
f.template test<StringAttribute,
- StringAttribute::WeightedString>(BasicType::STRING, CollectionType::WSET, "string_ws");
+ StringAttribute::WeightedString>(BasicType::STRING,
+ CollectionType::WSET,
+ "string_ws");
}
@@ -569,4 +633,5 @@ TEST("Test that bitvector iterators adheres to SearchIterator requirements") {
}
}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
index 79ef6e42bb2..c5d70109015 100644
--- a/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
+++ b/searchlib/src/tests/attribute/compaction/attribute_compaction_test.cpp
@@ -123,7 +123,7 @@ void hammerAttribute(AttributePtr &v, DocIdRange range, uint32_t count)
Config compactAddressSpaceAttributeConfig(bool enableAddressSpaceCompact)
{
Config cfg(BasicType::INT8, CollectionType::ARRAY);
- cfg.setCompactionStrategy({ 1.0f, (enableAddressSpaceCompact ? 0.2f : 1.0f) });
+ cfg.setCompactionStrategy({ 1.0, (enableAddressSpaceCompact ? 0.2 : 1.0) });
return cfg;
}
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
index 227dbfadbc0..57029f92111 100644
--- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -4,7 +4,9 @@
#include <vespa/searchcommon/attribute/status.h>
#include <vespa/searchlib/attribute/postingstore.h>
#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/btree/btreerootbase.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
#include <vespa/searchlib/attribute/postingstore.hpp>
#include <vespa/vespalib/datastore/buffer_type.hpp>
#include <vespa/vespalib/gtest/gtest.h>
@@ -40,7 +42,7 @@ std::ostream& operator<<(std::ostream& os, const PostingStoreSetup setup)
Config make_config(PostingStoreSetup param) {
Config cfg;
- cfg.setIsFilter(param.enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(param.enable_only_bitvector);
return cfg;
}
@@ -210,7 +212,8 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length)
EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1));
EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2));
auto usage_after = store.getMemoryUsage();
- if ((sequence_length < huge_sequence_length) || !_config.getIsFilter()) {
+ if (sequence_length < huge_sequence_length ||
+ !_config.getEnableOnlyBitVector()) {
EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes());
} else {
EXPECT_EQ(usage_before.deadBytes(), usage_after.deadBytes());
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
index 25de1105973..00e2a82d24e 100644
--- a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
@@ -471,7 +471,7 @@ PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::ve
}
EXPECT_EQ(doc, docEnd);
} else {
- EXPECT_TRUE(has_bitvector && vec.getIsFilter());
+ EXPECT_TRUE(has_bitvector && vec.getEnableOnlyBitVector());
numHits = postingList.getBitVectorEntry(find_result.second)->_bv->reader().countTrueBits();
}
if (has_bitvector) {
@@ -612,21 +612,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n
{
Config cfg(Config(BasicType::INT32, CollectionType::SINGLE));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("sint32", cfg);
testPostingList<Int32PostingListAttribute>(ptr1, numDocs, values);
}
{
Config cfg(Config(BasicType::INT32, CollectionType::ARRAY));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("aint32", cfg);
testPostingList<Int32ArrayPostingListAttribute>(ptr1, numDocs, values);
}
{
Config cfg(Config(BasicType::INT32, CollectionType::WSET));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("wsint32", cfg);
testPostingList<Int32WsetPostingListAttribute>(ptr1, numDocs, values);
}
@@ -640,21 +640,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n
{
Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("sfloat", cfg);
testPostingList<FloatPostingListAttribute>(ptr1, numDocs, values);
}
{
Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("afloat", cfg);
testPostingList<FloatArrayPostingListAttribute>(ptr1, numDocs, values);
}
{
Config cfg(Config(BasicType::FLOAT, CollectionType::WSET));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("wsfloat", cfg);
testPostingList<FloatWsetPostingListAttribute>(ptr1, numDocs, values);
}
@@ -674,21 +674,21 @@ PostingListAttributeTest::testPostingList(bool enable_only_bitvector, uint32_t n
{
Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("sstr", cfg);
testPostingList<StringPostingListAttribute>(ptr1, numDocs, charValues);
}
{
Config cfg(Config(BasicType::STRING, CollectionType::ARRAY));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("astr", cfg);
testPostingList<StringArrayPostingListAttribute>(ptr1, numDocs, charValues);
}
{
Config cfg(Config(BasicType::STRING, CollectionType::WSET));
cfg.setFastSearch(true);
- cfg.setIsFilter(enable_only_bitvector);
+ cfg.setEnableOnlyBitVector(enable_only_bitvector);
AttributePtr ptr1 = create_attribute("wsstr", cfg);
testPostingList<StringWsetPostingListAttribute>(ptr1, numDocs, charValues);
}
diff --git a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
index e356187a19f..07b64864d9a 100644
--- a/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
+++ b/searchlib/src/tests/attribute/reference_attribute/reference_attribute_test.cpp
@@ -13,9 +13,9 @@
#include <vespa/searchlib/test/mock_gid_to_lid_mapping.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/vespalib/gtest/gtest.h>
-#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/test/insertion_operators.h>
#include <cinttypes>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP("reference_attribute_test");
@@ -312,8 +312,8 @@ TEST_F(ReferenceAttributeTest, attribute_can_be_saved_and_loaded)
assertRef(doc1, 1);
assertRef(doc2, 2);
assertRef(doc1, 4);
- EXPECT_TRUE(vespalib::unlink("test.dat"));
- EXPECT_TRUE(vespalib::unlink("test.udat"));
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.dat")));
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.udat")));
}
TEST_F(ReferenceAttributeTest, update_uses_gid_mapper_to_set_target_lid)
@@ -399,8 +399,8 @@ TEST_F(ReferenceAttributeTest, populateTargetLids_uses_gid_mapper_to_update_lid_
save();
load();
checkPopulateTargetLids(*this);
- EXPECT_TRUE(vespalib::unlink("test.dat"));
- EXPECT_TRUE(vespalib::unlink("test.udat"));
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.dat")));
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path("test.udat")));
}
TEST_F(ReferenceAttributeTest, populateTargetLids_handles_removes)
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
index 9cfd5946dbb..2f3684874ee 100644
--- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
@@ -510,8 +510,8 @@ TEST("require that single weighted set turns filter on filter fields") {
SimpleStringTerm node("foo", "", 0, Weight(1));
Result result = do_search(attribute_manager, node, strict);
EXPECT_EQUAL(3u, result.est_hits);
- EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos);
- EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") == vespalib::string::npos);
+ EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos);
+ EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos);
ASSERT_EQUAL(3u, result.hits.size());
EXPECT_FALSE(result.est_empty);
EXPECT_EQUAL(20u, result.hits[0].docid);
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index ca8eaa176a4..8acb39853e9 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -383,12 +383,6 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
fic.dump(ib);
ib.close();
- vespalib::string tsName = dump2dir + "/.teststamp";
- using FileKit = search::FileKit;
- ASSERT_TRUE(FileKit::createStamp(tsName));
- ASSERT_TRUE(FileKit::hasStamp(tsName));
- ASSERT_TRUE(FileKit::removeStamp(tsName));
- ASSERT_FALSE(FileKit::hasStamp(tsName));
vespalib::ThreadStackExecutor executor(4);
do {
diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
index 5370eff78cf..1d7bd9b8504 100644
--- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
+++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp
@@ -204,7 +204,7 @@ TEST("test that DirectIOPadding works accordng to spec") {
EXPECT_EQUAL(1u, padAfter);
EXPECT_TRUE(file.Close());
- FastOS_File::Delete(file.GetFileName());
+ std::filesystem::remove(std::filesystem::path(file.GetFileName()));
}
#endif
@@ -748,23 +748,6 @@ TEST("testWriteRead") {
std::filesystem::remove_all(std::filesystem::path("empty"));
}
-TEST("requireThatSyncTokenIsUpdatedAfterFlush") {
-#if 0
- std::string file = "sync.dat";
- FastOS_File::Delete(file.c_str());
- {
- vespalib::DataBuffer buf;
- SimpleDataStore store(file);
- EXPECT_EQUAL(0u, store.lastSyncToken());
- makeData(buf, 10);
- store.write(0, buf, 10);
- store.flush(4);
- EXPECT_EQUAL(4u, store.lastSyncToken());
- }
- FastOS_File::Delete(file.c_str());
-#endif
-}
-
TEST("requireThatFlushTimeIsAvailableAfterFlush") {
DirectoryHandler testDir("flushtime");
vespalib::system_time before(vespalib::system_clock::now());
@@ -1022,7 +1005,7 @@ TEST_F("require that lid space can be increased after being compacted and then s
TEST_F("require that there is control of static memory usage", Fixture)
{
vespalib::MemoryUsage usage = f.store.getMemoryUsage();
- EXPECT_EQUAL(520u + sizeof(LogDataStore::NameIdSet) + sizeof(std::mutex), sizeof(LogDataStore));
+ EXPECT_EQUAL(536u + sizeof(LogDataStore::NameIdSet) + sizeof(std::mutex), sizeof(LogDataStore));
EXPECT_EQUAL(74108u, usage.allocatedBytes());
EXPECT_EQUAL(384u, usage.usedBytes());
}
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index dc64c3328e4..c22d3b3abb8 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -67,6 +67,7 @@ using search::attribute::WeightedEnumContent;
using search::attribute::test::AttributeBuilder;
using search::common::GeoLocation;
using search::common::GeoLocationSpec;
+using vespalib::eval::ValueType;
using AttributePtr = AttributeVector::SP;
using AVC = search::attribute::Config;
@@ -391,6 +392,14 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
avs.push_back(AttributeFactory::createAttribute("sbool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 14
avs.push_back(AttributeFactory::createAttribute("sebool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 15
avs.push_back(AttributeFactory::createAttribute("sdouble", AVC(AVBT::DOUBLE, AVCT::SINGLE))); // 16
+ {
+ AVC cfg(AVBT::TENSOR, AVCT::SINGLE);
+ cfg.setTensorType(ValueType::from_spec("tensor(x[2])"));
+ avs.push_back(AttributeFactory::createAttribute("tensor", cfg));
+ }
+ avs.push_back(AttributeFactory::createAttribute("predicate", AVC(AVBT::PREDICATE, AVCT::SINGLE))); // 18
+ avs.push_back(AttributeFactory::createAttribute("reference", AVC(AVBT::REFERENCE, AVCT::SINGLE))); // 19
+ avs.push_back(AttributeFactory::createAttribute("raw", AVC(AVBT::RAW, AVCT::SINGLE))); // 20
// simulate a unique only attribute as specified in sd
AVC cfg(AVBT::INT32, AVCT::SINGLE);
@@ -417,7 +426,11 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sdouble")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sbyte")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sbool")
- .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool");
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
}
for (const auto & attr : avs) {
@@ -1499,6 +1512,10 @@ Test::testMatch()
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "tensor");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "predicate");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "reference");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "raw");
FtIndexEnvironment idx_env;
idx_env.getBuilder()
@@ -1507,7 +1524,11 @@ Test::testMatch()
.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
.addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
- .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
StringList params, in, out;
FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight"));
diff --git a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp
index dc9c68c4539..986848c39b7 100644
--- a/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp
+++ b/searchlib/src/tests/searchcommon/attribute/config/attribute_config_test.cpp
@@ -31,6 +31,7 @@ TEST_F("test default attribute config", Fixture)
EXPECT_EQUAL(CollectionType::Type::SINGLE,
f._config.collectionType().type());
EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
EXPECT_TRUE(!f._config.getIsFilter());
EXPECT_TRUE(!f._config.fastAccess());
EXPECT_TRUE(f._config.tensorType().is_error());
@@ -42,6 +43,7 @@ TEST_F("test integer weightedset attribute config",
EXPECT_EQUAL(BasicType::Type::INT32, f._config.basicType().type());
EXPECT_EQUAL(CollectionType::Type::WSET, f._config.collectionType().type());
EXPECT_TRUE(!f._config.fastSearch());
+ EXPECT_TRUE(!f._config.getEnableOnlyBitVector());
EXPECT_TRUE(!f._config.getIsFilter());
EXPECT_TRUE(!f._config.fastAccess());
EXPECT_TRUE(f._config.tensorType().is_error());
diff --git a/searchlib/src/vespa/searchcommon/attribute/basictype.h b/searchlib/src/vespa/searchcommon/attribute/basictype.h
index 407348fea92..46387dd2738 100644
--- a/searchlib/src/vespa/searchcommon/attribute/basictype.h
+++ b/searchlib/src/vespa/searchcommon/attribute/basictype.h
@@ -9,7 +9,7 @@ namespace search::attribute {
class BasicType
{
public:
- enum Type : uint8_t {
+ enum Type {
NONE = 0,
STRING = 1,
BOOL = 2,
@@ -28,33 +28,33 @@ class BasicType
MAX_TYPE
};
- explicit BasicType(int t) noexcept : _type(Type(t)) { }
- explicit BasicType(unsigned int t) noexcept : _type(Type(t)) { }
- BasicType(Type t) noexcept : _type(t) { }
+ explicit BasicType(int t) : _type(Type(t)) { }
+ explicit BasicType(unsigned int t) : _type(Type(t)) { }
+ BasicType(Type t) : _type(t) { }
explicit BasicType(const vespalib::string & t) : _type(asType(t)) { }
- Type type() const noexcept { return _type; }
- const char * asString() const noexcept { return asString(_type); }
- size_t fixedSize() const noexcept { return fixedSize(_type); }
- static BasicType fromType(bool) noexcept { return BOOL; }
- static BasicType fromType(int8_t) noexcept { return INT8; }
- static BasicType fromType(int16_t) noexcept { return INT16; }
- static BasicType fromType(int32_t) noexcept { return INT32; }
- static BasicType fromType(int64_t) noexcept { return INT64; }
- static BasicType fromType(float) noexcept { return FLOAT; }
- static BasicType fromType(double) noexcept { return DOUBLE; }
- bool operator==(const BasicType &b) const noexcept { return _type == b._type; }
- bool operator!=(const BasicType &b) const noexcept { return _type != b._type; }
+ Type type() const { return _type; }
+ const char * asString() const { return asString(_type); }
+ size_t fixedSize() const { return fixedSize(_type); }
+ static BasicType fromType(bool) { return BOOL; }
+ static BasicType fromType(int8_t) { return INT8; }
+ static BasicType fromType(int16_t) { return INT16; }
+ static BasicType fromType(int32_t) { return INT32; }
+ static BasicType fromType(int64_t) { return INT64; }
+ static BasicType fromType(float) { return FLOAT; }
+ static BasicType fromType(double) { return DOUBLE; }
+ bool operator==(const BasicType &b) const { return _type == b._type; }
+ bool operator!=(const BasicType &b) const { return _type != b._type; }
private:
- static const char * asString(Type t) noexcept { return _typeTable[t]._name; }
- static size_t fixedSize(Type t) noexcept { return _typeTable[t]._fixedSize; }
+ static const char * asString(Type t) { return _typeTable[t]._name; }
+ static size_t fixedSize(Type t) { return _typeTable[t]._fixedSize; }
static Type asType(const vespalib::string & t);
Type _type;
struct TypeInfo {
- Type _type;
+ Type _type;
unsigned int _fixedSize;
const char * _name;
};
diff --git a/searchlib/src/vespa/searchcommon/attribute/collectiontype.h b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h
index 05fad8cbc64..35cb7612ed0 100644
--- a/searchlib/src/vespa/searchcommon/attribute/collectiontype.h
+++ b/searchlib/src/vespa/searchcommon/attribute/collectiontype.h
@@ -9,7 +9,7 @@ namespace search::attribute {
class CollectionType
{
public:
- enum Type : uint8_t {
+ enum Type {
/**
* Single value type with one value stored for each document.
**/
@@ -26,30 +26,32 @@ class CollectionType
MAX_TYPE
};
- CollectionType(Type t = SINGLE, bool remove = false, bool create = false) noexcept
- : _type(t),
- _removeIfZero(remove),
- _createIfNonExistant(create)
- { }
+ CollectionType(Type t = SINGLE, bool remove = false, bool create = false) :
+ _type(t),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
explicit
- CollectionType(const vespalib::string & t, bool remove = false, bool create = false)
- : _type(asType(t)),
- _removeIfZero(remove),
- _createIfNonExistant(create)
- { }
+ CollectionType(const vespalib::string & t, bool remove = false, bool create = false) :
+ _type(asType(t)),
+ _removeIfZero(remove),
+ _createIfNonExistant(create)
+ {
+ }
- Type type() const noexcept { return _type; }
- bool isMultiValue() const noexcept { return _type != SINGLE; }
- bool isWeightedSet() const noexcept { return _type == WSET; }
- bool isArray() const noexcept { return _type == ARRAY; }
- bool removeIfZero() const noexcept { return _removeIfZero; }
- bool createIfNonExistant() const noexcept { return _createIfNonExistant; }
- const char * asString() const noexcept { return asString(_type); }
- void removeIfZero(bool newValue) noexcept { _removeIfZero = newValue; }
- void createIfNonExistant(bool newValue) noexcept { _createIfNonExistant = newValue; }
- bool operator!=(const CollectionType &b) const noexcept { return !(operator==(b)); }
- bool operator==(const CollectionType &b) const noexcept {
+ Type type() const { return _type; }
+ bool isMultiValue() const { return _type != SINGLE; }
+ bool isWeightedSet() const { return _type == WSET; }
+ bool isArray() const { return _type == ARRAY; }
+ bool removeIfZero() const { return _removeIfZero; }
+ bool createIfNonExistant() const { return _createIfNonExistant; }
+ const char * asString() const { return asString(_type); }
+ void removeIfZero(bool newValue) { _removeIfZero = newValue; }
+ void createIfNonExistant(bool newValue) { _createIfNonExistant = newValue; }
+ bool operator!=(const CollectionType &b) const { return !(operator==(b)); }
+ bool operator==(const CollectionType &b) const {
return _type == b._type &&
_removeIfZero == b._removeIfZero &&
_createIfNonExistant == b._createIfNonExistant;
@@ -61,12 +63,12 @@ class CollectionType
const char * _name;
};
- static const char * asString(Type t) noexcept { return _typeTable[t]._name; }
+ static const char * asString(Type t) { return _typeTable[t]._name; }
static Type asType(const vespalib::string &t);
- Type _type : 4;
- bool _removeIfZero : 1;
- bool _createIfNonExistant : 1;
+ Type _type;
+ bool _removeIfZero;
+ bool _createIfNonExistant;
static const TypeInfo _typeTable[MAX_TYPE];
};
diff --git a/searchlib/src/vespa/searchcommon/attribute/config.cpp b/searchlib/src/vespa/searchcommon/attribute/config.cpp
index 7c302a10731..91495025dee 100644
--- a/searchlib/src/vespa/searchcommon/attribute/config.cpp
+++ b/searchlib/src/vespa/searchcommon/attribute/config.cpp
@@ -19,18 +19,19 @@ Config::Config(BasicType bt, CollectionType ct, bool fastSearch_) noexcept
: _basicType(bt),
_type(ct),
_fastSearch(fastSearch_),
+ _enableOnlyBitVector(false),
_isFilter(false),
_fastAccess(false),
_mutable(false),
_paged(false),
- _distance_metric(DistanceMetric::Euclidean),
+ _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY),
_match(Match::UNCASED),
_dictionary(),
- _maxUnCommittedMemory(MAX_UNCOMMITTED_MEMORY),
_growStrategy(),
_compactionStrategy(),
_predicateParams(),
_tensorType(vespalib::eval::ValueType::error_type()),
+ _distance_metric(DistanceMetric::Euclidean),
_hnsw_index_params()
{
}
@@ -42,11 +43,12 @@ Config & Config::operator = (Config &&) noexcept = default;
Config::~Config() = default;
bool
-Config::operator==(const Config &b) const noexcept
+Config::operator==(const Config &b) const
{
return _basicType == b._basicType &&
_type == b._type &&
_fastSearch == b._fastSearch &&
+ _enableOnlyBitVector == b._enableOnlyBitVector &&
_isFilter == b._isFilter &&
_fastAccess == b._fastAccess &&
_mutable == b._mutable &&
diff --git a/searchlib/src/vespa/searchcommon/attribute/config.h b/searchlib/src/vespa/searchcommon/attribute/config.h
index 17c762267cc..32cac7ec9d6 100644
--- a/searchlib/src/vespa/searchcommon/attribute/config.h
+++ b/searchlib/src/vespa/searchcommon/attribute/config.h
@@ -21,7 +21,7 @@ namespace search::attribute {
*/
class Config {
public:
- enum class Match : uint8_t { CASED, UNCASED };
+ enum class Match { CASED, UNCASED };
using CompactionStrategy = vespalib::datastore::CompactionStrategy;
Config() noexcept;
Config(BasicType bt) noexcept : Config(bt, CollectionType::SINGLE) { }
@@ -33,27 +33,29 @@ public:
Config & operator = (Config &&) noexcept;
~Config();
- BasicType basicType() const noexcept { return _basicType; }
- CollectionType collectionType() const noexcept { return _type; }
- bool fastSearch() const noexcept { return _fastSearch; }
- bool paged() const noexcept { return _paged; }
- const PredicateParams &predicateParams() const noexcept { return _predicateParams; }
- const vespalib::eval::ValueType & tensorType() const noexcept { return _tensorType; }
- DistanceMetric distance_metric() const noexcept { return _distance_metric; }
+ BasicType basicType() const { return _basicType; }
+ CollectionType collectionType() const { return _type; }
+ bool fastSearch() const { return _fastSearch; }
+ bool paged() const { return _paged; }
+ const PredicateParams &predicateParams() const { return _predicateParams; }
+ const vespalib::eval::ValueType & tensorType() const { return _tensorType; }
+ DistanceMetric distance_metric() const { return _distance_metric; }
const std::optional<HnswIndexParams>& hnsw_index_params() const { return _hnsw_index_params; }
/**
* Check if attribute posting list can consist of only a bitvector with
* no corresponding btree.
*/
- bool getIsFilter() const noexcept { return _isFilter; }
- bool isMutable() const noexcept { return _mutable; }
+ bool getEnableOnlyBitVector() const { return _enableOnlyBitVector; }
+
+ bool getIsFilter() const { return _isFilter; }
+ bool isMutable() const { return _mutable; }
/**
* Check if this attribute should be fast accessible at all times.
* If so, attribute is kept in memory also for non-searchable documents.
*/
- bool fastAccess() const noexcept { return _fastAccess; }
+ bool fastAccess() const { return _fastAccess; }
const GrowStrategy & getGrowStrategy() const { return _growStrategy; }
const CompactionStrategy &getCompactionStrategy() const { return _compactionStrategy; }
@@ -81,6 +83,14 @@ public:
* document frequency goes down, since recreated btree representation
* will then have lost weight information.
*/
+ Config & setEnableOnlyBitVector(bool enableOnlyBitVector) {
+ _enableOnlyBitVector = enableOnlyBitVector;
+ return *this;
+ }
+
+ /**
+ * Hide weight information when searching in attributes.
+ */
Config & setIsFilter(bool isFilter) { _isFilter = isFilter; return *this; }
Config & setMutable(bool isMutable) { _mutable = isMutable; return *this; }
Config & setPaged(bool paged_in) { _paged = paged_in; return *this; }
@@ -92,28 +102,29 @@ public:
}
Config & set_dictionary_config(const DictionaryConfig & cfg) { _dictionary = cfg; return *this; }
Config & set_match(Match match) { _match = match; return *this; }
- bool operator!=(const Config &b) const noexcept { return !(operator==(b)); }
- bool operator==(const Config &b) const noexcept ;
+ bool operator!=(const Config &b) const { return !(operator==(b)); }
+ bool operator==(const Config &b) const;
- uint64_t getMaxUnCommittedMemory() const noexcept { return _maxUnCommittedMemory; }
+ uint64_t getMaxUnCommittedMemory() const { return _maxUnCommittedMemory; }
Config & setMaxUnCommittedMemory(uint64_t value) { _maxUnCommittedMemory = value; return *this; }
private:
BasicType _basicType;
CollectionType _type;
- bool _fastSearch : 1;
- bool _isFilter : 1;
- bool _fastAccess : 1;
- bool _mutable : 1;
- bool _paged : 1;
- DistanceMetric _distance_metric;
+ bool _fastSearch;
+ bool _enableOnlyBitVector;
+ bool _isFilter;
+ bool _fastAccess;
+ bool _mutable;
+ bool _paged;
+ uint64_t _maxUnCommittedMemory;
Match _match;
DictionaryConfig _dictionary;
- uint64_t _maxUnCommittedMemory;
GrowStrategy _growStrategy;
CompactionStrategy _compactionStrategy;
PredicateParams _predicateParams;
vespalib::eval::ValueType _tensorType;
+ DistanceMetric _distance_metric;
std::optional<HnswIndexParams> _hnsw_index_params;
};
diff --git a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h
index 35f5fb4fe6b..9f9f45810b9 100644
--- a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h
+++ b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h
@@ -2,10 +2,8 @@
#pragma once
-#include <cstdint>
-
namespace search::attribute {
-enum DistanceMetric : uint8_t { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct };
+enum class DistanceMetric { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct };
}
diff --git a/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h
index 205a75c188f..d81eb9c5d3c 100644
--- a/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h
+++ b/searchlib/src/vespa/searchcommon/attribute/persistent_predicate_params.h
@@ -10,23 +10,24 @@ namespace search::attribute {
* Persistent parameters for predicate attributes.
*/
class PersistentPredicateParams {
+ uint32_t _arity;
int64_t _lower_bound;
int64_t _upper_bound;
- uint32_t _arity;
public:
- PersistentPredicateParams() noexcept
- : _lower_bound(std::numeric_limits<int64_t>::min()),
- _upper_bound(std::numeric_limits<int64_t>::max()),
- _arity(8)
- { }
- uint32_t arity() const noexcept { return _arity; }
- int64_t lower_bound() const noexcept { return _lower_bound; }
- int64_t upper_bound() const noexcept { return _upper_bound; }
- void setArity(uint32_t v) noexcept { _arity = v; }
- void setBounds(int64_t lower, int64_t upper) noexcept { _lower_bound = lower; _upper_bound = upper; }
+ PersistentPredicateParams()
+ : _arity(8),
+ _lower_bound(std::numeric_limits<int64_t>::min()),
+ _upper_bound(std::numeric_limits<int64_t>::max())
+ {
+ }
+ uint32_t arity() const { return _arity; }
+ int64_t lower_bound() const { return _lower_bound; }
+ int64_t upper_bound() const { return _upper_bound; }
+ void setArity(uint32_t v) { _arity = v; }
+ void setBounds(int64_t lower, int64_t upper) { _lower_bound = lower; _upper_bound = upper; }
- bool operator==(const PersistentPredicateParams &rhs) const noexcept {
+ bool operator==(const PersistentPredicateParams &rhs) const {
return ((_arity == rhs._arity) &&
(_lower_bound == rhs._lower_bound) &&
(_upper_bound == rhs._upper_bound));
diff --git a/searchlib/src/vespa/searchcommon/attribute/predicate_params.h b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h
index 7e9258ab5db..133b7331689 100644
--- a/searchlib/src/vespa/searchcommon/attribute/predicate_params.h
+++ b/searchlib/src/vespa/searchcommon/attribute/predicate_params.h
@@ -11,16 +11,17 @@ namespace search::attribute {
*/
class PredicateParams : public PersistentPredicateParams
{
- float _dense_posting_list_threshold;
+ double _dense_posting_list_threshold;
public:
- PredicateParams() noexcept
+ PredicateParams()
: PersistentPredicateParams(),
_dense_posting_list_threshold(0.4)
- { }
+ {
+ }
- float dense_posting_list_threshold() const noexcept { return _dense_posting_list_threshold; }
- void setDensePostingListThreshold(float v) noexcept { _dense_posting_list_threshold = v; }
- bool operator==(const PredicateParams &rhs) const noexcept {
+ double dense_posting_list_threshold() const { return _dense_posting_list_threshold; }
+ void setDensePostingListThreshold(double v) { _dense_posting_list_threshold = v; }
+ bool operator==(const PredicateParams &rhs) const {
return (PersistentPredicateParams::operator==(rhs) &&
(_dense_posting_list_threshold == rhs._dense_posting_list_threshold));
}
diff --git a/searchlib/src/vespa/searchcommon/common/dictionary_config.h b/searchlib/src/vespa/searchcommon/common/dictionary_config.h
index f504439c5a3..f51341ad799 100644
--- a/searchlib/src/vespa/searchcommon/common/dictionary_config.h
+++ b/searchlib/src/vespa/searchcommon/common/dictionary_config.h
@@ -3,7 +3,6 @@
#pragma once
#include <iosfwd>
-#include <cstdint>
namespace search {
@@ -12,8 +11,8 @@ namespace search {
*/
class DictionaryConfig {
public:
- enum class Type : uint8_t { BTREE, HASH, BTREE_AND_HASH };
- enum class Match : uint8_t { CASED, UNCASED };
+ enum class Type { BTREE, HASH, BTREE_AND_HASH };
+ enum class Match { CASED, UNCASED };
DictionaryConfig() noexcept : _type(Type::BTREE), _match(Match::UNCASED) {}
DictionaryConfig(Type type) noexcept : _type(type), _match(Match::UNCASED) {}
DictionaryConfig(Type type, Match match) noexcept : _type(type), _match(match) {}
@@ -21,8 +20,8 @@ public:
Match getMatch() const { return _match; }
bool operator == (const DictionaryConfig & b) const { return (_type == b._type) && (_match == b._match); }
private:
- Type _type : 4;
- Match _match : 4;
+ Type _type;
+ Match _match;
};
std::ostream& operator<<(std::ostream& os, const DictionaryConfig & cfg);
diff --git a/searchlib/src/vespa/searchcommon/common/growstrategy.h b/searchlib/src/vespa/searchcommon/common/growstrategy.h
index 86750eafbfc..8766989ded0 100644
--- a/searchlib/src/vespa/searchcommon/common/growstrategy.h
+++ b/searchlib/src/vespa/searchcommon/common/growstrategy.h
@@ -23,17 +23,17 @@ public:
{
}
- static GrowStrategy make(uint32_t docsInitialCapacity, float docsGrowFactor, uint32_t docsGrowDelta) noexcept {
+ static GrowStrategy make(uint32_t docsInitialCapacity, float docsGrowFactor, uint32_t docsGrowDelta) {
return {docsInitialCapacity, docsGrowFactor, docsGrowDelta, 0, 0.2};
}
- float getMultiValueAllocGrowFactor() const noexcept { return _multiValueAllocGrowFactor; }
+ float getMultiValueAllocGrowFactor() const { return _multiValueAllocGrowFactor; }
- bool operator==(const GrowStrategy & rhs) const noexcept {
+ bool operator==(const GrowStrategy & rhs) const {
return vespalib::GrowStrategy::operator==(rhs) &&
(_multiValueAllocGrowFactor == rhs._multiValueAllocGrowFactor);
}
- bool operator!=(const GrowStrategy & rhs) const noexcept {
+ bool operator!=(const GrowStrategy & rhs) const {
return !(operator==(rhs));
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 399c0266ec9..ba791444dea 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -130,10 +130,26 @@ private:
public:
AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute,
- const string &query_stack, const SearchContextParams &params);
+ const string &query_stack, const SearchContextParams &params)
+ : AttributeFieldBlueprint(field, attribute, QueryTermDecoder::decodeTerm(query_stack), params)
+ { }
AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute,
- QueryTermSimple::UP term, const SearchContextParams &params);
- ~AttributeFieldBlueprint() override;
+ QueryTermSimple::UP term, const SearchContextParams &params)
+ : SimpleLeafBlueprint(field),
+ _attr(attribute),
+ _query_term(term->getTermString()),
+ _search_context(attribute.createSearchContext(std::move(term), params)),
+ _type(OTHER)
+ {
+ uint32_t estHits = _search_context->approximateHits();
+ HitEstimate estimate(estHits, estHits == 0);
+ setEstimate(estimate);
+ if (attribute.isFloatingPointType()) {
+ _type = FLOAT;
+ } else if (attribute.isIntegerType()) {
+ _type = INT;
+ }
+ }
SearchIteratorUP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override {
assert(tfmda.size() == 1);
@@ -165,30 +181,6 @@ public:
bool getRange(vespalib::string &from, vespalib::string &to) const override;
};
-AttributeFieldBlueprint::~AttributeFieldBlueprint() = default;
-
-AttributeFieldBlueprint::AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute,
- const string &query_stack, const SearchContextParams &params)
- : AttributeFieldBlueprint(field, attribute, QueryTermDecoder::decodeTerm(query_stack), params)
-{ }
-AttributeFieldBlueprint::AttributeFieldBlueprint(const FieldSpec &field, const IAttributeVector &attribute,
- QueryTermSimple::UP term, const SearchContextParams &params)
- : SimpleLeafBlueprint(field),
- _attr(attribute),
- _query_term(term->getTermString()),
- _search_context(attribute.createSearchContext(std::move(term), params)),
- _type(OTHER)
-{
- uint32_t estHits = _search_context->approximateHits();
- HitEstimate estimate(estHits, estHits == 0);
- setEstimate(estimate);
- if (attribute.isFloatingPointType()) {
- _type = FLOAT;
- } else if (attribute.isIntegerType()) {
- _type = INT;
- }
-}
-
vespalib::string
get_type(const IAttributeVector& attr)
{
@@ -637,11 +629,7 @@ public:
return bitvector_iterator;
}
}
- if (_attr.has_weight_iterator(_dict_entry.posting_idx)) {
- return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry);
- } else {
- return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict);
- }
+ return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry);
}
SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override {
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index d606daaa3e0..f4ab447ed51 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -125,8 +125,7 @@ bool AttributeVector::hasArrayType() const { return _config->collectionType().is
bool AttributeVector::getIsFilter() const { return _config->getIsFilter(); }
bool AttributeVector::getIsFastSearch() const { return _config->fastSearch(); }
bool AttributeVector::isMutable() const { return _config->isMutable(); }
-attribute::BasicType::Type AttributeVector::getBasicType() const { return _config->basicType().type(); }
-attribute::CollectionType::Type AttributeVector::getCollectionType() const { return _config->collectionType().type(); }
+bool AttributeVector::getEnableOnlyBitVector() const { return _config->getEnableOnlyBitVector(); }
bool
AttributeVector::isEnumerated(const vespalib::GenericHeader &header)
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index 68dfe52643f..e3a7fdeb2c3 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -288,6 +288,7 @@ public:
bool getIsFilter() const override final;
bool getIsFastSearch() const override final;
bool isMutable() const;
+ bool getEnableOnlyBitVector() const;
const Config &getConfig() const noexcept { return *_config; }
void update_config(const Config& cfg);
@@ -319,8 +320,8 @@ public:
AddressSpaceUsage getAddressSpaceUsage() const;
- BasicType::Type getBasicType() const override final;
- CollectionType::Type getCollectionType() const override final;
+ BasicType::Type getBasicType() const override final { return getInternalBasicType().type(); }
+ CollectionType::Type getCollectionType() const override final { return getInternalCollectionType().type(); }
uint32_t getCommittedDocIdLimit() const override final { return _committedDocIdLimit.load(std::memory_order_acquire); }
bool isImported() const override;
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
index 4e300fe3800..7f04efd940b 100644
--- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -104,6 +104,7 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
Config retval(bType, cType);
PredicateParams predicateParams;
retval.setFastSearch(cfg.fastsearch);
+ retval.setEnableOnlyBitVector(cfg.enableonlybitvector);
retval.setIsFilter(cfg.enableonlybitvector);
retval.setFastAccess(cfg.fastaccess);
retval.setMutable(cfg.ismutable);
diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h
index d6499708b76..be36bcd185a 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h
@@ -43,7 +43,6 @@ struct IDocumentWeightAttribute
virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0;
virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const = 0;
virtual DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const = 0;
- virtual bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept = 0;
virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0;
virtual ~IDocumentWeightAttribute() = default;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
index f45ba3c8773..71c50ccb270 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
@@ -41,7 +41,6 @@ private:
void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override;
DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override;
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override;
- bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override;
};
DocumentWeightAttributeAdapter _document_weight_attribute_adapter;
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index 89ef0a7d8a0..1009fa2fb5f 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -146,17 +146,12 @@ MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::make_bi
}
template <typename B, typename M>
-bool
-MultiValueNumericPostingAttribute<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept
-{
- return self.getPostingList().has_btree(idx);
-}
-
-template <typename B, typename M>
const IDocumentWeightAttribute *
MultiValueNumericPostingAttribute<B, M>::asDocumentWeightAttribute() const
{
- if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) {
+ if (this->hasWeightedSetType() &&
+ this->getBasicType() == AttributeVector::BasicType::INT64 &&
+ !this->getConfig().getIsFilter()) {
return &_document_weight_attribute_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
index 5c4d97660f6..b25c31a7dea 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
@@ -39,7 +39,6 @@ private:
void create(vespalib::datastore::EntryRef idx, std::vector<DocumentWeightIterator> &dst) const override;
DocumentWeightIterator create(vespalib::datastore::EntryRef idx) const override;
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override;
- bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override;
};
DocumentWeightAttributeAdapter _document_weight_attribute_adapter;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index 3042a9d0bb9..19840b5a474 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -159,13 +159,6 @@ MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::create(
}
template <typename B, typename M>
-bool
-MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept
-{
- return self.getPostingList().has_btree(idx);
-}
-
-template <typename B, typename M>
std::unique_ptr<queryeval::SearchIterator>
MultiValueStringPostingAttributeT<B, M>::DocumentWeightAttributeAdapter::make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const
{
@@ -176,7 +169,9 @@ template <typename B, typename T>
const IDocumentWeightAttribute *
MultiValueStringPostingAttributeT<B, T>::asDocumentWeightAttribute() const
{
- if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::STRING)) {
+ if (this->hasWeightedSetType() &&
+ this->getBasicType() == AttributeVector::BasicType::STRING &&
+ !this->getConfig().getIsFilter()) {
return &_document_weight_attribute_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 725491c4702..d32d8cde7ea 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -154,7 +154,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
DocIt postings;
vespalib::ConstArrayRef<Posting> array = _merger.getArray();
postings.set(&array[0], &array[array.size()]);
- if (_postingList.isFilter()) {
+ if (_postingList._isFilter) {
return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, postings);
} else {
return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings);
@@ -182,7 +182,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
DocIt postings;
const Posting *array = postingList.getKeyDataEntry(_pidx, clusterSize);
postings.set(array, array + clusterSize);
- if (postingList.isFilter()) {
+ if (postingList._isFilter) {
return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, postings);
} else {
return std::make_unique<AttributePostingListIteratorT<DocIt>>(_baseSearchCtx, _hasWeight, matchData, postings);
@@ -191,7 +191,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
typename PostingList::BTreeType::FrozenView frozen(_frozenRoot, postingList.getAllocator());
using DocIt = typename PostingList::ConstIterator;
- if (_postingList.isFilter()) {
+ if (_postingList._isFilter) {
return std::make_unique<FilterAttributePostingListIteratorT<DocIt>>(_baseSearchCtx, matchData, frozen.getRoot(), frozen.getAllocator());
} else {
return std::make_unique<AttributePostingListIteratorT<DocIt>> (_baseSearchCtx, _hasWeight, matchData, frozen.getRoot(), frozen.getAllocator());
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 09af15e35d5..2703201b292 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -19,7 +19,9 @@ using vespalib::btree::BTreeNoLeafData;
using vespalib::datastore::EntryRefFilter;
PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config)
- : _bvSize(64u),
+ : _enableOnlyBitVector(config.getEnableOnlyBitVector()),
+ _isFilter(config.getIsFilter()),
+ _bvSize(64u),
_bvCapacity(128u),
_minBvDocFreq(64),
_maxBvDocFreq(std::numeric_limits<uint32_t>::max()),
@@ -27,9 +29,9 @@ PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s
_dictionary(dictionary),
_status(status),
_bvExtraBytes(0),
- _compaction_spec(),
- _isFilter(config.getIsFilter())
-{ }
+ _compaction_spec()
+{
+}
PostingStoreBase2::~PostingStoreBase2() = default;
@@ -58,7 +60,8 @@ PostingStoreBase2::resizeBitVectors(uint32_t newSize, uint32_t newCapacity)
template <typename DataT>
-PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status, const Config &config)
+PostingStore<DataT>::PostingStore(IEnumStoreDictionary& dictionary, Status &status,
+ const Config &config)
: Parent(false),
PostingStoreBase2(dictionary, status, config),
_bvType(1, 1024u, RefType::offsetSize())
@@ -182,7 +185,8 @@ PostingStore<DataT>::applyNew(EntryRef &ref, AddIter a, AddIter ae)
template <typename DataT>
void
-PostingStore<DataT>::makeDegradedTree(EntryRef &ref, const BitVector &bv)
+PostingStore<DataT>::makeDegradedTree(EntryRef &ref,
+ const BitVector &bv)
{
assert(!ref.valid());
BTreeTypeRefPair tPair(allocBTree());
@@ -260,7 +264,7 @@ PostingStore<DataT>::makeBitVector(EntryRef &ref)
assert(bv.countTrueBits() == expDocFreq);
BitVectorRefPair bPair(allocBitVector());
BitVectorEntry *bve = bPair.data;
- if (isFilter()) {
+ if (_enableOnlyBitVector) {
BTreeType *tree = getWTreeEntry(iRef);
tree->clear(_allocator);
_store.hold_entry(ref);
@@ -297,7 +301,7 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae)
assert(bv.countTrueBits() == expDocFreq);
BitVectorRefPair bPair(allocBitVector());
BitVectorEntry *bve = bPair.data;
- if (!isFilter()) {
+ if (!_enableOnlyBitVector) {
applyNewTree(bve->_tree, aOrg, ae, CompareT());
}
bve->_bv = bvsp;
@@ -311,7 +315,11 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae)
template <typename DataT>
void
-PostingStore<DataT>::apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, RemoveIter re)
+PostingStore<DataT>::apply(BitVector &bv,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re)
{
while (a != ae || r != re) {
if (r != re && (a == ae || *r < a->_key)) {
@@ -337,7 +345,11 @@ PostingStore<DataT>::apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, R
template <typename DataT>
void
-PostingStore<DataT>::apply(EntryRef &ref, AddIter a, AddIter ae, RemoveIter r, RemoveIter re)
+PostingStore<DataT>::apply(EntryRef &ref,
+ AddIter a,
+ AddIter ae,
+ RemoveIter r,
+ RemoveIter re)
{
if (!ref.valid()) {
// No old data
@@ -494,9 +506,11 @@ PostingStore<DataT>::beginFrozen(const EntryRef ref) const
return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc);
}
+
template <typename DataT>
void
-PostingStore<DataT>::beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const
+PostingStore<DataT>::beginFrozen(const EntryRef ref,
+ std::vector<ConstIterator> &where) const
{
if (!ref.valid()) {
where.emplace_back();
@@ -728,7 +742,8 @@ PostingStore<DataT>::compact_worst_buffers(CompactionSpec compaction_spec, const
filter.add_buffers(_bvType.get_active_buffers());
}
_dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs)
- { return move(refs); }, filter);
+ { return move(refs); },
+ filter);
compacting_buffers->finish();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index 8c6ed3d9497..57f523acefe 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -27,7 +27,7 @@ public:
std::shared_ptr<GrowableBitVector> _bv; // bitvector
public:
- BitVectorEntry() noexcept
+ BitVectorEntry()
: _tree(),
_bv()
{ }
@@ -36,22 +36,25 @@ public:
class PostingStoreBase2
{
+public:
+ bool _enableOnlyBitVector;
+ bool _isFilter;
protected:
- static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
uint32_t _bvSize;
uint32_t _bvCapacity;
+public:
uint32_t _minBvDocFreq; // Less than this ==> destroy bv
uint32_t _maxBvDocFreq; // Greater than or equal to this ==> create bv
- std::set<uint32_t> _bvs; // Current bitvectors
- IEnumStoreDictionary& _dictionary;
- Status &_status;
- uint64_t _bvExtraBytes;
+protected:
+ std::set<uint32_t> _bvs; // Current bitvectors
+ IEnumStoreDictionary& _dictionary;
+ Status &_status;
+ uint64_t _bvExtraBytes;
PostingStoreCompactionSpec _compaction_spec;
-private:
- bool _isFilter;
+
+ static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
public:
- bool isFilter() const noexcept { return _isFilter; }
PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config);
virtual ~PostingStoreBase2();
bool resizeBitVectors(uint32_t newSize, uint32_t newCapacity);
@@ -108,7 +111,7 @@ public:
bool removeSparseBitVectors() override;
void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs);
- static bool isBitVector(uint32_t typeId) noexcept { return typeId == BUFFERTYPE_BITVECTOR; }
+ static bool isBitVector(uint32_t typeId) { return typeId == BUFFERTYPE_BITVECTOR; }
void applyNew(EntryRef &ref, AddIter a, AddIter ae);
@@ -183,9 +186,6 @@ public:
BitVectorEntry *getWBitVectorEntry(RefType ref) {
return _store.template getEntry<BitVectorEntry>(ref);
}
- bool has_btree(const EntryRef ref) const noexcept {
- return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter();
- }
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const;
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 6e56f5477c2..17a0e6256d4 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -41,7 +41,7 @@ StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default;
StringSearchHelper::~StringSearchHelper() = default;
bool
-StringSearchHelper::isMatch(const char *src) const noexcept {
+StringSearchHelper::isMatch(const char *src) const {
if (__builtin_expect(isRegex(), false)) {
return getRegex().valid() && getRegex().partial_match(std::string_view(src));
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
index 0c52692ee04..7bfcf0e4292 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -22,7 +22,7 @@ public:
StringSearchHelper(const StringSearchHelper &) = delete;
StringSearchHelper & operator =(const StringSearchHelper &) = delete;
~StringSearchHelper();
- bool isMatch(const char *src) const noexcept;
+ bool isMatch(const char *src) const;
bool isPrefix() const noexcept { return _isPrefix; }
bool isRegex() const noexcept { return _isRegex; }
bool isCased() const noexcept { return _isCased; }
diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp
index 60d04d7e3ad..42c9ed4e1c6 100644
--- a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp
+++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp
@@ -4,8 +4,9 @@
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/guard.h>
-#include <cassert>
#include <algorithm>
+#include <cassert>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP(".indexmetainfo");
@@ -305,7 +306,7 @@ IndexMetaInfo::save(const vespalib::string &baseName)
{
vespalib::string fileName = makeFileName(baseName);
vespalib::string newName = fileName + ".new";
- vespalib::unlink(newName);
+ std::filesystem::remove(std::filesystem::path(newName));
vespalib::FilePointer f(fopen(newName.c_str(), "w"));
if (!f.valid()) {
LOG(warning, "could not open file for writing: %s", newName.c_str());
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
index 4ae0ce2621a..f4d129bfc58 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
@@ -22,7 +22,7 @@ readHeader(vespalib::FileHeader &h,
const vespalib::string &name)
{
Fast_BufferedFile file(32_Ki);
- file.OpenReadOnly(name.c_str());
+ file.ReadOpenExisting(name.c_str());
h.readFile(file);
}
@@ -58,8 +58,7 @@ BitVectorFileWrite::open(const vespalib::string &name,
if (tuneFileWrite.getWantDirectIO()) {
_datFile->EnableDirectIO();
}
- // XXX no checking for success:
- _datFile->OpenWriteOnly(datname.c_str());
+ _datFile->WriteOpen(datname.c_str());
if (_datHeaderLen == 0) {
assert(_numKeys == 0);
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
index 6913c03262c..0caf89a0730 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
@@ -20,7 +20,7 @@ void
readHeader(vespalib::FileHeader &h, const vespalib::string &name)
{
Fast_BufferedFile file(32_Ki);
- file.OpenReadOnly(name.c_str());
+ file.ReadOpenExisting(name.c_str());
h.readFile(file);
}
@@ -66,8 +66,7 @@ BitVectorIdxFileWrite::open(const vespalib::string &name,
_idxFile->EnableDirectIO();
}
- // XXX no checking for success:
- _idxFile->OpenWriteOnly(idxname.c_str());
+ _idxFile->WriteOpen(idxname.c_str());
if (_idxHeaderLen == 0) {
assert(_numKeys == 0);
diff --git a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
index 24d790afe74..fb1fe98aa88 100644
--- a/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/field_merger.cpp
@@ -482,10 +482,6 @@ FieldMerger::merge_field_start()
return;
}
- if (FileKit::hasStamp(_field_dir + "/.mergeocc_done")) {
- _state = State::MERGE_DONE;
- return;
- }
std::filesystem::create_directory(std::filesystem::path(_field_dir));
LOG(debug, "merge_field for field %s dir %s", _field_name.c_str(), _field_dir.c_str());
@@ -507,10 +503,6 @@ FieldMerger::merge_field_finish()
merge_postings_failed();
return;
}
- if (!FileKit::createStamp(_field_dir + "/.mergeocc_done")) {
- _failed = true;
- return;
- }
vespalib::File::sync(_field_dir);
if (!clean_tmp_dirs()) {
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
index 7f6f5c4ed15..6d849532931 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
@@ -5,8 +5,9 @@
#include "extposocc.h"
#include "pagedict4file.h"
#include <vespa/vespalib/util/error.h>
-#include <vespa/log/log.h>
+#include <filesystem>
+#include <vespa/log/log.h>
LOG_SETUP(".diskindex.fieldwriter");
using search::index::FieldLengthInfo;
@@ -184,7 +185,7 @@ FieldWriter::remove(const vespalib::string &prefix)
{
for (const char **j = termOccNames; *j != nullptr; ++j) {
vespalib::string tmpName = prefix + *j;
- FastOS_File::Delete(tmpName.c_str());
+ std::filesystem::remove(std::filesystem::path(tmpName));
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
index 159af42635f..98541dba646 100644
--- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp
@@ -15,6 +15,7 @@
#include <vespa/vespalib/util/arrayqueue.hpp>
#include <vespa/vespalib/util/array.hpp>
#include <vespa/fastos/file.h>
+#include <filesystem>
#include <future>
#include <vespa/log/log.h>
@@ -141,23 +142,14 @@ verifyOrAssert(const TmpChunkMetaV & v)
}
}
-vespalib::string eraseErrorMsg(const vespalib::string & fileName, int error) {
- return make_string("Error erasing file '%s'. Error is '%s'",
- fileName.c_str(), getErrorString(error).c_str());
-}
-
}
void
FileChunk::erase()
{
_file.reset();
- if (!FastOS_File::Delete(_idxFileName.c_str()) && (errno != ENOENT)) {
- throw std::runtime_error(eraseErrorMsg(_idxFileName, errno));
- }
- if (!FastOS_File::Delete(_dataFileName.c_str()) && (errno != ENOENT)) {
- throw std::runtime_error(eraseErrorMsg(_dataFileName, errno));
- }
+ std::filesystem::remove(std::filesystem::path(_idxFileName));
+ std::filesystem::remove(std::filesystem::path(_dataFileName));
}
size_t
@@ -569,18 +561,14 @@ void
FileChunk::eraseIdxFile(const vespalib::string & name)
{
vespalib::string fileName(createIdxFileName(name));
- if ( ! FastOS_File::Delete(fileName.c_str())) {
- throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str()));
- }
+ std::filesystem::remove(std::filesystem::path(fileName));
}
void
FileChunk::eraseDatFile(const vespalib::string & name)
{
vespalib::string fileName(createDatFileName(name));
- if ( ! FastOS_File::Delete(fileName.c_str())) {
- throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str()));
- }
+ std::filesystem::remove(std::filesystem::path(fileName));
}
diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.cpp b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
index 53a7ce6e108..7a2148510d4 100644
--- a/searchlib/src/vespa/searchlib/features/matchfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
@@ -10,9 +10,34 @@
using namespace search::fef;
using CollectionType = FieldInfo::CollectionType;
+using DataType = FieldInfo::DataType;
namespace search::features {
+namespace {
+
+auto attribute_match_data_types = ParameterDataTypeSet::normalTypeSet();
+
+bool matchable_field(const FieldInfo& info)
+{
+ auto field_type = info.type();
+ if (field_type != FieldType::INDEX && field_type != FieldType::ATTRIBUTE) {
+ return false;
+ }
+ auto data_type = info.get_data_type();
+ if (data_type == DataType::TENSOR || data_type == DataType::RAW) {
+ // not matchable
+ return false;
+ }
+ if (field_type == FieldType::ATTRIBUTE && !attribute_match_data_types.allowedType(data_type)) {
+ // bad data type for attributeMatch feature
+ return false;
+ }
+ return true;
+}
+
+}
+
MatchExecutor::MatchExecutor(const MatchParams & params) :
FeatureExecutor(),
_params(params)
@@ -67,30 +92,28 @@ MatchBlueprint::setup(const IIndexEnvironment & env,
{
for (uint32_t i = 0; i < env.getNumFields(); ++i) {
const FieldInfo * info = env.getField(i);
- if (info->get_data_type() == FieldInfo::DataType::TENSOR) {
- // not matchable
+ if (!matchable_field(*info)) {
continue;
}
- if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
- _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name()));
- if (info->type() == FieldType::INDEX) {
- if (info->collection() == CollectionType::SINGLE) {
- defineInput("fieldMatch(" + info->name() + ")");
- } else {
- defineInput("elementCompleteness(" + info->name() + ")");
- }
- } else if (info->type() == FieldType::ATTRIBUTE) {
- defineInput("attributeMatch(" + info->name() + ")");
+ _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name()));
+ if (info->type() == FieldType::INDEX) {
+ if (info->collection() == CollectionType::SINGLE) {
+ defineInput("fieldMatch(" + info->name() + ")");
+ } else {
+ defineInput("elementCompleteness(" + info->name() + ")");
}
+ } else if (info->type() == FieldType::ATTRIBUTE) {
+ defineInput("attributeMatch(" + info->name() + ")");
}
}
describeOutput("score", "Normalized sum over all matched fields");
describeOutput("totalWeight", "Sum of rank weights for all matched fields");
for (uint32_t i = 0; i < env.getNumFields(); ++i) {
const FieldInfo * info = env.getField(i);
- if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
- describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'");
+ if (!matchable_field(*info)) {
+ continue;
}
+ describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'");
}
return true;
}
diff --git a/searchlib/src/vespa/searchlib/fef/properties.cpp b/searchlib/src/vespa/searchlib/fef/properties.cpp
index 2cc4e50b593..6f334630dc5 100644
--- a/searchlib/src/vespa/searchlib/fef/properties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/properties.cpp
@@ -11,7 +11,7 @@ const Property::Value Property::_emptyValue;
const Property::Values Property::_emptyValues;
const Property::Value &
-Property::getAt(uint32_t idx) const noexcept
+Property::getAt(uint32_t idx) const
{
if (idx < (*_values).size()) {
return (*_values)[idx];
@@ -22,7 +22,7 @@ Property::getAt(uint32_t idx) const noexcept
//-----------------------------------------------------------------------------
uint32_t
-Properties::rawHash(const void *buf, uint32_t len) noexcept
+Properties::rawHash(const void *buf, uint32_t len)
{
uint32_t res = 0;
unsigned const char *pt = (unsigned const char *) buf;
@@ -33,7 +33,7 @@ Properties::rawHash(const void *buf, uint32_t len) noexcept
return res;
}
-Properties::Properties() noexcept
+Properties::Properties()
: _numValues(0),
_data()
{
@@ -59,7 +59,7 @@ Properties::add(vespalib::stringref key, vespalib::stringref value)
}
uint32_t
-Properties::count(vespalib::stringref key) const noexcept
+Properties::count(vespalib::stringref key) const
{
if (!key.empty()) {
auto node = _data.find(key);
@@ -112,14 +112,14 @@ Properties::clear()
}
bool
-Properties::operator==(const Properties &rhs) const noexcept
+Properties::operator==(const Properties &rhs) const
{
return (_numValues == rhs._numValues &&
_data == rhs._data);
}
uint32_t
-Properties::hashCode() const noexcept
+Properties::hashCode() const
{
uint32_t hash = numKeys() + numValues();
for (const auto& elem : _data) {
@@ -159,7 +159,7 @@ Properties::visitNamespace(vespalib::stringref ns,
}
Property
-Properties::lookup(vespalib::stringref key) const noexcept
+Properties::lookup(vespalib::stringref key) const
{
if (key.empty()) {
return Property();
@@ -172,7 +172,7 @@ Properties::lookup(vespalib::stringref key) const noexcept
}
Property Properties::lookup(vespalib::stringref namespace1,
- vespalib::stringref key) const noexcept
+ vespalib::stringref key) const
{
if (namespace1.empty() || key.empty()) {
return Property();
@@ -184,7 +184,7 @@ Property Properties::lookup(vespalib::stringref namespace1,
Property Properties::lookup(vespalib::stringref namespace1,
vespalib::stringref namespace2,
- vespalib::stringref key) const noexcept
+ vespalib::stringref key) const
{
if (namespace1.empty() || namespace2.empty() || key.empty()) {
return Property();
@@ -197,7 +197,7 @@ Property Properties::lookup(vespalib::stringref namespace1,
Property Properties::lookup(vespalib::stringref namespace1,
vespalib::stringref namespace2,
vespalib::stringref namespace3,
- vespalib::stringref key) const noexcept
+ vespalib::stringref key) const
{
if (namespace1.empty() || namespace2.empty() || namespace3.empty() || key.empty()) {
return Property();
@@ -207,7 +207,7 @@ Property Properties::lookup(vespalib::stringref namespace1,
return lookup(fullKey);
}
-void Properties::swap(Properties & rhs) noexcept
+void Properties::swap(Properties & rhs)
{
_data.swap(rhs._data);
std::swap(_numValues, rhs._numValues);
diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h
index 80e8c70939c..a6ae83b0339 100644
--- a/searchlib/src/vespa/searchlib/fef/properties.h
+++ b/searchlib/src/vespa/searchlib/fef/properties.h
@@ -37,7 +37,7 @@ private:
*
* @param values the values for this property
**/
- Property(const Values &values) noexcept : _values(&values) { }
+ Property(const Values &values) : _values(&values) { }
public:
/**
@@ -46,14 +46,14 @@ public:
* object on the stack in the application, and will also be used
* by the @ref Properties class when a lookup gives no results.
**/
- Property() noexcept : _values(&_emptyValues) { }
+ Property() : _values(&_emptyValues) { }
/**
* Check if we found what we were looking for or not.
*
* @return true if the key we looked up had at least one value
**/
- bool found() const noexcept {
+ bool found() const {
return !(*_values).empty();
}
@@ -63,7 +63,7 @@ public:
*
* @return first value for the looked up key, or ""
**/
- const Value &get() const noexcept {
+ const Value &get() const {
if ((*_values).empty()) {
return _emptyValue;
}
@@ -78,7 +78,7 @@ public:
* @return first value for the looked up key, or fallBack
* @param fallBack value to return if no values were found
**/
- const Value & get(const Value &fallBack) const noexcept {
+ const Value & get(const Value &fallBack) const {
if ((*_values).empty()) {
return fallBack;
}
@@ -90,7 +90,7 @@ public:
*
* @return number of values for this property
**/
- uint32_t size() const noexcept { return (*_values).size(); }
+ uint32_t size() const { return (*_values).size(); }
/**
* Obtain a specific value for the looked up key.
@@ -98,7 +98,7 @@ public:
* @return the requested value, or "" if idx was out of bounds
* @param idx the index of the value we want to access
**/
- const Value &getAt(uint32_t idx) const noexcept;
+ const Value &getAt(uint32_t idx) const;
};
//-----------------------------------------------------------------------------
@@ -127,7 +127,7 @@ public:
/**
* Virtual destructor to allow safe subclassing.
**/
- virtual ~IPropertiesVisitor() = default;
+ virtual ~IPropertiesVisitor() {}
};
//-----------------------------------------------------------------------------
@@ -156,7 +156,7 @@ private:
* @param buf data pointer
* @param len data length
**/
- static uint32_t rawHash(const void *buf, uint32_t len) noexcept;
+ static uint32_t rawHash(const void *buf, uint32_t len);
public:
using UP = std::unique_ptr<Properties>;
@@ -164,7 +164,7 @@ public:
/**
* Create an empty properties object.
**/
- Properties() noexcept;
+ Properties();
Properties(Properties &&) noexcept = default;
Properties & operator=(Properties &&) noexcept = default;
Properties(const Properties &);
@@ -192,7 +192,7 @@ public:
* @return number of values for the given key
* @param key the key
**/
- uint32_t count(vespalib::stringref key) const noexcept;
+ uint32_t count(vespalib::stringref key) const;
/**
* Remove all values for the given key.
@@ -226,14 +226,14 @@ public:
*
* @return number of keys
**/
- uint32_t numKeys() const noexcept { return _data.size(); }
+ uint32_t numKeys() const { return _data.size(); }
/**
* Obtain the total number of values stored in this object.
*
* @return number of values
**/
- uint32_t numValues() const noexcept { return _numValues; }
+ uint32_t numValues() const { return _numValues; }
/**
* Check if rhs contains the same key/value pairs as this
@@ -242,14 +242,14 @@ public:
*
* @return true if we are equal to rhs
**/
- bool operator==(const Properties &rhs) const noexcept;
+ bool operator==(const Properties &rhs) const;
/**
* Calculate a hash code for this object
*
* @return hash code for this object
**/
- uint32_t hashCode() const noexcept;
+ uint32_t hashCode() const;
/**
* Visit all key/value pairs
@@ -275,7 +275,7 @@ public:
* @return object encapsulating lookup result
* @param key the key to look up
**/
- Property lookup(vespalib::stringref key) const noexcept;
+ Property lookup(vespalib::stringref key) const;
/**
* Look up a key inside a namespace using the proposed namespace
@@ -289,7 +289,7 @@ public:
* @param key the key to look up
**/
Property lookup(vespalib::stringref namespace1,
- vespalib::stringref key) const noexcept;
+ vespalib::stringref key) const;
/**
* Look up a key inside a namespace using the proposed namespace
@@ -305,7 +305,7 @@ public:
**/
Property lookup(vespalib::stringref namespace1,
vespalib::stringref namespace2,
- vespalib::stringref key) const noexcept;
+ vespalib::stringref key) const;
/**
* Look up a key inside a namespace using the proposed namespace
@@ -323,13 +323,13 @@ public:
Property lookup(vespalib::stringref namespace1,
vespalib::stringref namespace2,
vespalib::stringref namespace3,
- vespalib::stringref key) const noexcept;
+ vespalib::stringref key) const;
- void swap(Properties & rhs) noexcept ;
+ void swap(Properties & rhs);
};
inline void
-swap(Properties & a, Properties & b) noexcept
+swap(Properties & a, Properties & b)
{
a.swap(b);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index 3f6085ef7ff..488c58e3119 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -87,7 +87,7 @@ Blueprint::sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit)
return { uint32_t(std::min(sum, uint64_t(limit))), empty };
}
-Blueprint::State::State() noexcept
+Blueprint::State::State()
: _fields(),
_estimateHits(0),
_tree_size(1),
@@ -97,13 +97,13 @@ Blueprint::State::State() noexcept
_cost_tier(COST_TIER_NORMAL)
{}
-Blueprint::State::State(FieldSpecBase field) noexcept
+Blueprint::State::State(FieldSpecBase field)
: State()
{
_fields.add(field);
}
-Blueprint::State::State(FieldSpecBaseList fields_in) noexcept
+Blueprint::State::State(FieldSpecBaseList fields_in)
: _fields(std::move(fields_in)),
_estimateHits(0),
_tree_size(1),
@@ -116,7 +116,7 @@ Blueprint::State::State(FieldSpecBaseList fields_in) noexcept
Blueprint::State::~State() = default;
-Blueprint::Blueprint() noexcept
+Blueprint::Blueprint()
: _parent(0),
_sourceId(0xffffffff),
_docid_limit(0),
@@ -383,7 +383,7 @@ StateCache::notifyChange() {
IntermediateBlueprint::~IntermediateBlueprint() = default;
void
-IntermediateBlueprint::setDocIdLimit(uint32_t limit) noexcept
+IntermediateBlueprint::setDocIdLimit(uint32_t limit)
{
Blueprint::setDocIdLimit(limit);
for (Blueprint::UP &child : _children) {
@@ -576,7 +576,7 @@ IntermediateBlueprint::createSearch(fef::MatchData &md, bool strict) const
return createIntermediateSearch(std::move(subSearches), strict, md);
}
-IntermediateBlueprint::IntermediateBlueprint() noexcept = default;
+IntermediateBlueprint::IntermediateBlueprint() = default;
IntermediateBlueprint &
IntermediateBlueprint::addChild(Blueprint::UP child)
@@ -737,6 +737,13 @@ LeafBlueprint::optimize(Blueprint* &self)
}
void
+LeafBlueprint::setEstimate(HitEstimate est)
+{
+ _state.estimate(est);
+ notifyChange();
+}
+
+void
LeafBlueprint::set_cost_tier(uint32_t value)
{
assert(value < 0x100);
@@ -745,6 +752,13 @@ LeafBlueprint::set_cost_tier(uint32_t value)
}
void
+LeafBlueprint::set_allow_termwise_eval(bool value)
+{
+ _state.allow_termwise_eval(value);
+ notifyChange();
+}
+
+void
LeafBlueprint::set_want_global_filter(bool value)
{
_state.want_global_filter(value);
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 8d230b6ec01..dc7a0992d82 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -48,11 +48,11 @@ public:
uint32_t estHits;
bool empty;
- HitEstimate() noexcept : estHits(0), empty(true) {}
- HitEstimate(uint32_t estHits_, bool empty_) noexcept
+ HitEstimate() : estHits(0), empty(true) {}
+ HitEstimate(uint32_t estHits_, bool empty_)
: estHits(estHits_), empty(empty_) {}
- bool operator < (const HitEstimate &other) const noexcept {
+ bool operator < (const HitEstimate &other) const {
if (empty == other.empty) {
return (estHits < other.estHits);
} else {
@@ -77,21 +77,21 @@ public:
static constexpr uint8_t COST_TIER_EXPENSIVE = 2;
static constexpr uint8_t COST_TIER_MAX = 255;
- State() noexcept;
- State(FieldSpecBase field) noexcept;
- State(FieldSpecBaseList fields_in) noexcept;
+ State();
+ State(FieldSpecBase field);
+ State(FieldSpecBaseList fields_in);
State(const State &rhs) = delete;
State(State &&rhs) noexcept = default;
State &operator=(const State &rhs) = delete;
State &operator=(State &&rhs) noexcept = default;
~State();
- bool isTermLike() const noexcept { return !_fields.empty(); }
- const FieldSpecBaseList &fields() const noexcept { return _fields; }
+ bool isTermLike() const { return !_fields.empty(); }
+ const FieldSpecBaseList &fields() const { return _fields; }
- size_t numFields() const noexcept { return _fields.size(); }
- const FieldSpecBase &field(size_t idx) const noexcept { return _fields[idx]; }
- const FieldSpecBase *lookupField(uint32_t fieldId) const noexcept {
+ size_t numFields() const { return _fields.size(); }
+ const FieldSpecBase &field(size_t idx) const { return _fields[idx]; }
+ const FieldSpecBase *lookupField(uint32_t fieldId) const {
for (const FieldSpecBase & field : _fields) {
if (field.getFieldId() == fieldId) {
return &field;
@@ -100,27 +100,27 @@ public:
return nullptr;
}
- void estimate(HitEstimate est) noexcept {
+ void estimate(HitEstimate est) {
_estimateHits = est.estHits;
_estimateEmpty = est.empty;
}
- HitEstimate estimate() const noexcept { return HitEstimate(_estimateHits, _estimateEmpty); }
- double hit_ratio(uint32_t docid_limit) const noexcept {
+ HitEstimate estimate() const { return HitEstimate(_estimateHits, _estimateEmpty); }
+ double hit_ratio(uint32_t docid_limit) const {
uint32_t total_hits = _estimateHits;
uint32_t total_docs = std::max(total_hits, docid_limit);
return (total_docs == 0) ? 0.0 : double(total_hits) / double(total_docs);
}
- void tree_size(uint32_t value) noexcept {
+ void tree_size(uint32_t value) {
assert(value < 0x100000);
_tree_size = value;
}
- uint32_t tree_size() const noexcept { return _tree_size; }
- void allow_termwise_eval(bool value) noexcept { _allow_termwise_eval = value; }
- bool allow_termwise_eval() const noexcept { return _allow_termwise_eval; }
- void want_global_filter(bool value) noexcept { _want_global_filter = value; }
- bool want_global_filter() const noexcept { return _want_global_filter; }
- void cost_tier(uint8_t value) noexcept { _cost_tier = value; }
- uint8_t cost_tier() const noexcept { return _cost_tier; }
+ uint32_t tree_size() const { return _tree_size; }
+ void allow_termwise_eval(bool value) { _allow_termwise_eval = value; }
+ bool allow_termwise_eval() const { return _allow_termwise_eval; }
+ void want_global_filter(bool value) { _want_global_filter = value; }
+ bool want_global_filter() const { return _want_global_filter; }
+ void cost_tier(uint8_t value) { _cost_tier = value; }
+ uint8_t cost_tier() const { return _cost_tier; }
};
// utility that just takes maximum estimate
@@ -137,7 +137,7 @@ public:
// utility to get the greater estimate to sort first, higher tiers last
struct TieredGreaterEstimate {
- bool operator () (const auto &a, const auto &b) const noexcept {
+ bool operator () (const auto &a, const auto &b) const {
const auto &lhs = a->getState();
const auto &rhs = b->getState();
if (lhs.cost_tier() != rhs.cost_tier()) {
@@ -149,7 +149,7 @@ public:
// utility to get the lesser estimate to sort first, higher tiers last
struct TieredLessEstimate {
- bool operator () (const auto &a, const auto &b) const noexcept {
+ bool operator () (const auto &a, const auto &b) const {
const auto &lhs = a->getState();
const auto &rhs = b->getState();
if (lhs.cost_tier() != rhs.cost_tier()) {
@@ -189,20 +189,20 @@ public:
// hit that isn't certain to be a match).
enum class FilterConstraint { UPPER_BOUND, LOWER_BOUND };
- Blueprint() noexcept;
+ Blueprint();
Blueprint(const Blueprint &) = delete;
Blueprint &operator=(const Blueprint &) = delete;
virtual ~Blueprint();
- void setParent(Blueprint *parent) noexcept { _parent = parent; }
- Blueprint *getParent() const noexcept { return _parent; }
+ void setParent(Blueprint *parent) { _parent = parent; }
+ Blueprint *getParent() const { return _parent; }
bool has_parent() const { return (_parent != nullptr); }
- Blueprint &setSourceId(uint32_t sourceId) noexcept { _sourceId = sourceId; return *this; }
- uint32_t getSourceId() const noexcept { return _sourceId; }
+ Blueprint &setSourceId(uint32_t sourceId) { _sourceId = sourceId; return *this; }
+ uint32_t getSourceId() const { return _sourceId; }
- virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; }
- uint32_t get_docid_limit() const noexcept { return _docid_limit; }
+ virtual void setDocIdLimit(uint32_t limit) { _docid_limit = limit; }
+ uint32_t get_docid_limit() const { return _docid_limit; }
static Blueprint::UP optimize(Blueprint::UP bp);
virtual void optimize(Blueprint* &self) = 0;
@@ -227,7 +227,7 @@ public:
virtual const State &getState() const = 0;
const Blueprint &root() const;
- double hit_ratio() const noexcept { return getState().hit_ratio(_docid_limit); }
+ double hit_ratio() const { return getState().hit_ratio(_docid_limit); }
virtual void fetchPostings(const ExecuteInfo &execInfo) = 0;
virtual void freeze() = 0;
@@ -319,10 +319,10 @@ protected:
public:
using IndexList = std::vector<size_t>;
- IntermediateBlueprint() noexcept;
+ IntermediateBlueprint();
~IntermediateBlueprint() override;
- void setDocIdLimit(uint32_t limit) noexcept final;
+ void setDocIdLimit(uint32_t limit) final;
void optimize(Blueprint* &self) final;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
@@ -360,30 +360,24 @@ private:
State _state;
protected:
void optimize(Blueprint* &self) final;
- void setEstimate(HitEstimate est) {
- _state.estimate(est);
- notifyChange();
- }
+ void setEstimate(HitEstimate est);
void set_cost_tier(uint32_t value);
- void set_allow_termwise_eval(bool value) {
- _state.allow_termwise_eval(value);
- notifyChange();
- }
+ void set_allow_termwise_eval(bool value);
void set_want_global_filter(bool value);
void set_tree_size(uint32_t value);
- LeafBlueprint(bool allow_termwise_eval) noexcept
+ LeafBlueprint(bool allow_termwise_eval)
: _state()
{
_state.allow_termwise_eval(allow_termwise_eval);
}
- LeafBlueprint(FieldSpecBase field, bool allow_termwise_eval) noexcept
+ LeafBlueprint(FieldSpecBase field, bool allow_termwise_eval)
: _state(field)
{
_state.allow_termwise_eval(allow_termwise_eval);
}
- LeafBlueprint(FieldSpecBaseList fields, bool allow_termwise_eval) noexcept
+ LeafBlueprint(FieldSpecBaseList fields, bool allow_termwise_eval)
: _state(std::move(fields))
{
_state.allow_termwise_eval(allow_termwise_eval);
@@ -392,7 +386,7 @@ protected:
public:
~LeafBlueprint() override = default;
const State &getState() const final { return _state; }
- void setDocIdLimit(uint32_t limit) noexcept final { Blueprint::setDocIdLimit(limit); }
+ void setDocIdLimit(uint32_t limit) final { Blueprint::setDocIdLimit(limit); }
void fetchPostings(const ExecuteInfo &execInfo) override;
void freeze() final;
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;
@@ -403,15 +397,15 @@ public:
// for leaf nodes representing a single term
struct SimpleLeafBlueprint : LeafBlueprint {
- explicit SimpleLeafBlueprint() noexcept : LeafBlueprint(true) {}
- explicit SimpleLeafBlueprint(FieldSpecBase field) noexcept : LeafBlueprint(field, true) {}
- explicit SimpleLeafBlueprint(FieldSpecBaseList fields) noexcept: LeafBlueprint(std::move(fields), true) {}
+ explicit SimpleLeafBlueprint() : LeafBlueprint(true) {}
+ explicit SimpleLeafBlueprint(FieldSpecBase field) : LeafBlueprint(field, true) {}
+ explicit SimpleLeafBlueprint(FieldSpecBaseList fields) : LeafBlueprint(std::move(fields), true) {}
};
// for leaf nodes representing more complex structures like wand/phrase
struct ComplexLeafBlueprint : LeafBlueprint {
- explicit ComplexLeafBlueprint(FieldSpecBase field) noexcept : LeafBlueprint(field, false) {}
- explicit ComplexLeafBlueprint(FieldSpecBaseList fields) noexcept : LeafBlueprint(std::move(fields), false) {}
+ explicit ComplexLeafBlueprint(FieldSpecBase field) : LeafBlueprint(field, false) {}
+ explicit ComplexLeafBlueprint(FieldSpecBaseList fields) : LeafBlueprint(std::move(fields), false) {}
};
//-----------------------------------------------------------------------------
diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp
index cd1ddd5b92e..121591723e2 100644
--- a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp
@@ -5,9 +5,9 @@
namespace search::queryeval {
-FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) noexcept
- : _fieldId(fieldId | (isFilter_ ? 0x1000000u : 0)),
- _handle(handle)
+FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) :
+ _fieldId(fieldId | (isFilter_ ? 0x1000000u : 0)),
+ _handle(handle)
{
assert(fieldId < 0x1000000); // Can be represented by 24 bits
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.h b/searchlib/src/vespa/searchlib/queryeval/field_spec.h
index c4cd1ac2de8..fd925fdf4ff 100644
--- a/searchlib/src/vespa/searchlib/queryeval/field_spec.h
+++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.h
@@ -19,15 +19,15 @@ namespace search::queryeval {
class FieldSpecBase
{
public:
- FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false) noexcept;
+ FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false);
// resolve where to put match information for this term/field combination
fef::TermFieldMatchData *resolve(fef::MatchData &md) const;
const fef::TermFieldMatchData *resolve(const fef::MatchData &md) const;
- uint32_t getFieldId() const noexcept { return _fieldId & 0xffffff; }
- fef::TermFieldHandle getHandle() const noexcept { return _handle; }
+ uint32_t getFieldId() const { return _fieldId & 0xffffff; }
+ fef::TermFieldHandle getHandle() const { return _handle; }
/// a filter produces less detailed match data
- bool isFilter() const noexcept { return _fieldId & 0x1000000; }
+ bool isFilter() const { return _fieldId & 0x1000000; }
private:
uint32_t _fieldId; // field id in ranking framework
fef::TermFieldHandle _handle; // handle used when exposing match data to ranking framework
@@ -40,13 +40,13 @@ class FieldSpec : public FieldSpecBase
{
public:
FieldSpec(const vespalib::string & name, uint32_t fieldId,
- fef::TermFieldHandle handle, bool isFilter_ = false) noexcept
+ fef::TermFieldHandle handle, bool isFilter_ = false)
: FieldSpecBase(fieldId, handle, isFilter_),
_name(name)
{}
~FieldSpec();
- const vespalib::string & getName() const noexcept { return _name; }
+ const vespalib::string & getName() const { return _name; }
private:
vespalib::string _name; // field name
};
@@ -61,7 +61,7 @@ private:
List _list;
public:
- FieldSpecBaseList() noexcept = default;
+ FieldSpecBaseList() = default;
FieldSpecBaseList(FieldSpecBaseList &&) noexcept = default;
FieldSpecBaseList & operator=(FieldSpecBaseList &&) noexcept = default;
FieldSpecBaseList(const FieldSpecBaseList &) = default;
@@ -69,15 +69,15 @@ public:
~FieldSpecBaseList();
void reserve(size_t sz) { _list.reserve(sz); }
using const_iterator = const FieldSpecBase *;
- FieldSpecBaseList &add(const FieldSpecBase &spec) noexcept {
+ FieldSpecBaseList &add(const FieldSpecBase &spec) {
_list.push_back(spec);
return *this;
}
- bool empty() const noexcept { return _list.empty(); }
- size_t size() const noexcept { return _list.size(); }
- const_iterator begin() const noexcept { return _list.begin(); }
- const_iterator end() const noexcept { return _list.end(); }
- const FieldSpecBase &operator[](size_t i) const noexcept { return _list[i]; }
+ bool empty() const { return _list.empty(); }
+ size_t size() const { return _list.size(); }
+ const_iterator begin() const { return _list.begin(); }
+ const_iterator end() const { return _list.end(); }
+ const FieldSpecBase &operator[](size_t i) const { return _list[i]; }
};
/**
@@ -89,7 +89,7 @@ private:
vespalib::SmallVector<FieldSpec, 1> _list;
public:
- FieldSpecList() noexcept = default;
+ FieldSpecList() = default;
FieldSpecList(FieldSpecList &&) noexcept = delete;
FieldSpecList & operator=(FieldSpecList &&) noexcept = delete;
FieldSpecList(const FieldSpecList &) noexcept = delete;
@@ -99,9 +99,9 @@ public:
_list.push_back(spec);
return *this;
}
- bool empty() const noexcept { return _list.empty(); }
- size_t size() const noexcept { return _list.size(); }
- const FieldSpec &operator[](size_t i) const noexcept { return _list[i]; }
+ bool empty() const { return _list.empty(); }
+ size_t size() const { return _list.size(); }
+ const FieldSpec &operator[](size_t i) const { return _list[i]; }
void clear() { _list.clear(); }
};
diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
index 0cb72202811..aaf361b65ca 100644
--- a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
+++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/fastlib/io/bufferedfile.h>
#include <cassert>
+#include <filesystem>
#include <vespa/log/log.h>
LOG_SETUP(".transactionlog.domainpart");
@@ -371,7 +372,7 @@ DomainPart::erase(SerialNum to)
bool retval(true);
if (to > get_range_to()) {
close();
- _transLog->Delete();
+ std::filesystem::remove(std::filesystem::path(_fileName));
} else {
auto range_from = get_range_from();
if (to > range_from) {
diff --git a/searchlib/src/vespa/searchlib/util/filekit.cpp b/searchlib/src/vespa/searchlib/util/filekit.cpp
index 07eab9bb2be..4012ef00dae 100644
--- a/searchlib/src/vespa/searchlib/util/filekit.cpp
+++ b/searchlib/src/vespa/searchlib/util/filekit.cpp
@@ -9,87 +9,6 @@ LOG_SETUP(".filekit");
namespace search {
-using vespalib::getLastErrorString;
-
-bool
-FileKit::createStamp(const vespalib::string &name)
-{
- FastOS_File stamp;
- FastOS_StatInfo statInfo;
- bool statres;
-
- statres = FastOS_File::Stat(name.c_str(), &statInfo);
-
- if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
- LOG(error, "FATAL: Could not check stamp file %s: %s",
- name.c_str(), getLastErrorString().c_str());
- return false;
- }
- if (statres && statInfo._size > 0) {
- LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
- return false;
- }
-
- if (!stamp.OpenWriteOnlyTruncate(name.c_str())) {
- LOG(error, "FATAL: Could not create stamp file %s: %s",
- name.c_str(), getLastErrorString().c_str());
- return false;
- }
- return true;
-}
-
-
-bool
-FileKit::hasStamp(const vespalib::string &name)
-{
- FastOS_StatInfo statInfo;
- bool statres;
-
- statres = FastOS_File::Stat(name.c_str(), &statInfo);
-
- if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
- LOG(error, "FATAL: Could not check stamp file %s: %s",
- name.c_str(), getLastErrorString().c_str());
- return false;
- }
- return statres;
-}
-
-
-bool
-FileKit::removeStamp(const vespalib::string &name)
-{
- FastOS_StatInfo statInfo;
- bool deleteres;
- bool statres;
-
- statres = FastOS_File::Stat(name.c_str(), &statInfo);
-
- if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
- LOG(error, "FATAL: Could not check stamp file %s: %s",
- name.c_str(), getLastErrorString().c_str());
- return false;
- }
- if (statres && statInfo._size > 0) {
- LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
- return false;
- }
-
- do {
- deleteres = FastOS_File::Delete(name.c_str());
- //FIX! errno
- } while (!deleteres && errno == EINTR);
-
- if (!deleteres &&
- FastOS_File::GetLastError() != FastOS_File::ERR_ENOENT) {
- LOG(error, "FATAL: Could not remove stamp file %s: %s",
- name.c_str(), getLastErrorString().c_str());
- return false;
- }
- return true;
-}
-
-
vespalib::system_time
FileKit::getModificationTime(const vespalib::string &name)
{
diff --git a/searchlib/src/vespa/searchlib/util/filekit.h b/searchlib/src/vespa/searchlib/util/filekit.h
index 8c994ff5866..dbd6d2e5a2e 100644
--- a/searchlib/src/vespa/searchlib/util/filekit.h
+++ b/searchlib/src/vespa/searchlib/util/filekit.h
@@ -10,10 +10,6 @@ namespace search {
class FileKit
{
public:
- static bool createStamp(const vespalib::string &name);
- static bool hasStamp(const vespalib::string &name);
- static bool removeStamp(const vespalib::string &name);
-
/**
* Returns the modification time of the given file/directory,
* or time stamp 0 if stating of file/directory fails.
diff --git a/vespa-feed-client/pom.xml b/vespa-feed-client/pom.xml
index 19130b52268..b7787d68881 100644
--- a/vespa-feed-client/pom.xml
+++ b/vespa-feed-client/pom.xml
@@ -25,11 +25,6 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.httpcomponents.client5</groupId>
- <artifactId>httpclient5</artifactId>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>org.eclipse.jetty.http2</groupId>
<artifactId>http2-http-client-transport</artifactId>
<scope>compile</scope>
@@ -51,11 +46,6 @@
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>com.github.tomakehurst</groupId>
- <artifactId>wiremock-jre8-standalone</artifactId>
- <scope>test</scope>
- </dependency>
</dependencies>
<build>
@@ -80,21 +70,6 @@
<showDeprecation>true</showDeprecation>
</configuration>
</execution>
- <execution>
- <id>compile-java-9</id>
- <phase>compile</phase>
- <goals>
- <goal>compile</goal>
- </goals>
- <configuration>
- <release>9</release>
- <compileSourceRoots>
- <compileSourceRoot>${project.basedir}/src/main/java9</compileSourceRoot>
- </compileSourceRoots>
- <outputDirectory>${project.build.outputDirectory}/META-INF/versions/9</outputDirectory>
- <showDeprecation>true</showDeprecation>
- </configuration>
- </execution>
</executions>
</plugin>
<plugin>
diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java
deleted file mode 100644
index 96c65a6b165..00000000000
--- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/ApacheCluster.java
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package ai.vespa.feed.client.impl;
-
-import ai.vespa.feed.client.FeedClientBuilder.Compression;
-import ai.vespa.feed.client.HttpResponse;
-import org.apache.hc.client5.http.async.methods.SimpleHttpRequest;
-import org.apache.hc.client5.http.async.methods.SimpleHttpResponse;
-import org.apache.hc.client5.http.config.ConnectionConfig;
-import org.apache.hc.client5.http.config.RequestConfig;
-import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient;
-import org.apache.hc.client5.http.impl.async.HttpAsyncClients;
-import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder;
-import org.apache.hc.core5.concurrent.FutureCallback;
-import org.apache.hc.core5.http.ContentType;
-import org.apache.hc.core5.http.Header;
-import org.apache.hc.core5.http.HttpHeaders;
-import org.apache.hc.core5.http.message.BasicHeader;
-import org.apache.hc.core5.http2.config.H2Config;
-import org.apache.hc.core5.net.URIAuthority;
-import org.apache.hc.core5.reactor.IOReactorConfig;
-import org.apache.hc.core5.util.Timeout;
-
-import javax.net.ssl.SSLContext;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.zip.GZIPOutputStream;
-
-import static ai.vespa.feed.client.FeedClientBuilder.Compression.auto;
-import static ai.vespa.feed.client.FeedClientBuilder.Compression.gzip;
-import static org.apache.hc.core5.http.ssl.TlsCiphers.excludeH2Blacklisted;
-import static org.apache.hc.core5.http.ssl.TlsCiphers.excludeWeak;
-
-/**
- * @author jonmv
- */
-class ApacheCluster implements Cluster {
-
- private final List<Endpoint> endpoints = new ArrayList<>();
- private final List<BasicHeader> defaultHeaders = Arrays.asList(new BasicHeader(HttpHeaders.USER_AGENT, String.format("vespa-feed-client/%s (Apache)", Vespa.VERSION)),
- new BasicHeader("Vespa-Client-Version", Vespa.VERSION));
- private final Header gzipEncodingHeader = new BasicHeader(HttpHeaders.CONTENT_ENCODING, "gzip");
- private final Compression compression;
- private int someNumber = 0;
-
- private final ExecutorService dispatchExecutor = Executors.newFixedThreadPool(8, t -> new Thread(t, "request-dispatch-thread"));
- private final ScheduledExecutorService timeoutExecutor = Executors.newSingleThreadScheduledExecutor(t -> new Thread(t, "request-timeout-thread"));
-
- ApacheCluster(FeedClientBuilderImpl builder) throws IOException {
- for (int i = 0; i < builder.connectionsPerEndpoint; i++)
- for (URI endpoint : builder.endpoints)
- endpoints.add(new Endpoint(createHttpClient(builder), endpoint));
- this.compression = builder.compression;
- }
-
- @Override
- public void dispatch(HttpRequest wrapped, CompletableFuture<HttpResponse> vessel) {
- Endpoint leastBusy = endpoints.get(0);
- int min = Integer.MAX_VALUE;
- int start = ++someNumber % endpoints.size();
- for (int i = 0; i < endpoints.size(); i++) {
- Endpoint endpoint = endpoints.get((i + start) % endpoints.size());
- int inflight = endpoint.inflight.get();
- if (inflight < min) {
- leastBusy = endpoint;
- min = inflight;
- }
- }
- Endpoint endpoint = leastBusy;
- endpoint.inflight.incrementAndGet();
-
- dispatchExecutor.execute(() -> {
- try {
- SimpleHttpRequest request = new SimpleHttpRequest(wrapped.method(), wrapped.path());
- request.setScheme(endpoint.url.getScheme());
- request.setAuthority(new URIAuthority(endpoint.url.getHost(), portOf(endpoint.url)));
- request.setConfig(RequestConfig.custom().setConnectionRequestTimeout(Timeout.DISABLED).build());
- defaultHeaders.forEach(request::setHeader);
- wrapped.headers().forEach((name, value) -> request.setHeader(name, value.get()));
- if (wrapped.body() != null) {
- byte[] body = wrapped.body();
- if (compression == gzip || compression == auto && body.length > 512) {
- request.setHeader(gzipEncodingHeader);
- body = gzipped(body);
- }
- request.setBody(body, ContentType.APPLICATION_JSON);
- }
-
- Future<?> future = endpoint.client.execute(request,
- new FutureCallback<SimpleHttpResponse>() {
- @Override public void completed(SimpleHttpResponse response) { vessel.complete(new ApacheHttpResponse(response)); }
- @Override public void failed(Exception ex) { vessel.completeExceptionally(ex); }
- @Override public void cancelled() { vessel.cancel(false); }
- });
- // Manually schedule response timeout as the Apache HTTP/2 multiplexing client does not support response timeouts
- long timeoutMillis = wrapped.timeout() == null ? 190_000 : wrapped.timeout().toMillis();
- Future<?> cancellation = timeoutExecutor.schedule(
- () -> {
- vessel.completeExceptionally(
- new TimeoutException(String.format("Request timed out after %dms", timeoutMillis)));
- future.cancel(true);
- },
- timeoutMillis * 11 / 10 + 1_000, TimeUnit.MILLISECONDS);
- vessel.whenComplete((__, ___) -> cancellation.cancel(true));
- }
- catch (Throwable thrown) {
- vessel.completeExceptionally(thrown);
- }
- vessel.whenComplete((__, ___) -> endpoint.inflight.decrementAndGet());
- });
- }
-
- private byte[] gzipped(byte[] content) throws IOException{
- ByteArrayOutputStream buffer = new ByteArrayOutputStream(1 << 10);
- try (GZIPOutputStream zip = new GZIPOutputStream(buffer)) {
- zip.write(content);
- }
- return buffer.toByteArray();
- }
-
- @Override
- public void close() {
- Throwable thrown = null;
- dispatchExecutor.shutdownNow().forEach(Runnable::run);
- for (Endpoint endpoint : endpoints) {
- try {
- endpoint.client.close();
- }
- catch (Throwable t) {
- if (thrown == null) thrown = t;
- else thrown.addSuppressed(t);
- }
- }
- timeoutExecutor.shutdownNow().forEach(Runnable::run);
- if (thrown != null) throw new RuntimeException(thrown);
- }
-
-
- private static class Endpoint {
-
- private final CloseableHttpAsyncClient client;
- private final AtomicInteger inflight = new AtomicInteger(0);
- private final URI url;
-
- private Endpoint(CloseableHttpAsyncClient client, URI url) {
- this.client = client;
- this.url = url;
-
- this.client.start();
- }
-
- }
-
- private static CloseableHttpAsyncClient createHttpClient(FeedClientBuilderImpl builder) throws IOException {
- SSLContext sslContext = builder.constructSslContext();
- String[] allowedCiphers = excludeH2Blacklisted(excludeWeak(sslContext.getSupportedSSLParameters().getCipherSuites()));
- if (allowedCiphers.length == 0)
- throw new IllegalStateException("No adequate SSL cipher suites supported by the JVM");
-
- ClientTlsStrategyBuilder tlsStrategyBuilder = ClientTlsStrategyBuilder.create()
- .setCiphers(allowedCiphers)
- .setSslContext(sslContext);
- if (builder.hostnameVerifier != null)
- tlsStrategyBuilder.setHostnameVerifier(builder.hostnameVerifier);
-
- // Socket timeout must be longer than the longest feasible response timeout
- Timeout socketTimeout = Timeout.ofMinutes(15);
-
- ConnectionConfig connCfg = ConnectionConfig.custom()
- .setSocketTimeout(socketTimeout)
- .setConnectTimeout(Timeout.ofSeconds(10))
- .build();
-
- return HttpAsyncClients.customHttp2()
- .setH2Config(
- H2Config.custom()
- .setMaxConcurrentStreams(builder.maxStreamsPerConnection)
- .setCompressionEnabled(true)
- .setPushEnabled(false)
- .setInitialWindowSize(Integer.MAX_VALUE)
- .build())
- .setIOReactorConfig(
- IOReactorConfig.custom()
- .setIoThreadCount(Math.max(Math.min(Runtime.getRuntime().availableProcessors(), 8), 2))
- .setTcpNoDelay(true)
- .setSoTimeout(socketTimeout)
- .build())
- .setTlsStrategy(tlsStrategyBuilder.build())
- .setDefaultConnectionConfig(connCfg)
- .disableAutomaticRetries()
- .disableRedirectHandling()
- .disableCookieManagement()
- .build();
- }
-
- private static int portOf(URI url) {
- return url.getPort() == -1 ? url.getScheme().equals("http") ? 80 : 443
- : url.getPort();
- }
-
- private static class ApacheHttpResponse implements HttpResponse {
-
- private final SimpleHttpResponse wrapped;
-
- private ApacheHttpResponse(SimpleHttpResponse wrapped) {
- this.wrapped = wrapped;
- }
-
- @Override
- public int code() {
- return wrapped.getCode();
- }
-
- @Override
- public byte[] body() {
- return wrapped.getBodyBytes();
- }
-
- @Override
- public String contentType() {
- return wrapped.getContentType().getMimeType();
- }
-
- @Override
- public String toString() {
- return "HTTP response with code " + code() +
- (body() != null ? " and body '" + wrapped.getBodyText() + "'" : "");
- }
-
- }
-
-}
diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java
index f228717eba5..40c5fda8ce3 100644
--- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java
+++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/HttpFeedClient.java
@@ -55,8 +55,7 @@ class HttpFeedClient implements FeedClient {
private final boolean speedTest;
HttpFeedClient(FeedClientBuilderImpl builder) throws IOException {
- this(builder, builder.dryrun ?
- new DryrunCluster() : experimentalClientEnabled() ? new JettyCluster(builder) : new ApacheCluster(builder));
+ this(builder, builder.dryrun ? new DryrunCluster() : new JettyCluster(builder));
}
HttpFeedClient(FeedClientBuilderImpl builder, Cluster cluster) {
@@ -315,13 +314,4 @@ class HttpFeedClient implements FeedClient {
return query.toString();
}
- private static boolean experimentalClientEnabled() {
- String name = "VESPA_FEED_EXPERIMENTAL_CLIENT";
- return Optional.ofNullable(System.getenv(name))
- .map(Boolean::parseBoolean)
- .orElse(Optional.ofNullable(System.getProperty(name))
- .map(Boolean::parseBoolean)
- .orElse(true));
- }
-
}
diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java
index 1a125ebfbb5..cd7a4e6222e 100644
--- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java
+++ b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/JettyCluster.java
@@ -147,10 +147,11 @@ class JettyCluster implements Cluster {
h2Client.setInitialStreamRecvWindow(initialWindow);
ClientConnectionFactory.Info http2 = new ClientConnectionFactoryOverHTTP2.HTTP2(h2Client);
HttpClientTransportDynamic transport = new HttpClientTransportDynamic(connector, http2);
+ int connectionsPerEndpoint = b.connectionsPerEndpoint;
transport.setConnectionPoolFactory(dest -> {
MultiplexConnectionPool pool = new MultiplexConnectionPool(
- dest, Pool.StrategyType.RANDOM, b.connectionsPerEndpoint, false, dest, Integer.MAX_VALUE);
- pool.preCreateConnections(b.connectionsPerEndpoint);
+ dest, Pool.StrategyType.RANDOM, connectionsPerEndpoint, false, dest, Integer.MAX_VALUE);
+ pool.preCreateConnections(connectionsPerEndpoint);
return pool;
});
HttpClient httpClient = new HttpClient(transport);
@@ -186,9 +187,10 @@ class JettyCluster implements Cluster {
new HttpProxy(address, false, new Origin.Protocol(Collections.singletonList("h2c"), false)));
}
Map<String, Supplier<String>> proxyHeadersCopy = new TreeMap<>(b.proxyRequestHeaders);
+ URI proxyUri = URI.create(endpointUri(b.proxy));
if (!proxyHeadersCopy.isEmpty()) {
httpClient.getAuthenticationStore().addAuthenticationResult(new Authentication.Result() {
- @Override public URI getURI() { return URI.create(endpointUri(b.proxy)); }
+ @Override public URI getURI() { return proxyUri; }
@Override public void apply(Request r) {
r.headers(hs -> proxyHeadersCopy.forEach((k, v) -> hs.add(k, v.get())));
}
diff --git a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java b/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java
deleted file mode 100644
index 5183ce61761..00000000000
--- a/vespa-feed-client/src/main/java/ai/vespa/feed/client/impl/TlsDetailsFactory.java
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package ai.vespa.feed.client.impl;
-
-import org.apache.hc.core5.reactor.ssl.TlsDetails;
-
-import javax.net.ssl.SSLEngine;
-
-/**
- * @author bjorncs
- */
-public class TlsDetailsFactory {
- private TlsDetailsFactory() {}
-
- public static TlsDetails create(SSLEngine e) { return new TlsDetails(e.getSession(), "h2"); /*h2 == HTTP2*/ }
-}
-
diff --git a/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java b/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java
deleted file mode 100644
index f9903d9943d..00000000000
--- a/vespa-feed-client/src/main/java9/ai/vespa/feed/client/impl/TlsDetailsFactory.java
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package ai.vespa.feed.client.impl;
-
-import org.apache.hc.core5.reactor.ssl.TlsDetails;
-
-import javax.net.ssl.SSLEngine;
-
-/**
- * {@link SSLEngine#getApplicationProtocol()} is not available on all JDK8 versions
- * (https://bugs.openjdk.org/browse/JDK-8051498)
- *
- * @author bjorncs
- */
-public class TlsDetailsFactory {
- private TlsDetailsFactory() {}
-
- public static TlsDetails create(SSLEngine e) {
- return new TlsDetails(e.getSession(), e.getApplicationProtocol());
- }
-}
diff --git a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java
deleted file mode 100644
index cf9a36f2aa8..00000000000
--- a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/ApacheClusterTest.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package ai.vespa.feed.client.impl;
-
-import ai.vespa.feed.client.FeedClientBuilder.Compression;
-import ai.vespa.feed.client.HttpResponse;
-import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.RegisterExtension;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.net.URI;
-import java.time.Duration;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-import java.util.zip.GZIPOutputStream;
-
-import static com.github.tomakehurst.wiremock.client.WireMock.any;
-import static com.github.tomakehurst.wiremock.client.WireMock.anyRequestedFor;
-import static com.github.tomakehurst.wiremock.client.WireMock.anyUrl;
-import static com.github.tomakehurst.wiremock.client.WireMock.equalTo;
-import static com.github.tomakehurst.wiremock.client.WireMock.okJson;
-import static com.github.tomakehurst.wiremock.client.WireMock.postRequestedFor;
-import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo;
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-class ApacheClusterTest {
-
- @RegisterExtension
- final WireMockExtension server = new WireMockExtension();
-
- @Test
- void testClient() throws Exception {
- for (Compression compression : Compression.values()) {
- try (ApacheCluster cluster = new ApacheCluster(new FeedClientBuilderImpl(List.of(URI.create("http://localhost:" + server.port())))
- .setCompression(compression))) {
- server.stubFor(any(anyUrl()))
- .setResponse(okJson("{}").build());
-
- CompletableFuture<HttpResponse> vessel = new CompletableFuture<>();
- cluster.dispatch(new HttpRequest("POST",
- "/path",
- Map.of("name1", () -> "value1",
- "name2", () -> "value2"),
- "content".getBytes(UTF_8),
- Duration.ofSeconds(10)),
- vessel);
-
- AutoCloseable verifyResponse = () -> {
- HttpResponse response = vessel.get(15, TimeUnit.SECONDS);
- assertEquals("{}", new String(response.body(), UTF_8));
- assertEquals(200, response.code());
- };
- AutoCloseable verifyServer = () -> {
- server.verify(1, anyRequestedFor(anyUrl()));
- RequestPatternBuilder expected = postRequestedFor(urlEqualTo("/path")).withHeader("name1", equalTo("value1"))
- .withHeader("name2", equalTo("value2"))
- .withHeader("Content-Type", equalTo("application/json; charset=UTF-8"))
- .withRequestBody(equalTo("content"));
- expected = switch (compression) {
- case auto, none -> expected.withoutHeader("Content-Encoding");
- case gzip -> expected.withHeader("Content-Encoding", equalTo("gzip"));
- };
- server.verify(1, expected);
- server.resetRequests();
- };
- try (verifyServer; verifyResponse) { }
- }
- }
- }
-
-}
diff --git a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java b/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java
deleted file mode 100644
index ef61213889b..00000000000
--- a/vespa-feed-client/src/test/java/ai/vespa/feed/client/impl/WireMockExtension.java
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package ai.vespa.feed.client.impl;
-
-import com.github.tomakehurst.wiremock.WireMockServer;
-import com.github.tomakehurst.wiremock.core.Options;
-import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
-import org.junit.jupiter.api.extension.AfterEachCallback;
-import org.junit.jupiter.api.extension.BeforeEachCallback;
-import org.junit.jupiter.api.extension.ExtensionContext;
-
-/**
- * Allows wiremock to be used as a JUnit 5 extension, like
- * <pre>
- *
- * &#64RegisterExtension
- * WireMockExtension mockServer1 = new WireMockExtension();
- * </pre>
- */
-public class WireMockExtension extends WireMockServer implements BeforeEachCallback, AfterEachCallback {
-
- public WireMockExtension() {
- this(WireMockConfiguration.options()
- .dynamicPort()
- .dynamicHttpsPort());
- }
-
- public WireMockExtension(Options options) {
- super(options);
- }
-
- @Override
- public void beforeEach(ExtensionContext extensionContext) {
- start();
- }
-
- @Override
- public void afterEach(ExtensionContext extensionContext) {
- stop();
- resetAll();
- }
-
-}
diff --git a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java
index 1641bd7802f..a2ac86309d9 100644
--- a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java
+++ b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/TestReport.java
@@ -336,15 +336,15 @@ public class TestReport {
StackTraceElement[] stack = thrown.getStackTrace();
int i = 0;
- int previousNativeFrame = -1;
+ int firstReflectFrame = -1;
int cutoff = 0;
boolean rootedInTestFramework = false;
while (++i < stack.length) {
rootedInTestFramework |= testFrameworkRootClass.equals(stack[i].getClassName());
- if (stack[i].isNativeMethod())
- previousNativeFrame = i; // Native method invokes the first user test frame.
- if (rootedInTestFramework && previousNativeFrame > 0) {
- cutoff = previousNativeFrame;
+ if (firstReflectFrame == -1 && stack[i].getClassName().startsWith("jdk.internal.reflect."))
+ firstReflectFrame = i; // jdk.internal.reflect class invokes the first user test frame, on both jdk 17 and 21.
+ if (rootedInTestFramework && firstReflectFrame > 0) {
+ cutoff = firstReflectFrame;
break;
}
boolean isDynamicTestInvocation = "org.junit.jupiter.engine.descriptor.DynamicTestTestDescriptor".equals(stack[i].getClassName());
diff --git a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java
index 9bb0e7ff955..cf2a1700f28 100644
--- a/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java
+++ b/vespa-osgi-testrunner/src/main/java/com/yahoo/vespa/testrunner/VespaCliTestRunner.java
@@ -122,6 +122,7 @@ public class VespaCliTestRunner implements TestRunner {
// The CI environment variables tell Vespa CLI to omit certain warnings that do not apply to CI environments
builder.environment().put("CI", "true");
builder.environment().put("VESPA_CLI_CLOUD_CI", "true");
+ builder.environment().put("VESPA_CLI_CLOUD_SYSTEM", config.system().value());
builder.environment().put("VESPA_CLI_HOME", ensureDirectoryForVespaCli("cli-home").toString());
builder.environment().put("VESPA_CLI_CACHE_DIR", ensureDirectoryForVespaCli("cli-cache").toString());
builder.environment().put("VESPA_CLI_ENDPOINTS", toEndpointsConfig(config));
diff --git a/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java b/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java
index fcd0553566f..288442eaf7d 100644
--- a/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java
+++ b/vespa-osgi-testrunner/src/test/java/com/yahoo/vespa/testrunner/VespaCliTestRunnerTest.java
@@ -42,6 +42,7 @@ class VespaCliTestRunnerTest {
assertTrue(builder.environment().containsKey("VESPA_CLI_CLOUD_CI"));
assertTrue(builder.environment().containsKey("VESPA_CLI_HOME"));
assertTrue(builder.environment().containsKey("VESPA_CLI_CACHE_DIR"));
+ assertEquals("publiccd", builder.environment().get("VESPA_CLI_CLOUD_SYSTEM"));
assertEquals("{\"endpoints\":[{\"cluster\":\"default\",\"url\":\"https://dev.endpoint:443/\"}]}",
builder.environment().get("VESPA_CLI_ENDPOINTS"));
assertEquals(artifacts.resolve("key").toAbsolutePath().toString(),
@@ -72,6 +73,7 @@ class VespaCliTestRunnerTest {
assertTrue(builder.environment().containsKey("VESPA_CLI_CLOUD_CI"));
assertTrue(builder.environment().containsKey("VESPA_CLI_HOME"));
assertTrue(builder.environment().containsKey("VESPA_CLI_CACHE_DIR"));
+ assertEquals("cd", builder.environment().get("VESPA_CLI_CLOUD_SYSTEM"));
assertEquals("{\"endpoints\":[{\"cluster\":\"default\",\"url\":\"https://dev.endpoint:443/\"}]}",
builder.environment().get("VESPA_CLI_ENDPOINTS"));
assertEquals("/opt/vespa/var/vespa/sia/key",
diff --git a/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp
index 8aa9b943419..edf38122202 100644
--- a/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp
+++ b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp
@@ -1,17 +1,26 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastlib/io/bufferedfile.h>
#include <vespa/vespalib/testkit/test_kit.h>
+#include <filesystem>
+namespace {
+
+void remove_testfiles()
+{
+ std::filesystem::remove(std::filesystem::path("testfile1"));
+ std::filesystem::remove(std::filesystem::path("testfile2"));
+ std::filesystem::remove(std::filesystem::path("testfile3"));
+ std::filesystem::remove(std::filesystem::path("testfile4"));
+ std::filesystem::remove(std::filesystem::path("testfile5"));
+}
+
+}
TEST("main") {
int value = 0;
FastOS_StatInfo statInfo;
- FastOS_File::Delete("testfile1");
- FastOS_File::Delete("testfile2");
- FastOS_File::Delete("testfile3");
- FastOS_File::Delete("testfile4");
- FastOS_File::Delete("testfile5");
+ remove_testfiles();
Fast_BufferedFile bufFile(4096);
@@ -84,11 +93,7 @@ TEST("main") {
}
printf (" -- SUCCESS\n\n");
- FastOS_File::Delete("testfile1");
- FastOS_File::Delete("testfile2");
- FastOS_File::Delete("testfile3");
- FastOS_File::Delete("testfile4");
- FastOS_File::Delete("testfile5");
+ remove_testfiles();
printf ("All tests OK for bufferedfiletest\n");
printf (" -- SUCCESS\n\n");
diff --git a/vespalib/src/tests/fastos/file_test.cpp b/vespalib/src/tests/fastos/file_test.cpp
index 6b58a4a1fd8..ecb35df5d26 100644
--- a/vespalib/src/tests/fastos/file_test.cpp
+++ b/vespalib/src/tests/fastos/file_test.cpp
@@ -165,7 +165,7 @@ TEST(FileTest, WriteOnlyTest) {
ASSERT_EQ(myFile->GetPosition(), 0);
EXPECT_LT(myFile->Read(dummyData, 6), 0);
EXPECT_TRUE(myFile->Close());
- EXPECT_TRUE(myFile->Delete());
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path(woFilename)));
}
TEST(FileTest, ReadWriteTest) {
@@ -188,7 +188,7 @@ TEST(FileTest, ReadWriteTest) {
EXPECT_EQ(myFile->Read(dummyData2, 6), 0);
EXPECT_EQ(myFile->GetPosition(), 6);
EXPECT_TRUE(myFile->Close());
- EXPECT_TRUE(myFile->Delete());
+ EXPECT_TRUE(std::filesystem::remove(std::filesystem::path(rwFilename)));
}
TEST(FileTest, ScanDirectoryTest) {
diff --git a/vespalib/src/tests/fileheader/fileheader_test.cpp b/vespalib/src/tests/fileheader/fileheader_test.cpp
index 21e374e4f62..911c6ef7cfe 100644
--- a/vespalib/src/tests/fileheader/fileheader_test.cpp
+++ b/vespalib/src/tests/fileheader/fileheader_test.cpp
@@ -4,9 +4,16 @@
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/fastos/file.h>
+#include <filesystem>
using namespace vespalib;
+namespace {
+
+vespalib::string fileheader_tmp("fileheader.tmp");
+
+}
+
class Test : public vespalib::TestApp {
private:
void testTag();
@@ -337,7 +344,7 @@ Test::testFileReader()
{
{
FastOS_File file;
- ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str()));
uint8_t buf[256];
for (uint32_t i = 0; i < 256; ++i) {
@@ -347,7 +354,7 @@ Test::testFileReader()
}
{
FastOS_File file;
- ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str()));
FileHeader::FileReader reader(file);
char buf[7];
@@ -362,7 +369,7 @@ Test::testFileReader()
EXPECT_EQUAL(256u, sum);
ASSERT_TRUE(file.Close());
- file.Delete();
+ std::filesystem::remove(std::filesystem::path(fileheader_tmp));
}
}
@@ -371,7 +378,7 @@ Test::testFileWriter()
{
{
FastOS_File file;
- ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str()));
FileHeader::FileWriter writer(file);
uint32_t sum = 0;
@@ -388,7 +395,7 @@ Test::testFileWriter()
}
{
FastOS_File file;
- ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str()));
uint8_t buf[256];
EXPECT_EQUAL(256, file.Read(buf, 256));
@@ -397,7 +404,7 @@ Test::testFileWriter()
}
ASSERT_TRUE(file.Close());
- file.Delete();
+ std::filesystem::remove(std::filesystem::path(fileheader_tmp));
}
}
@@ -412,13 +419,13 @@ Test::testFileHeader()
header.putTag(FileHeader::Tag("baz", "666999"));
FastOS_File file;
- ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str()));
len = header.writeFile(file);
EXPECT_EQUAL(len, header.getSize());
}
{
FastOS_File file;
- ASSERT_TRUE(file.OpenReadWrite("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenReadWrite(fileheader_tmp.c_str()));
FileHeader header;
EXPECT_EQUAL(len, header.readFile(file));
@@ -441,11 +448,11 @@ Test::testFileHeader()
FileHeader header;
FastOS_File file;
- ASSERT_TRUE(file.OpenReadOnly("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenReadOnly(fileheader_tmp.c_str()));
EXPECT_EQUAL(len, header.readFile(file));
EXPECT_EQUAL(len, header.getSize());
ASSERT_TRUE(file.Close());
- file.Delete();
+ std::filesystem::remove(std::filesystem::path(fileheader_tmp));
EXPECT_TRUE(header.hasTag("foo"));
EXPECT_EQUAL(9.6, header.getTag("foo").asFloat());
@@ -571,12 +578,12 @@ Test::testRewriteErrors()
{
FastOS_File file;
- ASSERT_TRUE(file.OpenWriteOnlyTruncate("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenWriteOnlyTruncate(fileheader_tmp.c_str()));
EXPECT_EQUAL(len, header.writeFile(file));
}
{
FastOS_File file;
- ASSERT_TRUE(file.OpenReadWrite("fileheader.tmp"));
+ ASSERT_TRUE(file.OpenReadWrite(fileheader_tmp.c_str()));
header.putTag(FileHeader::Tag("baz", "cox"));
EXPECT_TRUE(len != header.getSize());
try {
diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt
index bc48e775711..00a89d0a604 100644
--- a/vespalib/src/tests/fuzzy/CMakeLists.txt
+++ b/vespalib/src/tests/fuzzy/CMakeLists.txt
@@ -16,3 +16,12 @@ vespa_add_executable(vespalib_levenshtein_distance_test_app TEST
GTest::GTest
)
vespa_add_test(NAME vespalib_levenshtein_distance_test_app COMMAND vespalib_levenshtein_distance_test_app)
+
+vespa_add_executable(vespalib_levenshtein_dfa_test_app TEST
+ SOURCES
+ levenshtein_dfa_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+vespa_add_test(NAME vespalib_levenshtein_dfa_test_app COMMAND vespalib_levenshtein_dfa_test_app)
diff --git a/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp
new file mode 100644
index 00000000000..6966fd0b703
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp
@@ -0,0 +1,507 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
+#include <vespa/vespalib/fuzzy/dfa_stepping_base.h>
+#include <vespa/vespalib/fuzzy/unicode_utils.h>
+#include <vespa/vespalib/fuzzy/levenshtein_distance.h> // For benchmarking purposes
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <charconv>
+#include <concepts>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <string_view>
+#include <gtest/gtest.h>
+
+using namespace ::testing;
+using namespace vespalib::fuzzy;
+namespace fs = std::filesystem;
+
+static std::string benchmark_dictionary;
+
+struct LevenshteinDfaTest : TestWithParam<LevenshteinDfa::DfaType> {
+
+ static LevenshteinDfa::DfaType dfa_type() noexcept { return GetParam(); }
+
+ static std::optional<uint32_t> calculate(std::string_view left, std::string_view right, uint32_t threshold) {
+ auto dfa_lhs = LevenshteinDfa::build(left, threshold, dfa_type());
+ auto maybe_match_lhs = dfa_lhs.match(right, nullptr);
+
+ auto dfa_rhs = LevenshteinDfa::build(right, threshold, dfa_type());
+ auto maybe_match_rhs = dfa_rhs.match(left, nullptr);
+
+ EXPECT_EQ(maybe_match_lhs.matches(), maybe_match_rhs.matches());
+ if (maybe_match_lhs.matches()) {
+ EXPECT_EQ(maybe_match_lhs.edits(), maybe_match_rhs.edits());
+ return {maybe_match_lhs.edits()};
+ }
+ return std::nullopt;
+ }
+
+ static std::optional<uint32_t> calculate(std::u8string_view left, std::u8string_view right, uint32_t threshold) {
+ std::string_view lhs_ch(reinterpret_cast<const char*>(left.data()), left.size());
+ std::string_view rhs_ch(reinterpret_cast<const char*>(right.data()), right.size());
+ return calculate(lhs_ch, rhs_ch, threshold);
+ }
+
+};
+
+INSTANTIATE_TEST_SUITE_P(AllDfaTypes,
+ LevenshteinDfaTest,
+ Values(LevenshteinDfa::DfaType::Explicit,
+ LevenshteinDfa::DfaType::Implicit),
+ PrintToStringParamName());
+
+// Same as existing non-DFA Levenshtein tests, but with some added instantiations
+// for smaller max distances.
+TEST_P(LevenshteinDfaTest, edge_cases_have_correct_edit_distance) {
+ EXPECT_EQ(calculate("abc", "abc", 2), std::optional{0});
+ for (auto max : {1, 2}) {
+ EXPECT_EQ(calculate("abc", "ab1", max), std::optional{1}) << max;
+ EXPECT_EQ(calculate("abc", "1bc", max), std::optional{1}) << max;
+ EXPECT_EQ(calculate("abc", "a1c", max), std::optional{1}) << max;
+ EXPECT_EQ(calculate("abc", "ab", max), std::optional{1}) << max;
+ EXPECT_EQ(calculate("abc", "abcd", max), std::optional{1}) << max;
+ EXPECT_EQ(calculate("a", "", max), std::optional{1}) << max;
+ }
+ EXPECT_EQ(calculate("bc", "abcd", 2), std::optional{2});
+ EXPECT_EQ(calculate("ab", "abcd", 2), std::optional{2});
+ EXPECT_EQ(calculate("cd", "abcd", 2), std::optional{2});
+ EXPECT_EQ(calculate("ad", "abcd", 2), std::optional{2});
+ EXPECT_EQ(calculate("abc", "a12", 2), std::optional{2});
+ EXPECT_EQ(calculate("abc", "123", 2), std::nullopt);
+ EXPECT_EQ(calculate("ab", "", 1), std::nullopt);
+ EXPECT_EQ(calculate("ab", "", 2), std::optional{2});
+ EXPECT_EQ(calculate("abc", "", 2), std::nullopt);
+ EXPECT_EQ(calculate("abc", "123", 2), std::nullopt);
+}
+
+TEST_P(LevenshteinDfaTest, distance_is_in_utf32_code_point_space) {
+ // Each hiragana/katakana/kanji corresponds to multiple (3) UTF-8 chars but a single UTF-32 code point.
+ EXPECT_EQ(calculate(u8"猫", u8"猫", 2), std::optional{0});
+ EXPECT_EQ(calculate(u8"猫", u8"犬", 2), std::optional{1});
+ EXPECT_EQ(calculate(u8"猫と犬", u8"犬と猫", 2), std::optional{2});
+ EXPECT_EQ(calculate(u8"猫は好き", u8"犬が好き", 2), std::optional{2});
+ EXPECT_EQ(calculate(u8"カラオケ", u8"カラオケ", 2), std::optional{0});
+ EXPECT_EQ(calculate(u8"カラオケ", u8"カラoケ", 2), std::optional{1});
+ EXPECT_EQ(calculate(u8"カラオケ", u8"カraオケ", 2), std::optional{2});
+ EXPECT_EQ(calculate(u8"kaラオケ", u8"カラオケ", 2), std::optional{2});
+ EXPECT_EQ(calculate(u8"カラオケ", u8"カラoke", 2), std::nullopt);
+}
+
+void test_dfa_successor(const LevenshteinDfa& dfa, std::string_view source, std::string_view expected_successor) {
+ std::string successor;
+ auto m = dfa.match(source, &successor);
+ if (m.matches()) {
+ FAIL() << "Expected '" << source << "' to emit a successor, but it "
+ << "matched with " << static_cast<uint32_t>(m.edits())
+ << " edits (of max " << static_cast<uint32_t>(m.max_edits()) << " edits)";
+ }
+ EXPECT_EQ(successor, expected_successor);
+ EXPECT_TRUE(dfa.match(successor, nullptr).matches());
+}
+
+TEST_P(LevenshteinDfaTest, can_generate_successors_to_mismatching_source_strings) {
+ auto dfa = LevenshteinDfa::build("food", 1, dfa_type());
+
+ test_dfa_successor(dfa, "", "\x01""food");
+ test_dfa_successor(dfa, "faa", "faod");
+ test_dfa_successor(dfa, "fooooo", "foop");
+ test_dfa_successor(dfa, "ooof", "pfood");
+ test_dfa_successor(dfa, "fo", "fo\x01""d");
+ test_dfa_successor(dfa, "oo", "ood");
+ test_dfa_successor(dfa, "ooo", "oood");
+ test_dfa_successor(dfa, "foh", "fohd");
+ test_dfa_successor(dfa, "foho", "fohod");
+ test_dfa_successor(dfa, "foxx", "foyd");
+ test_dfa_successor(dfa, "xfa", "xfood");
+ test_dfa_successor(dfa, "gg", "good");
+ test_dfa_successor(dfa, "gp", "hfood");
+ test_dfa_successor(dfa, "ep", "f\x01""od");
+ test_dfa_successor(dfa, "hfoodz", "hood");
+ test_dfa_successor(dfa, "aooodz", "bfood");
+
+ // Also works with Unicode
+ // 2 chars
+ test_dfa_successor(dfa, "\xc3\x86""x", // "Æx"
+ "\xc3\x87""food"); // "Çfood"
+ // 3 chars
+ test_dfa_successor(dfa, "\xe7\x8c\xab""\xe3\x81\xaf", // "猫は"
+ "\xe7\x8c\xac""food"); // "猬food"
+ // 4 chars
+ test_dfa_successor(dfa, "\xf0\x9f\xa4\xa9""abc", // <starry eyed emoji>abc
+ "\xf0\x9f\xa4\xa9""food"); // <starry eyed emoji>food
+
+ // Note that as a general rule, emojis are fickle beasts to deal with since a single
+ // emoji often takes up multiple code points, which we consider separate characters
+ // but a user sees as a single actual rendered glyph.
+ // Multi-code point character edit distance support is left as an exercise for the reader :D
+}
+
+TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_max_unicode_code_point_input) {
+ auto dfa = LevenshteinDfa::build("food", 1, dfa_type());
+ // The successor string must be lexicographically larger than the input string.
+ // In the presence of a wildcard output edge we handle this by increasing the input
+ // character by 1 and encoding it back as UTF-8.
+ // It is possible (though arguably very unlikely) that the input character is
+ // U+10FFFF, which is the maximum valid Unicode character. We have to ensure that
+ // we can encode U+10FFFF + 1, even though it's technically outside the valid range.
+ // Luckily, UTF-8 can technically (there's that word again) encode up to U+1FFFFF,
+ // so the resulting string is byte-wise greater, and that's what matters since we
+ // don't guarantee that the successor string is _valid_ UTF-8.
+ // This problem does not happen with the target string, as it's an invalid character
+ // and will be replaced with the Unicode replacement char before we ever see it.
+ test_dfa_successor(dfa, "\xf4\x8f\xbf\xbf""xyz", // U+10FFFF
+ "\xf4\x90\x80\x80""food");// U+10FFFF+1
+}
+
+TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_empty_target) {
+ auto dfa = LevenshteinDfa::build("", 1, dfa_type());
+ test_dfa_successor(dfa, "aa", "b");
+ test_dfa_successor(dfa, "b\x01", "c");
+ test_dfa_successor(dfa, "vespa", "w");
+}
+
+// We should normally be able to rely on higher-level components to ensure we
+// only receive valid UTF-8, but make sure we don't choke on it if we do get it.
+TEST_P(LevenshteinDfaTest, malformed_utf8_is_replaced_with_placeholder_char) {
+ // 0xff is not a valid encoding and is implicitly converted to U+FFFD,
+ // which is the standard Unicode replacement character.
+ EXPECT_EQ(calculate("\xff", "a", 2), std::optional{1});
+ EXPECT_EQ(calculate("\xff\xff", "a", 2), std::optional{2});
+ EXPECT_EQ(calculate("a", "\xff", 2), std::optional{1});
+ EXPECT_EQ(calculate("a", "\xff\xff\xff", 2), std::nullopt);
+ EXPECT_EQ(calculate("\xff", "\xef\xbf\xbd"/*U+FFFD*/, 2), std::optional{0});
+}
+
+TEST_P(LevenshteinDfaTest, unsupported_max_edits_value_throws) {
+ EXPECT_THROW((void)LevenshteinDfa::build("abc", 0, dfa_type()), std::invalid_argument);
+ EXPECT_THROW((void)LevenshteinDfa::build("abc", 3, dfa_type()), std::invalid_argument);
+}
+
+// Turn integer v into its bitwise string representation with the MSB as the leftmost character.
+template <std::unsigned_integral T>
+std::string bits_to_str(T v) {
+ constexpr const uint8_t n_bits = sizeof(T) * 8;
+ std::string ret(n_bits, '0');
+ for (uint8_t bit = 0; bit < n_bits; ++bit) {
+ if (v & (1 << bit)) {
+ ret[n_bits - bit - 1] = '1';
+ }
+ }
+ return ret;
+}
+
+using DfaTypeAndMaxEdits = std::tuple<LevenshteinDfa::DfaType, uint32_t>;
+
+struct LevenshteinDfaSuccessorTest : TestWithParam<DfaTypeAndMaxEdits> {
+ // Print test suffix as e.g. "/Explicit_1" instead of just a GTest-chosen number.
+ static std::string stringify_params(const TestParamInfo<ParamType>& info) {
+ std::ostringstream ss;
+ ss << std::get<0>(info.param) << '_' << std::get<1>(info.param);
+ return ss.str();
+ }
+};
+
+INSTANTIATE_TEST_SUITE_P(SupportedMaxEdits,
+ LevenshteinDfaSuccessorTest,
+ Combine(Values(LevenshteinDfa::DfaType::Explicit,
+ LevenshteinDfa::DfaType::Implicit),
+ Values(1, 2)),
+ LevenshteinDfaSuccessorTest::stringify_params);
+
+/**
+ * Exhaustively test successor generation by matching all target and source strings
+ * in {0,1}^8 against each other. Since we generate bit strings identical to the
+ * bit patterns of the underlying counter(s), any string at index `i+1` will compare
+ * lexicographically greater than the one at `i`. We use this to test that we never
+ * miss a valid match that comes between a mismatch and its generated successor.
+ *
+ * For each mismatch we note the successor it emitted. Verify that each subsequent
+ * match() invocation for a source string < the successor results in a mismatch.
+ *
+ * We test this for both max edit distance 1 and 2. Despite being an exhaustive test,
+ * this completes in a few dozen milliseconds even with ASan instrumentation.
+ *
+ * Inspired by approach used by Lucene DFA exhaustive testing.
+ */
+TEST_P(LevenshteinDfaSuccessorTest, exhaustive_successor_test) {
+ const auto [dfa_type, max_edits] = GetParam();
+ for (uint32_t i = 0; i < 256; ++i) {
+ const auto target = bits_to_str(static_cast<uint8_t>(i));
+ auto target_dfa = LevenshteinDfa::build(target, max_edits, dfa_type);
+ std::string skip_to, successor;
+ for (uint32_t j = 0; j < 256; ++j) {
+ const auto source = bits_to_str(static_cast<uint8_t>(j));
+ auto maybe_match = target_dfa.match(source, &successor);
+ if (maybe_match.matches() && !skip_to.empty()) {
+ ASSERT_GE(source, skip_to);
+ } else if (!maybe_match.matches()) {
+ ASSERT_FALSE(successor.empty()) << source;
+ ASSERT_GE(successor, skip_to) << source;
+ ASSERT_GT(successor, source) << source;
+ skip_to = successor;
+ }
+ }
+ }
+}
+
+namespace {
+
+template <uint8_t MaxEdits>
+void explore(const DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>& stepper,
+ const typename DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>::StateType& in_state)
+{
+ ASSERT_EQ(stepper.can_match(stepper.step(in_state, WILDCARD)),
+ stepper.can_wildcard_step(in_state));
+ if (!stepper.can_match(in_state)) {
+ return; // reached the end of the line
+ }
+ // DFS-explore all matching transitions, as well as one non-matching transition
+ auto t = stepper.transitions(in_state);
+ for (uint32_t c: t.u32_chars()) {
+ ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, c)));
+ }
+ ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, WILDCARD)));
+}
+
+} // anon ns
+
+using StateStepperTypes = Types<
+ DfaSteppingBase<FixedMaxEditDistanceTraits<1>>,
+ DfaSteppingBase<FixedMaxEditDistanceTraits<2>>
+>;
+
+template <typename SteppingBase>
+struct LevenshteinSparseStateTest : Test {};
+
+TYPED_TEST_SUITE(LevenshteinSparseStateTest, StateStepperTypes);
+
+// "Meta-test" for checking that the `can_wildcard_step` predicate function is
+// functionally equivalent to evaluating `can_match(stepper.step(in_state, WILDCARD))`
+TYPED_TEST(LevenshteinSparseStateTest, wildcard_step_predcate_is_equivalent_to_step_with_can_match) {
+ for (const char* target : {"", "a", "ab", "abc", "abcdef", "aaaaa"}) {
+ auto u32_target = utf8_string_to_utf32(target);
+ TypeParam stepper(u32_target);
+ ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.start()));
+ }
+}
+
+template <typename T>
+void do_not_optimize_away(T&& t) noexcept {
+ asm volatile("" : : "m"(t) : "memory"); // Clobber the value to avoid losing it to compiler optimizations
+}
+
+enum class BenchmarkType {
+ DfaExplicit,
+ DfaImplicit,
+ Legacy
+};
+
+const char* to_s(BenchmarkType t) noexcept {
+ // Note: need underscores since this is used as part of GTest-generated test instance names
+ switch (t) {
+ case BenchmarkType::DfaExplicit: return "DFA_explicit";
+ case BenchmarkType::DfaImplicit: return "DFA_implicit";
+ case BenchmarkType::Legacy: return "legacy";
+ }
+ abort();
+}
+
+[[nodiscard]] bool benchmarking_enabled() noexcept {
+ return !benchmark_dictionary.empty();
+}
+
+[[nodiscard]] std::vector<uint32_t> string_lengths() {
+ return {2, 8, 16, 64, 256, 1024, 1024*16, 1024*64};
+}
+
+struct LevenshteinBenchmarkTest : TestWithParam<BenchmarkType> {
+
+ static std::string stringify_params(const TestParamInfo<ParamType>& info) {
+ return to_s(info.param);
+ }
+
+ void SetUp() override {
+ if (!benchmarking_enabled()) {
+ GTEST_SKIP() << "benchmarking not enabled";
+ }
+ }
+
+ static BenchmarkType benchmark_type() noexcept { return GetParam(); }
+
+ static const std::vector<std::string>& load_dictionary_once() {
+ static auto sorted_lines = read_and_sort_all_lines(fs::path(benchmark_dictionary));
+ return sorted_lines;
+ }
+
+ static std::vector<std::string> read_and_sort_all_lines(const fs::path& file_path) {
+ std::ifstream ifs(file_path);
+ if (!ifs.is_open()) {
+ throw std::invalid_argument("File does not exist");
+ }
+ std::vector<std::string> lines;
+ std::string line;
+ while (std::getline(ifs, line)) {
+ lines.emplace_back(line);
+ }
+ std::sort(lines.begin(), lines.end());
+ return lines;
+ }
+};
+
+INSTANTIATE_TEST_SUITE_P(AllDfaTypes,
+ LevenshteinBenchmarkTest,
+ Values(BenchmarkType::DfaExplicit,
+ BenchmarkType::DfaImplicit,
+ BenchmarkType::Legacy),
+ LevenshteinBenchmarkTest::stringify_params);
+
+// ("abc", 1) => "a"
+// ("abc", 3) => "abc"
+// ("abc", 7) => "abcabca"
+// ... and so on.
+std::string repeated_string(std::string_view str, uint32_t sz) {
+ uint32_t chunks = sz / str.size();
+ std::string ret;
+ ret.reserve(sz);
+ for (uint32_t i = 0; i < chunks; ++i) {
+ ret += str;
+ }
+ uint32_t rem = sz % str.size();
+ ret += str.substr(0, rem);
+ return ret;
+}
+
+TEST_P(LevenshteinBenchmarkTest, benchmark_worst_case_matching_excluding_setup_time) {
+ using vespalib::BenchmarkTimer;
+ const auto type = benchmark_type();
+ fprintf(stderr, "------ %s ------\n", to_s(type));
+ for (uint8_t k : {1, 2}) {
+ for (uint32_t sz : string_lengths()) {
+ // Use same string as both source and target. This is the worst case in that the entire
+ // string must be matched and any sparse representation is always maximally filled since
+ // we never expend any edits via mismatches.
+ // Also ensure that we have multiple out-edges per node (i.e. don't just repeat "AAA" etc.).
+ std::string str = repeated_string("abcde", sz);
+ double min_time_s;
+ if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) {
+ auto dfa_type = (type == BenchmarkType::DfaExplicit) ? LevenshteinDfa::DfaType::Explicit
+ : LevenshteinDfa::DfaType::Implicit;
+ auto dfa = LevenshteinDfa::build(str, k, dfa_type);
+ min_time_s = BenchmarkTimer::benchmark([&] {
+ auto res = dfa.match(str, nullptr); // not benchmarking successor generation
+ do_not_optimize_away(res);
+ }, 1.0);
+ } else {
+ min_time_s = BenchmarkTimer::benchmark([&] {
+ auto str_u32 = utf8_string_to_utf32(str); // Must be done per term, so included in benchmark body
+ auto res = vespalib::LevenshteinDistance::calculate(str_u32, str_u32, k);
+ do_not_optimize_away(res);
+ }, 1.0);
+ }
+ fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0);
+ }
+ }
+}
+
+TEST(LevenshteinExplicitDfaBenchmarkTest, benchmark_explicit_dfa_construction) {
+ if (!benchmarking_enabled()) {
+ GTEST_SKIP() << "benchmarking not enabled";
+ }
+ using vespalib::BenchmarkTimer;
+ for (uint8_t k : {1, 2}) {
+ for (uint32_t sz : string_lengths()) {
+ std::string str = repeated_string("abcde", sz);
+ double min_time_s = BenchmarkTimer::benchmark([&] {
+ auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit);
+ do_not_optimize_away(dfa);
+ }, 2.0);
+ auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit);
+ size_t mem_usage = dfa.memory_usage();
+ fprintf(stderr, "k=%u, sz=%u: \t%g us \t%zu bytes\n", k, sz, min_time_s * 1000000.0, mem_usage);
+ }
+ }
+}
+
+TEST_P(LevenshteinBenchmarkTest, benchmark_brute_force_dictionary_scan) {
+ using vespalib::BenchmarkTimer;
+ const auto type = benchmark_type();
+ const auto dict = load_dictionary_once();
+ std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64};
+ fprintf(stderr, "------ %s ------\n", to_s(type));
+ for (uint8_t k : {1, 2}) {
+ for (uint32_t sz : target_lengths) {
+ std::string str = repeated_string("abcde", sz);
+ double min_time_s;
+ if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) {
+ auto dfa_type = (type == BenchmarkType::DfaExplicit) ? LevenshteinDfa::DfaType::Explicit
+ : LevenshteinDfa::DfaType::Implicit;
+ auto dfa = LevenshteinDfa::build(str, k, dfa_type);
+ min_time_s = BenchmarkTimer::benchmark([&] {
+ for (const auto& line : dict) {
+ auto res = dfa.match(line, nullptr);
+ do_not_optimize_away(res);
+ }
+ }, 2.0);
+ } else {
+ min_time_s = BenchmarkTimer::benchmark([&] {
+ auto target_u32 = utf8_string_to_utf32(str);
+ for (const auto& line : dict) {
+ auto line_u32 = utf8_string_to_utf32(line);
+ auto res = vespalib::LevenshteinDistance::calculate(line_u32, target_u32, k);
+ do_not_optimize_away(res);
+ }
+ }, 2.0);
+ }
+ fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0);
+ }
+ }
+}
+
+TEST_P(LevenshteinBenchmarkTest, benchmark_skipping_dictionary_scan) {
+ const auto type = benchmark_type();
+ if (type == BenchmarkType::Legacy) {
+ GTEST_SKIP() << "Skipping not supported for legacy implementation";
+ }
+ using vespalib::BenchmarkTimer;
+ const auto dict = load_dictionary_once();
+ std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64};
+ fprintf(stderr, "------ %s ------\n", to_s(type));
+ for (uint8_t k : {1, 2}) {
+ for (uint32_t sz : target_lengths) {
+ std::string str = repeated_string("abcde", sz);
+ auto dfa_type = (type == BenchmarkType::DfaExplicit) ? LevenshteinDfa::DfaType::Explicit
+ : LevenshteinDfa::DfaType::Implicit;
+ auto dfa = LevenshteinDfa::build(str, k, dfa_type);
+ double min_time_s = BenchmarkTimer::benchmark([&] {
+ auto iter = dict.cbegin();
+ auto end = dict.cend();
+ std::string successor;
+ while (iter != end) {
+ auto maybe_match = dfa.match(*iter, &successor);
+ if (maybe_match.matches()) {
+ ++iter;
+ } else {
+ iter = std::lower_bound(iter, end, successor);
+ }
+ }
+ }, 2.0);
+ fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0);
+ }
+ }
+}
+
+// TODO:
+// - explicit successor generation benchmark
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ if (argc > 1) {
+ benchmark_dictionary = argv[1];
+ if (!fs::exists(fs::path(benchmark_dictionary))) {
+ fprintf(stderr, "Benchmark dictionary file '%s' does not exist\n", benchmark_dictionary.c_str());
+ return 1;
+ }
+ }
+ return RUN_ALL_TESTS();
+}
diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp
index aecf08edf6b..60c8a4a40ef 100644
--- a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp
+++ b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp
@@ -287,7 +287,7 @@ Fast_BufferedFile::ReadOpenExisting(const char *name)
bool ok = Close();
ok &= _file->OpenReadOnlyExisting(true, name);
if (!ok) {
- fprintf(stderr, "ERROR opening %s for read: %s",
+ fprintf(stderr, "ERROR opening %s for read: %s\n",
_file->GetFileName(), getLastErrorString().c_str());
assert(ok);
}
@@ -304,7 +304,7 @@ Fast_BufferedFile::ReadOpen(const char *name)
bool ok = Close();
ok &= _file->OpenReadOnly(name);
if (!ok) {
- fprintf(stderr, "ERROR opening %s for read: %s",
+ fprintf(stderr, "ERROR opening %s for read: %s\n",
_file->GetFileName(), getLastErrorString().c_str());
assert(ok);
}
@@ -324,7 +324,7 @@ Fast_BufferedFile::WriteOpen(const char *name)
bool ok = Close();
ok &= _file->OpenWriteOnly(name);
if (!ok) {
- fprintf(stderr, "ERROR opening %s for write: %s",
+ fprintf(stderr, "ERROR opening %s for write: %s\n",
_file->GetFileName(), getLastErrorString().c_str());
assert(ok);
}
@@ -431,11 +431,6 @@ bool Fast_BufferedFile::Open(unsigned int openFlags, const char * name)
return ok;
}
-bool Fast_BufferedFile::Delete()
-{
- return _file->Delete();
-}
-
void Fast_BufferedFile::alignEndForDirectIO()
{
while( (_bufi - buf())%MIN_ALIGNMENT ) {
diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.h b/vespalib/src/vespa/fastlib/io/bufferedfile.h
index 2a5e0ec7535..8faada61f9d 100644
--- a/vespalib/src/vespa/fastlib/io/bufferedfile.h
+++ b/vespalib/src/vespa/fastlib/io/bufferedfile.h
@@ -217,7 +217,6 @@ public:
* Just forwarded to the real file to support FastOS_FileInterface.
*/
bool Open(unsigned int, const char*) override;
- bool Delete() override;
void alignEndForDirectIO();
};
diff --git a/vespalib/src/vespa/fastos/file.cpp b/vespalib/src/vespa/fastos/file.cpp
index fdbacb570b4..c8cce99b169 100644
--- a/vespalib/src/vespa/fastos/file.cpp
+++ b/vespalib/src/vespa/fastos/file.cpp
@@ -335,16 +335,6 @@ FastOS_FileInterface::getLastErrorString()
return FastOS_File::getErrorString(err);
}
-bool FastOS_FileInterface::Rename (const char *newFileName)
-{
- bool rc=false;
- if (FastOS_File::Rename(GetFileName(), newFileName)) {
- SetFileName(newFileName);
- rc = true;
- }
- return rc;
-}
-
void FastOS_FileInterface::dropFromCache() const
{
}
diff --git a/vespalib/src/vespa/fastos/file.h b/vespalib/src/vespa/fastos/file.h
index 1a637726e45..84b94df5acc 100644
--- a/vespalib/src/vespa/fastos/file.h
+++ b/vespalib/src/vespa/fastos/file.h
@@ -20,9 +20,6 @@ constexpr int FASTOS_FILE_OPEN_WRITE = (1<<1);
constexpr int FASTOS_FILE_OPEN_EXISTING = (1<<2);
constexpr int FASTOS_FILE_OPEN_CREATE = (1<<3);
constexpr int FASTOS_FILE_OPEN_TRUNCATE = (1<<4);
-constexpr int FASTOS_FILE_OPEN_STDOUT = (2<<5);
-constexpr int FASTOS_FILE_OPEN_STDERR = (3<<5);
-constexpr int FASTOS_FILE_OPEN_STDFLAGS = (3<<5);
constexpr int FASTOS_FILE_OPEN_DIRECTIO = (1<<7);
constexpr int FASTOS_FILE_OPEN_SYNCWRITES = (1<<9); // synchronous writes
@@ -338,24 +335,6 @@ public:
int64_t getSize() const { return const_cast<FastOS_FileInterface *>(this)->GetSize(); }
/**
- * Delete the file. This method requires that the file is
- * currently not opened.
- * @return Boolean success/failure
- */
- virtual bool Delete() = 0;
-
- /**
- * Rename/move a file or directory. This method requires that
- * the file is currently not opened. A move operation is
- * supported as long as the source and destination reside
- * on the same volume/device.
- * The method fails if the destination already exists.
- * @param newFileName New file name
- * @return Boolean success/failure
- */
- virtual bool Rename (const char *newFileName);
-
- /**
* Force completion of pending disk writes (flush cache).
*/
[[nodiscard]] virtual bool Sync() = 0;
diff --git a/vespalib/src/vespa/fastos/linux_file.cpp b/vespalib/src/vespa/fastos/linux_file.cpp
index 6fb29782957..dabe84a8f30 100644
--- a/vespalib/src/vespa/fastos/linux_file.cpp
+++ b/vespalib/src/vespa/fastos/linux_file.cpp
@@ -372,9 +372,6 @@ FastOS_Linux_File::Open(unsigned int openFlags, const char *filename)
bool rc;
_cachedSize = -1;
_filePointer = -1;
- if (_directIOEnabled && (_openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) {
- _directIOEnabled = false;
- }
if (_syncWritesEnabled) {
openFlags |= FASTOS_FILE_OPEN_SYNCWRITES;
}
diff --git a/vespalib/src/vespa/fastos/unix_file.cpp b/vespalib/src/vespa/fastos/unix_file.cpp
index 802e85d7609..952d9820306 100644
--- a/vespalib/src/vespa/fastos/unix_file.cpp
+++ b/vespalib/src/vespa/fastos/unix_file.cpp
@@ -207,80 +207,50 @@ FastOS_UNIX_File::Open(unsigned int openFlags, const char *filename)
bool rc = false;
assert(_filedes == -1);
- if ((openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) {
- FILE *file;
-
- switch(openFlags & FASTOS_FILE_OPEN_STDFLAGS) {
+ if (filename != nullptr) {
+ SetFileName(filename);
+ }
+ unsigned int accessFlags = CalcAccessFlags(openFlags);
- case FASTOS_FILE_OPEN_STDOUT:
- file = stdout;
- SetFileName("stdout");
- break;
+ _filedes = open(_filename.c_str(), accessFlags, 0664);
- case FASTOS_FILE_OPEN_STDERR:
- file = stderr;
- SetFileName("stderr");
- break;
+ rc = (_filedes != -1);
- default:
- fprintf(stderr, "Invalid open-flags %08X\n", openFlags);
- abort();
- }
-
-#ifdef __linux__
- _filedes = file->_fileno;
-#else
- _filedes = fileno(file);
-#endif
+ if (rc) {
_openFlags = openFlags;
- rc = true;
- } else {
- if (filename != nullptr) {
- SetFileName(filename);
- }
- unsigned int accessFlags = CalcAccessFlags(openFlags);
-
- _filedes = open(_filename.c_str(), accessFlags, 0664);
-
- rc = (_filedes != -1);
-
- if (rc) {
- _openFlags = openFlags;
- if (_mmapEnabled) {
- int64_t filesize = GetSize();
- auto mlen = static_cast<size_t>(filesize);
- if ((static_cast<int64_t>(mlen) == filesize) && (mlen > 0)) {
- void *mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | _mmapFlags, _filedes, 0);
- if (mbase == MAP_FAILED) {
- mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | (_mmapFlags & ALWAYS_SUPPORTED_MMAP_FLAGS), _filedes, 0);
- }
- if (mbase != MAP_FAILED) {
+ if (_mmapEnabled) {
+ int64_t filesize = GetSize();
+ auto mlen = static_cast<size_t>(filesize);
+ if ((static_cast<int64_t>(mlen) == filesize) && (mlen > 0)) {
+ void *mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | _mmapFlags, _filedes, 0);
+ if (mbase == MAP_FAILED) {
+ mbase = mmap(nullptr, mlen, PROT_READ, MAP_SHARED | (_mmapFlags & ALWAYS_SUPPORTED_MMAP_FLAGS), _filedes, 0);
+ }
+ if (mbase != MAP_FAILED) {
#ifdef __linux__
- int fadviseOptions = getFAdviseOptions();
- int eCode(0);
- if (POSIX_FADV_RANDOM == fadviseOptions) {
- eCode = posix_madvise(mbase, mlen, POSIX_MADV_RANDOM);
- } else if (POSIX_FADV_SEQUENTIAL == fadviseOptions) {
- eCode = posix_madvise(mbase, mlen, POSIX_MADV_SEQUENTIAL);
- }
- if (eCode != 0) {
- fprintf(stderr, "Failed: posix_madvise(%p, %ld, %d) = %d\n", mbase, mlen, fadviseOptions, eCode);
- }
-#endif
- _mmapbase = mbase;
- _mmaplen = mlen;
- } else {
- close(_filedes);
- _filedes = -1;
- std::ostringstream os;
- os << "mmap of file '" << GetFileName() << "' with flags '" << std::hex << (MAP_SHARED | _mmapFlags) << std::dec
- << "' failed with error :'" << getErrorString(GetLastOSError()) << "'";
- throw std::runtime_error(os.str());
+ int fadviseOptions = getFAdviseOptions();
+ int eCode(0);
+ if (POSIX_FADV_RANDOM == fadviseOptions) {
+ eCode = posix_madvise(mbase, mlen, POSIX_MADV_RANDOM);
+ } else if (POSIX_FADV_SEQUENTIAL == fadviseOptions) {
+ eCode = posix_madvise(mbase, mlen, POSIX_MADV_SEQUENTIAL);
+ }
+ if (eCode != 0) {
+ fprintf(stderr, "Failed: posix_madvise(%p, %ld, %d) = %d\n", mbase, mlen, fadviseOptions, eCode);
}
+#endif
+ _mmapbase = mbase;
+ _mmaplen = mlen;
+ } else {
+ close(_filedes);
+ _filedes = -1;
+ std::ostringstream os;
+ os << "mmap of file '" << GetFileName() << "' with flags '" << std::hex << (MAP_SHARED | _mmapFlags) << std::dec
+ << "' failed with error :'" << getErrorString(GetLastOSError()) << "'";
+ throw std::runtime_error(os.str());
}
}
}
-
}
return rc;
@@ -300,13 +270,9 @@ FastOS_UNIX_File::Close()
bool ok = true;
if (_filedes >= 0) {
- if ((_openFlags & FASTOS_FILE_OPEN_STDFLAGS) != 0) {
- ok = true;
- } else {
- do {
- ok = (close(_filedes) == 0);
- } while (!ok && errno == EINTR);
- }
+ do {
+ ok = (close(_filedes) == 0);
+ } while (!ok && errno == EINTR);
if (_mmapbase != nullptr) {
madvise(_mmapbase, _mmaplen, MADV_DONTNEED);
@@ -341,35 +307,6 @@ FastOS_UNIX_File::GetSize()
return fileSize;
}
-bool
-FastOS_UNIX_File::Delete(const char *name)
-{
- return (unlink(name) == 0);
-}
-
-
-bool
-FastOS_UNIX_File::Delete()
-{
- assert( ! IsOpened());
-
- return (unlink(_filename.c_str()) == 0);
-}
-
-bool FastOS_UNIX_File::Rename (const char *currentFileName, const char *newFileName)
-{
- bool rc = false;
-
- // Enforce documentation. If the destination file exists,
- // fail Rename.
- FastOS_StatInfo statInfo{};
- if (!FastOS_File::Stat(newFileName, &statInfo)) {
- rc = (rename(currentFileName, newFileName) == 0);
- } else {
- errno = EEXIST;
- }
- return rc;
-}
bool
FastOS_UNIX_File::Sync()
diff --git a/vespalib/src/vespa/fastos/unix_file.h b/vespalib/src/vespa/fastos/unix_file.h
index 3d1f6b9db3f..120b1d35add 100644
--- a/vespalib/src/vespa/fastos/unix_file.h
+++ b/vespalib/src/vespa/fastos/unix_file.h
@@ -30,13 +30,7 @@ protected:
bool _mmapEnabled;
static unsigned int CalcAccessFlags(unsigned int openFlags);
-
public:
- static bool Rename (const char *currentFileName, const char *newFileName);
- bool Rename (const char *newFileName) override {
- return FastOS_FileInterface::Rename(newFileName);
- }
-
static bool Stat(const char *filename, FastOS_StatInfo *statInfo);
static std::string getCurrentDirectory();
@@ -82,12 +76,10 @@ public:
bool SetPosition(int64_t desiredPosition) override;
int64_t GetPosition() override;
int64_t GetSize() override;
- bool Delete() override;
[[nodiscard]] bool Sync() override;
bool SetSize(int64_t newSize) override;
void dropFromCache() const override;
- static bool Delete(const char *filename);
static int GetLastOSError();
static Error TranslateError(const int osError);
static std::string getErrorString(const int osError);
diff --git a/vespalib/src/vespa/vespalib/datastore/bufferstate.h b/vespalib/src/vespa/vespalib/datastore/bufferstate.h
index 3de821928b8..01439586f5b 100644
--- a/vespalib/src/vespa/vespalib/datastore/bufferstate.h
+++ b/vespalib/src/vespa/vespalib/datastore/bufferstate.h
@@ -134,21 +134,21 @@ public:
class BufferAndMeta {
public:
- BufferAndMeta() noexcept : BufferAndMeta(nullptr, nullptr, 0, 0) { }
+ BufferAndMeta() : BufferAndMeta(nullptr, nullptr, 0, 0) { }
std::atomic<void*>& get_atomic_buffer() noexcept { return _buffer; }
void* get_buffer_relaxed() noexcept { return _buffer.load(std::memory_order_relaxed); }
const void* get_buffer_acquire() const noexcept { return _buffer.load(std::memory_order_acquire); }
- uint32_t getTypeId() const noexcept { return _typeId; }
- uint32_t get_array_size() const noexcept { return _array_size; }
- BufferState * get_state_relaxed() noexcept { return _state.load(std::memory_order_relaxed); }
+ uint32_t getTypeId() const { return _typeId; }
+ uint32_t get_array_size() const { return _array_size; }
+ BufferState * get_state_relaxed() { return _state.load(std::memory_order_relaxed); }
const BufferState * get_state_acquire() const { return _state.load(std::memory_order_acquire); }
uint32_t get_entry_size() const noexcept { return _entry_size; }
- void setTypeId(uint32_t typeId) noexcept { _typeId = typeId; }
- void set_array_size(uint32_t arraySize) noexcept { _array_size = arraySize; }
+ void setTypeId(uint32_t typeId) { _typeId = typeId; }
+ void set_array_size(uint32_t arraySize) { _array_size = arraySize; }
void set_entry_size(uint32_t entry_size) noexcept { _entry_size = entry_size; }
- void set_state(BufferState * state) noexcept { _state.store(state, std::memory_order_release); }
+ void set_state(BufferState * state) { _state.store(state, std::memory_order_release); }
private:
- BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize) noexcept
+ BufferAndMeta(void* buffer, BufferState * state, uint32_t typeId, uint32_t arraySize)
: _buffer(buffer),
_state(state),
_typeId(typeId),
diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp
index eea49e80135..4eb4ff16864 100644
--- a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.cpp
@@ -10,19 +10,19 @@
namespace vespalib::datastore {
bool
-CompactionStrategy::should_compact_memory(const MemoryUsage& memory_usage) const noexcept
+CompactionStrategy::should_compact_memory(const MemoryUsage& memory_usage) const
{
return should_compact_memory(memory_usage.usedBytes(), memory_usage.deadBytes());
}
bool
-CompactionStrategy::should_compact_address_space(const AddressSpace& address_space) const noexcept
+CompactionStrategy::should_compact_address_space(const AddressSpace& address_space) const
{
return should_compact_address_space(address_space.used(), address_space.dead());
}
CompactionSpec
-CompactionStrategy::should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const noexcept
+CompactionStrategy::should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const
{
return CompactionSpec(should_compact_memory(memory_usage), should_compact_address_space(address_space));
}
@@ -36,7 +36,7 @@ std::ostream& operator<<(std::ostream& os, const CompactionStrategy& compaction_
}
CompactionStrategy
-CompactionStrategy::make_compact_all_active_buffers_strategy() noexcept
+CompactionStrategy::make_compact_all_active_buffers_strategy()
{
return CompactionStrategy(0.0, 0.0, std::numeric_limits<uint32_t>::max(), 1.0);
}
diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h
index c0c1857deae..f78e123e5de 100644
--- a/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h
+++ b/vespalib/src/vespa/vespalib/datastore/compaction_strategy.h
@@ -25,15 +25,15 @@ public:
static constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
static constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u;
private:
- float _maxDeadBytesRatio; // Max ratio of dead bytes before compaction
- float _maxDeadAddressSpaceRatio; // Max ratio of dead address space before compaction
- float _active_buffers_ratio; // Ratio of active buffers to compact for each reason (memory usage, address space usage)
+ double _maxDeadBytesRatio; // Max ratio of dead bytes before compaction
+ double _maxDeadAddressSpaceRatio; // Max ratio of dead address space before compaction
uint32_t _max_buffers; // Max number of buffers to compact for each reason (memory usage, address space usage)
- bool should_compact_memory(size_t used_bytes, size_t dead_bytes) const noexcept {
+ double _active_buffers_ratio; // Ratio of active buffers to compact for each reason (memory usage, address space usage)
+ bool should_compact_memory(size_t used_bytes, size_t dead_bytes) const {
return ((dead_bytes >= DEAD_BYTES_SLACK) &&
(dead_bytes > used_bytes * getMaxDeadBytesRatio()));
}
- bool should_compact_address_space(size_t used_address_space, size_t dead_address_space) const noexcept {
+ bool should_compact_address_space(size_t used_address_space, size_t dead_address_space) const {
return ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) &&
(dead_address_space > used_address_space * getMaxDeadAddressSpaceRatio()));
}
@@ -41,37 +41,40 @@ public:
CompactionStrategy() noexcept
: _maxDeadBytesRatio(0.05),
_maxDeadAddressSpaceRatio(0.2),
- _active_buffers_ratio(0.1),
- _max_buffers(1)
- { }
- CompactionStrategy(float maxDeadBytesRatio, float maxDeadAddressSpaceRatio) noexcept
+ _max_buffers(1),
+ _active_buffers_ratio(0.1)
+ {
+ }
+ CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio) noexcept
: _maxDeadBytesRatio(maxDeadBytesRatio),
_maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio),
- _active_buffers_ratio(0.1),
- _max_buffers(1)
- { }
- CompactionStrategy(float maxDeadBytesRatio, float maxDeadAddressSpaceRatio, uint32_t max_buffers, float active_buffers_ratio) noexcept
+ _max_buffers(1),
+ _active_buffers_ratio(0.1)
+ {
+ }
+ CompactionStrategy(double maxDeadBytesRatio, double maxDeadAddressSpaceRatio, uint32_t max_buffers, double active_buffers_ratio) noexcept
: _maxDeadBytesRatio(maxDeadBytesRatio),
_maxDeadAddressSpaceRatio(maxDeadAddressSpaceRatio),
- _active_buffers_ratio(active_buffers_ratio),
- _max_buffers(max_buffers)
- { }
- float getMaxDeadBytesRatio() const noexcept { return _maxDeadBytesRatio; }
- float getMaxDeadAddressSpaceRatio() const noexcept { return _maxDeadAddressSpaceRatio; }
+ _max_buffers(max_buffers),
+ _active_buffers_ratio(active_buffers_ratio)
+ {
+ }
+ double getMaxDeadBytesRatio() const { return _maxDeadBytesRatio; }
+ double getMaxDeadAddressSpaceRatio() const { return _maxDeadAddressSpaceRatio; }
uint32_t get_max_buffers() const noexcept { return _max_buffers; }
- float get_active_buffers_ratio() const noexcept { return _active_buffers_ratio; }
- bool operator==(const CompactionStrategy & rhs) const noexcept {
+ double get_active_buffers_ratio() const noexcept { return _active_buffers_ratio; }
+ bool operator==(const CompactionStrategy & rhs) const {
return (_maxDeadBytesRatio == rhs._maxDeadBytesRatio) &&
(_maxDeadAddressSpaceRatio == rhs._maxDeadAddressSpaceRatio) &&
(_max_buffers == rhs._max_buffers) &&
(_active_buffers_ratio == rhs._active_buffers_ratio);
}
- bool operator!=(const CompactionStrategy & rhs) const noexcept { return !(operator==(rhs)); }
+ bool operator!=(const CompactionStrategy & rhs) const { return !(operator==(rhs)); }
- bool should_compact_memory(const MemoryUsage& memory_usage) const noexcept;
- bool should_compact_address_space(const AddressSpace& address_space) const noexcept;
- CompactionSpec should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const noexcept;
- static CompactionStrategy make_compact_all_active_buffers_strategy() noexcept;
+ bool should_compact_memory(const MemoryUsage& memory_usage) const;
+ bool should_compact_address_space(const AddressSpace& address_space) const;
+ CompactionSpec should_compact(const MemoryUsage& memory_usage, const AddressSpace& address_space) const;
+ static CompactionStrategy make_compact_all_active_buffers_strategy();
};
std::ostream& operator<<(std::ostream& os, const CompactionStrategy& compaction_strategy);
diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h
index b91d6c7cfa6..dbcdbeb12b9 100644
--- a/vespalib/src/vespa/vespalib/datastore/datastorebase.h
+++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h
@@ -72,9 +72,9 @@ public:
/**
* Get the primary buffer id for the given type id.
*/
- uint32_t primary_buffer_id(uint32_t typeId) const noexcept { return _primary_buffer_ids[typeId]; }
+ uint32_t primary_buffer_id(uint32_t typeId) const { return _primary_buffer_ids[typeId]; }
BufferState &getBufferState(uint32_t buffer_id) noexcept;
- const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const noexcept { return _buffers[buffer_id]; }
+ const BufferAndMeta & getBufferMeta(uint32_t buffer_id) const { return _buffers[buffer_id]; }
uint32_t getMaxNumBuffers() const noexcept { return _buffers.size(); }
uint32_t get_bufferid_limit_acquire() const noexcept { return _bufferIdLimit.load(std::memory_order_acquire); }
uint32_t get_bufferid_limit_relaxed() noexcept { return _bufferIdLimit.load(std::memory_order_relaxed); }
diff --git a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
index 1d770163e06..bdbb03bcfee 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
@@ -1,8 +1,12 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(vespalib_vespalib_fuzzy OBJECT
- SOURCES
+ SOURCES
+ explicit_levenshtein_dfa.cpp
fuzzy_matcher.cpp
+ implicit_levenshtein_dfa.cpp
+ levenshtein_dfa.cpp
levenshtein_distance.cpp
- DEPENDS
- )
+ unicode_utils.cpp
+ DEPENDS
+)
diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h
new file mode 100644
index 00000000000..c445c60cc01
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h
@@ -0,0 +1,70 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <concepts>
+#include <cstdint>
+
+namespace vespalib::fuzzy {
+
+// Concept that all DFA matcher implementations must satisfy
+template <typename T>
+concept DfaMatcher = requires(T a) {
+ typename T::StateType;
+ typename T::StateParamType;
+ typename T::EdgeType;
+
+ // Initial (starting) state of the DFA
+ { a.start() } -> std::same_as<typename T::StateType>;
+
+ // Whether a given state constitutes a string match within the maximum number of edits
+ { a.is_match(typename T::StateType{}) } -> std::same_as<bool>;
+
+ // Whether a given state _may_ result in a match, either in the given state or in the
+ // future if the remaining string input is within the max edit distance
+ { a.can_match(typename T::StateType{}) } -> std::same_as<bool>;
+
+ // Whether the given state is a valid state. Used for invariant checking.
+ { a.valid_state(typename T::StateType{}) } -> std::same_as<bool>;
+
+ // Iff the given state represents a terminal matching state, returns the number of
+ // edits required to reach the state. Otherwise, returns max edits + 1.
+ { a.match_edit_distance(typename T::StateType{}) } -> std::same_as<uint8_t>;
+
+ // Returns the state that is the result of matching the single logical Levenshtein
+ // matrix row represented by the given state with the input u32 character value.
+ { a.match_input(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::StateType>;
+
+ // Returns the state that is the result of matching the single logical Levenshtein
+ // matrix row represented by the given state with a sentinel character that cannot
+ // match any character in the target string (i.e. is always a mismatch).
+ { a.match_wildcard(typename T::StateType{}) } -> std::same_as<typename T::StateType>;
+
+    // Whether there exists an out edge from the given state that can accept a
+ // _higher_ UTF-32 code point value (character) than the input u32 value. Such an
+ // edge _may_ be a wildcard edge, which accepts any character.
+ { a.has_higher_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>;
+
+ // Whether there exists an out edge from the given state whose u32 character value
+ // _exactly_ matches the input u32 value.
+ { a.has_exact_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>;
+
+ // Returns the out edge `e` from the given state that satisfies _both_:
+ // 1. higher than the given u32 value
+    //   2. is the lowest such edge, i.e. no other out edge satisfying 1. has a lower character value than `e`
+ // Only called in a context where the caller already knows that such an edge must exist.
+ { a.lowest_higher_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::EdgeType>;
+
+ // Returns the out edge from the given state that has the lowest character value
+ { a.smallest_explicit_out_edge(typename T::StateType{}) } -> std::same_as<typename T::EdgeType>;
+
+ // Whether the given edge is a valid edge. Used for invariant checking.
+ { a.valid_edge(typename T::EdgeType{}) } -> std::same_as<bool>;
+
+ // For a given edge, returns the UTF-32 code point value the edge represents
+ { a.edge_to_u32char(typename T::EdgeType{}) } -> std::same_as<uint32_t>;
+
+ // Returns the state that is the result of following the given edge from the given state.
+ { a.edge_to_state(typename T::StateType{}, typename T::EdgeType{}) } -> std::same_as<typename T::StateType>;
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h
new file mode 100644
index 00000000000..7e7881c5a14
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h
@@ -0,0 +1,299 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "sparse_state.h"
+#include <span>
+
+namespace vespalib::fuzzy {
+
+template <typename Traits>
+struct DfaSteppingBase {
+ using StateType = Traits::StateType;
+ using TransitionsType = Traits::TransitionsType;
+
+ std::span<const uint32_t> _u32_str; // TODO std::u32string_view
+
+ DfaSteppingBase(std::span<const uint32_t> str) noexcept
+ : _u32_str(str)
+ {
+ }
+
+ [[nodiscard]] static constexpr uint8_t max_edits() noexcept {
+ return Traits::max_edits();
+ }
+
+ /**
+ * Returns the initial state of the DFA. This represents the first row in the
+ * Levenshtein matrix.
+ */
+ [[nodiscard]] StateType start() const {
+ StateType ret;
+ const auto j = std::min(static_cast<uint32_t>(max_edits()),
+ static_cast<uint32_t>(_u32_str.size())); // e.g. the empty string as target
+ for (uint32_t i = 0; i <= j; ++i) {
+ ret.append(i, i);
+ }
+ return ret;
+ }
+
+ /**
+ * DFA stepping function that takes an input (sparse) state and a 32-bit character value
+ * (does not have to be valid UTF-32, but usually is) and generates a resulting state
+ * that represents applying the Levenshtein algorithm on a particular matrix row using
+ * the provided source string character.
+ *
+ * The returned state only includes elements where the edit distance (cost) is within
+ * the maximum number of edits. All other elements are implicitly beyond the max
+ * edit distance. It doesn't matter _how_ far beyond they are, since we have a fixed
+ * maximum to consider.
+ *
+ * Stepping a non-matching state S (can_match(S) == false) results in another non-
+ * matching state.
+ *
+ * As an example, this is a visualization of stepping through all source characters of
+ * the string "fxod" when matching the target string "food" with max edits k=1.
+ * Note: the actual internal representation is logical <column#, cost> tuples, but
+ * rendering as a matrix makes things easier to understand. Elements _not_ part of the
+ * state are rendered as '-'.
+ *
+ * f o o d
+ * start(): [0 1 - - -]
+ * 'f': [1 0 1 - -]
+ * 'x': [- 1 1 - -]
+ * 'o': [- - 1 1 -]
+ * 'd': [- - - - 1]
+ *
+ * In this case, the resulting edit distance is 1, with one substitution 'x' -> 'o'.
+ *
+ * If we pull out our trusty pen & paper and do the full matrix calculations, we see
+ * that the above is equivalent to the full matrix with all costs > k pruned away:
+ *
+ * f o o d
+ * [0 1 2 3 4]
+ * f [1 0 1 2 3]
+ * x [2 1 1 2 3]
+ * o [3 2 1 1 2]
+ * d [4 3 2 2 1]
+ *
+ * Since we're working on sparse states, stepping requires a bit of manual edge case
+ * handling when compared to a dense representation.
+ *
+ * We first have to handle the case where our state includes the 0th matrix column.
+ * In an explicit Levenshtein matrix of target string length n, source string length m,
+ * the first column is always the values [0, m], increasing with 1 per row (the first
+ * _row_ is handled by start()).
+ *
+ * To mirror this, if our sparse state includes column 0 we have to increment it by 1,
+ * unless doing so would bring the cost beyond our max number of edits, in which case
+ * we don't bother including the column in the new state at all. These correspond to
+ * the start() -> 'f' -> 'x' transitions in the example above.
+ *
+ * What remains is then to do the actual Levenshtein insert/delete/substitute formula
+ * for matching positions in the matrix. Let d represent the logical (full) Levenshtein
+ * distance matrix and cell d[i, j] be the minimum number of edits between source string
+ * character at i+1 and target string character at j+1:
+ *
+ * Insertion cost: d[i, j-1] + 1
+ * Deletion cost: d[i-1, j] + 1
+     *   Substitution cost: d[i-1, j-1] + (s[i-1] == t[j-1] ? 0 : 1)
+ *
+ * d[i, j] = min(Insertion cost, Deletion cost, Substitution cost)
+ *
+ * We have to turn this slightly on the head, as instead of going through a matrix row
+ * and "pulling" values from the previous row, we have to go through a state representing
+ * the previous row and "push" new values instead (iff these values are within max edits).
+ * This also means we compute costs for indexes offset by 1 from the source state index
+ * (can be visualized as the element one down diagonally to the right).
+ *
+ * Insertion considers the current row only, i.e. the state being generated. We always
+ * work left to right in column order, so we can check if the last element (if any)
+ * in our _new_ sparse state is equal to the index of our source state element. If not,
+ * we know that it was beyond max edits. Max edits + 1 is inherently beyond max edits
+ * and need not be included.
+ *
+ * Deletion considers the cell directly above our own, which is part of the input state
+ * if it exists. Since we're computing the costs of cells at index + 1, we know that the
+ * only way for this cell to be present in the state is if the _next_ element of our
+ * input state exists and has an index equal to index + 1. If so, the deletion cost is
+ * the cost recorded for this element + 1.
+ *
+ * Substitution considers the cell diagonally up to the left. This very conveniently
+ * happens to be the input state cell we're currently working from, so it's therefore
+ * always present.
+ *
+ * Example stepping with c='x', max edits k=1:
+ *
+ * ====== Initially ======
+ *
+ * f o o d
+ * state_in: [1 0 1 - -] (0:1, 1:0, 2:1)
+ * out: [] ()
+ *
+ * We have a 0th column in state_in, but incrementing it results in 2 > k, so not
+ * appended to out.
+ *
+ * ====== State (0:1), computing for index 1 ======
+ *
+ * Insertion: out state is empty (no cell to our left), so implicit insertion cost
+ * is > k
+ * Deletion: state_in[1] is (1:0), which means it represents the cell just above
+ * index 1. Deletion cost is therefore 0+1 = 1
+ * Substitution: (t[0] = 'f') != (c = 'x'), so substitution cost is 1+1 = 2
+ *
+ * Min cost is 1, which is <= k. Appending to output.
+ *
+ * out: [- 1] (1:1)
+ *
+ * ====== State (1:0), computing for index 2 ======
+ *
+ * Insertion: last element in out has index 1 (cell to our immediate left) with cost
+ * 1, so insertion cost is 1+1 = 2
+ * Deletion: state_in[2] is (2:1), which means it represents the cell just above
+ * index 2. Deletion cost is therefore 1+1 = 2
+ * Substitution: (t[1] = 'o') != (c = 'x'), so substitution cost is 0+1 = 1
+ *
+ * Min cost is 1, which is <= k. Appending to output.
+ *
+ * out: [- 1 1] (1:1, 2:1)
+ *
+ * ====== State (2:1), computing for index 3 ======
+ *
+ * Insertion: last element in out has index 2 (cell to our immediate left) with cost
+ * 1, so insertion cost is 1+1 = 2
+ * Deletion: state_in[3] does not exist, so implicit deletion cost is > k
+ * Substitution: (t[2] = 'o') != (c = 'x'), so substitution cost is 1+1 = 2
+ *
+ * Min cost is 2, which is > k. Not appending to output.
+ *
+ * Resulting output state (right-padded for clarity):
+ *
+ * [- 1 1 - -] (1:1, 2:1)
+ *
+ */
+ [[nodiscard]] StateType step(const StateType& state_in, uint32_t c) const {
+ if (state_in.empty()) {
+ return state_in; // A non-matching state can only step to another equally non-matching state
+ }
+ StateType new_state;
+ if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) {
+ new_state.append(0, state_in.cost(0) + 1);
+ }
+ for (uint32_t i = 0; i < state_in.size(); ++i) {
+ const auto idx = state_in.index(i);
+ if (idx == _u32_str.size()) [[unlikely]] {
+ break; // Can't process beyond matrix width
+ }
+ const uint8_t sub_cost = (_u32_str[idx] == c) ? 0 : 1;
+ // For our Levenshtein insert/delete/sub ops, we know that if a particular index is _not_
+ // in the sparse state, its implicit distance is beyond the max edits, and need not be
+ // considered.
+ auto dist = state_in.cost(i) + sub_cost; // (Substitution)
+ if (!new_state.empty() && (new_state.last_index() == idx)) { // (Insertion) anything to our immediate left?
+ dist = std::min(dist, new_state.last_cost() + 1);
+ }
+ if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) { // (Deletion) anything immediately above?
+ dist = std::min(dist, state_in.cost(i + 1) + 1);
+ }
+ if (dist <= max_edits()) {
+ new_state.append(idx + 1, dist);
+ }
+ }
+ return new_state;
+ }
+
+ /**
+ * Simplified version of step() which does not assemble a new state, but only checks
+ * whether _any_ mismatching character can be substituted in and still result in a
+ * potentially matching state. This is the case if the resulting state would contain
+ * _at least one_ entry (recalling that we only retain entries that are within the
+ * max number of edits).
+ *
+ * Consider using this directly instead of `can_match(step(state, WILDCARD))`,
+ * which has the exact same semantics, but requires computing the full (sparse)
+ * state before checking if it has any element at all. can_wildcard_step() just
+ * jumps straight to the last part.
+ */
+ [[nodiscard]] bool can_wildcard_step(const StateType& state_in) const noexcept {
+ if (state_in.empty()) {
+ return false; // by definition
+ }
+ if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) {
+ return true;
+ }
+ for (uint32_t i = 0; i < state_in.size(); ++i) {
+ const auto idx = state_in.index(i);
+ if (idx == _u32_str.size()) [[unlikely]] {
+ break;
+ }
+ const uint8_t sub_cost = 1; // by definition
+ auto dist = state_in.cost(i) + sub_cost;
+ // Insertion only looks at the entries already computed in the current row
+ // and always increases the cost by 1. Since we always bail out immediately if
+ // there would have been at least one entry within max edits, we transitively
+ // know that since we have not bailed out yet there is no way we can get here
+ // and have insertion actually yield a match. So skip computing it entirely.
+ if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) {
+ dist = std::min(dist, state_in.cost(i + 1) + 1);
+ }
+ if (dist <= max_edits()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Checks if the given state represents a terminal state within the max number of edits
+ */
+ [[nodiscard]] bool is_match(const StateType& state) const noexcept {
+ // If the last index is equal to the string's length, it means we were able to match
+ // the entire string and still be within the max edit distance.
+ return (!state.empty() && state.last_index() == static_cast<uint32_t>(_u32_str.size()));
+ }
+
+ /**
+ * Iff the input state represents a terminal matching state, returns the number of
+ * edits required to reach the state. Otherwise, returns max edits + 1.
+ */
+ [[nodiscard]] uint8_t match_edit_distance(const StateType& state) const noexcept {
+ if (!is_match(state)) {
+ return max_edits() + 1;
+ }
+ return state.last_cost();
+ }
+
+ /**
+ * Returns whether the given state _may_ end up matching the target string,
+ * depending on the remaining source string characters.
+ *
+ * Note: is_match(s) => can_match(s) is true, but
+ * can_match(s) => is_match(s) is false
+ */
+ [[nodiscard]] bool can_match(const StateType& state) const noexcept {
+ // The presence of any entries at all indicates that we may still potentially match
+ // the target string if the remaining input is within the maximum number of edits.
+ return !state.empty();
+ }
+
+ /**
+ * All valid character transitions from this state are those that are reachable
+ * within the max edit distance.
+ */
+ TransitionsType transitions(const StateType& state) const {
+ TransitionsType t;
+ for (size_t i = 0; i < state.size(); ++i) {
+ const auto idx = state.index(i);
+ if (idx < _u32_str.size()) [[likely]] {
+ t.add_char(_u32_str[idx]);
+ }
+ }
+ // We must ensure transitions are in increasing character order, so that the
+ // lowest possible higher char than any candidate char can be found with a
+ // simple "first-fit" linear scan.
+ t.sort();
+ return t;
+ }
+
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg
new file mode 100644
index 00000000000..0974e1d161f
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg
@@ -0,0 +1,286 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><!-- Generated by graphviz version 2.40.1 (20161225.0304)
+ --><!-- Title: levenshtein_dfa Pages: 1 --><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="333pt" height="488pt" viewBox="0.00 0.00 333.00 488.00">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 484)">
+<title>levenshtein_dfa</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-484 329,-484 329,4 -4,4"/>
+<!-- 0 -->
+<g id="node1" class="node">
+<title>0</title>
+<ellipse fill="none" stroke="#000000" cx="211" cy="-462" rx="18" ry="18"/>
+<text text-anchor="middle" x="211" y="-457.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">0</text>
+</g>
+<!-- 1 -->
+<g id="node2" class="node">
+<title>1</title>
+<ellipse fill="none" stroke="#000000" cx="157" cy="-373.2" rx="18" ry="18"/>
+<text text-anchor="middle" x="157" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">1</text>
+</g>
+<!-- 0&#45;&gt;1 -->
+<g id="edge1" class="edge">
+<title>0-&gt;1</title>
+<path fill="none" stroke="#000000" d="M201.5939,-446.5322C193.3592,-432.9906 181.2571,-413.0894 171.7374,-397.4348"/>
+<polygon fill="#000000" stroke="#000000" points="174.6658,-395.5142 166.4795,-388.7885 168.6849,-399.1513 174.6658,-395.5142"/>
+<text text-anchor="middle" x="189.9453" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text>
+</g>
+<!-- 2 -->
+<g id="node3" class="node">
+<title>2</title>
+<ellipse fill="none" stroke="#000000" cx="211" cy="-373.2" rx="18" ry="18"/>
+<text text-anchor="middle" x="211" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">2</text>
+</g>
+<!-- 0&#45;&gt;2 -->
+<g id="edge2" class="edge">
+<title>0-&gt;2</title>
+<path fill="none" stroke="#000000" d="M211,-443.6006C211,-431.4949 211,-415.4076 211,-401.6674"/>
+<polygon fill="#000000" stroke="#000000" points="214.5001,-401.272 211,-391.272 207.5001,-401.2721 214.5001,-401.272"/>
+<text text-anchor="middle" x="214.8913" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 3 -->
+<g id="node4" class="node">
+<title>3</title>
+<ellipse fill="none" stroke="#000000" cx="265" cy="-373.2" rx="18" ry="18"/>
+<text text-anchor="middle" x="265" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">3</text>
+</g>
+<!-- 0&#45;&gt;3 -->
+<g id="edge3" class="edge">
+<title>0-&gt;3</title>
+<path fill="none" stroke="#000000" d="M220.4061,-446.5322C228.6408,-432.9906 240.7429,-413.0894 250.2626,-397.4348"/>
+<polygon fill="#000000" stroke="#000000" points="253.3151,-399.1513 255.5205,-388.7885 247.3342,-395.5142 253.3151,-399.1513"/>
+<text text-anchor="middle" x="244.7223" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text>
+</g>
+<!-- 4 -->
+<g id="node5" class="node">
+<title>4</title>
+<ellipse fill="none" stroke="#000000" cx="157" cy="-284.4" rx="18" ry="18"/>
+<text text-anchor="middle" x="157" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">4</text>
+</g>
+<!-- 1&#45;&gt;4 -->
+<g id="edge4" class="edge">
+<title>1-&gt;4</title>
+<path fill="none" stroke="#000000" d="M157,-354.8006C157,-342.6949 157,-326.6076 157,-312.8674"/>
+<polygon fill="#000000" stroke="#000000" points="160.5001,-312.472 157,-302.472 153.5001,-312.4721 160.5001,-312.472"/>
+<text text-anchor="middle" x="158.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text>
+</g>
+<!-- 1&#45;&gt;4 -->
+<g id="edge6" class="edge">
+<title>1-&gt;4</title>
+<path fill="none" stroke="#000000" d="M156.1231,-354.8902C155.8898,-349.221 155.6708,-342.9535 155.5554,-337.2 155.4057,-329.7348 155.4057,-327.8652 155.5554,-320.4 155.6041,-317.9728 155.6712,-315.454 155.7501,-312.9273"/>
+<polygon fill="#000000" stroke="#000000" points="159.2558,-312.8309 156.1231,-302.7098 152.2605,-312.5755 159.2558,-312.8309"/>
+<text text-anchor="middle" x="158.7223" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text>
+</g>
+<!-- 5 -->
+<g id="node6" class="node">
+<title>5</title>
+<ellipse fill="none" stroke="#000000" cx="103" cy="-284.4" rx="18" ry="18"/>
+<text text-anchor="middle" x="103" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">5</text>
+</g>
+<!-- 1&#45;&gt;5 -->
+<g id="edge5" class="edge">
+<title>1-&gt;5</title>
+<path fill="none" stroke="#000000" d="M147.5939,-357.7322C139.3592,-344.1906 127.2571,-324.2894 117.7374,-308.6348"/>
+<polygon fill="#000000" stroke="#000000" points="120.6658,-306.7142 112.4795,-299.9885 114.6849,-310.3513 120.6658,-306.7142"/>
+<text text-anchor="middle" x="138.8913" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 6 -->
+<g id="node7" class="node">
+<title>6</title>
+<ellipse fill="none" stroke="#000000" cx="307" cy="-284.4" rx="18" ry="18"/>
+<text text-anchor="middle" x="307" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">6</text>
+</g>
+<!-- 2&#45;&gt;6 -->
+<g id="edge7" class="edge">
+<title>2-&gt;6</title>
+<path fill="none" stroke="#000000" d="M224.3484,-360.8527C240.3968,-346.0079 267.511,-320.9274 286.2858,-303.5607"/>
+<polygon fill="#000000" stroke="#000000" points="288.8006,-306.0022 293.765,-296.6424 284.0473,-300.8635 288.8006,-306.0022"/>
+<text text-anchor="middle" x="268.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text>
+</g>
+<!-- 7 -->
+<g id="node8" class="node">
+<title>7</title>
+<ellipse fill="none" stroke="#000000" cx="190" cy="-195.6" rx="18" ry="18"/>
+<text text-anchor="middle" x="190" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">7</text>
+</g>
+<!-- 2&#45;&gt;7 -->
+<g id="edge8" class="edge">
+<title>2-&gt;7</title>
+<path fill="none" stroke="#000000" d="M208.8708,-355.1934C205.2075,-324.212 197.6865,-260.606 193.3267,-223.7346"/>
+<polygon fill="#000000" stroke="#000000" points="196.7849,-223.1741 192.1348,-213.6543 189.8333,-223.9961 196.7849,-223.1741"/>
+<text text-anchor="middle" x="205.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 3&#45;&gt;6 -->
+<g id="edge9" class="edge">
+<title>3-&gt;6</title>
+<path fill="none" stroke="#000000" d="M272.7034,-356.9127C278.881,-343.8515 287.6665,-325.2765 294.7989,-310.1966"/>
+<polygon fill="#000000" stroke="#000000" points="298.0914,-311.4211 299.2032,-300.8848 291.7635,-308.4281 298.0914,-311.4211"/>
+<text text-anchor="middle" x="290.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text>
+</g>
+<!-- 8 -->
+<g id="node9" class="node">
+<title>8</title>
+<ellipse fill="none" stroke="#000000" cx="263" cy="-195.6" rx="18" ry="18"/>
+<text text-anchor="middle" x="263" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">8</text>
+</g>
+<!-- 3&#45;&gt;8 -->
+<g id="edge10" class="edge">
+<title>3-&gt;8</title>
+<path fill="none" stroke="#000000" d="M264.7972,-355.1934C264.4483,-324.212 263.732,-260.606 263.3168,-223.7346"/>
+<polygon fill="#000000" stroke="#000000" points="266.8158,-223.6142 263.2033,-213.6543 259.8162,-223.6931 266.8158,-223.6142"/>
+<text text-anchor="middle" x="267.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 4&#45;&gt;7 -->
+<g id="edge11" class="edge">
+<title>4-&gt;7</title>
+<path fill="none" stroke="#000000" d="M163.3627,-267.2785C168.1106,-254.5023 174.6869,-236.8062 180.1171,-222.194"/>
+<polygon fill="#000000" stroke="#000000" points="183.4541,-223.2619 183.6568,-212.669 176.8925,-220.8234 183.4541,-223.2619"/>
+<text text-anchor="middle" x="180.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 5&#45;&gt;7 -->
+<g id="edge14" class="edge">
+<title>5-&gt;7</title>
+<path fill="none" stroke="#000000" d="M115.8371,-271.2973C130.1833,-256.6543 153.5826,-232.7709 170.2601,-215.7483"/>
+<polygon fill="#000000" stroke="#000000" points="172.8948,-218.0603 177.393,-208.4678 167.8946,-213.1615 172.8948,-218.0603"/>
+<text text-anchor="middle" x="157.7223" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text>
+</g>
+<!-- 9 -->
+<g id="node10" class="node">
+<title>9</title>
+<ellipse fill="#d3d3d3" stroke="#000000" cx="60" cy="-195.6" rx="18" ry="18"/>
+<text text-anchor="middle" x="60" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">9(1)</text>
+</g>
+<!-- 5&#45;&gt;9 -->
+<g id="edge12" class="edge">
+<title>5-&gt;9</title>
+<path fill="none" stroke="#000000" d="M95.1131,-268.1127C88.7885,-255.0515 79.7938,-236.4765 72.4916,-221.3966"/>
+<polygon fill="#000000" stroke="#000000" points="75.4909,-219.5597 67.9825,-212.0848 69.1907,-222.6105 75.4909,-219.5597"/>
+<text text-anchor="middle" x="89.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 10 -->
+<g id="node11" class="node">
+<title>10</title>
+<ellipse fill="#d3d3d3" stroke="#000000" cx="126" cy="-195.6" rx="18" ry="18"/>
+<text text-anchor="middle" x="126" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">10(1)</text>
+</g>
+<!-- 5&#45;&gt;10 -->
+<g id="edge13" class="edge">
+<title>5-&gt;10</title>
+<path fill="none" stroke="#000000" d="M107.5441,-266.856C110.7887,-254.3287 115.2168,-237.2326 118.9222,-222.9264"/>
+<polygon fill="#000000" stroke="#000000" points="122.3462,-223.6657 121.4654,-213.1076 115.5698,-221.9105 122.3462,-223.6657"/>
+<text text-anchor="middle" x="120.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 6&#45;&gt;8 -->
+<g id="edge15" class="edge">
+<title>6-&gt;8</title>
+<path fill="none" stroke="#000000" d="M298.9297,-268.1127C292.4172,-254.9694 283.1382,-236.2426 275.6413,-221.1125"/>
+<polygon fill="#000000" stroke="#000000" points="278.5951,-219.1904 271.0191,-211.784 272.3229,-222.2984 278.5951,-219.1904"/>
+<text text-anchor="middle" x="291.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 11 -->
+<g id="node12" class="node">
+<title>11</title>
+<ellipse fill="none" stroke="#000000" cx="170" cy="-106.8" rx="18" ry="18"/>
+<text text-anchor="middle" x="170" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">11</text>
+</g>
+<!-- 7&#45;&gt;11 -->
+<g id="edge16" class="edge">
+<title>7-&gt;11</title>
+<path fill="none" stroke="#000000" d="M185.9527,-177.63C183.1694,-165.2722 179.4189,-148.6197 176.2537,-134.5662"/>
+<polygon fill="#000000" stroke="#000000" points="179.5848,-133.4268 173.973,-124.4402 172.7558,-134.9649 179.5848,-133.4268"/>
+<text text-anchor="middle" x="184.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 12 -->
+<g id="node13" class="node">
+<title>12</title>
+<ellipse fill="#d3d3d3" stroke="#000000" cx="113" cy="-18" rx="18" ry="18"/>
+<text text-anchor="middle" x="113" y="-13.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">12(1)</text>
+</g>
+<!-- 7&#45;&gt;12 -->
+<g id="edge17" class="edge">
+<title>7-&gt;12</title>
+<path fill="none" stroke="#000000" d="M195.7305,-178.1814C201.8942,-156.377 209.2729,-118.3097 197,-88.8 185.9488,-62.2279 158.7426,-42.3831 138.2647,-30.5667"/>
+<polygon fill="#000000" stroke="#000000" points="139.8619,-27.4509 129.4116,-25.7072 136.4936,-33.5872 139.8619,-27.4509"/>
+<text text-anchor="middle" x="206.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 8&#45;&gt;11 -->
+<g id="edge18" class="edge">
+<title>8-&gt;11</title>
+<path fill="none" stroke="#000000" d="M249.6754,-182.8771C234.3441,-168.2382 208.9734,-144.0133 190.9836,-126.8359"/>
+<polygon fill="#000000" stroke="#000000" points="192.9495,-123.8738 183.2999,-119.4993 188.1154,-128.9366 192.9495,-123.8738"/>
+<text text-anchor="middle" x="227.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 13 -->
+<g id="node14" class="node">
+<title>13</title>
+<ellipse fill="#d3d3d3" stroke="#000000" cx="18" cy="-106.8" rx="18" ry="18"/>
+<text text-anchor="middle" x="18" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">13(1)</text>
+</g>
+<!-- 9&#45;&gt;13 -->
+<g id="edge19" class="edge">
+<title>9-&gt;13</title>
+<path fill="none" stroke="#000000" d="M44.2356,-186.804C34.7839,-180.5699 23.5878,-171.2526 18.2174,-159.6 14.7103,-151.9903 13.6799,-143.0834 13.8048,-134.7757"/>
+<polygon fill="#000000" stroke="#000000" points="17.3023,-134.9283 14.4812,-124.716 10.3181,-134.4586 17.3023,-134.9283"/>
+<text text-anchor="middle" x="22.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 9&#45;&gt;13 -->
+<g id="edge21" class="edge">
+<title>9-&gt;13</title>
+<path fill="none" stroke="#000000" d="M52.2966,-179.3127C46.119,-166.2515 37.3335,-147.6765 30.2011,-132.5966"/>
+<polygon fill="#000000" stroke="#000000" points="33.2365,-130.8281 25.7968,-123.2848 26.9086,-133.8211 33.2365,-130.8281"/>
+<text text-anchor="middle" x="45.7223" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text>
+</g>
+<!-- 14 -->
+<g id="node15" class="node">
+<title>14</title>
+<ellipse fill="#d3d3d3" stroke="#000000" cx="72" cy="-106.8" rx="18" ry="18"/>
+<text text-anchor="middle" x="72" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">14(0)</text>
+</g>
+<!-- 9&#45;&gt;14 -->
+<g id="edge20" class="edge">
+<title>9-&gt;14</title>
+<path fill="none" stroke="#000000" d="M62.4284,-177.63C64.0874,-165.353 66.3193,-148.8372 68.2105,-134.8421"/>
+<polygon fill="#000000" stroke="#000000" points="71.7044,-135.1219 69.5752,-124.7433 64.7675,-134.1845 71.7044,-135.1219"/>
+<text text-anchor="middle" x="71.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 10&#45;&gt;11 -->
+<g id="edge22" class="edge">
+<title>10-&gt;11</title>
+<path fill="none" stroke="#000000" d="M134.0703,-179.3127C140.5828,-166.1694 149.8618,-147.4426 157.3587,-132.3125"/>
+<polygon fill="#000000" stroke="#000000" points="160.6771,-133.4984 161.9809,-122.984 154.4049,-130.3904 160.6771,-133.4984"/>
+<text text-anchor="middle" x="155.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text>
+</g>
+<!-- 10&#45;&gt;12 -->
+<g id="edge23" class="edge">
+<title>10-&gt;12</title>
+<path fill="none" stroke="#000000" d="M124.6819,-177.5934C122.4142,-146.612 117.7583,-83.006 115.0594,-46.1346"/>
+<polygon fill="#000000" stroke="#000000" points="118.5423,-45.772 114.3215,-36.0543 111.561,-46.2831 118.5423,-45.772"/>
+<text text-anchor="middle" x="124.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 11&#45;&gt;12 -->
+<g id="edge24" class="edge">
+<title>11-&gt;12</title>
+<path fill="none" stroke="#000000" d="M161.5849,-90.7772C155.8346,-80.1448 147.859,-65.9928 140,-54 137.0775,-49.5402 133.7942,-44.8997 130.5502,-40.4917"/>
+<polygon fill="#000000" stroke="#000000" points="133.2767,-38.294 124.4657,-32.4105 127.6846,-42.5045 133.2767,-38.294"/>
+<text text-anchor="middle" x="153.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 13&#45;&gt;12 -->
+<g id="edge25" class="edge">
+<title>13-&gt;12</title>
+<path fill="none" stroke="#000000" d="M29.0516,-92.0851C37.7254,-81.0086 50.4346,-65.7741 63.2174,-54 71.136,-46.7062 80.5495,-39.5675 89.0481,-33.5954"/>
+<polygon fill="#000000" stroke="#000000" points="91.192,-36.3695 97.4722,-27.8367 87.2416,-30.5907 91.192,-36.3695"/>
+<text text-anchor="middle" x="67.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 14&#45;&gt;12 -->
+<g id="edge26" class="edge">
+<title>14-&gt;12</title>
+<path fill="none" stroke="#000000" d="M79.7118,-90.0974C85.7248,-77.0741 94.1818,-58.7574 101.0669,-43.8455"/>
+<polygon fill="#000000" stroke="#000000" points="104.3085,-45.1739 105.3228,-34.6277 97.9532,-42.2395 104.3085,-45.1739"/>
+<text text-anchor="middle" x="100.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text>
+</g>
+<!-- 14&#45;&gt;12 -->
+<g id="edge27" class="edge">
+<title>14-&gt;12</title>
+<path fill="none" stroke="#000000" d="M71.9481,-88.4225C72.5604,-77.8966 74.4493,-64.6966 79.5554,-54 82.4978,-47.8362 86.8709,-42.0067 91.4921,-36.9052"/>
+<polygon fill="#000000" stroke="#000000" points="94.2341,-39.1107 98.7881,-29.5445 89.2625,-34.1828 94.2341,-39.1107"/>
+<text text-anchor="middle" x="82.7223" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text>
+</g>
+</g>
+</svg> \ No newline at end of file
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp
new file mode 100644
index 00000000000..f78de5cc082
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp
@@ -0,0 +1,11 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "explicit_levenshtein_dfa.hpp"
+
+namespace vespalib::fuzzy {
+
+template class ExplicitLevenshteinDfaImpl<1>;
+template class ExplicitLevenshteinDfaImpl<2>;
+template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>;
+template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>;
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h
new file mode 100644
index 00000000000..49baad21530
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h
@@ -0,0 +1,147 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "dfa_stepping_base.h"
+#include "levenshtein_dfa.h"
+#include "sparse_state.h"
+#include "unicode_utils.h"
+#include <vector>
+
+namespace vespalib::fuzzy {
+
+// A doomed state is one that cannot possibly match the target string
+constexpr const uint32_t DOOMED = UINT32_MAX;
+
+template <uint8_t MaxEdits>
+struct DfaNode {
+ static constexpr uint8_t MaxCharOutEdges = diag(MaxEdits); // Not counting wildcard edge
+
+ struct Edge {
+ uint32_t u32ch;
+ uint32_t node;
+ };
+
+ std::array<Edge, MaxCharOutEdges> match_out_edges_buf;
+ uint32_t wildcard_edge_to = DOOMED;
+ uint8_t num_match_out_edges = 0;
+ uint8_t edits = UINT8_MAX;
+
+ [[nodiscard]] bool has_wildcard_edge() const noexcept {
+ return wildcard_edge_to != DOOMED;
+ }
+
+ [[nodiscard]] uint32_t wildcard_edge_to_or_doomed() const noexcept {
+ return wildcard_edge_to;
+ }
+
+ [[nodiscard]] std::span<const Edge> match_out_edges() const noexcept {
+ return std::span(match_out_edges_buf.begin(), num_match_out_edges);
+ }
+
+ [[nodiscard]] uint32_t match_or_doomed(uint32_t ch) const noexcept {
+ // Always prefer the exact matching edges
+ for (const auto& e : match_out_edges()) {
+ if (e.u32ch == ch) {
+ return e.node;
+ }
+ }
+ // Fallback to wildcard edge if possible (could be doomed)
+ return wildcard_edge_to;
+ }
+
+ [[nodiscard]] bool has_exact_match(uint32_t ch) const noexcept {
+ for (const auto& e : match_out_edges()) {
+ if (e.u32ch == ch) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+    [[nodiscard]] bool has_higher_out_edge(uint32_t ch) const noexcept {
+        if (has_wildcard_edge()) {
+            return true; // implicitly possible to substitute a higher out edge char
+        }
+        return lowest_higher_explicit_out_edge(ch) != nullptr;
+    }
+
+ [[nodiscard]] const Edge* lowest_higher_explicit_out_edge(uint32_t ch) const noexcept {
+ // Important: these _must_ be sorted in increasing code point order
+ for (const auto& e : match_out_edges()) {
+ if (e.u32ch > ch) {
+ return &e;
+ }
+ }
+ return nullptr;
+ }
+
+ void add_match_out_edge(uint32_t out_char, uint32_t out_node) noexcept {
+ assert(num_match_out_edges < MaxCharOutEdges);
+ match_out_edges_buf[num_match_out_edges] = Edge(out_char, out_node);
+ ++num_match_out_edges;
+ }
+
+ void set_wildcard_out_edge(uint32_t out_node) noexcept {
+ assert(wildcard_edge_to == DOOMED);
+ wildcard_edge_to = out_node;
+ }
+};
+
+template <uint8_t MaxEdits>
+class ExplicitLevenshteinDfaImpl final : public LevenshteinDfa::Impl {
+public:
+ static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2);
+
+ using DfaNodeType = DfaNode<MaxEdits>;
+ using MatchResult = LevenshteinDfa::MatchResult;
+private:
+ std::vector<DfaNodeType> _nodes;
+public:
+ ExplicitLevenshteinDfaImpl() noexcept = default;
+ ~ExplicitLevenshteinDfaImpl() override = default;
+
+ static constexpr uint8_t max_edits() noexcept { return MaxEdits; }
+
+ void ensure_node_array_large_enough_for_index(uint32_t node_index) {
+ if (node_index >= _nodes.size()) {
+ _nodes.resize(node_index + 1);
+ }
+ }
+
+ void set_node_edit_distance(uint32_t node_index, uint8_t edits) {
+ _nodes[node_index].edits = edits;
+ }
+
+ void add_outgoing_edge(uint32_t from_node_idx, uint32_t to_node_idx, uint32_t out_char) {
+ _nodes[from_node_idx].add_match_out_edge(out_char, to_node_idx);
+ }
+
+ void set_wildcard_edge(uint32_t from_node_idx, uint32_t to_node_idx) {
+ _nodes[from_node_idx].set_wildcard_out_edge(to_node_idx);
+ }
+
+ [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override;
+
+ [[nodiscard]] size_t memory_usage() const noexcept override {
+ return sizeof(DfaNodeType) * _nodes.size();
+ }
+
+ void dump_as_graphviz(std::ostream& os) const override;
+};
+
+template <typename Traits>
+class ExplicitLevenshteinDfaBuilder {
+ std::vector<uint32_t> _u32_str_buf; // TODO std::u32string
+public:
+ explicit ExplicitLevenshteinDfaBuilder(std::string_view str)
+ : ExplicitLevenshteinDfaBuilder(utf8_string_to_utf32(str))
+ {}
+
+ explicit ExplicitLevenshteinDfaBuilder(std::vector<uint32_t> str) noexcept
+ : _u32_str_buf(std::move(str))
+ {}
+
+ [[nodiscard]] LevenshteinDfa build_dfa() const;
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp
new file mode 100644
index 00000000000..0960219aff3
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp
@@ -0,0 +1,228 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "explicit_levenshtein_dfa.h"
+#include "match_algorithm.hpp"
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <iostream>
+#include <span>
+#include <queue>
+
+namespace vespalib::fuzzy {
+
+// DfaMatcher adapter for explicit DFA implementation
+template <uint8_t MaxEdits>
+struct ExplicitDfaMatcher {
+ using DfaNodeType = typename ExplicitLevenshteinDfaImpl<MaxEdits>::DfaNodeType;
+ using StateType = const DfaNodeType*;
+ using EdgeType = const DfaNodeType::Edge*;
+
+ using StateParamType = const DfaNodeType*;
+
+ const std::span<const DfaNodeType> _nodes;
+
+ explicit ExplicitDfaMatcher(const std::span<const DfaNodeType> nodes) noexcept
+ : _nodes(nodes)
+ {}
+
+ static constexpr uint8_t max_edits() noexcept { return MaxEdits; }
+
+ StateType start() const noexcept {
+ return &_nodes[0];
+ }
+ bool has_higher_out_edge(StateType node, uint32_t mch) const noexcept {
+ return node->has_higher_out_edge(mch);
+ }
+ StateType match_input(StateType node, uint32_t mch) const noexcept {
+ auto maybe_node_idx = node->match_or_doomed(mch);
+ return ((maybe_node_idx != DOOMED) ? &_nodes[maybe_node_idx] : nullptr);
+ }
+ bool is_match(StateType node) const noexcept {
+ return node->edits <= max_edits();
+ }
+ bool can_match(StateType node) const noexcept {
+ return node != nullptr;
+ }
+ uint8_t match_edit_distance(StateType node) const noexcept {
+ return node->edits;
+ }
+ bool valid_state(StateType node) const noexcept {
+ return node != nullptr;
+ }
+ StateType match_wildcard(StateType node) const noexcept {
+ auto edge_to = node->wildcard_edge_to_or_doomed();
+ return ((edge_to != DOOMED) ? &_nodes[edge_to] : nullptr);
+ }
+ bool has_exact_explicit_out_edge(StateType node, uint32_t ch) const noexcept {
+ return node->has_exact_match(ch);
+ }
+ EdgeType lowest_higher_explicit_out_edge(StateType node, uint32_t ch) const noexcept {
+ return node->lowest_higher_explicit_out_edge(ch);
+ }
+ EdgeType smallest_explicit_out_edge(StateType node) const noexcept {
+ // Out-edges are pre-ordered in increasing code point order, so the first
+ // element is always the smallest possible matching character.
+ assert(!node->match_out_edges().empty());
+ return &node->match_out_edges().front();
+ }
+ bool valid_edge(EdgeType edge) const noexcept {
+ return edge != nullptr;
+ }
+ uint32_t edge_to_u32char(EdgeType edge) const noexcept {
+ return edge->u32ch;
+ }
+ StateType edge_to_state([[maybe_unused]] StateType node, EdgeType edge) const noexcept {
+ return &_nodes[edge->node];
+ }
+};
+
+template <uint8_t MaxEdits>
+LevenshteinDfa::MatchResult
+ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str, std::string* successor_out) const {
+ ExplicitDfaMatcher<MaxEdits> matcher(_nodes);
+ return MatchAlgorithm<MaxEdits>::match(matcher, u8str, successor_out);
+}
+
+template <uint8_t MaxEdits>
+void ExplicitLevenshteinDfaImpl<MaxEdits>::dump_as_graphviz(std::ostream& os) const {
+ os << std::dec << "digraph levenshtein_dfa {\n";
+ os << " fontname=\"Helvetica,Arial,sans-serif\"\n";
+ os << " node [shape=circle, fontname=\"Helvetica,Arial,sans-serif\", fixedsize=true];\n";
+ os << " edge [fontname=\"Helvetica,Arial,sans-serif\"];\n";
+ for (size_t i = 0; i < _nodes.size(); ++i) {
+ const auto& node = _nodes[i];
+ if (node.edits <= max_edits()) {
+ os << " " << i << " [label=\"" << i << "(" << static_cast<int>(node.edits) << ")\", style=\"filled\"];\n";
+ }
+ for (const auto& edge : node.match_out_edges()) {
+ std::string as_utf8;
+ append_utf32_char_as_utf8(as_utf8, edge.u32ch);
+ os << " " << i << " -> " << edge.node << " [label=\"" << as_utf8 << "\"];\n";
+ }
+ if (node.wildcard_edge_to != DOOMED) {
+ os << " " << i << " -> " << node.wildcard_edge_to << " [label=\"*\"];\n";
+ }
+ }
+ os << "}\n";
+}
+
+namespace {
+
+template <typename StateType>
+struct ExploreState {
+ using NodeIdAndExplored = std::pair<uint32_t, bool>;
+ using SparseExploredStates = vespalib::hash_map<StateType, NodeIdAndExplored, typename StateType::hash>;
+
+ uint32_t state_counter;
+ SparseExploredStates explored_states;
+
+ ExploreState();
+ ~ExploreState();
+
+ [[nodiscard]] SparseExploredStates::iterator node_of(const StateType& state) {
+ auto maybe_explored = explored_states.find(state);
+ if (maybe_explored != explored_states.end()) {
+ return maybe_explored;
+ }
+ uint32_t this_node = state_counter;
+ assert(state_counter < UINT32_MAX);
+ ++state_counter;
+ return explored_states.insert(std::make_pair(state, std::make_pair(this_node, false))).first; // not yet explored;
+ }
+
+ [[nodiscard]] bool already_explored(const SparseExploredStates::iterator& node) const noexcept {
+ return node->second.second;
+ }
+
+ void tag_as_explored(SparseExploredStates::iterator& node) noexcept {
+ node->second.second = true;
+ }
+};
+
+template <typename StateType>
+ExploreState<StateType>::ExploreState()
+ : state_counter(0),
+ explored_states()
+{}
+
+template <typename StateType>
+ExploreState<StateType>::~ExploreState() = default;
+
+template <typename Traits>
+class ExplicitLevenshteinDfaBuilderImpl : public DfaSteppingBase<Traits> {
+ using Base = DfaSteppingBase<Traits>;
+
+ using StateType = typename Base::StateType;
+ using TransitionsType = typename Base::TransitionsType;
+
+ using Base::_u32_str;
+ using Base::max_edits;
+ using Base::start;
+ using Base::match_edit_distance;
+ using Base::step;
+ using Base::is_match;
+ using Base::can_match;
+ using Base::transitions;
+public:
+ explicit ExplicitLevenshteinDfaBuilderImpl(std::span<const uint32_t> str) noexcept
+ : DfaSteppingBase<Traits>(str)
+ {
+ assert(str.size() < UINT32_MAX / max_out_edges_per_node());
+ }
+
+ [[nodiscard]] static constexpr uint8_t max_out_edges_per_node() noexcept {
+ // Max possible out transition characters (2k+1) + one wildcard edge.
+ return diag(max_edits()) + 1;
+ }
+
+ [[nodiscard]] LevenshteinDfa build_dfa() const;
+};
+
+template <typename Traits>
+LevenshteinDfa ExplicitLevenshteinDfaBuilderImpl<Traits>::build_dfa() const {
+ auto dfa = std::make_unique<ExplicitLevenshteinDfaImpl<max_edits()>>();
+ ExploreState<StateType> exp;
+ // Use BFS instead of DFS to ensure most node edges point to nodes that are allocated _after_
+ // the parent node, which means the CPU can skip ahead instead of ping-ponging back and forth.
+ // This does _not_ always hold, such as if you have A->B and A->C->B (i.e. both parent and
+ // grandparent have a transition to the same state), in which case B may be allocated before C.
+ std::queue<StateType> to_explore;
+ to_explore.push(start());
+ while (!to_explore.empty()) {
+ auto state = std::move(to_explore.front());
+ to_explore.pop();
+ auto this_node = exp.node_of(state); // note: invalidated by subsequent calls to node_of
+ if (exp.already_explored(this_node)) {
+ continue;
+ }
+ exp.tag_as_explored(this_node);
+ const auto this_node_idx = this_node->second.first;
+ dfa->ensure_node_array_large_enough_for_index(this_node_idx);
+ dfa->set_node_edit_distance(this_node_idx, match_edit_distance(state));
+ auto t = transitions(state);
+ for (uint32_t out_c : t.u32_chars()) {
+ auto new_state = step(state, out_c);
+ auto out_node = exp.node_of(new_state);
+ dfa->add_outgoing_edge(this_node_idx, out_node->second.first, out_c);
+ to_explore.push(std::move(new_state));
+ }
+ auto wildcard_state = step(state, WILDCARD);
+ if (can_match(wildcard_state)) {
+ auto out_node = exp.node_of(wildcard_state);
+ dfa->set_wildcard_edge(this_node_idx, out_node->second.first);
+ to_explore.push(std::move(wildcard_state));
+ } // else: don't bother
+ }
+ return LevenshteinDfa(std::move(dfa));
+}
+
+} // anon ns
+
+template <typename Traits>
+LevenshteinDfa ExplicitLevenshteinDfaBuilder<Traits>::build_dfa() const {
+ ExplicitLevenshteinDfaBuilderImpl<Traits> builder(_u32_str_buf);
+ return builder.build_dfa();
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp
new file mode 100644
index 00000000000..8b9d2eddcac
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp
@@ -0,0 +1,9 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "implicit_levenshtein_dfa.hpp"
+
+namespace vespalib::fuzzy {
+
+template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>;
+template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>;
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h
new file mode 100644
index 00000000000..0846b95d135
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h
@@ -0,0 +1,35 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "levenshtein_dfa.h"
+#include "unicode_utils.h"
+#include <vector>
+
+namespace vespalib::fuzzy {
+
+template <typename Traits>
+class ImplicitLevenshteinDfa final : public LevenshteinDfa::Impl {
+ std::vector<uint32_t> _u32_str_buf; // TODO std::u32string
+public:
+ using MatchResult = LevenshteinDfa::MatchResult;
+
+ explicit ImplicitLevenshteinDfa(std::string_view str)
+ : ImplicitLevenshteinDfa(utf8_string_to_utf32(str))
+ {}
+
+ explicit ImplicitLevenshteinDfa(std::vector<uint32_t> str) noexcept
+ : _u32_str_buf(std::move(str))
+ {}
+
+ ~ImplicitLevenshteinDfa() override = default;
+
+ [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override;
+
+ [[nodiscard]] size_t memory_usage() const noexcept override {
+ return _u32_str_buf.size() * sizeof(uint32_t);
+ }
+
+ void dump_as_graphviz(std::ostream& os) const override;
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp
new file mode 100644
index 00000000000..4ee468e424b
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp
@@ -0,0 +1,121 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "dfa_stepping_base.h"
+#include "implicit_levenshtein_dfa.h"
+#include "match_algorithm.hpp"
+#include "sparse_state.h"
+#include <cassert>
+#include <stdexcept>
+
+namespace vespalib::fuzzy {
+
+// DfaMatcher adapter for implicit DFA implementation
+template <typename Traits>
+struct ImplicitDfaMatcher : public DfaSteppingBase<Traits> {
+ using Base = DfaSteppingBase<Traits>;
+
+ using StateType = typename Base::StateType;
+ using EdgeType = uint32_t; // Just the raw u32 character value
+
+ using StateParamType = const StateType&;
+
+ using Base::_u32_str;
+ using Base::max_edits;
+ using Base::start;
+ using Base::match_edit_distance;
+ using Base::step;
+ using Base::can_wildcard_step;
+ using Base::is_match;
+ using Base::can_match;
+
+ explicit ImplicitDfaMatcher(std::span<const uint32_t> u32_str) noexcept
+ : Base(u32_str)
+ {}
+
+ // start, is_match, can_match, match_edit_distance are all provided by base type
+
+ template <typename F>
+ bool has_any_char_matching(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) {
+ for (uint32_t i = 0; i < state.size(); ++i) {
+ const auto idx = state.index(i);
+ if ((idx < _u32_str.size()) && f(_u32_str[idx])) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ template <typename F>
+ void for_each_char(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) {
+ for (uint32_t i = 0; i < state.size(); ++i) {
+ const auto idx = state.index(i);
+ if ((idx < _u32_str.size())) [[likely]] {
+ f(_u32_str[idx]);
+ }
+ }
+ }
+
+ bool has_explicit_higher_out_edge(const StateType& state, uint32_t ch) const noexcept {
+ return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept {
+ return state_ch > ch;
+ });
+ }
+
+ bool has_higher_out_edge(const StateType& state, uint32_t mch) const noexcept {
+ return (has_explicit_higher_out_edge(state, mch) || can_wildcard_step(state));
+ }
+ StateType match_input(const StateType& state, uint32_t mch) const noexcept {
+ return step(state, mch);
+ }
+ bool valid_state(const StateType& state) const noexcept {
+ return !state.empty();
+ }
+ StateType match_wildcard(const StateType& state) const noexcept {
+ return step(state, WILDCARD);
+ }
+ bool has_exact_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept {
+ return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept {
+ return state_ch == ch;
+ });
+ }
+ EdgeType lowest_higher_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept {
+ uint32_t min_ch = UINT32_MAX;
+ for_each_char(state, [ch, &min_ch](uint32_t state_ch) noexcept {
+ if ((state_ch > ch) && (state_ch < min_ch)) {
+ min_ch = state_ch;
+ }
+ });
+ return min_ch;
+ }
+ EdgeType smallest_explicit_out_edge(const StateType& state) const noexcept {
+ uint32_t min_ch = UINT32_MAX;
+ for_each_char(state, [&min_ch](uint32_t state_ch) noexcept {
+ min_ch = std::min(min_ch, state_ch);
+ });
+ return min_ch;
+ }
+ bool valid_edge(EdgeType edge) const noexcept {
+ return edge != UINT32_MAX;
+ }
+ uint32_t edge_to_u32char(EdgeType edge) const noexcept {
+ return edge;
+ }
+ StateType edge_to_state(const StateType& state, EdgeType edge) const noexcept {
+ return step(state, edge);
+ }
+};
+
+template <typename Traits>
+LevenshteinDfa::MatchResult
+ImplicitLevenshteinDfa<Traits>::match(std::string_view u8str, std::string* successor_out) const {
+ ImplicitDfaMatcher<Traits> matcher(_u32_str_buf);
+ return MatchAlgorithm<Traits::max_edits()>::match(matcher, u8str, successor_out);
+}
+
+template <typename Traits>
+void ImplicitLevenshteinDfa<Traits>::dump_as_graphviz(std::ostream&) const {
+ throw std::runtime_error("Graphviz output not available for implicit Levenshtein DFA");
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp
new file mode 100644
index 00000000000..e75ef8365bf
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp
@@ -0,0 +1,83 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "explicit_levenshtein_dfa.h"
+#include "implicit_levenshtein_dfa.h"
+#include "levenshtein_dfa.h"
+#include <vespa/vespalib/util/stringfmt.h>
+#include <memory>
+
+namespace vespalib::fuzzy {
+
+LevenshteinDfa::LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept
+ : _impl(std::move(impl))
+{}
+
+LevenshteinDfa::LevenshteinDfa(LevenshteinDfa&&) noexcept = default;
+LevenshteinDfa& LevenshteinDfa::operator=(LevenshteinDfa&&) noexcept = default;
+
+LevenshteinDfa::~LevenshteinDfa() = default;
+
+LevenshteinDfa::MatchResult
+LevenshteinDfa::match(std::string_view u8str, std::string* successor_out) const {
+ return _impl->match(u8str, successor_out);
+}
+
+size_t LevenshteinDfa::memory_usage() const noexcept {
+ return _impl->memory_usage();
+}
+
+void LevenshteinDfa::dump_as_graphviz(std::ostream& out) const {
+ _impl->dump_as_graphviz(out);
+}
+
+LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits, DfaType dfa_type) {
+ if (max_edits != 1 && max_edits != 2) {
+ throw std::invalid_argument(make_string("Levenshtein DFA max_edits must be in {1, 2}, was %u", max_edits));
+ }
+ if (dfa_type == DfaType::Implicit) {
+ if (max_edits == 1) {
+ return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>>(target_string));
+ } else { // max_edits == 2
+ return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>>(target_string));
+ }
+ } else { // DfaType::Explicit
+ if (max_edits == 1) {
+ return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>(target_string).build_dfa();
+ } else { // max_edits == 2
+ return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>(target_string).build_dfa();
+ }
+ }
+
+}
+
+LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits) {
+ // TODO automatically select implementation based on target length/max edits?
+ // Suggestion:
+ // - Explicit DFA iff (k == 1 && |target| <= 256) || (k == 2 && |target| <= 64).
+ // - Implicit DFA otherwise.
+ // This keeps memory overhead < 64k and DFA construction time < 300 usec (measured on
+ // an M1 Pro; your mileage may vary etc).
+ // Ideally the implicit DFA would always be the fastest (or at least approximately as
+ // fast as the explicit DFA), but this is not yet the case.
+ return build(target_string, max_edits, DfaType::Implicit);
+}
+
+std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos) {
+ if (mos.matches()) {
+ os << "match(" << static_cast<int>(mos.edits()) << " edits)";
+ } else {
+ os << "mismatch";
+ }
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt) {
+ if (dt == LevenshteinDfa::DfaType::Implicit) {
+ os << "Implicit";
+ } else {
+ assert(dt == LevenshteinDfa::DfaType::Explicit);
+ os << "Explicit";
+ }
+ return os;
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h
new file mode 100644
index 00000000000..a26ccbe87ee
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h
@@ -0,0 +1,244 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <cstdint>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <string_view>
+
+namespace vespalib::fuzzy {
+
+/**
+ * Levenshtein Deterministic Finite Automata (DFA)
+ *
+ * The Levenshtein distance (or edit distance) is the minimum number of edits (additions,
+ * deletions or substitutions) needed to transform a particular source string s to a
+ * particular target string t.
+ *
+ * Let m be the length of the source string and n be the length of the target string.
+ *
+ * The classic dynamic programming algorithm uses a n x m cost matrix and is therefore
+ * O(nm) in space and time. By observing that only 2 rows of the matrix are actually
+ * needed, this is commonly reduced to O(n) space complexity (still O(nm) time complexity).
+ * When the maximum number of allowed edits is constrained to k, some clever observations
+ * about the nature of the cost matrix allows for reducing the time complexity down to
+ * O(kn) (more specifically, O((2k+1) * n)). When k is fixed (e.g. k in {1, 2}), the
+ * time complexity simplifies down to O(n).
+ *
+ * This implements code for building and evaluating Levenshtein Deterministic Finite
+ * Automata, where the resulting DFA efficiently matches all possible source strings that
+ * can be transformed to the target string within k max edits. This allows for easy linear
+ * matching of strings.
+ *
+ * Inspiration:
+ * - http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
+ * - https://julesjacobs.com/2015/06/17/disqus-levenshtein-simple-and-fast.html
+ *
+ * The latter in particular was a close inspiration for the sparse DFA state management.
+ *
+ * ====== Dictionary skipping via successor string generation ======
+ *
+ * Scanning for edit distance matches frequently takes place against a sorted dictionary.
+ * When matching using a DFA, in the case where the source string does _not_ match, we can
+ * generate the _successor_ string; the next matching string that is lexicographically
+ * _greater_ than the source string. This string has the invariant that there are no
+ * possibly matching strings within k edits ordered after the source string but before
+ * the successor.
+ *
+ * This lets us do possibly massive leaps forward in the dictionary, turning a dictionary
+ * scan into a sublinear operation.
+ *
+ * Note that the implemented successor algorithm is slightly different from that described
+ * in the above blog post. The implemented algorithm requires zero extra data structures
+ * than the DFA itself and the target string and tries to be extra clever with reducing
+ * the number of code point conversions required.
+ *
+ * ====== Unicode support ======
+ *
+ * Matching and successor generation is fully Unicode-aware. All input strings are expected
+ * to be in UTF-8, and the generated successor is also encoded as UTF-8 (with some caveats;
+ * see the documentation for match()).
+ *
+ * Internally, matching is done on UTF-32 code points and the DFA itself is built around
+ * UTF-32. This is unlike Lucene, which converts a UTF-32 DFA to an equivalent UTF-8 DFA.
+ *
+ * ====== Memory usage ======
+ *
+ * There is always a baseline DFA memory usage O(n) in the target string, as the
+ * underlying DFA needs to convert the input UTF-8 string to explicit UTF-32 chars.
+ *
+ * Aside from the baseline, memory usage depends on whether an explicit or implicit DFA
+ * is used.
+ *
+ * ------ Explicit DFA ------
+ *
+ * The explicit DFA graph takes up quite a bit more memory than the original string
+ * representation (one reason is the use of UTF-32 characters under the hood).
+ *
+ * Expected upper bound memory usage for a string of length n with max edits k is
+ *
+ * (2k+1) * N(k) * n * W(k)
+ *
+ * where N(1) is expected to be 32 and N(2) is 48, W(1) is 1.34 and W(2) is 3.2 (empirically
+ * derived).
+ *
+ * Memory usage during building is higher due to keeping track of the set of generated
+ * states in a hash table, but still linear in input size. This extra memory is freed
+ * once building is complete.
+ *
+ * ------ Implicit DFA ------
+ *
+ * Implicit DFAs have a O(1) memory usage during evaluation, which all lives on the stack
+ * or in registers (this does not include the successor string, which is provided by the
+ * caller).
+ *
+ * Since the sparse state stepping is currently not as fast as explicit DFA node traversal,
+ * string matching is slower than with the explicit DFA.
+ *
+ * ====== In short ======
+ *
+ * - Immutable; build once, run many times.
+ * - Explicit DFA build time is amortized linear in target string size.
+ * - Implicit DFA build time is O(1) (aside from initial UTF-32 conversion).
+ * - Zero-allocation matching.
+ * - Matching takes in raw UTF-8 input, no need to pre-convert.
+ * - Streaming UTF-8 to UTF-32 conversion; fully unicode-aware (DFA uses UTF-32 code
+ * points internally).
+ * - If required, it's possible (but not currently implemented) to bake case
+ * insensitive matching semantics into the generated DFA itself.
+ * - Allows for dictionary forward-skipping via successor algorithm.
+ * - Amortized zero allocations for successor string building when reusing string
+ * between matches.
+ * - Successor string is generated in-place as UTF-8 and can be directly used as input
+ * to a byte-wise dictionary seek.
+ */
+class LevenshteinDfa {
+public:
+ class MatchResult {
+ uint8_t _max_edits;
+ uint8_t _edits;
+ public:
+ constexpr MatchResult(uint8_t max_edits, uint8_t edits) noexcept
+ : _max_edits(max_edits),
+ _edits(edits)
+ {}
+
+ static constexpr MatchResult make_match(uint8_t max_edits, uint8_t edits) noexcept {
+ return {max_edits, edits};
+ }
+
+ static constexpr MatchResult make_mismatch(uint8_t max_edits) noexcept {
+ return {max_edits, static_cast<uint8_t>(max_edits + 1)};
+ }
+
+ [[nodiscard]] constexpr bool matches() const noexcept { return _edits <= _max_edits; }
+ [[nodiscard]] constexpr uint8_t edits() const noexcept { return _edits; }
+ [[nodiscard]] constexpr uint8_t max_edits() const noexcept { return _max_edits; }
+ };
+
+ struct Impl {
+ virtual ~Impl() = default;
+ [[nodiscard]] virtual MatchResult match(std::string_view u8str, std::string* successor_out) const = 0;
+ [[nodiscard]] virtual size_t memory_usage() const noexcept = 0;
+ virtual void dump_as_graphviz(std::ostream& out) const = 0;
+ };
+
+private:
+ std::unique_ptr<Impl> _impl;
+public:
+ explicit LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept;
+ LevenshteinDfa(LevenshteinDfa&&) noexcept;
+ LevenshteinDfa& operator=(LevenshteinDfa&&) noexcept;
+ LevenshteinDfa(const LevenshteinDfa&) = delete;
+ LevenshteinDfa& operator=(const LevenshteinDfa&) = delete;
+ ~LevenshteinDfa();
+
+ /**
+ * Attempts to match the source string `source` with the target string this DFA was
+ * built with, emitting a successor string on mismatch if `successor_out` != nullptr.
+ *
+ * `source` must not contain any null UTF-8 chars.
+ *
+ * Match case:
+ * Iff `source` is _within_ the maximum edit distance, returns a MatchResult with
+ * matches() == true and edits() == the actual edit distance. If `successor_out`
+ * is not nullptr, the string pointed to is _not_ modified.
+ *
+ * Mismatch case:
+ * Iff `source` is _beyond_ the maximum edit distance, returns a MatchResult with
+ * matches() == false.
+ *
+ * Iff `successor_out` is not nullptr, the following holds:
+ * - `successor_out` is modified to contain the next (in byte-wise ordering) possible
+ * _matching_ string S so that there exists no other matching string S' that is
+ * greater than `source` but smaller than S.
+ * - `successor_out` contains UTF-8 bytes that are within what UTF-8 can legally
+ * encode in bitwise form, but the _code points_ they encode may not be valid.
+ * In particular, surrogate pair ranges and U+10FFFF+1 may be encoded, neither of
+ * which are valid UTF-8.
+ *
+ * It is expected that the consumer of `successor_out` is only interested in the
+ * memcmp()-ordering of strings and not whether they are technically valid Unicode.
+ * This should be the case for low-level dictionary data structures etc.
+ *
+ * Memory allocation:
+ * This function does not directly or indirectly allocate any heap memory if either:
+ *
+ * - the input string is within the max edit distance, or
+ * - `successor_out` is nullptr, or
+ * - `successor_out` has sufficient capacity to hold the generated successor
+ *
+ * By reusing the successor string across many calls, this therefore amortizes memory
+ * allocations down to near zero per invocation.
+ */
+ [[nodiscard]] MatchResult match(std::string_view source, std::string* successor_out) const;
+
+ /**
+ * Returns how much memory is used by the underlying DFA representation, in bytes.
+ */
+ [[nodiscard]] size_t memory_usage() const noexcept;
+
+ enum class DfaType {
+ Implicit,
+ Explicit
+ };
+
+ /**
+ * Builds and returns a Levenshtein DFA that matches all strings within `max_edits`
+ * edits of `target_string`. The type of DFA returned is specified by dfa_type.
+ *
+ * `max_edits` must be in {1, 2}. Throws std::invalid_argument if outside range.
+ *
+ * `target_string` must not contain any null UTF-8 chars.
+ */
+ [[nodiscard]] static LevenshteinDfa build(std::string_view target_string,
+ uint8_t max_edits,
+ DfaType dfa_type);
+
+ /**
+ * Same as build() but currently always returns an implicit DFA.
+ */
+ [[nodiscard]] static LevenshteinDfa build(std::string_view target_string, uint8_t max_edits);
+
+ /**
+ * Dumps the DFA as a Graphviz graph in text format to the provided output stream.
+ *
+ * Note: Only supported for _explicit_ DFAs. Trying to call this function on an implicit
+ * DFA will throw a std::runtime_error, as there is no concrete underlying graph
+ * structure to dump.
+ *
+ * Note that only _matching_ state transitions are present in the DFA, and therefore only
+ * such transitions are present in the generated graph. Overall this makes the graph for
+ * longer strings much more manageable, as the number of out-edges from a particular depth
+ * in the graph depends on the max number of edits and not on the length of the string
+ * itself. Otherwise, you'd have a whole bunch of nodes with out-edges to the same terminal
+ * non-matching state node.
+ */
+ void dump_as_graphviz(std::ostream& out) const;
+};
+
+std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos);
+std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt);
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp
new file mode 100644
index 00000000000..206b69f8ebe
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp
@@ -0,0 +1,291 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "dfa_matcher.h"
+#include "levenshtein_dfa.h"
+#include "unicode_utils.h"
+#include <vespa/vespalib/text/utf8.h>
+#include <cassert>
+#include <concepts>
+
+namespace vespalib::fuzzy {
+
+/**
+ * Implementation of algorithm for linear-time k-max edits string matching and successor
+ * string generation over an abstract DFA representation.
+ *
+ * The implementation is agnostic to how the underlying DFA is implemented, but requires
+ * an appropriate adapter that satisfies the DfaMatcher concept contracts.
+ */
+template <uint8_t MaxEdits>
+struct MatchAlgorithm {
+ using MatchResult = LevenshteinDfa::MatchResult;
+
+ static constexpr uint8_t max_edits() noexcept { return MaxEdits; }
+
+ /**
+ * Matches UTF-8 source string `source` against the target DFA, optionally generating
+ * the successor string iff the source string is not within the maximum number of edits
+ * of the target string.
+ *
+ * The actual match loop is very simple: we try to match the DFA as far as we can
+ * before either consuming all input (source string) characters or ending up in a non-
+ * matching state before we have consumed all input. In the former case, we may be in
+ * a matching state (consider matching "foo" with the target string "food"; after
+ * consuming all input we'll be in a matching state with 1 edit). In the latter case,
+ * the input string cannot possibly match.
+ *
+ * If we end up in a matching state, all is well. We simply return a MatchResult with
+ * the number of edits the state represents.
+ *
+ * The interesting bit happens when the string does _not_ match and we are asked to provide a
+ * _successor_ string that _does_ match and is strictly greater in lexicographic order.
+ *
+ * We lean on some core invariants:
+ *
+ * - The m x n (|source| x |target|) Levenshtein matrix provides, for any m[i, j] with
+ * i in [1, m], j in [1, n], the _minimum possible_ number of edits that can transform
+ * the source string prefix of length `i` to the target string prefix of length `j`.
+ * This means there is no way of transforming said source prefix using _fewer_ edits.
+ *
+ * - Any given DFA state corresponds to a unique row in the Levenshtein matrix, thus
+ * transitively inheriting the invariants of the matrix row elements themselves, such
+ * as representing the minimum number of edits.
+ *
+ * We have two mismatch cases:
+ *
+ * 1. We've matched the entire source string without ending in an accepting state.
+ *
+ * This can only happen if the input is a (possibly edited) prefix of the target string.
+ * Any and all _longer_ strings with this prefix are inherently lexicographically greater,
+ * so we emit the smallest possible suffix that turns prefix || suffix into a matching
+ * string.
+ *
+ * See emit_smallest_matching_suffix() for details.
+ *
+ * 2. We've matched a prefix of the source string without ending in an accepting state.
+ *
+ * This case is trickier than when the entire source string is a prefix, as we cannot
+ * just emit a suffix to the source to create a matching, lexicographically greater string.
+ *
+ * Consider source "foxx" and target "food". There exists no suffix S in "food" that can
+ * turn "foxx" || S into a matching string within k=1 edits.
+ *
+ * So we have to backtrack to somewhere.
+ *
+ * That "somewhere" is the state that maximizes the size of the source prefix while
+ * allowing us to emit a greater suffix.
+ *
+ * For each state we visit, we check if there exists at least one higher out edge than the
+ * one taken out from that state (this is possibly a wildcard edge). If one exists, we
+ * copy the state to `last_state_with_higher_out` and remember the state's source string
+ * prefix as well as the source string character that transitions us away from the state
+ * (this will be our candidate for building a greater suffix).
+ *
+ * When we fail to match the entire source string, we know that last_state_with_higher_out
+ * represents the last possible branching point (and therefore the longest prefix) where
+ * we can substitute in or insert a higher character, in turn creating a greater suffix.
+ *
+ * Proof by contradiction: let `last_state_with_higher_out` be S and assume there exists
+ * a state S' that has a greater source string prefix than S while still allowing for
+ * emitting a lexicographically greater suffix that is within max edits k. We terminate
+ * the match loop once can_match(X) is false for any state X, where X subsumes S by
+ * definition. For S' to exist, it must be possible for a transition to exist from X to
+ * a later state that can have a higher out edge. However, edit distance costs can
+ * never decrease, only stay constant (with matching substitutions) or increase (with
+ * insertions, deletions or non-matching substitutions), so it's impossible to follow
+ * an out-edge from X to any later potentially matching state. Thus, S' can not exist
+ * and we have a contradiction.
+ *
+ * Since we want to generate the smallest possible larger string that matches, we ideally
+ * want to emit a character that is +1 of the source character after the shared prefix.
+ * This is using the "higher out"-character we remembered earlier. We do this if we have
+ * a wildcard out edge (or if there exists an explicit out-edge for value char+1).
+ * Otherwise, we have to follow the highest explicitly present out-edge.
+ *
+ * Once we have emitted one single character that gets us lexicographically higher than
+ * the source string, we then emit the smallest possible suffix to this. This uses the
+ * same minimal suffix generation logic as mismatch case 1).
+ *
+ * See `backtrack_and_emit_greater_suffix()` for details.
+ *
+ * Example:
+ * (This is easiest to follow by looking at examples/food_dfa.svg)
+ *
+ * Source "foxx", target "food" and k=1:
+ *
+ * After matching "fo" with 0 edits we reach a state with out-edges {d, o, *}. This state
+ * has an implicitly higher out-edge (*) and we remember it and the char 'x' for later.
+ * Edge 'x' can only happen via *, so we take that path.
+ *
+ * After matching "fox" with 1 edit we reach a state with out-edges {d, o}. There is
+ * no out-edge for 'x' and the state is not a matching state, so we need to backtrack
+ * and generate a successor.
+ *
+ * We backtrack to the state representing "fo" and emit it as a successor prefix. We
+ * observe that this state has a wildcard out-edge and emit 'x'+1 == 'y' to the successor
+ * string and continue with emitting the smallest suffix. We now have a successor
+ * prefix of "foy", with which we reach the same logical state as we did with "fox"
+ * previously. The smallest out-edge here is 'd', so we take it. This leaves us in an
+ * accepting (matching) state, so suffix generation completes.
+ *
+ * "foxx" -> "foyd"
+ *
+ * Note that it's possible for the prefix to be empty, which results in a successor
+ * that has nothing in common with the source altogether.
+ * Example: "gp" -> "hfood" (+1 char value case)
+ *
+ * Performance note:
+ * Both the input and successor output strings are in UTF-8 format. To avoid doing
+ * duplicate work, we keep track of the byte length of the string prefix that will be
+ * part of the successor and simply copy it verbatim instead of building the string
+ * from converted UTF-32 -> UTF-8 chars as we go.
+ *
+ * TODO we could probably also optimize the smallest suffix generation with this when
+ * we know we can no longer insert any smaller char substitutions and the only way
+ * to complete the string is to emit it verbatim.
+ * - To do this we'd need both the original UTF-8 target string as well as a
+ * secondary vector that maps u32 character index to the corresponding UTF-8 index.
+ * Both trivial to get as part of DFA initialization.
+ */
+ template <DfaMatcher Matcher>
+ static MatchResult match(const Matcher& matcher,
+ std::string_view source,
+ std::string* successor_out)
+ {
+ using StateType = typename Matcher::StateType;
+ vespalib::Utf8Reader u8_reader(source.data(), source.size());
+ uint32_t n_prefix_u8_bytes = 0;
+ uint32_t char_after_prefix = 0;
+ StateType last_state_with_higher_out = StateType{};
+
+ StateType state = matcher.start();
+ while (u8_reader.hasMore()) {
+ const auto u8_pos_before_char = u8_reader.getPos();
+ const uint32_t mch = u8_reader.getChar();
+ if (successor_out && matcher.has_higher_out_edge(state, mch)) {
+ last_state_with_higher_out = state;
+ n_prefix_u8_bytes = u8_pos_before_char;
+ char_after_prefix = mch;
+ }
+ auto maybe_next = matcher.match_input(state, mch);
+ if (matcher.can_match(maybe_next)) {
+ state = maybe_next;
+ } else {
+ // Can never match; find the successor if requested
+ if (successor_out) {
+ *successor_out = source.substr(0, n_prefix_u8_bytes);
+ assert(matcher.valid_state(last_state_with_higher_out));
+ backtrack_and_emit_greater_suffix(matcher, last_state_with_higher_out,
+ char_after_prefix, *successor_out);
+ }
+ return MatchResult::make_mismatch(max_edits());
+ }
+ }
+ const auto edits = matcher.match_edit_distance(state);
+ if (edits <= max_edits()) {
+ return MatchResult::make_match(max_edits(), edits);
+ }
+ if (successor_out) {
+ *successor_out = source;
+ emit_smallest_matching_suffix(matcher, state, *successor_out);
+ }
+ return MatchResult::make_mismatch(max_edits());
+ }
+
+ /**
+ * Instantly backtrack to the last possible branching point in the DFA where we can
+ * choose some higher outgoing edge character value and still match the DFA. If the node
+ * has a wildcard edge, we can bump the input char by one and generate the smallest
+ * possible matching suffix to that. Otherwise, choose the smallest out edge that is
+ * greater than the input character at that location and _then_ emit the smallest
+ * matching suffix.
+ *
+ * precondition: `last_state_with_higher_out` has either a wildcard edge or a char match
+ * edge that compares greater than `input_at_branch`.
+ */
+ template <DfaMatcher Matcher>
+ static void backtrack_and_emit_greater_suffix(
+ const Matcher& matcher,
+ typename Matcher::StateParamType last_state_with_higher_out,
+ const uint32_t input_at_branch,
+ std::string& successor)
+ {
+ auto wildcard_state = matcher.match_wildcard(last_state_with_higher_out);
+ if (matcher.can_match(wildcard_state)) {
+ // `input_at_branch` may be U+10FFFF, with +1 being outside legal Unicode _code point_
+ // range but _within_ what UTF-8 can technically _encode_.
+ // We assume that successor-consumers do not care about anything except byte-wise
+ // ordering. This is similar to what RE2's PossibleMatchRange emits to represent a
+ // UTF-8 upper bound, so not without precedent.
+ // If the resulting character corresponds to an existing out-edge we _must_ take it
+ // instead of the wildcard edge, or we'll end up in the wrong state.
+ const auto next_char = input_at_branch + 1;
+ if (!matcher.has_exact_explicit_out_edge(last_state_with_higher_out, next_char)) {
+ append_utf32_char_as_utf8(successor, next_char);
+ emit_smallest_matching_suffix(matcher, wildcard_state, successor);
+ return;
+ } // else: handle exact match below (it will be found as the first higher out edge)
+ }
+ const auto first_highest_edge = matcher.lowest_higher_explicit_out_edge(last_state_with_higher_out, input_at_branch);
+ assert(matcher.valid_edge(first_highest_edge));
+ append_utf32_char_as_utf8(successor, matcher.edge_to_u32char(first_highest_edge));
+ emit_smallest_matching_suffix(matcher, matcher.edge_to_state(last_state_with_higher_out, first_highest_edge), successor);
+ }
+
+ /**
+ * The smallest possible suffix is generated by following the smallest out-edge per state,
+ * until we reach a state that is a match. It is possible that the smallest out edge is a
+ * "wildcard" edge (our terminology), which means that we can insert/substitute an arbitrary
+ * character and still have `can_match(resulting state)` be true. In this case we emit the
+ * smallest possible non-null UTF-8 character (0x01).
+ *
+ * Examples:
+ * (These are easiest to follow by looking at examples/food_dfa.svg)
+ *
+ * Source "fo", target "food" and k=1:
+ *
+ * After matching "fo" we have 1 edit to spare. The smallest valid, non-empty UTF-8 suffix
+ * to this string must necessarily begin with 0x01, so that's what we emit. The smallest
+ * edge we can follow from the resulting state is 'd', and that is an accepting (matching)
+ * state.
+ *
+ * "fo" -> "fo\x01d"
+ *
+ * Source "fx", target "food" and k=1:
+ *
+ * After matching "fx" we have no edits to spare. The smallest character reachable from
+ * the state is 'o' (in fact, it is the only out edge available since we're down to zero
+ * available edits). The next state has an out-edge to 'd' and 'o', and we choose 'd'
+ * since it is smallest. This leaves us in an accepting (matching) state and we terminate
+ * the loop.
+ *
+ * "fx" -> "fxod"
+ */
+ // TODO consider variant for only emitting _prefix of suffix_ to avoid having to generate
+ // the full string? Won't generate a matching string, but will be lexicographically greater.
+ template <DfaMatcher Matcher>
+ static void emit_smallest_matching_suffix(
+ const Matcher& matcher,
+ typename Matcher::StateParamType from,
+ std::string& str)
+ {
+ auto state = from;
+ while (!matcher.is_match(state)) {
+ // If we can take a wildcard path, emit the smallest possible valid UTF-8 character (0x01).
+ // Otherwise, find the smallest char that can eventually lead us to a match.
+ auto wildcard_state = matcher.match_wildcard(state);
+ if (matcher.can_match(wildcard_state)) {
+ str += '\x01';
+ state = wildcard_state;
+ } else {
+ const auto smallest_out_edge = matcher.smallest_explicit_out_edge(state);
+ assert(matcher.valid_edge(smallest_out_edge));
+ append_utf32_char_as_utf8(str, matcher.edge_to_u32char(smallest_out_edge));
+ state = matcher.edge_to_state(state, smallest_out_edge);
+ }
+ }
+ }
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h
new file mode 100644
index 00000000000..40cfa5e6409
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h
@@ -0,0 +1,175 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <ostream>
+#include <span>
+#include <xxh3.h> // TODO factor out?
+
+namespace vespalib::fuzzy {
+
+// Sentinel U32 char for state stepping that cannot match any target string characters
+constexpr const uint32_t WILDCARD = UINT32_MAX;
+
+/**
+ * diag(n) is the width of the diagonal of the cost matrix that can possibly be
+ * within k edits. This means that for a fixed k, it suffices to maintain state
+ * for up to and including diag(k) consecutive cells for any given matrix row.
+ */
+constexpr inline uint8_t diag(uint8_t k) noexcept {
+ return k*2 + 1;
+}
+
+template <uint8_t MaxEdits>
+struct FixedSparseState {
+private:
+ static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2);
+
+ std::array<uint32_t, diag(MaxEdits)> indices;
+ std::array<uint8_t, diag(MaxEdits)> costs; // elems are 1-1 with indices vector
+ uint8_t sz;
+public:
+ constexpr FixedSparseState() noexcept : indices(), costs(), sz(0) {}
+
+ [[nodiscard]] constexpr bool empty() const noexcept {
+ return (sz == 0);
+ }
+
+ [[nodiscard]] constexpr uint32_t size() const noexcept {
+ return sz;
+ }
+
+ [[nodiscard]] constexpr uint32_t index(uint32_t entry_idx) const noexcept {
+ return indices[entry_idx];
+ }
+
+ [[nodiscard]] constexpr uint8_t cost(uint32_t entry_idx) const noexcept {
+ return costs[entry_idx];
+ }
+
+ // Precondition: !empty()
+ [[nodiscard]] constexpr uint32_t last_index() const noexcept {
+ return indices[sz - 1];
+ }
+
+ // Precondition: !empty()
+ [[nodiscard]] constexpr uint8_t last_cost() const noexcept {
+ return costs[sz - 1];
+ }
+
+ void append(uint32_t index, uint8_t cost) noexcept {
+ assert(sz < diag(MaxEdits));
+ indices[sz] = index;
+ costs[sz] = cost;
+ ++sz;
+ }
+
+ constexpr bool operator==(const FixedSparseState& rhs) const noexcept {
+ if (sz != rhs.sz) {
+ return false;
+ }
+ return (std::equal(indices.begin(), indices.begin() + sz, rhs.indices.begin()) &&
+ std::equal(costs.begin(), costs.begin() + sz, rhs.costs.begin()));
+ }
+
+ struct hash {
+ size_t operator()(const FixedSparseState& s) const noexcept {
+ static_assert(std::is_same_v<uint32_t, std::decay_t<decltype(s.indices[0])>>);
+ static_assert(std::is_same_v<uint8_t, std::decay_t<decltype(s.costs[0])>>);
+ // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+ return (XXH3_64bits(s.indices.data(), s.sz * sizeof(uint32_t)) ^
+ XXH3_64bits(s.costs.data(), s.sz));
+#pragma GCC diagnostic pop
+ }
+ };
+};
+
+/**
+ * Prints sparse states as a single matrix row. Columns prior to any state index
+ * are printed explicitly as '-' characters to make states line up when printed.
+ *
+ * Example output for the state (2:1, 3:1):
+ *
+ * [-, -, 1, 1]
+ *
+ * Only meant as a debugging aid during development, as states with high indices
+ * will emit very large strings.
+ */
+template <uint8_t MaxEdits> [[maybe_unused]]
+std::ostream& operator<<(std::ostream& os, const FixedSparseState<MaxEdits>& s) {
+ os << "[";
+ size_t last_idx = 0;
+ for (size_t i = 0; i < s.size(); ++i) {
+ if (i != 0) {
+ os << ", ";
+ }
+ for (size_t j = last_idx; j < s.indices[i]; ++j) {
+ os << "-, ";
+ }
+ last_idx = s.indices[i] + 1;
+ os << static_cast<uint32_t>(s.costs[i]);
+ }
+ os << "]";
+ return os;
+}
+
+template <uint8_t MaxEdits>
+struct FixedMaxEditsTransitions {
+ static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2);
+
+ std::array<uint32_t, diag(MaxEdits)> out_u32_chars;
+ uint8_t size;
+
+ constexpr FixedMaxEditsTransitions() noexcept : out_u32_chars(), size(0) {}
+
+ [[nodiscard]] constexpr bool has_char(uint32_t u32ch) const noexcept {
+ for (uint8_t i = 0; i < size; ++i) {
+ if (out_u32_chars[i] == u32ch) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void add_char(uint32_t u32ch) noexcept {
+ if (!has_char(u32ch)) {
+ assert(size < diag(MaxEdits));
+ out_u32_chars[size] = u32ch;
+ ++size;
+ }
+ }
+
+ constexpr std::span<const uint32_t> u32_chars() const noexcept {
+ return {out_u32_chars.begin(), out_u32_chars.begin() + size};
+ }
+
+ constexpr std::span<uint32_t> u32_chars() noexcept {
+ return {out_u32_chars.begin(), out_u32_chars.begin() + size};
+ }
+
+ void sort() noexcept {
+ // TODO use custom sorting networks for fixed array sizes <= 5?
+ // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+ std::sort(out_u32_chars.begin(), out_u32_chars.begin() + size);
+#pragma GCC diagnostic pop
+ }
+};
+
+template <uint8_t MaxEdits>
+struct FixedMaxEditDistanceTraits {
+ static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2);
+ using StateType = FixedSparseState<MaxEdits>;
+ using TransitionsType = FixedMaxEditsTransitions<MaxEdits>;
+ constexpr static uint8_t max_edits() noexcept {
+ return MaxEdits;
+ }
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp
new file mode 100644
index 00000000000..648be234562
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp
@@ -0,0 +1,108 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "unicode_utils.h"
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <stdexcept>
+
+namespace vespalib::fuzzy {
+
+std::vector<uint32_t> utf8_string_to_utf32(std::string_view str) {
+ vespalib::stringref ch_str(str.data(), str.size());
+ vespalib::Utf8Reader utf8_reader(ch_str);
+ std::vector<uint32_t> u32ret;
+ u32ret.reserve(str.size()); // Will over-allocate for all non-ASCII
+ while (utf8_reader.hasMore()) {
+ u32ret.emplace_back(utf8_reader.getChar());
+ }
+ return u32ret;
+}
+
+std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str) {
+ return utf8_string_to_utf32(std::string_view(reinterpret_cast<const char*>(u8str.data()), u8str.size()));
+}
+
+[[noreturn]] void throw_bad_code_point(uint32_t codepoint) __attribute__((noinline));
+[[noreturn]] void throw_bad_code_point(uint32_t codepoint) {
+ throw std::invalid_argument(make_string("invalid UTF-32 codepoint: U+%04X (%u)", codepoint, codepoint));
+}
+
+namespace {
+
+/**
+ * Encodes a single UTF-32 `codepoint` to a 1-4 byte UTF-8 sequence.
+ *
+ * `u8buf` must point to a buffer with at least 4 writable bytes.
+ *
+ * Returns the number of bytes written.
+ *
+ * See comments on append_utf32_char_as_utf8() as to why this is not a generic UTF-8
+ * encoding function that can be used in all possible scenarios.
+ */
+[[nodiscard]] uint8_t encode_utf8_char(uint32_t codepoint, unsigned char* u8buf) {
+ constexpr const uint8_t low_6bits_mask = 0x3F;
+
+ // Yanked and modified from utf8.cpp:
+ if (codepoint < 0x80) {
+ u8buf[0] = (char) codepoint;
+ return 1;
+ } else if (codepoint < 0x800) {
+ char low6 = (codepoint & low_6bits_mask);
+ low6 |= 0x80;
+ codepoint >>= 6;
+ char first5 = codepoint;
+ first5 |= 0xC0;
+ u8buf[0] = first5;
+ u8buf[1] = low6;
+ return 2;
+ } else if (codepoint < 0x10000) {
+ char low6 = (codepoint & low_6bits_mask);
+ low6 |= 0x80;
+
+ codepoint >>= 6;
+ char mid6 = (codepoint & low_6bits_mask);
+ mid6 |= 0x80;
+
+ codepoint >>= 6;
+ char first4 = codepoint;
+ first4 |= 0xE0;
+
+ u8buf[0] = first4;
+ u8buf[1] = mid6;
+ u8buf[2] = low6;
+ return 3;
+ } else if (codepoint <= 0x110000) { // Explicitly _include_ U+10FFFF + 1!
+ char low6 = (codepoint & low_6bits_mask);
+ low6 |= 0x80;
+
+ codepoint >>= 6;
+ char mid6 = (codepoint & low_6bits_mask);
+ mid6 |= 0x80;
+
+ codepoint >>= 6;
+ char hi6 = (codepoint & low_6bits_mask);
+ hi6 |= 0x80;
+
+ codepoint >>= 6;
+ char first3 = codepoint;
+ first3 |= 0xF0;
+
+ u8buf[0] = first3;
+ u8buf[1] = hi6;
+ u8buf[2] = mid6;
+ u8buf[3] = low6;
+ return 4;
+ } else {
+ throw_bad_code_point(codepoint);
+ }
+}
+
+} // anon ns
+
+// TODO optimize inlined in header for case where u32_char is < 0x80?
+void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char) {
+ unsigned char u8buf[4];
+ uint8_t u8bytes = encode_utf8_char(u32_char, u8buf);
+ out_str.append(reinterpret_cast<const char*>(u8buf), u8bytes);
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h
new file mode 100644
index 00000000000..8627b01ff6a
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace vespalib::fuzzy {
+
+std::vector<uint32_t> utf8_string_to_utf32(std::string_view str);
+
+std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str);
+
+/**
+ * Encodes a single UTF-32 codepoint `u32_char` to a 1-4 byte UTF-8 sequence and
+ * appends it to `out_str`.
+ *
+ * Note that this will happily encode code points that aren't technically part of
+ * the valid UTF-8 range, but which will still be correct in memcmp() byte-wise
+ * ordering, which is the API contract we expose.
+ *
+ * In particular, this includes:
+ * - high/low surrogate ranges U+D800 through U+DFFF (surrogate pairs not allowed
+ * in UTF-8)
+ * - U+10FFFF + 1 (outside max code point range by one)
+ *
+ * ... So don't copy this function for use as a general UTF-8 emitter, as it is not
+ * _technically_ conformant!
+ */
+void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char);
+
+}
diff --git a/vespalib/src/vespa/vespalib/text/lowercase.h b/vespalib/src/vespa/vespalib/text/lowercase.h
index 5c4e3e34e07..dc081c6ba2d 100644
--- a/vespalib/src/vespa/vespalib/text/lowercase.h
+++ b/vespalib/src/vespa/vespalib/text/lowercase.h
@@ -43,9 +43,9 @@ public:
* @param codepoint the character codepoint to be lowercased.
* @return lowercase UCS-4 character (codepoint if no lowercasing is performed).
**/
- static uint32_t convert(uint32_t codepoint) noexcept
+ static uint32_t convert(uint32_t codepoint)
{
- if (codepoint < 0x100) [[likely]] {
+ if (codepoint < 0x100) {
return lowercase_0_block[codepoint];
} else if (codepoint < 0x600) {
return lowercase_0_5_blocks[codepoint];
diff --git a/vespalib/src/vespa/vespalib/text/utf8.cpp b/vespalib/src/vespa/vespalib/text/utf8.cpp
index c950f62985f..cae2bbae682 100644
--- a/vespalib/src/vespa/vespalib/text/utf8.cpp
+++ b/vespalib/src/vespa/vespalib/text/utf8.cpp
@@ -16,16 +16,18 @@ void Utf8::throwX(const char *msg, unsigned int number)
throw IllegalArgumentException(what);
}
-uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept
+uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback)
{
if (_pos == size()) {
// this shouldn't happen ...
- LOG(warning, "last byte %02X of Utf8Reader block was incomplete UTF-8", firstbyte);
+ LOG(warning, "last byte %02X of Utf8Reader block was incomplete UTF-8",
+ firstbyte);
return fallback;
}
assert(hasMore()); // should never fall out of range
if (! Utf8::validFirstByte(firstbyte)) {
- LOG(debug, "invalid first byte %02X in Utf8Reader data block", firstbyte);
+ LOG(debug, "invalid first byte %02X in Utf8Reader data block",
+ firstbyte);
return fallback;
}
int need = Utf8::numContBytes(firstbyte);
@@ -46,7 +48,8 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback)
// check > 0x7F ?
return r;
} else {
- LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", contbyte);
+ LOG(debug, "invalid continuation byte %02X in Utf8Reader data block",
+ contbyte);
return fallback;
}
}
@@ -66,7 +69,8 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback)
// check > 0x7FF ?
return r;
} else {
- LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", contbyte1, contbyte2);
+ LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block",
+ contbyte1, contbyte2);
return fallback;
}
}
@@ -91,10 +95,11 @@ uint32_t Utf8Reader::getComplexChar(unsigned char firstbyte, uint32_t fallback)
uint32_t
-Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept
+Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback)
{
if (! Utf8::validFirstByte(firstbyte)) {
- LOG(debug, "invalid first byte %02X in Utf8Reader data block", firstbyte);
+ LOG(debug, "invalid first byte %02X in Utf8Reader data block",
+ firstbyte);
return fallback;
}
int need = Utf8::numContBytes(firstbyte);
@@ -103,7 +108,8 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe
if (need == 1) {
if (_p[0] == 0) {
- LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte);
+ LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS",
+ firstbyte);
return fallback;
}
unsigned char contbyte = _p[0];
@@ -113,14 +119,16 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe
// check > 0x7F ?
return r;
} else {
- LOG(debug, "invalid continuation byte %02X in Utf8Reader data block", contbyte);
+ LOG(debug, "invalid continuation byte %02X in Utf8Reader data block",
+ contbyte);
return fallback;
}
}
if (need == 2) {
if (_p[0] == 0 || _p[1] == 0) {
- LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte);
+ LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS",
+ firstbyte);
return fallback;
}
unsigned char contbyte1 = _p[0];
@@ -137,14 +145,16 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe
// check > 0x7FF ?
return r;
} else {
- LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block", contbyte1, contbyte2);
+ LOG(debug, "invalid continuation bytes %02X/%02X in Utf8Reader data block",
+ contbyte1, contbyte2);
return fallback;
}
}
assert(need == 3);
if (_p[0] == 0 || _p[1] == 0 || _p[2] == 0) {
- LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS", firstbyte);
+ LOG(debug, "incomplete character (first byte %02X) in Utf8ReaderZTS",
+ firstbyte);
return fallback;
}
unsigned char contbyte1 = _p[0];
@@ -158,7 +168,8 @@ Utf8ReaderForZTS::getComplexChar(unsigned char firstbyte, uint32_t fallback) noe
// check > 0xFFFF?
return decode4(firstbyte, contbyte1, contbyte2, contbyte3);
} else {
- LOG(debug, "invalid continuation bytes %02X/%02X/%02X in Utf8Reader data block", contbyte1, contbyte2, contbyte3);
+ LOG(debug, "invalid continuation bytes %02X/%02X/%02X in Utf8Reader data block",
+ contbyte1, contbyte2, contbyte3);
return fallback;
}
}
@@ -223,7 +234,7 @@ template class Utf8Writer<vespalib::string>;
template class Utf8Writer<std::string>;
template <typename T>
-T Utf8::filter_invalid_sequences(const T& input) noexcept
+T Utf8::filter_invalid_sequences(const T& input)
{
T retval;
Utf8Reader reader(input.c_str(), input.size());
diff --git a/vespalib/src/vespa/vespalib/text/utf8.h b/vespalib/src/vespa/vespalib/text/utf8.h
index 3367bd5b3d2..98e06ca5faf 100644
--- a/vespalib/src/vespa/vespalib/text/utf8.h
+++ b/vespalib/src/vespa/vespalib/text/utf8.h
@@ -34,14 +34,14 @@ public:
* UTF-8 encoded surrogates are also considered invalid.
**/
template <typename T>
- static T filter_invalid_sequences(const T& input) noexcept;
+ static T filter_invalid_sequences(const T& input);
/**
* check if a byte is valid as the first byte of an UTF-8 character.
* @param c the byte to be checked
* @return true if a valid UTF-8 character can start with this byte
**/
- static bool validFirstByte(unsigned char c) noexcept {
+ static bool validFirstByte(unsigned char c) {
return (c < 0x80 ||
(c > 0xC1 && c < 0xF5));
}
@@ -52,12 +52,12 @@ public:
* @param c the first byte (must pass validFirstByte check)
* @return 0, 1, 2, or 3
**/
- static int numContBytes(unsigned char c) noexcept {
+ static int numContBytes(unsigned char c) {
if (c < 0x80) return 0;
if (c > 0xC1 && c < 0xE0) return 1;
if (c > 0xDF && c < 0xF0) return 2;
if (c > 0xEF && c < 0xF5) return 3;
- return -1;
+ throwX("invalid first byte of UTF8 sequence", c);
}
/**
@@ -65,7 +65,7 @@ public:
* @param c the byte to be checked
* @return true if a valid UTF-8 character can contain this byte
**/
- static bool validContByte(unsigned char c) noexcept {
+ static bool validContByte(unsigned char c) {
return (c > 0x7F && c < 0xC0);
}
@@ -82,7 +82,8 @@ public:
* @param contbyte second byte in this UTF-8 character
* @return decoded UCS-4 codepoint in range [0, 0x7FF]
**/
- static uint32_t decode2(unsigned char firstbyte, unsigned char contbyte) noexcept
+ static uint32_t decode2(unsigned char firstbyte,
+ unsigned char contbyte)
{
uint32_t r = (firstbyte & low_5bits_mask);
r <<= 6;
@@ -107,7 +108,7 @@ public:
**/
static uint32_t decode3(unsigned char firstbyte,
unsigned char contbyte1,
- unsigned char contbyte2) noexcept
+ unsigned char contbyte2)
{
uint32_t r = (firstbyte & low_4bits_mask);
r <<= 6;
@@ -137,7 +138,7 @@ public:
static uint32_t decode4(unsigned char firstbyte,
unsigned char contbyte1,
unsigned char contbyte2,
- unsigned char contbyte3) noexcept
+ unsigned char contbyte3)
{
uint32_t r = (firstbyte & low_3bits_mask);
r <<= 6;
@@ -176,14 +177,14 @@ class Utf8Reader
private:
size_type _pos;
- uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept;
+ uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback);
public:
/**
* Construct a reader for the given block of data
* @param input data to read UTF-8 from (can be read-only)
**/
- Utf8Reader(stringref input) noexcept
+ Utf8Reader(stringref input)
: stringref(input), _pos(0)
{}
@@ -192,7 +193,7 @@ public:
* @param start pointer to the start of the block
* @param sz size of the block in bytes
**/
- Utf8Reader(const char *start, size_t sz) noexcept
+ Utf8Reader(const char *start, size_t sz)
: stringref(start, sz), _pos(0)
{}
@@ -200,7 +201,7 @@ public:
* check if the buffer has more data.
* @return true if there is more data
**/
- bool hasMore() const noexcept { return _pos < size(); }
+ bool hasMore() const { return _pos < size(); }
/**
* Decode the UTF-8 character at the current position.
@@ -210,7 +211,7 @@ public:
* @param fallback the value to return if invalid UTF-8 is found
* @return a valid UCS-4 codepoint (or the fallback value)
**/
- uint32_t getChar(uint32_t fallback) noexcept {
+ uint32_t getChar(uint32_t fallback) {
unsigned char firstbyte = (*this)[_pos++]; // always steps at least 1 position
if (firstbyte < 0x80) {
return firstbyte;
@@ -231,13 +232,13 @@ public:
*
* @return a valid UCS-4 codepoint
**/
- uint32_t getChar() noexcept { return getChar(Utf8::REPLACEMENT_CHAR); }
+ uint32_t getChar() { return getChar(Utf8::REPLACEMENT_CHAR); }
/**
* obtain the current byte offset position
* @return position in bytes
**/
- size_type getPos() const noexcept { return _pos; }
+ size_type getPos() const { return _pos; }
};
@@ -251,7 +252,7 @@ class Utf8ReaderForZTS
{
private:
const char * &_p;
- uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback) noexcept;
+ uint32_t getComplexChar(unsigned char firstbyte, uint32_t fallback);
public:
/**
@@ -264,7 +265,7 @@ public:
*
* @param start pointer to the start of the block
**/
- Utf8ReaderForZTS(const char * &start) noexcept
+ Utf8ReaderForZTS(const char * &start)
: _p(start)
{}
@@ -272,7 +273,7 @@ public:
* check if the buffer has more data.
* @return true if there is more data
**/
- bool hasMore() const noexcept {
+ bool hasMore() const {
return (*_p) != '\0';
}
@@ -284,9 +285,9 @@ public:
* @param fallback the value to return if invalid UTF-8 is found
* @return a valid UCS-4 codepoint (or the fallback value)
**/
- uint32_t getChar(uint32_t fallback) noexcept {
+ uint32_t getChar(uint32_t fallback) {
unsigned char firstbyte = *_p++; // always steps at least 1 position
- if (firstbyte < 0x80) [[likely]] {
+ if (firstbyte < 0x80) {
return firstbyte;
} else {
return getComplexChar(firstbyte, fallback);
@@ -305,7 +306,7 @@ public:
*
* @return a valid UCS-4 codepoint
**/
- uint32_t getChar() noexcept{ return getChar(Utf8::REPLACEMENT_CHAR); }
+ uint32_t getChar() { return getChar(Utf8::REPLACEMENT_CHAR); }
/**
* count the number of UCS-4 characters will be returned when
@@ -313,7 +314,7 @@ public:
* "strlen" does not count the zero termination, but bytes
* that aren't valid UTF-8 will count as one character each.
**/
- static size_t countChars(const char *p) noexcept {
+ static size_t countChars(const char *p) {
Utf8ReaderForZTS reader(p);
size_t i;
for (i = 0; reader.hasMore(); ++i) {
@@ -339,7 +340,7 @@ public:
* that the writer will append to. Must be writable
* and must be kept alive while the writer is active.
**/
- Utf8Writer(Target &target) noexcept : _target(target) {}
+ Utf8Writer(Target &target) : _target(target) {}
/**
* append the given character to the target string.
diff --git a/vespalib/src/vespa/vespalib/util/alloc.cpp b/vespalib/src/vespa/vespalib/util/alloc.cpp
index 2ba3bc252ae..204d80340aa 100644
--- a/vespalib/src/vespa/vespalib/util/alloc.cpp
+++ b/vespalib/src/vespa/vespalib/util/alloc.cpp
@@ -292,7 +292,7 @@ HeapAllocator::alloc(size_t sz) const {
PtrAndSize
HeapAllocator::salloc(size_t sz) {
if (sz == 0) {
- return PtrAndSize();
+ return PtrAndSize(nullptr, sz);
}
void * ptr = malloc(sz);
if (ptr == nullptr) {
@@ -311,7 +311,7 @@ void HeapAllocator::sfree(PtrAndSize alloc) noexcept {
PtrAndSize
AlignedHeapAllocator::alloc(size_t sz) const {
- if (!sz) { return PtrAndSize(); }
+ if (!sz) { return PtrAndSize(nullptr, 0); }
void* ptr;
int result = posix_memalign(&ptr, _alignment, sz);
if (result != 0) {
diff --git a/vespalib/src/vespa/vespalib/util/alloc.h b/vespalib/src/vespa/vespalib/util/alloc.h
index dca4d633b43..a27bcca0b47 100644
--- a/vespalib/src/vespa/vespalib/util/alloc.h
+++ b/vespalib/src/vespa/vespalib/util/alloc.h
@@ -49,7 +49,7 @@ public:
}
return *this;
}
- Alloc() noexcept : _alloc(), _allocator(nullptr) { }
+ Alloc() noexcept : _alloc(nullptr, 0), _allocator(nullptr) { }
~Alloc() noexcept {
reset();
}
@@ -83,9 +83,10 @@ private:
Alloc(const MemoryAllocator * allocator, size_t sz) noexcept
: _alloc(allocator->alloc(sz)),
_allocator(allocator)
- { }
+ {
+ }
Alloc(const MemoryAllocator * allocator) noexcept
- : _alloc(),
+ : _alloc(nullptr, 0),
_allocator(allocator)
{ }
void clear() noexcept {
diff --git a/vespalib/src/vespa/vespalib/util/growstrategy.h b/vespalib/src/vespa/vespalib/util/growstrategy.h
index 643e3f03023..02e18e44925 100644
--- a/vespalib/src/vespa/vespalib/util/growstrategy.h
+++ b/vespalib/src/vespa/vespalib/util/growstrategy.h
@@ -4,15 +4,14 @@
#include <algorithm>
#include <cstddef>
-#include <cstdint>
namespace vespalib {
class GrowStrategy {
private:
- uint32_t _initialCapacity;
- uint32_t _minimumCapacity;
- uint32_t _growDelta;
+ size_t _initialCapacity;
+ size_t _minimumCapacity;
+ size_t _growDelta;
float _growFactor;
public:
GrowStrategy() noexcept
@@ -34,7 +33,7 @@ public:
void setInitialCapacity(size_t v) noexcept { _initialCapacity = v; }
void setGrowDelta(size_t v) noexcept { _growDelta = v; }
- size_t calc_new_size(size_t base_size) const noexcept {
+ size_t calc_new_size(size_t base_size) const {
size_t delta = (base_size * getGrowFactor()) + getGrowDelta();
size_t new_size = base_size + std::max(delta, static_cast<size_t>(1));
return std::max(new_size, getMinimumCapacity());
diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp
index 2c0d0f4339d..9ed4806385d 100644
--- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp
+++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp
@@ -46,7 +46,7 @@ PtrAndSize
MmapFileAllocator::alloc(size_t sz) const
{
if (sz == 0) {
- return PtrAndSize(); // empty allocation
+ return PtrAndSize(nullptr, 0); // empty allocation
}
sz = round_up_to_page_size(sz);
uint64_t offset = alloc_area(sz);
diff --git a/vespalib/src/vespa/vespalib/util/small_vector.h b/vespalib/src/vespa/vespalib/util/small_vector.h
index ba166362d33..b47cb5903b9 100644
--- a/vespalib/src/vespa/vespalib/util/small_vector.h
+++ b/vespalib/src/vespa/vespalib/util/small_vector.h
@@ -216,7 +216,7 @@ public:
template <typename T, size_t N, size_t M>
bool operator==(const SmallVector<T,N> &a,
- const SmallVector<T,M> &b) noexcept
+ const SmallVector<T,M> &b)
{
if (a.size() != b.size()) {
return false;