From 9b948718144e5d556cc6ea49d2ff6bbf05f00b0e Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Thu, 10 Sep 2020 19:54:48 +0200 Subject: Use full name in config definition file names --- storage/src/vespa/storage/bucketdb/CMakeLists.txt | 8 +- .../vespa/storage/bucketdb/stor-bucket-init.def | 35 --- .../src/vespa/storage/bucketdb/stor-bucketdb.def | 9 - .../vespa.config.content.core.stor-bucket-init.def | 35 +++ .../vespa.config.content.core.stor-bucketdb.def | 9 + storage/src/vespa/storage/config/CMakeLists.txt | 38 ++-- storage/src/vespa/storage/config/rpc-provider.def | 4 - storage/src/vespa/storage/config/stor-bouncer.def | 30 --- .../src/vespa/storage/config/stor-bucketmover.def | 37 ---- .../storage/config/stor-communicationmanager.def | 57 ----- .../storage/config/stor-distributormanager.def | 246 --------------------- .../vespa/storage/config/stor-integritychecker.def | 38 ---- .../vespa/storage/config/stor-messageforwarder.def | 4 - .../src/vespa/storage/config/stor-opslogger.def | 4 - .../vespa/storage/config/stor-prioritymapping.def | 20 -- storage/src/vespa/storage/config/stor-server.def | 90 -------- storage/src/vespa/storage/config/stor-status.def | 4 - .../storage/config/stor-visitordispatcher.def | 19 -- .../vespa.config.content.core.rpc-provider.def | 4 + .../vespa.config.content.core.stor-bouncer.def | 30 +++ .../vespa.config.content.core.stor-bucketmover.def | 37 ++++ ...nfig.content.core.stor-communicationmanager.def | 57 +++++ ...config.content.core.stor-distributormanager.def | 246 +++++++++++++++++++++ ...a.config.content.core.stor-integritychecker.def | 38 ++++ ...a.config.content.core.stor-messageforwarder.def | 4 + .../vespa.config.content.core.stor-opslogger.def | 4 + ...pa.config.content.core.stor-prioritymapping.def | 20 ++ .../vespa.config.content.core.stor-server.def | 90 ++++++++ .../vespa.config.content.core.stor-status.def | 4 + ....config.content.core.stor-visitordispatcher.def | 19 ++ 
storage/src/vespa/storage/visiting/CMakeLists.txt | 5 +- .../src/vespa/storage/visiting/stor-visitor.def | 63 ------ .../vespa.config.content.core.stor-visitor.def | 63 ++++++ 33 files changed, 681 insertions(+), 690 deletions(-) delete mode 100644 storage/src/vespa/storage/bucketdb/stor-bucket-init.def delete mode 100644 storage/src/vespa/storage/bucketdb/stor-bucketdb.def create mode 100644 storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucket-init.def create mode 100644 storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucketdb.def delete mode 100644 storage/src/vespa/storage/config/rpc-provider.def delete mode 100644 storage/src/vespa/storage/config/stor-bouncer.def delete mode 100644 storage/src/vespa/storage/config/stor-bucketmover.def delete mode 100644 storage/src/vespa/storage/config/stor-communicationmanager.def delete mode 100644 storage/src/vespa/storage/config/stor-distributormanager.def delete mode 100644 storage/src/vespa/storage/config/stor-integritychecker.def delete mode 100644 storage/src/vespa/storage/config/stor-messageforwarder.def delete mode 100644 storage/src/vespa/storage/config/stor-opslogger.def delete mode 100644 storage/src/vespa/storage/config/stor-prioritymapping.def delete mode 100644 storage/src/vespa/storage/config/stor-server.def delete mode 100644 storage/src/vespa/storage/config/stor-status.def delete mode 100644 storage/src/vespa/storage/config/stor-visitordispatcher.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.rpc-provider.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-bouncer.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-bucketmover.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-communicationmanager.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-distributormanager.def create mode 100644 
storage/src/vespa/storage/config/vespa.config.content.core.stor-integritychecker.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-messageforwarder.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-opslogger.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-prioritymapping.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-server.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-status.def create mode 100644 storage/src/vespa/storage/config/vespa.config.content.core.stor-visitordispatcher.def delete mode 100644 storage/src/vespa/storage/visiting/stor-visitor.def create mode 100644 storage/src/vespa/storage/visiting/vespa.config.content.core.stor-visitor.def (limited to 'storage') diff --git a/storage/src/vespa/storage/bucketdb/CMakeLists.txt b/storage/src/vespa/storage/bucketdb/CMakeLists.txt index 048cc25ec95..22d43eb494e 100644 --- a/storage/src/vespa/storage/bucketdb/CMakeLists.txt +++ b/storage/src/vespa/storage/bucketdb/CMakeLists.txt @@ -15,7 +15,7 @@ vespa_add_library(storage_bucketdb OBJECT storbucketdb.cpp DEPENDS ) -vespa_generate_config(storage_bucketdb stor-bucketdb.def) -install_config_definition(stor-bucketdb.def vespa.config.content.core.stor-bucketdb.def) -vespa_generate_config(storage_bucketdb stor-bucket-init.def) -install_config_definition(stor-bucket-init.def vespa.config.content.core.stor-bucket-init.def) +vespa_generate_config(storage_bucketdb vespa.config.content.core.stor-bucketdb.def) +vespa_generate_config(storage_bucketdb vespa.config.content.core.stor-bucket-init.def) + +install_config_definitions(src/vespa/storage/bucketdb) diff --git a/storage/src/vespa/storage/bucketdb/stor-bucket-init.def b/storage/src/vespa/storage/bucketdb/stor-bucket-init.def deleted file mode 100644 index 3517afabd44..00000000000 --- 
a/storage/src/vespa/storage/bucketdb/stor-bucket-init.def +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -# The maximum number of alien files found during init that should be stored in -# memory so they can be viewed on status page. -max_alien_files_logged int default=10 restart - -# The maximum number of pending info reads to each disk during initialization. -max_pending_info_reads_per_disk int default=20 restart - -# The minimum number of pending info reads to each disk during initialization. -# When pending falls below this, we will resume database scan to add more -# pending up to the maximum setting. -min_pending_info_reads_per_disk int default=4 restart - -# The priority of the read bucket info requests the initializer sends to the -# persistence layer. Currently chosen so that such operations will not pre- -# empt any regular external load or ideal state operations, but they will block -# very low priority background operations such as periodic GC (default pri of -# 200). A tradeoff must be made between fast initialization and the availability -# of data on the initializing node. -info_read_priority int default=185 restart - -# The priority of the list bucket requests the initializer sends to the -# persistence layer. Should always be lower than the read priority to ensure -# starting to read wont make listing wait. However, listing is currently pretty -# much required to be done before starting anyhow, so this option does little -# unless your directories are not hardware independent. -list_priority int default=100 restart - -# Whether the initializer should complete listing before starting to read -# bucket information. Shouldnt matter much performance wise so always set to -# true as it is now. Setting it false, disks done listing first will start -# to process info requests a bit earlier than otherwise. 
-complete_list_before_starting_read bool default=false restart diff --git a/storage/src/vespa/storage/bucketdb/stor-bucketdb.def b/storage/src/vespa/storage/bucketdb/stor-bucketdb.def deleted file mode 100644 index 470dd3afbf0..00000000000 --- a/storage/src/vespa/storage/bucketdb/stor-bucketdb.def +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Number of elements to retrieve in one bucket info chunk -bucketinfobatchsize int default=128 restart - -## Chunk level. Set what level of the path which defines one chunk. -## (See doxygen info in bucketmanager.h for more info) -chunklevel int default=1 restart diff --git a/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucket-init.def b/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucket-init.def new file mode 100644 index 00000000000..3517afabd44 --- /dev/null +++ b/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucket-init.def @@ -0,0 +1,35 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +# The maximum number of alien files found during init that should be stored in +# memory so they can be viewed on status page. +max_alien_files_logged int default=10 restart + +# The maximum number of pending info reads to each disk during initialization. +max_pending_info_reads_per_disk int default=20 restart + +# The minimum number of pending info reads to each disk during initialization. +# When pending falls below this, we will resume database scan to add more +# pending up to the maximum setting. +min_pending_info_reads_per_disk int default=4 restart + +# The priority of the read bucket info requests the initializer sends to the +# persistence layer. 
Currently chosen so that such operations will not pre- +# empt any regular external load or ideal state operations, but they will block +# very low priority background operations such as periodic GC (default pri of +# 200). A tradeoff must be made between fast initialization and the availability +# of data on the initializing node. +info_read_priority int default=185 restart + +# The priority of the list bucket requests the initializer sends to the +# persistence layer. Should always be lower than the read priority to ensure +# starting to read wont make listing wait. However, listing is currently pretty +# much required to be done before starting anyhow, so this option does little +# unless your directories are not hardware independent. +list_priority int default=100 restart + +# Whether the initializer should complete listing before starting to read +# bucket information. Shouldnt matter much performance wise so always set to +# true as it is now. Setting it false, disks done listing first will start +# to process info requests a bit earlier than otherwise. +complete_list_before_starting_read bool default=false restart diff --git a/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucketdb.def b/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucketdb.def new file mode 100644 index 00000000000..470dd3afbf0 --- /dev/null +++ b/storage/src/vespa/storage/bucketdb/vespa.config.content.core.stor-bucketdb.def @@ -0,0 +1,9 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Number of elements to retrieve in one bucket info chunk +bucketinfobatchsize int default=128 restart + +## Chunk level. Set what level of the path which defines one chunk. 
+## (See doxygen info in bucketmanager.h for more info) +chunklevel int default=1 restart diff --git a/storage/src/vespa/storage/config/CMakeLists.txt b/storage/src/vespa/storage/config/CMakeLists.txt index 4a20d510043..5382048ece7 100644 --- a/storage/src/vespa/storage/config/CMakeLists.txt +++ b/storage/src/vespa/storage/config/CMakeLists.txt @@ -4,27 +4,17 @@ vespa_add_library(storage_storageconfig OBJECT distributorconfiguration.cpp DEPENDS ) -vespa_generate_config(storage_storageconfig stor-communicationmanager.def) -install_config_definition(stor-communicationmanager.def vespa.config.content.core.stor-communicationmanager.def) -vespa_generate_config(storage_storageconfig stor-distributormanager.def) -install_config_definition(stor-distributormanager.def vespa.config.content.core.stor-distributormanager.def) -vespa_generate_config(storage_storageconfig stor-server.def) -install_config_definition(stor-server.def vespa.config.content.core.stor-server.def) -vespa_generate_config(storage_storageconfig stor-status.def) -install_config_definition(stor-status.def vespa.config.content.core.stor-status.def) -vespa_generate_config(storage_storageconfig stor-messageforwarder.def) -install_config_definition(stor-messageforwarder.def vespa.config.content.core.stor-messageforwarder.def) -vespa_generate_config(storage_storageconfig stor-opslogger.def) -install_config_definition(stor-opslogger.def vespa.config.content.core.stor-opslogger.def) -vespa_generate_config(storage_storageconfig stor-visitordispatcher.def) -install_config_definition(stor-visitordispatcher.def vespa.config.content.core.stor-visitordispatcher.def) -vespa_generate_config(storage_storageconfig stor-integritychecker.def) -install_config_definition(stor-integritychecker.def vespa.config.content.core.stor-integritychecker.def) -vespa_generate_config(storage_storageconfig stor-bucketmover.def) -install_config_definition(stor-bucketmover.def vespa.config.content.core.stor-bucketmover.def) 
-vespa_generate_config(storage_storageconfig stor-bouncer.def) -install_config_definition(stor-bouncer.def vespa.config.content.core.stor-bouncer.def) -vespa_generate_config(storage_storageconfig stor-prioritymapping.def) -install_config_definition(stor-prioritymapping.def vespa.config.content.core.stor-prioritymapping.def) -vespa_generate_config(storage_storageconfig rpc-provider.def) -install_config_definition(rpc-provider.def vespa.config.content.core.rpc-provider.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-communicationmanager.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-distributormanager.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-server.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-status.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-messageforwarder.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-opslogger.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-visitordispatcher.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-integritychecker.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-bucketmover.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-bouncer.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.stor-prioritymapping.def) +vespa_generate_config(storage_storageconfig vespa.config.content.core.rpc-provider.def) + +install_config_definitions(src/vespa/storage/config) diff --git a/storage/src/vespa/storage/config/rpc-provider.def b/storage/src/vespa/storage/config/rpc-provider.def deleted file mode 100644 index f54b0e00fe4..00000000000 --- a/storage/src/vespa/storage/config/rpc-provider.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -connectspec string default="tcp/localhost:17777" restart diff --git a/storage/src/vespa/storage/config/stor-bouncer.def b/storage/src/vespa/storage/config/stor-bouncer.def deleted file mode 100644 index 6af5ee078e9..00000000000 --- a/storage/src/vespa/storage/config/stor-bouncer.def +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Whether or not the bouncer should stop external load from -## entering node when the cluster state is down. -stop_external_load_when_cluster_down bool default=true - -## Sets what node states the node will allow incoming commands -## in. -stop_all_load_when_nodestate_not_in string default="uri" - -## Sets whether to just use (self) reported node state or to use wanted state -## if wanted state is worse than the current reported state. -use_wanted_state_if_possible bool default=true - -## The maximum clock skew allowed in the system. Any messages received -## that have a timestamp longer in the future than this will be failed. -max_clock_skew_seconds int default=5 - -## If this config value is != -1, the node will reject any external feed -## operations with a priority lower than that specified here. Note that since -## we map priorities in such a way that 0 is the _highest_ priority and 255 the -## _lowest_ priority, for two operations A and B, if B has a lower priority -## than A it will have a higher priority _integer_ value. -## -## Only mutating external feed operations will be blocked. Read-only operations -## and internal operations are always let through. -## -## Default is -1 (i.e. 
rejection is disabled and load is allowed through) -feed_rejection_priority_threshold int default=-1 diff --git a/storage/src/vespa/storage/config/stor-bucketmover.def b/storage/src/vespa/storage/config/stor-bucketmover.def deleted file mode 100644 index 1fc200f83ca..00000000000 --- a/storage/src/vespa/storage/config/stor-bucketmover.def +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Minimum time between bucket database iterations in the bucket mover. The -## minumum time is used when disks starts to get pretty full and we have plenty -## stuff we can move. -## restart flag was added automatically and needs to be verified. -minimum_recheck_interval_in_seconds int default=60 restart - -## Maximum time between bucket database iterations in the bucket mover. The -## maximum time is used when disks have plenty free space, so moving data is -## not critical. -## restart flag was added automatically and needs to be verified. -maximum_recheck_interval_in_seconds int default=3600 restart - -## Number of buckets to cache at a time when reading the bucket database -## restart flag was added automatically and needs to be verified. -bucket_iteration_chunk int default=1000 restart - -## Maximum fill rate above average fill rate for a target disk to be eligible -## as a target for a bucket move operation. -## restart flag was added automatically and needs to be verified. -max_target_fill_rate_above_average double default=0.01 restart - -## Number of bucket mover runs to keep in history vector -## restart flag was added automatically and needs to be verified. -max_history_size int default=10 restart - -## Max concurrent pending bucket move operations scheduled in total. -## restart flag was added automatically and needs to be verified. -max_pending int default=5 restart - -## Operation delay. 
If set, the bucket mover will wait for this amount of -## milliseconds between each operation. Useful in testing to make move run go -## slow enough to view without that much data. -## restart flag was added automatically and needs to be verified. -operation_delay int default=0 restart diff --git a/storage/src/vespa/storage/config/stor-communicationmanager.def b/storage/src/vespa/storage/config/stor-communicationmanager.def deleted file mode 100644 index 3e4b1fd6515..00000000000 --- a/storage/src/vespa/storage/config/stor-communicationmanager.def +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -mbusport int default=-1 restart - -rpcport int default=6000 restart - -# Limits for max pending concurrent number of operations towards a node before -# MessageBus starts busy-bouncing messages. Distributor and content nodes are -# treated separately. 0 means no limit. -mbus_distributor_node_max_pending_count int default=5000 -mbus_content_node_max_pending_count int default=0 - -# Limits for max total amount of memory (in bytes) used by operations towards -# a node before MessageBus starts busy-bouncing messages. Distributor and -# content nodes are treated separately. 0 means no limit. -mbus_distributor_node_max_pending_size int default=0 -mbus_content_node_max_pending_size int default=0 - -# Minimum size of packets to compress (0 means no compression) -mbus.compress.limit int default=1024 - -## Compression level for packets -mbus.compress.level int default=3 - -## Compression type for packets. -mbus.compress.type enum {NONE, LZ4, ZSTD} default=LZ4 - -## TTL for rpc target cache -mbus.rpctargetcache.ttl double default = 600 - -## Number of threads for mbus threadpool -## Any value below 1 will be 1. 
-mbus.num_threads int default=4 - -mbus.optimize_for enum {LATENCY, THROUGHPUT, ADAPTIVE} default = LATENCY - -## Enable to use above thread pool for encoding replies -## False will use network(fnet) thread -mbus.dispatch_on_encode bool default=true - -## Enable to use above thread pool for decoding replies -## False will use network(fnet) thread -## Todo: Change default once verified in large scale deployment. -mbus.dispatch_on_decode bool default=false - -## Skip messenger thread on reply -## Experimental -mbus.skip_reply_thread bool default=false - -## Skip messenger thread on reply -## Experimental -mbus.skip_request_thread bool default=false - -## Skip communication manager thread on mbus requests -## Experimental -skip_thread bool default=false diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def deleted file mode 100644 index db2bfb61376..00000000000 --- a/storage/src/vespa/storage/config/stor-distributormanager.def +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Maximum number of ideal state operations scheduled by a distributor. -maxpendingidealstateoperations int default=100 - -## The total size of unique documents in a bucket before we split it due to -## being too big. By default this is now 32 MB. -splitsize int default=33544432 - -## The maximum amount of entries in a file before we should attempt to split it. -## A meta data entry in a slotfile currently uses 40 bytes. It is probably -## good to have the split size, such that all meta data entries are normally -## read when you do the initial read. With the default of 1024, meta data will -## take up around 40 kB, and the default initial read is 64 kB, allowing the -## file to grow a bit above max and still all be read in initial read. 
-splitcount int default=1024 - -## The maximum size of unique documents that allows the system to reduce -## the number of split bits on the bucket, or join two buckets together. -## The size must be lower than this number, and the count must be lower than -## joincount. -joinsize int default=16000000 - -## The maximum number of unique documents that allows for joining (see -## joinsize). -joincount int default=512 - -## Minimum level of splitting for buckets -minsplitcount int default=16 - -## If non-empty, continuously delete all the documents matching this selection. -garbagecollection.selectiontoremove string default="" - -## The interval with which each bucket is purged using the selection above. -## If 0, no garbage collection is done. -garbagecollection.interval int default=0 - -## If false, dont do splits inline with feeding. -inlinebucketsplitting bool default=true - -## List of state checkers (ideal state generators) that should be ignored in the cluster. -## One or more of the following (case insensitive): -## -## SynchronizeAndMove -## DeleteExtraCopies -## JoinBuckets -## SplitBucket -## SplitInconsistentBuckets -## SetBucketState -## GarbageCollection -blockedstatecheckers[] string restart - -## Whether or not distributor should issue reverts when operations partially -## fail. -enable_revert bool default=true - -## Maximum nodes involved in a merge operation. Currently, this can not be more -## than 16 nodes due to protocol limitations. However, decreasing the max may -## be useful if 16 node merges ends up too expensive. -maximum_nodes_per_merge int default=16 - -## For internal in process debugging, it may be useful to not start the -## distributor thread to be able to call tick() manually and run single threaded -start_distributor_thread bool default=true restart - -## The number of ticks calls done before a wait is done. 
This can be -## set higher than 10 for the distributor to improve speed of bucket iterations -## while still keep CPU load low/moderate. -ticks_before_wait int default=10 - -## The sleep time between ticks if there are no more queued tasks. -ticks_wait_time_ms int default=1 - -## Max processing time used by deadlock detector. -max_process_time_ms int default=5000 - -## Allow overriding default priorities of certain maintenance operations. -## This is an advanced feature, do not touch this unless you have a very good -## reason to do so! Configuring these values wrongly may cause starvation of -## important operations, leading to unpredictable behavior and/or data loss. -## -## Merge used to move data to ideal location -priority_merge_move_to_ideal_node int default=165 - -## Merge for copies that have gotten out of sync with each other -priority_merge_out_of_sync_copies int default=120 - -## Merge for restoring redundancy of copies -priority_merge_too_few_copies int default=120 - -## Copy activation when there are no other active copies (likely causing -## lack of search coverage for that bucket) -priority_activate_no_existing_active int default=100 - -## Copy activation when there is already an active copy for the bucket. -priority_activate_with_existing_active int default=100 - -## Deletion of bucket copy. Cheap on VDS, not necessarily so on indexed search. -priority_delete_bucket_copy int default=100 - -## Joining caused by bucket siblings getting sufficiently small to fit into a -## single bucket. -priority_join_buckets int default=155 - -## Splitting caused by system increasing its minimum distribution bit count. -priority_split_distribution_bits int default=200 - -## Splitting due to bucket exceeding max document count or byte size (see -## splitcount and splitsize config values) -priority_split_large_bucket int default=175 - -## Splitting due to buckets being inconsistently split. Should be higher -## priority than the vast majority of external load. 
-priority_split_inconsistent_bucket int default=110 - -## Background garbage collection. Should be lower priority than external load -## and other ideal state operations (aside from perhaps minimum bit splitting). -priority_garbage_collection int default=200 - -## The distributor can send joins that "lift" a bucket without any siblings -## higher up in the bucket tree hierarchy. The assumption is that if this -## is done for all sibling-less buckets, they will all eventually reach a -## level in the tree where they do in fact have a sibling and may (if their -## sizes allow) be joined into a single bucket. -enable_join_for_sibling_less_buckets bool default=false - -## There exists a distribution edge case where bucket siblings end up having -## non-equal ideal locations. This will normally inhibit join operations, as -## these are only allowed when all nodes have all source buckets involved in -## the join operation. Setting this property to true means such buckets may -## still be joined at the cost of transient inconsistencies for the buckets -## being joined into. -enable_inconsistent_join bool default=false - -## The distributor host info reporter may be disabled entirely, in which case -## no per-node statistics for merges, latencies or bucket replication factors -## will be reported back to the cluster controller. Disabling this may make -## sense in large clusters that do not make use of these reports directly or -## indirectly, as it causes potentially significant processing overhead on the -## cluster controller. -## This host reporter must never be disabled on a Hosted Vespa system, or -## automatic upgrades will stall. -enable_host_info_reporting bool default=true - -## For each available node, the distributor will report back to the cluster -## controller a value which indicates the minimum replication factor for any -## bucket contained on said node. This config exposes a way to alter how this -## replication factor is computed. 
-## -## Valid enum values and their semantics: -## -## TRUSTED - only trusted replicas are counted. -## ANY - any replica present is counted. This may return an overly optimistic -## view of the system. E.g. if there are 3 replicas, 1 having 1000 docs -## and 2 having 1 doc, all being out of sync, counting with ANY will still -## treat this as a minimum replication factor of 3. Conversely, with -## TRUSTED such a bucket would most likely have a factor of 0 (or 1 iff -## the trusted status for the replica with 1000 docs is known). -minimum_replica_counting_mode enum { TRUSTED, ANY } default=TRUSTED - -## Bucket activation only makes sense for indexed search clusters, but Proton -## may also be run in store-only or streaming mode, in which case it does not -## actually require any activations. If the model infers that Proton is running -## in such a mode, activation will be explicitly disabled. -## -## Activation is always disabled entirely for clusters using VDS as their -## engine, regardless of the value of this setting. -disable_bucket_activation bool default=false - - -## Maximum clock skew across nodes in the cluster, in whole seconds. -## Used to prevent timestamp collisions during distributor bucket ownership -## transfers. -## Zero means this mechanism is disabled. -max_cluster_clock_skew_sec int default=1 - -## If set, a distributor will only allow one active operation per document ID -## for puts, updates and removes. This helps prevent issues caused by concurrent -## modifications to documents when sent from multiple feed clients. -sequence_mutating_operations bool default=true - -## Number of seconds that scheduling of new merge operations should be inhibited -## towards a node if it has indicated that its merge queues are full or it is -## suffering from resource exhaustion. -inhibit_merge_sending_on_busy_node_duration_sec int default=10 - -## If set, enables potentially stale reads during cluster state transitions where -## buckets change ownership. 
This also implicitly enables support for two-phase -## cluster state transitions on the distributor. -## For this option to take effect, the cluster controller must also have two-phase -## states enabled. -allow_stale_reads_during_cluster_state_transitions bool default=false - -## If greater than zero, injects a thread sleep into certain parts of the bucket -## processing logic. This allows for easier testing of racing edge cases where the -## main distributor thread is CPU-blocked processing large amounts of buckets, but -## without actually needing to use a lot of buckets in the test itself. -## Setting any of these values only makes sense for testing! -simulated_db_pruning_latency_msec int default=0 -simulated_db_merging_latency_msec int default=0 - -## Whether to use a B-tree data structure for the distributor bucket database instead -## of the legacy database. Setting this option may trigger alternate code paths for -## read only operations, as the B-tree database is thread safe for concurrent reads. -use_btree_database bool default=true restart - -## If a bucket is inconsistent and an Update operation is received, a two-phase -## write-repair path is triggered in which a Get is sent to all diverging replicas. -## Once received, the update is applied on the distributor and pushed out to the -## content nodes as Puts. -## Iff this config is set to true AND all Gets return the same timestamp from all -## content nodes, the two-phase update path reverts back to the regular fast path. -## Since all replicas of the document were in sync, applying the update in-place -## shall be considered safe. -restart_with_fast_update_path_if_all_get_timestamps_are_consistent bool default=false - -## If set, no merge operations may be generated for any reason by a distributor. -## This is ONLY intended for system testing of certain transient edge cases and -## MUST NOT be set to true in a production environment. 
-merge_operations_disabled bool default=false - -## If set, Get operations that are initiated by the client (i.e. _not_ Get operations -## that are initiated by the distributor) will be forwarded to the backend with -## a flag signalling that weak read consistency may be used. This allows the -## backend to minimize internal locking. The downside is that it's not guaranteed -## to observe the most recent writes to the document, nor to observe an atomically -## consistent view of fields across document versions. -## This is mostly useful in a system that is effectively read-only. -use_weak_internal_read_consistency_for_client_gets bool default=false - -## If true, adds an initial metadata-only fetch phase to updates that touch buckets -## with inconsistent replicas. Metadata timestamps are compared and a single full Get -## is sent _only_ to one node with the highest timestamp. Without a metadata phase, -## full gets would be sent to _all_ nodes. -## Setting this option to true always implicitly enables the fast update restart -## feature, so it's not required to set that config to true, nor will setting it -## to false actually disable the feature. -enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false - -## If a distributor main thread tick is constantly processing requests or responses -## originating from other nodes, setting this value above zero will prevent implicit -## maintenance scans from being done as part of the tick for up to N rounds of ticking. -## This is to reduce the amount of CPU spent on ideal state calculations and bucket DB -## accesses when the distributor is heavily loaded with feed operations. 
-max_consecutively_inhibited_maintenance_ticks int default=20 diff --git a/storage/src/vespa/storage/config/stor-integritychecker.def b/storage/src/vespa/storage/config/stor-integritychecker.def deleted file mode 100644 index 657537ac015..00000000000 --- a/storage/src/vespa/storage/config/stor-integritychecker.def +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Minutes after midnight when integrity checker is allowed to start running. -## 0 means it will start/continue run at midnight. -dailycyclestart int default=0 - -## Minutes after midnight when integrity checker is not allowed to run anymore. -## If this equals dailycyclestart it is allowed to run all day. dailycyclestop -## is allowed to be less than dailycyclestart. -dailycyclestop int default=0 - -## Status of what is allowed done on what weekdays. Should be a string with -## seven characters, where the first represent sunday, the seventh saturday. -## The possible options are RrCc- which means: -## R - If state becomes R, and current cycle does not verify file content, -## abort current cycle, otherwise continue it. Start new cycle verifying -## all content of all files. -## r - Continue current cycle. Start new cycle using cheap partial file -## verification. -## c - Continue current cycle. Dont start a new cycle. -weeklycycle string default="Rrrrrrr" - -## Max concurrent pending bucket verifications. For max speed, each disk thread -## should have one to work with all the time. Default is 1, to ensure little -## resources are consumed by this process by default. Once request priority -## has been introduced, this default may become higher. -maxpending int default=2 - -## Minimum time since last cycle before starting a new one in minutes. -## Defaults to 24 hours. -mincycletime int default=1440 - -## Minimum time in seconds between each request. 
To throttle the system even -## slower if continuous one pending puts on more load on the system than you -## want. Works with multiple pending messages, though it doesnt make much sense -## unless maxpending equals 1. -requestdelay int default=0 diff --git a/storage/src/vespa/storage/config/stor-messageforwarder.def b/storage/src/vespa/storage/config/stor-messageforwarder.def deleted file mode 100644 index 4a3f481e659..00000000000 --- a/storage/src/vespa/storage/config/stor-messageforwarder.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -route string default="" restart diff --git a/storage/src/vespa/storage/config/stor-opslogger.def b/storage/src/vespa/storage/config/stor-opslogger.def deleted file mode 100644 index 3ba2c621427..00000000000 --- a/storage/src/vespa/storage/config/stor-opslogger.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -targetfile string default="" restart diff --git a/storage/src/vespa/storage/config/stor-prioritymapping.def b/storage/src/vespa/storage/config/stor-prioritymapping.def deleted file mode 100644 index decf4c68ee2..00000000000 --- a/storage/src/vespa/storage/config/stor-prioritymapping.def +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-namespace=vespa.config.content.core - -highest int default=50 -very_high int default=60 -high_1 int default=70 -high_2 int default=80 -high_3 int default=90 -normal_1 int default=100 -normal_2 int default=110 -normal_3 int default=120 -normal_4 int default=130 -normal_5 int default=140 -normal_6 int default=150 -low_1 int default=160 -low_2 int default=170 -low_3 int default=180 -very_low int default=190 -lowest int default=200 - diff --git a/storage/src/vespa/storage/config/stor-server.def b/storage/src/vespa/storage/config/stor-server.def deleted file mode 100644 index e1446aa8ed1..00000000000 --- a/storage/src/vespa/storage/config/stor-server.def +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Root directory for all files related to this storage node. -## Will typically be "$VESPA_HOME/var/db/vespa/vds/// -root_folder string restart - -## VDS cluster -cluster_name string default="storage" restart - -## The index of this node. Each node of the same type in the same cluster need -## to have unique indexes. This should not be changed, as this is what we use -## to identify the node, and to decide what data should be on it. -node_index int default=0 restart - -## Set whether this is a distributor or a storage node. This will decide what -## storage links are set up. -is_distributor bool restart - -## Capacity of the node. How much data and load this node will get relative to -## other nodes. -node_capacity double default=1.0 restart - -## Capacity of the disks on this node. How much data and load will each disk -## get relative to the other disks on this node. -disk_capacity[] double restart - -## Reliability of this node. How much of the cluster redundancy factor can this -## node make up for. -node_reliability int default=1 restart - -## The upper bound of merges that any storage node can have active. 
-## A merge operation will be chained through all nodes involved in the -## merge, only actually starting the operation when every node has -## allowed it to pass through. -max_merges_per_node int default=16 -max_merge_queue_size int default=1024 - -## If the persistence provider indicates that it has exhausted one or more -## of its internal resources during a mutating operation, new merges will -## be bounced for this duration. Not allowing further merges helps take -## load off the node while it e.g. compacts its data stores or memory in -## the background. -## Note: this does not affect merges where the current node is marked as -## "source only", as merges do not cause mutations on such nodes. -resource_exhaustion_merge_back_pressure_duration_secs double default=30.0 - -## Whether the deadlock detector should be enabled or not. If disabled, it will -## still run, but it will never actually abort the process it is running in. -enable_dead_lock_detector bool default=false restart - -## Whether to enable deadlock detector warnings in log or not. If enabled, -## warnings will be written even if dead lock detecting is not enabled. -enable_dead_lock_detector_warnings bool default=true restart - -## Each thread registers how often it will at minimum register ticks (given that -## the system is not overloaded. If you are running Vespa on overloaded nodes, -## you can use this slack timeout to add to the thread timeouts in order to -## allow for more slack before dead lock detector kicks in. The value is in seconds. -dead_lock_detector_timeout_slack double default=240 restart - -## If set to 0, storage will attempt to auto-detect the number of VDS mount -## points to use. If set to a number, force this number. This number only makes -## sense on a storage node of course -disk_count int default=0 restart - -## Configure persistence provider. Temporary here to test. 
-persistence_provider.type enum {STORAGE, DUMMY, RPC } default=STORAGE restart -persistence_provider.rpc.connectspec string default="tcp/localhost:27777" restart - -## Whether or not to use the new metadata flow implementation. Default to not -## as it is currently in development and not even functional -switch_new_meta_data_flow bool default=false restart - -## When the content layer receives a set of changed buckets from the persistence -## layer, it must recheck all of these. Each such recheck results in an -## operation scheduled against the persistence queust and since the total -## number of buckets to recheck may reach hundreds of thousands in a large -## system, we send these in chunks to avoid saturating the queues with -## operations. -bucket_rechecking_chunk_size int default=100 - -## If greater than zero, simulates added latency caused by CPU processing during -## full bucket info requests. The latency is added per batch of operations processed. -## Only useful for testing! -simulated_bucket_request_latency_msec int default=0 - -## If set, content node processes will use a B-tree backed bucket database implementation -## instead of the legacy Judy-based implementation. -use_content_node_btree_bucket_db bool default=false restart diff --git a/storage/src/vespa/storage/config/stor-status.def b/storage/src/vespa/storage/config/stor-status.def deleted file mode 100644 index 640a03299c2..00000000000 --- a/storage/src/vespa/storage/config/stor-status.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-namespace=vespa.config.content.core - -httpport int default=0 restart diff --git a/storage/src/vespa/storage/config/stor-visitordispatcher.def b/storage/src/vespa/storage/config/stor-visitordispatcher.def deleted file mode 100644 index 2e418c97989..00000000000 --- a/storage/src/vespa/storage/config/stor-visitordispatcher.def +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -# For any given client visitor operation, this specifies a maximum fan-out -# factor for concurrent content node visitor operations towards a particular -# content node. -# Having several visitor operations running concurrently increases the -# potential data processing parallelism on the content nodes at the expense -# of using additional resources. -maxvisitorspernodeperclientvisitor int default=16 - -# Minimum number of buckets that have to be present on a given content node -# before more than one visitor operation may be sent in parallel towards it. -# This config is directly related to maxvisitorspernodeperclientvisitor. -# Example: with max visitors of 4, min buckets of 5 and total of 40 buckets on -# a content node, a total of 4 visitors of 10 buckets each will be sent to the -# node. If min buckets were 20, only 2 visitors of 20 buckets each would be -# sent. -minbucketspervisitor int default=1 diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.rpc-provider.def b/storage/src/vespa/storage/config/vespa.config.content.core.rpc-provider.def new file mode 100644 index 00000000000..f54b0e00fe4 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.rpc-provider.def @@ -0,0 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+namespace=vespa.config.content.core + +connectspec string default="tcp/localhost:17777" restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-bouncer.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-bouncer.def new file mode 100644 index 00000000000..6af5ee078e9 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-bouncer.def @@ -0,0 +1,30 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Whether or not the bouncer should stop external load from +## entering node when the cluster state is down. +stop_external_load_when_cluster_down bool default=true + +## Sets what node states the node will allow incoming commands +## in. +stop_all_load_when_nodestate_not_in string default="uri" + +## Sets whether to just use (self) reported node state or to use wanted state +## if wanted state is worse than the current reported state. +use_wanted_state_if_possible bool default=true + +## The maximum clock skew allowed in the system. Any messages received +## that have a timestamp longer in the future than this will be failed. +max_clock_skew_seconds int default=5 + +## If this config value is != -1, the node will reject any external feed +## operations with a priority lower than that specified here. Note that since +## we map priorities in such a way that 0 is the _highest_ priority and 255 the +## _lowest_ priority, for two operations A and B, if B has a lower priority +## than A it will have a higher priority _integer_ value. +## +## Only mutating external feed operations will be blocked. Read-only operations +## and internal operations are always let through. +## +## Default is -1 (i.e. 
rejection is disabled and load is allowed through) +feed_rejection_priority_threshold int default=-1 diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-bucketmover.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-bucketmover.def new file mode 100644 index 00000000000..1fc200f83ca --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-bucketmover.def @@ -0,0 +1,37 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Minimum time between bucket database iterations in the bucket mover. The +## minimum time is used when disks start to get pretty full and we have plenty +## of stuff we can move. +## restart flag was added automatically and needs to be verified. +minimum_recheck_interval_in_seconds int default=60 restart + +## Maximum time between bucket database iterations in the bucket mover. The +## maximum time is used when disks have plenty of free space, so moving data is +## not critical. +## restart flag was added automatically and needs to be verified. +maximum_recheck_interval_in_seconds int default=3600 restart + +## Number of buckets to cache at a time when reading the bucket database +## restart flag was added automatically and needs to be verified. +bucket_iteration_chunk int default=1000 restart + +## Maximum fill rate above average fill rate for a target disk to be eligible +## as a target for a bucket move operation. +## restart flag was added automatically and needs to be verified. +max_target_fill_rate_above_average double default=0.01 restart + +## Number of bucket mover runs to keep in history vector +## restart flag was added automatically and needs to be verified. +max_history_size int default=10 restart + +## Max concurrent pending bucket move operations scheduled in total. +## restart flag was added automatically and needs to be verified.
+max_pending int default=5 restart + +## Operation delay. If set, the bucket mover will wait for this amount of +## milliseconds between each operation. Useful in testing to make move run go +## slow enough to view without that much data. +## restart flag was added automatically and needs to be verified. +operation_delay int default=0 restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-communicationmanager.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-communicationmanager.def new file mode 100644 index 00000000000..3e4b1fd6515 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-communicationmanager.def @@ -0,0 +1,57 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +mbusport int default=-1 restart + +rpcport int default=6000 restart + +# Limits for max pending concurrent number of operations towards a node before +# MessageBus starts busy-bouncing messages. Distributor and content nodes are +# treated separately. 0 means no limit. +mbus_distributor_node_max_pending_count int default=5000 +mbus_content_node_max_pending_count int default=0 + +# Limits for max total amount of memory (in bytes) used by operations towards +# a node before MessageBus starts busy-bouncing messages. Distributor and +# content nodes are treated separately. 0 means no limit. +mbus_distributor_node_max_pending_size int default=0 +mbus_content_node_max_pending_size int default=0 + +# Minimum size of packets to compress (0 means no compression) +mbus.compress.limit int default=1024 + +## Compression level for packets +mbus.compress.level int default=3 + +## Compression type for packets. +mbus.compress.type enum {NONE, LZ4, ZSTD} default=LZ4 + +## TTL for rpc target cache +mbus.rpctargetcache.ttl double default = 600 + +## Number of threads for mbus threadpool +## Any value below 1 will be 1. 
+mbus.num_threads int default=4 + +mbus.optimize_for enum {LATENCY, THROUGHPUT, ADAPTIVE} default = LATENCY + +## Enable to use above thread pool for encoding replies +## False will use network(fnet) thread +mbus.dispatch_on_encode bool default=true + +## Enable to use above thread pool for decoding replies +## False will use network(fnet) thread +## Todo: Change default once verified in large scale deployment. +mbus.dispatch_on_decode bool default=false + +## Skip messenger thread on reply +## Experimental +mbus.skip_reply_thread bool default=false + +## Skip messenger thread on request +## Experimental +mbus.skip_request_thread bool default=false + +## Skip communication manager thread on mbus requests +## Experimental +skip_thread bool default=false diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-distributormanager.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-distributormanager.def new file mode 100644 index 00000000000..db2bfb61376 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-distributormanager.def @@ -0,0 +1,246 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Maximum number of ideal state operations scheduled by a distributor. +maxpendingidealstateoperations int default=100 + +## The total size of unique documents in a bucket before we split it due to +## being too big. By default this is now 32 MB. +splitsize int default=33544432 + +## The maximum amount of entries in a file before we should attempt to split it. +## A meta data entry in a slotfile currently uses 40 bytes. It is probably +## good to have the split size, such that all meta data entries are normally +## read when you do the initial read.
With the default of 1024, meta data will +## take up around 40 kB, and the default initial read is 64 kB, allowing the +## file to grow a bit above max and still all be read in initial read. +splitcount int default=1024 + +## The maximum size of unique documents that allows the system to reduce +## the number of split bits on the bucket, or join two buckets together. +## The size must be lower than this number, and the count must be lower than +## joincount. +joinsize int default=16000000 + +## The maximum number of unique documents that allows for joining (see +## joinsize). +joincount int default=512 + +## Minimum level of splitting for buckets +minsplitcount int default=16 + +## If non-empty, continuously delete all the documents matching this selection. +garbagecollection.selectiontoremove string default="" + +## The interval with which each bucket is purged using the selection above. +## If 0, no garbage collection is done. +garbagecollection.interval int default=0 + +## If false, dont do splits inline with feeding. +inlinebucketsplitting bool default=true + +## List of state checkers (ideal state generators) that should be ignored in the cluster. +## One or more of the following (case insensitive): +## +## SynchronizeAndMove +## DeleteExtraCopies +## JoinBuckets +## SplitBucket +## SplitInconsistentBuckets +## SetBucketState +## GarbageCollection +blockedstatecheckers[] string restart + +## Whether or not distributor should issue reverts when operations partially +## fail. +enable_revert bool default=true + +## Maximum nodes involved in a merge operation. Currently, this can not be more +## than 16 nodes due to protocol limitations. However, decreasing the max may +## be useful if 16 node merges ends up too expensive. 
+maximum_nodes_per_merge int default=16 + +## For internal in process debugging, it may be useful to not start the +## distributor thread to be able to call tick() manually and run single threaded +start_distributor_thread bool default=true restart + +## The number of ticks calls done before a wait is done. This can be +## set higher than 10 for the distributor to improve speed of bucket iterations +## while still keep CPU load low/moderate. +ticks_before_wait int default=10 + +## The sleep time between ticks if there are no more queued tasks. +ticks_wait_time_ms int default=1 + +## Max processing time used by deadlock detector. +max_process_time_ms int default=5000 + +## Allow overriding default priorities of certain maintenance operations. +## This is an advanced feature, do not touch this unless you have a very good +## reason to do so! Configuring these values wrongly may cause starvation of +## important operations, leading to unpredictable behavior and/or data loss. +## +## Merge used to move data to ideal location +priority_merge_move_to_ideal_node int default=165 + +## Merge for copies that have gotten out of sync with each other +priority_merge_out_of_sync_copies int default=120 + +## Merge for restoring redundancy of copies +priority_merge_too_few_copies int default=120 + +## Copy activation when there are no other active copies (likely causing +## lack of search coverage for that bucket) +priority_activate_no_existing_active int default=100 + +## Copy activation when there is already an active copy for the bucket. +priority_activate_with_existing_active int default=100 + +## Deletion of bucket copy. Cheap on VDS, not necessarily so on indexed search. +priority_delete_bucket_copy int default=100 + +## Joining caused by bucket siblings getting sufficiently small to fit into a +## single bucket. +priority_join_buckets int default=155 + +## Splitting caused by system increasing its minimum distribution bit count. 
+priority_split_distribution_bits int default=200 + +## Splitting due to bucket exceeding max document count or byte size (see +## splitcount and splitsize config values) +priority_split_large_bucket int default=175 + +## Splitting due to buckets being inconsistently split. Should be higher +## priority than the vast majority of external load. +priority_split_inconsistent_bucket int default=110 + +## Background garbage collection. Should be lower priority than external load +## and other ideal state operations (aside from perhaps minimum bit splitting). +priority_garbage_collection int default=200 + +## The distributor can send joins that "lift" a bucket without any siblings +## higher up in the bucket tree hierarchy. The assumption is that if this +## is done for all sibling-less buckets, they will all eventually reach a +## level in the tree where they do in fact have a sibling and may (if their +## sizes allow) be joined into a single bucket. +enable_join_for_sibling_less_buckets bool default=false + +## There exists a distribution edge case where bucket siblings end up having +## non-equal ideal locations. This will normally inhibit join operations, as +## these are only allowed when all nodes have all source buckets involved in +## the join operation. Setting this property to true means such buckets may +## still be joined at the cost of transient inconsistencies for the buckets +## being joined into. +enable_inconsistent_join bool default=false + +## The distributor host info reporter may be disabled entirely, in which case +## no per-node statistics for merges, latencies or bucket replication factors +## will be reported back to the cluster controller. Disabling this may make +## sense in large clusters that do not make use of these reports directly or +## indirectly, as it causes potentially significant processing overhead on the +## cluster controller. 
+## This host reporter must never be disabled on a Hosted Vespa system, or +## automatic upgrades will stall. +enable_host_info_reporting bool default=true + +## For each available node, the distributor will report back to the cluster +## controller a value which indicates the minimum replication factor for any +## bucket contained on said node. This config exposes a way to alter how this +## replication factor is computed. +## +## Valid enum values and their semantics: +## +## TRUSTED - only trusted replicas are counted. +## ANY - any replica present is counted. This may return an overly optimistic +## view of the system. E.g. if there are 3 replicas, 1 having 1000 docs +## and 2 having 1 doc, all being out of sync, counting with ANY will still +## treat this as a minimum replication factor of 3. Conversely, with +## TRUSTED such a bucket would most likely have a factor of 0 (or 1 iff +## the trusted status for the replica with 1000 docs is known). +minimum_replica_counting_mode enum { TRUSTED, ANY } default=TRUSTED + +## Bucket activation only makes sense for indexed search clusters, but Proton +## may also be run in store-only or streaming mode, in which case it does not +## actually require any activations. If the model infers that Proton is running +## in such a mode, activation will be explicitly disabled. +## +## Activation is always disabled entirely for clusters using VDS as their +## engine, regardless of the value of this setting. +disable_bucket_activation bool default=false + + +## Maximum clock skew across nodes in the cluster, in whole seconds. +## Used to prevent timestamp collisions during distributor bucket ownership +## transfers. +## Zero means this mechanism is disabled. +max_cluster_clock_skew_sec int default=1 + +## If set, a distributor will only allow one active operation per document ID +## for puts, updates and removes. This helps prevent issues caused by concurrent +## modifications to documents when sent from multiple feed clients. 
+sequence_mutating_operations bool default=true + +## Number of seconds that scheduling of new merge operations should be inhibited +## towards a node if it has indicated that its merge queues are full or it is +## suffering from resource exhaustion. +inhibit_merge_sending_on_busy_node_duration_sec int default=10 + +## If set, enables potentially stale reads during cluster state transitions where +## buckets change ownership. This also implicitly enables support for two-phase +## cluster state transitions on the distributor. +## For this option to take effect, the cluster controller must also have two-phase +## states enabled. +allow_stale_reads_during_cluster_state_transitions bool default=false + +## If greater than zero, injects a thread sleep into certain parts of the bucket +## processing logic. This allows for easier testing of racing edge cases where the +## main distributor thread is CPU-blocked processing large amounts of buckets, but +## without actually needing to use a lot of buckets in the test itself. +## Setting any of these values only makes sense for testing! +simulated_db_pruning_latency_msec int default=0 +simulated_db_merging_latency_msec int default=0 + +## Whether to use a B-tree data structure for the distributor bucket database instead +## of the legacy database. Setting this option may trigger alternate code paths for +## read only operations, as the B-tree database is thread safe for concurrent reads. +use_btree_database bool default=true restart + +## If a bucket is inconsistent and an Update operation is received, a two-phase +## write-repair path is triggered in which a Get is sent to all diverging replicas. +## Once received, the update is applied on the distributor and pushed out to the +## content nodes as Puts. +## Iff this config is set to true AND all Gets return the same timestamp from all +## content nodes, the two-phase update path reverts back to the regular fast path. 
+## Since all replicas of the document were in sync, applying the update in-place +## shall be considered safe. +restart_with_fast_update_path_if_all_get_timestamps_are_consistent bool default=false + +## If set, no merge operations may be generated for any reason by a distributor. +## This is ONLY intended for system testing of certain transient edge cases and +## MUST NOT be set to true in a production environment. +merge_operations_disabled bool default=false + +## If set, Get operations that are initiated by the client (i.e. _not_ Get operations +## that are initiated by the distributor) will be forwarded to the backend with +## a flag signalling that weak read consistency may be used. This allows the +## backend to minimize internal locking. The downside is that it's not guaranteed +## to observe the most recent writes to the document, nor to observe an atomically +## consistent view of fields across document versions. +## This is mostly useful in a system that is effectively read-only. +use_weak_internal_read_consistency_for_client_gets bool default=false + +## If true, adds an initial metadata-only fetch phase to updates that touch buckets +## with inconsistent replicas. Metadata timestamps are compared and a single full Get +## is sent _only_ to one node with the highest timestamp. Without a metadata phase, +## full gets would be sent to _all_ nodes. +## Setting this option to true always implicitly enables the fast update restart +## feature, so it's not required to set that config to true, nor will setting it +## to false actually disable the feature. +enable_metadata_only_fetch_phase_for_inconsistent_updates bool default=false + +## If a distributor main thread tick is constantly processing requests or responses +## originating from other nodes, setting this value above zero will prevent implicit +## maintenance scans from being done as part of the tick for up to N rounds of ticking. 
+## This is to reduce the amount of CPU spent on ideal state calculations and bucket DB +## accesses when the distributor is heavily loaded with feed operations. +max_consecutively_inhibited_maintenance_ticks int default=20 diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-integritychecker.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-integritychecker.def new file mode 100644 index 00000000000..657537ac015 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-integritychecker.def @@ -0,0 +1,38 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Minutes after midnight when integrity checker is allowed to start running. +## 0 means it will start/continue run at midnight. +dailycyclestart int default=0 + +## Minutes after midnight when integrity checker is not allowed to run anymore. +## If this equals dailycyclestart it is allowed to run all day. dailycyclestop +## is allowed to be less than dailycyclestart. +dailycyclestop int default=0 + +## Status of what is allowed done on what weekdays. Should be a string with +## seven characters, where the first represent sunday, the seventh saturday. +## The possible options are RrCc- which means: +## R - If state becomes R, and current cycle does not verify file content, +## abort current cycle, otherwise continue it. Start new cycle verifying +## all content of all files. +## r - Continue current cycle. Start new cycle using cheap partial file +## verification. +## c - Continue current cycle. Dont start a new cycle. +weeklycycle string default="Rrrrrrr" + +## Max concurrent pending bucket verifications. For max speed, each disk thread +## should have one to work with all the time. Default is 1, to ensure little +## resources are consumed by this process by default. 
Once request priority +## has been introduced, this default may become higher. +maxpending int default=2 + +## Minimum time since last cycle before starting a new one in minutes. +## Defaults to 24 hours. +mincycletime int default=1440 + +## Minimum time in seconds between each request. Use this to throttle further +## if continuously keeping one request pending puts more load on the system +## than you want. Works with multiple pending messages, though it doesn't make +## much sense unless maxpending equals 1. +requestdelay int default=0 diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-messageforwarder.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-messageforwarder.def new file mode 100644 index 00000000000..4a3f481e659 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-messageforwarder.def @@ -0,0 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +route string default="" restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-opslogger.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-opslogger.def new file mode 100644 index 00000000000..3ba2c621427 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-opslogger.def @@ -0,0 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+namespace=vespa.config.content.core + +targetfile string default="" restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-prioritymapping.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-prioritymapping.def new file mode 100644 index 00000000000..decf4c68ee2 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-prioritymapping.def @@ -0,0 +1,20 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +highest int default=50 +very_high int default=60 +high_1 int default=70 +high_2 int default=80 +high_3 int default=90 +normal_1 int default=100 +normal_2 int default=110 +normal_3 int default=120 +normal_4 int default=130 +normal_5 int default=140 +normal_6 int default=150 +low_1 int default=160 +low_2 int default=170 +low_3 int default=180 +very_low int default=190 +lowest int default=200 + diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-server.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-server.def new file mode 100644 index 00000000000..e1446aa8ed1 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-server.def @@ -0,0 +1,90 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Root directory for all files related to this storage node. +## Will typically be "$VESPA_HOME/var/db/vespa/vds/<cluster>/<nodetype>/<index>" +root_folder string restart + +## VDS cluster +cluster_name string default="storage" restart + +## The index of this node. Each node of the same type in the same cluster needs +## to have a unique index. This should not be changed, as this is what we use +## to identify the node, and to decide what data should be on it.
+node_index int default=0 restart + +## Set whether this is a distributor or a storage node. This will decide what +## storage links are set up. +is_distributor bool restart + +## Capacity of the node. How much data and load this node will get relative to +## other nodes. +node_capacity double default=1.0 restart + +## Capacity of the disks on this node. How much data and load will each disk +## get relative to the other disks on this node. +disk_capacity[] double restart + +## Reliability of this node. How much of the cluster redundancy factor can this +## node make up for. +node_reliability int default=1 restart + +## The upper bound of merges that any storage node can have active. +## A merge operation will be chained through all nodes involved in the +## merge, only actually starting the operation when every node has +## allowed it to pass through. +max_merges_per_node int default=16 +max_merge_queue_size int default=1024 + +## If the persistence provider indicates that it has exhausted one or more +## of its internal resources during a mutating operation, new merges will +## be bounced for this duration. Not allowing further merges helps take +## load off the node while it e.g. compacts its data stores or memory in +## the background. +## Note: this does not affect merges where the current node is marked as +## "source only", as merges do not cause mutations on such nodes. +resource_exhaustion_merge_back_pressure_duration_secs double default=30.0 + +## Whether the deadlock detector should be enabled or not. If disabled, it will +## still run, but it will never actually abort the process it is running in. +enable_dead_lock_detector bool default=false restart + +## Whether to enable deadlock detector warnings in log or not. If enabled, +## warnings will be written even if dead lock detecting is not enabled. 
+enable_dead_lock_detector_warnings bool default=true restart + +## Each thread registers how often it will at minimum register ticks (given that +## the system is not overloaded). If you are running Vespa on overloaded nodes, +## you can use this slack timeout to add to the thread timeouts in order to +## allow for more slack before dead lock detector kicks in. The value is in seconds. +dead_lock_detector_timeout_slack double default=240 restart + +## If set to 0, storage will attempt to auto-detect the number of VDS mount +## points to use. If set to a number, force this number. This number only makes +## sense on a storage node of course +disk_count int default=0 restart + +## Configure persistence provider. Temporary here to test. +persistence_provider.type enum {STORAGE, DUMMY, RPC } default=STORAGE restart +persistence_provider.rpc.connectspec string default="tcp/localhost:27777" restart + +## Whether or not to use the new metadata flow implementation. Default to not +## as it is currently in development and not even functional +switch_new_meta_data_flow bool default=false restart + +## When the content layer receives a set of changed buckets from the persistence +## layer, it must recheck all of these. Each such recheck results in an +## operation scheduled against the persistence queue, and since the total +## number of buckets to recheck may reach hundreds of thousands in a large +## system, we send these in chunks to avoid saturating the queues with +## operations. +bucket_rechecking_chunk_size int default=100 + +## If greater than zero, simulates added latency caused by CPU processing during +## full bucket info requests. The latency is added per batch of operations processed. +## Only useful for testing! +simulated_bucket_request_latency_msec int default=0 + +## If set, content node processes will use a B-tree backed bucket database implementation +## instead of the legacy Judy-based implementation.
+use_content_node_btree_bucket_db bool default=false restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-status.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-status.def new file mode 100644 index 00000000000..640a03299c2 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-status.def @@ -0,0 +1,4 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +httpport int default=0 restart diff --git a/storage/src/vespa/storage/config/vespa.config.content.core.stor-visitordispatcher.def b/storage/src/vespa/storage/config/vespa.config.content.core.stor-visitordispatcher.def new file mode 100644 index 00000000000..2e418c97989 --- /dev/null +++ b/storage/src/vespa/storage/config/vespa.config.content.core.stor-visitordispatcher.def @@ -0,0 +1,19 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +# For any given client visitor operation, this specifies a maximum fan-out +# factor for concurrent content node visitor operations towards a particular +# content node. +# Having several visitor operations running concurrently increases the +# potential data processing parallelism on the content nodes at the expense +# of using additional resources. +maxvisitorspernodeperclientvisitor int default=16 + +# Minimum number of buckets that have to be present on a given content node +# before more than one visitor operation may be sent in parallel towards it. +# This config is directly related to maxvisitorspernodeperclientvisitor. +# Example: with max visitors of 4, min buckets of 5 and total of 40 buckets on +# a content node, a total of 4 visitors of 10 buckets each will be sent to the +# node. If min buckets were 20, only 2 visitors of 20 buckets each would be +# sent. 
+minbucketspervisitor int default=1 diff --git a/storage/src/vespa/storage/visiting/CMakeLists.txt b/storage/src/vespa/storage/visiting/CMakeLists.txt index 05097163479..c8a824877c5 100644 --- a/storage/src/vespa/storage/visiting/CMakeLists.txt +++ b/storage/src/vespa/storage/visiting/CMakeLists.txt @@ -13,5 +13,6 @@ vespa_add_library(storage_visitor OBJECT visitorthread.cpp DEPENDS ) -vespa_generate_config(storage_visitor stor-visitor.def) -install_config_definition(stor-visitor.def vespa.config.content.core.stor-visitor.def) +vespa_generate_config(storage_visitor vespa.config.content.core.stor-visitor.def) + +install_config_definitions(src/vespa/storage/visiting) diff --git a/storage/src/vespa/storage/visiting/stor-visitor.def b/storage/src/vespa/storage/visiting/stor-visitor.def deleted file mode 100644 index 72b3699fe2d..00000000000 --- a/storage/src/vespa/storage/visiting/stor-visitor.def +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.content.core - -## Number of separate threads that runs visitors. -visitorthreads int default=16 restart - -## Default timeout of visitors that loses contact with client (in seconds) -disconnectedvisitortimeout int default=0 restart - -## Time period (in seconds) in which to ignore requests to visitors that doesnt -## exist anymore. (Normal for visitors to get some messages right after -## aborting, logging them as faults instead after this timeout has passed.) -ignorenonexistingvisitortimelimit int default=300 restart - -## The number of buckets that are visited in parallel in a visitor visiting -## multiple buckets. Default is 8, meaning if you send a create visitor to visit -## 100 buckets, 8 of them will be visited in parallel. -defaultparalleliterators int default=8 - -## Default number of maximum client replies pending. 
-defaultpendingmessages int default=32 - -## Default size of docblocks used to transfer visitor data. -defaultdocblocksize int default=4190208 - -## Default docblock timeout in ms used to transfer visitor data. -## Currently defaults to a day. This is to avoid slow visitor target problems, -## getting data resent faster than it can process, and since there are very few -## reasons to actually time out -defaultdocblocktimeout int default=180000 - -## Default timeout of visitor info messages: Progress and error reports. -## If these time out, the visitor will be aborted on the storage node. -defaultinfotimeout int default=60000 - -## Max concurrent visitors (legacy) -maxconcurrentvisitors int default=64 - -## Priority-based max concurrent visitors. Fixed is the total number of -## concurrent visitors that can run for any priorities. Variable -## increases the concurrency limit for higher priorities, the limit -## being linear with a messages priority. Example: if Fixed is 16 -## and Variable is 64, maxconcurrent for a pri 255 message is 16 and -## maxconcurrent for a pri 0 message is 16+64=80. -## If fixed is left as 0, it will take the value of maxconcurrentvisitors, -## allowing backwards compatability -maxconcurrentvisitors_fixed int default=16 -maxconcurrentvisitors_variable int default=64 - -## Max size of visitor priority queue -maxvisitorqueuesize int default=1024 - -# Limit of memory used _per visitor_ in bytes. -# Due to optimistic parallelization, it is possible for this limit to be -# initially violated when the visitor is first started. This can happen since -# the visitor does not know the size of the bucket contents before fetching -# any data from it and it will do so based on parallelization factors specified -# in the CreateVisitor command. If 3 buckets are initially visited in parallel -# and these both contain a single 100 MiB document, the memory usage of the -# visitor will peak at 300 MiB even if the configured limit is e.g. 20 MiB. 
-# Default value is set to 20 MiB, which attempts to keep a reasonably safe -# level in the face of a default number of max concurrent visitors (64). -visitor_memory_usage_limit int default=25165824 diff --git a/storage/src/vespa/storage/visiting/vespa.config.content.core.stor-visitor.def b/storage/src/vespa/storage/visiting/vespa.config.content.core.stor-visitor.def new file mode 100644 index 00000000000..72b3699fe2d --- /dev/null +++ b/storage/src/vespa/storage/visiting/vespa.config.content.core.stor-visitor.def @@ -0,0 +1,63 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## Number of separate threads that runs visitors. +visitorthreads int default=16 restart + +## Default timeout of visitors that loses contact with client (in seconds) +disconnectedvisitortimeout int default=0 restart + +## Time period (in seconds) in which to ignore requests to visitors that doesnt +## exist anymore. (Normal for visitors to get some messages right after +## aborting, logging them as faults instead after this timeout has passed.) +ignorenonexistingvisitortimelimit int default=300 restart + +## The number of buckets that are visited in parallel in a visitor visiting +## multiple buckets. Default is 8, meaning if you send a create visitor to visit +## 100 buckets, 8 of them will be visited in parallel. +defaultparalleliterators int default=8 + +## Default number of maximum client replies pending. +defaultpendingmessages int default=32 + +## Default size of docblocks used to transfer visitor data. +defaultdocblocksize int default=4190208 + +## Default docblock timeout in ms used to transfer visitor data. +## Currently defaults to a day. 
This is to avoid slow visitor target problems, +## getting data resent faster than it can process, and since there are very few +## reasons to actually time out +defaultdocblocktimeout int default=180000 + +## Default timeout of visitor info messages: Progress and error reports. +## If these time out, the visitor will be aborted on the storage node. +defaultinfotimeout int default=60000 + +## Max concurrent visitors (legacy) +maxconcurrentvisitors int default=64 + +## Priority-based max concurrent visitors. Fixed is the total number of +## concurrent visitors that can run for any priorities. Variable +## increases the concurrency limit for higher priorities, the limit +## being linear with a message's priority. Example: if Fixed is 16 +## and Variable is 64, maxconcurrent for a pri 255 message is 16 and +## maxconcurrent for a pri 0 message is 16+64=80. +## If fixed is left as 0, it will take the value of maxconcurrentvisitors, +## allowing backwards compatibility. +maxconcurrentvisitors_fixed int default=16 +maxconcurrentvisitors_variable int default=64 + +## Max size of visitor priority queue +maxvisitorqueuesize int default=1024 + +# Limit of memory used _per visitor_ in bytes. +# Due to optimistic parallelization, it is possible for this limit to be +# initially violated when the visitor is first started. This can happen since +# the visitor does not know the size of the bucket contents before fetching +# any data from it and it will do so based on parallelization factors specified +# in the CreateVisitor command. If 3 buckets are initially visited in parallel +# and these each contain a single 100 MiB document, the memory usage of the +# visitor will peak at 300 MiB even if the configured limit is e.g. 20 MiB. +# Default value is set to 24 MiB, which attempts to keep a reasonably safe +# level in the face of a default number of max concurrent visitors (64). +visitor_memory_usage_limit int default=25165824 -- cgit v1.2.3