aboutsummaryrefslogtreecommitdiffstats
path: root/configdefinitions/src/vespa/stor-filestor.def
blob: 80994bfb477e74ce2ab449af54b1933e62cb975d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
namespace=vespa.config.content

## Use the new storage core
use_new_core bool default=false restart

## FILE LAYOUT PARAMETERS

## Number of directories per level to spread files across
## DEPRECATED: see stor-memfilepersistence config instead
dir_spread int default=256 restart

## Number of directory levels
## DEPRECATED: see stor-memfilepersistence config instead
dir_levels int default=1 restart

## FILE SIZE PARAMETERS

## Minimum number of meta data entries in one slotfile. When creating new
## files or resizing files, enforce it to contain at least this many meta
## entries. Set to 512 by default, using 20544 bytes total for metadata in
## new files.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_file_meta_slots int default=512

## Maximum number of entries in one slotfile. File must be split before
## accepting more data
##
## The default ensures that meta data is less than 512 kB.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_file_meta_slots int default=13106

## Minimum size of header block. At least this amount of header space will
## be available in new or resized files. For 512 documents, this will be
## 200 bytes per document.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_file_header_block_size int default=102848

## Maximum header block size (in bytes). Since the whole meta data list and
## header block needs to be read into memory for some operations, a limit
## can be set for the header block, to avoid consuming too much memory.
##
## Default is set high, as we don't configure it automatically right now, so we
## would rather people configure it down than up.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_file_header_block_size int default=33554432

## Minimum size of a single slotfile. When creating or resizing files, they
## will never become smaller than this. Default of 1 MB, will be 1807 byte
## per doc if we use all 512 meta data entries set as default min meta
## entries.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_file_size int default=1048576

## Maximum size of a single slotfile. File must be split before accepting
## more data. Will return file full errors.
##
## Default is set high, as we don't configure it automatically right now, so we
## would rather people configure it down than up.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_file_size int default=268431360

## When creating new files, always create files as a multiple of this size.
## Should be set to the block size on the disk, or possibly a multiple of
## that, though we know no advantage of setting it to more than the block
## size. If you want more free space in files, you should rather adjust that
## with growfactor or min file sizes.
## DEPRECATED: see stor-memfilepersistence config instead
file_block_size int default=4096

## DETECT FAILURE PARAMETERS

## If persistence operations take more than this amount of milliseconds, it
## will be logged as a warning.
warn_on_slow_operations int default=5000

## After seeing given number of errors on a disk, storage will disable the
## disk and restart. If set to 0, storage will never disable disks. Note
## that if you get disk errors, which aren't automatically fixed, this will
## make storage keep failing all operations towards this disk, and all
## retries will use the same disk and same file, so you might end up with a
## lot of operations that can never succeed. Thus, it is not recommended to
## turn this off.
fail_disk_after_error_count int default=1 restart

## If a disk operation takes more time in seconds than the disk operation
## timeout, storage will consider that a disk failure, and mark the disk
## bad, restart and not use the disk anymore. Note that currently, this
## includes CPU time too, so if used, this parameter should be set fairly
## high as we still sometimes have some operations that might take a few
## seconds. Disabled by default as we currently have these long operations
## during normal load.
disk_operation_timeout int default=0 restart

## Timestamps provided for entries written are normally provided externally.
## To detect clock skew between nodes, storage will warn if timestamps look
## out of sync. Future time clearly indicates clock skew and thus the limit
## here is low. Time in the past might just indicate that the operation has
## been queued for a while, so the default value here is not very strict.
## Time is given in seconds. Note that there are valid use cases for
## inserting data with old timestamps, for instance when synchronizing two
## copies of the same data. Warnings should not be printed in these cases.
time_future_limit int default=5
time_past_limit int default=3600

## Enabling debug verifications will make storage do extra verifications
## to find errors as soon as possible. These extra verifications will use up
## a lot of resources though, and should not be needed in normal operations.
## They are mostly to be used during test phases or when debugging problems.
## The value itself is a bit mask, where you can enable different kinds of
## verifications by setting given bits.
debug_verifications int default=0 restart

## CONSISTENCY PARAMETERS

## If true, fsync after all disk operations, to ensure no dirty OS file
## cache afterwards. This is only useful if using cached IO, which is not
## recommended to start with.
## DEPRECATED: see stor-memfilepersistence config instead
fsync_after_each_operation bool default=false restart

## Time period to keep all updates (given in seconds). One can revert any
## operation done within this time.
## DEPRECATED: see stor-memfilepersistence config instead
revert_time_period int default=300

## If a remove is older than the revert time period, the document it is
## removing may be removed from the file. There are a few advantages to
## keeping removes a bit longer though. If you use this copy to
## synchronize another copy of data, having the remove entry makes it easy
## to detect that you should delete this entry from the other data copy.
## This is useful for internal synchronization of files within VDS if you
## use multiple copies, or for partial recovery or BCP situations. To
## guarantee consistency in such situations, a data destination that has
## been unavailable for longer than this amount of time, should have its
## data cleared before being set into the system again. This value is given
## in seconds, with the default being one week.
## DEPRECATED: see stor-memfilepersistence config instead
keep_remove_time_period int default=604800

## Maximum number of entries to keep for a single document. Heavy resending
## within the revert time period may add loads of entries to a file, ruining
## performance. This setting sets a maximum number of entries for each
## document. This will make entries potentially live shorter than the revert
## time period to avoid a resending worst case. A value of 0 means infinite.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_versions_of_single_document_stored int default=0

## PERFORMANCE PARAMETERS

## Number of threads to use for each mountpoint. VDS needs memory per thread
## to perform disk operations, so increasing this number will increase
## memory usage, but it will also make the disk queue on a given disk be
## able to be larger, such that the disk can choose operations to optimize
## seek time.
## See benchmarks for performance/memory tradeoff.
threads[].lowestpri int default=255 restart

## Pause operations (and block new ones from starting) with priority 
## lower than this value when executing operations with higher pri than 
## min_priority_to_be_blocking
max_priority_to_block int default=255 restart
min_priority_to_be_blocking int default=0 restart

## Chunksize to use while merging buckets between nodes.
##
## Default is set to 4 MB - 4k. This is to allow for malloc to waste some bytes
## with tracking info or to align to 512b without passing allocation limit,
## while still reading 4k blocks from disk.
bucket_merge_chunk_size int default=4190208 restart

## When reading a slotfile, one does not know the size of the meta data
## list, so one has to read a static amount of data, and possibly read more
## if one didn't read enough. This value needs to be at least 64 bytes to read
## the initial header stating the true size of the meta data list.
## DEPRECATED: see stor-memfilepersistence config instead
initial_index_read int default=61440

## Similar to index read, when reading the document identifiers, one does
## not know the length of the name prior to reading. Thus, if you read less
## than the size, you will have to do an extra read to get the rest. If you
## use very long document identifiers you should increase this value to be
## larger than most of your identifiers.
## restart flag was added automatically and needs to be verified.
## DEPRECATED: see stor-memfilepersistence config instead
initial_name_read int default=512 restart

## When we need to read (or write) multiple entries in a file, we can
## read a big enough section to cover all of them. But at some
## distance between the areas we need, it becomes beneficial to do multiple
## reads rather than to read over them. This setting sets how many sequential
## bytes we don't need that we allow to be read/written in order to join two
## logical IO operations together in the application. Setting this low might
## be ok if system calls are cheap and we manage to queue up the next IO
## operation in time such that the disk doesn't need to spin an extra round.
## Setting it high will make the disk more likely to process them together,
## but the time used to read/write the gap might have been used to do
## something useful instead.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_gap_to_read_through int default=65536

## Currently not in here as we don't have append functionality yet. Might
## improve performance in some cases.
## max_file_appends int default=0

## When writing with direct IO, we need to align blocks to 512b, and to
## avoid reading we write garbage after each doc to fill 512b block. When
## resizing or splitting the file we can realign the files such that we
## remove the gaps of existing data, as we will rewrite everything anyhow.
## If using very small documents this might improve your disk space
## utilization. For larger documents it doesn't really reduce much, so it
## might be useful to turn it off to save CPU.
## DEPRECATED: see stor-memfilepersistence config instead
remove_file_gaps_on_rewrite bool default=true restart

## The order we try to enforce in file body blocks. Visitors that only need
## to visit some of the data in a bucket will be able to read less if what
## it needs to read is located next to each other on disk. However, as we
## don't enforce order on write operations, this demands that resize/split
## operations do the resorting, which, if we can't do it all in memory, is
## very expensive. ANY will not do any reordering. Timestamp will enforce
## timestamp order, which is fairly close to order that will normally be
## written anyway, so it should be cheap to reorder even if we can't do it
## all in memory. This might be useful if you often visit subsets based on
## time. RGID uses reverse GID, which stores data from one location
## together. This is useful if you want to visit data from one user from
## buckets that have many users often. This is a much more expensive sort
## though, and should only be done if we have enough memory.
## DEPRECATED: see stor-memfilepersistence config instead
body_block_order enum { ANY, TIMESTAMP, RGID } default=ANY restart

## If set to true, we will refuse to do reordering of memory unless we have
## enough memory to do it all in memory. See body_block_order comments.
## DEPRECATED: see stor-memfilepersistence config instead
only_reorder_body_in_memory bool default=true restart

## Whether or not we should verify checksums of all read data during regular
## operations like put, get & remove. Note that some operations, like merge
## and bucket integrity verify operations will still check checksums even if
## this is set false.
## DEPRECATED: see stor-memfilepersistence config instead
verify_checksum_on_regular_load bool default=true restart

## For streaming search, visiting is very performance critical. Thus you can
## specifically disable checksum verification for visiting.
## DEPRECATED: see stor-memfilepersistence config instead
verify_checksum_on_visit bool default=true restart

## Maximum size of index buffer that will be allowed to stay in memory
## without being reduced back to this size after we no longer need it.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_sustainable_index_buffer_size int default=1044480 restart

## Maximum size of input buffer that will be allowed to stay in memory
## without being reduced back to this size after we no longer need it.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_sustainable_input_buffer_size int default=1044480 restart

## Maximum size of output buffer that will be allowed to stay in memory
## without being reduced back to this size after we no longer need it.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_sustainable_output_buffer_size int default=1044480 restart

## Whether to downsize index buffer immediately after usage if it's above the
## maximum size. If not, it will not be resized down until someone requests
## to use it that needs less data in the buffer. Index buffer is used all
## the time so there should be little reason for immediately downsizing it.
## DEPRECATED: see stor-memfilepersistence config instead
downsize_index_buffer_immediately_after_use bool default=false restart

## Whether to downsize input buffer immediately after usage if it's above the
## maximum size. If not, it will not be resized down until someone requests
## to use it that needs less data in the buffer. Input buffer is not used
## that often, so downsizing it immediately might save some memory.
## DEPRECATED: see stor-memfilepersistence config instead
downsize_input_buffer_immediately_after_use bool default=true restart

## Whether to downsize output buffer immediately after usage if it's above
## the maximum size. If not, it will not be resized down until someone
## requests to use it that needs less data in the buffer. Output buffer is
## not used that often, so downsizing it immediately might save some memory.
## DEPRECATED: see stor-memfilepersistence config instead
downsize_output_buffer_immediately_after_use bool default=true restart

## Minimum size of buffer used to write a continuous file. If the maximum
## amount of memory is not available, at least this amount will be allocated.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_continuous_file_write_buffer_size int default=1044480 restart

## Maximum size of buffer used to write a continuous file. If set above max
## file size we will always write new files in one go, which probably makes
## for the least chance of getting fragmentation, but will also consume the
## most memory. Default of writing a MB at a time, should make for little
## performance loss because of disk seek time, and hopefully get little
## fragmentation while keeping memory usage down.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_continuous_file_write_buffer_size int default=1044480 restart

## Minimum amount of memory allocated to read source data during join. This
## amount of memory will be forced allocated.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_join_source_body_read_buffer_size int default=1044480 restart

## This sets the maximum size of the buffer used in join to read source
## data. Join uses the least IO if this buffer is as big as the body block
## of the source file. Due to the memory manager, each join might get that
## much memory though.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_join_source_body_read_buffer_size int default=16773120 restart

## Minimum amount of memory allocated to read source data during export. This
## amount of memory will be forced allocated.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_export_source_body_read_buffer_size int default=1044480 restart

## This sets the maximum size of the buffer used in export to read source
## data. Export uses the least IO if this buffer is as big as the body block
## of the source file. In addition, reordering of body block might not be
## feasible unless the buffer is big enough to include the whole body block.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_export_source_body_read_buffer_size int default=33550336 restart

## Minimum size of buffer used to read data during defragmentation.
## DEPRECATED: see stor-memfilepersistence config instead
minimum_defrag_source_body_read_buffer_size int default=1044480 restart

## This sets the maximum size of the buffer used in defragmentation to read
## source data. Defragmentation uses the least IO if this buffer is as big
## as the body block of the source file, but this might consume some memory.
## Defragmentation is not enabled by default.
## DEPRECATED: see stor-memfilepersistence config instead
maximum_defrag_source_body_read_buffer_size int default=1044480 restart

## When merging, if we find more than this number of documents that exist on all
## of the same copies, send a separate apply bucket diff with these entries
## to an optimized merge chain that guarantees minimum data transfer.
common_merge_chain_optimalization_minimum_size int default=64 restart

## When merging, it is possible to send more metadata than needed in order to
## let local nodes in merge decide which entries fit best to add this time
## based on disk location. Toggle this option on to use it. Note that memory
## consumption might increase in a 4.1 to 4.2 upgrade due to this, as 4.1
## doesn't support filling in only part of the metadata provided and will
## always fill all.
enable_merge_local_node_choose_docs_optimalization bool default=true restart

## If set, when we need to cache the entire header, and we already have cached
## all the metadata, read the metadata to find max header position, and only
## read the part containing information.
## DEPRECATED: see stor-memfilepersistence config instead
read_only_used_header_part_when_possible bool default=true restart

## Enable the slotfile read cache. The cache holds recent metadata and header
## blocks read from disks, and even if small, is very useful for localized
## access.
## DEPRECATED: see stor-memfilepersistence config instead
enable_slotfile_cache bool default=true restart

## Let the slotfile cache the whole header on a header only operation not
## needing the entire header, if this amount of header only accesses needing
## part of the header has already happened.
##
## Set very high to begin with to never reduce performance. If you have heavy
## header only access to some files, you may get better performance by tuning
## this value.
## DEPRECATED: see stor-memfilepersistence config instead
slotfile_precache_header_after_access_count int default=512 restart

## Whether or not to enable the multibit split optimalization. This is useful
## if splitting is expensive, but listing document identifiers is fairly cheap.
## This is true for memfile persistence layer, but not for vespa search.
enable_multibit_split_optimalization bool default=true restart

## STORAGE SPACE vs IO/CPU PERFORMANCE OPTIONS

## If true, use direct IO, bypassing OS caches for disk access. This is very
## useful as VDS does a random distribution dividing load, so it is unlikely
## that the OS cache will ever hit, and thus it is a huge performance drain.
## DEPRECATED: see stor-memfilepersistence config instead
use_direct_io bool default=true restart

## All IO operations will be aligned to this amount of bytes if direct IO is
## enabled.
## DEPRECATED: see stor-memfilepersistence config instead
block_alignment_size int default=512 restart

## When a disk is fuller than this factor, we will not allow more data to be
## written to the system, unless this data is written in order to reduce
## storage consumption, such as resizing files to become smaller or add
## meta entries to write remove entries into. This value is set high as
## default as we expect a lot of users to have formatted their disks to
## already reserve 8% of the data to root user which is often default. We
## suggest using 0% reserved for root, and rather set this parameter lower
## to reserve space. That way, VDS has more space available in worst case
## in order to resize files to become smaller.
## DEPRECATED: see stor-memfilepersistence config instead
disk_full_factor double default=0.98 restart

## The grow factor sets how much free space to add to a file when we resize 
## it, whether we are making it smaller or larger. If the space we need
## after the operation triggering the resize is X, then the file will be
## resized to be of size X * growfactor. In a growing system with no
## deletes, a growfactor of 2 will make the files have 25% free space on
## average. Reducing it to 1.5 will reduce the average free space to 16.7%.
## DEPRECATED: see stor-memfilepersistence config instead
grow_factor double default=2.0

## If files fall below this fill rate, resize file to become smaller.
## Note that this parameter is tightly coupled with the growfactor. For
## instance, with a growfactor of 2.0, a file will contain 50 % free space
## after a resize. If the min fill rate then is 0.50, that means that if a
## single doc is deleted from this file, we need to resize it to
## become smaller straight away.
## DEPRECATED: see stor-memfilepersistence config instead
min_fill_rate double default=0.1

## Minimum part of defragmented space one needs to reclaim to allow
## defragmentation of file. This value is given as a ratio of reclaimed
## space compared to the total size of the data block.
## Example: A body block of 100 MB, has 15 MB free space, with largest
## continuous free space of 5 MB. Gain of defragmentation will then be 0.1.
## DEPRECATED: see stor-memfilepersistence config instead
defrag_minimum_gain double default=1.0 restart

## When creating/resizing slotfiles, one uses average document sizes to
## decide how much free space to add to metadata, header and body portions.
## This option can be used to allocate extra free space to meta data in
## order to reduce the chance of the file needing resize due to lack of free
## meta data entries.
## DEPRECATED: see stor-memfilepersistence config instead
overrepresent_meta_data_factor double default=1.2

## When creating/resizing slotfiles, one uses average document sizes to
## decide how much free space to add to metadata, header and body portions.
## This option can be used to allocate extra free space to header data in
## order to reduce the chance of the file needing resize due to lack of
## header block space.
## DEPRECATED: see stor-memfilepersistence config instead
overrepresent_header_block_factor double default=1.1

## Load types to use cache for. If empty, cache for all.
## DEPRECATED: see stor-memfilepersistence config instead
load_types_to_cache[] string  restart