author     Jon Bratseth <bratseth@yahoo-inc.com>   2016-06-15 23:09:44 +0200
committer  Jon Bratseth <bratseth@yahoo-inc.com>   2016-06-15 23:09:44 +0200
commit     72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree       2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /memfilepersistence
Publish
Diffstat (limited to 'memfilepersistence')
-rw-r--r--  memfilepersistence/.gitignore  2
-rw-r--r--  memfilepersistence/CMakeLists.txt  36
-rw-r--r--  memfilepersistence/OWNERS  1
-rw-r--r--  memfilepersistence/README  4
-rw-r--r--  memfilepersistence/src/.gitignore  4
-rw-r--r--  memfilepersistence/src/Doxyfile  994
-rw-r--r--  memfilepersistence/src/tests/.gitignore  8
-rw-r--r--  memfilepersistence/src/tests/CMakeLists.txt  14
-rw-r--r--  memfilepersistence/src/tests/conformance/.gitignore  2
-rw-r--r--  memfilepersistence/src/tests/conformance/CMakeLists.txt  6
-rw-r--r--  memfilepersistence/src/tests/conformance/memfileconformancetest.cpp  36
-rw-r--r--  memfilepersistence/src/tests/device/.gitignore  2
-rw-r--r--  memfilepersistence/src/tests/device/CMakeLists.txt  10
-rw-r--r--  memfilepersistence/src/tests/device/devicemanagertest.cpp  129
-rw-r--r--  memfilepersistence/src/tests/device/devicemappertest.cpp  51
-rw-r--r--  memfilepersistence/src/tests/device/devicestest.cpp  70
-rw-r--r--  memfilepersistence/src/tests/device/mountpointlisttest.cpp  255
-rw-r--r--  memfilepersistence/src/tests/device/partitionmonitortest.cpp  204
-rw-r--r--  memfilepersistence/src/tests/init/.gitignore  2
-rw-r--r--  memfilepersistence/src/tests/init/CMakeLists.txt  6
-rw-r--r--  memfilepersistence/src/tests/init/filescannertest.cpp  492
-rw-r--r--  memfilepersistence/src/tests/mapper/.gitignore  0
-rw-r--r--  memfilepersistence/src/tests/spi/.gitignore  2
-rw-r--r--  memfilepersistence/src/tests/spi/CMakeLists.txt  20
-rw-r--r--  memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp  735
-rw-r--r--  memfilepersistence/src/tests/spi/buffer_test.cpp  75
-rw-r--r--  memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp  78
-rw-r--r--  memfilepersistence/src/tests/spi/iteratorhandlertest.cpp  940
-rw-r--r--  memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp  504
-rw-r--r--  memfilepersistence/src/tests/spi/logginglazyfile.h  88
-rw-r--r--  memfilepersistence/src/tests/spi/memcachetest.cpp  412
-rw-r--r--  memfilepersistence/src/tests/spi/memfileautorepairtest.cpp  411
-rw-r--r--  memfilepersistence/src/tests/spi/memfiletest.cpp  987
-rw-r--r--  memfilepersistence/src/tests/spi/memfiletestutils.cpp  455
-rw-r--r--  memfilepersistence/src/tests/spi/memfiletestutils.h  294
-rw-r--r--  memfilepersistence/src/tests/spi/memfilev1serializertest.cpp  1110
-rw-r--r--  memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp  501
-rw-r--r--  memfilepersistence/src/tests/spi/options_builder.h  52
-rw-r--r--  memfilepersistence/src/tests/spi/providerconformancetest.cpp  74
-rw-r--r--  memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp  111
-rw-r--r--  memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp  663
-rw-r--r--  memfilepersistence/src/tests/spi/simulatedfailurefile.h  78
-rw-r--r--  memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp  213
-rw-r--r--  memfilepersistence/src/tests/testhelper.cpp  124
-rw-r--r--  memfilepersistence/src/tests/testhelper.h  54
-rw-r--r--  memfilepersistence/src/tests/testrunner.cpp  15
-rw-r--r--  memfilepersistence/src/tests/tools/.gitignore  2
-rw-r--r--  memfilepersistence/src/tests/tools/CMakeLists.txt  7
-rw-r--r--  memfilepersistence/src/tests/tools/dumpslotfiletest.cpp  138
-rw-r--r--  memfilepersistence/src/tests/tools/vdsdisktooltest.cpp  108
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/.gitignore  3
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt  13
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt  12
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h  19
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp  113
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h  99
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h  57
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp  120
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/environment.h  133
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp  56
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h  126
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp  34
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h  52
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h  42
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/options.cpp  185
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/options.h  136
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp  40
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h  89
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/types.cpp  38
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/common/types.h  198
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt  14
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/device.cpp  68
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/device.h  75
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp  213
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h  77
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp  101
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h  105
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp  141
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/directory.h  69
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp  45
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/disk.h  48
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp  125
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h  77
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp  651
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h  138
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp  66
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/partition.h  59
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp  392
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h  157
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/init/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt  6
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp  240
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h  105
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt  15
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp  33
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h  40
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp  28
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h  61
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp  219
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h  119
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp  67
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h  157
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp  102
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h  62
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h  61
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp  1029
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h  71
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp  698
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h  84
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp  340
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h  109
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h  121
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp  538
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h  365
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp  103
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h  59
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp  84
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h  67
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h  96
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt  11
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp  1116
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h  316
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp  529
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h  301
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp  208
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h  67
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h  80
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h  90
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp  133
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h  189
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp  22
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h  65
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp  107
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h  128
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore  2
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt  14
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp  29
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h  45
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp  431
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h  252
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp  159
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h  37
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp  12
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h  20
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp  889
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h  164
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h  103
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp  287
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h  116
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp  110
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h  41
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp  24
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h  54
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h  18
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp  69
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h  25
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore  5
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt  23
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp  361
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h  23
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp  27
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp  32
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp  518
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h  19
-rw-r--r--  memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl  47
169 files changed, 27172 insertions, 0 deletions
diff --git a/memfilepersistence/.gitignore b/memfilepersistence/.gitignore
new file mode 100644
index 00000000000..a9b20e8992d
--- /dev/null
+++ b/memfilepersistence/.gitignore
@@ -0,0 +1,2 @@
+Makefile
+Testing
diff --git a/memfilepersistence/CMakeLists.txt b/memfilepersistence/CMakeLists.txt
new file mode 100644
index 00000000000..005a1373b75
--- /dev/null
+++ b/memfilepersistence/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_define_module(
+    DEPENDS
+    vespadefaults
+    fastos
+    vespalog
+    vespalib
+    document
+    config_cloudconfig
+    configdefinitions
+    vdslib
+    persistence
+    storageframework
+
+    LIBS
+    src/vespa/memfilepersistence
+    src/vespa/memfilepersistence/common
+    src/vespa/memfilepersistence/device
+    src/vespa/memfilepersistence/init
+    src/vespa/memfilepersistence/mapper
+    src/vespa/memfilepersistence/memfile
+    src/vespa/memfilepersistence/spi
+    src/vespa/memfilepersistence/tools
+
+    TEST_DEPENDS
+    persistence_persistence_conformancetest
+    vdstestlib
+
+    TESTS
+    src/tests
+    src/tests/conformance
+    src/tests/device
+    src/tests/init
+    src/tests/spi
+    src/tests/tools
+)
diff --git a/memfilepersistence/OWNERS b/memfilepersistence/OWNERS
new file mode 100644
index 00000000000..dbcff24b338
--- /dev/null
+++ b/memfilepersistence/OWNERS
@@ -0,0 +1 @@
+vekterli
diff --git a/memfilepersistence/README b/memfilepersistence/README
new file mode 100644
index 00000000000..38a0e92febb
--- /dev/null
+++ b/memfilepersistence/README
@@ -0,0 +1,4 @@
+This module contains Storage's persistence SPI implementation.
+
+It uses memory representation of files, which currently are persisted in
+slotfiles on disk.
diff --git a/memfilepersistence/src/.gitignore b/memfilepersistence/src/.gitignore
new file mode 100644
index 00000000000..d6d89678e22
--- /dev/null
+++ b/memfilepersistence/src/.gitignore
@@ -0,0 +1,4 @@
+/Makefile.ini
+/config_command.sh
+/project.dsw
+/memfilepersistence.mak
diff --git a/memfilepersistence/src/Doxyfile b/memfilepersistence/src/Doxyfile
new file mode 100644
index 00000000000..d40aff6f46c
--- /dev/null
+++ b/memfilepersistence/src/Doxyfile
@@ -0,0 +1,994 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Doxyfile 1.2.18
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = Storage
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = ../doc
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch,
+# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en
+# (Japanese with english messages), Korean, Norwegian, Polish, Portuguese,
+# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these class will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited
+# members of a class in the documentation of that class as if those members were
+# ordinary class members. Constructors, destructors and assignment operators of
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. It is allowed to use relative paths in the argument list.
+
+STRIP_FROM_PATH =
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower case letters. If set to YES upper case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# users are adviced to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explict @brief command for a brief description.
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# reimplements.
+
+INHERIT_DOCS = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 4
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consist of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C.
+# For instance some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = storage
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp
+# *.h++ *.idl *.odl
+
+FILE_PATTERNS = *.h *.cpp
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+
+INPUT_FILTER =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet
+
+HTML_STYLESHEET = ../cpp/vespa_link.css
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output dir.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non empty doxygen will try to run
+# the html help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the Html help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript and frames is required (for instance Mozilla, Netscape 4.0+,
+# or Internet explorer 4.0+). Note that for large projects the tree generation
+# can take a very long time. In such cases it is better to disable this feature.
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimised for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assigments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_XML = NO
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_PREDEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_PREDEF_ONLY tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse the
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::addtions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tagfiles.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in Html, RTF and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off. Note that this
+# option is superceded by the HAVE_DOT option below. This is only a fallback. It is
+# recommended to install and use dot, since it yield more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermedate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::addtions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
+
+# The CGI_NAME tag should be the name of the CGI script that
+# starts the search engine (doxysearch) with the correct parameters.
+# A script with this name will be generated by doxygen.
+
+CGI_NAME = search.cgi
+
+# The CGI_URL tag should be the absolute URL to the directory where the
+# cgi binaries are located. See the documentation of your http daemon for
+# details.
+
+CGI_URL =
+
+# The DOC_URL tag should be the absolute URL to the directory where the
+# documentation is located. If left blank the absolute path to the
+# documentation, with file:// prepended to it, will be used.
+
+DOC_URL =
+
+# The DOC_ABSPATH tag should be the absolute path to the directory where the
+# documentation is located. If left blank the directory on the local machine
+# will be used.
+
+DOC_ABSPATH =
+
+# The BIN_ABSPATH tag must point to the directory where the doxysearch binary
+# is installed.
+
+BIN_ABSPATH = /usr/local/bin/
+
+# The EXT_DOC_PATHS tag can be used to specify one or more paths to
+# documentation generated for other projects. This allows doxysearch to search
+# the documentation for these projects as well.
+
+EXT_DOC_PATHS =
diff --git a/memfilepersistence/src/tests/.gitignore b/memfilepersistence/src/tests/.gitignore
new file mode 100644
index 00000000000..b8a959a31c5
--- /dev/null
+++ b/memfilepersistence/src/tests/.gitignore
@@ -0,0 +1,8 @@
+/.depend
+/Makefile
+/dirconfig.tmp
+/test.vlog
+/testfile.0
+/testrunner
+/vdsroot
+memfilepersistence_testrunner_app
diff --git a/memfilepersistence/src/tests/CMakeLists.txt b/memfilepersistence/src/tests/CMakeLists.txt
new file mode 100644
index 00000000000..ee0cea9e1a5
--- /dev/null
+++ b/memfilepersistence/src/tests/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(memfilepersistence_testrunner_app
+    SOURCES
+    testhelper.cpp
+    testrunner.cpp
+    DEPENDS
+    memfilepersistence_testconformance
+    memfilepersistence_testdevices
+    memfilepersistence_testinit
+    memfilepersistence_testspi
+    memfilepersistence_testtools
+    memfilepersistence
+)
+vespa_add_test(NAME memfilepersistence_testrunner_app COMMAND memfilepersistence_testrunner_app)
diff --git a/memfilepersistence/src/tests/conformance/.gitignore b/memfilepersistence/src/tests/conformance/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/tests/conformance/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/tests/conformance/CMakeLists.txt b/memfilepersistence/src/tests/conformance/CMakeLists.txt
new file mode 100644
index 00000000000..378f5751931
--- /dev/null
+++ b/memfilepersistence/src/tests/conformance/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_testconformance
+    SOURCES
+    memfileconformancetest.cpp
+    DEPENDS
+)
diff --git a/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp b/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp
new file mode 100644
index 00000000000..18a12788945
--- /dev/null
+++ b/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/spi/memfilepersistence.h>
+#include <vespa/persistence/conformancetest/conformancetest.h>
+
+LOG_SETUP(".test.conformance");
+
+using namespace storage::spi;
+
+namespace storage {
+namespace memfile {
+
+/*
+struct MemFileConformanceTest : public ConformanceTest {
+ struct Factory : public PersistenceFactory {
+
+ PersistenceSPI::UP getPersistenceImplementation() {
+ return PersistenceSPI::UP(new MemFilePersistence);
+ }
+ };
+
+ MemFileConformanceTest()
+ : ConformanceTest(PersistenceFactory::UP(new Factory)) {}
+
+ CPPUNIT_TEST_SUITE(MemFileConformanceTest);
+ DEFINE_CONFORMANCE_TESTS();
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemFileConformanceTest);
+*/
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/device/.gitignore b/memfilepersistence/src/tests/device/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/tests/device/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/tests/device/CMakeLists.txt b/memfilepersistence/src/tests/device/CMakeLists.txt
new file mode 100644
index 00000000000..845c70ae8e3
--- /dev/null
+++ b/memfilepersistence/src/tests/device/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_testdevices
+ SOURCES
+ mountpointlisttest.cpp
+ devicemanagertest.cpp
+ devicestest.cpp
+ devicemappertest.cpp
+ partitionmonitortest.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/tests/device/devicemanagertest.cpp b/memfilepersistence/src/tests/device/devicemanagertest.cpp
new file mode 100644
index 00000000000..eeb5007f452
--- /dev/null
+++ b/memfilepersistence/src/tests/device/devicemanagertest.cpp
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/util/exception.h>
+#include <sys/errno.h>
+#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h>
+
+namespace storage {
+
+namespace memfile {
+
+class DeviceManagerTest : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(DeviceManagerTest);
+ CPPUNIT_TEST(testEventClass);
+ CPPUNIT_TEST(testEventSending);
+ CPPUNIT_TEST(testXml);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testEventClass();
+ void testEventSending();
+ void testXml();
+
+ framework::defaultimplementation::FakeClock _clock;
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(DeviceManagerTest);
+
+void DeviceManagerTest::testEventClass()
+{
+    // Test that creating various IO events from common errno values
+    // generates understandable error descriptions.
+ {
+ IOEvent e(IOEvent::createEventFromErrno(1, ENOTDIR, "/mypath"));
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("IOEvent(PATH_FAILURE, Not a directory: /mypath, time 1)"),
+ e.toString(true));
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, e.getState());
+ }
+ {
+ IOEvent e(IOEvent::createEventFromErrno(2, EACCES, "/mypath"));
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("IOEvent(NO_PERMISSION, Permission denied: /mypath, time 2)"),
+ e.toString(true));
+ CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, e.getState());
+ }
+ {
+ IOEvent e(IOEvent::createEventFromErrno(3, EIO, "/mypath"));
+ CPPUNIT_ASSERT_EQUAL(
+ std::string("IOEvent(IO_FAILURE, Input/output error: /mypath, time 3)"),
+ e.toString(true));
+ CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, e.getState());
+ }
+ {
+ IOEvent e(
+ IOEvent::createEventFromErrno(4, EBADF, "/mypath", VESPA_STRLOC));
+ CPPUNIT_ASSERT_PREFIX(
+ std::string("IOEvent(INTERNAL_FAILURE, Bad file descriptor: /mypath"
+ ", testEventClass in"),
+ e.toString(true));
+ CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, e.getState());
+ }
+}
+
+namespace {
+
+ struct Listener : public IOEventListener {
+ std::ostringstream ost;
+
+ Listener() : ost() { ost << "\n"; }
+ virtual ~Listener() {}
+
+ virtual void handleDirectoryEvent(Directory& dir, const IOEvent& e) {
+ ost << "Dir " << dir.getPath() << ": " << e.toString(true) << "\n";
+ }
+ virtual void handlePartitionEvent(Partition& part, const IOEvent& e) {
+ ost << "Partition " << part.getMountPoint() << ": "
+ << e.toString(true) << "\n";
+ }
+ virtual void handleDiskEvent(Disk& disk, const IOEvent& e) {
+ ost << "Disk " << disk.getId() << ": " << e.toString(true) << "\n";
+ }
+
+ };
+
+}
+
+void DeviceManagerTest::testEventSending()
+{
+ // Test that adding events to directories in the manager actually sends
+ // these events on to listeners.
+ DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock);
+ Listener l;
+ manager.addIOEventListener(l);
+ Directory::LP dir(manager.getDirectory("/home/foo/var", 0));
+    // IO failures are disk events. They will mark all partitions and
+    // directories on that disk as bad.
+ dir->addEvent(IOEvent::createEventFromErrno(1, EIO, "/home/foo/var/foo"));
+ dir->addEvent(IOEvent::createEventFromErrno(2, EBADF, "/home/foo/var/bar"));
+ dir->addEvent(IOEvent::createEventFromErrno(3, EACCES, "/home/foo/var/car"));
+ dir->addEvent(IOEvent::createEventFromErrno(4, EISDIR, "/home/foo/var/var"));
+ std::string expected("\n"
+ "Disk 1: IOEvent(IO_FAILURE, Input/output error: "
+ "/home/foo/var/foo, time 1)\n"
+ "Dir /home/foo/var: IOEvent(INTERNAL_FAILURE, Bad file "
+ "descriptor: /home/foo/var/bar, time 2)\n"
+ "Dir /home/foo/var: IOEvent(NO_PERMISSION, Permission denied: "
+ "/home/foo/var/car, time 3)\n"
+ "Dir /home/foo/var: IOEvent(PATH_FAILURE, Is a directory: "
+ "/home/foo/var/var, time 4)\n"
+ );
+ CPPUNIT_ASSERT_EQUAL(expected, l.ost.str());
+}
+
+void DeviceManagerTest::testXml()
+{
+ DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock);
+ Directory::LP dir(manager.getDirectory("/home/", 0));
+ dir->getPartition().initializeMonitor();
+ std::string xml = manager.toXml(" ");
+ CPPUNIT_ASSERT_MSG(xml,
+ xml.find("<partitionmonitor>") != std::string::npos);
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/tests/device/devicemappertest.cpp b/memfilepersistence/src/tests/device/devicemappertest.cpp
new file mode 100644
index 00000000000..a78554a6342
--- /dev/null
+++ b/memfilepersistence/src/tests/device/devicemappertest.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/devicemapper.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <sys/errno.h>
+
+namespace storage {
+
+namespace memfile {
+
+class DeviceMapperTest : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(DeviceMapperTest);
+ CPPUNIT_TEST(testSimpleDeviceMapper);
+ CPPUNIT_TEST(testAdvancedDeviceMapper);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testSimpleDeviceMapper();
+ void testAdvancedDeviceMapper();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(DeviceMapperTest);
+
+void DeviceMapperTest::testSimpleDeviceMapper()
+{
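+    // SimpleDeviceMapper appears to hand out incrementing device ids per
+    // unique path and to treat the path itself as its own mount point; the
+    // asserts below only rely on that observable behaviour.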
+ SimpleDeviceMapper mapper;
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�"));
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�"));
+ CPPUNIT_ASSERT_EQUAL(uint64_t(2), mapper.getDeviceId("whatnot"));
+ std::string expected("Whatever& �=)/%#)=");
+ CPPUNIT_ASSERT_EQUAL(expected, mapper.getMountPoint(expected));
+}
+
+void DeviceMapperTest::testAdvancedDeviceMapper()
+{
+ AdvancedDeviceMapper mapper;
+ try{
+ mapper.getDeviceId("/doesnotexist");
+ CPPUNIT_FAIL("Expected exception");
+ } catch (vespalib::Exception& e) {
+ std::string what(e.what());
+ CPPUNIT_ASSERT_CONTAIN(
+ "Failed to run stat to find data on file /doesnotexist", what);
+ }
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/tests/device/devicestest.cpp b/memfilepersistence/src/tests/device/devicestest.cpp
new file mode 100644
index 00000000000..bd6898cb7ac
--- /dev/null
+++ b/memfilepersistence/src/tests/device/devicestest.cpp
@@ -0,0 +1,70 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <sys/errno.h>
+#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h>
+
+namespace storage {
+
+namespace memfile {
+
+class DevicesTest : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(DevicesTest);
+ CPPUNIT_TEST(testDisk);
+ CPPUNIT_TEST(testPartition);
+ CPPUNIT_TEST(testDirectory);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testDisk();
+ void testPartition();
+ void testDirectory();
+
+ framework::defaultimplementation::FakeClock _clock;
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(DevicesTest);
+
+void DevicesTest::testDisk()
+{
+ DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock);
+ Disk::LP disk1(manager.getDisk("/something/on/disk"));
+ Disk::LP disk2(manager.getDisk("/something/on/disk"));
+ CPPUNIT_ASSERT_EQUAL(disk1->getId(), disk2->getId());
+ CPPUNIT_ASSERT_EQUAL(disk1.get(), disk2.get());
+ Disk::LP disk3(manager.getDisk("/something/on/disk2"));
+ CPPUNIT_ASSERT(disk2->getId() != disk3->getId());
+ disk3->toString(); // Add code coverage
+}
+
+void DevicesTest::testPartition()
+{
+ DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock);
+ Partition::LP part(manager.getPartition("/etc"));
+ CPPUNIT_ASSERT_EQUAL(std::string("/etc"), part->getMountPoint());
+ part->toString(); // Add code coverage
+}
+
+void DevicesTest::testDirectory()
+{
+ DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock);
+ Directory::LP dir1(manager.getDirectory("/on/disk", 0));
+ CPPUNIT_ASSERT_EQUAL(std::string("/on/disk"), dir1->getPath());
+ CPPUNIT_ASSERT(dir1->getLastEvent() == 0);
+ CPPUNIT_ASSERT_EQUAL(Device::OK, dir1->getState());
+ CPPUNIT_ASSERT(dir1->isOk());
+ CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 0"), dir1->toString());
+
+ dir1->addEvent(Device::IO_FAILURE, "Ouch", "");
+ CPPUNIT_ASSERT(!dir1->isOk());
+ CPPUNIT_ASSERT(dir1->getLastEvent() != 0);
+ CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 5 0 Ouch"), dir1->toString());
+ dir1->toString(); // Add code coverage
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/tests/device/mountpointlisttest.cpp b/memfilepersistence/src/tests/device/mountpointlisttest.cpp
new file mode 100644
index 00000000000..4cb5822ceb7
--- /dev/null
+++ b/memfilepersistence/src/tests/device/mountpointlisttest.cpp
@@ -0,0 +1,255 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <fstream>
+#include <vespa/memfilepersistence/device/mountpointlist.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h>
+
+using vespalib::LinkedPtr;
+using vespalib::fileExists;
+using vespalib::isDirectory;
+using vespalib::isSymLink;
+using vespalib::readLink;
+
+namespace storage {
+
+namespace memfile {
+
+class MountPointList_Test : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(MountPointList_Test);
+ CPPUNIT_TEST(testScanning);
+ CPPUNIT_TEST(testStatusFile);
+ CPPUNIT_TEST(testInitDisks);
+ CPPUNIT_TEST_SUITE_END();
+
+ static const std::string _prefix;
+
+public:
+ void testScanning();
+ void testStatusFile();
+ void testInitDisks();
+
+ void init();
+ void tearDown();
+
+ framework::defaultimplementation::FakeClock _clock;
+
+private:
+ LinkedPtr<DeviceManager> newDeviceManager() {
+ return LinkedPtr<DeviceManager>(
+ new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ _clock));
+ }
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MountPointList_Test);
+
+const std::string MountPointList_Test::_prefix("./vdsroot");
+
+namespace {
+ void run(const std::string& cmd) {
+ CPPUNIT_ASSERT_MESSAGE(cmd, system(cmd.c_str()) == 0);
+ }
+}
+
+void MountPointList_Test::init()
+{
+ tearDown();
+ run("rm -rf "+_prefix);
+ run("mkdir -p "+_prefix+"/disks");
+
+ run("mkdir "+_prefix+"/disks/d0"); // Regular dir
+ run("mkdir "+_prefix+"/disks/d1"); // Inaccessible dir
+ run("chmod 000 "+_prefix+"/disks/d1");
+ run("mkdir "+_prefix+"/disks/D2"); // Wrongly named dir
+ run("mkdir "+_prefix+"/disks/d3"); // Regular non-empty dir
+ run("touch "+_prefix+"/disks/d3/foo");
+ run("touch "+_prefix+"/disks/d4"); // Not a dir
+ run("ln -s D2 "+_prefix+"/disks/d5"); // Symlink to dir
+ run("ln -s d4 "+_prefix+"/disks/d6"); // Symlink to file
+}
+
+void MountPointList_Test::tearDown()
+{
+ try{
+ if (fileExists(_prefix+"/disks/d1")) {
+ run("chmod 755 "+_prefix+"/disks/d1");
+ }
+ } catch (std::exception& e) {
+ std::cerr << "Failed to clean up: " << e.what() << "\n";
+ }
+}
+
+void MountPointList_Test::testScanning()
+{
+ init();
+ MountPointList list(_prefix,
+ std::vector<vespalib::string>(),
+ vespalib::LinkedPtr<DeviceManager>(
+ new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ _clock)));
+ list.scanForDisks();
+
+ // Check that we got the expected entries.
+ CPPUNIT_ASSERT_EQUAL(7u, list.getSize());
+
+ for (uint32_t i=0; i<7u; ++i) {
+ std::ostringstream ost;
+ ost << _prefix << "/disks/d" << i;
+ CPPUNIT_ASSERT_EQUAL(ost.str(), list[i].getPath());
+ }
+
+    // Note: scanForDisks() should under no circumstances access the
+    // disks. Thus it should not know that d1 is inaccessible, or that d6
+    // is actually a symlink to a file.
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[1].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[3].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[6].getState());
+
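+    // verifyHealthyDisks(), on the other hand, does touch the file system,
+    // so the expected states below follow from the layout created in init():
+    // d1 is unreadable, d3 contains foreign data, and d4/d6 resolve to a
+    // plain file rather than a directory.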
+ list.verifyHealthyDisks(-1);
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, list[1].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState());
+}
+
+void MountPointList_Test::testStatusFile()
+{
+ init();
+ std::string statusFileName(_prefix + "/disks.status");
+
+ // Try reading non-existing file, and writing a file
+ {
+ MountPointList list(_prefix,
+ std::vector<vespalib::string>(),
+ vespalib::LinkedPtr<DeviceManager>(
+ new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ _clock)));
+
+ _clock.setAbsoluteTimeInSeconds(5678);
+ list.scanForDisks();
+
+        // The status file does not exist yet; that should be ok.
+ list.readFromFile();
+ list.verifyHealthyDisks(-1);
+ CPPUNIT_ASSERT_EQUAL(7u, list.getSize());
+ list[5].addEvent(IOEvent(1234, Device::IO_FAILURE, "Argh", "Hmm"));
+ CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState());
+
+ // Write to file.
+ list.writeToFile();
+ }
+
+ // Check contents of file.
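+    // Each line appears to follow the format
+    //   <mount point> <state code> <time in seconds> <description>
+    // where the state codes line up with the Device states asserted further
+    // down (1=NOT_FOUND, 2=PATH_FAILURE, 3=NO_PERMISSION, 4=INTERNAL_FAILURE,
+    // 5=IO_FAILURE) and the times come from the fake clock (5678) or the
+    // explicitly added event (1234).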
+ {
+ std::ifstream in(statusFileName.c_str());
+ std::string line;
+ CPPUNIT_ASSERT(std::getline(in, line));
+
+ CPPUNIT_ASSERT_PREFIX(
+ std::string(_prefix + "/disks/d1 3 5678 IoException: NO PERMISSION: "
+ "open(./vdsroot/disks/d1/chunkinfo, 0x1): Failed, "
+ "errno(13): Permission denied"),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_PREFIX(
+ std::string(_prefix +"/disks/d2 1 5678 Disk not found during scanning of "
+ "disks directory"),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_PREFIX(
+ std::string(_prefix + "/disks/d3 4 5678 Foreign data in mountpoint. New "
+ "mountpoints added should be empty."),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_PREFIX(
+ std::string(_prefix + "/disks/d4 2 5678 File d4 in disks directory is not "
+ "a directory."),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_PREFIX(std::string(_prefix + "/disks/d5 5 1234 Argh"),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_PREFIX(
+ std::string(_prefix + "/disks/d6 2 5678 The path exist, but is not a "
+ "directory."),
+ line);
+ CPPUNIT_ASSERT(std::getline(in, line));
+ CPPUNIT_ASSERT_EQUAL(std::string("EOF"), line);
+ }
+
+    // Starting over to get new device instances.
+    // Scan disks, read the file, and check that erroneous disks are not used.
+ {
+ MountPointList list(_prefix,
+ std::vector<vespalib::string>(),
+ vespalib::LinkedPtr<DeviceManager>(
+ new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ _clock)));
+ list.scanForDisks();
+ list.readFromFile();
+ // Check that we got the expected entries.
+ CPPUNIT_ASSERT_EQUAL(7u, list.getSize());
+
+        // Note: scanForDisks() should under no circumstances access the
+        // disks. Thus it should not know that d1 is inaccessible.
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, list[1].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState());
+ CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState());
+ }
+}
+
+void MountPointList_Test::testInitDisks()
+{
+ vespalib::string d3target = "d3target";
+ vespalib::string foodev = _prefix + "/foodev";
+ vespalib::string bardev = _prefix + "/bardev";
+
+ tearDown();
+ run("rm -rf " + _prefix);
+ run("mkdir -p " + _prefix + "/disks/d2");
+ run("ln -s " + d3target + " " + _prefix + "/disks/d3");
+
+ std::vector<vespalib::string> diskPaths {
+ // disks/d0 should become a regular directory
+ _prefix + "/disks/d0",
+ // disks/d1 should be a symlink to /foo
+ foodev,
+ // disks/d2 should already be a directory
+ "/ignored",
+ // disks/d3 should already be a symlink
+ "/ignored2"
+ };
+
+ MountPointList list(_prefix, diskPaths, newDeviceManager());
+ list.initDisks();
+
+ CPPUNIT_ASSERT(isDirectory(_prefix + "/disks"));
+ CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d0"));
+ CPPUNIT_ASSERT(isSymLink(_prefix + "/disks/d1"));
+ CPPUNIT_ASSERT_EQUAL(foodev, readLink(_prefix + "/disks/d1"));
+ CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d2"));
+ CPPUNIT_ASSERT(isSymLink(_prefix + "/disks/d3"));
+ CPPUNIT_ASSERT_EQUAL(d3target, readLink(_prefix + "/disks/d3"));
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/tests/device/partitionmonitortest.cpp b/memfilepersistence/src/tests/device/partitionmonitortest.cpp
new file mode 100644
index 00000000000..1a016edcc83
--- /dev/null
+++ b/memfilepersistence/src/tests/device/partitionmonitortest.cpp
@@ -0,0 +1,204 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/partitionmonitor.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+namespace storage {
+
+namespace memfile {
+
+struct PartitionMonitorTest : public CppUnit::TestFixture
+{
+ void testNormalUsage();
+ void testHighInodeFillrate();
+ void testAlwaysStatPolicy();
+ void testPeriodPolicy();
+ void testStatOncePolicy();
+ void testDynamicPolicy();
+ void testIsFull();
+
+ CPPUNIT_TEST_SUITE(PartitionMonitorTest);
+ CPPUNIT_TEST(testNormalUsage);
+ CPPUNIT_TEST(testHighInodeFillrate);
+ CPPUNIT_TEST(testAlwaysStatPolicy);
+ CPPUNIT_TEST(testPeriodPolicy);
+ CPPUNIT_TEST(testStatOncePolicy);
+ CPPUNIT_TEST(testDynamicPolicy);
+ CPPUNIT_TEST(testIsFull);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(PartitionMonitorTest);
+
+struct FakeStatter : public PartitionMonitor::Statter {
+ struct statvfs _info;
+
+ FakeStatter() {
+ _info.f_bsize = 4096;
+ _info.f_frsize = 4096;
+ _info.f_blocks = 1000;
+ _info.f_bfree = 500;
+ _info.f_bavail = 400;
+ _info.f_files = 64;
+ _info.f_ffree = 32;
+ _info.f_favail = 30;
+ _info.f_fsid = 13;
+ _info.f_namemax = 256;
+ }
+ void removeData(uint32_t size) {
+ _info.f_bavail += (size / _info.f_bsize);
+ _info.f_bfree += (size / _info.f_bsize);
+ }
+ void addData(uint32_t size) {
+ _info.f_bavail -= (size / _info.f_bsize);
+ _info.f_bfree -= (size / _info.f_bsize);
+ }
+
+ virtual void statFileSystem(const std::string&, struct statvfs& info) {
+ info = _info;
+ }
+};
+
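+// The expected figures in the tests below are consistent with the fake
+// statvfs data above: used space = (f_blocks - f_bfree) * f_frsize
+// = 500 * 4096 = 2048000 bytes, reported total = used + f_bavail * f_frsize
+// = 2048000 + 400 * 4096 = 3686400 bytes, giving a fill rate of roughly
+// 55.56 %. Similarly, the inode fill rate is (f_files - f_ffree) /
+// (f_files - f_ffree + f_favail) = 32 / 62, roughly 51.61 %.
+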
+void PartitionMonitorTest::testNormalUsage()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ std::string expected(
+ "PartitionMonitor(testrunner.cpp, STAT_PERIOD(100), "
+ "2048000/3686400 used - 55.5556 % full)");
+ CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false));
+ expected =
+ "PartitionMonitor(testrunner.cpp) {\n"
+ " Fill rate: 55.5556 %\n"
+ " Inode fill rate: 51.6129 %\n"
+ " Detected block size: 4096\n"
+ " File system id: 13\n"
+ " Total size: 3686400 (3600 kB)\n"
+ " Used size: 2048000 (2000 kB)\n"
+ " Queries since last stat: 0\n"
+ " Monitor policy: STAT_PERIOD(100)\n"
+ " Root only ratio 0\n"
+ " Max fill rate 98 %\n"
+ "}";
+ CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true));
+ CPPUNIT_ASSERT(monitor.getFillRate() > 0.55);
+}
+
+void PartitionMonitorTest::testHighInodeFillrate()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ statter->_info.f_favail = 2;
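+    // With only 2 of the "available" inodes left, the inode fill rate becomes
+    // 32 / (32 + 2), roughly 94.12 %, which exceeds the 55.56 % block fill
+    // rate, so the monitor is expected to report the inode figure instead.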
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ std::string expected(
+ "PartitionMonitor(testrunner.cpp, STAT_PERIOD(100), "
+ "2048000/3686400 used - 94.1176 % full (inodes))");
+ CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false));
+ expected =
+ "PartitionMonitor(testrunner.cpp) {\n"
+ " Fill rate: 55.5556 %\n"
+ " Inode fill rate: 94.1176 %\n"
+ " Detected block size: 4096\n"
+ " File system id: 13\n"
+ " Total size: 3686400 (3600 kB)\n"
+ " Used size: 2048000 (2000 kB)\n"
+ " Queries since last stat: 0\n"
+ " Monitor policy: STAT_PERIOD(100)\n"
+ " Root only ratio 0\n"
+ " Max fill rate 98 %\n"
+ "}";
+ CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true));
+ CPPUNIT_ASSERT(monitor.getFillRate() > 0.94);
+}
+
+void PartitionMonitorTest::testAlwaysStatPolicy()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ monitor.setAlwaysStatPolicy();
+ for (uint32_t i=0; i<10; ++i) {
+ monitor.getFillRate();
+ CPPUNIT_ASSERT_EQUAL(0u, monitor._queriesSinceStat);
+ }
+}
+
+void PartitionMonitorTest::testPeriodPolicy()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ monitor.setStatPeriodPolicy(4);
+ for (uint32_t i=1; i<16; ++i) {
+ monitor.getFillRate();
+ CPPUNIT_ASSERT_EQUAL(i % 4, monitor._queriesSinceStat);
+ }
+}
+
+void PartitionMonitorTest::testStatOncePolicy()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ monitor.setStatOncePolicy();
+ for (uint32_t i=1; i<16; ++i) {
+ monitor.getFillRate();
+ CPPUNIT_ASSERT_EQUAL(i, monitor._queriesSinceStat);
+ }
+}
+
+void PartitionMonitorTest::testDynamicPolicy()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+ monitor.setStatDynamicPolicy(2);
+ // Add some data, such that we see that period goes down
+ CPPUNIT_ASSERT_EQUAL(uint64_t(3698), monitor.calcDynamicPeriod());
+ CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate()));
+ monitor.addingData(256 * 1024);
+ CPPUNIT_ASSERT_EQUAL(uint64_t(2592), monitor.calcDynamicPeriod());
+ CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate()));
+ monitor.addingData(512 * 1024);
+ CPPUNIT_ASSERT_EQUAL(uint64_t(968), monitor.calcDynamicPeriod());
+ CPPUNIT_ASSERT_EQUAL(76, (int) (100 * monitor.getFillRate()));
+ // Add such that we hint that we have more data than possible on disk
+ monitor.addingData(1024 * 1024);
+ // Let fake stat just have a bit more data than before
+ statter->addData(256 * 1024);
+ // With high fill rate, we should check stat each time
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), monitor.calcDynamicPeriod());
+ // As period is 1, we will now do a new stat, it should find we
+ // actually have less fill rate
+ CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate()));
+}
+
+void PartitionMonitorTest::testIsFull()
+{
+ PartitionMonitor monitor("testrunner.cpp");
+ monitor.setMaxFillness(0.85);
+ FakeStatter* statter = new FakeStatter();
+ monitor.setStatOncePolicy();
+ monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter));
+
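+    // With the 85 % limit set above, the expected values below follow from
+    // the fake statvfs data: 2048000/3686400 is ~55 %, adding 512 kB and then
+    // 600 kB pushes the estimate to ~69 % and ~86 % (full), and removing
+    // 2 x 32 kB brings it back to ~85 % (still full) and ~84 % (not full).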
+ CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate()));
+ CPPUNIT_ASSERT(!monitor.isFull());
+ monitor.addingData(512 * 1024);
+ CPPUNIT_ASSERT_EQUAL(69, (int) (100 * monitor.getFillRate()));
+ CPPUNIT_ASSERT(!monitor.isFull());
+ monitor.addingData(600 * 1024);
+ CPPUNIT_ASSERT_EQUAL(86, (int) (100 * monitor.getFillRate()));
+ CPPUNIT_ASSERT(monitor.isFull());
+ monitor.removingData(32 * 1024);
+ CPPUNIT_ASSERT_EQUAL(85, (int) (100 * monitor.getFillRate()));
+ CPPUNIT_ASSERT(monitor.isFull());
+ monitor.removingData(32 * 1024);
+ CPPUNIT_ASSERT_EQUAL(84, (int) (100 * monitor.getFillRate()));
+ CPPUNIT_ASSERT(!monitor.isFull());
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/tests/init/.gitignore b/memfilepersistence/src/tests/init/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/tests/init/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/tests/init/CMakeLists.txt b/memfilepersistence/src/tests/init/CMakeLists.txt
new file mode 100644
index 00000000000..ebc4738a8c4
--- /dev/null
+++ b/memfilepersistence/src/tests/init/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_testinit
+ SOURCES
+ filescannertest.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/tests/init/filescannertest.cpp b/memfilepersistence/src/tests/init/filescannertest.cpp
new file mode 100644
index 00000000000..8b49a21dad0
--- /dev/null
+++ b/memfilepersistence/src/tests/init/filescannertest.cpp
@@ -0,0 +1,492 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/bucket/bucketid.h>
+#include <iomanip>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/memfilepersistence/init/filescanner.h>
+#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h>
+#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h>
+#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
+#include <vespa/vdslib/state/nodestate.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/random.h>
+#include <sys/errno.h>
+
+namespace storage {
+namespace memfile {
+
+struct FileScannerTest : public CppUnit::TestFixture {
+ struct TestParameters {
+ uint32_t filesPerDisk;
+ uint32_t diskCount;
+ uint32_t bucketSplitBits;
+ uint32_t dirLevels;
+ uint32_t dirSpread;
+ uint32_t parts;
+ std::set<uint32_t> disksDown;
+ bool diskDownWithBrokenSymlink;
+ bool bucketWrongDir;
+ bool bucketMultipleDirs;
+ bool bucketMultipleDisks;
+ bool addTemporaryFiles;
+ bool addAlienFiles;
+ bool dirWithNoListPermission;
+ bool dirWithNoWritePermission;
+ bool dirWithNoExecutePermission;
+ bool fileWithNoReadPermission;
+ bool fileWithNoWritePermission;
+
+ TestParameters()
+ : filesPerDisk(10), diskCount(5), bucketSplitBits(20),
+ dirLevels(1), dirSpread(16), parts(1), disksDown(),
+ diskDownWithBrokenSymlink(false),
+ bucketWrongDir(false), bucketMultipleDirs(false),
+ bucketMultipleDisks(false),
+ addTemporaryFiles(false), addAlienFiles(false),
+ dirWithNoListPermission(false),
+ dirWithNoWritePermission(false),
+ dirWithNoExecutePermission(false),
+ fileWithNoReadPermission(false),
+ fileWithNoWritePermission(false) {}
+ void addAllComplexities() {
+ disksDown.insert(0);
+ disksDown.insert(2);
+ disksDown.insert(4);
+ bucketWrongDir = true;
+ bucketMultipleDirs = true;
+ bucketMultipleDisks = true;
+ parts = 7;
+ addTemporaryFiles = true;
+ addAlienFiles = true;
+ dirWithNoWritePermission = true;
+ fileWithNoWritePermission = true;
+ fileWithNoReadPermission = true;
+ }
+ };
+
+ void testNormalUsage() {
+ TestParameters params;
+ runTest(params);
+ }
+ void testMultipleParts() {
+ TestParameters params;
+ params.parts = 3;
+ runTest(params);
+ }
+ void testBucketInWrongDirectory() {
+ TestParameters params;
+ params.bucketWrongDir = true;
+ runTest(params);
+ }
+ void testBucketInMultipleDirectories() {
+ TestParameters params;
+ params.bucketMultipleDirs = true;
+ runTest(params);
+ }
+ void testZeroDirLevel() {
+ TestParameters params;
+ params.dirLevels = 0;
+ runTest(params);
+ }
+ void testSeveralDirLevels() {
+ TestParameters params;
+ params.dirLevels = 3;
+ runTest(params);
+ }
+ void testNonStandardDirSpread() {
+ TestParameters params;
+ params.dirSpread = 63;
+ runTest(params);
+ }
+ void testDiskDown() {
+ TestParameters params;
+ params.disksDown.insert(1);
+ runTest(params);
+ }
+ void testDiskDownBrokenSymlink() {
+ TestParameters params;
+ params.disksDown.insert(1);
+ params.disksDown.insert(3);
+ params.diskDownWithBrokenSymlink = true;
+ runTest(params);
+ }
+ void testRemoveTemporaryFile() {
+ TestParameters params;
+ params.addTemporaryFiles = true;
+ runTest(params);
+ }
+ void testAlienFile() {
+ TestParameters params;
+ params.addAlienFiles = true;
+ runTest(params);
+ }
+ void testUnlistableDirectory() {
+ TestParameters params;
+ params.dirWithNoListPermission = true;
+ runTest(params);
+ }
+ void testDirWithNoWritePermission() {
+ TestParameters params;
+ params.dirWithNoWritePermission = true;
+ runTest(params);
+ }
+ void testDirWithNoExecutePermission() {
+ TestParameters params;
+ params.dirWithNoWritePermission = true;
+ runTest(params);
+ }
+ void testFileWithNoReadPermission() {
+ TestParameters params;
+ params.bucketWrongDir = true;
+ params.fileWithNoReadPermission = true;
+ runTest(params);
+ }
+ void testFileWithNoWritePermission() {
+ TestParameters params;
+ params.bucketWrongDir = true;
+ params.fileWithNoWritePermission = true;
+ runTest(params);
+ }
+ void testAllFailuresCombined() {
+ TestParameters params;
+ params.addAllComplexities();
+ runTest(params);
+ }
+
+ CPPUNIT_TEST_SUITE(FileScannerTest);
+ CPPUNIT_TEST(testNormalUsage);
+ CPPUNIT_TEST(testMultipleParts);
+ CPPUNIT_TEST(testBucketInWrongDirectory);
+ CPPUNIT_TEST(testBucketInMultipleDirectories);
+ CPPUNIT_TEST(testZeroDirLevel);
+ CPPUNIT_TEST(testSeveralDirLevels);
+ CPPUNIT_TEST(testNonStandardDirSpread);
+ CPPUNIT_TEST(testDiskDown);
+ CPPUNIT_TEST(testDiskDownBrokenSymlink);
+ CPPUNIT_TEST(testRemoveTemporaryFile);
+ CPPUNIT_TEST(testAlienFile);
+ CPPUNIT_TEST(testUnlistableDirectory);
+ CPPUNIT_TEST(testDirWithNoWritePermission);
+ CPPUNIT_TEST(testDirWithNoExecutePermission);
+ CPPUNIT_TEST(testFileWithNoReadPermission);
+ CPPUNIT_TEST(testFileWithNoWritePermission);
+ CPPUNIT_TEST(testAllFailuresCombined);
+ CPPUNIT_TEST_SUITE_END();
+
+ // Actual implementation of the tests.
+
+ /** Run a console command and fail test if it fails. */
+ void run(std::string cmd);
+
+ /** Struct containing metadata for a single bucket. */
+ struct BucketData {
+ document::BucketId bucket;
+ uint32_t disk;
+ std::vector<uint32_t> directory;
+ bool shouldExist; // Set to false for buckets that won't exist due to
+ // some failure.
+
+ BucketData() : shouldExist(true) {}
+
+ bool sameDir(BucketData& other) const {
+ return (disk == other.disk && directory == other.directory);
+ }
+ };
+
+ /**
+     * Create an overview of the buckets we are going to use in the test
+     * (without any failures introduced).
+ */
+ std::vector<BucketData> createBuckets(const TestParameters& params);
+
+ /**
+ * Create the data in the bucket map and introduce the failures specified
+ * in the test. Mark buckets in bucket list that won't exist due to the
+ * failures so we know how to verify result of test.
+ */
+ void createData(const TestParameters&, std::vector<BucketData>& buckets,
+ std::vector<std::string>& tempFiles,
+ std::vector<std::string>& alienFiles);
+
+ /**
+ * Run a test with a given set of parameters, calling createData to set up
+ * the data, and then using a file scanner to actually list the files.
+ */
+ void runTest(const TestParameters&);
+
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(FileScannerTest);
+
+void
+FileScannerTest::run(std::string cmd)
+{
+ int result = system(cmd.c_str());
+ if (result != 0) {
+ CPPUNIT_FAIL("Failed to run command '" + cmd + "'.");
+ }
+}
+
+std::vector<FileScannerTest::BucketData>
+FileScannerTest::createBuckets(const TestParameters& params)
+{
+ std::vector<BucketData> buckets;
+ BucketDirectoryMapper dirMapper(params.dirLevels, params.dirSpread);
+ for (uint32_t i=0; i<params.diskCount; ++i) {
+ if (params.disksDown.find(i) != params.disksDown.end()) {
+ continue;
+ }
+ for (uint32_t j=0; j<params.filesPerDisk; ++j) {
+ BucketData data;
+ data.bucket = document::BucketId(params.bucketSplitBits,
+ params.filesPerDisk * i + j);
+ data.disk = i;
+ data.directory = dirMapper.getPath(data.bucket);
+ buckets.push_back(data);
+ }
+ }
+ return buckets;
+}
+
+void
+FileScannerTest::createData(const TestParameters& params,
+ std::vector<BucketData>& buckets,
+ std::vector<std::string>& tempFiles,
+ std::vector<std::string>& alienFiles)
+{
+ if (params.bucketWrongDir) {
+ CPPUNIT_ASSERT(params.dirLevels > 0);
+ buckets[0].directory[0] = (buckets[0].directory[0] + 1)
+ % params.dirSpread;
+ }
+ if (params.bucketMultipleDirs) {
+ CPPUNIT_ASSERT(params.dirLevels > 0);
+ BucketData copy(buckets[1]);
+ copy.directory[0] = (buckets[1].directory[0] + 1) % params.dirSpread;
+ buckets.push_back(copy);
+ }
+ if (params.bucketMultipleDisks && params.dirLevels > 0) {
+ BucketData copy(buckets[2]);
+ uint32_t disk = 0;
+ for (; disk<params.diskCount; ++disk) {
+ if (disk == copy.disk) continue;
+ if (params.disksDown.find(disk) == params.disksDown.end()) break;
+ }
+ CPPUNIT_ASSERT(disk < params.diskCount);
+ copy.disk = disk;
+ buckets.push_back(copy);
+ }
+
+ run("mkdir -p vdsroot");
+ run("chmod -R a+rwx vdsroot");
+ run("rm -rf vdsroot");
+ run("mkdir -p vdsroot/disks");
+ vespalib::RandomGen randomizer;
+ uint32_t diskToHaveBrokenSymlink = (params.disksDown.empty()
+ ? 0 : randomizer.nextUint32(0, params.disksDown.size()));
+ uint32_t downIndex = 0;
+ for (uint32_t i=0; i<params.diskCount; ++i) {
+ if (params.disksDown.find(i) != params.disksDown.end()) {
+ if (downIndex++ == diskToHaveBrokenSymlink
+ && params.diskDownWithBrokenSymlink)
+ {
+ std::ostringstream path;
+ path << "vdsroot/disks/d" << i;
+ run("ln -s /non-existing-dir " + path.str());
+ }
+ } else {
+ std::ostringstream path;
+ path << "vdsroot/disks/d" << i;
+ run("mkdir -p " + path.str());
+ std::ofstream of((path.str() + "/chunkinfo").c_str());
+ of << "#chunkinfo\n" << i << "\n" << params.diskCount << "\n";
+ }
+ }
+ for (uint32_t i=0; i<buckets.size(); ++i) {
+ if (!buckets[i].shouldExist) continue;
+ std::ostringstream path;
+ path << "vdsroot/disks/d" << buckets[i].disk << std::hex;
+ for (uint32_t j=0; j<buckets[i].directory.size(); ++j) {
+ path << '/' << std::setw(4) << std::setfill('0')
+ << buckets[i].directory[j];
+ }
+ run("mkdir -p " + path.str());
+ if (params.dirWithNoListPermission && i == 8) {
+ run("chmod a-r " + path.str());
+ // Scanner will abort with exception, so we don't really know
+ // how many docs will not be found due to this.
+ continue;
+ }
+ if (params.dirWithNoExecutePermission && i == 9) {
+ run("chmod a-x " + path.str());
+ // Scanner will abort with exception, so we don't really know
+ // how many docs will not be found due to this.
+ continue;
+ }
+ path << '/' << std::setw(16) << std::setfill('0')
+ << buckets[i].bucket.getId() << ".0";
+ run("touch " + path.str());
+ if (params.addTemporaryFiles && i == 4) {
+ run("touch " + path.str() + ".tmp");
+ tempFiles.push_back(path.str() + ".tmp");
+ }
+ if (params.addAlienFiles && i == 6) {
+ run("touch " + path.str() + ".alien");
+ alienFiles.push_back(path.str() + ".alien");
+ }
+ if (params.fileWithNoWritePermission && i == 0) {
+ // Overlapping with wrong dir so it would want to move file
+ run("chmod a-w " + path.str());
+ }
+ if (params.fileWithNoReadPermission && i == 0) {
+ // Overlapping with wrong dir so it would want to move file
+ run("chmod a-r " + path.str());
+ }
+ if (params.dirWithNoWritePermission && i == 9) {
+ run("chmod a-w " + path.str());
+ }
+ }
+}
+
+namespace {
+ struct BucketDataFound {
+ uint16_t _disk;
+ bool _checked;
+
+ BucketDataFound() : _disk(65535), _checked(false) {}
+ BucketDataFound(uint32_t disk) : _disk(disk), _checked(false) {}
+ };
+}
+
+void
+FileScannerTest::runTest(const TestParameters& params)
+{
+ std::vector<BucketData> buckets(createBuckets(params));
+ std::vector<std::string> tempFiles;
+ std::vector<std::string> alienFiles;
+ createData(params, buckets, tempFiles, alienFiles);
+
+ framework::defaultimplementation::RealClock clock;
+ framework::defaultimplementation::ComponentRegisterImpl compReg;
+ compReg.setClock(clock);
+
+ MountPointList mountPoints("./vdsroot",
+ std::vector<vespalib::string>(),
+ vespalib::LinkedPtr<DeviceManager>(
+ new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ clock)));
+ mountPoints.init(params.diskCount);
+
+ FileScanner scanner(compReg, mountPoints,
+ params.dirLevels, params.dirSpread);
+ std::map<document::BucketId, BucketDataFound> foundBuckets;
+ uint32_t extraBucketsSameDisk = 0;
+ uint32_t extraBucketsOtherDisk = 0;
+ for (uint32_t j=0; j<params.diskCount; ++j) {
+ // std::cerr << "Disk " << j << "\n";
+ if (params.disksDown.find(j) != params.disksDown.end()) continue;
+ for (uint32_t i=0; i<params.parts; ++i) {
+ document::BucketId::List bucketList;
+ try{
+ scanner.buildBucketList(bucketList, j, i, params.parts);
+ for (uint32_t k=0; k<bucketList.size(); ++k) {
+ if (foundBuckets.find(bucketList[k]) != foundBuckets.end())
+ {
+ if (j == foundBuckets[bucketList[k]]._disk) {
+ ++extraBucketsSameDisk;
+ } else {
+ ++extraBucketsOtherDisk;
+ }
+// std::cerr << "Bucket " << bucketList[k]
+// << " on disk " << j << " is already found on disk "
+// << foundBuckets[bucketList[k]]._disk << ".\n";
+ }
+ foundBuckets[bucketList[k]] = BucketDataFound(j);
+ }
+ } catch (vespalib::IoException& e) {
+ if (!(params.dirWithNoListPermission
+ && e.getType() == vespalib::IoException::NO_PERMISSION))
+ {
+ throw;
+ }
+ }
+ }
+ }
+ std::vector<BucketData> notFound;
+ std::vector<BucketData> wasFound;
+ std::vector<BucketDataFound> foundNonExisting;
+ // Verify that found buckets match buckets expected.
+ for (uint32_t i=0; i<buckets.size(); ++i) {
+ std::map<document::BucketId, BucketDataFound>::iterator found(
+ foundBuckets.find(buckets[i].bucket));
+ if (buckets[i].shouldExist && found == foundBuckets.end()) {
+ notFound.push_back(buckets[i]);
+ } else if (!buckets[i].shouldExist && found != foundBuckets.end()) {
+ wasFound.push_back(buckets[i]);
+ }
+ if (found != foundBuckets.end()) { found->second._checked = true; }
+ }
+ for (std::map<document::BucketId, BucketDataFound>::iterator it
+ = foundBuckets.begin(); it != foundBuckets.end(); ++it)
+ {
+ if (!it->second._checked) {
+ foundNonExisting.push_back(it->second);
+ }
+ }
+ if (params.dirWithNoListPermission) {
+ CPPUNIT_ASSERT(!notFound.empty());
+ } else if (!notFound.empty()) {
+ std::ostringstream ost;
+ ost << "Failed to find " << notFound.size() << " of "
+ << buckets.size() << " buckets. Including buckets:";
+ for (uint32_t i=0; i<5 && i<notFound.size(); ++i) {
+ ost << " " << notFound[i].bucket;
+ }
+ CPPUNIT_FAIL(ost.str());
+ }
+ CPPUNIT_ASSERT(wasFound.empty());
+ CPPUNIT_ASSERT(foundNonExisting.empty());
+ if (params.bucketMultipleDirs) {
+        // TODO: Test something else here? This is not a correct test, as when
+        // there are two buckets on the same disk, one of them will be ignored
+        // by the bucket lister.
+ // CPPUNIT_ASSERT_EQUAL(1u, extraBucketsSameDisk);
+ } else {
+ CPPUNIT_ASSERT_EQUAL(0u, extraBucketsSameDisk);
+ }
+ if (params.bucketMultipleDisks) {
+ CPPUNIT_ASSERT_EQUAL(1u, extraBucketsOtherDisk);
+ } else {
+ CPPUNIT_ASSERT_EQUAL(0u, extraBucketsOtherDisk);
+ }
+ if (params.addTemporaryFiles) {
+ CPPUNIT_ASSERT_EQUAL(
+ 1, int(scanner.getMetrics()._temporaryFilesDeleted.getValue()));
+ } else {
+ CPPUNIT_ASSERT_EQUAL(
+ 0, int(scanner.getMetrics()._temporaryFilesDeleted.getValue()));
+ }
+ if (params.addAlienFiles) {
+ CPPUNIT_ASSERT_EQUAL(
+ 1, int(scanner.getMetrics()._alienFileCounter.getValue()));
+ } else {
+ CPPUNIT_ASSERT_EQUAL(
+ 0, int(scanner.getMetrics()._alienFileCounter.getValue()));
+ }
+ // We automatically delete temporary files (created by VDS, indicating
+    // an operation that only half finished).
+ for (uint32_t i=0; i<tempFiles.size(); ++i) {
+ CPPUNIT_ASSERT_MSG(tempFiles[i], !vespalib::fileExists(tempFiles[i]));
+ }
+ // We don't automatically delete alien files
+ for (uint32_t i=0; i<alienFiles.size(); ++i) {
+ CPPUNIT_ASSERT_MSG(alienFiles[i], vespalib::fileExists(alienFiles[i]));
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/mapper/.gitignore b/memfilepersistence/src/tests/mapper/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/memfilepersistence/src/tests/mapper/.gitignore
diff --git a/memfilepersistence/src/tests/spi/.gitignore b/memfilepersistence/src/tests/spi/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/tests/spi/CMakeLists.txt b/memfilepersistence/src/tests/spi/CMakeLists.txt
new file mode 100644
index 00000000000..d5dade96f57
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_testspi
+ SOURCES
+ memfiletestutils.cpp
+ providerconformancetest.cpp
+ memfilev1serializertest.cpp
+ memfilev1verifiertest.cpp
+ basicoperationhandlertest.cpp
+ splitoperationhandlertest.cpp
+ joinoperationhandlertest.cpp
+ iteratorhandlertest.cpp
+ memfiletest.cpp
+ memcachetest.cpp
+ simplememfileiobuffertest.cpp
+ memfileautorepairtest.cpp
+ shared_data_location_tracker_test.cpp
+ buffered_file_writer_test.cpp
+ buffer_test.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp
new file mode 100644
index 00000000000..2f7913b0e1f
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp
@@ -0,0 +1,735 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <tests/spi/options_builder.h>
+#include <vespa/document/fieldset/fieldsetrepo.h>
+#include <vespa/document/fieldset/fieldsets.h>
+
+namespace storage {
+namespace memfile {
+namespace {
+ spi::LoadType defaultLoadType(0, "default");
+}
+
+class BasicOperationHandlerTest : public SingleDiskMemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(BasicOperationHandlerTest);
+ CPPUNIT_TEST(testGetHeaderOnly);
+ CPPUNIT_TEST(testGetFieldFiltering);
+ CPPUNIT_TEST(testRemove);
+ CPPUNIT_TEST(testRemoveWithNonMatchingTimestamp);
+ CPPUNIT_TEST(testRemoveWithNonMatchingTimestampAlwaysPersist);
+ CPPUNIT_TEST(testRemoveForExistingRemoveSameTimestamp);
+ CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestamp);
+ CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestampAlwaysPersist);
+ CPPUNIT_TEST(testRemoveDocumentNotFound);
+ CPPUNIT_TEST(testRemoveDocumentNotFoundAlwaysPersist);
+ CPPUNIT_TEST(testRemoveExistingOlderDocumentVersion);
+ CPPUNIT_TEST(testPutSameTimestampAsRemove);
+ CPPUNIT_TEST(testUpdateBody);
+ CPPUNIT_TEST(testUpdateHeaderOnly);
+ CPPUNIT_TEST(testUpdateTimestampExists);
+ CPPUNIT_TEST(testUpdateForNonExistentDocWillFail);
+ CPPUNIT_TEST(testUpdateMayCreateDoc);
+ CPPUNIT_TEST(testRemoveEntry);
+ CPPUNIT_TEST(testEraseFromCacheOnFlushException);
+ CPPUNIT_TEST(testEraseFromCacheOnMaintainException);
+ CPPUNIT_TEST(testEraseFromCacheOnDeleteBucketException);
+ CPPUNIT_TEST_SUITE_END();
+
+ void doTestRemoveDocumentNotFound(
+ OperationHandler::RemoveType persistRemove);
+ void doTestRemoveWithNonMatchingTimestamp(
+ OperationHandler::RemoveType persistRemove);
+ void doTestRemoveForExistingRemoveNewTimestamp(
+ OperationHandler::RemoveType persistRemove);
+public:
+ void setupTestConfig();
+ void testPutHeadersOnly();
+ void testPutHeadersOnlyDocumentNotFound();
+ void testPutHeadersOnlyTimestampNotFound();
+ void testGetHeaderOnly();
+ void testGetFieldFiltering();
+ void testRemove();
+ void testRemoveWithNonMatchingTimestamp();
+ void testRemoveWithNonMatchingTimestampAlwaysPersist();
+ void testRemoveForExistingRemoveSameTimestamp();
+ void testRemoveForExistingRemoveNewTimestamp();
+ void testRemoveForExistingRemoveNewTimestampAlwaysPersist();
+ void testRemoveDocumentNotFound();
+ void testRemoveDocumentNotFoundAlwaysPersist();
+ void testRemoveExistingOlderDocumentVersion();
+ void testPutSameTimestampAsRemove();
+ void testUpdateBody();
+ void testUpdateHeaderOnly();
+ void testUpdateTimestampExists();
+ void testUpdateForNonExistentDocWillFail();
+ void testUpdateMayCreateDoc();
+ void testRemoveEntry();
+ void testEraseFromCacheOnFlushException();
+ void testEraseFromCacheOnMaintainException();
+ void testEraseFromCacheOnDeleteBucketException();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(BasicOperationHandlerTest);
+
+/**
+ * Test that doing a header-only get gives back a document containing
+ * only the document header
+ */
+void
+BasicOperationHandlerTest::testGetHeaderOnly()
+{
+ document::BucketId bucketId(16, 4);
+
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ doc->setValue(doc->getField("hstringval"), document::StringFieldValue("hypnotoad"));
+ doc->setValue(doc->getField("headerval"), document::IntFieldValue(42));
+
+ doPut(doc, bucketId, Timestamp(4567), 0);
+ flush(bucketId);
+
+ spi::GetResult reply = doGet(bucketId, doc->getId(), document::HeaderFields());
+
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode());
+ CPPUNIT_ASSERT(reply.hasDocument());
+ CPPUNIT_ASSERT_EQUAL(std::string("headerval: 42\nhstringval: hypnotoad\n"),
+ stringifyFields(reply.getDocument()));
+ CPPUNIT_ASSERT_EQUAL(
+ size_t(1),
+ getPersistenceProvider().getMetrics().headerOnlyGets.getValue());
+}
+
+void
+BasicOperationHandlerTest::testGetFieldFiltering()
+{
+ document::BucketId bucketId(16, 4);
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ doc->setValue(doc->getField("headerval"), document::IntFieldValue(42));
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue("groovy"));
+
+ document::FieldSetRepo repo;
+
+ doPut(doc, bucketId, Timestamp(4567), 0);
+ flush(bucketId);
+ spi::GetResult reply(doGet(bucketId,
+ doc->getId(),
+ *repo.parse(*getTypeRepo(), "testdoctype1:hstringval")));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode());
+ CPPUNIT_ASSERT(reply.hasDocument());
+ CPPUNIT_ASSERT_EQUAL(std::string("hstringval: groovy\n"),
+ stringifyFields(reply.getDocument()));
+ CPPUNIT_ASSERT_EQUAL(
+ size_t(1),
+ getPersistenceProvider().getMetrics().headerOnlyGets.getValue());
+}
+
+void
+BasicOperationHandlerTest::testRemove()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+
+ document::Document::SP doc = doPut(4, Timestamp(1));
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(2),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ env()._cache.clear();
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));
+
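+    // The remove is expected to appear as its own slot, flagged as deleted,
+    // with an empty body location while sharing the header location of the
+    // document it removed.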
+ CPPUNIT_ASSERT_EQUAL(Timestamp(2), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT((*file)[1].deleted());
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[1].getLocation(BODY));
+ CPPUNIT_ASSERT_EQUAL((*file)[0].getLocation(HEADER),
+ (*file)[1].getLocation(HEADER));
+}
+
+/**
+ * Test that removing a document with a timestamp for which there is no
+ * matching document only adds a remove slot to the memfile when
+ * ALWAYS_PERSIST_REMOVE is specified.
+ */
+void
+BasicOperationHandlerTest::doTestRemoveWithNonMatchingTimestamp(
+ OperationHandler::RemoveType persistRemove)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1233),
+ persistRemove));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(
+ uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
+ ? 2 : 1),
+ file->getSlotCount());
+
+ int i = 0;
+ if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1233), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT((*file)[0].deleted());
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[0].getLocation(BODY));
+ CPPUNIT_ASSERT((*file)[0].getLocation(HEADER)
+ != (*file)[1].getLocation(HEADER));
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0]));
+ ++i;
+ }
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[i].getTimestamp());
+ CPPUNIT_ASSERT(!(*file)[i].deleted());
+ CPPUNIT_ASSERT(file->getDocument((*file)[i], ALL)->getValue("content").get());
+}
+
+/**
+ * Test that removing a document with a max timestamp for which there
+ * is no matching document does not add a remove slot to the memfile
+ */
+void
+BasicOperationHandlerTest::testRemoveWithNonMatchingTimestamp()
+{
+ doTestRemoveWithNonMatchingTimestamp(
+ OperationHandler::PERSIST_REMOVE_IF_FOUND);
+}
+
+void
+BasicOperationHandlerTest::testRemoveWithNonMatchingTimestampAlwaysPersist()
+{
+ doTestRemoveWithNonMatchingTimestamp(
+ OperationHandler::ALWAYS_PERSIST_REMOVE);
+}
+
+/**
+ * Test that doing a remove with a timestamp for which there already
+ * exists a remove does not add another remove slot
+ */
+void
+BasicOperationHandlerTest::testRemoveForExistingRemoveSameTimestamp()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1235),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+ CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1235),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ // Should only be one remove entry still
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT((*file)[1].deleted());
+}
+
+void
+BasicOperationHandlerTest::doTestRemoveForExistingRemoveNewTimestamp(
+ OperationHandler::RemoveType persistRemove)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1235),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+ CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1236),
+ persistRemove));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(
+ uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
+ ? 3 : 2),
+ file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT((*file)[1].deleted());
+
+ if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1236), (*file)[2].getTimestamp());
+ CPPUNIT_ASSERT((*file)[2].deleted());
+ }
+}
+
+/**
+ * Test that doing a second remove with a newer timestamp does not add
+ * another remove slot when PERSIST_REMOVE_IF_FOUND is specified
+ */
+void
+BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestamp()
+{
+ doTestRemoveForExistingRemoveNewTimestamp(
+ OperationHandler::PERSIST_REMOVE_IF_FOUND);
+}
+
+void
+BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestampAlwaysPersist()
+{
+ doTestRemoveForExistingRemoveNewTimestamp(
+ OperationHandler::ALWAYS_PERSIST_REMOVE);
+}
+
+/**
+ * Test removing an older version of a document. Older version should be removed
+ * in-place without attempting to add a new slot (which would fail).
+ */
+void
+BasicOperationHandlerTest::testRemoveExistingOlderDocumentVersion()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1235),
+ OperationHandler::ALWAYS_PERSIST_REMOVE));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1234),
+ OperationHandler::ALWAYS_PERSIST_REMOVE));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ // Should now be two remove entries.
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0]));
+ CPPUNIT_ASSERT((*file)[0].deleted());
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[1]));
+ CPPUNIT_ASSERT((*file)[1].deleted());
+}
+
+void
+BasicOperationHandlerTest::doTestRemoveDocumentNotFound(
+ OperationHandler::RemoveType persistRemove)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::DocumentId docId("userdoc:test:4:0");
+ doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(false,
+ doRemove(bucketId,
+ docId,
+ Timestamp(1235),
+ persistRemove));
+
+ getPersistenceProvider().flush(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(
+ uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
+ ? 2 : 1),
+ file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT((*file)[1].deleted());
+ CPPUNIT_ASSERT_EQUAL(docId, file->getDocumentId((*file)[1]));
+ }
+/* TODO: Test this in service layer tests.
+ CPPUNIT_ASSERT_EQUAL(
+ uint64_t(1),
+ env()._metrics.remove[documentapi::LoadType::DEFAULT].notFound.getValue());
+*/
+}
+
+/**
+ * Test that removing a non-existent document when PERSIST_REMOVE_IF_FOUND is
+ * specified does not add a remove entry
+ */
+void
+BasicOperationHandlerTest::testRemoveDocumentNotFound()
+{
+ doTestRemoveDocumentNotFound(
+ OperationHandler::PERSIST_REMOVE_IF_FOUND);
+}
+
+void
+BasicOperationHandlerTest::testRemoveDocumentNotFoundAlwaysPersist()
+{
+ doTestRemoveDocumentNotFound(
+ OperationHandler::ALWAYS_PERSIST_REMOVE);
+}
+
+void
+BasicOperationHandlerTest::testPutSameTimestampAsRemove()
+{
+ document::BucketId bucketId(16, 4);
+
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
+ doc->getId(),
+ Timestamp(1235),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+
+ // Flush here so that the put and remove are not thrown away when the
+ // duplicate-timestamp exception evicts the cache and its unpersisted changes.
+ flush(bucketId);
+
+ doPut(4, Timestamp(1235));
+ flush(bucketId);
+
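+ // The put shares timestamp 1235 with the already-flushed remove; the remove
+ // takes precedence, so the file should still hold two slots and slot 1
+ // should remain a remove.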
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT((*file)[1].deleted());
+}
+
+/**
+ * Test that updating body results in a new memfile slot containing
+ * an updated document
+ */
+void
+BasicOperationHandlerTest::testUpdateBody()
+{
+ document::BucketId bucketId(16, 4);
+ document::StringFieldValue updateValue("foo");
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+ document::Document originalDoc(*doc);
+
+ document::DocumentUpdate::SP update = createBodyUpdate(
+ doc->getId(), updateValue);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678));
+ flush(bucketId);
+ CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp());
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());
+ CPPUNIT_ASSERT_EQUAL(*(originalDoc.getValue("content")),
+ *file->getDocument((*file)[0], ALL)->getValue("content"));
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("content").get());
+ CPPUNIT_ASSERT_EQUAL(updateValue,
+ dynamic_cast<document::StringFieldValue&>(
+ *file->getDocument((*file)[1], ALL)->getValue(
+ "content")));
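+ // A body update must not be counted as a header-only update.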
+ CPPUNIT_ASSERT_EQUAL(
+ size_t(0),
+ getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue());
+}
+
+void
+BasicOperationHandlerTest::testUpdateHeaderOnly()
+{
+ document::BucketId bucketId(16, 4);
+ document::IntFieldValue updateValue(42);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ document::DocumentUpdate::SP update = createHeaderUpdate(
+ doc->getId(), updateValue);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678));
+ flush(bucketId);
+ CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp());
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("headerval").get() ==
+ NULL);
+
+ CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("headerval").get());
+ CPPUNIT_ASSERT_EQUAL(updateValue,
+ dynamic_cast<document::IntFieldValue&>(
+ *file->getDocument((*file)[1], ALL)->getValue(
+ "headerval")));
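+ // The header-only update should be reflected in the headerOnlyUpdates metric.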
+ CPPUNIT_ASSERT_EQUAL(
+ size_t(1),
+ getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue());
+}
+
+void
+BasicOperationHandlerTest::testUpdateTimestampExists()
+{
+ document::BucketId bucketId(16, 4);
+ document::IntFieldValue updateValue(42);
+ document::Document::SP doc = doPut(4, Timestamp(1234));
+
+ document::DocumentUpdate::SP update = createHeaderUpdate(
+ doc->getId(), updateValue);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(1234));
+ flush(bucketId);
+ CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, result.getErrorCode());
+}
+
+void
+BasicOperationHandlerTest::testUpdateForNonExistentDocWillFail()
+{
+ document::BucketId bucketId(16, 4);
+ document::IntFieldValue updateValue(42);
+ Timestamp timestamp(5678);
+
+ // Is there an easier way to get a DocumentId?
+ document::Document::UP doc(
+ createRandomDocumentAtLocation(4, timestamp.getTime()));
+ const DocumentId& documentId = doc->getId();
+
+ document::DocumentUpdate::SP update = createHeaderUpdate(
+ documentId, updateValue);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, timestamp);
+ flush(bucketId);
+ CPPUNIT_ASSERT_EQUAL(0, (int)result.getExistingTimestamp());
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), file->getSlotCount());
+}
+
+void
+BasicOperationHandlerTest::testUpdateMayCreateDoc()
+{
+ document::BucketId bucketId(16, 4);
+ document::IntFieldValue updateValue(42);
+ Timestamp timestamp(5678);
+
+ // Is there an easier way to get a DocumentId?
+ document::Document::UP doc(
+ createRandomDocumentAtLocation(4, timestamp.getTime()));
+ const DocumentId& documentId = doc->getId();
+
+ document::DocumentUpdate::SP update = createHeaderUpdate(
+ documentId, updateValue);
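+ // With create-if-non-existent set, updating a missing document should create
+ // a brand new slot at the update timestamp.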
+ update->setCreateIfNonExistent(true);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, timestamp);
+ flush(bucketId);
+ CPPUNIT_ASSERT_EQUAL(timestamp.getTime(),
+ (uint64_t)result.getExistingTimestamp());
+
+ MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(timestamp, (*file)[0].getTimestamp());
+
+ auto headerval = file->getDocument((*file)[0], ALL)->getValue("headerval");
+ CPPUNIT_ASSERT(headerval.get() != nullptr);
+ CPPUNIT_ASSERT_EQUAL(updateValue,
+ dynamic_cast<document::IntFieldValue&>(*headerval));
+}
+
+void
+BasicOperationHandlerTest::testRemoveEntry()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+
+ doPut(4, Timestamp(1234));
+ Document::SP doc = doPut(4, Timestamp(2345));
+ doPut(4, Timestamp(3456));
+
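+ // removeEntry erases the slots at the given timestamps outright; only the
+ // put at timestamp 2345 should remain.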
+ getPersistenceProvider().removeEntry(
+ spi::Bucket(bucketId, spi::PartitionId(0)),
+ spi::Timestamp(1234), context);
+ getPersistenceProvider().removeEntry(
+ spi::Bucket(bucketId, spi::PartitionId(0)),
+ spi::Timestamp(3456), context);
+ flush(bucketId);
+
+ memfile::MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));
+}
+
+void
+BasicOperationHandlerTest::setupTestConfig()
+{
+ using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig;
+ using MemFileConfigBuilder
+ = vespa::config::storage::StorMemfilepersistenceConfigBuilder;
+ MemFileConfigBuilder builder(
+ *env().acquireConfigReadLock().memFilePersistenceConfig());
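+ // Configure a deliberately small initial file layout (2 meta slots, 3000
+ // byte header block) for the exception tests below.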
+ builder.minimumFileMetaSlots = 2;
+ builder.minimumFileHeaderBlockSize = 3000;
+ auto newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder));
+ env().acquireConfigWriteLock().setMemFilePersistenceConfig(
+ std::move(newConfig));
+}
+
+void
+BasicOperationHandlerTest::testEraseFromCacheOnFlushException()
+{
+ document::BucketId bucketId(16, 4);
+
+ setupTestConfig();
+
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(4, 2345, 1024, 1024));
+ doPut(doc, bucketId, Timestamp(2345));
+ flush(bucketId);
+ // Must throw out cache to re-create lazyfile
+ env()._cache.clear();
+
+ env()._lazyFileFactory =
+ std::unique_ptr<Environment::LazyFileFactory>(
+ new SimulatedFailureLazyFile::Factory);
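+ // Every file opened from this point on fails on write, so each flush below
+ // should surface a simulated I/O error.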
+
+ // Try partial write, followed by full rewrite
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < i+1; ++j) {
+ document::Document::SP doc2(
+ createRandomDocumentAtLocation(4, 4000 + j, 1500, 1500));
+ doPut(doc2, bucketId, Timestamp(4000 + j));
+ }
+ spi::Result result = flush(bucketId);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(bucketId));
+
+ // Check that we still have first persisted put
+ memfile::MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));
+ }
+}
+
+void
+BasicOperationHandlerTest::testEraseFromCacheOnMaintainException()
+{
+ document::BucketId bucketId(16, 4);
+
+ setupTestConfig();
+
+ getFakeClock()._absoluteTime = framework::MicroSecTime(2000 * 1000000);
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .revertTimePeriod(framework::MicroSecTime(100000ULL * 1000000))
+ .build());
+ // Put a doc twice to allow for revert time compaction to be done
+ document::Document::SP doc1(
+ createRandomDocumentAtLocation(4, 2345, 1024, 1024));
+ document::Document::SP doc2(
+ createRandomDocumentAtLocation(4, 2345, 1024, 1024));
+ doPut(doc1, bucketId, Timestamp(1000 * 1000000));
+ doPut(doc2, bucketId, Timestamp(1500 * 1000000));
+ flush(bucketId);
+ env()._cache.clear();
+
+ options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .revertTimePeriod(framework::MicroSecTime(100ULL * 1000000))
+ .build());
+
+ env()._lazyFileFactory =
+ std::unique_ptr<Environment::LazyFileFactory>(
+ new SimulatedFailureLazyFile::Factory);
+
+ spi::Result result = getPersistenceProvider().maintain(
+ spi::Bucket(bucketId, spi::PartitionId(0)),
+ spi::HIGH);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(bucketId));
+
+ // Check that we still have both persisted puts
+ memfile::MemFilePtr file(getMemFile(bucketId));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1000 * 1000000), (*file)[0].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(*doc1, *file->getDocument((*file)[0], ALL));
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1500 * 1000000), (*file)[1].getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(*doc2, *file->getDocument((*file)[1], ALL));
+}
+
+void
+BasicOperationHandlerTest::testEraseFromCacheOnDeleteBucketException()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(4, 2345, 1024, 1024));
+ doPut(doc, bucketId, Timestamp(2345));
+ flush(bucketId);
+ env()._cache.clear();
+
+ SimulatedFailureLazyFile::Factory* factory(
+ new SimulatedFailureLazyFile::Factory);
+ factory->setReadOpsBeforeFailure(0);
+ env()._lazyFileFactory =
+ std::unique_ptr<Environment::LazyFileFactory>(factory);
+
+ // loadFile will fail
+ spi::Result result = getPersistenceProvider().deleteBucket(
+ spi::Bucket(bucketId, spi::PartitionId(0)), context);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(bucketId));
+
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/tests/spi/buffer_test.cpp b/memfilepersistence/src/tests/spi/buffer_test.cpp
new file mode 100644
index 00000000000..a2d917301fc
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/buffer_test.cpp
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+
+namespace storage {
+namespace memfile {
+
+class BufferTest : public CppUnit::TestFixture
+{
+public:
+ void getSizeReturnsInitiallyAllocatedSize();
+ void getSizeReturnsUnAlignedSizeForMMappedAllocs();
+ void resizeRetainsExistingDataWhenSizingUp();
+ void resizeRetainsExistingDataWhenSizingDown();
+ void bufferAddressIs512ByteAligned();
+
+ CPPUNIT_TEST_SUITE(BufferTest);
+ CPPUNIT_TEST(getSizeReturnsInitiallyAllocatedSize);
+ CPPUNIT_TEST(getSizeReturnsUnAlignedSizeForMMappedAllocs);
+ CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingUp);
+ CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingDown);
+ CPPUNIT_TEST(bufferAddressIs512ByteAligned);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(BufferTest);
+
+void
+BufferTest::getSizeReturnsInitiallyAllocatedSize()
+{
+ Buffer buf(1234);
+ CPPUNIT_ASSERT_EQUAL(size_t(1234), buf.getSize());
+}
+
+void
+BufferTest::getSizeReturnsUnAlignedSizeForMMappedAllocs()
+{
+ Buffer buf(vespalib::MMapAlloc::HUGEPAGE_SIZE + 1);
+ CPPUNIT_ASSERT_EQUAL(size_t(vespalib::MMapAlloc::HUGEPAGE_SIZE + 1),
+ buf.getSize());
+}
+
+void
+BufferTest::resizeRetainsExistingDataWhenSizingUp()
+{
+ std::string src = "hello world";
+ Buffer buf(src.size());
+ memcpy(buf.getBuffer(), src.data(), src.size());
+ buf.resize(src.size() * 2);
+ CPPUNIT_ASSERT_EQUAL(src.size() * 2, buf.getSize());
+ CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size()));
+}
+
+void
+BufferTest::resizeRetainsExistingDataWhenSizingDown()
+{
+ std::string src = "hello world";
+ Buffer buf(src.size());
+ memcpy(buf.getBuffer(), src.data(), src.size());
+ buf.resize(src.size() / 2);
+ CPPUNIT_ASSERT_EQUAL(src.size() / 2, buf.getSize());
+ CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size() / 2));
+}
+
+void
+BufferTest::bufferAddressIs512ByteAligned()
+{
+ Buffer buf(32);
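+ // 512-byte alignment presumably matches the sector alignment required for
+ // the O_DIRECT-style I/O these buffers are used with.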
+ CPPUNIT_ASSERT(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0);
+}
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp b/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp
new file mode 100644
index 00000000000..b59e8a32258
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+namespace storage {
+namespace memfile {
+
+class BufferedFileWriterTest : public CppUnit::TestFixture
+{
+public:
+ void noImplicitFlushingWhenDestructing();
+
+ CPPUNIT_TEST_SUITE(BufferedFileWriterTest);
+ CPPUNIT_TEST(noImplicitFlushingWhenDestructing);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(BufferedFileWriterTest);
+
+namespace {
+
+// Partial mock of vespalib::File. Unfortunately, there's currently no
+// base interface to implement, so we have to override a class that already
+// has implementation code present.
+class MockFile : public vespalib::File
+{
+public:
+ bool _didWrite;
+
+ MockFile(const std::string& filename)
+ : File(filename),
+ _didWrite(false)
+ {
+ }
+
+ void open(int flags, bool autoCreateDirectories) override {
+ (void) flags;
+ (void) autoCreateDirectories;
+ // Don't do anything here to prevent us from actually opening a file
+ // on disk.
+ }
+
+ off_t write(const void *buf, size_t bufsize, off_t offset) override {
+ (void) buf;
+ (void) bufsize;
+ (void) offset;
+ _didWrite = true;
+ return 0;
+ }
+};
+
+}
+
+void
+BufferedFileWriterTest::noImplicitFlushingWhenDestructing()
+{
+ MockFile file("foo");
+ {
+ Buffer buffer(1024);
+ BufferedFileWriter writer(file, buffer, buffer.getSize());
+ // Do a buffered write. This fits well within the buffer and should
+ // consequently not be immediately written out to the backing file.
+ writer.write("blarg", 5);
+ // Escape scope without having flushed anything.
+ }
+ // Since BufferedFileWriter is meant to be used with O_DIRECT files,
+ // flushing just implies writing rather than syncing (this is a half-truth,
+ // since you still sync directories etc. to ensure metadata is written, but
+ // this constrained assumption works fine in the context of this test).
+ CPPUNIT_ASSERT(!file._didWrite);
+}
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp b/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp
new file mode 100644
index 00000000000..6fea98e3c8e
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp
@@ -0,0 +1,940 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <set>
+#include <vector>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <tests/spi/options_builder.h>
+#include <vespa/document/fieldset/fieldsets.h>
+
+namespace storage {
+namespace memfile {
+namespace {
+ spi::LoadType defaultLoadType(0, "default");
+}
+
+class IteratorHandlerTest : public SingleDiskMemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(IteratorHandlerTest);
+ CPPUNIT_TEST(testCreateIterator);
+ CPPUNIT_TEST(testSomeSlotsRemovedBetweenInvocations);
+ CPPUNIT_TEST(testAllSlotsRemovedBetweenInvocations);
+ CPPUNIT_TEST(testIterateMetadataOnly);
+ CPPUNIT_TEST(testIterateHeadersOnly);
+ CPPUNIT_TEST(testIterateLargeDocument);
+ CPPUNIT_TEST(testDocumentsRemovedBetweenInvocations);
+ CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocations);
+ CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocationsIncludeRemoves);
+ CPPUNIT_TEST(testMatchTimestampRangeDocAltered);
+ CPPUNIT_TEST(testIterateAllVersions);
+ CPPUNIT_TEST(testFieldSetFiltering);
+ CPPUNIT_TEST(testIteratorInactiveOnException);
+ CPPUNIT_TEST(testDocsCachedBeforeDocumentSelection);
+ CPPUNIT_TEST(testTimestampRangeLimitedPrefetch);
+ CPPUNIT_TEST(testCachePrefetchRequirements);
+ CPPUNIT_TEST(testBucketEvictedFromCacheOnIterateException);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testCreateIterator();
+ void testSomeSlotsRemovedBetweenInvocations();
+ void testAllSlotsRemovedBetweenInvocations();
+ void testIterateMetadataOnly();
+ void testIterateHeadersOnly();
+ void testIterateLargeDocument();
+ void testDocumentsRemovedBetweenInvocations();
+ void testUnrevertableRemoveBetweenInvocations();
+ void testUnrevertableRemoveBetweenInvocationsIncludeRemoves();
+ void testMatchTimestampRangeDocAltered();
+ void testIterateAllVersions();
+ void testFieldSetFiltering();
+ void testIteratorInactiveOnException();
+ void testDocsCachedBeforeDocumentSelection();
+ void testTimestampRangeLimitedPrefetch();
+ void testCachePrefetchRequirements();
+ void testBucketEvictedFromCacheOnIterateException();
+
+ void setUp();
+ void tearDown();
+
+ struct Chunk
+ {
+ std::vector<spi::DocEntry::LP> _entries;
+ };
+
+private:
+ spi::Selection createSelection(const std::string& docSel) const;
+
+
+ spi::CreateIteratorResult create(
+ const spi::Bucket& b,
+ const spi::Selection& sel,
+ spi::IncludedVersions versions = spi::NEWEST_DOCUMENT_ONLY,
+ const document::FieldSet& fieldSet = document::AllFields())
+ {
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ return getPersistenceProvider().createIterator(b, fieldSet, sel,
+ versions, context);
+ }
+
+ typedef std::pair<Document::SP, spi::Timestamp> DocAndTimestamp;
+
+ std::vector<DocAndTimestamp> feedDocs(size_t numDocs,
+ uint32_t minSize = 110,
+ uint32_t maxSize = 110);
+
+ std::vector<Chunk> doIterate(spi::IteratorId id,
+ uint64_t maxByteSize,
+ size_t maxChunks = 0,
+ bool allowEmptyResult = false);
+
+ void verifyDocs(const std::vector<DocAndTimestamp>& wanted,
+ const std::vector<IteratorHandlerTest::Chunk>& chunks,
+ const std::set<vespalib::string>& removes
+ = std::set<vespalib::string>()) const;
+
+ void doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves);
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(IteratorHandlerTest);
+
+void
+IteratorHandlerTest::setUp()
+{
+ SingleDiskMemFileTestUtils::setUp();
+}
+
+void
+IteratorHandlerTest::tearDown()
+{
+ SingleDiskMemFileTestUtils::tearDown();
+}
+
+spi::Selection
+IteratorHandlerTest::createSelection(const std::string& docSel) const
+{
+ return spi::Selection(spi::DocumentSelection(docSel));
+}
+
+void
+IteratorHandlerTest::testCreateIterator()
+{
+ spi::Bucket b(BucketId(16, 1234), spi::PartitionId(0));
+
+ spi::CreateIteratorResult iter1(create(b, createSelection("true")));
+ CPPUNIT_ASSERT_EQUAL(spi::IteratorId(1), iter1.getIteratorId());
+
+ spi::CreateIteratorResult iter2(create(b, createSelection("true")));
+ CPPUNIT_ASSERT_EQUAL(spi::IteratorId(2), iter2.getIteratorId());
+}
+
+std::vector<IteratorHandlerTest::Chunk>
+IteratorHandlerTest::doIterate(spi::IteratorId id,
+ uint64_t maxByteSize,
+ size_t maxChunks,
+ bool allowEmptyResult)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ std::vector<Chunk> chunks;
+
+ while (true) {
+ std::vector<spi::DocEntry::LP> entries;
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ id, maxByteSize, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT(result.getEntries().size() > 0 || allowEmptyResult);
+
+ for (size_t i = 0; i < result.getEntries().size(); ++i) {
+ entries.push_back(result.getEntries()[i]);
+ }
+ chunks.push_back(Chunk());
+ chunks.back()._entries.swap(entries);
+ if (result.isCompleted()
+ || (maxChunks != 0 && chunks.size() >= maxChunks))
+ {
+ break;
+ }
+ }
+ return chunks;
+}
+
+namespace {
+
+size_t
+getDocCount(const std::vector<IteratorHandlerTest::Chunk>& chunks)
+{
+ size_t count = 0;
+ for (size_t i=0; i<chunks.size(); ++i) {
+ count += chunks[i]._entries.size();
+ }
+ return count;
+}
+
+size_t
+getRemoveEntryCount(const std::vector<spi::DocEntry::LP>& entries)
+{
+ size_t ret = 0;
+ for (size_t i = 0; i < entries.size(); ++i) {
+ if (entries[i]->isRemove()) {
+ ++ret;
+ }
+ }
+ return ret;
+}
+
+struct DocEntryIndirectTimestampComparator
+{
+ bool operator()(const spi::DocEntry::LP& e1,
+ const spi::DocEntry::LP& e2) const
+ {
+ return e1->getTimestamp() < e2->getTimestamp();
+ }
+};
+
+std::vector<spi::DocEntry::LP>
+getEntriesFromChunks(const std::vector<IteratorHandlerTest::Chunk>& chunks)
+{
+ std::vector<spi::DocEntry::LP> ret;
+ for (size_t chunk = 0; chunk < chunks.size(); ++chunk) {
+ for (size_t i = 0; i < chunks[chunk]._entries.size(); ++i) {
+ ret.push_back(chunks[chunk]._entries[i]);
+ }
+ }
+ std::sort(ret.begin(),
+ ret.end(),
+ DocEntryIndirectTimestampComparator());
+ return ret;
+}
+
+const vespalib::LazyFile&
+getFileHandle(const MemFile& mf1)
+{
+ return static_cast<const SimpleMemFileIOBuffer&>(
+ mf1.getMemFileIO()).getFileHandle();
+}
+
+const LoggingLazyFile&
+getLoggerFile(const MemFile& file)
+{
+ return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file));
+}
+
+}
+
+void
+IteratorHandlerTest::verifyDocs(const std::vector<DocAndTimestamp>& wanted,
+ const std::vector<IteratorHandlerTest::Chunk>& chunks,
+ const std::set<vespalib::string>& removes) const
+{
+ std::vector<spi::DocEntry::LP> retrieved(
+ getEntriesFromChunks(chunks));
+ size_t removeCount = getRemoveEntryCount(retrieved);
+ // Ensure that we've got the correct number of puts and removes
+ CPPUNIT_ASSERT_EQUAL(removes.size(), removeCount);
+ CPPUNIT_ASSERT_EQUAL(wanted.size(), retrieved.size() - removeCount);
+
+ size_t wantedIdx = 0;
+ for (size_t i = 0; i < retrieved.size(); ++i) {
+ spi::DocEntry& entry(*retrieved[i]);
+ if (entry.getDocument() != 0) {
+ if (!(*wanted[wantedIdx].first == *entry.getDocument())) {
+ std::ostringstream ss;
+ ss << "Documents differ! Wanted:\n"
+ << wanted[wantedIdx].first->toString(true)
+ << "\n\nGot:\n"
+ << entry.getDocument()->toString(true);
+ CPPUNIT_FAIL(ss.str());
+ }
+ CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].second, entry.getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].first->serialize()->getLength()
+ + sizeof(spi::DocEntry),
+ size_t(entry.getSize()));
+ ++wantedIdx;
+ } else {
+ // Remove-entry
+ CPPUNIT_ASSERT(entry.getDocumentId() != 0);
+ CPPUNIT_ASSERT_EQUAL(entry.getDocumentId()->getSerializedSize()
+ + sizeof(spi::DocEntry),
+ size_t(entry.getSize()));
+ if (removes.find(entry.getDocumentId()->toString()) == removes.end()) {
+ std::ostringstream ss;
+ ss << "Got unexpected remove entry for document id "
+ << *entry.getDocumentId();
+ CPPUNIT_FAIL(ss.str());
+ }
+ }
+ }
+}
+
+// Feed numDocs documents, starting from timestamp 1000
+std::vector<IteratorHandlerTest::DocAndTimestamp>
+IteratorHandlerTest::feedDocs(size_t numDocs,
+ uint32_t minSize,
+ uint32_t maxSize)
+{
+ std::vector<DocAndTimestamp> docs;
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ docs.push_back(
+ DocAndTimestamp(
+ doPut(4,
+ framework::MicroSecTime(1000 + i),
+ minSize,
+ maxSize),
+ spi::Timestamp(1000 + i)));
+ }
+ flush(document::BucketId(16, 4));
+ return docs;
+}
+
+void
+IteratorHandlerTest::testSomeSlotsRemovedBetweenInvocations()
+{
+ std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096);
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+
+ spi::CreateIteratorResult iter(create(b, sel));
+ CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId()));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 10000, 25);
+ CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());
+
+ {
+ MemFilePtr file(getMemFile(b.getBucketId()));
+
+ for (int i = 0 ; i < 2; ++i) {
+ const MemSlot* slot = file->getSlotWithId(docs.front().first->getId());
+ CPPUNIT_ASSERT(slot != 0);
+ file->removeSlot(*slot);
+ docs.erase(docs.begin());
+ }
+ file->flushToDisk();
+ }
+
+ std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 10000);
+ CPPUNIT_ASSERT_EQUAL(size_t(24), chunks2.size());
+ std::copy(chunks2.begin(),
+ chunks2.end(),
+ std::back_insert_iterator<std::vector<Chunk> >(chunks));
+
+ verifyDocs(docs, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+
+ // Bucket should not be evicted from cache during normal operation.
+ CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId()));
+}
+
+void
+IteratorHandlerTest::testAllSlotsRemovedBetweenInvocations()
+{
+ std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096);
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
+ CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());
+
+ {
+ MemFilePtr file(getMemFile(b.getBucketId()));
+
+ for (int i = 0 ; i < 75; ++i) {
+ const MemSlot* slot = file->getSlotWithId(docs[i].first->getId());
+ CPPUNIT_ASSERT(slot != 0);
+ file->removeSlot(*slot);
+ }
+ file->flushToDisk();
+ docs.erase(docs.begin(), docs.begin() + 75);
+ }
+
+ std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1, 0, true);
+ CPPUNIT_ASSERT_EQUAL(size_t(0), getDocCount(chunks2));
+ verifyDocs(docs, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testIterateMetadataOnly()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ std::vector<DocAndTimestamp> docs = feedDocs(10);
+
+ CPPUNIT_ASSERT(
+ doUnrevertableRemove(b.getBucketId(),
+ docs[docs.size() - 2].first->getId(),
+ Timestamp(1008)));
+
+ CPPUNIT_ASSERT(
+ doRemove(b.getBucketId(),
+ docs[docs.size() - 1].first->getId(),
+ framework::MicroSecTime(3001),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+
+ flush(b.getBucketId());
+
+ spi::Selection sel(createSelection("true"));
+ spi::CreateIteratorResult iter(
+ create(b, sel, spi::NEWEST_DOCUMENT_OR_REMOVE, document::NoFields()));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096);
+ std::vector<spi::DocEntry::LP> entries = getEntriesFromChunks(chunks);
+ CPPUNIT_ASSERT_EQUAL(docs.size(), entries.size());
+ std::vector<DocAndTimestamp>::const_iterator docIter(
+ docs.begin());
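+ // Entries are returned sorted by timestamp: eight puts (1000-1007) followed
+ // by the unrevertable remove at 1008 and the regular remove at 3001.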
+ for (size_t i = 0; i < entries.size(); ++i, ++docIter) {
+ const spi::DocEntry& entry = *entries[i];
+
+ CPPUNIT_ASSERT(entry.getDocument() == 0);
+ CPPUNIT_ASSERT(entry.getDocumentId() == 0);
+ if (i == 9) {
+ CPPUNIT_ASSERT(entry.isRemove());
+ CPPUNIT_ASSERT_EQUAL(spi::Timestamp(3001), entry.getTimestamp());
+ } else if (i == 8) {
+ CPPUNIT_ASSERT(entry.isRemove());
+ CPPUNIT_ASSERT_EQUAL(spi::Timestamp(1008), entry.getTimestamp());
+ } else {
+ CPPUNIT_ASSERT(!entry.isRemove());
+ CPPUNIT_ASSERT_EQUAL(docIter->second, entry.getTimestamp());
+ }
+ }
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testIterateHeadersOnly()
+{
+ std::vector<DocAndTimestamp> docs = feedDocs(20);
+ // Remove all bodies.
+ for (size_t i = 0; i < docs.size(); ++i) {
+ clearBody(*docs[i].first);
+ }
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+
+ spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY,
+ document::HeaderFields()));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1024);
+ verifyDocs(docs, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testIterateLargeDocument()
+{
+ std::vector<DocAndTimestamp> docs = feedDocs(10, 10000, 10000);
+ std::vector<DocAndTimestamp> largedoc;
+ largedoc.push_back(docs.back());
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 100, 1);
+ verifyDocs(largedoc, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testDocumentsRemovedBetweenInvocations()
+{
+ int docCount = 100;
+ std::vector<DocAndTimestamp> docs = feedDocs(docCount);
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
+ CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());
+
+ // Remove a subset of the documents. We should still get all the
+ // original documents from the iterator, assuming no compactions.
+ std::vector<DocumentId> removedDocs;
+ std::vector<DocAndTimestamp> nonRemovedDocs;
+ for (int i = 0; i < docCount; ++i) {
+ if (i % 3 == 0) {
+ removedDocs.push_back(docs[i].first->getId());
+ CPPUNIT_ASSERT(doRemove(b.getBucketId(),
+ removedDocs.back(),
+ framework::MicroSecTime(2000 + i),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+ } else {
+ nonRemovedDocs.push_back(docs[i]);
+ }
+ }
+ flush(b.getBucketId());
+
+ std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1);
+ CPPUNIT_ASSERT_EQUAL(size_t(75), chunks2.size());
+ std::copy(chunks2.begin(),
+ chunks2.end(),
+ std::back_insert_iterator<std::vector<Chunk> >(chunks));
+
+ verifyDocs(docs, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves)
+{
+ int docCount = 100;
+ std::vector<DocAndTimestamp> docs = feedDocs(docCount);
+
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ spi::Selection sel(createSelection("true"));
+ spi::CreateIteratorResult iter(
+ create(b, sel,
+ includeRemoves ?
+ spi::NEWEST_DOCUMENT_OR_REMOVE : spi::NEWEST_DOCUMENT_ONLY));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
+ CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());
+
+ // Remove a subset of the documents unrevertably.
+ std::vector<DocumentId> removedDocs;
+ std::vector<DocAndTimestamp> nonRemovedDocs;
+ for (int i = 0; i < docCount - 25; ++i) {
+ if (i < 10) {
+ removedDocs.push_back(docs[i].first->getId());
+ CPPUNIT_ASSERT(
+ doUnrevertableRemove(b.getBucketId(),
+ removedDocs.back(),
+ Timestamp(1000+i)));
+ } else {
+ nonRemovedDocs.push_back(docs[i]);
+ }
+ }
+ flush(b.getBucketId());
+
+ std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1);
+ std::vector<spi::DocEntry::LP> entries = getEntriesFromChunks(chunks2);
+ if (!includeRemoves) {
+ CPPUNIT_ASSERT_EQUAL(nonRemovedDocs.size(), chunks2.size());
+ verifyDocs(nonRemovedDocs, chunks2);
+ } else {
+ CPPUNIT_ASSERT_EQUAL(size_t(75), entries.size());
+ for (int i = 0; i < docCount - 25; ++i) {
+ spi::DocEntry& entry(*entries[i]);
+ if (i < 10) {
+ CPPUNIT_ASSERT(entry.isRemove());
+ } else {
+ CPPUNIT_ASSERT(!entry.isRemove());
+ }
+ }
+ }
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testUnrevertableRemoveBetweenInvocations()
+{
+ doTestUnrevertableRemoveBetweenInvocations(false);
+}
+
+void
+IteratorHandlerTest::testUnrevertableRemoveBetweenInvocationsIncludeRemoves()
+{
+ doTestUnrevertableRemoveBetweenInvocations(true);
+}
+
+void
+IteratorHandlerTest::testMatchTimestampRangeDocAltered()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucketId(16, 4);
+ document::StringFieldValue updateValue1("update1");
+ document::StringFieldValue updateValue2("update2");
+
+ Document::SP originalDoc = doPut(4, Timestamp(1234));
+
+ {
+ document::DocumentUpdate::SP update = createBodyUpdate(
+ originalDoc->getId(), updateValue1);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(2345));
+ CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp());
+ }
+
+ {
+ document::DocumentUpdate::SP update = createBodyUpdate(
+ originalDoc->getId(), updateValue2);
+
+ spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(3456));
+ CPPUNIT_ASSERT_EQUAL(2345, (int)result.getExistingTimestamp());
+ }
+
+ CPPUNIT_ASSERT(
+ doRemove(bucketId,
+ originalDoc->getId(),
+ Timestamp(4567),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND));
+ flush(bucketId);
+
+ spi::Bucket b(bucketId, spi::PartitionId(0));
+
+ {
+ spi::Selection sel(createSelection("true"));
+ sel.setFromTimestamp(spi::Timestamp(0));
+ sel.setToTimestamp(spi::Timestamp(10));
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 4096, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size());
+ CPPUNIT_ASSERT(result.isCompleted());
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ }
+
+ {
+ spi::Selection sel(createSelection("true"));
+ sel.setFromTimestamp(spi::Timestamp(10000));
+ sel.setToTimestamp(spi::Timestamp(20000));
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 4096, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size());
+ CPPUNIT_ASSERT(result.isCompleted());
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ }
+
+ {
+ spi::Selection sel(createSelection("true"));
+ sel.setFromTimestamp(spi::Timestamp(0));
+ sel.setToTimestamp(spi::Timestamp(1234));
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 4096, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size());
+ CPPUNIT_ASSERT(result.isCompleted());
+
+ const Document& receivedDoc(*result.getEntries()[0]->getDocument());
+ if (!(*originalDoc == receivedDoc)) {
+ std::ostringstream ss;
+ ss << "Documents differ! Wanted:\n"
+ << originalDoc->toString(true)
+ << "\n\nGot:\n"
+ << receivedDoc.toString(true);
+ CPPUNIT_FAIL(ss.str());
+ }
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ }
+
+ {
+ spi::Selection sel(createSelection("true"));
+ sel.setFromTimestamp(spi::Timestamp(0));
+ sel.setToTimestamp(spi::Timestamp(2345));
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 4096, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size());
+ CPPUNIT_ASSERT(result.isCompleted());
+
+ const Document& receivedDoc(*result.getEntries()[0]->getDocument());
+ CPPUNIT_ASSERT(receivedDoc.getValue("content").get());
+ CPPUNIT_ASSERT_EQUAL(updateValue1,
+ dynamic_cast<document::StringFieldValue&>(
+ *receivedDoc.getValue(
+ "content")));
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ }
+
+ {
+ spi::Selection sel(createSelection("true"));
+ sel.setFromTimestamp(spi::Timestamp(0));
+ sel.setToTimestamp(spi::Timestamp(3456));
+ spi::CreateIteratorResult iter(create(b, sel));
+
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 4096, context));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size());
+ CPPUNIT_ASSERT(result.isCompleted());
+
+ const Document& receivedDoc(*result.getEntries()[0]->getDocument());
+ CPPUNIT_ASSERT(receivedDoc.getValue("content").get());
+ CPPUNIT_ASSERT_EQUAL(updateValue2,
+ dynamic_cast<document::StringFieldValue&>(
+ *receivedDoc.getValue(
+ "content")));
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ }
+}
+
+void
+IteratorHandlerTest::testIterateAllVersions()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ std::vector<DocAndTimestamp> docs;
+
+ Document::SP originalDoc(createRandomDocumentAtLocation(
+ 4, 1001, 110, 110));
+
+ doPut(originalDoc, framework::MicroSecTime(1001), 0);
+
+ document::StringFieldValue updateValue1("update1");
+ {
+ document::DocumentUpdate::SP update = createBodyUpdate(
+ originalDoc->getId(), updateValue1);
+
+ spi::UpdateResult result = doUpdate(b.getBucketId(), update, Timestamp(2345));
+ CPPUNIT_ASSERT_EQUAL(1001, (int)result.getExistingTimestamp());
+ }
+ flush(b.getBucketId());
+
+ Document::SP updatedDoc(new Document(*originalDoc));
+ updatedDoc->setValue("content", document::StringFieldValue("update1"));
+ docs.push_back(DocAndTimestamp(originalDoc, spi::Timestamp(1001)));
+ docs.push_back(DocAndTimestamp(updatedDoc, spi::Timestamp(2345)));
+
+ spi::Selection sel(createSelection("true"));
+ spi::CreateIteratorResult iter(create(b, sel, spi::ALL_VERSIONS));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096);
+ verifyDocs(docs, chunks);
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testFieldSetFiltering()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ Document::SP doc(createRandomDocumentAtLocation(
+ 4, 1001, 110, 110));
+ doc->setValue(doc->getField("headerval"), document::IntFieldValue(42));
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue("groovy, baby!"));
+ doc->setValue(doc->getField("content"),
+ document::StringFieldValue("fancy content"));
+ doPut(doc, framework::MicroSecTime(1001), 0);
+ flush(b.getBucketId());
+
+ document::FieldSetRepo repo;
+ spi::Selection sel(createSelection("true"));
+ spi::CreateIteratorResult iter(
+ create(b, sel, spi::NEWEST_DOCUMENT_ONLY,
+ *repo.parse(*getTypeRepo(), "testdoctype1:hstringval,content")));
+ std::vector<spi::DocEntry::LP> entries(
+ getEntriesFromChunks(doIterate(iter.getIteratorId(), 4096)));
+ CPPUNIT_ASSERT_EQUAL(size_t(1), entries.size());
+ CPPUNIT_ASSERT_EQUAL(std::string("content: fancy content\n"
+ "hstringval: groovy, baby!\n"),
+ stringifyFields(*entries[0]->getDocument()));
+}
+
+void
+IteratorHandlerTest::testIteratorInactiveOnException()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ feedDocs(10);
+
+ env()._cache.clear();
+
+ simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1));
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ spi::CreateIteratorResult iter(create(b, createSelection("true")));
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 100000, context));
+ CPPUNIT_ASSERT(result.hasError());
+ // Check that iterator is marked as inactive
+ const SharedIteratorHandlerState& state(
+ getPersistenceProvider().getIteratorHandler().getState());
+ CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue())
+ != state._iterators.end());
+ CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue())
+ ->second.isActive() == false);
+
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+}
+
+void
+IteratorHandlerTest::testDocsCachedBeforeDocumentSelection()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096);
+
+ env()._cache.clear();
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).maximumReadThroughGap(1024*1024).build());
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory());
+
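+ // The non-empty document selection means headers must be read before the
+ // selection can be evaluated; the read-op count below verifies that headers
+ // and bodies are prefetched in bulk rather than per document.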
+ spi::Selection sel(createSelection("id.user=4"));
+ spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY,
+ document::BodyFields()));
+
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096);
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ {
+ MemFilePtr file(getMemFile(b.getBucketId()));
+ // Should have 3 read ops; metadata, (precached) headers and bodies
+ CPPUNIT_ASSERT_EQUAL(size_t(3),
+ getLoggerFile(*file).operations.size());
+ }
+}
+
+void
+IteratorHandlerTest::testTimestampRangeLimitedPrefetch()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ // Feed docs with timestamp range [1000, 1100)
+ feedDocs(100, 4096, 4096);
+
+ env()._cache.clear();
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).maximumReadThroughGap(512).build());
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory());
+
+ spi::Selection sel(createSelection("id.user=4"));
+ sel.setFromTimestamp(spi::Timestamp(1050));
+ sel.setToTimestamp(spi::Timestamp(1059));
+ spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY,
+ document::BodyFields()));
+ std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096);
+ CPPUNIT_ASSERT_EQUAL(size_t(10), getDocCount(chunks));
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
+ // Iterate over all slots, ensuring that only those that fall within the
+ // timestamp range have actually been cached.
+ {
+ MemFilePtr file(getMemFile(b.getBucketId()));
+ // Should have 3 read ops; metadata, (precached) headers and bodies
+ CPPUNIT_ASSERT_EQUAL(size_t(3),
+ getLoggerFile(*file).operations.size());
+ for (size_t i = 0; i < file->getSlotCount(); ++i) {
+ const MemSlot& slot((*file)[i]);
+ if (slot.getTimestamp() >= Timestamp(1050)
+ && slot.getTimestamp() <= Timestamp(1059))
+ {
+ CPPUNIT_ASSERT(file->partAvailable(slot, HEADER));
+ CPPUNIT_ASSERT(file->partAvailable(slot, BODY));
+ } else {
+ CPPUNIT_ASSERT(!file->partAvailable(slot, HEADER));
+ CPPUNIT_ASSERT(!file->partAvailable(slot, BODY));
+ }
+ }
+ }
+}
+
+void
+IteratorHandlerTest::testCachePrefetchRequirements()
+{
+ document::select::Parser parser(
+ env().repo(), env()._bucketFactory);
+ {
+ // No prefetch required.
+ // NOTE: since stuff like id.user=1234 won't work, we have to handle
+ // that explicitly in createIterator based on the assumption that a
+ // non-empty document selection at _least_ requires header to be read.
+ std::unique_ptr<document::select::Node> sel(
+ parser.parse("true"));
+ CachePrefetchRequirements req(
+ CachePrefetchRequirements::createFromSelection(env().repo(),
+ *sel));
+ CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired());
+ CPPUNIT_ASSERT(!req.isBodyPrefetchRequired());
+ }
+
+ {
+ // Header prefetch required.
+ std::unique_ptr<document::select::Node> sel(
+ parser.parse("testdoctype1.hstringval='blarg'"));
+ CachePrefetchRequirements req(
+ CachePrefetchRequirements::createFromSelection(env().repo(),
+ *sel));
+ CPPUNIT_ASSERT(req.isHeaderPrefetchRequired());
+ CPPUNIT_ASSERT(!req.isBodyPrefetchRequired());
+ }
+
+ {
+ // Body prefetch required.
+ std::unique_ptr<document::select::Node> sel(
+ parser.parse("testdoctype1.content='foobar'"));
+ CachePrefetchRequirements req(
+ CachePrefetchRequirements::createFromSelection(env().repo(),
+ *sel));
+ CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired());
+ CPPUNIT_ASSERT(req.isBodyPrefetchRequired());
+ }
+}
+
+void
+IteratorHandlerTest::testBucketEvictedFromCacheOnIterateException()
+{
+ spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
+ feedDocs(10);
+ env()._cache.clear();
+
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ spi::CreateIteratorResult iter(create(b, createSelection("true")));
+ simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1));
+ spi::IterateResult result(getPersistenceProvider().iterate(
+ iter.getIteratorId(), 100000, context));
+ CPPUNIT_ASSERT(result.hasError());
+
+ // This test is actually a bit disingenuous since calling iterate will
+ // implicitly invoke maintain() on an IO exception, which will subsequently
+ // evict the bucket due to the exception happening again in its context.
+ CPPUNIT_ASSERT(!env()._cache.contains(b.getBucketId()));
+}
+
+}
+}
diff --git a/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp
new file mode 100644
index 00000000000..78601b461ab
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp
@@ -0,0 +1,504 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/document/datatype/documenttype.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+using document::DocumentType;
+
+namespace storage {
+namespace memfile {
+namespace {
+ spi::LoadType defaultLoadType(0, "default");
+}
+
+class JoinOperationHandlerTest : public MemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(JoinOperationHandlerTest);
+ CPPUNIT_TEST(testSimple);
+ CPPUNIT_TEST(testTargetExists);
+ CPPUNIT_TEST(testTargetWithOverlap);
+ CPPUNIT_TEST(testMultiDisk);
+ CPPUNIT_TEST(testMultiDiskFlushed);
+ CPPUNIT_TEST(testInternalJoin);
+ CPPUNIT_TEST(testInternalJoinDiskFull);
+ CPPUNIT_TEST(testTargetIoWriteExceptionEvictsTargetFromCache);
+ CPPUNIT_TEST(test1stSourceIoReadExceptionEvictsSourceFromCache);
+ CPPUNIT_TEST(test2ndSourceExceptionEvictsExistingTargetFromCache);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testSimple();
+ void testTargetExists();
+ void testTargetWithOverlap();
+ void testMultiDisk();
+ void testMultiDiskFlushed();
+ void testInternalJoin();
+ void testInternalJoinDiskFull();
+ void testTargetIoWriteExceptionEvictsTargetFromCache();
+ void test1stSourceIoReadExceptionEvictsSourceFromCache();
+ void test2ndSourceExceptionEvictsExistingTargetFromCache();
+
+ void insertDocumentInBucket(uint64_t location,
+ Timestamp timestamp,
+ document::BucketId bucket);
+
+private:
+ void feedSingleDisk();
+ void feedMultiDisk();
+ std::string getStandardMemFileStatus(uint32_t disk = 0);
+
+ spi::Result doJoin(const document::BucketId to,
+ const document::BucketId from1,
+ const document::BucketId from2);
+};
+
+namespace {
+
+document::BucketId TARGET = document::BucketId(15, 4);
+document::BucketId SOURCE1 = document::BucketId(16, 4);
+document::BucketId SOURCE2 = document::BucketId(16, (uint64_t)4 | ((uint64_t)1 << 15));
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(JoinOperationHandlerTest);
+
+void
+JoinOperationHandlerTest::feedSingleDisk()
+{
+ for (uint32_t i = 0; i < 100; i++) {
+ std::ostringstream ost;
+ ost << "userdoc:storage_test:1234:" << i;
+ const DocumentType& type(
+ *getTypeRepo()->getDocumentType("testdoctype1"));
+ document::Document::SP doc(
+ new document::Document(type, document::DocumentId(ost.str())));
+
+ document::BucketId bucket(
+ getBucketIdFactory().getBucketId(doc->getId()));
+ bucket.setUsedBits(33);
+ doPut(doc, Timestamp(1000 + i), 0, 33);
+ flush(bucket);
+ }
+}
+
+void
+JoinOperationHandlerTest::feedMultiDisk()
+{
+ for (uint32_t i = 0; i < 100; i += 2) {
+ doPutOnDisk(7, 4 | (1 << 15), Timestamp(1000 + i));
+ }
+ flush(SOURCE2);
+
+ for (uint32_t i = 1; i < 100; i += 2) {
+ doPutOnDisk(4, 4, Timestamp(1000 + i));
+ }
+ flush(SOURCE1);
+
+ {
+ MemFilePtr file(getMemFile(SOURCE1, 4));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(4, (int)file->getDisk());
+ }
+
+ {
+ MemFilePtr file(getMemFile(SOURCE2, 7));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(7, (int)file->getDisk());
+ }
+}
+
+std::string
+JoinOperationHandlerTest::getStandardMemFileStatus(uint32_t disk)
+{
+ std::ostringstream ost;
+
+ ost << getMemFileStatus(TARGET, disk) << "\n"
+ << getMemFileStatus(SOURCE1, disk) << "\n"
+ << getMemFileStatus(SOURCE2, disk) << "\n";
+
+ return ost.str();
+}
+
+void
+JoinOperationHandlerTest::insertDocumentInBucket(
+ uint64_t location,
+ Timestamp timestamp,
+ document::BucketId bucket)
+{
+ Document::SP doc(
+ createRandomDocumentAtLocation(
+ location, timestamp.getTime(), 100, 100));
+ doPut(doc, bucket, timestamp);
+}
+
+spi::Result
+JoinOperationHandlerTest::doJoin(const document::BucketId to,
+ const document::BucketId from1,
+ const document::BucketId from2)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ return getPersistenceProvider().join(
+ spi::Bucket(from1, spi::PartitionId(0)),
+ spi::Bucket(from2, spi::PartitionId(0)),
+ spi::Bucket(to, spi::PartitionId(0)),
+ context);
+}
+
+void
+JoinOperationHandlerTest::testSimple()
+{
+ setupDisks(1);
+ feedSingleDisk();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(33, 1234)));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ }
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32))));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ }
+
+ spi::Result result =
+ doJoin(document::BucketId(32, 1234),
+ document::BucketId(33, 1234),
+ document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32)));
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(32, (uint64_t)1234)));
+ CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount());
+ CPPUNIT_ASSERT(!file->slotsAltered());
+ }
+}
+
+void
+JoinOperationHandlerTest::testTargetExists()
+{
+ setupDisks(1);
+
+ for (uint32_t i = 0; i < 100; i += 2) {
+ doPut(4 | (1 << 15), Timestamp(1000 + i));
+ }
+ flush(SOURCE2);
+
+ for (uint32_t i = 1; i < 100; i += 2) {
+ doPut(4, Timestamp(1000 + i));
+ }
+ flush(SOURCE1);
+
+ for (uint32_t i = 0; i < 100; i++) {
+ uint32_t location = 4;
+ if (i % 2 == 0) {
+ location |= (1 << 15);
+ }
+
+ insertDocumentInBucket(location, Timestamp(500 + i), TARGET);
+ }
+ flush(TARGET);
+
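+ // Target timestamps (500-599) do not overlap the source timestamps
+ // (1000-1099), so the joined bucket should end up with all 200 slots.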
+ doJoin(TARGET, SOURCE1, SOURCE2);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x3c00000000000004): 200,0\n"
+ "BucketId(0x4000000000000004): 0,0\n"
+ "BucketId(0x4000000000008004): 0,0\n"),
+ getStandardMemFileStatus());
+}
+
+void
+JoinOperationHandlerTest::testTargetWithOverlap()
+{
+ setupDisks(1);
+
+ for (uint32_t i = 0; i < 100; i += 2) {
+ doPut(4 | (1 << 15), Timestamp(1000 + i));
+ }
+ flush(SOURCE2);
+
+ for (uint32_t i = 1; i < 100; i += 2) {
+ doPut(4, Timestamp(1000 + i));
+ }
+ flush(SOURCE1);
+
+ for (uint32_t i = 0; i < 100; i++) {
+ uint32_t location = 4;
+ if (i % 2 == 0) {
+ location |= (1 << 15);
+ }
+
+ insertDocumentInBucket(location, Timestamp(950 + i), TARGET);
+ }
+ flush(TARGET);
+
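+ // Target timestamps (950-1049) overlap the sources' (1000-1099); colliding
+ // timestamps are not duplicated, so the joined bucket ends up with 150 slots.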
+ doJoin(TARGET, SOURCE1, SOURCE2);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x3c00000000000004): 150,0\n"
+ "BucketId(0x4000000000000004): 0,0\n"
+ "BucketId(0x4000000000008004): 0,0\n"),
+ getStandardMemFileStatus());
+}
+
+void
+JoinOperationHandlerTest::testMultiDisk()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(10);
+ feedMultiDisk();
+
+ getPersistenceProvider().join(spi::Bucket(SOURCE2, spi::PartitionId(7)),
+ spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(TARGET, spi::PartitionId(3)),
+ context);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x3c00000000000004): 100,3\n"
+ "BucketId(0x4000000000000004): 0,0\n"
+ "BucketId(0x4000000000008004): 0,0\n"),
+ getStandardMemFileStatus());
+}
+
+void
+JoinOperationHandlerTest::testMultiDiskFlushed()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(10);
+ feedMultiDisk();
+
+ // Flush everything to disk to check that we can join even when nothing
+ // is in the cache beforehand.
+ env()._cache.flushDirtyEntries();
+ env()._cache.clear();
+
+ getPersistenceProvider().join(spi::Bucket(SOURCE2, spi::PartitionId(7)),
+ spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(TARGET, spi::PartitionId(3)),
+ context);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x3c00000000000004): 100,3\n"
+ "BucketId(0x4000000000000004): 0,3\n"
+ "BucketId(0x4000000000008004): 0,3\n"),
+ getStandardMemFileStatus(3));
+}
+
+void
+JoinOperationHandlerTest::testInternalJoin()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(10);
+
+ for (uint32_t i = 4; i < 6; i++) {
+ for (uint32_t j = 0; j < 10; j++) {
+ uint32_t location = 4;
+ doPutOnDisk(i, location, Timestamp(i * 1000 + j));
+ }
+ flush(document::BucketId(16, 4), i);
+ env()._cache.clear();
+ }
+
+ std::string fileName1 =
+ env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]);
+ std::string fileName2 =
+ env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]);
+
+ CPPUNIT_ASSERT(vespalib::stat(fileName1).get());
+ vespalib::FileInfo::UP file2(vespalib::stat(fileName2));
+
+ CPPUNIT_ASSERT(file2.get());
+ CPPUNIT_ASSERT(file2->_size > 0);
+
+ PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor();
+ // Set disk under 80% full. Over 80%, we shouldn't move buckets to the target.
+ mon->setStatOncePolicy();
+ mon->overrideRealStat(512, 100000, 50000);
+ CPPUNIT_ASSERT(!mon->isFull(0, .80f));
+
+ getPersistenceProvider().join(spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(SOURCE1, spi::PartitionId(5)),
+ context);
+
+ env()._cache.clear();
+
+ CPPUNIT_ASSERT(!vespalib::stat(fileName1).get());
+ CPPUNIT_ASSERT(vespalib::stat(fileName2).get());
+}
+
+void
+JoinOperationHandlerTest::testInternalJoinDiskFull()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(10);
+
+ for (uint32_t i = 4; i < 6; i++) {
+ for (uint32_t j = 0; j < 10; j++) {
+ uint32_t location = 4;
+ doPutOnDisk(i, location, Timestamp(i * 1000 + j));
+ }
+ flush(document::BucketId(16, 4), i);
+ env()._cache.clear();
+ }
+
+ std::string fileName1 =
+ env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]);
+ std::string fileName2 =
+ env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]);
+
+ CPPUNIT_ASSERT(vespalib::stat(fileName1).get());
+ vespalib::FileInfo::UP file2(vespalib::stat(fileName2));
+
+ CPPUNIT_ASSERT(file2.get());
+ CPPUNIT_ASSERT(file2->_size > 0);
+
+ PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor();
+ // Set disk to 81% full. Over 80%, we shouldn't move buckets to the target.
+ mon->setStatOncePolicy();
+ mon->overrideRealStat(512, 100000, 81000);
+ CPPUNIT_ASSERT(!mon->isFull());
+    CPPUNIT_ASSERT(mon->isFull(0, .80f));
+
+ spi::Result result =
+ getPersistenceProvider().join(spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(SOURCE1, spi::PartitionId(4)),
+ spi::Bucket(SOURCE1, spi::PartitionId(5)),
+ context);
+
+ CPPUNIT_ASSERT(result.hasError());
+}
+
+void
+JoinOperationHandlerTest::testTargetIoWriteExceptionEvictsTargetFromCache()
+{
+ setupDisks(1);
+ feedSingleDisk();
+
+ document::BucketId src1(33, 1234);
+ document::BucketId src2(33, 1234ULL | (1ULL << 32));
+ document::BucketId target(32, 1234);
+
+ CPPUNIT_ASSERT(env()._cache.contains(src1));
+ CPPUNIT_ASSERT(env()._cache.contains(src2));
+ CPPUNIT_ASSERT(!env()._cache.contains(target));
+
+    // Reading existing (fully cached) files will go fine, but writing a
+    // new file will not.
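+    // (The tests further down pass an explicit failure spec instead, e.g.
+    // IoErrors().afterReads(1) or IoErrors().afterReads(INT_MAX).afterWrites(1),
+    // to control exactly which operation is made to fail.)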
+ simulateIoErrorsForSubsequentlyOpenedFiles();
+
+ spi::Result result = doJoin(target, src1, src2);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(target));
+ // NOTE: since we end up renaming src1 -> target during the first
+ // iteration of join, src1 will actually be empty. This should not
+ // matter since the service layer will query the bucket info for
+ // all these afterwards and will thus pick up on this automatically.
+ unSimulateIoErrorsForSubsequentlyOpenedFiles();
+ {
+ MemFilePtr file(getMemFile(src1));
+ CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount());
+ CPPUNIT_ASSERT(!file->slotsAltered());
+ }
+ {
+ MemFilePtr file(getMemFile(src2));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ CPPUNIT_ASSERT(!file->slotsAltered());
+ }
+ {
+ MemFilePtr file(getMemFile(target));
+ // Renamed from src1
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ CPPUNIT_ASSERT(!file->slotsAltered());
+ }
+}
+
+void
+JoinOperationHandlerTest::test1stSourceIoReadExceptionEvictsSourceFromCache()
+{
+ setupDisks(1);
+ feedSingleDisk();
+
+ document::BucketId src1(33, 1234);
+ document::BucketId src2(33, 1234ULL | (1ULL << 32));
+ document::BucketId target(32, 1234);
+
+ env()._cache.clear();
+    // Allow reading the initial metadata so that loadFile itself doesn't
+    // fail. Otherwise we could get a false negative, since loadFile happens
+    // during the initial cache lookup on a cache miss, at which point any
+    // exception will always stop the file from being added to the cache.
+    // Here we want to test the case where a file has first been successfully
+    // loaded into the cache and is then evicted by the read failure.
+ simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1));
+
+ spi::Result result = doJoin(target, src1, src2);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(src1));
+ CPPUNIT_ASSERT(!env()._cache.contains(src2));
+ CPPUNIT_ASSERT(!env()._cache.contains(target));
+}
+
+/**
+ * It must be exception safe for any source bucket to throw an exception during
+ * processing. Otherwise the node will core due to cache sanity checks.
+ *
+ * See VESPA-674 for context. In this scenario, it was not possible to write
+ * to the target file when attempting to join in the 2nd source bucket due to
+ * the disk fill ratio exceeding configured limits.
+ */
+void
+JoinOperationHandlerTest::test2ndSourceExceptionEvictsExistingTargetFromCache()
+{
+ setupDisks(1);
+ feedSingleDisk();
+
+ constexpr uint64_t location = 1234;
+
+ document::BucketId src1(33, location);
+ document::BucketId src2(33, location | (1ULL << 32));
+ document::BucketId target(32, location);
+
+ // Ensure target file is _not_ empty so that copySlots is triggered for
+ // each source bucket (rather than just renaming the file, which does not
+ // invoke the file read/write paths).
+ insertDocumentInBucket(location, Timestamp(100000), target);
+ flush(target);
+
+ env()._cache.clear();
+ // File rewrites are buffered before ever reaching the failure simulation
+ // layer, so only 1 actual write is used to flush the target file after
+ // the first source file has been processed. Attempting to flush the writes
+ // for the second source file should fail with an exception.
+ simulateIoErrorsForSubsequentlyOpenedFiles(
+ IoErrors().afterReads(INT_MAX).afterWrites(1));
+
+ spi::Result result = doJoin(target, src1, src2);
+ CPPUNIT_ASSERT(result.hasError());
+ CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
+ != vespalib::string::npos);
+
+ CPPUNIT_ASSERT(!env()._cache.contains(src1));
+ CPPUNIT_ASSERT(!env()._cache.contains(src2));
+ CPPUNIT_ASSERT(!env()._cache.contains(target));
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/tests/spi/logginglazyfile.h b/memfilepersistence/src/tests/spi/logginglazyfile.h
new file mode 100644
index 00000000000..e54753f7c3e
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/logginglazyfile.h
@@ -0,0 +1,88 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/io/fileutil.h>
+#include <iostream>
+
+namespace storage {
+
+namespace memfile {
+
+class LoggingLazyFile : public vespalib::LazyFile {
+public:
+ class Factory : public Environment::LazyFileFactory {
+ public:
+ vespalib::LazyFile::UP createFile(const std::string& fileName) const {
+ return vespalib::LazyFile::UP(
+ new LoggingLazyFile(fileName, vespalib::File::DIRECTIO));
+ }
+ };
+
+ enum OpType {
+ READ = 0,
+ WRITE
+ };
+
+ struct Entry {
+ OpType opType;
+ size_t bufsize;
+ off_t offset;
+
+ std::string toString() const {
+ std::ostringstream ost;
+ ost << (opType == READ ? "Reading " : "Writing ")
+ << bufsize
+ << " bytes at "
+ << offset;
+ return ost.str();
+ }
+ };
+
+ mutable std::vector<Entry> operations;
+
+ LoggingLazyFile(const std::string& filename, int flags)
+        : LazyFile(filename, flags) {}
+
+ size_t getOperationCount() const {
+ return operations.size();
+ }
+
+ virtual off_t write(const void *buf, size_t bufsize, off_t offset) {
+ Entry e;
+ e.opType = WRITE;
+ e.bufsize = bufsize;
+ e.offset = offset;
+
+ operations.push_back(e);
+
+ return vespalib::LazyFile::write(buf, bufsize, offset);
+ }
+
+ virtual size_t read(void *buf, size_t bufsize, off_t offset) const {
+ Entry e;
+ e.opType = READ;
+ e.bufsize = bufsize;
+ e.offset = offset;
+
+ operations.push_back(e);
+
+ return vespalib::LazyFile::read(buf, bufsize, offset);
+ }
+
+ std::string toString() const {
+ std::ostringstream ost;
+ for (uint32_t i = 0; i < operations.size(); i++) {
+ ost << operations[i].toString() << "\n";
+ }
+
+ return ost.str();
+ }
+};
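+
+// Usage sketch (as used in memfiletest.cpp in this patch): install the factory
+// on the test environment so that all subsequently opened slot files are
+// wrapped and have their read/write calls recorded:
+//
+//     env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+//             new LoggingLazyFile::Factory());
+//     ...
+//     size_t ops = getLoggerFile(*file).getOperationCount();
+//
+// (getLoggerFile() is a test-local helper defined in memfiletest.cpp, not part
+// of this header.)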
+
+}
+
+}
+
diff --git a/memfilepersistence/src/tests/spi/memcachetest.cpp b/memfilepersistence/src/tests/spi/memcachetest.cpp
new file mode 100644
index 00000000000..d34159ce3f4
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memcachetest.cpp
@@ -0,0 +1,412 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h>
+#include <tests/spi/memfiletestutils.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+
+namespace storage {
+namespace memfile {
+
+class MemCacheTest : public SingleDiskMemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(MemCacheTest);
+ CPPUNIT_TEST(testSimpleLRU);
+ CPPUNIT_TEST(testCacheSize);
+ CPPUNIT_TEST(testEvictBody);
+ CPPUNIT_TEST(testEvictHeader);
+ CPPUNIT_TEST(testKeepBodyWhenLessThanOneFourth);
+ CPPUNIT_TEST(testComplexEviction);
+ CPPUNIT_TEST(testEraseEmptyOnReturn);
+ CPPUNIT_TEST(testDeleteDoesNotReAddMemoryUsage);
+ CPPUNIT_TEST(testEraseDoesNotReAddMemoryUsage);
+ CPPUNIT_TEST(testGetWithNoCreation);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testSimpleLRU();
+ void testCacheSize();
+ void testReduceCacheSizeCallback();
+ void testReduceCacheSizeCallbackWhileActive();
+ void testEvictBody();
+ void testEvictHeader();
+ void testKeepBodyWhenLessThanOneFourth();
+ void testComplexEviction();
+ void testEraseEmptyOnReturn();
+ void testDeleteDoesNotReAddMemoryUsage();
+ void testEraseDoesNotReAddMemoryUsage();
+ void testGetWithNoCreation();
+
+private:
+ framework::defaultimplementation::ComponentRegisterImpl::UP _register;
+ framework::Component::UP _component;
+ FakeClock::UP _clock;
+ framework::defaultimplementation::MemoryManager::UP _memoryManager;
+ std::vector<framework::MemoryToken::LP> _stolenMemory;
+ std::unique_ptr<MemFilePersistenceMetrics> _metrics;
+
+ std::unique_ptr<MemFileCache> _cache;
+
+ void setSize(const document::BucketId& id,
+ uint64_t metaSize,
+ uint64_t headerSz = 0,
+ uint64_t bodySz = 0,
+ bool createIfNotInCache = true)
+ {
+ MemFilePtr file(_cache->get(id, env(), env().getDirectory(),
+ createIfNotInCache));
+ CPPUNIT_ASSERT(file.get());
+
+ file->_cacheSizeOverride.metaSize = metaSize;
+ file->_cacheSizeOverride.headerSize = headerSz;
+ file->_cacheSizeOverride.bodySize = bodySz;
+ }
+
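+    // Renders one line per bucket: "<nil>" if the bucket is not cached,
+    // otherwise whether body and/or header data (or metadata only) is
+    // currently cached. The eviction tests below assert against these
+    // strings verbatim.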
+ std::string
+ getBucketStatus(uint32_t buckets)
+ {
+ std::ostringstream ost;
+ for (uint32_t i = 1; i < buckets + 1; i++) {
+ document::BucketId id(16, i);
+ ost << id << " ";
+ if (!_cache->contains(id)) {
+ ost << "<nil>\n";
+ } else {
+ MemFilePtr file(_cache->get(id, env(), env().getDirectory()));
+ if (file->_cacheSizeOverride.bodySize > 0) {
+ ost << "body,";
+ }
+ if (file->_cacheSizeOverride.headerSize > 0) {
+ ost << "header\n";
+ } else {
+ ost << "meta only\n";
+ }
+ }
+ }
+
+ return ost.str();
+ }
+
+ uint64_t cacheSize() {
+ return _cache->size();
+ }
+
+ document::BucketId getLRU() {
+ return _cache->getLeastRecentlyUsedBucket()->_bid;
+ }
+
+ void setCacheSize(uint64_t sz) {
+ MemFileCache::MemoryUsage usage;
+ usage.metaSize = sz / 3;
+ usage.headerSize = sz / 3;
+ usage.bodySize = sz - usage.metaSize - usage.headerSize;
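+        // e.g. setCacheSize(2000) gives metaSize = 666, headerSize = 666 and
+        // bodySize = 668 (the remainder), splitting the budget roughly in
+        // thirds across the block types.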
+
+ _cache->setCacheSize(usage);
+ }
+
+ void stealMemory(uint64_t memToSteal) {
+ setCacheSize(_cache->getCacheSize() - memToSteal);
+ }
+
+ void setup(uint64_t maxMemory) {
+ tearDown();
+ _register.reset(
+ new framework::defaultimplementation::ComponentRegisterImpl);
+ _clock.reset(new FakeClock);
+ _register->setClock(*_clock);
+ _memoryManager.reset(
+ new framework::defaultimplementation::MemoryManager(
+ framework::defaultimplementation::AllocationLogic::UP(
+ new framework::defaultimplementation::SimpleMemoryLogic(
+ *_clock, maxMemory * 2))));
+ _register->setMemoryManager(*_memoryManager);
+ _component.reset(new framework::Component(*_register, "testcomponent"));
+ _metrics.reset(new MemFilePersistenceMetrics(*_component));
+ _cache.reset(new MemFileCache(*_register, _metrics->_cache));
+ setCacheSize(maxMemory);
+ _memoryManager->registerAllocationType(framework::MemoryAllocationType(
+ "steal", framework::MemoryAllocationType::FORCE_ALLOCATE));
+ }
+
+public:
+ void tearDown() {
+ _stolenMemory.clear();
+ _cache.reset(0);
+ _metrics.reset(0);
+ _component.reset(0);
+ _register.reset(0);
+ _memoryManager.reset(0);
+ _clock.reset(0);
+ }
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemCacheTest);
+
+namespace {
+ FakeClock clock;
+}
+
+void
+MemCacheTest::testSimpleLRU()
+{
+ setup(2000);
+
+ for (uint32_t i = 1; i < 4; i++) {
+ setSize(document::BucketId(16, i), 100);
+ }
+
+ CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 1), getLRU());
+
+ setSize(document::BucketId(16, 1), 100);
+
+ CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue());
+ CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 2), getLRU());
+}
+
+void
+MemCacheTest::testCacheSize()
+{
+ setup(400);
+
+ setSize(document::BucketId(16, 2), 100);
+ setSize(document::BucketId(16, 1), 150);
+
+ CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().hits.getValue());
+ CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue());
+
+ CPPUNIT_ASSERT_EQUAL(250ul, cacheSize());
+
+ setSize(document::BucketId(16, 1), 200);
+
+ CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue());
+ CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue());
+
+ CPPUNIT_ASSERT_EQUAL(300ul, cacheSize());
+
+ CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 2)));
+ CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1)));
+
+ setSize(document::BucketId(16, 1), 301);
+
+ CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().hits.getValue());
+ CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue());
+
+ CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 2)));
+ CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1)));
+
+ _cache->clear();
+ CPPUNIT_ASSERT_EQUAL(0ul, cacheSize());
+}
+
+void
+MemCacheTest::testEvictBody()
+{
+ setup(1400);
+
+ CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().body_evictions.getValue());
+
+ setSize(BucketId(16, 1), 150, 100, 0);
+ setSize(BucketId(16, 2), 100, 100, 900);
+
+ CPPUNIT_ASSERT_EQUAL(1350ul, cacheSize());
+
+ stealMemory(150);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) header\n"
+ "BucketId(0x4000000000000002) header\n"),
+ getBucketStatus(2));
+ CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().body_evictions.getValue());
+}
+
+void
+MemCacheTest::testKeepBodyWhenLessThanOneFourth()
+{
+ setup(450);
+
+ setSize(BucketId(16, 1), 150, 0, 0);
+ setSize(BucketId(16, 2), 100, 50, 50);
+
+ stealMemory(150);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) <nil>\n"
+ "BucketId(0x4000000000000002) body,header\n"),
+ getBucketStatus(2));
+}
+
+void
+MemCacheTest::testEvictHeader()
+{
+ setup(550);
+
+ CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().header_evictions.getValue());
+
+ setSize(BucketId(16, 1), 150, 0, 0);
+ setSize(BucketId(16, 2), 100, 200, 100);
+
+ stealMemory(150);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) meta only\n"
+ "BucketId(0x4000000000000002) meta only\n"),
+ getBucketStatus(2));
+ CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().header_evictions.getValue());
+}
+
+#define ASSERT_CACHE_EVICTIONS(body, header, meta) \
+    CPPUNIT_ASSERT_EQUAL(size_t(body), _cache->getMetrics().body_evictions.getValue()); \
+    CPPUNIT_ASSERT_EQUAL(size_t(header), _cache->getMetrics().header_evictions.getValue()); \
+    CPPUNIT_ASSERT_EQUAL(size_t(meta), _cache->getMetrics().meta_evictions.getValue());
+
+void
+MemCacheTest::testComplexEviction()
+{
+ setup(4200);
+
+ setSize(BucketId(16, 1), 150, 0, 0);
+ setSize(BucketId(16, 2), 100, 200, 200);
+ setSize(BucketId(16, 3), 100, 200, 0);
+ setSize(BucketId(16, 4), 100, 400, 0);
+ setSize(BucketId(16, 5), 100, 200, 400);
+ setSize(BucketId(16, 6), 100, 200, 300);
+ setSize(BucketId(16, 7), 100, 0, 0);
+ setSize(BucketId(16, 8), 100, 200, 400);
+ setSize(BucketId(16, 9), 100, 200, 250);
+
+ CPPUNIT_ASSERT_EQUAL(4100ul, cacheSize());
+
+ ASSERT_CACHE_EVICTIONS(0, 0, 0);
+
+ stealMemory(600);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) meta only\n"
+ "BucketId(0x4000000000000002) header\n"
+ "BucketId(0x4000000000000003) header\n"
+ "BucketId(0x4000000000000004) header\n"
+ "BucketId(0x4000000000000005) header\n"
+ "BucketId(0x4000000000000006) body,header\n"
+ "BucketId(0x4000000000000007) meta only\n"
+ "BucketId(0x4000000000000008) body,header\n"
+ "BucketId(0x4000000000000009) body,header\n"),
+ getBucketStatus(9));
+
+ CPPUNIT_ASSERT_EQUAL(3500ul, cacheSize());
+
+ ASSERT_CACHE_EVICTIONS(2, 0, 0);
+
+ stealMemory(500);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) meta only\n"
+ "BucketId(0x4000000000000002) meta only\n"
+ "BucketId(0x4000000000000003) meta only\n"
+ "BucketId(0x4000000000000004) header\n"
+ "BucketId(0x4000000000000005) header\n"
+ "BucketId(0x4000000000000006) body,header\n"
+ "BucketId(0x4000000000000007) meta only\n"
+ "BucketId(0x4000000000000008) body,header\n"
+ "BucketId(0x4000000000000009) body,header\n"),
+ getBucketStatus(9));
+
+ CPPUNIT_ASSERT_EQUAL(3100ul, cacheSize());
+
+ ASSERT_CACHE_EVICTIONS(2, 2, 0);
+
+ stealMemory(1000);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) <nil>\n"
+ "BucketId(0x4000000000000002) meta only\n"
+ "BucketId(0x4000000000000003) meta only\n"
+ "BucketId(0x4000000000000004) meta only\n"
+ "BucketId(0x4000000000000005) meta only\n"
+ "BucketId(0x4000000000000006) header\n"
+ "BucketId(0x4000000000000007) meta only\n"
+ "BucketId(0x4000000000000008) body,header\n"
+ "BucketId(0x4000000000000009) body,header\n"),
+ getBucketStatus(9));
+
+ CPPUNIT_ASSERT_EQUAL(2050ul, cacheSize());
+
+ ASSERT_CACHE_EVICTIONS(3, 4, 1);
+
+ stealMemory(1100);
+
+ CPPUNIT_ASSERT_EQUAL(
+ std::string(
+ "BucketId(0x4000000000000001) <nil>\n"
+ "BucketId(0x4000000000000002) <nil>\n"
+ "BucketId(0x4000000000000003) <nil>\n"
+ "BucketId(0x4000000000000004) <nil>\n"
+ "BucketId(0x4000000000000005) <nil>\n"
+ "BucketId(0x4000000000000006) <nil>\n"
+ "BucketId(0x4000000000000007) meta only\n"
+ "BucketId(0x4000000000000008) header\n"
+ "BucketId(0x4000000000000009) body,header\n"),
+ getBucketStatus(9));
+
+ CPPUNIT_ASSERT_EQUAL(950ul, cacheSize());
+}
+
+#undef ASSERT_CACHE_EVICTIONS
+
+void
+MemCacheTest::testEraseEmptyOnReturn()
+{
+ setup(4200);
+ setSize(BucketId(16, 1), 0, 0, 0);
+ CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 1)));
+}
+
+void
+MemCacheTest::testDeleteDoesNotReAddMemoryUsage()
+{
+ BucketId id(16, 1);
+ setup(1000);
+ setSize(id, 100, 200, 300);
+ CPPUNIT_ASSERT_EQUAL(600ul, cacheSize());
+ {
+ MemFilePtr file(_cache->get(id, env(), env().getDirectory()));
+ file.deleteFile();
+ }
+ CPPUNIT_ASSERT_EQUAL(0ul, cacheSize());
+
+}
+
+void
+MemCacheTest::testGetWithNoCreation()
+{
+ BucketId id(16, 1);
+ setup(1000);
+ setSize(id, 100, 200, 300, false);
+ CPPUNIT_ASSERT_EQUAL(0ul, cacheSize());
+}
+
+
+void
+MemCacheTest::testEraseDoesNotReAddMemoryUsage()
+{
+ BucketId id(16, 1);
+ setup(1000);
+ setSize(id, 100, 200, 300);
+ CPPUNIT_ASSERT_EQUAL(600ul, cacheSize());
+ {
+ MemFilePtr file(_cache->get(id, env(), env().getDirectory()));
+ file.eraseFromCache();
+ }
+ CPPUNIT_ASSERT_EQUAL(0ul, cacheSize());
+
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp b/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp
new file mode 100644
index 00000000000..04d82741e67
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp
@@ -0,0 +1,411 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h>
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <tests/spi/memfiletestutils.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFileAutoRepairTest : public SingleDiskMemFileTestUtils
+{
+public:
+ void setUp();
+ void tearDown();
+
+ void testFileMetadataCorruptionIsAutoRepaired();
+ void testDocumentContentCorruptionIsAutoRepaired();
+ void testCorruptionEvictsBucketFromCache();
+ void testRepairFailureInMaintainEvictsBucketFromCache();
+ void testZeroLengthFileIsDeleted();
+ void testTruncatedBodyLocationIsAutoRepaired();
+ void testTruncatedHeaderLocationIsAutoRepaired();
+ void testTruncatedHeaderBlockIsAutoRepaired();
+
+ void corruptBodyBlock();
+
+ CPPUNIT_TEST_SUITE(MemFileAutoRepairTest);
+ CPPUNIT_TEST(testFileMetadataCorruptionIsAutoRepaired);
+ CPPUNIT_TEST(testDocumentContentCorruptionIsAutoRepaired);
+ CPPUNIT_TEST(testCorruptionEvictsBucketFromCache);
+ CPPUNIT_TEST(testRepairFailureInMaintainEvictsBucketFromCache);
+ CPPUNIT_TEST(testZeroLengthFileIsDeleted);
+ CPPUNIT_TEST(testTruncatedBodyLocationIsAutoRepaired);
+ CPPUNIT_TEST(testTruncatedHeaderLocationIsAutoRepaired);
+ CPPUNIT_TEST(testTruncatedHeaderBlockIsAutoRepaired);
+ CPPUNIT_TEST_SUITE_END();
+
+private:
+ void assertDocumentIsSilentlyRemoved(
+ const document::BucketId& bucket,
+ const document::DocumentId& docId);
+
+ void reconfigureMinimumHeaderBlockSize(uint32_t newMinSize);
+
+ document::BucketId _bucket;
+ std::unique_ptr<FileSpecification> _file;
+ std::vector<document::DocumentId> _slotIds;
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemFileAutoRepairTest);
+
+namespace {
+    // A totally uncached memfile with content to use for verification testing
+ std::unique_ptr<MemFile> _memFile;
+
+ // Clear old content. Create new file. Make sure nothing is cached.
+ void prepareBucket(SingleDiskMemFileTestUtils& util,
+ const FileSpecification& file) {
+ _memFile.reset();
+ util.env()._cache.clear();
+ vespalib::unlink(file.getPath());
+ util.createTestBucket(file.getBucketId(), 0);
+ util.env()._cache.clear();
+ _memFile.reset(new MemFile(file, util.env()));
+ _memFile->getMemFileIO().close();
+
+ }
+
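+    // The offset arithmetic in getSlot()/setSlot() below assumes the slot
+    // file layout relied upon throughout these tests: a fixed Header,
+    // followed by the MetaSlot array, then the header block and finally the
+    // body block (cf. the headerBlockStart/bodyBlockStart computations in
+    // the truncation tests further down).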
+ MetaSlot getSlot(uint32_t index) {
+ assert(_memFile.get());
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ MetaSlot result;
+ file.read(&result, sizeof(MetaSlot),
+ sizeof(Header) + sizeof(MetaSlot) * index);
+ return result;
+ }
+
+ void setSlot(uint32_t index, MetaSlot slot,
+ bool updateFileChecksum = true)
+ {
+ (void)updateFileChecksum;
+ assert(_memFile.get());
+ //if (updateFileChecksum) slot.updateFileChecksum();
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ file.write(&slot, sizeof(MetaSlot),
+ sizeof(Header) + sizeof(MetaSlot) * index);
+ }
+}
+
+void
+MemFileAutoRepairTest::setUp()
+{
+ SingleDiskMemFileTestUtils::setUp();
+ _bucket = BucketId(16, 0xa);
+ createTestBucket(_bucket, 0);
+
+ {
+ MemFilePtr memFilePtr(env()._cache.get(_bucket, env(), env().getDirectory()));
+ _file.reset(new FileSpecification(memFilePtr->getFile()));
+ CPPUNIT_ASSERT(memFilePtr->getSlotCount() >= 2);
+ for (size_t i = 0; i < memFilePtr->getSlotCount(); ++i) {
+ _slotIds.push_back(memFilePtr->getDocumentId((*memFilePtr)[i]));
+ }
+ }
+ env()._cache.clear();
+}
+
+void
+MemFileAutoRepairTest::tearDown()
+{
+ _file.reset(0);
+ _memFile.reset(0);
+ SingleDiskMemFileTestUtils::tearDown();
+}
+
+void
+MemFileAutoRepairTest::testFileMetadataCorruptionIsAutoRepaired()
+{
+ // Test corruption detected in initial metadata load
+ prepareBucket(*this, *_file);
+ document::DocumentId id(_slotIds[1]);
+ MetaSlot slot(getSlot(1));
+ CPPUNIT_ASSERT_EQUAL(slot._gid,
+ id.getGlobalId()); // Sanity checking...
+ {
+ MetaSlot s(slot);
+ s.setTimestamp(Timestamp(40));
+ setSlot(1, s);
+ }
+
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+
+ // File not in cache; should be detected in initial load
+ spi::GetResult res(doGet(_bucket, id, document::AllFields()));
+ // FIXME: currently loadFile is silently fixing corruptions!
+ //CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode());
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode());
+ CPPUNIT_ASSERT(!res.hasDocument());
+
+ CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets());
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+
+ // File should now have been repaired, so a subsequent get for
+ // the same document should just return an empty (but OK) result.
+ spi::GetResult res2(doGet(_bucket, id, document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode());
+ CPPUNIT_ASSERT(!res2.hasDocument());
+
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+}
+
+void
+MemFileAutoRepairTest::corruptBodyBlock()
+{
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket));
+ // Corrupt body block of slot 1
+ MetaSlot slot(getSlot(1));
+ {
+ MetaSlot s(slot);
+ s.setBodyPos(52);
+ s.setBodySize(18);
+ s.updateChecksum();
+ setSlot(1, s);
+ }
+}
+
+void
+MemFileAutoRepairTest::testDocumentContentCorruptionIsAutoRepaired()
+{
+ // Corrupt body block
+ prepareBucket(*this, *_file);
+ document::DocumentId id(_slotIds[1]);
+ corruptBodyBlock();
+
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+
+ spi::GetResult res(doGet(_bucket, id, document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode());
+ CPPUNIT_ASSERT(!res.hasDocument());
+
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket));
+
+ CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets());
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+
+ // File should now have been repaired, so a subsequent get for
+ // the same document should just return an empty (but OK) result.
+ spi::GetResult res2(doGet(_bucket, id, document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode());
+ CPPUNIT_ASSERT(!res2.hasDocument());
+
+ // File should now be in cache OK
+ CPPUNIT_ASSERT(env()._cache.contains(_bucket));
+ CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets());
+}
+
+// Ideally we'd test this for each spi operation that accesses MemFiles, but
+// they all use the same eviction+auto-repair logic...
+void
+MemFileAutoRepairTest::testCorruptionEvictsBucketFromCache()
+{
+ prepareBucket(*this, *_file);
+ corruptBodyBlock();
+
+ // Read slot 0 and shove file into cache
+ spi::GetResult res(doGet(_bucket, _slotIds[0], document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode());
+ CPPUNIT_ASSERT(res.hasDocument());
+ CPPUNIT_ASSERT(env()._cache.contains(_bucket));
+
+ spi::GetResult res2(doGet(_bucket, _slotIds[1], document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res2.getErrorCode());
+ CPPUNIT_ASSERT(!res2.hasDocument());
+
+ // Out of the cache! Begone! Shoo!
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket));
+
+}
+
+void
+MemFileAutoRepairTest::testRepairFailureInMaintainEvictsBucketFromCache()
+{
+ prepareBucket(*this, *_file);
+ corruptBodyBlock();
+ spi::Result result(getPersistenceProvider().maintain(
+ spi::Bucket(_bucket, spi::PartitionId(0)), spi::HIGH));
+ // File being successfully repaired does not constitute a failure of
+ // the maintain() call.
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ // It should, however, shove it out of the cache.
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket));
+}
+
+void
+MemFileAutoRepairTest::testZeroLengthFileIsDeleted()
+{
+ // Completely truncate auto-created file
+ vespalib::LazyFile file(_file->getPath(), 0);
+ file.resize(0);
+
+ // No way to deal with zero-length files aside from deleting them.
+ spi::Result result(getPersistenceProvider().maintain(
+ spi::Bucket(_bucket, spi::PartitionId(0)), spi::HIGH));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket));
+ CPPUNIT_ASSERT(!vespalib::fileExists(_file->getPath()));
+}
+
+namespace {
+
+uint32_t
+alignDown(uint32_t value)
+{
+ uint32_t blocks = value / 512;
+ return blocks * 512;
+}
+
+FileInfo
+fileInfoFromMemFile(const MemFilePtr& mf)
+{
+ auto& ioBuf(dynamic_cast<const SimpleMemFileIOBuffer&>(
+ mf->getMemFileIO()));
+ return ioBuf.getFileInfo();
+}
+
+}
+
+void
+MemFileAutoRepairTest::assertDocumentIsSilentlyRemoved(
+ const document::BucketId& bucket,
+ const document::DocumentId& docId)
+{
+ // Corrupted (truncated) slot should be transparently removed during
+ // loadFile and it should be as if it was never there!
+ spi::Bucket spiBucket(bucket, spi::PartitionId(0));
+ spi::GetResult res(doGet(spiBucket, docId, document::AllFields()));
+ CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode());
+ CPPUNIT_ASSERT(!res.hasDocument());
+}
+
+void
+MemFileAutoRepairTest::testTruncatedBodyLocationIsAutoRepaired()
+{
+ document::BucketId bucket(16, 4);
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(4, 1234, 1024, 1024));
+
+ doPut(doc, bucket, framework::MicroSecTime(1000));
+ flush(bucket);
+ FileInfo fileInfo;
+ {
+ MemFilePtr mf(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount());
+ fileInfo = fileInfoFromMemFile(mf);
+
+ const uint32_t bodyBlockStart(
+ sizeof(Header)
+ + fileInfo._metaDataListSize * sizeof(MetaSlot)
+ + fileInfo._headerBlockSize);
+
+ vespalib::LazyFile file(mf->getFile().getPath(), 0);
+ uint32_t slotBodySize = (*mf)[0].getLocation(BODY)._size;
+ CPPUNIT_ASSERT(slotBodySize > 0);
+        // Align down to the nearest sector boundary to keep unrelated DirectIO
+        // checks from kicking in. Since the body block is always aligned on a
+        // sector boundary, we know this cannot truncate into the header block.
+ file.resize(alignDown(bodyBlockStart + slotBodySize - 1));
+ }
+ env()._cache.clear();
+ assertDocumentIsSilentlyRemoved(bucket, doc->getId());
+}
+
+void
+MemFileAutoRepairTest::testTruncatedHeaderLocationIsAutoRepaired()
+{
+ document::BucketId bucket(16, 4);
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(4, 1234, 1024, 1024));
+ // Ensure header has a bunch of data (see alignment comments below).
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue(std::string(1024, 'A')));
+
+ doPut(doc, bucket, framework::MicroSecTime(1000));
+ flush(bucket);
+ FileInfo fileInfo;
+ {
+ MemFilePtr mf(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount());
+ fileInfo = fileInfoFromMemFile(mf);
+
+ const uint32_t headerBlockStart(
+ sizeof(Header)
+ + fileInfo._metaDataListSize * sizeof(MetaSlot));
+
+ vespalib::LazyFile file(mf->getFile().getPath(), 0);
+ uint32_t slotHeaderSize = (*mf)[0].getLocation(HEADER)._size;
+ CPPUNIT_ASSERT(slotHeaderSize > 0);
+        // Align down to the nearest sector boundary to keep unrelated DirectIO
+        // checks from kicking in. The header block is not guaranteed to start
+        // on a sector boundary, but we assume there is enough slack in the
+        // header section for the metadata slots themselves to stay untouched,
+        // since the doc in question has a minimum header size of 1024.
+ file.resize(alignDown(headerBlockStart + slotHeaderSize - 1));
+ }
+ env()._cache.clear();
+ assertDocumentIsSilentlyRemoved(bucket, doc->getId());
+}
+
+void
+MemFileAutoRepairTest::reconfigureMinimumHeaderBlockSize(uint32_t newMinSize)
+{
+ using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig;
+ using MemFileConfigBuilder
+ = vespa::config::storage::StorMemfilepersistenceConfigBuilder;
+ MemFileConfigBuilder builder(
+ *env().acquireConfigReadLock().memFilePersistenceConfig());
+ builder.minimumFileMetaSlots = 2;
+ builder.minimumFileHeaderBlockSize = newMinSize;
+ auto newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder));
+ env().acquireConfigWriteLock().setMemFilePersistenceConfig(
+ std::move(newConfig));
+}
+
+void
+MemFileAutoRepairTest::testTruncatedHeaderBlockIsAutoRepaired()
+{
+ document::BucketId bucket(16, 4);
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(4, 1234, 1, 1));
+ // Ensure header block is large enough that free space is added to the end.
+ reconfigureMinimumHeaderBlockSize(8192);
+ // Add header field and remove randomly generated body field, ensuring
+ // we have no data to add to body field. This will prevent slot body
+ // location checking from detecting a header truncation.
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue("foo"));
+ doc->remove(doc->getField("content"));
+
+ doPut(doc, bucket, framework::MicroSecTime(1000));
+ flush(bucket);
+ FileInfo fileInfo;
+ {
+ MemFilePtr mf(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount());
+ fileInfo = fileInfoFromMemFile(mf);
+
+ const uint32_t headerBlockEnd(
+ sizeof(Header)
+ + fileInfo._metaDataListSize * sizeof(MetaSlot)
+ + fileInfo._headerBlockSize);
+
+ vespalib::LazyFile file(mf->getFile().getPath(), 0);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0),
+ (*mf)[0].getLocation(BODY)._size); // No body.
+ const auto headerLoc((*mf)[0].getLocation(HEADER));
+ const uint32_t extent(headerLoc._pos + headerLoc._size);
+ // Make sure we don't intersect an existing slot range.
+ CPPUNIT_ASSERT(extent < alignDown(headerBlockEnd - 1));
+ file.resize(alignDown(headerBlockEnd - 1));
+ }
+ env()._cache.clear();
+ assertDocumentIsSilentlyRemoved(bucket, doc->getId());
+}
+
+}
+}
diff --git a/memfilepersistence/src/tests/spi/memfiletest.cpp b/memfilepersistence/src/tests/spi/memfiletest.cpp
new file mode 100644
index 00000000000..70b03271da9
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfiletest.cpp
@@ -0,0 +1,987 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/logginglazyfile.h>
+#include <tests/spi/options_builder.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/memfile/memfilecompactor.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <limits>
+
+namespace storage {
+namespace memfile {
+
+struct MemFileTest : public SingleDiskMemFileTestUtils
+{
+ typedef MemFileCompactor::SlotList SlotList;
+
+ /**
+ * Feed a document whose ID is deterministically generated from `seed` to
+ * bucket (16, 4) at time `timestamp`.
+ */
+ document::DocumentId feedDocument(
+ uint64_t seed,
+ uint64_t timestamp,
+ uint32_t headerSize = 0,
+ uint32_t minBodySize = 10,
+ uint32_t maxBodySize = 100);
+
+ /**
+ * Feed n instances of documents with the same ID to bucket (16, 4) using
+ * a timestamp range of [1000, 1000+n).
+ */
+ void feedSameDocNTimes(uint32_t n);
+
+ void setMaxDocumentVersionsOption(uint32_t n);
+
+ std::vector<Types::Timestamp> compactWithVersionLimit(uint32_t maxVersions);
+
+ void testCompactRemoveDoublePut();
+ void testCompactPutRemove();
+ void testCompactGidCollision();
+ void testCompactGidCollisionAndNot();
+ void testCompactWithMemFile();
+ void testCompactCombined();
+ void testCompactDifferentPuts();
+ void testNoCompactionWhenDocumentVersionsWithinLimit();
+ void testCompactWhenDocumentVersionsExceedLimit();
+ void testCompactLimit1KeepsNewestVersionOnly();
+ void testCompactionOptionsArePropagatedFromConfig();
+ void testZeroDocumentVersionConfigIsCorrected();
+ void testResizeToFreeSpace();
+ void testNoFileWriteOnNoOpCompaction();
+ void testCacheSize();
+ void testClearCache();
+ void testGetSlotsByTimestamp();
+ void testCacheInconsistentSlot();
+ void testEnsureCached();
+ void testAddSlotWhenDiskFull();
+ void testGetSerializedSize();
+ void testGetBucketInfo();
+ void testCopySlotsPreservesLocationSharing();
+ void testFlushingToNonExistingFileAlwaysRunsCompaction();
+ void testOrderDocSchemeDocumentsCanBeAddedToFile();
+
+ CPPUNIT_TEST_SUITE(MemFileTest);
+ CPPUNIT_TEST(testCompactRemoveDoublePut);
+ CPPUNIT_TEST(testCompactPutRemove);
+ CPPUNIT_TEST(testCompactGidCollision);
+ CPPUNIT_TEST(testCompactGidCollisionAndNot);
+ CPPUNIT_TEST(testCompactWithMemFile);
+ CPPUNIT_TEST(testCompactCombined);
+ CPPUNIT_TEST(testCompactDifferentPuts);
+ CPPUNIT_TEST(testNoCompactionWhenDocumentVersionsWithinLimit);
+ CPPUNIT_TEST(testCompactWhenDocumentVersionsExceedLimit);
+ CPPUNIT_TEST(testCompactLimit1KeepsNewestVersionOnly);
+ CPPUNIT_TEST(testCompactionOptionsArePropagatedFromConfig);
+ CPPUNIT_TEST(testZeroDocumentVersionConfigIsCorrected);
+ CPPUNIT_TEST(testNoFileWriteOnNoOpCompaction);
+ CPPUNIT_TEST(testCacheSize);
+ CPPUNIT_TEST(testClearCache);
+ CPPUNIT_TEST(testGetSlotsByTimestamp);
+ CPPUNIT_TEST(testEnsureCached);
+ CPPUNIT_TEST(testResizeToFreeSpace);
+ CPPUNIT_TEST(testAddSlotWhenDiskFull);
+ CPPUNIT_TEST(testGetSerializedSize);
+ CPPUNIT_TEST(testGetBucketInfo);
+ CPPUNIT_TEST(testCopySlotsPreservesLocationSharing);
+ CPPUNIT_TEST(testFlushingToNonExistingFileAlwaysRunsCompaction);
+ CPPUNIT_TEST(testOrderDocSchemeDocumentsCanBeAddedToFile);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemFileTest);
+
+/**
+ * The slot pointers should actually be identical. Use this assert to do the
+ * correct check while still printing the contents of the slots on failure.
+ */
+#define ASSERT_SLOT_EQUAL(slotptra, slotptrb) \
+{ \
+ CPPUNIT_ASSERT(slotptra != 0); \
+ CPPUNIT_ASSERT(slotptrb != 0); \
+ std::ostringstream slotdiff; \
+ slotdiff << "Expected: " << *slotptra << ", but got " << *slotptrb; \
+ CPPUNIT_ASSERT_EQUAL_MSG(slotdiff.str(), slotptra, slotptrb); \
+}
+
+namespace {
+
+framework::MicroSecTime sec(uint64_t n) {
+ return framework::MicroSecTime(n * 1000000ULL);
+}
+
+/**
+ * Utility function that tests call to perform compaction, so that the tests
+ * themselves are not bound to the current compactor interface.
+ *
+ * It also translates second-resolution times to microsecond time.
+ */
+MemFileTest::SlotList getSlotsToRemove(
+ const MemFile& file, uint64_t currentTime,
+ uint64_t revertTime, uint64_t keepRemoveTime)
+{
+ MemFileCompactor compactor(
+ sec(currentTime),
+ CompactionOptions()
+ .maxDocumentVersions(
+ std::numeric_limits<uint32_t>::max())
+ .revertTimePeriod(sec(revertTime))
+ .keepRemoveTimePeriod(sec(keepRemoveTime)));
+ return compactor.getSlotsToRemove(file);
+}
+
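+// RAII helper that flushes the wrapped MemFile to disk when it goes out of
+// scope, so each test scope below persists its modifications automatically.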
+class AutoFlush
+{
+public:
+ AutoFlush(MemFilePtr& ptr) : _ptr(ptr) {}
+ ~AutoFlush() { _ptr->flushToDisk(); }
+private:
+ MemFilePtr& _ptr;
+};
+
+}
+
+document::DocumentId
+MemFileTest::feedDocument(
+ uint64_t seed,
+ uint64_t timestamp,
+ uint32_t headerSize,
+        uint32_t minBodySize,
+        uint32_t maxBodySize) {
+    document::Document::SP doc(createRandomDocumentAtLocation(
+            4, seed, minBodySize, maxBodySize));
+
+ if (headerSize > 0) {
+ std::string val(headerSize, 'A');
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue(val));
+ }
+
+ doPut(doc,
+ document::BucketId(16, 4),
+ Timestamp(timestamp * 1000000));
+
+ return doc->getId();
+}
+
+void
+MemFileTest::feedSameDocNTimes(uint32_t n)
+{
+ for (uint32_t i = 0; i < n; ++i) {
+ feedDocument(1234, 1000 + i);
+ }
+}
+
+void
+MemFileTest::setMaxDocumentVersionsOption(uint32_t n)
+{
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .maxDocumentVersions(n)
+ .build());
+}
+
+void
+MemFileTest::testCacheSize()
+{
+ // Feed some puts
+ for (uint32_t i = 0; i < 4; i++) {
+ feedDocument(1234 * (i % 2), 1000 + 200 * i);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ CPPUNIT_ASSERT(file->getCacheSize().sum() > 0);
+}
+
+void
+MemFileTest::testClearCache()
+{
+ // Feed some puts
+ for (uint32_t i = 0; i < 4; i++) {
+ feedDocument(1234 * (i % 2), 1000 + 200 * i);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ file->flushToDisk();
+
+ CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0);
+ CPPUNIT_ASSERT(file->getCacheSize().headerSize > 0);
+
+ file->clearCache(HEADER);
+
+ CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0);
+ CPPUNIT_ASSERT(file->getMemFileIO().getCachedSize(BODY) > 0);
+ CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().headerSize);
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(HEADER));
+
+ file->clearCache(BODY);
+
+ CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().bodySize);
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(BODY));
+}
+
+
+void
+MemFileTest::testCompactGidCollision()
+{
+ // Feed two puts
+ for (uint32_t i = 0; i < 2; i++) {
+ feedDocument(1234 * i, 1000 + 200 * i);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ const_cast<MemSlot&>((*file)[1]).setGlobalId((*file)[0].getGlobalId());
+
+ CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount());
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400));
+ CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size());
+ file->removeSlots(toRemove);
+ }
+}
+
+void
+MemFileTest::testCompactGidCollisionAndNot()
+{
+ // Feed some puts
+ for (uint32_t i = 0; i < 4; i++) {
+ feedDocument(1234 * (i % 2), 1000 + 200 * i);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ const_cast<MemSlot&>((*file)[2]).setGlobalId((*file)[0].getGlobalId());
+ const_cast<MemSlot&>((*file)[3]).setGlobalId((*file)[1].getGlobalId());
+
+ CPPUNIT_ASSERT_EQUAL(4, (int)file->getSlotCount());
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 2000, 300, 86400));
+
+ CPPUNIT_ASSERT_EQUAL(2, (int)toRemove.size());
+ ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]);
+ ASSERT_SLOT_EQUAL(&(*file)[1], toRemove[1]);
+ file->removeSlots(toRemove);
+ }
+}
+
+
+void
+MemFileTest::testCompactRemoveDoublePut()
+{
+ // Feed two puts at time 1000 and 1200
+ for (uint32_t i = 0; i < 2; i++) {
+ feedDocument(1234, 1000 + 200 * i);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount());
+
+ {
+ // Not time to collect yet, newest is still revertable
+ SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 86400));
+ CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size());
+ }
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400));
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size());
+ ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]);
+ file->removeSlots(toRemove);
+ }
+}
+
+void
+MemFileTest::testCompactPutRemove()
+{
+ document::DocumentId docId = feedDocument(1234, 1000);
+
+ doRemove(docId, Timestamp(1200*1000000), 0);
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+
+ {
+ // Since remove can still be reverted, we can't revert anything.
+ SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 600));
+
+ CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size());
+ }
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 600));
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size());
+ ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]);
+ file->removeSlots(toRemove);
+ }
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 1900, 300, 600));
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size());
+ ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]);
+ file->removeSlots(toRemove);
+ }
+}
+
+void
+MemFileTest::testCompactCombined()
+{
+ document::DocumentId docId;
+
+ // Feed some puts at time 1000, 1200, 1400, 1600 and 1800 for same doc.
+ for (uint32_t i = 0; i < 5; i++) {
+ docId = feedDocument(1234, 1000 + i * 200);
+ }
+ flush(document::BucketId(16, 4));
+
+ // Now add remove at time 2000.
+ doRemove(docId, Timestamp(2000 * 1000000), 0);
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount());
+
+ {
+ // Compact all redundant slots that are older than revert period of 300.
+ // This includes 1000, 1200, 1400 and 1600.
+ SlotList toRemove(getSlotsToRemove(*file, 2001, 300, 86400));
+ CPPUNIT_ASSERT_EQUAL(4, (int)toRemove.size());
+ for (int i = 0; i < 4; ++i) {
+ ASSERT_SLOT_EQUAL(&(*file)[i], toRemove[i]);
+ }
+ file->removeSlots(toRemove);
+ }
+}
+
+void
+MemFileTest::testCompactDifferentPuts()
+{
+ document::DocumentId docId;
+
+ // Feed some puts
+ for (uint32_t i = 0; i < 2; i++) {
+ for (uint32_t j = 0; j < 3; j++) {
+ feedDocument(1234 * j, 1000 + (i * 3 + j) * 200);
+ }
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount());
+
+ {
+ SlotList toRemove(getSlotsToRemove(*file, 3000, 300, 86400));
+ CPPUNIT_ASSERT_EQUAL(3, (int)toRemove.size());
+
+ for (uint32_t i = 0; i < 3; i++) {
+ bool found = false;
+ for (uint32_t j = 0; j < 3; j++) {
+ if ((*file)[j] == *toRemove[i]) {
+ found = true;
+ }
+ }
+
+ CPPUNIT_ASSERT(found);
+ }
+ file->removeSlots(toRemove);
+ }
+}
+
+void
+MemFileTest::testCompactWithMemFile()
+{
+ // Feed two puts
+ for (uint32_t i = 0; i < 2; i++) {
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 4, 1234, 10, 100));
+
+ doPut(doc, document::BucketId(16, 4), Timestamp((1000 + i * 200)*1000000), 0);
+ }
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount());
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .revertTimePeriod(framework::MicroSecTime(1000))
+ .build());
+
+ getFakeClock()._absoluteTime = framework::MicroSecTime(2000ULL * 1000000);
+
+ CPPUNIT_ASSERT(file->compact());
+ CPPUNIT_ASSERT(!file->compact());
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)file->getSlotCount());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1200 * 1000000), (*file)[0].getTimestamp());
+}
+
+/**
+ * Feed 5 versions of a single document at absolute times 0 through 4 seconds
+ * and run compaction using the provided max document version option.
+ * Revert time/keep remove time options are effectively disabled for this test.
+ * Returns timestamps of all slots that are marked as compactable.
+ */
+std::vector<Types::Timestamp>
+MemFileTest::compactWithVersionLimit(uint32_t maxVersions)
+{
+ document::BucketId bucket(16, 4);
+ std::shared_ptr<Document> doc(
+ createRandomDocumentAtLocation(4, 1234, 10, 100));
+ uint32_t versionLimit = 5;
+ for (uint32_t i = 0; i < versionLimit; ++i) {
+ Timestamp ts(sec(i).getTime());
+ doPut(doc, bucket, ts, 0);
+ }
+ flush(bucket);
+
+ MemFilePtr file(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(versionLimit, file->getSlotCount());
+
+ framework::MicroSecTime currentTime(sec(versionLimit));
+ MemFileCompactor compactor(
+ currentTime,
+ CompactionOptions()
+ .revertTimePeriod(sec(versionLimit))
+ .keepRemoveTimePeriod(sec(versionLimit))
+ .maxDocumentVersions(maxVersions));
+ auto slots = compactor.getSlotsToRemove(*file);
+ // Convert to timestamps since caller won't have access to actual MemFile.
+ std::vector<Timestamp> timestamps;
+ for (const MemSlot* slot : slots) {
+ timestamps.push_back(slot->getTimestamp());
+ }
+ return timestamps;
+}
+
+void
+MemFileTest::testNoCompactionWhenDocumentVersionsWithinLimit()
+{
+ auto timestamps = compactWithVersionLimit(5);
+ CPPUNIT_ASSERT(timestamps.empty());
+}
+
+void
+MemFileTest::testCompactWhenDocumentVersionsExceedLimit()
+{
+ auto timestamps = compactWithVersionLimit(2);
+ CPPUNIT_ASSERT_EQUAL(size_t(3), timestamps.size());
+ std::vector<Timestamp> expected = {
+ sec(0), sec(1), sec(2)
+ };
+ CPPUNIT_ASSERT_EQUAL(expected, timestamps);
+}
+
+void
+MemFileTest::testCompactLimit1KeepsNewestVersionOnly()
+{
+ auto timestamps = compactWithVersionLimit(1);
+ CPPUNIT_ASSERT_EQUAL(size_t(4), timestamps.size());
+ std::vector<Timestamp> expected = {
+ sec(0), sec(1), sec(2), sec(3)
+ };
+ CPPUNIT_ASSERT_EQUAL(expected, timestamps);
+}
+
+void
+MemFileTest::testCompactionOptionsArePropagatedFromConfig()
+{
+ vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder;
+ vespa::config::content::PersistenceConfigBuilder pcBuilder;
+
+ pcBuilder.maximumVersionsOfSingleDocumentStored = 12345;
+ pcBuilder.revertTimePeriod = 555;
+ pcBuilder.keepRemoveTimePeriod = 777;
+
+ vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder);
+ vespa::config::content::PersistenceConfig pc(pcBuilder);
+ Options opts(mfc, pc);
+
+ CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(555 * 1000000),
+ opts._revertTimePeriod);
+ CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(777 * 1000000),
+ opts._keepRemoveTimePeriod);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(12345), opts._maxDocumentVersions);
+}
+
+void
+MemFileTest::testZeroDocumentVersionConfigIsCorrected()
+{
+ vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder;
+ vespa::config::content::PersistenceConfigBuilder pcBuilder;
+
+ pcBuilder.maximumVersionsOfSingleDocumentStored = 0;
+
+ vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder);
+ vespa::config::content::PersistenceConfig pc(pcBuilder);
+ Options opts(mfc, pc);
+
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), opts._maxDocumentVersions);
+}
+
+void
+MemFileTest::testGetSlotsByTimestamp()
+{
+ for (uint32_t i = 0; i < 10; i++) {
+ feedDocument(i, 1000 + i);
+ }
+ flush(document::BucketId(16, 4));
+
+ std::vector<Timestamp> timestamps;
+ timestamps.push_back(Timestamp(999 * 1000000));
+ timestamps.push_back(Timestamp(1001 * 1000000));
+ timestamps.push_back(Timestamp(1002 * 1000000));
+ timestamps.push_back(Timestamp(1007 * 1000000));
+ timestamps.push_back(Timestamp(1100 * 1000000));
+ std::vector<const MemSlot*> slots;
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ file->getSlotsByTimestamp(timestamps, slots);
+ CPPUNIT_ASSERT_EQUAL(std::size_t(3), slots.size());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1001 * 1000000), slots[0]->getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1002 * 1000000), slots[1]->getTimestamp());
+ CPPUNIT_ASSERT_EQUAL(Timestamp(1007 * 1000000), slots[2]->getTimestamp());
+}
+
+void
+MemFileTest::testEnsureCached()
+{
+ // Feed some puts
+ for (uint32_t i = 0; i < 5; i++) {
+ feedDocument(i, 1000 + i * 200, 600, 600, 600);
+ }
+ flush(document::BucketId(16, 4));
+
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).maximumReadThroughGap(512).build());
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ CPPUNIT_ASSERT(file.get());
+ CPPUNIT_ASSERT_EQUAL(5, (int)file->getSlotCount());
+
+ file->ensureDocumentIdCached((*file)[1]);
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ if (i == 1) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ } else {
+ CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i]));
+ }
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY));
+ }
+ }
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ file->ensureDocumentCached((*file)[2], true);
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ if (i == 2) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER));
+ } else {
+ CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER));
+ }
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY));
+ }
+ }
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ file->ensureDocumentCached((*file)[3], false);
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ if (i == 3) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY));
+ } else {
+ CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY));
+ }
+ }
+ }
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ std::vector<Timestamp> ts;
+ for (int i = 2; i < 5; ++i) {
+ ts.push_back((*file)[i].getTimestamp());
+ }
+
+ file->ensureDocumentCached(ts, false);
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ if (i > 1 && i < 5) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY));
+ } else {
+ CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY));
+ }
+ }
+ }
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ file->ensureHeaderBlockCached();
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY));
+ }
+ }
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ file->ensureBodyBlockCached();
+
+ for (std::size_t i = 0; i < file->getSlotCount(); ++i) {
+ CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i]));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER));
+ CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY));
+ }
+ }
+}
+
+void
+MemFileTest::testResizeToFreeSpace()
+{
+ /**
+     * This test verifies that files are resized to a smaller size when they
+     * need to be. This should happen during a call to flushToDisk() in
+     * MemFile, either when the file is dirty or when the flag to also check
+     * clean files is passed (which the integrity checker cycle uses). A clean
+     * file is used for testing to ensure that no part of the code only works
+     * for dirty files. This test only covers the case where the body block is
+     * too large. The real implementation lives in the flushUpdatesToFile()
+     * function of the given file formats (the VersionSerializers). If more
+     * cases are to be tested, add those as unit tests for the version
+     * serializers themselves.
+ */
+
+ // Create a test bucket to test with.
+ BucketId bucket(16, 0xa);
+ createTestBucket(bucket, 0);
+
+ off_t file_size =
+ ((SimpleMemFileIOBuffer&)getMemFile(bucket)->getMemFileIO()).
+ getFileHandle().getFileSize();
+
+ // Clear cache so we can manually modify backing file to increase the
+ // size of it.
+ FileSpecification file(getMemFile(bucket)->getFile());
+ env()._cache.clear();
+ {
+        // Extend the file to 2 MB, which should create an excessively large
+        // body block so that the file gets resized to be smaller.
+ vespalib::LazyFile fileHandle(file.getPath(), 0);
+ fileHandle.write("foobar", 6, 2 * 1024 * 1024 - 6);
+ }
+ MemFilePtr memFile(getMemFile(bucket));
+ memFile->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE);
+ CPPUNIT_ASSERT_EQUAL(file_size,
+ ((SimpleMemFileIOBuffer&)memFile->getMemFileIO()).
+ getFileHandle().getFileSize());
+}
+
+namespace {
+
+const vespalib::LazyFile&
+getFileHandle(const MemFile& mf1)
+{
+ return dynamic_cast<const SimpleMemFileIOBuffer&>(
+ mf1.getMemFileIO()).getFileHandle();
+}
+
+const LoggingLazyFile&
+getLoggerFile(const MemFile& file)
+{
+ return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file));
+}
+
+}
+
+void
+MemFileTest::testNoFileWriteOnNoOpCompaction()
+{
+ BucketId bucket(16, 4);
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory());
+
+ // Feed some unique puts, none of which can be compacted away.
+ for (uint32_t i = 0; i < 2; i++) {
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 4, i, 10, 100));
+
+ doPut(doc, bucket, Timestamp((1000 + i * 200)*1000000), 0);
+ }
+ flush(bucket);
+
+ MemFilePtr file(getMemFile(bucket));
+
+ size_t opsBeforeFlush = getLoggerFile(*file).getOperationCount();
+ file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE);
+ size_t opsAfterFlush = getLoggerFile(*file).getOperationCount();
+
+ // Disk should not have been touched, since no slots have been
+ // compacted away.
+ if (opsBeforeFlush != opsAfterFlush) {
+ std::cerr << "\n" << getLoggerFile(*file).toString() << "\n";
+ }
+ CPPUNIT_ASSERT_EQUAL(opsBeforeFlush, opsAfterFlush);
+}
+
+void
+MemFileTest::testAddSlotWhenDiskFull()
+{
+ {
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ {
+ // Add a dummy-slot that can later be removed
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ file->addPutSlot(*doc, Timestamp(1001));
+ }
+ }
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ AutoFlush af(file);
+ PartitionMonitor* mon = env().getDirectory().getPartition().getMonitor();
+ // Set disk to 99% full
+ mon->setStatOncePolicy();
+ mon->setMaxFillness(.98f);
+ mon->overrideRealStat(512, 100000, 99000);
+ CPPUNIT_ASSERT(mon->isFull());
+
+ // Test that addSlot with a non-persisted Put fails
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ try {
+ file->addPutSlot(*doc, Timestamp(10003));
+ CPPUNIT_ASSERT(false);
+ } catch (vespalib::IoException& e) {
+ CPPUNIT_ASSERT_EQUAL(vespalib::IoException::NO_SPACE, e.getType());
+ }
+ }
+
+ // Slots with valid header and body locations should also
+ // not fail, as these are added when the file is loaded
+ {
+ // Just steal parts from existing slot to ensure they're persisted
+ const MemSlot* existing = file->getSlotAtTime(Timestamp(1001));
+
+ MemSlot slot(existing->getGlobalId(),
+ Timestamp(1005),
+ existing->getLocation(HEADER),
+ existing->getLocation(BODY),
+ IN_USE,
+ 0x1234);
+ file->addSlot(slot);
+ }
+
+ // Removes should not fail when disk is full
+ {
+ file->addRemoveSlot(*file->getSlotAtTime(Timestamp(1001)), Timestamp(1003));
+ }
+}
+
+void
+MemFileTest::testGetSerializedSize() {
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 4, 1234, 1024, 1024));
+
+ std::string val("Header");
+ doc->setValue(doc->getField("hstringval"),
+ document::StringFieldValue(val));
+
+ doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000));
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+ file->ensureBodyBlockCached();
+ const MemSlot* slot = file->getSlotAtTime(framework::MicroSecTime(1000));
+ CPPUNIT_ASSERT(slot != 0);
+
+ vespalib::nbostream serializedHeader;
+ doc->serializeHeader(serializedHeader);
+
+ vespalib::nbostream serializedBody;
+ doc->serializeBody(serializedBody);
+
+ CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()),
+ file->getSerializedSize(*slot, HEADER));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()),
+ file->getSerializedSize(*slot, BODY));
+}
+
+void
+MemFileTest::testGetBucketInfo()
+{
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 4, 1234, 100, 100));
+ doc->setValue(doc->getField("content"),
+ document::StringFieldValue("foo"));
+ document::Document::SP doc2(createRandomDocumentAtLocation(
+ 4, 1235, 100, 100));
+ doc2->setValue(doc->getField("content"),
+ document::StringFieldValue("bar"));
+
+ doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000));
+ flush(document::BucketId(16, 4));
+
+ doPut(doc2, document::BucketId(16, 4), framework::MicroSecTime(1001));
+ flush(document::BucketId(16, 4));
+
+ // Do a remove, which should only add a single meta entry
+ doRemove(doc->getId(), Timestamp(1002), 0);
+ flush(document::BucketId(16, 4));
+
+ MemFilePtr file(getMemFile(document::BucketId(16, 4)));
+
+ CPPUNIT_ASSERT_EQUAL(3u, file->getSlotCount());
+ uint32_t maxHeaderExtent = (*file)[1].getLocation(HEADER)._pos
+ + (*file)[1].getLocation(HEADER)._size;
+ uint32_t maxBodyExtent = (*file)[1].getLocation(BODY)._pos
+ + (*file)[1].getLocation(BODY)._size;
+
+ uint32_t wantedUsedSize = 64 + 40*3 + maxHeaderExtent + maxBodyExtent;
+ BucketInfo info = file->getBucketInfo();
+ CPPUNIT_ASSERT_EQUAL(1u, info.getDocumentCount());
+ CPPUNIT_ASSERT_EQUAL(3u, info.getEntryCount());
+ CPPUNIT_ASSERT_EQUAL(wantedUsedSize, info.getUsedSize());
+ uint32_t wantedUniqueSize = (*file)[1].getLocation(HEADER)._size
+ + (*file)[1].getLocation(BODY)._size;
+ CPPUNIT_ASSERT_EQUAL(wantedUniqueSize, info.getDocumentSize());
+}
+
+void
+MemFileTest::testCopySlotsPreservesLocationSharing()
+{
+ document::BucketId bucket(16, 4);
+ // Feed two puts for the same document (identical seed). These should not
+ // share any blocks. Note: implicit sec -> microsec conversion.
+ feedDocument(1234, 1000); // slot 0
+ auto docId = feedDocument(1234, 1001); // slot 1
+ // Update only the header of the newest document version. The resulting
+ // slot 2 should share its body block with slot 1.
+ auto update = createHeaderUpdate(docId, document::IntFieldValue(5678));
+ doUpdate(bucket, update, Timestamp(1002 * 1000000), 0);
+ // Feed a remove for the document. The resulting slot 3 should share its
+ // header block with slot 2, which holds the newest header version.
+ doRemove(docId, Timestamp(1003 * 1000000), 0);
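+ // Summary of the expected slot layout after the feeds above (derived
+ // from the comments and assertions in this test, added for readability):
+ //   slot 0: put @1000    - own header, own body
+ //   slot 1: put @1001    - own header, own body
+ //   slot 2: update @1002 - new header, body shared with slot 1
+ //   slot 3: remove @1003 - header shared with slot 2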
+ flush(bucket);
+
+ {
+ MemFilePtr src(getMemFile(document::BucketId(16, 4)));
+ MemFilePtr dest(getMemFile(document::BucketId(17, 4)));
+ std::vector<Timestamp> timestamps {
+ Timestamp(1000 * 1000000),
+ Timestamp(1001 * 1000000),
+ Timestamp(1002 * 1000000),
+ Timestamp(1003 * 1000000)
+ };
+ std::vector<const MemSlot*> slots {
+ src->getSlotAtTime(Timestamp(1000 * 1000000)),
+ src->getSlotAtTime(Timestamp(1001 * 1000000)),
+ src->getSlotAtTime(Timestamp(1002 * 1000000)),
+ src->getSlotAtTime(Timestamp(1003 * 1000000))
+ };
+ dest->copySlotsFrom(*src, slots);
+ dest->flushToDisk();
+ CPPUNIT_ASSERT_EQUAL(uint32_t(4), dest->getSlotCount());
+
+ DataLocation header[4];
+ DataLocation body[4];
+ for (int i = 0; i < 4; ++i) {
+ const MemSlot* slot = dest->getSlotAtTime(timestamps[i]);
+ header[i] = slot->getLocation(HEADER);
+ body[i] = slot->getLocation(BODY);
+ }
+ CPPUNIT_ASSERT(!(header[0] == header[1]));
+
+ CPPUNIT_ASSERT_EQUAL(body[2], body[1]);
+ CPPUNIT_ASSERT_EQUAL(header[3], header[2]);
+ }
+}
+
+void
+MemFileTest::testFlushingToNonExistingFileAlwaysRunsCompaction()
+{
+ document::BucketId bucket(16, 4);
+
+ setMaxDocumentVersionsOption(1);
+ feedSameDocNTimes(10);
+ flush(bucket);
+
+ // Max version limit is 1, flushing should have compacted it down.
+ MemFilePtr file(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
+}
+
+void
+MemFileTest::testOrderDocSchemeDocumentsCanBeAddedToFile()
+{
+ // Quick explanation of the esoteric and particular values chosen below:
+ // orderdoc mangles the MSBs of the bucket ID based on the document ID's
+ // ordering parameters, so the document's bucket cannot be deduced directly
+ // from its generated GID. The values given here specify a bucket whose ID
+ // bits differ from those derived from the document's GID, and for which a
+ // GID-only bucket ownership check would fail (nuking the node with an
+ // assertion). We have to make sure such cases do not trigger false
+ // positives.
+ document::BucketId bucket(0x84000000ee723751);
+ auto doc = createDocument("the quick red fox trips over a hedge",
+ "orderdoc(3,1):storage_test:group1:9:9");
+ doPut(std::shared_ptr<Document>(std::move(doc)),
+ bucket,
+ Timestamp(1000000 * 1234));
+ flush(bucket);
+
+ MemFilePtr file(getMemFile(bucket));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
+ // Ideally we'd test the failure case as well, but that'd require framework
+ // support for death tests.
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.cpp b/memfilepersistence/src/tests/spi/memfiletestutils.cpp
new file mode 100644
index 00000000000..1e882ccbe6b
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfiletestutils.cpp
@@ -0,0 +1,455 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h>
+#include <sys/time.h>
+
+using document::DocumentType;
+
+namespace storage {
+namespace memfile {
+
+namespace {
+ spi::LoadType defaultLoadType(0, "default");
+}
+
+namespace {
+ vdstestlib::DirConfig initialize(uint32_t numDisks) {
+ system(vespalib::make_string("rm -rf vdsroot").c_str());
+ for (uint32_t i = 0; i < numDisks; i++) {
+ system(vespalib::make_string("mkdir -p vdsroot/disks/d%d", i).c_str());
+ }
+ vdstestlib::DirConfig config(getStandardConfig(true));
+ return config;
+ }
+
+ template<typename T>
+ struct ConfigReader : public T::Subscriber
+ {
+ T config;
+
+ ConfigReader(const std::string& configId) {
+ T::subscribe(configId, *this);
+ }
+ void configure(const T& c) { config = c; }
+ };
+}
+
+MemFileTestEnvironment::MemFileTestEnvironment(
+ uint32_t numDisks,
+ framework::ComponentRegister& reg,
+ const document::DocumentTypeRepo& repo)
+ : _config(initialize(numDisks)),
+ _provider(reg, _config.getConfigId())
+{
+ _provider.setDocumentRepo(repo);
+ _provider.getPartitionStates();
+}
+
+MemFileTestUtils::MemFileTestUtils()
+{
+}
+
+MemFileTestUtils::~MemFileTestUtils()
+{
+}
+
+void
+MemFileTestUtils::setupDisks(uint32_t numDisks) {
+ tearDown();
+ _componentRegister.reset(
+ new framework::defaultimplementation::ComponentRegisterImpl);
+ _clock.reset(new FakeClock);
+ _componentRegister->setClock(*_clock);
+ _memoryManager.reset(
+ new framework::defaultimplementation::MemoryManager(
+ framework::defaultimplementation::AllocationLogic::UP(
+ new framework::defaultimplementation::SimpleMemoryLogic(
+ *_clock, 1024 * 1024 * 1024))));
+ _componentRegister->setMemoryManager(*_memoryManager);
+ _env.reset(new MemFileTestEnvironment(numDisks,
+ *_componentRegister,
+ *getTypeRepo()));
+}
+
+Environment&
+MemFileTestUtils::env()
+{
+ return static_cast<MemFilePersistenceProvider&>(
+ getPersistenceProvider()).getEnvironment();
+}
+
+MemFilePersistenceProvider&
+MemFileTestUtils::getPersistenceProvider()
+{
+ return _env->_provider;
+}
+
+MemFilePersistenceThreadMetrics&
+MemFileTestUtils::getMetrics()
+{
+ return getPersistenceProvider().getMetrics();
+}
+
+std::string
+MemFileTestUtils::getMemFileStatus(const document::BucketId& id,
+ uint32_t disk)
+{
+ MemFilePtr file(getMemFile(id, disk));
+ std::ostringstream ost;
+ ost << id << ": " << file->getSlotCount() << "," << file->getDisk();
+ return ost.str();
+}
+
+std::string
+MemFileTestUtils::getModifiedBuckets()
+{
+ spi::BucketIdListResult result(
+ getPersistenceProvider().getModifiedBuckets());
+ const spi::BucketIdListResult::List& list(result.getList());
+ std::ostringstream ss;
+ for (size_t i = 0; i < list.size(); ++i) {
+ if (i != 0) {
+ ss << ",";
+ }
+ ss << std::hex << list[i].getId();
+ }
+ return ss.str();
+}
+
+MemFilePtr
+MemFileTestUtils::getMemFile(const document::BucketId& id, uint16_t disk)
+{
+ return env()._cache.get(id, env(), env().getDirectory(disk));
+}
+
+spi::Result
+MemFileTestUtils::flush(const document::BucketId& id, uint16_t disk)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ return getPersistenceProvider().flush(
+ spi::Bucket(id, spi::PartitionId(disk)), context);
+}
+
+document::Document::SP
+MemFileTestUtils::doPutOnDisk(
+ uint16_t disk,
+ uint32_t location,
+ Timestamp timestamp,
+ uint32_t minSize,
+ uint32_t maxSize)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ location, timestamp.getTime(), minSize, maxSize));
+ getPersistenceProvider().put(
+ spi::Bucket(document::BucketId(16, location), spi::PartitionId(disk)),
+ spi::Timestamp(timestamp.getTime()),
+ doc,
+ context);
+ return doc;
+}
+
+bool
+MemFileTestUtils::doRemoveOnDisk(
+ uint16_t disk,
+ const document::BucketId& bucketId,
+ const document::DocumentId& docId,
+ Timestamp timestamp,
+ OperationHandler::RemoveType persistRemove)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ if (persistRemove == OperationHandler::PERSIST_REMOVE_IF_FOUND) {
+ spi::RemoveResult result = getPersistenceProvider().removeIfFound(
+ spi::Bucket(bucketId, spi::PartitionId(disk)),
+ spi::Timestamp(timestamp.getTime()),
+ docId,
+ context);
+ return result.wasFound();
+ }
+ spi::RemoveResult result = getPersistenceProvider().remove(
+ spi::Bucket(bucketId, spi::PartitionId(disk)),
+ spi::Timestamp(timestamp.getTime()),
+ docId,
+ context);
+
+ return result.wasFound();
+}
+
+bool
+MemFileTestUtils::doUnrevertableRemoveOnDisk(
+ uint16_t disk,
+ const document::BucketId& bucketId,
+ const DocumentId& docId,
+ Timestamp timestamp)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ spi::RemoveResult result =
+ getPersistenceProvider().remove(
+ spi::Bucket(bucketId, spi::PartitionId(disk)),
+ spi::Timestamp(timestamp.getTime()),
+ docId, context);
+
+ return result.wasFound();
+}
+
+spi::GetResult
+MemFileTestUtils::doGetOnDisk(
+ uint16_t disk,
+ const document::BucketId& bucketId,
+ const document::DocumentId& docId,
+ const document::FieldSet& fields)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ return getPersistenceProvider().get(
+ spi::Bucket(bucketId, spi::PartitionId(disk)),
+ fields, docId, context);
+}
+
+document::DocumentUpdate::SP
+MemFileTestUtils::createBodyUpdate(
+ const document::DocumentId& docId,
+ const document::FieldValue& updateValue)
+{
+ const DocumentType*
+ docType(getTypeRepo()->getDocumentType("testdoctype1"));
+ document::DocumentUpdate::SP update(
+ new document::DocumentUpdate(*docType, docId));
+ std::shared_ptr<document::AssignValueUpdate> assignUpdate(
+ new document::AssignValueUpdate(updateValue));
+ document::FieldUpdate fieldUpdate(docType->getField("content"));
+ fieldUpdate.addUpdate(*assignUpdate);
+ update->addUpdate(fieldUpdate);
+ return update;
+}
+
+document::DocumentUpdate::SP
+MemFileTestUtils::createHeaderUpdate(
+ const document::DocumentId& docId,
+ const document::FieldValue& updateValue)
+{
+ const DocumentType*
+ docType(getTypeRepo()->getDocumentType("testdoctype1"));
+ document::DocumentUpdate::SP update(
+ new document::DocumentUpdate(*docType, docId));
+ std::shared_ptr<document::AssignValueUpdate> assignUpdate(
+ new document::AssignValueUpdate(updateValue));
+ document::FieldUpdate fieldUpdate(docType->getField("headerval"));
+ fieldUpdate.addUpdate(*assignUpdate);
+ update->addUpdate(fieldUpdate);
+ return update;
+}
+
+void
+MemFileTestUtils::doPut(const document::Document::SP& doc,
+ Timestamp time,
+ uint16_t disk,
+ uint16_t usedBits)
+{
+ document::BucketId bucket(
+ getBucketIdFactory().getBucketId(doc->getId()));
+ bucket.setUsedBits(usedBits);
+ doPut(doc, bucket, time, disk);
+}
+
+void
+MemFileTestUtils::doPut(const document::Document::SP& doc,
+ document::BucketId bid,
+ Timestamp time,
+ uint16_t disk)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ getPersistenceProvider().put(spi::Bucket(bid, spi::PartitionId(disk)),
+ spi::Timestamp(time.getTime()), doc, context);
+}
+
+spi::UpdateResult
+MemFileTestUtils::doUpdate(document::BucketId bid,
+ const document::DocumentUpdate::SP& update,
+ Timestamp time,
+ uint16_t disk)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ return getPersistenceProvider().update(
+ spi::Bucket(bid, spi::PartitionId(disk)),
+ spi::Timestamp(time.getTime()), update, context);
+}
+
+void
+MemFileTestUtils::doRemove(const document::DocumentId& id, Timestamp time,
+ uint16_t disk, bool unrevertableRemove,
+ uint16_t usedBits)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ document::BucketId bucket(getBucketIdFactory().getBucketId(id));
+ bucket.setUsedBits(usedBits);
+
+ if (unrevertableRemove) {
+ getPersistenceProvider().remove(
+ spi::Bucket(bucket, spi::PartitionId(disk)),
+ spi::Timestamp(time.getTime()),
+ id, context);
+ } else {
+ spi::RemoveResult result = getPersistenceProvider().removeIfFound(
+ spi::Bucket(bucket, spi::PartitionId(disk)),
+ spi::Timestamp(time.getTime()),
+ id, context);
+
+ if (!result.wasFound()) {
+ throw vespalib::IllegalStateException(
+ "Attempted to remove non-existing doc " + id.toString(),
+ VESPA_STRLOC);
+ }
+ }
+}
+
+void
+MemFileTestUtils::copyHeader(document::Document& dest,
+ const document::Document& src)
+{
+ // FIXME(vekterli): temporary solution while we don't have
+ // fieldset pruning functionality in Document.
+ //dest.setHeaderPtr(src.getHeaderPtr());
+ vespalib::nbostream originalBodyStream;
+ dest.serializeBody(originalBodyStream);
+
+ vespalib::nbostream headerStream;
+ src.serializeHeader(headerStream);
+ document::ByteBuffer hbuf(headerStream.peek(), headerStream.size());
+ dest.deserializeHeader(*getTypeRepo(), hbuf);
+ // deserializeHeader clears fields struct, so have to re-set body
+ document::ByteBuffer bbuf(originalBodyStream.peek(),
+ originalBodyStream.size());
+ dest.deserializeBody(*getTypeRepo(), bbuf);
+}
+
+void
+MemFileTestUtils::copyBody(document::Document& dest,
+ const document::Document& src)
+{
+ // FIXME(vekterli): temporary solution while we don't have
+ // fieldset pruning functionality in Document.
+ //dest.setBodyPtr(src.getBodyPtr());
+ vespalib::nbostream stream;
+ src.serializeBody(stream);
+ document::ByteBuffer buf(stream.peek(), stream.size());
+ dest.deserializeBody(*getTypeRepo(), buf);
+}
+
+void
+MemFileTestUtils::clearBody(document::Document& doc)
+{
+ // FIXME(vekterli): temporary solution while we don't have
+ // fieldset pruning functionality in Document.
+ //doc->getBody().clear();
+ vespalib::nbostream stream;
+ doc.serializeHeader(stream);
+ doc.deserialize(*getTypeRepo(), stream);
+}
+
+void
+MemFileTestUtils::createTestBucket(const document::BucketId& bucket,
+ uint16_t disk)
+{
+
+ uint32_t opsPerType = 2;
+ uint32_t numberOfLocations = 2;
+ uint32_t minDocSize = 0;
+ uint32_t maxDocSize = 128;
+
+ for (uint32_t useHeaderOnly = 0; useHeaderOnly < 2; ++useHeaderOnly) {
+ bool headerOnly = (useHeaderOnly == 1);
+ for (uint32_t optype=0; optype < 4; ++optype) {
+ for (uint32_t i=0; i<opsPerType; ++i) {
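+ // Derive a unique seed per (header-only, optype, iteration) combination
+ // and build a user location whose lower 32 bits match the bucket's raw
+ // id, so the generated document maps to the bucket under test.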
+ uint32_t seed = useHeaderOnly * 10000 + optype * 1000 + i + 1;
+ uint64_t location = (seed % numberOfLocations);
+ location <<= 32;
+ location += (bucket.getRawId() & 0xffffffff);
+ document::Document::SP doc(
+ createRandomDocumentAtLocation(
+ location, seed, minDocSize, maxDocSize));
+ if (headerOnly) {
+ clearBody(*doc);
+ }
+ doPut(doc, Timestamp(seed), disk, bucket.getUsedBits());
+ if (optype == 0) { // Regular put
+ } else if (optype == 1) { // Overwritten later in time
+ Document::SP doc2(new Document(*doc));
+ doc2->setValue(doc2->getField("content"),
+ document::StringFieldValue("overwritten"));
+ doPut(doc2, Timestamp(seed + 500),
+ disk, bucket.getUsedBits());
+ } else if (optype == 2) { // Removed
+ doRemove(doc->getId(), Timestamp(seed + 500), disk, false,
+ bucket.getUsedBits());
+ } else if (optype == 3) { // Unrevertable removed
+ doRemove(doc->getId(), Timestamp(seed), disk, true,
+ bucket.getUsedBits());
+ }
+ }
+ }
+ }
+ flush(bucket, disk);
+}
+
+void
+MemFileTestUtils::simulateIoErrorsForSubsequentlyOpenedFiles(
+ const IoErrors& errs)
+{
+ std::unique_ptr<SimulatedFailureLazyFile::Factory> factory(
+ new SimulatedFailureLazyFile::Factory);
+ factory->setWriteOpsBeforeFailure(errs._afterWrites);
+ factory->setReadOpsBeforeFailure(errs._afterReads);
+ env()._lazyFileFactory = std::move(factory);
+}
+
+void
+MemFileTestUtils::unSimulateIoErrorsForSubsequentlyOpenedFiles()
+{
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new DefaultLazyFileFactory(0));
+}
+
+std::string
+MemFileTestUtils::stringifyFields(const document::Document& doc) const
+{
+ using namespace document;
+ std::vector<std::string> output;
+ const StructFieldValue& fields(doc.getFields());
+ for (StructFieldValue::const_iterator
+ it(fields.begin()), e(fields.end());
+ it != e; ++it)
+ {
+ std::ostringstream ss;
+ const Field& f(it.field());
+ ss << f.getName() << ": ";
+ FieldValue::UP val(fields.getValue(f));
+ if (val.get()) {
+ ss << val->toString();
+ } else {
+ ss << "(null)";
+ }
+ output.push_back(ss.str());
+ }
+ std::ostringstream ret;
+ std::sort(output.begin(), output.end());
+ std::copy(output.begin(), output.end(),
+ std::ostream_iterator<std::string>(ret, "\n"));
+ return ret.str();
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.h b/memfilepersistence/src/tests/spi/memfiletestutils.h
new file mode 100644
index 00000000000..a13b902a214
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfiletestutils.h
@@ -0,0 +1,294 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::MemFileTestUtils
+ * \ingroup memfile
+ *
+ * \brief Utilities for unit tests of the MemFile layer.
+ *
+ * The memfile layer typically needs a MemFileTestEnvironment object that must
+ * be set up. This class creates such an object to be used by unit tests. Other
+ * utilities useful only for MemFile testing can be added here too.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <tests/testhelper.h>
+#include <vespa/persistence/spi/persistenceprovider.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h>
+#include <vespa/document/base/testdocman.h>
+#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
+#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h>
+#include <vespa/storageframework/defaultimplementation/memory/memorymanager.h>
+
+namespace storage {
+namespace memfile {
+
+struct FakeClock : public framework::Clock {
+public:
+ typedef std::unique_ptr<FakeClock> UP;
+
+ framework::MicroSecTime _absoluteTime;
+
+ FakeClock() {};
+
+ virtual void addSecondsToTime(uint32_t nr) {
+ _absoluteTime += framework::MicroSecTime(nr * uint64_t(1000000));
+ }
+
+ virtual framework::MicroSecTime getTimeInMicros() const {
+ return _absoluteTime;
+ }
+ virtual framework::MilliSecTime getTimeInMillis() const {
+ return getTimeInMicros().getMillis();
+ }
+ virtual framework::SecondTime getTimeInSeconds() const {
+ return getTimeInMicros().getSeconds();
+ }
+};
+
+struct MemFileTestEnvironment {
+ MemFileTestEnvironment(uint32_t numDisks,
+ framework::ComponentRegister& reg,
+ const document::DocumentTypeRepo& repo);
+
+ vdstestlib::DirConfig _config;
+ MemFilePersistenceProvider _provider;
+};
+
+class MemFileTestUtils : public Types, public document::TestDocMan, public CppUnit::TestFixture {
+private:
+ // These variables are kept in the test class. Instances that need to be
+ // unique per test must be set up in setupDisks() and cleared in
+ // tearDown().
+ document::BucketIdFactory _bucketIdFactory;
+ framework::defaultimplementation::ComponentRegisterImpl::UP _componentRegister;
+ FakeClock::UP _clock;
+ framework::defaultimplementation::MemoryManager::UP _memoryManager;
+ std::unique_ptr<MemFileTestEnvironment> _env;
+
+public:
+ MemFileTestUtils();
+ virtual ~MemFileTestUtils();
+
+ void setupDisks(uint32_t disks);
+
+ void tearDown() {
+ _env.reset();
+ _componentRegister.reset();
+ _memoryManager.reset();
+ _clock.reset();
+ }
+
+ std::string getMemFileStatus(const document::BucketId& id, uint32_t disk = 0);
+
+ std::string getModifiedBuckets();
+
+ /**
+ Flushes all cached data to disk and updates the bucket database accordingly.
+ */
+ void flush();
+
+ FakeClock& getFakeClock() { return *_clock; }
+
+ spi::Result flush(const document::BucketId& id, uint16_t disk = 0);
+
+ MemFilePersistenceProvider& getPersistenceProvider();
+
+ MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk = 0);
+
+ Environment& env();
+
+ MemFilePersistenceThreadMetrics& getMetrics();
+
+ MemFileTestEnvironment& getEnv() { return *_env; }
+
+ /**
+ Performs a put to the given disk.
+ Returns the document that was inserted.
+ */
+ document::Document::SP doPutOnDisk(
+ uint16_t disk,
+ uint32_t location,
+ Timestamp timestamp,
+ uint32_t minSize = 0,
+ uint32_t maxSize = 128);
+
+ document::Document::SP doPut(
+ uint32_t location,
+ Timestamp timestamp,
+ uint32_t minSize = 0,
+ uint32_t maxSize = 128)
+ { return doPutOnDisk(0, location, timestamp, minSize, maxSize); }
+
+ /**
+ Performs a remove on the given disk.
+ Returns true if the document was found and removed, false otherwise.
+ */
+ bool doRemoveOnDisk(
+ uint16_t disk,
+ const document::BucketId& bid,
+ const document::DocumentId& id,
+ Timestamp timestamp,
+ OperationHandler::RemoveType persistRemove);
+
+ bool doRemove(
+ const document::BucketId& bid,
+ const document::DocumentId& id,
+ Timestamp timestamp,
+ OperationHandler::RemoveType persistRemove) {
+ return doRemoveOnDisk(0, bid, id, timestamp, persistRemove);
+ }
+
+ bool doUnrevertableRemoveOnDisk(uint16_t disk,
+ const document::BucketId& bid,
+ const DocumentId& id,
+ Timestamp timestamp);
+
+ bool doUnrevertableRemove(const document::BucketId& bid,
+ const DocumentId& id,
+ Timestamp timestamp)
+ {
+ return doUnrevertableRemoveOnDisk(0, bid, id, timestamp);
+ }
+
+ virtual const document::BucketIdFactory& getBucketIdFactory() const
+ { return _bucketIdFactory; }
+
+ document::BucketIdFactory& getBucketIdFactory()
+ { return _bucketIdFactory; }
+
+ /**
+ * Do a remove toward storage set up in test environment.
+ *
+ * @id Document to remove.
+ * @disk If set, use this disk, otherwise lookup in bucket db.
+ * @unrevertableRemove If set, perform an unrevertable remove instead of a
+ *                     revertable remove-if-found.
+ * @usedBits Generate bucket to use from docid using this amount of bits.
+ */
+ void doRemove(const DocumentId& id, Timestamp, uint16_t disk,
+ bool unrevertableRemove = false, uint16_t usedBits = 16);
+
+ spi::GetResult doGetOnDisk(
+ uint16_t disk,
+ const document::BucketId& bucketId,
+ const document::DocumentId& docId,
+ const document::FieldSet& fields);
+
+ spi::GetResult doGet(
+ const document::BucketId& bucketId,
+ const document::DocumentId& docId,
+ const document::FieldSet& fields)
+ { return doGetOnDisk(0, bucketId, docId, fields); }
+
+ document::DocumentUpdate::SP createBodyUpdate(
+ const document::DocumentId& id,
+ const document::FieldValue& updateValue);
+
+ document::DocumentUpdate::SP createHeaderUpdate(
+ const document::DocumentId& id,
+ const document::FieldValue& updateValue);
+
+ virtual const document::DocumentTypeRepo::SP getTypeRepo() const
+ { return document::TestDocMan::getTypeRepoSP(); }
+
+ /**
+ * Do a put toward storage set up in test environment.
+ *
+ * @doc Document to put. Use TestDocMan to generate easily.
+ * @disk If set, use this disk, otherwise lookup in bucket db.
+ * @usedBits Generate bucket to use from docid using this amount of bits.
+ */
+ void doPut(const Document::SP& doc, Timestamp,
+ uint16_t disk, uint16_t usedBits = 16);
+
+ void doPut(const document::Document::SP& doc,
+ document::BucketId bid,
+ Timestamp time,
+ uint16_t disk = 0);
+
+ spi::UpdateResult doUpdate(document::BucketId bid,
+ const document::DocumentUpdate::SP& update,
+ Timestamp time,
+ uint16_t disk = 0);
+
+ /**
+ * Create a test bucket with various content representing most states a
+ * bucket can be in, so that tests whose operations must handle all the
+ * various bucket contents have a convenient bucket to work with.
+ *
+ * @disk If set, use this disk, otherwise lookup in bucket db.
+ */
+ void createTestBucket(const BucketId&, uint16_t disk = 0xffff);
+
+ /**
+ * In-place modify doc so that it has no more body fields.
+ */
+ void clearBody(document::Document& doc);
+
+ /**
+ * Copy all header data from src into dest, replacing any
+ * header fields it may already have there. NOTE: this will
+ * also overwrite document ID, type etc!
+ */
+ void copyHeader(document::Document& dest,
+ const document::Document& src);
+
+ /**
+ * Copy all body data from src into dest, replacing any
+ * body fields it may already have there.
+ */
+ void copyBody(document::Document& dest,
+ const document::Document& src);
+
+ std::string stringifyFields(const Document& doc) const;
+
+ struct IoErrors {
+ int _afterReads;
+ int _afterWrites;
+
+ IoErrors()
+ : _afterReads(0),
+ _afterWrites(0)
+ {
+ }
+
+ IoErrors& afterReads(int n) {
+ _afterReads = n;
+ return *this;
+ }
+
+ IoErrors& afterWrites(int n) {
+ _afterWrites = n;
+ return *this;
+ }
+ };
+
+ /**
+ * Replaces internal LazyFile factory so that it produces LazyFile
+ * implementations that trigger I/O exceptions on read/write. Optionally,
+ * can supply a parameter setting explicit bounds on how many operations
+ * are allowed on a file before triggering exceptions from there on out. A
+ * bound of -1 in practice means "don't fail ever" while 0 means "fail the
+ * next op of that type".
+ */
+ void simulateIoErrorsForSubsequentlyOpenedFiles(
+ const IoErrors& errs = IoErrors());
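+
+ // Illustrative usage sketch (not part of the original utilities): let one
+ // write succeed on any subsequently opened file, fail the next one, and
+ // restore the default factory afterwards:
+ //
+ //     simulateIoErrorsForSubsequentlyOpenedFiles(
+ //             IoErrors().afterWrites(1));
+ //     // ... run an operation expected to throw vespalib::IoException ...
+ //     unSimulateIoErrorsForSubsequentlyOpenedFiles();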
+
+ /**
+ * Replace internal LazyFile factory with the default, non-failing impl.
+ */
+ void unSimulateIoErrorsForSubsequentlyOpenedFiles();
+};
+
+class SingleDiskMemFileTestUtils : public MemFileTestUtils
+{
+public:
+ void setUp() {
+ setupDisks(1);
+ }
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp b/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp
new file mode 100644
index 00000000000..a5d1c50d043
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp
@@ -0,0 +1,1110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <tests/spi/memfiletestutils.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/mapper/locationreadplanner.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <tests/spi/options_builder.h>
+
+namespace storage {
+namespace memfile {
+
+struct MemFileV1SerializerTest : public SingleDiskMemFileTestUtils
+{
+ void tearDown();
+ void setUpPartialWriteEnvironment();
+ void resetConfig(uint32_t minimumFileSize, uint32_t minimumFileHeaderBlockSize);
+ void doTestPartialWriteRemove(bool readAll);
+ void doTestPartialWriteUpdate(bool readAll);
+
+ void testWriteReadSingleDoc();
+ void testWriteReadPartial();
+ void testWriteReadPartialRemoved();
+ void testPartialWritePutHeaderOnly();
+ void testPartialWritePut();
+ void testPartialWriteRemoveCached();
+ void testPartialWriteRemoveNotCached();
+ void testPartialWriteUpdateCached();
+ void testPartialWriteUpdateNotCached();
+ void testPartialWriteTooMuchFreeSpace();
+ void testPartialWriteNotEnoughFreeSpace();
+ void testWriteReadSingleRemovedDoc();
+ void testLocationDiskIoPlannerSimple();
+ void testLocationDiskIoPlannerMergeReads();
+ void testLocationDiskIoPlannerAlignReads();
+ void testLocationDiskIoPlannerOneDocument();
+ void testSeparateReadsForHeaderAndBody();
+ void testLocationsRemappedConsistently();
+ void testHeaderBufferTooSmall();
+
+ /*std::unique_ptr<MemFile> createMemFile(FileSpecification& file,
+ bool callLoadFile)
+ {
+ return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile));
+ }*/
+
+ CPPUNIT_TEST_SUITE(MemFileV1SerializerTest);
+ CPPUNIT_TEST(testWriteReadSingleDoc);
+ CPPUNIT_TEST(testWriteReadPartial);
+ CPPUNIT_TEST(testWriteReadPartialRemoved);
+ CPPUNIT_TEST(testWriteReadSingleRemovedDoc);
+ CPPUNIT_TEST(testPartialWritePutHeaderOnly);
+ CPPUNIT_TEST(testPartialWritePut);
+ CPPUNIT_TEST(testPartialWriteRemoveCached);
+ CPPUNIT_TEST(testPartialWriteRemoveNotCached);
+ CPPUNIT_TEST(testPartialWriteUpdateCached);
+ CPPUNIT_TEST(testPartialWriteUpdateNotCached);
+ CPPUNIT_TEST(testLocationDiskIoPlannerSimple);
+ CPPUNIT_TEST(testLocationDiskIoPlannerMergeReads);
+ CPPUNIT_TEST(testLocationDiskIoPlannerAlignReads);
+ CPPUNIT_TEST(testLocationDiskIoPlannerOneDocument);
+ CPPUNIT_TEST(testSeparateReadsForHeaderAndBody);
+ CPPUNIT_TEST(testPartialWriteTooMuchFreeSpace);
+ CPPUNIT_TEST(testPartialWriteNotEnoughFreeSpace);
+ CPPUNIT_TEST(testLocationsRemappedConsistently);
+ CPPUNIT_TEST(testHeaderBufferTooSmall);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1SerializerTest);
+
+namespace {
+
+const vespalib::LazyFile&
+getFileHandle(const MemFile& mf1)
+{
+ return static_cast<const SimpleMemFileIOBuffer&>(
+ mf1.getMemFileIO()).getFileHandle();
+}
+
+const LoggingLazyFile&
+getLoggerFile(const MemFile& file)
+{
+ return static_cast<const LoggingLazyFile&>(getFileHandle(file));
+}
+
+bool isContentEqual(MemFile& mf1, MemFile& mf2,
+ bool requireEqualContentCached, std::ostream& error)
+{
+ MemFile::const_iterator it1(
+ mf1.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED));
+ MemFile::const_iterator it2(
+ mf2.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED));
+ while (true) {
+ if (it1 == mf1.end() && it2 == mf2.end()) {
+ return true;
+ }
+ if (it1 == mf1.end() || it2 == mf2.end()) {
+ error << "Different amount of GID unique slots";
+ return false;
+ }
+ if (it1->getTimestamp() != it2->getTimestamp()) {
+ error << "Different timestamps";
+ return false;
+ }
+ if (it1->getGlobalId() != it2->getGlobalId()) {
+ error << "Different gids";
+ return false;
+ }
+ if (it1->getPersistedFlags() != it2->getPersistedFlags()) {
+ error << "Different persisted flags";
+ return false;
+ }
+ if (requireEqualContentCached) {
+ if (mf1.partAvailable(*it1, Types::BODY)
+ ^ mf2.partAvailable(*it2, Types::BODY)
+ || mf1.partAvailable(*it1, Types::HEADER)
+ ^ mf2.partAvailable(*it2, Types::HEADER))
+ {
+ error << "Difference in cached content: ";
+ return false;
+ }
+ }
+
+ if (mf1.partAvailable(*it1, Types::HEADER) &&
+ mf2.partAvailable(*it2, Types::HEADER))
+ {
+ document::Document::UP doc1 = mf1.getDocument(*it1, Types::ALL);
+ document::Document::UP doc2 = mf2.getDocument(*it2, Types::ALL);
+
+ CPPUNIT_ASSERT(doc1.get());
+ CPPUNIT_ASSERT(doc2.get());
+
+ if (*doc1 != *doc2) {
+ error << "Documents different: Expected:\n"
+ << doc1->toString(true) << "\nActual:\n"
+ << doc2->toString(true) << "\n";
+ return false;
+ }
+ }
+ ++it1;
+ ++it2;
+ }
+}
+
+bool
+validateMemFileStructure(const MemFile& mf, std::ostream& error)
+{
+ const SimpleMemFileIOBuffer& ioBuf(
+ dynamic_cast<const SimpleMemFileIOBuffer&>(mf.getMemFileIO()));
+ const FileInfo& fileInfo(ioBuf.getFileInfo());
+ if (fileInfo.getFileSize() % 512) {
+ error << "File size is not a multiple of 512 bytes";
+ return false;
+ }
+ if (fileInfo.getBlockIndex(Types::BODY) % 512) {
+ error << "Body start index is not a multiple of 512 bytes";
+ return false;
+ }
+ if (fileInfo.getBlockSize(Types::BODY) % 512) {
+ error << "Body size is not a multiple of 512 bytes";
+ return false;
+ }
+ return true;
+}
+
+}
+
+void
+MemFileV1SerializerTest::tearDown() {
+ //_memFile.reset();
+}
+
+/**
+ * Adjust minimum slotfile size values to avoid rewriting the whole file
+ * when we want to exercise a partial write
+ */
+void
+MemFileV1SerializerTest::setUpPartialWriteEnvironment()
+{
+ resetConfig(4096, 2048);
+}
+
+void
+MemFileV1SerializerTest::resetConfig(uint32_t minimumFileSize,
+ uint32_t minimumFileHeaderBlockSize)
+{
+ using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig;
+ using MemFileConfigBuilder
+ = vespa::config::storage::StorMemfilepersistenceConfigBuilder;
+
+ MemFileConfigBuilder persistenceConfig(
+ *env().acquireConfigReadLock().memFilePersistenceConfig());
+ persistenceConfig.minimumFileHeaderBlockSize = minimumFileHeaderBlockSize;
+ persistenceConfig.minimumFileSize = minimumFileSize;
+ auto newCfg = std::unique_ptr<MemFileConfig>(
+ new MemFileConfig(persistenceConfig));
+ env().acquireConfigWriteLock().setMemFilePersistenceConfig(
+ std::move(newCfg));
+}
+
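+// No-op stand-in for MemFileIOInterface used by the LocationDiskIoPlanner
+// tests below; it reports nothing as cached or persisted, so the planner has
+// to plan disk reads for every location it is given.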
+struct DummyMemFileIOInterface : MemFileIOInterface {
+ Document::UP getDocumentHeader(const document::DocumentTypeRepo&,
+ DataLocation) const
+ {
+ return Document::UP();
+ }
+
+ document::DocumentId getDocumentId(DataLocation) const {
+ return document::DocumentId("");
+ }
+
+ void readBody(const document::DocumentTypeRepo&,
+ DataLocation,
+ Document&) const
+ {
+ }
+ DataLocation addDocumentIdOnlyHeader(
+ const DocumentId&,
+ const document::DocumentTypeRepo&)
+ {
+ return DataLocation();
+ }
+ DataLocation addHeader(const Document&) { return DataLocation(); }
+ DataLocation addBody(const Document&) { return DataLocation(); }
+ void clear(DocumentPart) {}
+ bool verifyConsistent() const { return true; }
+ void move(const FileSpecification&) {}
+ DataLocation copyCache(const MemFileIOInterface&,
+ DocumentPart,
+ DataLocation)
+ {
+ return DataLocation();
+ }
+
+ void close() {};
+ bool isCached(DataLocation, DocumentPart) const { return false; }
+ bool isPersisted(DataLocation, DocumentPart) const { return false; }
+ uint32_t getSerializedSize(DocumentPart,
+ DataLocation) const { return 0; }
+
+ void ensureCached(Environment&,
+ DocumentPart,
+ const std::vector<DataLocation>&)
+ {}
+
+ size_t getCachedSize(DocumentPart) const { return 0; }
+};
+
+#define VESPA_MEMFILEV1_SETUP_SOURCE \
+ system("rm -f testfile.0"); \
+ document::Document::SP doc(createRandomDocumentAtLocation(4)); \
+ FileSpecification file(document::BucketId(16, 4), env().getDirectory(0), "testfile.0"); \
+ MemFile source(file, env());
+
+#define VESPA_MEMFILEV1_DIFF(source, target) \
+ "\nSource:\n" + source.toString(true) \
+ + "\nTarget:\n" + target.toString(true)
+
+#define VESPA_MEMFILEV1_VALIDATE_STRUCTURE(mfile) \
+{ \
+ std::ostringstream validateErr; \
+ if (!validateMemFileStructure(mfile, validateErr)) { \
+ CPPUNIT_FAIL(validateErr.str()); \
+ } \
+}
+
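+// VESPA_MEMFILEV1_ASSERT_SERIALIZATION flushes the given MemFile to disk,
+// validates the on-disk structure, reloads the file as `target` and asserts
+// that the reloaded content equals the in-memory source.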
+#define VESPA_MEMFILEV1_ASSERT_SERIALIZATION(sourceMemFile) \
+env()._memFileMapper.flush(sourceMemFile, env()); \
+VESPA_MEMFILEV1_VALIDATE_STRUCTURE(sourceMemFile) \
+MemFile target(file, env()); \
+VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target) \
+{ \
+ target.ensureBodyBlockCached(); \
+ target.getBucketInfo(); \
+ std::ostringstream diff; \
+ if (!isContentEqual(sourceMemFile, target, true, diff)) { \
+ std::string msg = "MemFiles not content equal: " + diff.str() \
+ + VESPA_MEMFILEV1_DIFF(sourceMemFile, target); \
+ CPPUNIT_FAIL(msg); \
+ } \
+}
+
+void
+MemFileV1SerializerTest::testWriteReadSingleDoc()
+{
+ VESPA_MEMFILEV1_SETUP_SOURCE;
+ source.addPutSlot(*doc, Timestamp(1001));
+ std::string foo(VESPA_MEMFILEV1_DIFF(source, source));
+ VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source);
+}
+
+void
+MemFileV1SerializerTest::testWriteReadPartial()
+{
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ std::map<Timestamp, Document::SP> docs;
+ {
+ MemFile source(file, env());
+
+ for (int i = 0; i < 50; ++i) {
+ Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000));
+ source.addPutSlot(*doc, Timestamp(1001 + i));
+ docs[Timestamp(1001 + i)] = doc;
+ }
+
+ env()._memFileMapper.flush(source, env());
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source);
+ }
+
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).maximumReadThroughGap(1024).build());
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory());
+
+ MemFile target(file, env());
+
+ std::vector<Timestamp> timestamps;
+
+ for (int i = 0; i < 50; i+=4) {
+ timestamps.push_back(Timestamp(1001 + i));
+ }
+ CPPUNIT_ASSERT_EQUAL(size_t(13), timestamps.size());
+
+ getLoggerFile(target).operations.clear();
+ target.ensureDocumentCached(timestamps, false);
+ // Headers are small enough to be read in a single op, plus 13 body reads
+ CPPUNIT_ASSERT_EQUAL(14, (int)getLoggerFile(target).operations.size());
+
+ for (std::size_t i = 0; i < timestamps.size(); ++i) {
+ const MemSlot* slot = target.getSlotAtTime(timestamps[i]);
+ CPPUNIT_ASSERT(slot);
+ CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER));
+ CPPUNIT_ASSERT(target.partAvailable(*slot, BODY));
+ CPPUNIT_ASSERT_EQUAL(*docs[timestamps[i]], *target.getDocument(*slot, ALL));
+ }
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target);
+}
+
+void
+MemFileV1SerializerTest::testWriteReadPartialRemoved()
+{
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ MemFile source(file, env());
+
+ for (int i = 0; i < 50; ++i) {
+ Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000));
+ source.addPutSlot(*doc, Timestamp(1001 + i));
+ source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1001 + i)),
+ Timestamp(2001 + i));
+ }
+
+ env()._memFileMapper.flush(source, env());
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source);
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).maximumReadThroughGap(1024).build());
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory);
+
+ MemFile target(file, env());
+
+ std::vector<Timestamp> timestamps;
+
+ for (int i = 0; i < 50; i+=4) {
+ timestamps.push_back(Timestamp(2001 + i));
+ }
+
+ getLoggerFile(target).operations.clear();
+ target.ensureDocumentCached(timestamps, false);
+ // All removed; should only read header locations
+ CPPUNIT_ASSERT_EQUAL(1, (int)getLoggerFile(target).operations.size());
+
+ for (std::size_t i = 0; i < timestamps.size(); ++i) {
+ const MemSlot* slot = target.getSlotAtTime(timestamps[i]);
+ const MemSlot* removedPut(
+ target.getSlotAtTime(timestamps[i] - Timestamp(1000)));
+ CPPUNIT_ASSERT(slot);
+ CPPUNIT_ASSERT(removedPut);
+ CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER));
+ CPPUNIT_ASSERT_EQUAL(removedPut->getLocation(HEADER),
+ slot->getLocation(HEADER));
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), slot->getLocation(BODY));
+ }
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target);
+}
+
+void MemFileV1SerializerTest::testWriteReadSingleRemovedDoc()
+{
+ VESPA_MEMFILEV1_SETUP_SOURCE;
+ source.addPutSlot(*doc, Timestamp(1001));
+ source.addRemoveSlot(
+ *source.getSlotAtTime(Timestamp(1001)), Timestamp(2001));
+ VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source);
+}
+
+/**
+ * Write a single put with no body to the memfile and ensure it is
+ * persisted properly without a body block
+ */
+void
+MemFileV1SerializerTest::testPartialWritePutHeaderOnly()
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ document::Document::SP doc(createRandomDocumentAtLocation(4));
+ {
+ MemFile source(file, env());
+ source.addPutSlot(*doc, Timestamp(1001));
+ env()._memFileMapper.flush(source, env());
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source);
+ }
+ {
+ // Have to put a second time since the first one will always
+ // rewrite the entire file
+ MemFile target(file, env());
+ Document::SP doc2(createRandomDocumentAtLocation(4));
+ clearBody(*doc2);
+ target.addPutSlot(*doc2, Timestamp(1003));
+ env()._memFileMapper.flush(target, env());
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target);
+ }
+ {
+ MemFile target(file, env());
+ target.ensureBodyBlockCached();
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount());
+
+ const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003));
+ CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0);
+ CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0);
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(0, 0), slot.getLocation(BODY));
+ VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target);
+ }
+}
+
+
+
+
+void
+MemFileV1SerializerTest::testLocationDiskIoPlannerSimple()
+{
+ std::vector<MemSlot> slots;
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1001),
+ DataLocation(0, 1024),
+ DataLocation(4096, 512), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1003),
+ DataLocation(1024, 1024),
+ DataLocation(8192, 512), 0, 0));
+ }
+
+ std::vector<DataLocation> headers;
+ std::vector<DataLocation> bodies;
+ headers.push_back(slots[0].getLocation(HEADER));
+ bodies.push_back(slots[0].getLocation(BODY));
+
+ DummyMemFileIOInterface dummyIo;
+ {
+ LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 100, 0);
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(0, 1024),
+ planner.getIoOperations()[0]);
+ }
+ {
+ LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 100, 4096);
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(8192, 512), // + block index
+ planner.getIoOperations()[0]);
+ }
+}
+
+void
+MemFileV1SerializerTest::testLocationDiskIoPlannerMergeReads()
+{
+ std::vector<MemSlot> slots;
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1001),
+ DataLocation(0, 1024),
+ DataLocation(5120, 512), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1002),
+ DataLocation(2048, 1024),
+ DataLocation(7168, 512), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1003),
+ DataLocation(1024, 1024),
+ DataLocation(9216, 512), 0, 0));
+ }
+
+ std::vector<DataLocation> headers;
+ std::vector<DataLocation> bodies;
+ for (int i = 0; i < 2; ++i) {
+ headers.push_back(slots[i].getLocation(HEADER));
+ bodies.push_back(slots[i].getLocation(BODY));
+ }
+
+ DummyMemFileIOInterface dummyIo;
+ {
+ LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1025, 0);
+
+ CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(0, 3072),
+ planner.getIoOperations()[0]);
+ }
+
+ {
+ LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1025, 0);
+
+ CPPUNIT_ASSERT_EQUAL(2, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(5120, 512),
+ planner.getIoOperations()[0]);
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(7168, 512),
+ planner.getIoOperations()[1]);
+ }
+}
+
+void
+MemFileV1SerializerTest::testLocationDiskIoPlannerOneDocument()
+{
+ std::vector<MemSlot> slots;
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1001),
+ DataLocation(0, 1024),
+ DataLocation(5120, 512), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1002),
+ DataLocation(2048, 1024),
+ DataLocation(7168, 512), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1003),
+ DataLocation(1024, 1024),
+ DataLocation(9216, 512), 0, 0));
+ }
+
+ std::vector<DataLocation> headers;
+ std::vector<DataLocation> bodies;
+ headers.push_back(slots[1].getLocation(HEADER));
+ bodies.push_back(slots[1].getLocation(BODY));
+
+ DummyMemFileIOInterface dummyIo;
+ {
+ LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1000, 0);
+ CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(2048, 1024),
+ planner.getIoOperations()[0]);
+ }
+
+ {
+ LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1000, 0);
+ CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size());
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(7168, 512),
+ planner.getIoOperations()[0]);
+ }
+}
+
+void
+MemFileV1SerializerTest::testLocationDiskIoPlannerAlignReads()
+{
+ std::vector<MemSlot> slots;
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1001),
+ DataLocation(7, 100),
+ DataLocation(5000, 500), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1002),
+ DataLocation(2000, 100),
+ DataLocation(7000, 500), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1003),
+ DataLocation(110, 200),
+ DataLocation(9000, 500), 0, 0));
+ }
+
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ slots.push_back(
+ MemSlot(
+ doc->getId().getGlobalId(),
+ Timestamp(1004),
+ DataLocation(3000, 100),
+ DataLocation(11000, 500), 0, 0));
+ }
+
+ std::vector<DataLocation> headers;
+ std::vector<DataLocation> bodies;
+ for (int i = 0; i < 2; ++i) {
+ headers.push_back(slots[i].getLocation(HEADER));
+ bodies.push_back(slots[i].getLocation(BODY));
+ }
+
+ DummyMemFileIOInterface dummyIo;
+ {
+ LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 512, 0);
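+ // Reads are expected to be widened to 512-byte boundaries: (7, 100)
+ // becomes (0, 512) and (2000, 100) becomes (1536, 1024).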
+ std::vector<DataLocation> expected;
+ expected.push_back(DataLocation(0, 512));
+ expected.push_back(DataLocation(1536, 1024));
+
+ CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations());
+ }
+ {
+ LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 512, 0);
+ std::vector<DataLocation> expected;
+ expected.push_back(DataLocation(4608, 1024));
+ expected.push_back(DataLocation(6656, 1024));
+
+ CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations());
+ }
+}
+
+// TODO(vekterli): add read planner test with a location cached
+
+void
+MemFileV1SerializerTest::testSeparateReadsForHeaderAndBody()
+{
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ Document::SP doc(createRandomDocumentAtLocation(4, 0, 1000, 2000));
+ {
+ MemFile source(file, env());
+ source.addPutSlot(*doc, Timestamp(1001));
+
+ env()._memFileMapper.flush(source, env());
+ }
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .maximumReadThroughGap(1024*1024*100)
+ .build());
+ env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>(
+ new LoggingLazyFile::Factory());
+
+ MemFile target(file, env());
+
+ std::vector<Timestamp> timestamps;
+ timestamps.push_back(Timestamp(1001));
+
+ getLoggerFile(target).operations.clear();
+ target.ensureDocumentCached(timestamps, false);
+
+ CPPUNIT_ASSERT_EQUAL(2, (int)getLoggerFile(target).operations.size());
+ const MemSlot* slot = target.getSlotAtTime(Timestamp(1001));
+ CPPUNIT_ASSERT(slot);
+ CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER));
+ CPPUNIT_ASSERT(target.partAvailable(*slot, BODY));
+ CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(*slot, ALL));
+
+ CPPUNIT_ASSERT(getMetrics().serialization.headerReadSize.getLast() > 0);
+ CPPUNIT_ASSERT(getMetrics().serialization.bodyReadSize.getLast() > 0);
+}
+
+/**
+ * Write a single put with body to the memfile and ensure it is
+ * persisted properly with both header and body blocks
+ */
+void
+MemFileV1SerializerTest::testPartialWritePut()
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ {
+ MemFile source(file, env());
+ source.addPutSlot(*doc, Timestamp(1001));
+
+ env()._memFileMapper.flush(source, env());
+ }
+
+ {
+ // Have to put a second time since the first one will always
+ // rewrite the entire file
+ MemFile target(file, env());
+ Document::SP doc2(createRandomDocumentAtLocation(4));
+ target.addPutSlot(*doc2, Timestamp(1003));
+ env()._memFileMapper.flush(target, env());
+ }
+ {
+ MemFile target(file, env());
+ target.ensureBodyBlockCached();
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount());
+
+ const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003));
+ CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0);
+ CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0);
+
+ CPPUNIT_ASSERT(slot.getLocation(BODY)._size > 0);
+ CPPUNIT_ASSERT(slot.getLocation(BODY)._pos > 0);
+ }
+}
+
+void
+MemFileV1SerializerTest::doTestPartialWriteRemove(bool readAll)
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ {
+ MemFile source(file, env());
+ source.addPutSlot(*doc, Timestamp(1001));
+ env()._memFileMapper.flush(source, env());
+ }
+ {
+ MemFile target(file, env());
+ // Only populate cache before removing if explicitly told so
+ if (readAll) {
+ target.ensureBodyBlockCached();
+ }
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), target.getSlotCount());
+ target.addRemoveSlot(target[0], Timestamp(1003));
+
+ env()._memFileMapper.flush(target, env());
+ }
+ {
+ MemFile target(file, env());
+ target.ensureBodyBlockCached();
+
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount());
+
+ const MemSlot& originalSlot = target[0];
+ const MemSlot& removeSlot = target[1];
+ CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0);
+ CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0);
+ CPPUNIT_ASSERT_EQUAL(
+ originalSlot.getLocation(HEADER),
+ removeSlot.getLocation(HEADER));
+ CPPUNIT_ASSERT_EQUAL(
+ DataLocation(0, 0), removeSlot.getLocation(BODY));
+ }
+}
+
+/**
+ * Ensure that removes get the same header location as the Put
+ * they're removing, and that they get a zero body location
+ */
+void
+MemFileV1SerializerTest::testPartialWriteRemoveCached()
+{
+ doTestPartialWriteRemove(true);
+}
+
+void
+MemFileV1SerializerTest::testPartialWriteRemoveNotCached()
+{
+ doTestPartialWriteRemove(false);
+}
+
+void
+MemFileV1SerializerTest::doTestPartialWriteUpdate(bool readAll)
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ {
+ MemFile source(file, env());
+ source.addPutSlot(*doc, Timestamp(1001));
+ env()._memFileMapper.flush(source, env());
+ }
+
+ Document::SP doc2;
+ {
+ MemFile target(file, env());
+ if (readAll) {
+ target.ensureBodyBlockCached();
+ }
+
+ doc2.reset(new Document(*doc->getDataType(), doc->getId()));
+ clearBody(*doc2);
+ doc2->setValue(doc->getField("hstringval"),
+ document::StringFieldValue("Some updated content"));
+
+ target.addUpdateSlot(*doc2, *target.getSlotAtTime(Timestamp(1001)),
+ Timestamp(1003));
+ env()._memFileMapper.flush(target, env());
+ }
+
+ {
+ MemFile target(file, env());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount());
+ const MemSlot& originalSlot = target[0];
+ const MemSlot& updateSlot = target[1];
+ CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0);
+ CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0);
+ CPPUNIT_ASSERT_EQUAL(
+ originalSlot.getLocation(BODY),
+ updateSlot.getLocation(BODY));
+ CPPUNIT_ASSERT(
+ updateSlot.getLocation(HEADER)
+ != originalSlot.getLocation(HEADER));
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[0], ALL));
+ copyHeader(*doc, *doc2);
+ CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[1], ALL));
+ }
+}
+
+/**
+ * Ensure that header updates keep the same body block
+ */
+void
+MemFileV1SerializerTest::testPartialWriteUpdateCached()
+{
+ doTestPartialWriteUpdate(true);
+}
+
+void
+MemFileV1SerializerTest::testPartialWriteUpdateNotCached()
+{
+ doTestPartialWriteUpdate(false);
+}
+
+void
+MemFileV1SerializerTest::testPartialWriteTooMuchFreeSpace()
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ {
+ MemFile source(file, env());
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ source.addPutSlot(*doc, Timestamp(1001));
+ env()._memFileMapper.flush(source, env());
+ }
+ int64_t sizeBefore;
+ // Append filler to slotfile to make it too big for comfort,
+ // forcing a rewrite to shrink it down
+ {
+ vespalib::File slotfile(file.getPath());
+ slotfile.open(0);
+ CPPUNIT_ASSERT(slotfile.isOpen());
+ sizeBefore = slotfile.getFileSize();
+ slotfile.resize(sizeBefore * 20); // Well over min fill rate of 10%
+ }
+ // Write new slot to file; it should now be rewritten with the
+ // same file size as originally
+ {
+ MemFile source(file, env());
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ source.addPutSlot(*doc, Timestamp(1003));
+ env()._memFileMapper.flush(source, env());
+ }
+ {
+ vespalib::File slotfile(file.getPath());
+ slotfile.open(0);
+ CPPUNIT_ASSERT(slotfile.isOpen());
+ CPPUNIT_ASSERT_EQUAL(
+ sizeBefore,
+ slotfile.getFileSize());
+ }
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization
+ .fullRewritesDueToDownsizingFile.getValue());
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization
+ .fullRewritesDueToTooSmallFile.getValue());
+}
+
+void
+MemFileV1SerializerTest::testPartialWriteNotEnoughFreeSpace()
+{
+ setUpPartialWriteEnvironment();
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ // Write file initially
+ MemFile source(file, env());
+ {
+ Document::SP doc(createRandomDocumentAtLocation(4));
+ source.addPutSlot(*doc, Timestamp(1001));
+ env()._memFileMapper.flush(source, env());
+ }
+
+ uint32_t minFile = 1024 * 512;
+ auto memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig();
+ resetConfig(minFile, memFileCfg->minimumFileHeaderBlockSize);
+
+ // Create doc bigger than initial minimum filesize,
+ // prompting a full rewrite
+ Document::SP doc(
+ createRandomDocumentAtLocation(4, 0, 4096, 4096));
+ source.addPutSlot(*doc, Timestamp(1003));
+
+ env()._memFileMapper.flush(source, env());
+
+ CPPUNIT_ASSERT_EQUAL(
+ minFile,
+ uint32_t(getFileHandle(source).getFileSize()));
+
+ CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization
+ .fullRewritesDueToDownsizingFile.getValue());
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization
+ .fullRewritesDueToTooSmallFile.getValue());
+
+    // Now, ensure we respect the minimum file size and don't try to
+    // "helpfully" rewrite the file again. (We detect a full file rewrite
+    // by exploiting the fact that we don't currently check whether the
+    // file is smaller than the minimum file size; if that changes, so
+    // must this test.)
+ memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig();
+ resetConfig(2 * minFile, memFileCfg->minimumFileHeaderBlockSize);
+
+ source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1003)),
+ Timestamp(1005));
+ env()._memFileMapper.flush(source, env());
+
+ CPPUNIT_ASSERT_EQUAL(
+ minFile,
+ uint32_t(getFileHandle(source).getFileSize()));
+
+ CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization
+ .fullRewritesDueToTooSmallFile.getValue());
+}
+
+// Test that we don't mess up when remapping locations that
+// have already been written during the same operation. That is:
+// part A is remapped (P1, S1) -> (P2, S2)
+// part B is remapped (P2, S2) -> (P3, S3)
+// Obviously, part B should not overwrite the location of part A,
+// but this will happen if we don't do the updating in one batch.
+void
+MemFileV1SerializerTest::testLocationsRemappedConsistently()
+{
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+
+ std::map<Timestamp, Document::SP> docs;
+ {
+ MemFile mf(file, env());
+ Document::SP tmpDoc(
+ createRandomDocumentAtLocation(4, 0, 100, 100));
+
+        // Create docs identical in size but differing only in doc ids.
+        // By keeping the same size but inserting with _lower_ timestamps
+        // for docs that get higher location positions, we ensure that
+        // when the file is rewritten, the lower timestamp slots will
+        // get remapped to locations that match existing locations for
+        // the higher timestamp slots.
+ for (int i = 0; i < 2; ++i) {
+ std::ostringstream ss;
+ ss << "doc" << i;
+ DocumentId id(document::UserDocIdString("userdoc:foo:4:" + ss.str()));
+ Document::SP doc(new Document(*tmpDoc->getDataType(), id));
+ doc->getFields() = tmpDoc->getFields();
+ mf.addPutSlot(*doc, Timestamp(1000 - i));
+ docs[Timestamp(1000 - i)] = doc;
+ }
+
+ env()._memFileMapper.flush(mf, env());
+ // Dirty the cache for rewrite
+ {
+ DocumentId id2(document::UserDocIdString("userdoc:foo:4:doc9"));
+ Document::UP doc2(new Document(*tmpDoc->getDataType(), id2));
+ doc2->getFields() = tmpDoc->getFields();
+ mf.addPutSlot(*doc2, Timestamp(2000));
+ docs[Timestamp(2000)] = std::move(doc2);
+ }
+
+ // Force rewrite
+ auto memFileCfg = env().acquireConfigReadLock()
+ .memFilePersistenceConfig();
+        resetConfig(1024 * 512, memFileCfg->minimumFileHeaderBlockSize);
+ env()._memFileMapper.flush(mf, env());
+ }
+
+ MemFile target(file, env());
+ target.ensureBodyBlockCached();
+
+ std::ostringstream err;
+ if (!env()._memFileMapper.verify(target, env(), err)) {
+ std::cerr << err.str() << "\n";
+ CPPUNIT_FAIL("MemFile verification failed");
+ }
+
+ typedef std::map<Timestamp, Document::SP>::iterator Iter;
+ for (Iter it(docs.begin()); it != docs.end(); ++it) {
+ const MemSlot* slot = target.getSlotAtTime(it->first);
+ CPPUNIT_ASSERT(slot);
+ CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER));
+ CPPUNIT_ASSERT(target.partAvailable(*slot, BODY));
+ CPPUNIT_ASSERT_EQUAL(*it->second, *target.getDocument(*slot, ALL));
+ }
+}
+
+/**
+ * Test that we read in the correct header information when we have to read
+ * in two passes to get it in its entirety.
+ */
+void
+MemFileV1SerializerTest::testHeaderBufferTooSmall()
+{
+ system("rm -f testfile.0");
+ FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0");
+ FileInfo wantedInfo;
+ {
+ MemFile f(file, env());
+ // 50*40 bytes of meta list data should be more than sufficient
+ for (size_t i = 0; i < 50; ++i) {
+ Document::SP doc(createRandomDocumentAtLocation(4, i));
+ f.addPutSlot(*doc, Timestamp(1001 + i));
+ env()._memFileMapper.flush(f, env());
+ }
+ SimpleMemFileIOBuffer& io(
+ dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO()));
+ wantedInfo = io.getFileInfo();
+ }
+
+ // Force initial index read to be too small to contain all metadata,
+ // triggering buffer resize and secondary read.
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options).initialIndexRead(512).build());
+ {
+ MemFile f(file, env());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(50), f.getSlotCount());
+ // Ensure we've read correct file info
+ SimpleMemFileIOBuffer& io(
+ dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO()));
+ const FileInfo& info(io.getFileInfo());
+ CPPUNIT_ASSERT_EQUAL(wantedInfo.getFileSize(), info.getFileSize());
+ CPPUNIT_ASSERT_EQUAL(wantedInfo.getHeaderBlockStartIndex(),
+ info.getHeaderBlockStartIndex());
+ CPPUNIT_ASSERT_EQUAL(wantedInfo.getBodyBlockStartIndex(),
+ info.getBodyBlockStartIndex());
+ CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(HEADER),
+ info.getBlockSize(HEADER));
+ CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(BODY),
+ info.getBlockSize(BODY));
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp b/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp
new file mode 100644
index 00000000000..0cf04eadaa2
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp
@@ -0,0 +1,501 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h>
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <tests/spi/memfiletestutils.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <tests/spi/simulatedfailurefile.h>
+
+namespace storage {
+namespace memfile {
+
+struct MemFileV1VerifierTest : public SingleDiskMemFileTestUtils
+{
+ void testVerify();
+
+ void tearDown();
+
+ std::unique_ptr<MemFile> createMemFile(FileSpecification& file,
+ bool callLoadFile)
+ {
+ return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile));
+ }
+
+ CPPUNIT_TEST_SUITE(MemFileV1VerifierTest);
+ CPPUNIT_TEST_IGNORED(testVerify);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1VerifierTest);
+
+namespace {
+    // A totally uncached memfile with content to use for verify testing.
+ std::unique_ptr<MemFile> _memFile;
+
+ // Clear old content. Create new file. Make sure nothing is cached.
+ void prepareBucket(SingleDiskMemFileTestUtils& util,
+ const FileSpecification& file) {
+ _memFile.reset();
+ util.env()._cache.clear();
+ vespalib::unlink(file.getPath());
+ util.createTestBucket(file.getBucketId(), 0);
+ util.env()._cache.clear();
+ _memFile.reset(new MemFile(file, util.env()));
+ _memFile->getMemFileIO().close();
+ }
+
+ // Get copy of header of memfile created
+ Header getHeader() {
+ assert(_memFile.get());
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ Header result;
+ file.read(&result, sizeof(Header), 0);
+ return result;
+ }
+
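+    // Read the raw metadata slot at the given index directly from disk.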
+ MetaSlot getSlot(uint32_t index) {
+ assert(_memFile.get());
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ MetaSlot result;
+ file.read(&result, sizeof(MetaSlot),
+ sizeof(Header) + sizeof(MetaSlot) * index);
+ return result;
+ }
+
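+    // Overwrite the raw metadata slot at the given index on disk.
+    // updateFileChecksum is currently ignored; see the commented-out call.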
+ void setSlot(uint32_t index, MetaSlot slot,
+ bool updateFileChecksum = true)
+ {
+ (void)updateFileChecksum;
+ assert(_memFile.get());
+ //if (updateFileChecksum) slot.updateFileChecksum();
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ file.write(&slot, sizeof(MetaSlot),
+ sizeof(Header) + sizeof(MetaSlot) * index);
+ }
+
+ void setHeader(const Header& header) {
+ assert(_memFile.get());
+ vespalib::LazyFile file(_memFile->getFile().getPath(), 0);
+ file.write(&header, sizeof(Header), 0);
+ }
+
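+    // Reload the file from disk and check that verify() reports the
+    // expected error, that repair() also detects and fixes it, and that
+    // the repaired file ends up with remainingEntries slots (or is gone
+    // entirely when remainingEntries is negative).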
+ void verifySlotFile(MemFileV1VerifierTest& util,
+ const std::string& expectedError,
+ const std::string& message,
+ int32_t remainingEntries,
+ bool includeContent = true,
+ bool includeHeader = true)
+ {
+ assert(_memFile.get());
+ FileSpecification file(_memFile->getFile());
+ _memFile.reset();
+ _memFile = util.createMemFile(file, false);
+ std::ostringstream before;
+        try {
+ util.env()._memFileMapper.loadFile(*_memFile, util.env(), false);
+ _memFile->print(before, true, "");
+ } catch (vespalib::Exception& e) {
+ before << "Unknown. Exception during loadFile\n";
+ }
+ std::ostringstream errors;
+ uint32_t flags = (includeContent ? 0 : Types::DONT_VERIFY_BODY)
+ | (includeHeader ? 0 : Types::DONT_VERIFY_HEADER);
+ if (util.env()._memFileMapper.verify(
+ *_memFile, util.env(), errors, flags))
+ {
+ _memFile->print(std::cerr, true, "");
+ std::cerr << errors.str() << "\n";
+ CPPUNIT_FAIL("verify() failed to detect: " + message);
+ }
+ CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(),
+ expectedError, errors.str());
+ errors.str("");
+ if (util.env()._memFileMapper.repair(
+ *_memFile, util.env(), errors, flags))
+ {
+ CPPUNIT_FAIL("repair() failed to detect: " + message
+ + ": " + errors.str());
+ }
+ CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(),
+ expectedError, errors.str());
+ std::ostringstream remainingErrors;
+ if (!util.env()._memFileMapper.verify(
+ *_memFile, util.env(), remainingErrors, flags))
+ {
+ CPPUNIT_FAIL("verify() returns issue after repair of: "
+ + message + ": " + remainingErrors.str());
+ }
+ CPPUNIT_ASSERT_MESSAGE(remainingErrors.str(),
+ remainingErrors.str().size() == 0);
+ if (remainingEntries < 0) {
+ if (_memFile->fileExists()) {
+ CPPUNIT_FAIL(message + ": Expected file to not exist anymore");
+ }
+ } else if (dynamic_cast<SimpleMemFileIOBuffer&>(_memFile->getMemFileIO())
+ .getFileHandle().getFileSize() == 0)
+ {
+ std::ostringstream ost;
+ ost << "Expected " << remainingEntries << " to remain in file, "
+ << "but file does not exist\n";
+ CPPUNIT_FAIL(message + ": " + ost.str());
+ } else {
+ if (int64_t(_memFile->getSlotCount()) != remainingEntries) {
+ std::ostringstream ost;
+ ost << "Expected " << remainingEntries << " to remain in file, "
+ << "but found " << _memFile->getSlotCount() << "\n";
+ ost << errors.str() << "\n";
+ ost << "Before: " << before.str() << "\nAfter: ";
+ _memFile->print(ost, true, "");
+ CPPUNIT_FAIL(message + ": " + ost.str());
+ }
+ }
+ }
+}
+
+void
+MemFileV1VerifierTest::tearDown()
+{
+    _memFile.reset();
+    SingleDiskMemFileTestUtils::tearDown();
+}
+
+void
+MemFileV1VerifierTest::testVerify()
+{
+ BucketId bucket(16, 0xa);
+ std::unique_ptr<FileSpecification> file;
+ createTestBucket(bucket, 0);
+
+ {
+ MemFilePtr memFilePtr(env()._cache.get(bucket, env(), env().getDirectory()));
+ file.reset(new FileSpecification(memFilePtr->getFile()));
+ env()._cache.clear();
+ }
+ { // Ensure buildTestFile builds a valid file
+ // Initial file should be fine.
+ MemFile memFile(*file, env());
+ std::ostringstream errors;
+ if (!env()._memFileMapper.verify(memFile, env(), errors)) {
+ memFile.print(std::cerr, false, "");
+ CPPUNIT_FAIL("Slotfile failed verification: " + errors.str());
+ }
+ }
+ // Header tests
+ prepareBucket(*this, *file);
+ Header orgheader(getHeader());
+ { // Test wrong version
+ Header header(orgheader);
+ header.setVersion(0xc0edbabe);
+ header.updateChecksum();
+ setHeader(header);
+ verifySlotFile(*this,
+ "400000000000000a.0 is of wrong version",
+ "Faulty version",
+ -1);
+ }
+ { // Test meta data list size bigger than file
+ prepareBucket(*this, *file);
+ Header header(orgheader);
+ header.setMetaDataListSize(0xFFFF);
+ header.updateChecksum();
+ setHeader(header);
+ verifySlotFile(*this,
+ "indicates file is bigger than it physically is",
+ "Too big meta data list size",
+ -1);
+ }
+ { // Test header block size bigger than file
+ prepareBucket(*this, *file);
+ Header header(orgheader);
+ header.setHeaderBlockSize(0xFFFF);
+ header.updateChecksum();
+ setHeader(header);
+ verifySlotFile(*this,
+ "Header indicates file is bigger than it physically is",
+ "Too big header block size",
+ -1);
+ }
+ { // Test wrong header crc
+ prepareBucket(*this, *file);
+ Header header(orgheader);
+ header.setMetaDataListSize(4);
+ setHeader(header);
+ verifySlotFile(*this,
+ "Header checksum mismatch",
+ "Wrong header checksum",
+ -1);
+ }
+ // Meta data tests
+ prepareBucket(*this, *file);
+ MetaSlot slot6(getSlot(6));
+ { // Test extra removes - currently allowed
+ MetaSlot slot7(getSlot(7));
+ MetaSlot s(slot7);
+ s.setTimestamp(Timestamp(s._timestamp.getTime() - 1));
+ s.updateChecksum();
+ setSlot(6, s);
+ s.setTimestamp(Timestamp(s._timestamp.getTime() + 1));
+ s.updateChecksum();
+ setSlot(7, s);
+ std::ostringstream errors;
+ if (!env()._memFileMapper.verify(*_memFile, env(), errors)) {
+ _memFile->print(std::cerr, false, "");
+ std::cerr << errors.str() << "\n";
+ CPPUNIT_FAIL("Supposed to be legal with multiple remove values");
+ }
+ setSlot(7, slot7);
+ }
+ {
+ // Test metadata crc mismatch with "used" flag being accidentally
+ // flipped. Should not inhibit adding of subsequent slots.
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setUseFlag(false);
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Slot 6 at timestamp 2001 failed checksum verification",
+ "Crc failure with use flag", 23, false);
+ }
+ { // Test overlapping documents
+ MetaSlot s(slot6);
+ // Direct overlapping header
+ prepareBucket(*this, *file);
+ s.setHeaderPos(0);
+ s.setHeaderSize(51);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "overlaps with slot",
+ "Direct overlapping header", 6, false, false);
+ // Contained header
+        // (the contained area is not a valid header, so it now fails on another error)
+ prepareBucket(*this, *file);
+ s.setHeaderPos(176);
+ s.setHeaderSize(80);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "not big enough to contain a document id",
+ "Contained header", 7, false);
+ // Partly overlapping header
+        // (the referenced area is not a valid header, so it now fails on another error)
+ prepareBucket(*this, *file);
+ s.setHeaderPos(191);
+ s.setHeaderSize(35);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "not big enough to contain a document id",
+ "Partly overlapping header", 7, false);
+ prepareBucket(*this, *file);
+ s.setHeaderPos(185);
+ s.setHeaderSize(33);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "not big enough to contain a document id",
+ "Partly overlapping header (2)", 7, false);
+ // Direct overlapping body
+ prepareBucket(*this, *file);
+ s = slot6;
+ s.setBodyPos(0);
+ s.setBodySize(136);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Multiple slots with different gids use same body position",
+ "Directly overlapping body", 6, false);
+ // Contained body
+ prepareBucket(*this, *file);
+ s.setBodyPos(10);
+ s.setBodySize(50);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "overlaps with slot",
+ "Contained body", 6, false);
+ CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1)) == 0);
+ // Overlapping body
+ prepareBucket(*this, *file);
+ s.setBodyPos(160);
+ s.setBodySize(40);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "overlaps with slot",
+ "Overlapping body", 5, false);
+ CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(2)) == 0);
+ CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1501)) == 0);
+ // Overlapping body, verifying bodies
+ // (Bad body bit should be removed first, so only one slot needs
+ // removing)
+ prepareBucket(*this, *file);
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Body checksum mismatch",
+ "Overlapping body(2)", 7, true);
+ }
+ { // Test out of bounds
+ MetaSlot s(slot6);
+
+ // Header out of bounds
+ prepareBucket(*this, *file);
+ s.setHeaderPos(500);
+ s.setHeaderSize(5000);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "goes out of bounds",
+ "Header out of bounds", 7, false, false);
+ // Body out of bounds
+ prepareBucket(*this, *file);
+ s = slot6;
+ s.setBodyPos(2400);
+ s.setBodySize(6000);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "goes out of bounds",
+ "Body out of bounds", 7, false);
+ }
+ { // Test timestamp collision
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setTimestamp(Timestamp(10002));
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "has same timestamp as slot 5",
+ "Timestamp collision", 6, false);
+ }
+ { // Test timestamp out of order
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setTimestamp(Timestamp(38));
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Slot 6 is out of timestamp order",
+ "Timestamp out of order", 8, false);
+ }
+ { // Test metadata crc mismatch
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setTimestamp(Timestamp(40));
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Slot 6 at timestamp 40 failed checksum verification",
+ "Crc failure", 7, false);
+ }
+ { // Test used after unused
+        // This might actually lose documents after the unused entries.
+        // The memfile will not know about the documents after the unused
+        // entry. If the memfile contains changes and writes metadata back
+        // because of this, the following entries will be missing.
+        // (To prevent this, repair would have to add metadata entries,
+        // but that may be problematic if repair happens at a time when
+        // all header or body data in the file needs to be cached.)
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setUseFlag(false);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Slot 7 found after unused entries",
+ "Used after unused", 6, false);
+ }
+ { // Test header blob corrupt
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setHeaderPos(519);
+ s.setHeaderSize(86);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Header checksum mismatch",
+ "Corrupt header blob.", 7);
+ }
+ { // Test body blob corrupt
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setBodyPos(52);
+ s.setBodySize(18);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "Body checksum mismatch",
+ "Corrupt body blob.", 7);
+ }
+ { // Test too long name for header chunk
+ prepareBucket(*this, *file);
+ MetaSlot s(slot6);
+ s.setHeaderPos(160);
+ s.setHeaderSize(33);
+ s.updateChecksum();
+ setSlot(6, s);
+ verifySlotFile(*this,
+ "header is not big enough to contain a document",
+ "Too long name in header.", 7);
+ }
+ { // Test wrong file checksum
+// Currently disabled. It is currently only possible to calculate the file
+// checksum from the memfile, and the memfile object won't be valid.
+/*
+    // First test if we actually have fewer entries at all.
+ prepareBucket(*this, *file);
+ MetaSlot s(getSlot(7));
+ s.setUseFlag(false);
+ s.updateChecksum();
+ setSlot(7, s, false);
+ s = getSlot(8);
+ s.setUseFlag(false);
+ s.updateChecksum();
+ setSlot(8, s, false);
+ verifySlotFile(*this,
+ "File checksum should have been",
+ "Wrong file checksum in file.", 7, false);
+std::cerr << "U\n";
+ // Then test with different timestamp in remaining document
+ prepareBucket(*this, *file);
+ s = getSlot(6);
+ s.setTimestamp(s._timestamp + 1);
+ s.updateChecksum();
+ setSlot(6, s, false);
+ verifySlotFile(*this,
+ "File checksum should have been",
+ "Wrong file checksum in file.", 9, false);
+std::cerr << "V\n";
+ // Then check with different gid
+ prepareBucket(*this, *file);
+ s = getSlot(6);
+ s._gid = GlobalId("sdfsdfsedsdfsdfsd");
+ s.updateChecksum();
+ setSlot(6, s, false);
+ verifySlotFile(*this,
+ "File checksum should have been",
+ "Wrong file checksum in file.", 9, false, false);
+*/
+ }
+    { // Test that documents not belonging in a bucket are removed
+// Currently disabled. Hard to test. Needs total rewrite
+/*
+ prepareBucket(*this, *file);
+ Blob b(createBlob(43u, "userdoc::0:315", "header", "body"));
+ _memFile->write(b, 80);
+ CPPUNIT_ASSERT_EQUAL(4u, _memFile->getBlobCount());
+ CPPUNIT_ASSERT(_memFile->read(b));
+ verifySlotFile(*this,
+ "belongs in bucket",
+ "Document not belonging there", 9);
+ CPPUNIT_ASSERT_EQUAL(3u, _memFile->getBlobCount());
+*/
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/options_builder.h b/memfilepersistence/src/tests/spi/options_builder.h
new file mode 100644
index 00000000000..044e7f1d351
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/options_builder.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <memory>
+
+namespace storage {
+namespace memfile {
+
+class OptionsBuilder
+{
+ Options _newOptions;
+public:
+ OptionsBuilder(const Options& opts)
+ : _newOptions(opts)
+ {
+ }
+
+ OptionsBuilder& maximumReadThroughGap(uint32_t readThroughGap) {
+ _newOptions._maximumGapToReadThrough = readThroughGap;
+ return *this;
+ }
+
+ OptionsBuilder& initialIndexRead(uint32_t bytesToRead) {
+ _newOptions._initialIndexRead = bytesToRead;
+ return *this;
+ }
+
+ OptionsBuilder& revertTimePeriod(framework::MicroSecTime revertTime) {
+ _newOptions._revertTimePeriod = revertTime;
+ return *this;
+ }
+
+ OptionsBuilder& defaultRemoveDocType(vespalib::stringref typeName) {
+ _newOptions._defaultRemoveDocType = typeName;
+ return *this;
+ }
+
+ OptionsBuilder& maxDocumentVersions(uint32_t maxVersions) {
+ _newOptions._maxDocumentVersions = maxVersions;
+ return *this;
+ }
+
+ std::unique_ptr<Options> build() const {
+ return std::unique_ptr<Options>(new Options(_newOptions));
+ }
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/tests/spi/providerconformancetest.cpp b/memfilepersistence/src/tests/spi/providerconformancetest.cpp
new file mode 100644
index 00000000000..526f61a812c
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/providerconformancetest.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/persistence/conformancetest/conformancetest.h>
+#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h>
+#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
+#include <vespa/storageframework/defaultimplementation/memory/memorymanager.h>
+#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h>
+#include <vespa/storageframework/generic/memory/memorymanagerinterface.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h>
+#include <tests/spi/memfiletestutils.h>
+
+LOG_SETUP(".test.dummyimpl");
+
+namespace storage {
+namespace memfile {
+
+struct ProviderConformanceTest : public spi::ConformanceTest {
+ struct Factory : public PersistenceFactory {
+ framework::defaultimplementation::ComponentRegisterImpl _compRegister;
+ framework::defaultimplementation::RealClock _clock;
+ framework::defaultimplementation::MemoryManager _memoryManager;
+ std::unique_ptr<MemFileCache> cache;
+
+ Factory()
+ : _compRegister(),
+ _clock(),
+ _memoryManager(
+ framework::defaultimplementation::AllocationLogic::UP(
+ new framework::defaultimplementation::SimpleMemoryLogic(
+ _clock, 1024 * 1024 * 1024)))
+ {
+ _compRegister.setClock(_clock);
+ _compRegister.setMemoryManager(_memoryManager);
+ }
+
+ spi::PersistenceProvider::UP
+ getPersistenceImplementation(const document::DocumentTypeRepo::SP& repo,
+ const document::DocumenttypesConfig&)
+ {
+ system("rm -rf vdsroot");
+ system("mkdir -p vdsroot/disks/d0");
+ vdstestlib::DirConfig config(getStandardConfig(true));
+
+ MemFilePersistenceProvider::UP result(
+ new MemFilePersistenceProvider(
+ _compRegister,
+ config.getConfigId()));
+ result->setDocumentRepo(*repo);
+ return spi::PersistenceProvider::UP(result.release());
+ }
+
+ bool
+ supportsRevert() const
+ {
+ return true;
+ }
+ };
+
+ ProviderConformanceTest()
+ : spi::ConformanceTest(PersistenceFactory::UP(new Factory)) {}
+
+ CPPUNIT_TEST_SUITE(ProviderConformanceTest);
+ DEFINE_CONFORMANCE_TESTS();
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(ProviderConformanceTest);
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp b/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp
new file mode 100644
index 00000000000..fbf7badf5e4
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h>
+
+namespace storage {
+namespace memfile {
+
+class SharedDataLocationTrackerTest : public CppUnit::TestFixture
+{
+public:
+ void headerIsPassedDownToCacheAccessor();
+ void bodyIsPassedDownToCacheAccessor();
+ void firstInvocationReturnsNewLocation();
+ void multipleInvocationsForSharedSlotReturnSameLocation();
+
+ CPPUNIT_TEST_SUITE(SharedDataLocationTrackerTest);
+ CPPUNIT_TEST(headerIsPassedDownToCacheAccessor);
+ CPPUNIT_TEST(bodyIsPassedDownToCacheAccessor);
+ CPPUNIT_TEST(firstInvocationReturnsNewLocation);
+ CPPUNIT_TEST(multipleInvocationsForSharedSlotReturnSameLocation);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(SharedDataLocationTrackerTest);
+
+namespace {
+
+using Params = std::pair<Types::DocumentPart, DataLocation>;
+constexpr auto HEADER = Types::HEADER;
+constexpr auto BODY = Types::BODY;
+
+/**
+ * A simple mock of a buffer cache which records all invocations
+ * and returns a location increasing by 100 for each invocation.
+ */
+struct MockBufferCacheCopier : BufferCacheCopier
+{
+ // This is practically _screaming_ for GoogleMock.
+ std::vector<Params> invocations;
+
+ DataLocation doCopyFromSourceToLocal(
+ Types::DocumentPart part,
+ DataLocation sourceLocation) override
+ {
+ Params params(part, sourceLocation);
+ const size_t invocationsBefore = invocations.size();
+ invocations.push_back(params);
+ return DataLocation(invocationsBefore * 100,
+ invocationsBefore * 100 + 100);
+ }
+};
+
+}
+
+void
+SharedDataLocationTrackerTest::headerIsPassedDownToCacheAccessor()
+{
+ MockBufferCacheCopier cache;
+ SharedDataLocationTracker tracker(cache, HEADER);
+ tracker.getOrCreateSharedLocation({0, 100});
+ CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size());
+ CPPUNIT_ASSERT_EQUAL(Params(HEADER, {0, 100}), cache.invocations[0]);
+}
+
+void
+SharedDataLocationTrackerTest::bodyIsPassedDownToCacheAccessor()
+{
+ MockBufferCacheCopier cache;
+ SharedDataLocationTracker tracker(cache, BODY);
+ tracker.getOrCreateSharedLocation({0, 100});
+ CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size());
+ CPPUNIT_ASSERT_EQUAL(Params(BODY, {0, 100}), cache.invocations[0]);
+}
+
+void
+SharedDataLocationTrackerTest::firstInvocationReturnsNewLocation()
+{
+ MockBufferCacheCopier cache;
+ SharedDataLocationTracker tracker(cache, HEADER);
+ // Auto-incrementing per cache copy invocation.
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100),
+ tracker.getOrCreateSharedLocation({500, 600}));
+ CPPUNIT_ASSERT_EQUAL(DataLocation(100, 200),
+ tracker.getOrCreateSharedLocation({700, 800}));
+
+ CPPUNIT_ASSERT_EQUAL(size_t(2), cache.invocations.size());
+ CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]);
+ CPPUNIT_ASSERT_EQUAL(Params(HEADER, {700, 800}), cache.invocations[1]);
+}
+
+void
+SharedDataLocationTrackerTest
+ ::multipleInvocationsForSharedSlotReturnSameLocation()
+{
+ MockBufferCacheCopier cache;
+ SharedDataLocationTracker tracker(cache, HEADER);
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100),
+ tracker.getOrCreateSharedLocation({500, 600}));
+ // Same source location, thus we can reuse the same destination location
+ // as well.
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100),
+ tracker.getOrCreateSharedLocation({500, 600}));
+
+ CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size());
+ CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]);
+}
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp b/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp
new file mode 100644
index 00000000000..af0466fafe7
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp
@@ -0,0 +1,663 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/options_builder.h>
+
+namespace storage {
+namespace memfile {
+
+class SimpleMemFileIOBufferTest : public SingleDiskMemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(SimpleMemFileIOBufferTest);
+ CPPUNIT_TEST(testAddAndReadDocument);
+ CPPUNIT_TEST(testNonExistingLocation);
+ CPPUNIT_TEST(testCopy);
+ CPPUNIT_TEST(testCacheLocation);
+ CPPUNIT_TEST(testPersist);
+ CPPUNIT_TEST(testGetSerializedSize);
+ CPPUNIT_TEST(testRemapLocations);
+ CPPUNIT_TEST(testAlignmentUtilFunctions);
+ CPPUNIT_TEST(testCalculatedCacheSize);
+ CPPUNIT_TEST(testSharedBuffer);
+ CPPUNIT_TEST(testSharedBufferUsage);
+ CPPUNIT_TEST(testHeaderChunkEncoderComputesSizesCorrectly);
+ CPPUNIT_TEST(testHeaderChunkEncoderSerializesIdCorrectly);
+ CPPUNIT_TEST(testHeaderChunkEncoderSerializesHeaderCorrectly);
+ CPPUNIT_TEST(testRemovesCanBeWrittenWithBlankDefaultDocument);
+ CPPUNIT_TEST(testRemovesCanBeWrittenWithIdInferredDoctype);
+ CPPUNIT_TEST(testRemovesWithInvalidDocTypeThrowsException);
+ CPPUNIT_TEST_SUITE_END();
+
+ using BufferType = SimpleMemFileIOBuffer::BufferType;
+ using BufferLP = BufferType::LP;
+ using BufferAllocation = SimpleMemFileIOBuffer::BufferAllocation;
+ using HeaderChunkEncoder = SimpleMemFileIOBuffer::HeaderChunkEncoder;
+ using SimpleMemFileIOBufferUP = std::unique_ptr<SimpleMemFileIOBuffer>;
+
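+    // Wrap a freshly allocated buffer of sz bytes in a BufferAllocation
+    // spanning its entire range.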
+ BufferAllocation allocateBuffer(size_t sz) {
+ return BufferAllocation(BufferLP(new BufferType(sz)), 0, sz);
+ }
+
+ /**
+     * Create an I/O buffer instance for a dummy bucket. If removeDocType
+ * is non-empty, remove entries will be written in backwards compatible
+ * mode.
+ */
+ SimpleMemFileIOBufferUP createIoBufferWithDummySpec(
+ vespalib::stringref removeDocType = "");
+
+public:
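+    // No-op VersionSerializer stub so SimpleMemFileIOBuffer instances can
+    // be constructed without touching any real on-disk file format.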
+ class DummyFileReader : public VersionSerializer {
+ public:
+ virtual FileVersion getFileVersion() { return FileVersion(); }
+ virtual void loadFile(MemFile&, Environment&,
+ Buffer&, uint64_t ) {}
+ virtual FlushResult flushUpdatesToFile(MemFile&, Environment&) {
+ return FlushResult::TooSmall;
+ }
+ virtual void rewriteFile(MemFile&, Environment&) {}
+ virtual bool verify(MemFile&, Environment&,
+ std::ostream&, bool,
+ uint16_t) { return false; };
+ virtual void cacheLocations(MemFileIOInterface&,
+ Environment&,
+ const Options&,
+ DocumentPart,
+ const std::vector<DataLocation>&) {}
+ };
+
+ DummyFileReader dfr;
+
+ void testAddAndReadDocument();
+ void testNonExistingLocation();
+ void testCopy();
+ void testCacheLocation();
+ void testPersist();
+ void testGetSerializedSize();
+ void testRemapLocations();
+ void testAlignmentUtilFunctions();
+ void testCalculatedCacheSize();
+ void testSharedBuffer();
+ void testSharedBufferUsage();
+ void testHeaderChunkEncoderComputesSizesCorrectly();
+ void testHeaderChunkEncoderSerializesIdCorrectly();
+ void testHeaderChunkEncoderSerializesHeaderCorrectly();
+ void testRemovesCanBeWrittenWithBlankDefaultDocument();
+ void testRemovesCanBeWrittenWithIdInferredDoctype();
+ void testRemovesWithInvalidDocTypeThrowsException();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(SimpleMemFileIOBufferTest);
+
+
+void
+SimpleMemFileIOBufferTest::testAddAndReadDocument()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ DataLocation h = buffer.addHeader(*doc);
+ DataLocation b = buffer.addBody(*doc);
+
+ Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), h);
+ buffer.readBody(*getTypeRepo(), b, *newDoc);
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *newDoc);
+ CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(h, HEADER));
+ CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(b, BODY));
+ CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(h, BODY));
+ CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(b, HEADER));
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), buffer.getDocumentId(h));
+}
+
+void
+SimpleMemFileIOBufferTest::testPersist()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ DataLocation h = buffer.addHeader(*doc);
+ DataLocation b = buffer.addBody(*doc);
+
+ CPPUNIT_ASSERT(!buffer.isPersisted(h, HEADER));
+ CPPUNIT_ASSERT(!buffer.isPersisted(b, BODY));
+
+ buffer.persist(HEADER, h, DataLocation(1000, h.size()));
+ buffer.persist(BODY, b, DataLocation(5000, b.size()));
+
+ Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), DataLocation(1000, h.size()));
+ buffer.readBody(*getTypeRepo(), DataLocation(5000, b.size()), *newDoc);
+
+ CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(1000, h.size()), HEADER));
+ CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(5000, b.size()), BODY));
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *newDoc);
+}
+
+void
+SimpleMemFileIOBufferTest::testCopy()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ for (uint32_t i = 0; i < 10; ++i) {
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ DataLocation h = buffer.addHeader(*doc);
+ DataLocation b = buffer.addBody(*doc);
+
+ SimpleMemFileIOBuffer buffer2(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ DataLocation h2 = buffer2.copyCache(buffer, HEADER, h);
+ DataLocation b2 = buffer2.copyCache(buffer, BODY, b);
+
+ Document::UP newDoc = buffer2.getDocumentHeader(*getTypeRepo(), h2);
+ buffer2.readBody(*getTypeRepo(), b2, *newDoc);
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *newDoc);
+ }
+}
+
+void
+SimpleMemFileIOBufferTest::testNonExistingLocation()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ DataLocation h = buffer.addHeader(*doc);
+ DataLocation b = buffer.addBody(*doc);
+
+ buffer.clear(HEADER);
+
+ try {
+ Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), h);
+ CPPUNIT_ASSERT(false);
+ } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) {
+ }
+
+ buffer.clear(BODY);
+
+ try {
+ document::Document newDoc;
+ buffer.readBody(*getTypeRepo(), b, newDoc);
+ CPPUNIT_ASSERT(false);
+ } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) {
+ }
+}
+
+void
+SimpleMemFileIOBufferTest::testCacheLocation()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ FileInfo::UP(new FileInfo(100, 10000, 50000)),
+ fileSpec,
+ env());
+
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ BufferAllocation headerBuf = buffer.serializeHeader(*doc);
+ BufferAllocation bodyBuf = buffer.serializeBody(*doc);
+
+ DataLocation hloc(1234, headerBuf.getSize());
+ DataLocation bloc(5678, bodyBuf.getSize());
+
+ buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0);
+ buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0);
+
+ Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), hloc);
+ buffer.readBody(*getTypeRepo(), bloc, *newDoc);
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *newDoc);
+}
+
+void
+SimpleMemFileIOBufferTest::testGetSerializedSize()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ FileInfo::UP(new FileInfo(100, 10000, 50000)),
+ fileSpec,
+ env());
+
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 456,
+ 789,
+ 1234));
+
+ BufferAllocation headerBuf = buffer.serializeHeader(*doc);
+ BufferAllocation bodyBuf = buffer.serializeBody(*doc);
+
+ DataLocation hloc(1234, headerBuf.getSize());
+ DataLocation bloc(5678, bodyBuf.getSize());
+
+ buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0);
+ buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0);
+
+ vespalib::nbostream serializedHeader;
+ doc->serializeHeader(serializedHeader);
+
+ vespalib::nbostream serializedBody;
+ doc->serializeBody(serializedBody);
+
+ CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()),
+ buffer.getSerializedSize(HEADER, hloc));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()),
+ buffer.getSerializedSize(BODY, bloc));
+}
+
+// Test that remapping does not overwrite data locations that it has
+// already updated.
+void
+SimpleMemFileIOBufferTest::testRemapLocations()
+{
+ FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0");
+
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ FileInfo::UP(new FileInfo(100, 10000, 50000)),
+ fileSpec,
+ env());
+
+ document::Document::SP doc(createRandomDocumentAtLocation(
+ 123,
+ 100,
+ 100));
+ BufferAllocation headerBuf = buffer.serializeHeader(*doc);
+ BufferAllocation bodyBuf = buffer.serializeBody(*doc);
+
+ document::Document::SP doc2(createRandomDocumentAtLocation(
+ 123,
+ 100,
+ 100));
+
+ BufferAllocation headerBuf2 = buffer.serializeHeader(*doc2);
+ BufferAllocation bodyBuf2 = buffer.serializeBody(*doc2);
+
+ DataLocation hloc(30000, headerBuf.getSize());
+ DataLocation hloc2(0, headerBuf2.getSize());
+ DataLocation hloc3(10000, hloc2._size);
+
+ buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0);
+ buffer.cacheLocation(HEADER, hloc2, headerBuf2.getSharedBuffer(), 0);
+
+ std::map<DataLocation, DataLocation> remapping;
+ remapping[hloc2] = hloc;
+ remapping[hloc] = hloc3;
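+    // hloc2 -> hloc and hloc -> hloc3; applying these one at a time could
+    // clobber the first move, so the remap must happen as a single batch.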
+
+ buffer.remapAndPersistAllLocations(HEADER, remapping);
+
+ Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), hloc3);
+ document::ByteBuffer bbuf(bodyBuf.getBuffer(), bodyBuf.getSize());
+ newDoc->deserializeBody(*getTypeRepo(), bbuf);
+
+ CPPUNIT_ASSERT_EQUAL(*doc, *newDoc);
+
+ Document::UP newDoc2 = buffer.getDocumentHeader(*getTypeRepo(), hloc);
+ document::ByteBuffer bbuf2(bodyBuf.getBuffer(), bodyBuf.getSize());
+ newDoc2->deserializeBody(*getTypeRepo(), bbuf2);
+ CPPUNIT_ASSERT_EQUAL(*doc2, *newDoc2);
+}
+
+/**
+ * Not technically a part of SimpleMemFileIOBuffer, but used by it and
+ * currently contained within its header file. Move test somewhere else
+ * if the code itself is moved.
+ */
+void
+SimpleMemFileIOBufferTest::testAlignmentUtilFunctions()
+{
+ using namespace util;
+ CPPUNIT_ASSERT_EQUAL(size_t(0), alignUpPow2<4096>(0));
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(1));
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(512));
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(4096));
+ CPPUNIT_ASSERT_EQUAL(size_t(8192), alignUpPow2<4096>(4097));
+ CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<16>(20));
+ CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<32>(20));
+ CPPUNIT_ASSERT_EQUAL(size_t(64), alignUpPow2<64>(20));
+ CPPUNIT_ASSERT_EQUAL(size_t(128), alignUpPow2<128>(20));
+
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), nextPow2(0));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), nextPow2(1));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(4), nextPow2(3));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(16), nextPow2(15));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(40));
+ CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(64));
+}
+
+/**
+ * Test that allocated buffers are correctly reported with their sizes
+ * rounded up to account for mmap overhead.
+ */
+void
+SimpleMemFileIOBufferTest::testCalculatedCacheSize()
+{
+ FileSpecification fileSpec(BucketId(16, 123),
+ env().getDirectory(), "testfile.0");
+ SimpleMemFileIOBuffer buffer(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER));
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY));
+
+ // All buffers are on a 4k page granularity.
+ BufferAllocation sharedHeaderBuffer(allocateBuffer(1500)); // -> 4096
+ buffer.cacheLocation(HEADER, DataLocation(0, 85),
+ sharedHeaderBuffer.getSharedBuffer(), 0);
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER));
+
+ buffer.cacheLocation(HEADER, DataLocation(200, 100),
+ sharedHeaderBuffer.getSharedBuffer(), 85);
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER));
+
+ BufferAllocation singleHeaderBuffer(allocateBuffer(200)); // -> 4096
+ buffer.cacheLocation(HEADER, DataLocation(0, 100),
+ singleHeaderBuffer.getSharedBuffer(), 0);
+ CPPUNIT_ASSERT_EQUAL(size_t(8192), buffer.getCachedSize(HEADER));
+
+ BufferAllocation singleBodyBuffer(allocateBuffer(300)); // -> 4096
+ buffer.cacheLocation(BODY, DataLocation(0, 100),
+ singleBodyBuffer.getSharedBuffer(), 0);
+ CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(BODY));
+
+ buffer.clear(HEADER);
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER));
+
+ buffer.clear(BODY);
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY));
+}
+
+void
+SimpleMemFileIOBufferTest::testSharedBuffer()
+{
+ typedef SimpleMemFileIOBuffer::SharedBuffer SharedBuffer;
+
+ {
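+        // Allocations are handed out sequentially and rounded up to 8-byte
+        // alignment (13 -> 16 used bytes, 1 -> 8).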
+ SharedBuffer buf(1024);
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getFreeSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getUsedSize());
+ CPPUNIT_ASSERT(buf.hasRoomFor(1024));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(1025));
+
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(13));
+ // Allocation should be rounded up to nearest alignment.
+ // TODO: is this even necessary?
+ CPPUNIT_ASSERT_EQUAL(size_t(16), buf.getUsedSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(1008), buf.getFreeSize());
+ CPPUNIT_ASSERT(buf.hasRoomFor(1008));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(1009));
+ CPPUNIT_ASSERT_EQUAL(size_t(16), buf.allocate(1));
+ CPPUNIT_ASSERT_EQUAL(size_t(24), buf.getUsedSize());
+
+ CPPUNIT_ASSERT_EQUAL(size_t(24), buf.allocate(999));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(1));
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize());
+ }
+ // Test exact fit.
+ {
+ SharedBuffer buf(1024);
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(1024));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(1));
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize());
+ }
+ // Test 512-byte alignment.
+ {
+ SharedBuffer buf(1024);
+ CPPUNIT_ASSERT(buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES));
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(10));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES));
+ CPPUNIT_ASSERT(!buf.hasRoomFor(513, SharedBuffer::ALIGN_512_BYTES));
+ CPPUNIT_ASSERT(buf.hasRoomFor(512, SharedBuffer::ALIGN_512_BYTES));
+ CPPUNIT_ASSERT_EQUAL(size_t(512), buf.allocate(512, SharedBuffer::ALIGN_512_BYTES));
+ CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize());
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize());
+ }
+}
+
+void
+SimpleMemFileIOBufferTest::testSharedBufferUsage()
+{
+ FileSpecification fileSpec(BucketId(16, 123),
+ env().getDirectory(), "testfile.0");
+ SimpleMemFileIOBuffer ioBuf(dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env());
+
+ const size_t threshold = SimpleMemFileIOBuffer::WORKING_BUFFER_SIZE;
+
+ // Brand new allocation
+ BufferAllocation ba(ioBuf.allocateBuffer(HEADER, 1));
+ CPPUNIT_ASSERT(ba.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(1), ba.size);
+ // Should reuse buffer, but get other offset
+ BufferAllocation ba2(ioBuf.allocateBuffer(HEADER, 500));
+ CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba2.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(8), ba2.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(500), ba2.size);
+ CPPUNIT_ASSERT_EQUAL(size_t(512), ba2.buf->getUsedSize());
+
+ // Allocate a buffer so big that it should get its own buffer instance
+ BufferAllocation ba3(ioBuf.allocateBuffer(HEADER, threshold));
+ CPPUNIT_ASSERT(ba3.buf.get() != ba2.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba3.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(threshold), ba3.size);
+
+ // But smaller allocs should still be done from working buffer
+ BufferAllocation ba4(ioBuf.allocateBuffer(HEADER, 512));
+ CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba4.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.size);
+ CPPUNIT_ASSERT_EQUAL(size_t(1024), ba4.buf->getUsedSize());
+
+ // Allocate lots of smaller buffers from the same buffer until we run out.
+ while (true) {
+ BufferAllocation tmp(ioBuf.allocateBuffer(HEADER, 1024));
+ CPPUNIT_ASSERT_EQUAL(ba.buf.get(), tmp.buf.get());
+ if (!tmp.buf->hasRoomFor(2048)) {
+ break;
+ }
+ }
+ BufferAllocation ba5(ioBuf.allocateBuffer(HEADER, 2048));
+ CPPUNIT_ASSERT(ba5.buf.get() != ba.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba5.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(2048), ba5.size);
+
+ // Allocating for different part should get different buffer.
+ BufferAllocation ba6(ioBuf.allocateBuffer(BODY, 128));
+ CPPUNIT_ASSERT(ba6.buf.get() != ba5.buf.get());
+ CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba6.pos);
+ CPPUNIT_ASSERT_EQUAL(uint32_t(128), ba6.size);
+}
+
+void
+SimpleMemFileIOBufferTest::testHeaderChunkEncoderComputesSizesCorrectly()
+{
+ document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100));
+
+ std::string idString = doc->getId().toString();
+ HeaderChunkEncoder encoder(doc->getId());
+ // Without document, payload is: 3x u32 + doc id string (no zero term).
+ CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size(),
+ static_cast<size_t>(encoder.encodedSize()));
+
+ encoder.bufferDocument(*doc);
+ vespalib::nbostream stream;
+ doc->serializeHeader(stream);
+ // With document, add size of serialized document to the mix.
+ CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size() + stream.size(),
+ static_cast<size_t>(encoder.encodedSize()));
+}
+
+SimpleMemFileIOBufferTest::SimpleMemFileIOBufferUP
+SimpleMemFileIOBufferTest::createIoBufferWithDummySpec(
+ vespalib::stringref removeDocType)
+{
+ FileSpecification fileSpec(BucketId(16, 123),
+ env().getDirectory(), "testfile.0");
+ // Override config.
+ auto options = env().acquireConfigReadLock().options();
+ env().acquireConfigWriteLock().setOptions(
+ OptionsBuilder(*options)
+ .defaultRemoveDocType(removeDocType)
+ .build());
+
+ SimpleMemFileIOBufferUP ioBuf(
+ new SimpleMemFileIOBuffer(
+ dfr,
+ vespalib::LazyFile::UP(),
+ std::unique_ptr<FileInfo>(new FileInfo),
+ fileSpec,
+ env()));
+ return ioBuf;
+}
+
+void
+SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesIdCorrectly()
+{
+ document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100));
+ HeaderChunkEncoder encoder(doc->getId());
+
+ SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec());
+
+ BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize()));
+ encoder.writeTo(buf);
+ DataLocation newLoc = ioBuf->addLocation(HEADER, buf);
+ document::DocumentId checkId = ioBuf->getDocumentId(newLoc);
+
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), checkId);
+}
+
+void
+SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesHeaderCorrectly()
+{
+ document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100));
+ HeaderChunkEncoder encoder(doc->getId());
+ encoder.bufferDocument(*doc);
+
+ SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec());
+ BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize()));
+ encoder.writeTo(buf);
+ DataLocation newLoc = ioBuf->addLocation(HEADER, buf);
+ Document::UP checkDoc = ioBuf->getDocumentHeader(*getTypeRepo(), newLoc);
+
+ CPPUNIT_ASSERT_EQUAL(doc->getId(), checkDoc->getId());
+ CPPUNIT_ASSERT_EQUAL(doc->getType(), checkDoc->getType());
+}
+
+void
+SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithBlankDefaultDocument()
+{
+ SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1"));
+
+ document::DocumentId id("userdoc:yarn:12345:fluff");
+ DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo()));
+ // Despite adding with document id only, we should now actually have a
+ // valid document header. Will fail with a DeserializeException if no
+ // header has been written.
+ Document::UP removeWithHeader(
+ ioBuf->getDocumentHeader(*getTypeRepo(), loc));
+ CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id);
+ CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(),
+ *getTypeRepo()->getDocumentType("testdoctype1"));
+}
+
+void
+SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithIdInferredDoctype()
+{
+ SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1"));
+
+ document::DocumentId id("id:yarn:testdoctype2:n=12345:fluff");
+ DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo()));
+ // Since document id contains an explicit document type, the blank remove
+ // document header should be written with that type instead of the one
+ // provided as default via config.
+ Document::UP removeWithHeader(
+ ioBuf->getDocumentHeader(*getTypeRepo(), loc));
+ CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id);
+ CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(),
+ *getTypeRepo()->getDocumentType("testdoctype2"));
+}
+
+void
+SimpleMemFileIOBufferTest::testRemovesWithInvalidDocTypeThrowsException()
+{
+ SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1"));
+
+ document::DocumentId id("id:yarn:nosuchtype:n=12345:fluff");
+ try {
+ DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo()));
+ CPPUNIT_FAIL("No exception thrown on bad doctype");
+ } catch (const vespalib::Exception& e) {
+ CPPUNIT_ASSERT(e.getMessage().find("Could not serialize document "
+ "for remove with unknown doctype "
+ "'nosuchtype'")
+ != std::string::npos);
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/spi/simulatedfailurefile.h b/memfilepersistence/src/tests/spi/simulatedfailurefile.h
new file mode 100644
index 00000000000..1ded927a3d1
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/simulatedfailurefile.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/logginglazyfile.h>
+
+namespace storage {
+namespace memfile {
+
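+// LazyFile wrapper that throws an IoException once the configured number of
+// read/write operations has been performed; a negative count never triggers.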
+class SimulatedFailureLazyFile : public vespalib::LazyFile
+{
+ mutable int _readOpsBeforeFailure;
+ mutable int _writeOpsBeforeFailure;
+public:
+ class Factory : public Environment::LazyFileFactory {
+ public:
+ Factory()
+ : _readOpsBeforeFailure(-1),
+ _writeOpsBeforeFailure(0)
+ {
+ }
+ vespalib::LazyFile::UP createFile(const std::string& fileName) const {
+ return vespalib::LazyFile::UP(
+ new SimulatedFailureLazyFile(fileName,
+ vespalib::File::DIRECTIO,
+ _readOpsBeforeFailure,
+ _writeOpsBeforeFailure));
+ }
+
+ void setReadOpsBeforeFailure(int ops) {
+ _readOpsBeforeFailure = ops;
+ }
+
+ void setWriteOpsBeforeFailure(int ops) {
+ _writeOpsBeforeFailure = ops;
+ }
+ private:
+ int _readOpsBeforeFailure;
+ int _writeOpsBeforeFailure;
+ };
+
+ SimulatedFailureLazyFile(
+ const std::string& filename,
+ int flags,
+ int readOpsBeforeFailure,
+ int writeOpsBeforeFailure)
+ : LazyFile(filename, flags),
+ _readOpsBeforeFailure(readOpsBeforeFailure),
+ _writeOpsBeforeFailure(writeOpsBeforeFailure)
+ {
+ }
+
+ off_t write(const void *buf, size_t bufsize, off_t offset)
+ {
+ if (_writeOpsBeforeFailure == 0) {
+ throw vespalib::IoException(
+ "A simulated I/O write exception was triggered",
+ vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC);
+ }
+ --_writeOpsBeforeFailure;
+ return vespalib::LazyFile::write(buf, bufsize, offset);
+ }
+
+ size_t read(void *buf, size_t bufsize, off_t offset) const
+ {
+ if (_readOpsBeforeFailure == 0) {
+ throw vespalib::IoException(
+ "A simulated I/O read exception was triggered",
+ vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC);
+ }
+ --_readOpsBeforeFailure;
+ return vespalib::LazyFile::read(buf, bufsize, offset);
+ }
+};
+
+} // ns memfile
+} // ns storage
+
diff --git a/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp
new file mode 100644
index 00000000000..75eab5c2972
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp
@@ -0,0 +1,213 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/document/datatype/documenttype.h>
+#include <tests/spi/memfiletestutils.h>
+#include <tests/spi/simulatedfailurefile.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+using document::DocumentType;
+
+namespace storage {
+namespace memfile {
+namespace {
+ spi::LoadType defaultLoadType(0, "default");
+}
+
+class SplitOperationHandlerTest : public SingleDiskMemFileTestUtils
+{
+
+ void doTestMultiDisk(uint16_t sourceDisk,
+ uint16_t targetDisk0,
+ uint16_t targetDisk1);
+
+
+ CPPUNIT_TEST_SUITE(SplitOperationHandlerTest);
+ CPPUNIT_TEST(testSimple);
+ CPPUNIT_TEST(testMultiDisk);
+ CPPUNIT_TEST(testMultiDiskNonZeroSourceIndex);
+ CPPUNIT_TEST(testExceptionDuringSplittingEvictsAllBuckets);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testSimple();
+ void testMultiDisk();
+ void testMultiDiskNonZeroSourceIndex();
+ void testExceptionDuringSplittingEvictsAllBuckets();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(SplitOperationHandlerTest);
+
+void
+SplitOperationHandlerTest::testSimple()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(1);
+
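+    // Put 100 documents, alternating between location 4 and 4 | (1 << 16),
+    // so a split on bit 16 yields two targets with 50 documents each.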
+ for (uint32_t i = 0; i < 100; i++) {
+ uint32_t location = 4;
+ if (i % 2 == 0) {
+ location |= (1 << 16);
+ }
+
+ doPut(location, Timestamp(1000 + i));
+ }
+ flush(document::BucketId(16, 4));
+
+ env()._cache.clear();
+
+ document::BucketId sourceBucket = document::BucketId(16, 4);
+ document::BucketId target1 = document::BucketId(17, 4);
+ document::BucketId target2 = document::BucketId(17, 4 | (1 << 16));
+
+ SplitOperationHandler handler(env());
+ spi::Result result = getPersistenceProvider().split(
+ spi::Bucket(sourceBucket, spi::PartitionId(0)),
+ spi::Bucket(target1, spi::PartitionId(0)),
+ spi::Bucket(target2, spi::PartitionId(0)),
+ context);
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(handler.getMemFile(sourceBucket, 0));
+ CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount());
+ }
+
+ {
+ MemFilePtr file(handler.getMemFile(target1, 0));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ for (uint32_t i = 0; i < file->getSlotCount(); ++i) {
+ file->getDocument((*file)[i], ALL);
+ }
+ }
+
+ {
+ MemFilePtr file(handler.getMemFile(target2, 0));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ for (uint32_t i = 0; i < file->getSlotCount(); ++i) {
+ file->getDocument((*file)[i], ALL);
+ }
+ }
+}
+
+void
+SplitOperationHandlerTest::doTestMultiDisk(uint16_t sourceDisk,
+ uint16_t targetDisk0,
+ uint16_t targetDisk1)
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(3);
+
+ for (uint32_t i = 0; i < 100; i++) {
+ uint32_t location = 4;
+ if (i % 2 == 0) {
+ location |= (1 << 16);
+ }
+
+ doPutOnDisk(sourceDisk, location, Timestamp(1000 + i));
+ }
+ flush(document::BucketId(16, 4));
+
+ env()._cache.clear();
+
+ document::BucketId sourceBucket = document::BucketId(16, 4);
+ document::BucketId target1 = document::BucketId(17, 4);
+ document::BucketId target2 = document::BucketId(17, 4 | (1 << 16));
+
+ SplitOperationHandler handler(env());
+ spi::Result result = getPersistenceProvider().split(
+ spi::Bucket(sourceBucket, spi::PartitionId(sourceDisk)),
+ spi::Bucket(target1, spi::PartitionId(targetDisk0)),
+ spi::Bucket(target2, spi::PartitionId(targetDisk1)),
+ context);
+
+ env()._cache.clear();
+
+ {
+ MemFilePtr file(handler.getMemFile(sourceBucket, sourceDisk));
+ CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount());
+ }
+
+ {
+ MemFilePtr file(handler.getMemFile(target1, targetDisk0));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ for (uint32_t i = 0; i < file->getSlotCount(); ++i) {
+ file->getDocument((*file)[i], ALL);
+ }
+ }
+
+ {
+ MemFilePtr file(handler.getMemFile(target2, targetDisk1));
+ CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount());
+ for (uint32_t i = 0; i < file->getSlotCount(); ++i) {
+ file->getDocument((*file)[i], ALL);
+ }
+ }
+}
+
+void
+SplitOperationHandlerTest::testMultiDisk()
+{
+ doTestMultiDisk(0, 1, 2);
+}
+
+void
+SplitOperationHandlerTest::testMultiDiskNonZeroSourceIndex()
+{
+ doTestMultiDisk(1, 2, 0);
+}
+
+void
+SplitOperationHandlerTest::testExceptionDuringSplittingEvictsAllBuckets()
+{
+ spi::Context context(defaultLoadType, spi::Priority(0),
+ spi::Trace::TraceLevel(0));
+ setupDisks(1);
+
+ for (uint32_t i = 0; i < 100; i++) {
+ uint32_t location = 4;
+ if (i % 2 == 0) {
+ location |= (1 << 16);
+ }
+
+ doPut(location, Timestamp(1000 + i));
+ }
+ flush(document::BucketId(16, 4));
+
+ simulateIoErrorsForSubsequentlyOpenedFiles();
+
+ document::BucketId sourceBucket(16, 4);
+ document::BucketId target1(17, 4);
+ document::BucketId target2(17, 4 | (1 << 16));
+
+ try {
+ SplitOperationHandler handler(env());
+ spi::Result result = getPersistenceProvider().split(
+ spi::Bucket(sourceBucket, spi::PartitionId(0)),
+ spi::Bucket(target1, spi::PartitionId(0)),
+ spi::Bucket(target2, spi::PartitionId(0)),
+ context);
+ CPPUNIT_FAIL("Exception not thrown on flush failure");
+ } catch (std::exception&) {
+ }
+
+ CPPUNIT_ASSERT(!env()._cache.contains(sourceBucket));
+ CPPUNIT_ASSERT(!env()._cache.contains(target1));
+ CPPUNIT_ASSERT(!env()._cache.contains(target2));
+
+ unSimulateIoErrorsForSubsequentlyOpenedFiles();
+
+ // Source must not have been deleted
+ {
+ SplitOperationHandler handler(env());
+ MemFilePtr file(handler.getMemFile(sourceBucket, 0));
+ CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount());
+ }
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/tests/testhelper.cpp b/memfilepersistence/src/tests/testhelper.cpp
new file mode 100644
index 00000000000..40a3512e400
--- /dev/null
+++ b/memfilepersistence/src/tests/testhelper.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <tests/testhelper.h>
+
+#include <vespa/log/log.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+LOG_SETUP(".testhelper");
+
+namespace storage {
+
+void addStorageDistributionConfig(vdstestlib::DirConfig& dc)
+{
+ vdstestlib::DirConfig::Config* config;
+ config = &dc.getConfig("stor-distribution", true);
+ config->clear();
+ config->set("group[1]");
+ config->set("group[0].name", "foo");
+ config->set("group[0].index", "0");
+ config->set("group[0].nodes[50]");
+
+ for (uint32_t i = 0; i < 50; i++) {
+ std::ostringstream key; key << "group[0].nodes[" << i << "].index";
+ std::ostringstream val; val << i;
+ config->set(key.str(), val.str());
+ }
+}
+
+vdstestlib::DirConfig getStandardConfig(bool storagenode) {
+ vdstestlib::DirConfig dc;
+ vdstestlib::DirConfig::Config* config;
+ config = &dc.addConfig("stor-cluster");
+ config = &dc.addConfig("load-type");
+ config = &dc.addConfig("bucket");
+ config = &dc.addConfig("messagebus");
+ config = &dc.addConfig("stor-prioritymapping");
+ config = &dc.addConfig("stor-bucketdbupdater");
+ config = &dc.addConfig("metricsmanager");
+ config->set("consumer[1]");
+ config->set("consumer[0].name", "\"status\"");
+ config->set("consumer[0].addedmetrics[1]");
+ config->set("consumer[0].addedmetrics[0]", "\"*\"");
+ config = &dc.addConfig("stor-communicationmanager");
+ config->set("rpcport", "0");
+ config->set("mbusport", "0");
+ config = &dc.addConfig("stor-bucketdb");
+ config->set("chunklevel", "0");
+ config = &dc.addConfig("stor-distributormanager");
+ config = &dc.addConfig("stor-opslogger");
+ config = &dc.addConfig("stor-memfilepersistence");
+ // Easier to see what goes wrong with only 1 thread per disk.
+ config->set("minimum_file_meta_slots", "2");
+ config->set("minimum_file_header_block_size", "368");
+ config->set("minimum_file_size", "4096");
+ config->set("threads[1]");
+ config->set("threads[0].lowestpri 255");
+ config->set("dir_spread", "4");
+ config->set("dir_levels", "0");
+    // Unit tests typically use fake, low time values, so don't complain about
+    // them or compact/delete them by default. Override this in tests that
+    // exercise that behavior.
+ config = &dc.addConfig("persistence");
+ config->set("keep_remove_time_period", "2000000000");
+ config->set("revert_time_period", "2000000000");
+ config = &dc.addConfig("stor-bouncer");
+ config = &dc.addConfig("stor-integritychecker");
+ config = &dc.addConfig("stor-bucketmover");
+ config = &dc.addConfig("stor-messageforwarder");
+ config = &dc.addConfig("stor-server");
+ config->set("enable_dead_lock_detector", "false");
+ config->set("enable_dead_lock_detector_warnings", "false");
+ config->set("max_merges_per_node", "25");
+ config->set("max_merge_queue_size", "20");
+ config->set("root_folder",
+ (storagenode ? "vdsroot" : "vdsroot.distributor"));
+ config->set("is_distributor",
+ (storagenode ? "false" : "true"));
+ config = &dc.addConfig("stor-devices");
+ config->set("root_folder",
+ (storagenode ? "vdsroot" : "vdsroot.distributor"));
+ config = &dc.addConfig("stor-status");
+ config->set("httpport", "0");
+ config = &dc.addConfig("stor-visitor");
+ config->set("defaultdocblocksize", "8192");
+    // By default, we need the "old" behaviour of maxconcurrent visitors.
+ config->set("maxconcurrentvisitors_fixed", "4");
+ config->set("maxconcurrentvisitors_variable", "0");
+ config = &dc.addConfig("stor-visitordispatcher");
+ addFileConfig(dc, "documenttypes", "config-doctypes.cfg");
+ addStorageDistributionConfig(dc);
+ return dc;
+}
+
+void addFileConfig(vdstestlib::DirConfig& dc,
+ const std::string& configDefName,
+ const std::string& fileName)
+{
+ vdstestlib::DirConfig::Config* config;
+ config = &dc.getConfig(configDefName, true);
+ config->clear();
+ std::ifstream in(fileName.c_str());
+ std::string line;
+ while (std::getline(in, line, '\n')) {
+ std::string::size_type pos = line.find(' ');
+ if (pos == std::string::npos) {
+ config->set(line);
+ } else {
+ config->set(line.substr(0, pos), line.substr(pos + 1));
+ }
+ }
+ in.close();
+}
+
+TestName::TestName(const std::string& n)
+ : name(n)
+{
+ LOG(debug, "Starting test %s", name.c_str());
+}
+
+TestName::~TestName() {
+ LOG(debug, "Done with test %s", name.c_str());
+}
+
+} // storage
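
The helpers above are meant to be layered: a test typically starts from getStandardConfig() and then overrides only the keys it cares about through the returned DirConfig. A minimal sketch of that pattern, using only functions defined in this file (the overridden keys below are purely illustrative):

    // Minimal sketch; the overridden keys are illustrative only.
    #include <tests/testhelper.h>

    void setUpTestConfig() {
        vdstestlib::DirConfig dc(storage::getStandardConfig(true)); // storage-node flavour
        // Tighten persistence time periods for a test that exercises removal.
        vdstestlib::DirConfig::Config& persistence(
                dc.getConfig("persistence", true));
        persistence.set("revert_time_period", "30");
        persistence.set("keep_remove_time_period", "60");
        // File-based config can be layered on top in the same way.
        storage::addFileConfig(dc, "documenttypes", "config-doctypes.cfg");
    }
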
diff --git a/memfilepersistence/src/tests/testhelper.h b/memfilepersistence/src/tests/testhelper.h
new file mode 100644
index 00000000000..4445086d300
--- /dev/null
+++ b/memfilepersistence/src/tests/testhelper.h
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+#include <vespa/vdstestlib/cppunit/dirconfig.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+
+
+#include <fstream>
+#include <vespa/fastos/fastos.h>
+#include <sstream>
+
+#define ASSERT_REPLY_COUNT(count, dummylink) \
+ { \
+ std::ostringstream msgost; \
+ if ((dummylink).getNumReplies() != count) { \
+ for (uint32_t ijx=0; ijx<(dummylink).getNumReplies(); ++ijx) { \
+ msgost << (dummylink).getReply(ijx)->toString(true) << "\n"; \
+ } \
+ } \
+ CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \
+ (dummylink).getNumReplies()); \
+ }
+#define ASSERT_COMMAND_COUNT(count, dummylink) \
+ { \
+ std::ostringstream msgost; \
+ if ((dummylink).getNumCommands() != count) { \
+ for (uint32_t ijx=0; ijx<(dummylink).getNumCommands(); ++ijx) { \
+ msgost << (dummylink).getCommand(ijx)->toString(true) << "\n"; \
+ } \
+ } \
+ CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \
+ (dummylink).getNumCommands()); \
+ }
+
+namespace storage {
+
+void addFileConfig(vdstestlib::DirConfig& dc,
+ const std::string& configDefName,
+ const std::string& fileName);
+
+
+void addStorageDistributionConfig(vdstestlib::DirConfig& dc);
+
+vdstestlib::DirConfig getStandardConfig(bool storagenode);
+
+// Class used to log the start and end of each test. Enable debug logging when
+// you want to see which test creates what output, or where a run gets stuck.
+struct TestName {
+ std::string name;
+ TestName(const std::string& n);
+ ~TestName();
+};
+
+} // storage
+
diff --git a/memfilepersistence/src/tests/testrunner.cpp b/memfilepersistence/src/tests/testrunner.cpp
new file mode 100644
index 00000000000..16027870c47
--- /dev/null
+++ b/memfilepersistence/src/tests/testrunner.cpp
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <iostream>
+#include <vespa/log/log.h>
+#include <vespa/vdstestlib/cppunit/cppunittestrunner.h>
+
+LOG_SETUP("persistencecppunittests");
+
+int
+main(int argc, char **argv)
+{
+ vdstestlib::CppUnitTestRunner testRunner;
+ return testRunner.run(argc, argv);
+}
diff --git a/memfilepersistence/src/tests/tools/.gitignore b/memfilepersistence/src/tests/tools/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/tests/tools/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/tests/tools/CMakeLists.txt b/memfilepersistence/src/tests/tools/CMakeLists.txt
new file mode 100644
index 00000000000..aef718c7633
--- /dev/null
+++ b/memfilepersistence/src/tests/tools/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_testtools
+ SOURCES
+ dumpslotfiletest.cpp
+ vdsdisktooltest.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp b/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp
new file mode 100644
index 00000000000..112f8840e72
--- /dev/null
+++ b/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/config/subscription/configuri.h>
+#include <vespa/document/base/testdocrepo.h>
+#include <vespa/memfilepersistence/tools/dumpslotfile.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/util/programoptions_testutils.h>
+#include <tests/spi/memfiletestutils.h>
+
+#include <vespa/document/config/config-documenttypes.h>
+
+namespace storage {
+namespace memfile {
+
+class DumpSlotFileTest : public SingleDiskMemFileTestUtils
+{
+ CPPUNIT_TEST_SUITE(DumpSlotFileTest);
+ CPPUNIT_TEST(testSimple);
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+ void testSimple();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(DumpSlotFileTest);
+
+#define ASSERT_MATCH(optstring, pattern) \
+{ \
+ vespalib::AppOptions opts("dumpslotfile " optstring); \
+ std::ostringstream out; \
+ config::ConfigUri configUri(config::ConfigUri::createFromInstance( \
+ document::TestDocRepo::getDefaultConfig())); \
+ std::unique_ptr<document::DocumenttypesConfig> config = config::ConfigGetter<document::DocumenttypesConfig>::getConfig(configUri.getConfigId(), configUri.getContext()); \
+ SlotFileDumper::dump(opts.getArgCount(), opts.getArguments(), \
+ configUri, out, out); \
+ CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \
+ output = out.str(); \
+}
+
+void
+DumpSlotFileTest::testSimple()
+{
+ std::string output;
+ // Test syntax page
+ ASSERT_MATCH("--help", ".*Usage: dumpslotfile.*");
+    // Test non-existing file (handled as an empty file).
+ ASSERT_MATCH("00a.0",
+ ".*BucketId\\(0x000000000000000a\\)"
+ ".*document count: 0.*non-existing.*");
+ // Parse bucketid without extension.
+ ASSERT_MATCH("000000000000000a",
+ ".*BucketId\\(0x000000000000000a\\) "
+ "\\(extracted from filename\\).*");
+ // Parse invalid bucket id.
+ ASSERT_MATCH("000010000000000g",
+ ".*Failed to extract bucket id from filename.*");
+    // Test toXml with no data; this doesn't require a document config.
+ ASSERT_MATCH("--toxml --documentconfig whatevah 000a.0",
+ ".*<vespafeed>.*");
+ // Test invalid arguments
+ ASSERT_MATCH("--foobar", ".*Invalid option 'foobar'\\..*");
+ // What to show in XML doesn't make sense in non-xml mode
+ ASSERT_MATCH("--includeremoveddocs 0.0",
+ ".*Options for what to include in XML makes no sense when not "
+ "printing XML content.*");
+ ASSERT_MATCH("--includeremoveentries 0.0",
+ ".*Options for what to include in XML makes no sense when not "
+ "printing XML content.*");
+ // To binary only works for single doc
+ ASSERT_MATCH("--tobinary 0.0",
+ ".*To binary option only works for a single document.*");
+
+ BucketId bid(1, 0);
+ createTestBucket(bid, 0);
+ ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0",
+ ".*"
+ "Unique document count: 8.*"
+ "Total document size: [0-9]+.*"
+ "Used size: [0-9]+.*"
+ "Filename: .*/d0/.*"
+ "Filesize: 12288.*"
+ "SlotFileHeader.*"
+ "[0-9]+ empty entries.*"
+ "Header block.*"
+ "Content block.*"
+ "Slotfile verified.*"
+ );
+ ASSERT_MATCH("vdsroot/disks/d0/400000000000000.0", ".*ff ff ff ff.*");
+
+ // User friendly output
+ ASSERT_MATCH("--friendly -nN vdsroot/disks/d0/400000000000000.0",
+ ".*id:mail:testdoctype1:n=0:9380.html.*");
+
+ ASSERT_MATCH("--tobinary "
+ "--docid id:mail:testdoctype1:n=0:doesnotexisthere.html "
+ "vdsroot/disks/d0/400000000000000.0",
+ ".*No document with id id:mail:testdoctype1:n=0:doesnotexi.* "
+ "found.*");
+
+    // Should also test XML with content, but that needs a document config to
+    // work. It should be possible to create one from testdocman programmatically.
+ ASSERT_MATCH("--toxml --documentconfig '' "
+ "vdsroot/disks/d0/400000000000000.0",
+ ".*<vespafeed>\n"
+ "<document documenttype=\"testdoctype1\" "
+ "documentid=\"id:mail:testdoctype1:n=0:9639.html\">\n"
+ "<content>overwritten</content>\n"
+ "</document>.*");
+
+ // To binary
+ ASSERT_MATCH("--tobinary --docid id:mail:testdoctype1:n=0:9380.html "
+ "vdsroot/disks/d0/400000000000000.0",
+ ".*");
+ {
+ TestDocMan docMan;
+ document::ByteBuffer buf(output.c_str(), output.size());
+ document::Document doc(docMan.getTypeRepo(), buf);
+ CPPUNIT_ASSERT_EQUAL(std::string(
+ "<document documenttype=\"testdoctype1\" "
+ "documentid=\"id:mail:testdoctype1:n=0:9380.html\">\n"
+ "<content>To be, or not to be: that is the question:\n"
+ "Whether 'tis nobler in the mind to suffer\n"
+ "The slings and arrows of outrage</content>\n"
+ "</document>"), doc.toXml());
+ }
+
+ // Fail verification
+ {
+ vespalib::LazyFile file("vdsroot/disks/d0/400000000000000.0", 0);
+ file.write("corrupt", 7, 64);
+ }
+ ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0",
+ ".*lot 0 at timestamp [0-9]+ failed checksum verification.*");
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp b/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp
new file mode 100644
index 00000000000..29e780bc900
--- /dev/null
+++ b/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/config/subscription/configuri.h>
+#include <vespa/memfilepersistence/tools/vdsdisktool.h>
+#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h>
+#include <vespa/vdstestlib/cppunit/macros.h>
+#include <vespa/vespalib/util/programoptions_testutils.h>
+#include <tests/spi/memfiletestutils.h>
+
+namespace storage {
+namespace memfile {
+
+struct VdsDiskToolTest : public SingleDiskMemFileTestUtils
+{
+ framework::defaultimplementation::FakeClock _clock;
+ DeviceManager::LP _deviceManager;
+
+ void setUp();
+ void setupRoot();
+
+ void testSimple();
+
+ CPPUNIT_TEST_SUITE(VdsDiskToolTest);
+ CPPUNIT_TEST(testSimple);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(VdsDiskToolTest);
+
+#define ASSERT_MATCH(optstring, pattern, exitcode) \
+{ \
+ std::ostringstream out; \
+ int result = 1; \
+ try{ \
+ vespalib::AppOptions opts("vdsdisktool " optstring); \
+ result = VdsDiskTool::run(opts.getArgCount(), opts.getArguments(), \
+ "vdsroot", out, out); \
+ } catch (std::exception& e) { \
+ out << "Application aborted with exception:\n" << e.what() << "\n"; \
+ } \
+ CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \
+ CPPUNIT_ASSERT_EQUAL(exitcode, result); \
+}
+
+namespace {
+ void createDisk(int i) {
+ std::ostringstream path;
+ path << "vdsroot/mycluster/storage/3/disks/d" << i;
+ CPPUNIT_ASSERT_EQUAL(0, system(("mkdir -p " + path.str()).c_str()));
+ }
+}
+
+void
+VdsDiskToolTest::setUp()
+{
+ system("rm -rf vdsroot");
+ _deviceManager.reset(new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper), _clock));
+}
+
+void
+VdsDiskToolTest::setupRoot()
+{
+ system("rm -rf vdsroot");
+ createDisk(0);
+}
+
+void
+VdsDiskToolTest::testSimple()
+{
+ // Test syntax page
+ ASSERT_MATCH("--help", ".*Usage: vdsdisktool .*", 0);
+ // No VDS installation
+ ASSERT_MATCH("status", ".*No VDS installations found at all.*", 1);
+ // Common setup
+ setupRoot();
+ ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*"
+ "Disk 0: OK\\s*", 0);
+ // Two disks
+ system("mkdir -p vdsroot/mycluster/storage/3/disks/d1/");
+ ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*"
+ "Disk 0: OK\\s*"
+ "Disk 1: OK\\s*", 0);
+ // Two disks, non-continuous indexes
+ system("rm -rf vdsroot/mycluster/storage/3/disks/d1/");
+ system("mkdir -p vdsroot/mycluster/storage/3/disks/d2/");
+ ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*"
+ "Disk 0: OK\\s*"
+ "Disk 1: NOT_FOUND - Disk not found during scan.*"
+ "Disk 2: OK\\s*", 0);
+ // Status file existing
+ setupRoot();
+ createDisk(1);
+ MountPointList mountPoints("vdsroot/mycluster/storage/3",
+ std::vector<vespalib::string>(),
+ _deviceManager);
+ mountPoints.scanForDisks();
+ CPPUNIT_ASSERT_EQUAL(2u, mountPoints.getSize());
+ mountPoints[1].addEvent(Device::IO_FAILURE, "Bad", "Found in test");
+ mountPoints.writeToFile();
+ ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*"
+ "Disk 0: OK\\s*"
+ "Disk 1: IO_FAILURE - 0 Bad\\s*", 0);
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/.gitignore
new file mode 100644
index 00000000000..c43cd4d8c3b
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/.gitignore
@@ -0,0 +1,3 @@
+/.depend
+/Makefile
+/libmemfilepersistence.so.5.1
diff --git a/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt
new file mode 100644
index 00000000000..465e217210c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence
+ SOURCES
+ $<TARGET_OBJECTS:memfilepersistence_device>
+ $<TARGET_OBJECTS:memfilepersistence_init>
+ $<TARGET_OBJECTS:memfilepersistence_mapper>
+ $<TARGET_OBJECTS:memfilepersistence_spi>
+ $<TARGET_OBJECTS:memfilepersistence_common>
+ $<TARGET_OBJECTS:memfilepersistence_memfile>
+ $<TARGET_OBJECTS:memfilepersistence_tools>
+ INSTALL lib64
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt
new file mode 100644
index 00000000000..82a78fa1d0a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_common OBJECT
+ SOURCES
+ environment.cpp
+ options.cpp
+ types.cpp
+ filespecification.cpp
+ exceptions.cpp
+ slotmatcher.cpp
+ config_lock_guard.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h
new file mode 100644
index 00000000000..506699f7e31
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/config-stor-memfilepersistence.h>
+#include <vespa/config-stor-devices.h>
+#include <vespa/config-persistence.h>
+
+namespace storage {
+namespace memfile {
+
+// Friendly aliases to painfully long config names.
+using MemFilePersistenceConfig
+ = vespa::config::storage::StorMemfilepersistenceConfig;
+using PersistenceConfig = vespa::config::content::PersistenceConfig;
+using DevicesConfig = vespa::config::storage::StorDevicesConfig;
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp
new file mode 100644
index 00000000000..1e68024d8dd
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/config_lock_guard.h>
+#include <vespa/memfilepersistence/common/environment.h>
+
+namespace storage {
+namespace memfile {
+
+bool
+ConfigLockGuardBase::hasPersistenceConfig() const noexcept
+{
+ return (_env->_persistenceConfig.get() != nullptr);
+}
+
+std::shared_ptr<const PersistenceConfig>
+ConfigLockGuardBase::persistenceConfig() const noexcept
+{
+ return _env->_persistenceConfig;
+}
+
+bool
+ConfigLockGuardBase::hasMemFilePersistenceConfig() const noexcept
+{
+ return (_env->_config.get() != nullptr);
+}
+
+std::shared_ptr<const MemFilePersistenceConfig>
+ConfigLockGuardBase::memFilePersistenceConfig() const noexcept
+{
+ return _env->_config;
+}
+
+bool
+ConfigLockGuardBase::hasDevicesConfig() const noexcept
+{
+ return (_env->_devicesConfig.get() != nullptr);
+}
+
+std::shared_ptr<const DevicesConfig>
+ConfigLockGuardBase::devicesConfig() const noexcept
+{
+ return _env->_devicesConfig;
+}
+
+bool
+ConfigLockGuardBase::hasOptions() const noexcept
+{
+ return (_env->_options.get() != nullptr);
+}
+
+std::shared_ptr<const Options>
+ConfigLockGuardBase::options() const noexcept
+{
+ return _env->_options;
+}
+
+ConfigWriteLockGuard::ConfigWriteLockGuard(Environment& e)
+ : ConfigLockGuardBase(e),
+ _lock(e._configRWLock),
+ _mutableEnv(&e)
+{
+}
+
+ConfigWriteLockGuard::ConfigWriteLockGuard(ConfigWriteLockGuard&& other)
+ : ConfigLockGuardBase(std::move(other)),
+ _lock(other._lock), // Implicit lock stealing, no explicit moving
+ _mutableEnv(other._mutableEnv)
+{
+ other._mutableEnv = nullptr;
+}
+
+void
+ConfigWriteLockGuard::setPersistenceConfig(
+ std::unique_ptr<PersistenceConfig> cfg) noexcept
+{
+ mutableEnv()._persistenceConfig = std::move(cfg);
+}
+
+void
+ConfigWriteLockGuard::setMemFilePersistenceConfig(
+ std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept
+{
+ mutableEnv()._config = std::move(cfg);
+}
+
+void
+ConfigWriteLockGuard::setDevicesConfig(
+ std::unique_ptr<DevicesConfig> cfg) noexcept
+{
+ mutableEnv()._devicesConfig = std::move(cfg);
+}
+
+void
+ConfigWriteLockGuard::setOptions(std::unique_ptr<Options> opts)
+{
+ mutableEnv()._options = std::move(opts);
+}
+
+ConfigReadLockGuard::ConfigReadLockGuard(const Environment& e)
+ : ConfigLockGuardBase(e),
+ _lock(e._configRWLock)
+{
+}
+
+ConfigReadLockGuard::ConfigReadLockGuard(ConfigReadLockGuard&& other)
+ : ConfigLockGuardBase(std::move(other)),
+ _lock(other._lock) // Implicit lock stealing, no explicit moving
+{
+}
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h
new file mode 100644
index 00000000000..b97b61010af
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/config_aliases.h>
+#include <vespa/memfilepersistence/common/options.h>
+#include <vespa/vespalib/util/rwlock.h>
+#include <memory>
+
+namespace storage {
+namespace memfile {
+
+class Environment;
+
+/**
+ * Shared guard base allowing read access to existing configs via both
+ * read and write guard subclasses.
+ */
+class ConfigLockGuardBase {
+public:
+ explicit ConfigLockGuardBase(const Environment& e)
+ : _env(&e)
+ {
+ }
+
+ ConfigLockGuardBase(ConfigLockGuardBase&& other)
+ : _env(other._env)
+ {
+        // If the moved-from source is used after the move, ensure it nukes
+        // itself with a SIGSEGV rather than silently misbehaving.
+ other._env = nullptr;
+ }
+
+ // To avoid circular dependencies, all access of Environment internals
+ // must be in separate .cpp file.
+
+ bool hasPersistenceConfig() const noexcept;
+ std::shared_ptr<const PersistenceConfig> persistenceConfig() const noexcept;
+
+ bool hasMemFilePersistenceConfig() const noexcept;
+ std::shared_ptr<const MemFilePersistenceConfig>
+ memFilePersistenceConfig() const noexcept;
+
+ bool hasDevicesConfig() const noexcept;
+ std::shared_ptr<const DevicesConfig> devicesConfig() const noexcept;
+
+ bool hasOptions() const noexcept;
+ std::shared_ptr<const Options> options() const noexcept;
+
+ ConfigLockGuardBase(const ConfigLockGuardBase&) = delete;
+ ConfigLockGuardBase& operator=(const ConfigLockGuardBase&) = delete;
+
+private:
+ const Environment* _env;
+};
+
+class ConfigWriteLockGuard : public ConfigLockGuardBase {
+public:
+ explicit ConfigWriteLockGuard(Environment& e);
+ /**
+ * Moving a guard transfers ownership of the lock to the move target. It
+ * is illegal and undefined behavior to attempt to access the environment
+ * configuration through a guard whose lock has been transferred away.
+ */
+ ConfigWriteLockGuard(ConfigWriteLockGuard&& other);
+
+ // By definition, configs can only be mutated when the writer lock
+ // is held.
+ void setPersistenceConfig(std::unique_ptr<PersistenceConfig> cfg) noexcept;
+ void setMemFilePersistenceConfig(
+ std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept;
+ void setDevicesConfig(std::unique_ptr<DevicesConfig> cfg) noexcept;
+ void setOptions(std::unique_ptr<Options> opts);
+
+private:
+ vespalib::RWLockWriter _lock;
+ // This points to the exact same object as the const ref in the base
+ // and basically serves as an alternative to const_cast.
+ Environment* _mutableEnv;
+
+    // Hide from other methods the fact that we're storing duplicate
+    // information.
+ Environment& mutableEnv() { return *_mutableEnv; }
+};
+
+class ConfigReadLockGuard : public ConfigLockGuardBase {
+public:
+ explicit ConfigReadLockGuard(const Environment& e);
+ ConfigReadLockGuard(ConfigReadLockGuard&& other);
+
+ // Config reader methods already implemented in base.
+
+private:
+ vespalib::RWLockReader _lock;
+};
+
+
+} // memfile
+} // storage
+
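
The intended usage pattern is that readers hold a ConfigReadLockGuard only long enough to copy out the shared_ptrs they need, while reconfiguration code holds a ConfigWriteLockGuard while swapping in new config objects. A minimal usage sketch under that assumption, given an Environment reference:

    // Illustrative sketch of the read/write guard pattern described above.
    #include <vespa/memfilepersistence/common/config_lock_guard.h>
    #include <vespa/memfilepersistence/common/environment.h>
    #include <memory>

    void readCurrentOptions(const storage::memfile::Environment& env) {
        storage::memfile::ConfigReadLockGuard guard(env);
        std::shared_ptr<const storage::memfile::Options> opts(guard.options());
        // The lock is released when the guard goes out of scope; the copied
        // shared_ptr keeps the Options object alive beyond that point.
        (void) opts;
    }

    void applyNewOptions(storage::memfile::Environment& env,
                         std::unique_ptr<storage::memfile::Options> newOpts) {
        storage::memfile::ConfigWriteLockGuard guard(env);
        guard.setOptions(std::move(newOpts));
    }
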
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h b/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h
new file mode 100644
index 00000000000..9ca9c9e6294
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class ConfigKeeper
+ * \ingroup memfile
+ *
+ * \brief Utility class for live reconfiguration
+ *
+ * When many threads need the same config, we don't want each of them to
+ * subscribe to that config separately, for two reasons:
+ * - It would put unnecessary extra load on the config system.
+ * - The application would not know whether all users have the same config
+ *   version at any given time.
+ *
+ * This class is a small utility for handling that.
+ */
+#pragma once
+
+#include <memory>
+#include <vespa/vespalib/util/sync.h>
+
+namespace storage {
+
+template<typename ConfigClass>
+class ConfigKeeper {
+ vespalib::Monitor _configLock;
+    bool _configUpdated; // Set to true when a new config is pending activation.
+ std::unique_ptr<ConfigClass> _nextConfig;
+ ConfigClass _config;
+
+public:
+ ConfigKeeper() : _configUpdated(false) {}
+
+ void updateConfig(const ConfigClass& config) {
+ vespalib::MonitorGuard lock(_configLock);
+ _nextConfig.reset(new ConfigClass(config));
+ _configUpdated = true;
+ }
+
+ void activateNewConfig() {
+ if (!_configUpdated) return;
+ vespalib::MonitorGuard lock(_configLock);
+ _config = *_nextConfig;
+ _nextConfig.reset(0);
+ _configUpdated = false;
+ lock.signal();
+ }
+
+ void waitForAnyActivation() {
+ vespalib::MonitorGuard lock(_configLock);
+ while (_configUpdated) lock.wait();
+ }
+
+ ConfigClass* operator->() { return &_config; }
+ ConfigClass& operator*() { return _config; }
+};
+
+} // storage
+
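
A minimal sketch of the intended split between the thread receiving config and the thread using it; MyConfig is a hypothetical config type introduced purely for illustration:

    // Illustrative sketch; MyConfig is a hypothetical config type.
    #include <vespa/memfilepersistence/common/configkeeper.h>
    #include <cstdint>

    struct MyConfig { uint32_t maxPending = 100; };

    storage::ConfigKeeper<MyConfig> keeper;

    // Called from the config subscriber thread.
    void onNewConfig(const MyConfig& cfg) {
        keeper.updateConfig(cfg);          // staged, not yet visible to users
    }

    // Called at a safe point in the worker thread's loop.
    void workerIteration() {
        keeper.activateNewConfig();        // no-op if nothing is pending
        uint32_t limit = keeper->maxPending;
        (void) limit;
    }
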
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp
new file mode 100644
index 00000000000..6cfe2269886
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/vespalib/util/random.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/config/config.h>
+
+using config::ConfigGetter;
+
+namespace storage {
+namespace memfile {
+
+namespace {
+
+template <typename ConfigT>
+std::shared_ptr<ConfigT>
+resolveConfig(const config::ConfigUri& configUri)
+{
+ return {ConfigGetter<ConfigT>::getConfig(
+ configUri.getConfigId(), configUri.getContext())};
+}
+
+}
+
+vespalib::LazyFile::UP
+DefaultLazyFileFactory::createFile(const std::string& fileName) const
+{
+ return vespalib::LazyFile::UP(
+ new vespalib::LazyFile(
+ fileName, vespalib::File::DIRECTIO | _flags));
+}
+
+Environment::Environment(const config::ConfigUri & configUri,
+ MemFileCache& cache,
+ MemFileMapper& mapper,
+ const document::DocumentTypeRepo& typeRepo,
+ const framework::Clock& clock,
+ bool ignoreDisks)
+ : _clock(clock),
+ _cache(cache),
+ _memFileMapper(mapper),
+ _bucketFactory(),
+ _lazyFileFactory(new DefaultLazyFileFactory(
+ ignoreDisks ? vespalib::File::READONLY : 0)),
+ _repo(&typeRepo),
+ _config(resolveConfig<MemFilePersistenceConfig>(configUri)),
+ _persistenceConfig(resolveConfig<PersistenceConfig>(configUri)),
+ _devicesConfig(resolveConfig<DevicesConfig>(configUri)),
+ _options(std::make_shared<Options>(*_config, *_persistenceConfig))
+{
+ DeviceManager::LP manager(
+ new DeviceManager(DeviceMapper::UP(new SimpleDeviceMapper()),
+ _clock));
+
+ manager->setPartitionMonitorPolicy(
+ _devicesConfig->statfsPolicy, _devicesConfig->statfsPeriod);
+ _mountPoints.reset(new MountPointList(_devicesConfig->rootFolder,
+ _devicesConfig->diskPath,
+ manager));
+
+ if (!ignoreDisks) {
+ _mountPoints->init(0);
+
+ // Update full disk setting for partition monitors
+ for (uint32_t i=0; i<_mountPoints->getSize(); ++i) {
+ Directory& dir(getDirectory(i));
+ if (dir.getPartition().getMonitor() != 0) {
+ dir.getPartition().getMonitor()->setMaxFillness(
+ _options->_diskFullFactor);
+ }
+ }
+ }
+}
+
+Types::String
+Environment::calculatePathInDir(const Types::BucketId& id, Directory& dir)
+{
+ vespalib::asciistream os;
+ os << dir.getPath() << '/';
+ // Directories created should only depend on bucket identifier.
+ document::BucketId::Type seed = id.getId();
+ seed = seed ^ (seed >> 32);
+ vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11);
+
+ for (uint32_t i = 1; i <= (uint32_t)_config->dirLevels; ++i) {
+ os << vespalib::make_vespa_string(
+ "%.4x/",
+ randomizer.nextUint32() % _config->dirSpread);
+ }
+
+ os << vespalib::make_vespa_string("%.8" PRIx64 ".0", id.getId());
+ return os.str();
+}
+
+Environment::~Environment()
+{
+}
+
+Directory& Environment::getDirectory(uint16_t disk)
+{
+ return (*_mountPoints)[disk];
+}
+
+void
+Environment::addModifiedBucket(const document::BucketId& bid)
+{
+ vespalib::LockGuard guard(_modifiedBucketsLock);
+ _modifiedBuckets.push_back(bid);
+}
+
+void
+Environment::swapModifiedBuckets(document::BucketId::List & ids)
+{
+ vespalib::LockGuard guard(_modifiedBucketsLock);
+ _modifiedBuckets.swap(ids);
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.h b/memfilepersistence/src/vespa/memfilepersistence/common/environment.h
new file mode 100644
index 00000000000..8a944f6921e
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/environment.h
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::Environment
+ * \ingroup memfile
+ *
+ * \brief Keeps the environment for MemFile operations
+ *
+ * The memfile layer needs quite a lot of state set up in order to work. Rather
+ * than passing all these bits around when creating new slotfiles, we keep an
+ * environment holding all the static pieces that are not related to single
+ * files.
+ */
+
+#pragma once
+
+#include <vespa/config/helper/configfetcher.h>
+#include <vespa/memfilepersistence/common/options.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/common/config_lock_guard.h>
+#include <vespa/memfilepersistence/common/config_aliases.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/memfilepersistence/device/mountpointlist.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFileMapper;
+class MemFileCache;
+
+struct Environment : public Types {
+ class LazyFileFactory {
+ public:
+ virtual ~LazyFileFactory() {};
+
+ virtual vespalib::LazyFile::UP
+ createFile(const std::string& fileName) const = 0;
+ };
+ using UP = std::unique_ptr<Environment>;
+
+ const framework::Clock& _clock;
+ MemFileCache& _cache;
+ MemFileMapper& _memFileMapper;
+ MountPointList::UP _mountPoints;
+ document::BucketIdFactory _bucketFactory;
+ std::unique_ptr<LazyFileFactory> _lazyFileFactory;
+ vespalib::Lock _modifiedBucketsLock;
+ document::BucketId::List _modifiedBuckets;
+
+ Environment(const config::ConfigUri & configUri,
+ MemFileCache&,
+ MemFileMapper&,
+ const document::DocumentTypeRepo&,
+ const framework::Clock&,
+ bool ignoreDisks = false);
+ ~Environment();
+
+ String calculatePathInDir(const Types::BucketId& id, Directory& dir);
+
+ vespalib::LazyFile::UP createFile(const std::string& fileName) const {
+ return _lazyFileFactory->createFile(fileName);
+ }
+
+ Directory& getDirectory(uint16_t disk = 0);
+
+ void addModifiedBucket(const document::BucketId&);
+ void swapModifiedBuckets(document::BucketId::List &);
+
+ ConfigReadLockGuard acquireConfigReadLock() const {
+ return ConfigReadLockGuard(*this);
+ }
+
+ ConfigWriteLockGuard acquireConfigWriteLock() {
+ return ConfigWriteLockGuard(*this);
+ }
+
+ /**
+ * Get the currently assigned document repo in a data race free manner.
+ * Forms a release/acquire pair with setRepo()
+ */
+ const document::DocumentTypeRepo& repo() const noexcept {
+ return *_repo.load(std::memory_order_acquire);
+ }
+ /**
+ * Sets the currently assigned document repo in a data race free manner.
+ * Forms a release/acquire pair with repo()
+ */
+ void setRepo(const document::DocumentTypeRepo* typeRepo) noexcept {
+ _repo.store(typeRepo, std::memory_order_release);
+ }
+private:
+ mutable vespalib::RWLock _configRWLock;
+ /**
+ * For simplicity, repos are currently kept alive for the duration of the
+ * process. This means we don't have to care about lifetime management, but
+ * we still have to ensure writes that set the repo are release/acquired
+ * paired with their reads. Repos are provided through the SPI and _not_
+ * through regular provider-level config subscription, so we therefore do
+ * not require the config lock to be held when reading/writing.
+ */
+ std::atomic<const document::DocumentTypeRepo*> _repo;
+ /**
+ * Configs are kept as shared_ptrs to allow lock window to remain as small
+ * as possible while still retaining thread safety during pointer
+ * reassignments.
+ */
+ std::shared_ptr<const MemFilePersistenceConfig> _config;
+ std::shared_ptr<const PersistenceConfig> _persistenceConfig;
+ std::shared_ptr<const DevicesConfig> _devicesConfig;
+ /**
+     * Options is not a true config per se, but an aggregate of multiple other
+     * configs, and must thus be protected as if it were one.
+ */
+ std::shared_ptr<const Options> _options;
+ // We entrust the config guards with access to our internals.
+ friend class ConfigLockGuardBase;
+ friend class ConfigWriteLockGuard;
+ friend class ConfigReadLockGuard;
+};
+
+struct DefaultLazyFileFactory
+ : public Environment::LazyFileFactory
+{
+ int _flags;
+
+ DefaultLazyFileFactory(int flags) : _flags(flags) {}
+
+ vespalib::LazyFile::UP createFile(const std::string& fileName) const;
+};
+
+} // memfile
+} // storage
+
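
The release/acquire pairing described above is a plain atomic pointer swap; a small sketch of the two sides, assuming the repo itself is kept alive elsewhere for the process lifetime:

    // Illustrative sketch of the repo()/setRepo() release-acquire pairing.
    #include <vespa/memfilepersistence/common/environment.h>

    void installRepo(storage::memfile::Environment& env,
                     const document::DocumentTypeRepo& newRepo) {
        env.setRepo(&newRepo);                                // release store
    }

    void useRepo(const storage::memfile::Environment& env) {
        const document::DocumentTypeRepo& repo = env.repo();  // acquire load
        (void) repo;  // ...resolve document types via the repo here...
    }
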
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp
new file mode 100644
index 00000000000..16235f21707
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+
+namespace storage {
+namespace memfile {
+
+VESPA_IMPLEMENT_EXCEPTION_SPINE(TimestampExistException);
+VESPA_IMPLEMENT_EXCEPTION_SPINE(InconsistentSlotException);
+VESPA_IMPLEMENT_EXCEPTION_SPINE(MemFileIoException);
+VESPA_IMPLEMENT_EXCEPTION(NoDisksException, vespalib::Exception);
+
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(SlotNotFoundException);
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidArgumentException);
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidStateException);
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(CorruptMemFileException);
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(MemFileWrapperException);
+VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InconsistentException);
+
+MemFileException::MemFileException(const FileSpecification& file)
+ : _file(file)
+{
+}
+
+MemFileException::~MemFileException()
+{
+}
+
+TimestampExistException::TimestampExistException(
+ const vespalib::string& message, const FileSpecification& file,
+ Types::Timestamp ts, const vespalib::string& location, int skipStack)
+ : Exception(message, location, skipStack + 1),
+ MemFileException(file),
+ _timestamp(ts)
+{
+}
+
+InconsistentSlotException::InconsistentSlotException(
+ const vespalib::string& message, const FileSpecification& file,
+ const MemSlot& slot, const vespalib::string& location, int skipstack)
+ : InconsistentException(message, file, location, skipstack + 1),
+ _slot(slot)
+{
+}
+
+MemFileIoException::MemFileIoException(
+ const vespalib::string& msg, const FileSpecification& file,
+ Type type, const vespalib::string& location, int skipStack)
+ : IoException(msg, type, location, skipStack + 1),
+ MemFileException(file)
+{
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h
new file mode 100644
index 00000000000..03edf7e7a83
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h
@@ -0,0 +1,126 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::IoException
+ * \ingroup memfile
+ *
+ * \brief Exception thrown by memfile layer for IO problems.
+ *
+ * Storage needs to know what disk was having issues for disk related problems,
+ * in case it needs to disable a non-working disk. Some information on what
+ * file was being operated on while one is having trouble is nice anyhow. Thus
+ * specific exceptions have been created to keep the file specification of the
+ * file in question. The MemFile layer may throw some exceptions that aren't
+ * MemFileExceptions though. These exceptions should not be disk/file related.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/common/filespecification.h>
+#include <vespa/memfilepersistence/memfile/memslot.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+#define VESPA_DEFINE_MEMFILE_EXCEPTION(name) \
+ struct name : public vespalib::Exception, public MemFileException { \
+ name(const vespalib::string& message, const FileSpecification& file, \
+ const vespalib::string& location, int skipStack = 0); \
+ ~name() throw(); \
+ VESPA_DEFINE_EXCEPTION_SPINE(name); \
+};
+
+#define VESPA_IMPLEMENT_MEMFILE_EXCEPTION(name) \
+ name::name(const vespalib::string& message, const FileSpecification& file, \
+ const vespalib::string& location, int skipStack) \
+ : vespalib::Exception(message, location, skipStack + 1), \
+ MemFileException(file) {} \
+ name::~name() throw() {} \
+ VESPA_IMPLEMENT_EXCEPTION_SPINE(name);
+
+namespace storage {
+namespace memfile {
+
+VESPA_DEFINE_EXCEPTION(NoDisksException, vespalib::Exception);
+
+/**
+ * \class storage::memfile::MemFileException
+ * \ingroup memfile
+ *
+ * \brief Interface to implement for exceptions that contain a file specification specifying what memfile was problematic.
+ */
+class MemFileException : protected Types {
+ FileSpecification _file;
+
+public:
+ MemFileException(const FileSpecification&);
+ virtual ~MemFileException() = 0;
+
+ const FileSpecification& getFile() const { return _file; }
+};
+
+VESPA_DEFINE_MEMFILE_EXCEPTION(SlotNotFoundException);
+VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidArgumentException);
+VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidStateException);
+VESPA_DEFINE_MEMFILE_EXCEPTION(CorruptMemFileException);
+VESPA_DEFINE_MEMFILE_EXCEPTION(MemFileWrapperException);
+
+/**
+ * \class storage::InconsistentException
+ * \ingroup memfile
+ *
+ * \brief Thrown by MemFile::verifyConsistent() if inconsistent
+ */
+VESPA_DEFINE_MEMFILE_EXCEPTION(InconsistentException);
+
+/**
+ * @class storage::TimestampExistException
+ * @ingroup filestorage
+ *
+ * @brief Thrown by SlotFile::write() when timestamp given is already in use.
+ */
+class TimestampExistException : public vespalib::Exception,
+ public MemFileException
+{
+ Timestamp _timestamp;
+public:
+ TimestampExistException(const vespalib::string& message,
+ const FileSpecification&, Timestamp ts,
+ const vespalib::string& location, int skipstack = 0);
+ virtual ~TimestampExistException() throw() {}
+
+ VESPA_DEFINE_EXCEPTION_SPINE(TimestampExistException);
+
+ Timestamp getTimestamp() const { return _timestamp; }
+};
+
+/**
+ * @class storage::InconsistentSlotException
+ * @ingroup filestorage
+ *
+ * @brief Thrown by MemFile::verifyConsistent() if a slot is inconsistent
+ */
+class InconsistentSlotException : public InconsistentException {
+ MemSlot _slot;
+
+public:
+ InconsistentSlotException(const vespalib::string& message,
+ const FileSpecification&, const MemSlot& slot,
+ const vespalib::string& location, int skipstack = 0);
+ virtual ~InconsistentSlotException() throw() {}
+
+ VESPA_DEFINE_EXCEPTION_SPINE(InconsistentSlotException);
+};
+
+class MemFileIoException : public vespalib::IoException,
+ public MemFileException
+{
+public:
+ MemFileIoException(const vespalib::string& msg, const FileSpecification&,
+ Type type, const vespalib::string& location,
+ int skipStack = 0);
+ virtual ~MemFileIoException() throw() {}
+
+ VESPA_DEFINE_EXCEPTION_SPINE(MemFileIoException);
+};
+
+} // memfile
+} // storage
+
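
Callers that need to know which file (and hence which disk) failed can catch the MemFileException mixin and inspect its FileSpecification, while generic error handling can keep catching the vespalib exception types. A small sketch of that, where reportBadFile() is a hypothetical helper:

    // Illustrative sketch; reportBadFile() is a hypothetical helper.
    #include <vespa/memfilepersistence/common/exceptions.h>

    void reportBadFile(const storage::memfile::FileSpecification& file);

    void runGuardedFileOperation() {
        try {
            // ... perform some operation against a memfile ...
        } catch (storage::memfile::MemFileIoException& e) {
            reportBadFile(e.getFile());  // IO errors identify the failing file
            throw;
        } catch (storage::memfile::MemFileException& e) {
            reportBadFile(e.getFile());  // other memfile errors do too
            throw;
        }
    }
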
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp
new file mode 100644
index 00000000000..b3e90d53bb1
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/filespecification.h>
+
+namespace storage {
+namespace memfile {
+
+FileSpecification::FileSpecification(const BucketId& bucket, Directory& dir,
+ const String& path)
+ : _bucketId(bucket),
+ _dir(&dir),
+ _path(path),
+ _wantedVersion(TRADITIONAL_SLOTFILE)
+{
+ if (dir.getState() != Device::OK) {
+ throw vespalib::IllegalStateException(
+ "Attempt to create file specification for file on disk that "
+ "is not available: " + dir.toString(), VESPA_STRLOC);
+ }
+}
+
+void
+FileSpecification::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ (void) verbose; (void) indent;
+ out << "FileSpecification(" << _bucketId << ", " << *_dir << ", " << _path
+ << ", wanted version 0x" << std::hex << _wantedVersion << std::dec
+ << ")";
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h
new file mode 100644
index 00000000000..4d9cda2c47c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::FileSpecification
+ * \ingroup memfile
+ *
+ * \brief Information about the file currently worked on.
+ *
+ * The file specification specifies what file a given MemFile should work on.
+ */
+
+#pragma once
+
+#include <vespa/vespalib/util/printable.h>
+#include <vespa/memfilepersistence/device/directory.h>
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFileEnvironment;
+
+class FileSpecification : private Types,
+ public vespalib::Printable,
+ public boost::operators<FileSpecification>
+{
+ BucketId _bucketId;
+ Directory* _dir;
+ String _path;
+ FileVersion _wantedVersion;
+
+public:
+ FileSpecification(const BucketId&, Directory&, const String& path);
+
+ void setWantedVersion(FileVersion v) { _wantedVersion = v; }
+
+ const document::BucketId& getBucketId() const { return _bucketId; }
+ Directory& getDirectory() const { return *_dir; }
+ const String& getPath() const { return _path; }
+ FileVersion getWantedFileVersion() const { return _wantedVersion; }
+
+ virtual void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ bool operator==(const FileSpecification& o) const {
+ return (_bucketId == o._bucketId && _dir == o._dir
+ && _path == o._path && _wantedVersion == o._wantedVersion);
+ }
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h b/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h
new file mode 100644
index 00000000000..de807efed2a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class storage::FreePtr
+ * @ingroup slotfile
+ *
+ * @brief Simple pointer wrapper that free() its content when deleted.
+ *
+ * Utility used to hold memory allocated with malloc directly.
+ */
+
+#pragma once
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+namespace storage {
+
+template<typename T>
+class FreePtr {
+ T* _ptr;
+
+public:
+ FreePtr(T* ptr = 0) : _ptr(ptr) {}
+ ~FreePtr() { free(); }
+
+ FreePtr(FreePtr& ptr) : _ptr(ptr._ptr) { ptr._ptr = 0; }
+ FreePtr& operator=(FreePtr& ptr) { swap(ptr); ptr.free(); return *this; }
+
+ void reset(T* ptr = 0) { free(); _ptr = ptr; }
+ void swap(FreePtr<T>& other)
+ { T* tmp = _ptr; _ptr = other._ptr; other._ptr = tmp; }
+ T* get() { return _ptr; }
+ const T* get() const { return _ptr; }
+ T* operator->() { return _ptr; }
+ const T* operator->() const { return _ptr; }
+ T& operator*() { assert(_ptr != 0); return *_ptr; }
+ const T& operator*() const { assert(_ptr != 0); return *_ptr; }
+ void free() { if (_ptr != 0) { ::free(_ptr); _ptr = 0; } }
+};
+
+} // storage
+
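
A minimal usage sketch: FreePtr is handy for holding memory handed back by C APIs that allocate with malloc(), so it is released with ::free() no matter how the scope is left:

    // Illustrative sketch of FreePtr guarding malloc()-allocated memory.
    #include <vespa/memfilepersistence/common/freeptr.h>
    #include <cstdio>
    #include <cstdlib>

    void printGreeting() {
        storage::FreePtr<char> buf(static_cast<char*>(::malloc(16)));
        ::snprintf(buf.get(), 16, "hello");
        ::printf("%s\n", buf.get());
    }   // buf's destructor calls ::free() here
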
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp
new file mode 100644
index 00000000000..7cf75c4f977
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/options.h>
+
+#include <vespa/log/log.h>
+#include <iomanip>
+#include <vespa/config-stor-memfilepersistence.h>
+
+LOG_SETUP(".persistence.slotfile.options");
+
+namespace storage {
+
+namespace memfile {
+
+Options::Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig,
+ const vespa::config::content::PersistenceConfig& newPersistenceConfig)
+ : _minimumFileMetaSlots(newConfig.minimumFileMetaSlots),
+ _maximumFileMetaSlots(newConfig.maximumFileMetaSlots),
+ _minimumFileHeaderBlockSize(newConfig.minimumFileHeaderBlockSize),
+ _maximumFileHeaderBlockSize(newConfig.maximumFileHeaderBlockSize),
+ _minimumFileSize(newConfig.minimumFileSize),
+ _maximumFileSize(newConfig.maximumFileSize),
+ _fileBlockSize(newConfig.fileBlockSize),
+ _revertTimePeriod(newPersistenceConfig.revertTimePeriod * 1000000ll),
+ _keepRemoveTimePeriod(
+ newPersistenceConfig.keepRemoveTimePeriod * 1000000ll),
+ _maxDocumentVersions(
+ newPersistenceConfig.maximumVersionsOfSingleDocumentStored),
+ _cacheSize(newConfig.cacheSize),
+ _initialIndexRead(newConfig.initialIndexRead),
+ _maximumGapToReadThrough(newConfig.maximumGapToReadThrough),
+ _diskFullFactor(newConfig.diskFullFactor),
+ _growFactor(newConfig.growFactor),
+ _overrepresentMetaDataFactor(newConfig.overrepresentMetaDataFactor),
+ _overrepresentHeaderBlockFactor(newConfig.overrepresentHeaderBlockFactor),
+ _defaultRemoveDocType(
+ newConfig.store50BackwardsCompatibleRemoveEntriesWithDoctype)
+{
+ validate();
+}
+
+namespace {
+ template<typename Number>
+ void verifyAligned(Number n, uint32_t alignSize, const char* name) {
+ if (n % alignSize != 0) {
+ std::ostringstream ost;
+ ost << name << " " << n
+                << " must be divisible by block alignment size " << alignSize;
+ throw vespalib::IllegalStateException(
+ ost.str(), VESPA_STRLOC);
+ }
+ }
+}
+
+void Options::validate()
+{
+ uint32_t tmp32 = 0;
+
+ // REVERT / KEEP REMOVE TIME PERIODS
+ if (_revertTimePeriod > _keepRemoveTimePeriod) {
+ LOG(warning, "Keep all time period (%" PRIu64 ") is set larger than keep "
+                     "removes time period (%" PRIu64 "). Adjusting keep removes "
+ "period to match",
+ _revertTimePeriod.getTime(), _keepRemoveTimePeriod.getTime());
+ _keepRemoveTimePeriod = _revertTimePeriod;
+ }
+ if (_maxDocumentVersions < 1) {
+ LOG(warning, "Max number of document versions attempted set to 0. "
+ "This is a bad idea for all the obvious reasons. Forcing "
+ "used value to be 1.");
+ _maxDocumentVersions = 1;
+ }
+ // MINIMUM FILE SIZES
+ if (_minimumFileMetaSlots < 1) {
+ LOG(warning, "Minimum file meta slots is not allowed to be less than "
+ "1. Setting it to 1.");
+ _minimumFileMetaSlots = 1;
+ }
+ if (_minimumFileMetaSlots > 1024*1024) {
+ LOG(warning, "Minimum file meta slots is not allowed to be more than "
+ "%u. Setting it to %u.", 1024*1024, 1024*1024);
+ _minimumFileMetaSlots = 1024*1024;
+ }
+ if (_minimumFileHeaderBlockSize > 2*1024*1024*1024u) {
+ LOG(warning, "Minimum file header block size is not allowed to be above"
+ " 2 GB. Altering it from %u B to 2 GB.",
+ _minimumFileHeaderBlockSize);
+ _minimumFileHeaderBlockSize = 2*1024*1024*1024u;
+ }
+ if (_minimumFileSize % _fileBlockSize != 0) {
+ tmp32 = _fileBlockSize
+ * ((_minimumFileSize + _fileBlockSize - 1) / _fileBlockSize);
+        LOG(warning, "Min file size %u not a multiple of file block size %u. "
+ "Increasing minimum filesize to %u to match.",
+ _minimumFileSize, _fileBlockSize, tmp32);
+ _minimumFileSize = tmp32;
+ }
+ // MAXIMUM FILE SIZES
+ if (_maximumFileMetaSlots != 0
+ && _maximumFileMetaSlots < _minimumFileMetaSlots)
+ {
+ LOG(warning, "Maximum file meta slots cannot be less than the minimum. "
+ "Adjusting it from %u to %u.",
+ _maximumFileMetaSlots, _minimumFileMetaSlots);
+ _maximumFileMetaSlots = _minimumFileMetaSlots;
+ }
+ if (_maximumFileHeaderBlockSize != 0
+ && _maximumFileHeaderBlockSize < _minimumFileHeaderBlockSize)
+ {
+ LOG(warning, "Maximum file header block size cannot be less than the "
+ "minimum. Adjusting it from %u to %u.",
+ _maximumFileHeaderBlockSize, _minimumFileHeaderBlockSize);
+ _maximumFileHeaderBlockSize = _minimumFileHeaderBlockSize;
+ }
+ if (_maximumFileSize != 0 && _maximumFileSize < _minimumFileSize) {
+ LOG(warning, "Maximum file size cannot be less than the "
+ "minimum. Adjusting it from %u to %u.",
+ _maximumFileSize, _minimumFileSize);
+ _maximumFileSize = _minimumFileSize;
+ }
+ if (_maximumFileSize % _fileBlockSize != 0) {
+ tmp32 = _fileBlockSize
+ * ((_maximumFileSize + _fileBlockSize - 1) / _fileBlockSize);
+        LOG(warning, "Max file size %u not a multiple of file block size %u. "
+ "Increasing maximum to %u bytes to match.",
+ _maximumFileSize, _fileBlockSize, tmp32);
+ _maximumFileSize = tmp32;
+ }
+
+ if (_growFactor < 1.0 || _growFactor >= 100.0) {
+ throw vespalib::IllegalStateException(
+                "The grow factor needs to be in the range [1, 100).",
+ VESPA_STRLOC);
+ }
+
+ if (!_defaultRemoveDocType.empty()) {
+ // Log the usage of this option to make it visible, as it is not
+ // something most people should use.
+ LOG(info,
+ "Will write remove entries in 5.0 backwards compatible mode. By "
+ "default this will be done using the '%s' document type unless "
+ "the document identifier specifies otherwise.",
+ _defaultRemoveDocType.c_str());
+ }
+}
+
+void Options::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ (void) verbose;
+ std::string s("\n" + indent + " ");
+
+ out << "SlotFile options:"
+ << s << "Minimum file meta slots: " << _minimumFileMetaSlots
+ << s << "Maximum file meta slots: " << _maximumFileMetaSlots
+ << s << "Minimum file header block size: "
+ << _minimumFileHeaderBlockSize << " b"
+ << s << "Maximum file header block size: "
+ << _maximumFileHeaderBlockSize << " b"
+ << s << "Minimum file size: " << _minimumFileSize << " b"
+ << s << "Maximum file size: " << _maximumFileSize << " b"
+ << s << "Filesystem block size: " << _fileBlockSize << " b"
+ << s << "Revert time period: " << _revertTimePeriod << " microsecs"
+ << s << "Keep remove time period: "
+      << _keepRemoveTimePeriod << " microsecs"
+ << s << "Max document versions: " << _maxDocumentVersions
+ << s << "Cache size: " << _cacheSize
+ << s << "Initial index read: " << _initialIndexRead << " b"
+ << s << "Maximum gap to read through: "
+ << _maximumGapToReadThrough << " b"
+ << s << "Disk full factor: " << _diskFullFactor
+ << s << "Grow factor: " << _growFactor
+ << s << "Overrepresent meta data factor: "
+ << _overrepresentMetaDataFactor
+ << s << "Overrepresent header block factor: "
+ << _overrepresentHeaderBlockFactor
+ << s << "Write removes with blank documents of default type: "
+ << _defaultRemoveDocType
+ << "";
+}
+
+}
+
+}
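
The size adjustments in validate() all use the same round-up-to-block-size expression; a tiny worked example of what it does, assuming a 4096-byte file block size:

    // Worked example of the rounding expression used in Options::validate().
    #include <cassert>
    #include <cstdint>

    uint32_t roundUpToBlockSize(uint32_t size, uint32_t blockSize) {
        return blockSize * ((size + blockSize - 1) / blockSize);
    }

    int main() {
        assert(roundUpToBlockSize(10000, 4096) == 12288);  // rounded up to 3 blocks
        assert(roundUpToBlockSize(8192, 4096) == 8192);    // already aligned
        return 0;
    }
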
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.h b/memfilepersistence/src/vespa/memfilepersistence/common/options.h
new file mode 100644
index 00000000000..831f43ab603
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/options.h
@@ -0,0 +1,136 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class storage::Options
+ * @ingroup filestorage
+ *
+ * @brief Options used by slotfiles
+ *
+ * To avoid the need for static variables, which cannot be altered while the
+ * system is running and which force all slotfile instances to work with the
+ * same options, this options class has been created to contain all the options
+ * a slotfile will use.
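+ *
+ * A minimal usage sketch (illustrative only; the overridden value is an
+ * assumption, not a recommended setting):
+ * @code
+ * Options opts;                          // defaults as listed in this header
+ * opts._minimumFileSize = 2 * 1048576;   // hypothetical override
+ * opts.validate();                       // clamps/aligns values, logging any adjustments
+ * @endcode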
+ *
+ * @author Håkon Humberset
+ * @date 2005-10-26
+ */
+
+#pragma once
+
+#include <boost/operators.hpp>
+#include <vespa/vespalib/util/printable.h>
+#include <vespa/fastos/types.h> // For uint32_t on linux
+#include <string>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vespa/config-stor-memfilepersistence.h>
+#include <vespa/config-persistence.h>
+
+namespace storage {
+
+namespace memfile {
+
+struct Options : public vespalib::Printable,
+ public boost::operators<Options>
+{
+ // Parameters from def file. See config file for comments.
+
+ // FILE SIZE PARAMETERS
+
+ uint32_t _minimumFileMetaSlots;
+ uint32_t _maximumFileMetaSlots;
+ uint32_t _minimumFileHeaderBlockSize;
+ uint32_t _maximumFileHeaderBlockSize;
+ uint32_t _minimumFileSize;
+ uint32_t _maximumFileSize;
+ uint32_t _fileBlockSize;
+
+ // CONSISTENCY PARAMETERS
+ framework::MicroSecTime _revertTimePeriod;
+ framework::MicroSecTime _keepRemoveTimePeriod;
+ uint32_t _maxDocumentVersions;
+
+ // PERFORMANCE PARAMETERS
+ uint64_t _cacheSize;
+ uint32_t _initialIndexRead;
+ uint32_t _maximumGapToReadThrough;
+
+ double _diskFullFactor;
+ double _growFactor;
+ double _overrepresentMetaDataFactor;
+ double _overrepresentHeaderBlockFactor;
+
+ // COMPATIBILITY PARAMETERS
+ // If non-empty, will cause remove entries to be written with a blank
+ // document containing only the document type and identifier rather than
+ // just writing a document id with no document at all. Note that if a
+ // document identifier contains a type string it will override this default
+ // value.
+ // This is a feature for backwards compatibility with 5.0, which fails
+ // when trying to read remove entries without a document.
+ vespalib::string _defaultRemoveDocType;
+
+ /**
+ * Creates a new slotfile options instance. Implemented in the header file,
+ * so that the current defaults can easily be viewed.
+ */
+ Options()
+ : _minimumFileMetaSlots(512),
+ _maximumFileMetaSlots(0),
+ _minimumFileHeaderBlockSize(102848),
+ _maximumFileHeaderBlockSize(0),
+ _minimumFileSize(1048576),
+ _maximumFileSize(0),
+ _fileBlockSize(4096),
+ _revertTimePeriod(300 * 1000000ull),
+ _keepRemoveTimePeriod(604800 * 1000000ull),
+ _maxDocumentVersions(5),
+ _cacheSize(0),
+ _initialIndexRead(65536),
+ _maximumGapToReadThrough(65536),
+ _diskFullFactor(0.98),
+ _growFactor(2.0),
+ _overrepresentMetaDataFactor(1.2),
+ _overrepresentHeaderBlockFactor(1.1),
+ _defaultRemoveDocType()
+ {
+ }
+
+ Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig,
+ const vespa::config::content::PersistenceConfig& newPersistenceConfig);
+
+ void validate() const { const_cast<Options&>(*this).validate(); }
+ void validate();
+
+ /** Printable implementation */
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ bool operator==(const Options& options) const {
+ if (_minimumFileMetaSlots == options._minimumFileMetaSlots
+ && _maximumFileMetaSlots == options._maximumFileMetaSlots
+ && _minimumFileHeaderBlockSize
+ == options._minimumFileHeaderBlockSize
+ && _maximumFileHeaderBlockSize
+ == options._maximumFileHeaderBlockSize
+ && _minimumFileSize == options._minimumFileSize
+ && _maximumFileSize == options._maximumFileSize
+ && _fileBlockSize == options._fileBlockSize
+ && _revertTimePeriod == options._revertTimePeriod
+ && _maxDocumentVersions == options._maxDocumentVersions
+ && _keepRemoveTimePeriod == options._keepRemoveTimePeriod
+ && _cacheSize == options._cacheSize
+ && _initialIndexRead == options._initialIndexRead
+ && _maximumGapToReadThrough == options._maximumGapToReadThrough
+ && _diskFullFactor == options._diskFullFactor
+ && _defaultRemoveDocType == options._defaultRemoveDocType)
+ {
+ return true;
+ }
+ return false;
+ }
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp
new file mode 100644
index 00000000000..dd045239877
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/slotmatcher.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+
+namespace storage {
+namespace memfile {
+
+Types::Timestamp
+SlotMatcher::Slot::getTimestamp() const
+{
+ return _slot.getTimestamp();
+}
+
+bool
+SlotMatcher::Slot::isRemove() const
+{
+ return _slot.deleted();
+}
+
+const document::GlobalId&
+SlotMatcher::Slot::getGlobalId() const
+{
+ return _slot.getGlobalId();
+}
+
+document::Document::UP
+SlotMatcher::Slot::getDocument(bool headerOnly) const
+{
+ return _file.getDocument(_slot, headerOnly ? HEADER_ONLY : ALL);
+}
+
+document::DocumentId
+SlotMatcher::Slot::getDocumentId() const
+{
+ return _file.getDocumentId(_slot);
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h
new file mode 100644
index 00000000000..ca5988e9a19
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::slotfile::SlotMatcher
+ * \ingroup memfile
+ *
+ * \brief Implement this to create a filter for MemSlot instances.
+ *
+ * Many operations want to do something to a subset of the slots in a file.
+ * Such operations can retrieve the slots that match using an implementation
+ * of this filter.
+ *
+ * When creating a slot matcher, you should specify what type of data you want
+ * to preload from disk. Typically you preload the entries you need in order to
+ * avoid many disk accesses, but if some data is only needed for a few entries,
+ * you can use the functions supplied in the matcher to fetch it for those
+ * entries even though it is not cached for all of them.
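+ *
+ * A minimal sketch of a matcher (illustrative only, not part of the original
+ * code):
+ * @code
+ * struct PutsOnlyMatcher : public SlotMatcher {
+ *     PutsOnlyMatcher() : SlotMatcher(PRELOAD_META_DATA_ONLY) {}
+ *     virtual bool match(const Slot& slot) { return !slot.isRemove(); }
+ * };
+ * @endcode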
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/memfile/memslot.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFile;
+
+class SlotMatcher : private Types {
+public:
+ enum PreloadFlag {
+ PRELOAD_META_DATA_ONLY = 0x0,
+ PRELOAD_BODY = 0x1,
+ PRELOAD_HEADER = 0x3,
+ PRELOAD_DOC_ID = 0x7
+ };
+
+protected:
+ SlotMatcher(PreloadFlag preld) : _preload(preld) {}
+
+ PreloadFlag _preload;
+
+public:
+ class Slot {
+ private:
+ const MemSlot& _slot;
+ const MemFile& _file;
+
+ public:
+ Slot(const MemSlot& slot, const MemFile& file)
+ : _slot(slot),
+ _file(file) {};
+
+ /**
+ * Returns the timestamp of the slot.
+ */
+ Timestamp getTimestamp() const;
+
+ /**
+ * Returns whether a slot is a remove, either regular
+ * or unrevertable.
+ */
+ bool isRemove() const;
+
+ /**
+ * Returns the global id of the slot.
+ */
+ const GlobalId& getGlobalId() const;
+
+ /**
+ * Get the document, optionally just the header. If not preloaded, will load
+ * this document from disk.
+ */
+ Document::UP getDocument(bool headerOnly) const;
+
+ document::DocumentId getDocumentId() const;
+ };
+
+ virtual ~SlotMatcher() {}
+
+ virtual bool match(const Slot&) = 0;
+
+ /** Do what is needed to preload wanted content. */
+ void preload(MemFile&) const {};
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp
new file mode 100644
index 00000000000..337638cadc9
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <iostream>
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+const framework::MicroSecTime Types::MAX_TIMESTAMP(framework::MicroSecTime::max());
+const framework::MicroSecTime Types::UNSET_TIMESTAMP(0);
+
+void
+Types::verifyLegalFlags(uint32_t flags, uint32_t legal, const char* operation)
+{
+ if ((flags & legal) != flags) {
+ std::ostringstream ost;
+ ost << "Invalid flags given to operation " << operation << ". "
+ << std::hex << flags << " given, but only " << legal
+ << " are legal.";
+ throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC);
+ }
+}
+
+std::ostream&
+operator<<(std::ostream& os, const DataLocation& loc)
+{
+ os << "DataLocation("
+ << std::dec
+ << loc._pos
+ << ", "
+ << loc._size
+ << ")";
+ return os;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.h b/memfilepersistence/src/vespa/memfilepersistence/common/types.h
new file mode 100644
index 00000000000..bf4bdc98222
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/common/types.h
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::slotfile::Types
+ * \ingroup memfile
+ *
+ * \brief This class defines and includes some types used in the slotfile layer.
+ *
+ * As many of the types are used in many places in the layer, we define them
+ * here rather than in one arbitrary class that happens to use them. This also
+ * makes it easy to switch implementations by switching which class is used here.
+ *
+ * This class should not have any members, virtual functions or anything else.
+ * We don't want it to add to the memory footprint of classes, as it is also
+ * used by classes that are kept in the memory cache many times over.
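+ *
+ * Example of the flag checking helper declared here (illustrative only):
+ * @code
+ * // Throws vespalib::IllegalArgumentException: 0x2 is not within the legal
+ * // get flags (0x1).
+ * Types::verifyLegalFlags(Types::ITERATE_REMOVED, Types::LEGAL_GET_FLAGS, "get");
+ * @endcode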
+ */
+#pragma once
+
+#include <iosfwd>
+#include <vespa/document/bucket/bucketid.h>
+#include <vespa/document/base/documentid.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/persistence/spi/bucketinfo.h>
+
+namespace storage {
+namespace memfile {
+
+/**
+ * \class storage::slotfile::DataLocation
+ * \ingroup memfile
+ *
+ * \brief Points to data in a file storing documents.
+ *
+ * This file stores info on where header and body parts of document are stored.
+ * It is really format specific data, but for now it is implemented globally.
+ *
+ * All unused locations should be size zero pointing to address zero. A size
+ * of zero with a non-zero position is invalid, and used to indicate that this
+ * value is not set yet. (Typically when data isn't persisted to disk yet)
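+ *
+ * Example of the conventions above (illustrative values only):
+ * @code
+ * DataLocation unset;             // pos 1, size 0 -> !unset.valid()
+ * DataLocation header(0, 128);    // valid; header.endPos() == 128
+ * DataLocation part(64, 32);
+ * assert(header.contains(part));  // [64, 96) lies within [0, 128)
+ * @endcode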
+ */
+struct DataLocation : public boost::operators<DataLocation> {
+ uint32_t _pos;
+ uint32_t _size;
+
+ DataLocation() : _pos(1), _size(0) {} // pos 1, size 0 is an invalid value.
+ DataLocation(uint32_t pos, uint32_t sz) : _pos(pos), _size(sz) {}
+
+ uint32_t size() const { return _size; }
+
+ uint32_t endPos() const { return _pos + _size; }
+
+ bool valid() const { return (_size > 0 || _pos == 0); }
+
+ bool operator==(const DataLocation& other) const
+ { return (_pos == other._pos && _size == other._size); }
+
+ bool operator<(const DataLocation& other) const {
+ if (_pos == other._pos) {
+ return _size < other._size;
+ }
+
+ return _pos < other._pos;
+ }
+
+ bool contains(const DataLocation& other) const {
+ return (_pos <= other._pos && _pos + _size >= other._pos + other._size);
+ }
+};
+
+std::ostream& operator<<(std::ostream&, const DataLocation&);
+
+struct Types {
+ typedef document::BucketId BucketId;
+ typedef document::Document Document;
+ typedef vespalib::LinkedPtr<Document> DocLP;
+ typedef document::DocumentId DocumentId;
+ typedef document::GlobalId GlobalId;
+ typedef framework::MicroSecTime Timestamp;
+ typedef Timestamp RevertToken;
+ typedef vespalib::string String;
+ typedef spi::BucketInfo BucketInfo;
+
+ static const framework::MicroSecTime MAX_TIMESTAMP;
+ static const framework::MicroSecTime UNSET_TIMESTAMP;
+
+ enum FileVersion {
+ UNKNOWN = 0,
+ TRADITIONAL_SLOTFILE = 0xABCD0001
+ };
+
+ enum SlotFlag {
+ IN_USE = 0x01,
+ DELETED = 0x02,
+ DELETED_IN_PLACE = 0x04,
+ LEGAL_PERSISTED_SLOT_FLAGS = 0x07,
+
+ // States not stored in the file. Since the file format sets aside 16 bits
+ // for the flags but uses so few of them, we use some of the unused bits
+ // in the memory representation to store in-memory state.
+ ALTERED_IN_MEMORY = 0x02 << 8,
+ CHECKSUM_OUTDATED = 0x04 << 8,
+
+ // Masks to check for multiple bits
+ UNUSED = 0xf8f8
+ };
+
+ enum GetFlag {
+ ALL = 0,
+ HEADER_ONLY = 0x1,
+ LEGAL_GET_FLAGS = 0x1
+ };
+
+ enum IteratorFlag {
+ ITERATE_GID_UNIQUE = 0x1,
+ ITERATE_REMOVED = 0x2,
+ LEGAL_ITERATOR_FLAGS = 0x3
+ };
+
+ enum DocContentFlag {
+ HAS_HEADER_ONLY,
+ HAS_BODY
+ };
+
+ enum DocumentPart {
+ HEADER,
+ BODY
+ };
+
+ enum MemFileFlag {
+ FILE_EXIST = 0x0001,
+ HEADER_BLOCK_READ = 0x0002,
+ BODY_BLOCK_READ = 0x0004,
+ BUCKET_INFO_OUTDATED = 0x0008,
+ SLOTS_ALTERED = 0x0010,
+ LEGAL_MEMFILE_FLAGS = 0x001f
+ };
+
+ enum FileVerifyFlags {
+ DONT_VERIFY_HEADER = 0x0001,
+ DONT_VERIFY_BODY = 0x0002,
+ LEGAL_VERIFY_FLAGS = 0x0003
+ };
+
+ enum FlushFlag {
+ NONE = 0,
+ CHECK_NON_DIRTY_FILE_FOR_SPACE = 1
+ };
+
+ enum GetLocationsFlag {
+ NON_PERSISTED_LOCATIONS = 0x0001,
+ PERSISTED_LOCATIONS = 0x0002,
+ NO_SLOT_LIST = 0x0004
+ };
+
+ enum DocumentCopyType {
+ DEEP_COPY,
+ SHALLOW_COPY
+ };
+
+ static const char* getDocumentPartName(DocumentPart part) {
+ switch (part) {
+ case HEADER: return "Header";
+ case BODY: return "Body";
+ default: return "Invalid";
+ }
+ }
+
+ static const char* getFileVersionName(FileVersion version) {
+ switch (version) {
+ case UNKNOWN: return "UNKNOWN";
+ case TRADITIONAL_SLOTFILE: return "TRADITIONAL_SLOTFILE";
+ default: return "INVALID";
+ }
+ }
+
+ static const char* getMemFileFlagName(MemFileFlag flag) {
+ switch (flag) {
+ case FILE_EXIST: return "FILE_EXIST";
+ case HEADER_BLOCK_READ: return "HEADER_BLOCK_READ";
+ case BODY_BLOCK_READ: return "BODY_BLOCK_READ";
+ case BUCKET_INFO_OUTDATED: return "BUCKET_INFO_OUTDATED";
+ case SLOTS_ALTERED: return "SLOTS_ALTERED";
+ case LEGAL_MEMFILE_FLAGS: assert(false); // Not a single flag
+ default: return "INVALID";
+ }
+ }
+
+ static void verifyLegalFlags(uint32_t flags, uint32_t legal,
+ const char* operation);
+
+protected:
+ ~Types() {} // No one should refer to objects as Types objects
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt
new file mode 100644
index 00000000000..2b2916b1ead
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_device OBJECT
+ SOURCES
+ device.cpp
+ disk.cpp
+ partition.cpp
+ directory.cpp
+ devicemapper.cpp
+ devicemanager.cpp
+ ioevent.cpp
+ partitionmonitor.cpp
+ mountpointlist.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp
new file mode 100644
index 00000000000..88283065790
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/device.h>
+
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/device/ioevent.h>
+
+LOG_SETUP(".persistence.device");
+
+#include <algorithm>
+
+namespace storage {
+
+namespace memfile {
+
+Device::Device(DeviceManager& manager)
+ : _manager(manager)
+{
+}
+
+Device::~Device()
+{
+}
+
+std::string Device::getStateString(State s)
+{
+ switch (s) {
+ case OK: return "OK";
+ case TOO_MANY_OPEN_FILES: return "TOO_MANY_OPEN_FILES";
+ case NOT_FOUND: return "NOT_FOUND";
+ case PATH_FAILURE: return "PATH_FAILURE";
+ case NO_PERMISSION: return "NO_PERMISSION";
+ case IO_FAILURE: return "IO_FAILURE";
+ case INTERNAL_FAILURE: return "INTERNAL_FAILURE";
+ case DISABLED_BY_ADMIN: return "DISABLED_BY_ADMIN";
+ default:
+ {
+ std::ostringstream ost;
+ ost << "UNKNOWN(" << s << ")";
+ return ost.str();
+ }
+ }
+}
+
+void
+Device::print(std::ostream& out, bool, const std::string&) const
+{
+ const IOEvent* event = getLastEvent();
+ if (event == 0) {
+ out << Device::OK;
+ } else {
+ out << event->getState() << " ";
+ out << event->getTimestamp() << " ";
+ std::string desc = event->getDescription();
+ std::replace(desc.begin(), desc.end(), '\n', ' ');
+ out << desc;
+ }
+}
+
+void
+Device::clearEvents()
+{
+ _events.clear();
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.h b/memfilepersistence/src/vespa/memfilepersistence/device/device.h
new file mode 100644
index 00000000000..dd582ff327a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/device.h
@@ -0,0 +1,75 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::Device
+ * \ingroup persistence
+ *
+ * @brief Class holding information about a device.
+ *
+ * Base class for devices, such as directories, partitions and disks.
+ */
+
+#pragma once
+
+#include <vespa/vespalib/util/printable.h>
+#include <list>
+#include <string>
+
+namespace storage {
+
+namespace memfile {
+
+class IOEvent;
+class DeviceManager;
+
+class Device : public vespalib::Printable {
+private:
+ // These objects cannot be copied. They represent physical
+ // resources on a computer.
+ Device(const Device&);
+ Device& operator=(Device&);
+
+protected:
+ DeviceManager& _manager;
+ std::list<IOEvent> _events;
+
+ Device(DeviceManager& manager);
+
+public:
+ /**
+ * Storage device states. Most serious states are at the bottom of the
+ * list. If a single state is requested from the device, the one with
+ * the highest value wins through.
+ */
+ enum State {
+ OK,
+ NOT_FOUND, // Not found
+ PATH_FAILURE, // Illegal path
+ NO_PERMISSION, // Permission problems
+ INTERNAL_FAILURE, // Probably a problem with the process.
+ IO_FAILURE, // Disk problems
+ TOO_MANY_OPEN_FILES, // Too many open files, so we can't use the disk.
+ // This is a global problem that will not be stored
+ // as disk state, but must exist in order to be
+ // able to report the event.
+ DISABLED_BY_ADMIN // If disabled through admin tool
+ };
+
+ static std::string getStateString(State s);
+
+ virtual ~Device();
+
+ virtual void addEvent(const IOEvent& e) = 0;
+ virtual void clearEvents();
+ virtual const IOEvent* getLastEvent() const = 0;
+
+ const std::list<IOEvent>& getEvents() const { return _events; }
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp
new file mode 100644
index 00000000000..d088f1dab46
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp
@@ -0,0 +1,213 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+
+#include <vespa/memfilepersistence/device/devicemapper.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace storage {
+
+namespace memfile {
+
+DeviceManager::DeviceManager(DeviceMapper::UP mapper,
+ const framework::Clock& clock)
+ : _deviceMapper(std::move(mapper)),
+ _disks(),
+ _partitions(),
+ _directories(),
+ _eventListeners(),
+ _statPolicy(vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC),
+ _statPeriod(0),
+ _clock(clock)
+{
+}
+
+void
+DeviceManager::setPartitionMonitorPolicy(
+ vespa::config::storage::StorDevicesConfig::StatfsPolicy policy, uint32_t period)
+{
+ _statPolicy = policy;
+ _statPeriod = period;
+ for (std::map<std::string, Partition::LP>::iterator it
+ = _partitions.begin(); it != _partitions.end(); ++it)
+ {
+ Partition& p(*it->second);
+ if (p.getMonitor() != 0) p.getMonitor()->setPolicy(policy, period);
+ }
+}
+
+void DeviceManager::notifyDiskEvent(Disk& d, const IOEvent& e)
+{
+ for (std::set<IOEventListener*>::iterator it = _eventListeners.begin();
+ it != _eventListeners.end(); ++it)
+ {
+ assert(*it != 0);
+ (*it)->handleDiskEvent(d, e);
+ }
+}
+
+void
+DeviceManager::notifyDirectoryEvent(Directory& dir, const IOEvent& e)
+{
+ for (std::set<IOEventListener*>::iterator it = _eventListeners.begin();
+ it != _eventListeners.end(); ++it)
+ {
+ assert(*it != 0);
+ (*it)->handleDirectoryEvent(dir, e);
+ }
+}
+
+void
+DeviceManager::notifyPartitionEvent(Partition& part, const IOEvent& e)
+{
+ for (std::set<IOEventListener*>::iterator it = _eventListeners.begin();
+ it != _eventListeners.end(); ++it)
+ {
+ assert(*it != 0);
+ (*it)->handlePartitionEvent(part, e);
+ }
+}
+
+void
+DeviceManager::addIOEventListener(IOEventListener& listener)
+{
+ _eventListeners.insert(&listener);
+}
+
+void
+DeviceManager::removeIOEventListener(IOEventListener& listener)
+{
+ _eventListeners.erase(&listener);
+}
+
+Directory::LP
+DeviceManager::getDirectory(const std::string& dir, uint16_t index)
+{
+ std::map<std::string, Directory::LP>::iterator it =
+ _directories.find(dir);
+ if (it != _directories.end()) {
+ return it->second;
+ }
+ Directory::LP d(new Directory(*this, index, dir));
+ _directories[dir] = d;
+ return d;
+}
+
+Directory::LP
+DeviceManager::deserializeDirectory(const std::string& serialized)
+{
+ // Deserialize object
+ Directory::LP d(new Directory(serialized, *this));
+ // If not existing, just add it.
+ std::map<std::string, Directory::LP>::iterator it =
+ _directories.find(d->getPath());
+ if (it == _directories.end()) {
+ _directories[d->getPath()] = d;
+ return d;
+ }
+ // If already existing, merge info with existing entry.
+ it->second->addEvents(*d);
+ return it->second;
+}
+
+Partition::LP
+DeviceManager::getPartition(const std::string& path)
+{
+ try{
+ std::string mountPoint(_deviceMapper->getMountPoint(path));
+ uint64_t id = _deviceMapper->getPartitionId(mountPoint);
+ std::map<std::string, Partition::LP>::iterator it(
+ _partitions.find(mountPoint));
+ if (it != _partitions.end()) {
+ return it->second;
+ }
+ Partition::LP part(new Partition(*this, id, mountPoint));
+ if (part->getMonitor() != 0) {
+ part->getMonitor()->setPolicy(_statPolicy, _statPeriod);
+ }
+ _partitions[mountPoint] = part;
+ return part;
+ } catch (vespalib::IoException& e) {
+ // If we fail to create the partition due to IO trouble while getting
+ // the partition id or mount point, create a partition that doesn't
+ // correspond to a physical device, containing the error found.
+ Partition::LP part(new Partition(*this, -1, path));
+ part->addEvent(IOEvent::createEventFromIoException(
+ e,
+ _clock.getTimeInSeconds().getTime()));
+ _partitions[path] = part;
+ return part;
+ }
+}
+
+Disk::LP
+DeviceManager::getDisk(const std::string& path)
+{
+ try{
+ int devnr = _deviceMapper->getDeviceId(path);
+ std::map<int, Disk::LP>::iterator it = _disks.find(devnr);
+ if (it != _disks.end()) {
+ return it->second;
+ }
+ Disk::LP disk(new Disk(*this, devnr));
+ _disks[devnr] = disk;
+ return disk;
+ } catch (vespalib::IoException& e) {
+ // Use negative ints for illegal ids. Make sure they don't already
+ // exist
+ int devnr = -1;
+ while (_disks.find(devnr) != _disks.end()) --devnr;
+ // If we fail to create the disk due to IO trouble while getting the
+ // device id, create a disk entry that doesn't correspond to a physical
+ // device, containing the error found.
+ Disk::LP disk(new Disk(*this, devnr));
+ disk->addEvent(IOEvent::createEventFromIoException(
+ e,
+ _clock.getTimeInSeconds().getTime()));
+ _disks[devnr] = disk;
+ return disk;
+ }
+}
+
+void
+DeviceManager::printXml(vespalib::XmlOutputStream& xos) const
+{
+ using namespace vespalib::xml;
+ xos << XmlTag("devicemanager");
+ xos << XmlTag("mapper") << XmlAttribute("type", _deviceMapper->getName())
+ << XmlEndTag();
+ xos << XmlTag("devices");
+ for (std::map<int, Disk::LP>::const_iterator diskIt = _disks.begin();
+ diskIt != _disks.end(); ++diskIt)
+ {
+ xos << XmlTag("disk") << XmlAttribute("deviceId", diskIt->first);
+ for (std::map<std::string, Partition::LP>::const_iterator partIt
+ = _partitions.begin(); partIt != _partitions.end(); ++partIt)
+ {
+ if (partIt->second->getDisk() != *diskIt->second) continue;
+ xos << XmlTag("partition")
+ << XmlAttribute("id", partIt->second->getId())
+ << XmlAttribute("mountpoint", partIt->second->getMountPoint());
+ if (partIt->second->getMonitor() != 0) {
+ xos << *partIt->second->getMonitor();
+ }
+ for (std::map<std::string, Directory::LP>::const_iterator dirIt
+ = _directories.begin(); dirIt != _directories.end();
+ ++dirIt)
+ {
+ if (dirIt->second->getPartition() != *partIt->second) continue;
+ xos << XmlTag("directory")
+ << XmlAttribute("index", dirIt->second->getIndex())
+ << XmlAttribute("path", dirIt->second->getPath())
+ << XmlEndTag();
+ }
+ xos << XmlEndTag();
+ }
+ xos << XmlEndTag();
+ }
+ xos << XmlEndTag() << XmlEndTag();
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h
new file mode 100644
index 00000000000..dc1c6fdd68d
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::DeviceManager
+ * \ingroup persistence
+ *
+ * \brief Class keeping information about all devices.
+ *
+ * This class keeps track of all the devices so they can be looked up.
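+ *
+ * Typical wiring (illustrative only; assumes a framework::Clock instance
+ * named clock is available):
+ * @code
+ * DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), clock);
+ * Directory::LP dir = manager.getDirectory("/home/vds/disks/d0", 0);
+ * Partition::LP part = manager.getPartition("/home/vds/disks/d0");
+ * @endcode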
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/device/devicemapper.h>
+#include <vespa/memfilepersistence/device/directory.h>
+#include <vespa/memfilepersistence/device/disk.h>
+#include <vespa/memfilepersistence/device/ioevent.h>
+#include <vespa/memfilepersistence/device/partition.h>
+#include <set>
+#include <vector>
+#include <vespa/vespalib/util/xmlserializable.h>
+#include <vespa/storageframework/generic/clock/clock.h>
+
+namespace storage {
+
+namespace memfile {
+
+class DeviceManager : public vespalib::XmlSerializable {
+ DeviceMapper::UP _deviceMapper;
+ std::map<int, Disk::LP> _disks;
+ std::map<std::string, Partition::LP> _partitions;
+ std::map<std::string, Directory::LP> _directories;
+ std::set<IOEventListener*> _eventListeners;
+ vespa::config::storage::StorDevicesConfig::StatfsPolicy _statPolicy;
+ uint32_t _statPeriod;
+ const framework::Clock& _clock;
+
+ DeviceManager(const DeviceManager&);
+ DeviceManager& operator=(const DeviceManager&);
+
+ void setFindDeviceFunction();
+
+public:
+ typedef vespalib::LinkedPtr<DeviceManager> LP;
+
+ DeviceManager(DeviceMapper::UP mapper,
+ const framework::Clock& clock);
+
+ void setPartitionMonitorPolicy(
+ vespa::config::storage::StorDevicesConfig::StatfsPolicy, uint32_t period = 0);
+
+ void notifyDiskEvent(Disk& disk, const IOEvent& e);
+ void notifyDirectoryEvent(Directory& dir, const IOEvent& e);
+ void notifyPartitionEvent(Partition& part, const IOEvent& e);
+
+ void addIOEventListener(IOEventListener& listener);
+ void removeIOEventListener(IOEventListener& listener);
+
+ Directory::LP getDirectory(const std::string& dir, uint16_t index);
+ Directory::LP deserializeDirectory(const std::string& serialized);
+ Partition::LP getPartition(const std::string& path);
+ Disk::LP getDisk(const std::string& path);
+
+ std::vector<Directory::LP> getDirectories(const Disk& disk) const;
+ std::vector<Directory::LP> getDirectories(const Partition& part) const;
+
+ vespa::config::storage::StorDevicesConfig::StatfsPolicy getStatPolicy() const
+ { return _statPolicy; }
+ uint32_t getStatPeriod() const { return _statPeriod; }
+
+ virtual void printXml(vespalib::XmlOutputStream&) const;
+
+ const framework::Clock& getClock() const { return _clock; }
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp
new file mode 100644
index 00000000000..e6f45fe9e4b
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/devicemapper.h>
+
+#include <vespa/vespalib/text/stringtokenizer.h>
+#include <fstream>
+#include <vespa/log/log.h>
+#include <sstream>
+#include <sys/stat.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".persistence.devicemapper");
+
+namespace storage {
+
+namespace memfile {
+
+namespace {
+ uint64_t getDevice(const std::string& path) {
+ struct stat info;
+ if (stat(path.c_str(), &info) != 0) {
+ std::ostringstream ost;
+ ost << "Failed to run stat to find data on file " << path
+ << ": errno(" << errno << ") - " << vespalib::getLastErrorString() << ".";
+ throw vespalib::IoException(
+ ost.str(), vespalib::IoException::getErrorType(errno),
+ VESPA_STRLOC);
+ }
+ return info.st_dev;
+ }
+}
+
+AdvancedDeviceMapper::AdvancedDeviceMapper()
+ : _mountPoints()
+{
+ // Initialize the mount point map
+ std::ifstream is;
+ is.exceptions(std::ifstream::badbit); // Throw exception on failure
+ is.open("/proc/mounts");
+ init(is);
+}
+
+void
+AdvancedDeviceMapper::init(std::istream& is)
+{
+ std::string line;
+ while (std::getline(is, line)) {
+ vespalib::StringTokenizer st(line, " \t\f\r\n", "");
+ if (st[0] == "none") {
+ LOG(debug, "Ignoring special mount point '%s'.", line.c_str());
+ continue;
+ }
+ if (st.size() < 3 || st[1][0] != '/') {
+ LOG(warning, "Found unexpected line in /proc/mounts: '%s'.",
+ line.c_str());
+ continue;
+ }
+ std::string mountPoint(st[1]);
+ try{
+ uint64_t deviceId = getDevice(mountPoint);
+ LOG(debug, "Added mountpoint '%s' with device id %" PRIu64 ".",
+ mountPoint.c_str(), deviceId);
+ _mountPoints[deviceId] = mountPoint;
+ } catch (vespalib::Exception& e) {
+ LOG(info, "Failed to get device of mountpoint %s. This is normal "
+ "for some special mountpoints, and doesn't matter unless "
+ "the device is used by VDS: %s",
+ mountPoint.c_str(), e.getMessage().c_str());
+ }
+ }
+}
+
+std::string
+AdvancedDeviceMapper::getMountPoint(const std::string& fileOnFS) const
+{
+ uint64_t dev = getDevice(fileOnFS);
+ std::map<uint64_t, std::string>::const_iterator it(_mountPoints.find(dev));
+ if (it == _mountPoints.end()) {
+ std::ostringstream ost;
+ ost << "Failed to find a device for file '" << fileOnFS << "'. Stat "
+ << "returned device " << dev << " but only the following devices "
+ << "are known:";
+ for (it = _mountPoints.begin(); it != _mountPoints.end(); ++it) {
+ ost << " (" << it->first << " - " << it->second << ")";
+ }
+ throw vespalib::IoException(
+ ost.str(), vespalib::IoException::INTERNAL_FAILURE,
+ VESPA_STRLOC);
+ }
+ return it->second;
+}
+
+uint64_t
+AdvancedDeviceMapper::getPartitionId(const std::string& fileOnFS) const
+{
+ return getDevice(fileOnFS);
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h
new file mode 100644
index 00000000000..dd25283d029
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * The device mapper is used to retrieve information about storage
+ * devices.
+ */
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <memory>
+
+namespace storage {
+
+namespace memfile {
+
+/**
+ * @class DeviceMapper
+ * @ingroup persistence
+ *
+ * @brief Maps directories to partition and disk information.
+ */
+struct DeviceMapper {
+ typedef std::unique_ptr<DeviceMapper> UP;
+
+ virtual ~DeviceMapper() {}
+
+ virtual const char* getName() const = 0;
+
+ virtual std::string getMountPoint(const std::string& fileOnFS) const = 0;
+ virtual uint64_t getPartitionId(const std::string& fileOnFS) const = 0;
+ virtual uint64_t getDeviceId(const std::string& fileOnFS) const = 0;
+};
+
+/**
+ * @class SimpleDeviceMapper
+ * @ingroup persistence
+ *
+ * @brief Simple device mapper, not trying to detect any information.
+ *
+ * This simple device mapper assumes that all directories used are actually
+ * mount points, and that all mount points are on separate disks. It returns
+ * dummy device numbers.
+ *
+ * Using this, each directory used will be handled separately, and there is no
+ * dependency on information retrieved from the OS.
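+ *
+ * Example (illustrative only; the path is an assumption):
+ * @code
+ * SimpleDeviceMapper mapper;
+ * uint64_t id = mapper.getPartitionId("/home/vds/disks/d0");  // 1 on first call
+ * // The same path keeps mapping to the same fake device id.
+ * assert(id == mapper.getDeviceId("/home/vds/disks/d0"));
+ * @endcode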
+ */
+class SimpleDeviceMapper : public DeviceMapper {
+ mutable std::map<std::string, int> _devices;
+ mutable int _lastDevice;
+
+ SimpleDeviceMapper(const SimpleDeviceMapper&);
+ SimpleDeviceMapper& operator=(const SimpleDeviceMapper&);
+
+public:
+ SimpleDeviceMapper() : _devices(), _lastDevice(0) {}
+
+ uint64_t getPartitionId(const std::string& fileOnFS) const {
+ std::map<std::string, int>::const_iterator it = _devices.find(fileOnFS);
+ if (it != _devices.end()) {
+ return it->second;
+ }
+ int dev = ++_lastDevice;
+ _devices[fileOnFS] = dev;
+ return dev;
+ }
+ std::string getMountPoint(const std::string& path) const { return path; }
+ virtual uint64_t getDeviceId(const std::string& fileOnFS) const {
+ return getPartitionId(fileOnFS);
+ }
+ virtual const char* getName() const
+ { return "Simple (All directories on individual fake devices)"; }
+};
+
+/**
+ * @class AdvancedDeviceMapper
+ * @ingroup persistence
+ *
+ * @brief Device mapper trying to find a real physical model using stat/statfs.
+ *
+ * Using this device mapper, stat/statfs will be used to try to find a real
+ * model of the underlying devices. Directories mapping to a common component
+ * will all fail if that common component fails.
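+ *
+ * Example (illustrative only; the path is an assumption):
+ * @code
+ * AdvancedDeviceMapper mapper;                        // parses /proc/mounts
+ * std::string mount = mapper.getMountPoint("/home/vds/disks/d0");
+ * uint64_t partition = mapper.getPartitionId(mount);  // st_dev of the mount point
+ * @endcode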
+ */
+struct AdvancedDeviceMapper : public DeviceMapper {
+ std::map<uint64_t, std::string> _mountPoints;
+
+ AdvancedDeviceMapper();
+ void init(std::istream&);
+
+ virtual std::string getMountPoint(const std::string& fileOnFS) const;
+ virtual uint64_t getPartitionId(const std::string& fileOnFS) const;
+ virtual uint64_t getDeviceId(const std::string& fileOnFS) const {
+ // Haven't found a way to detect partitions on a common device.
+ // Returning partition ids for now.
+ return getPartitionId(fileOnFS);
+ }
+ virtual const char* getName() const
+ { return "Advanced (Read devices attempted found)"; }
+};
+
+}
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp
new file mode 100644
index 00000000000..9c03e1eb449
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp
@@ -0,0 +1,141 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/directory.h>
+
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".persistence.device.directory");
+
+namespace storage {
+
+namespace memfile {
+
+const IOEvent*
+Directory::getLastEvent() const
+{
+ if (!_events.empty()) return &_events.back();
+ return _partition->getLastEvent();
+}
+
+Device::State
+Directory::getState() const
+{
+ const IOEvent* event = getLastEvent();
+ return (event ? event->getState() : Device::OK);
+}
+
+void
+Directory::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ out << _path << " ";
+ Device::print(out, verbose, indent);
+}
+
+Directory::Directory(DeviceManager& manager, uint16_t index,
+ const std::string& path)
+ : Device(manager),
+ _index(index),
+ _path(path),
+ _partition(manager.getPartition(path))
+{
+ assert(_partition.get());
+}
+
+namespace {
+ struct Entry {
+ std::string path;
+ Device::State status;
+ std::string description;
+ };
+
+ Entry parseDirectoryString(const std::string& serialized) {
+ while (1) {
+ Entry e;
+ std::string::size_type pos1 = serialized.find(' ');
+ if (pos1 == std::string::npos) break;
+ e.path = serialized.substr(0, pos1);
+ std::string::size_type pos2 = serialized.find(' ', pos1 + 1);
+ std::string num = serialized.substr(pos1 + 1, pos2 - pos1 - 1);
+ char* c;
+ e.status = static_cast<Device::State>(
+ strtoul(num.c_str(), &c, 10));
+ if (*c != '\0') break;
+ if (pos2 != std::string::npos) {
+ e.description = serialized.substr(pos2 + 1);
+ }
+ return e;
+ }
+ std::string msg = "Illegal line in disk status file: '" + serialized
+ + "'. Ignoring it.";
+ LOG(warning, "%s", msg.c_str());
+ throw vespalib::IllegalArgumentException(msg, VESPA_STRLOC);
+ }
+}
+
+Directory::Directory(const std::string& serialized,
+ DeviceManager& manager)
+ : Device(manager),
+ _index(0),
+ _path(parseDirectoryString(serialized).path),
+ _partition(manager.getPartition(_path))
+{
+ assert(_partition.get());
+ Entry e = parseDirectoryString(serialized);
+ if (e.status != Device::OK) {
+ addEvent(IOEvent(manager.getClock().getTimeInSeconds().getTime(),
+ e.status, e.description, VESPA_STRLOC));
+ }
+}
+
+void Directory::addEvent(const IOEvent& e)
+{
+ switch (e.getState()) {
+ case Device::IO_FAILURE:
+ _partition->addEvent(e);
+ break;
+ case Device::PATH_FAILURE:
+ case Device::NO_PERMISSION:
+ case Device::INTERNAL_FAILURE:
+ case Device::DISABLED_BY_ADMIN:
+ default:
+ if (!e.isGlobal()) {
+ _events.push_back(e);
+ }
+ _manager.notifyDirectoryEvent(*this, e);
+ }
+}
+
+void
+Directory::addEvent(Device::State s,
+ const std::string& description,
+ const std::string& location)
+{
+ addEvent(IOEvent(
+ _manager.getClock().getTimeInSeconds().getTime(),
+ s,
+ description,
+ location));
+
+}
+
+void Directory::addEvents(const Directory& d)
+{
+ std::list<IOEvent> events;
+ events.insert(events.end(), d.getEvents().begin(), d.getEvents().end());
+ events.insert(events.end(), d.getPartition().getEvents().begin(),
+ d.getPartition().getEvents().end());
+ events.insert(events.end(), d.getPartition().getDisk().getEvents().begin(),
+ d.getPartition().getDisk().getEvents().end());
+ for (std::list<IOEvent>::const_iterator it = events.begin();
+ it != events.end(); ++it)
+ {
+ addEvent(*it);
+ }
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.h b/memfilepersistence/src/vespa/memfilepersistence/device/directory.h
new file mode 100644
index 00000000000..7bd2f7dcd53
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/directory.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::Directory
+ * \ingroup persistence
+ *
+ * \brief Class representing a directory used by Vespa storage.
+ *
+ * IMPORTANT: Directory objects may be generated for faulty directories too,
+ * thus creating the object must not result in a disk operation.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/device/partition.h>
+
+namespace storage {
+
+namespace memfile {
+
+class Directory : public Device {
+ uint16_t _index;
+ std::string _path;
+ Partition::LP _partition;
+
+ // Only DeviceManager can create these objects, so only it needs to
+ // cope with these constructors being so similar.
+ Directory(DeviceManager&, uint16_t index, const std::string& path);
+ Directory(const std::string& serialized, DeviceManager& manager);
+
+ void addEvents(const Directory& d);
+
+ friend class DeviceManager;
+
+public:
+ typedef vespalib::LinkedPtr<Directory> LP;
+ void setIndex(uint16_t index) { _index = index; } // Used when deserializing
+
+ uint16_t getIndex() const { return _index; }
+ const std::string& getPath() const { return _path; }
+ Partition& getPartition() { return *_partition; }
+ const Partition& getPartition() const { return *_partition; }
+
+ const IOEvent* getLastEvent() const;
+ virtual void addEvent(const IOEvent& e);
+ virtual void addEvent(Device::State s,
+ const std::string& description,
+ const std::string& location);
+
+ State getState() const;
+ bool isOk() const { return (getLastEvent() == 0); }
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ bool operator==(const Directory& d) const { return (_path == d._path); }
+ bool operator!=(const Directory& d) const { return (_path != d._path); }
+
+ // Easy access functions, using the partition monitor to query state of
+ // partition
+
+ /** Query whether partition is full after adding given amount of data. */
+ bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const {
+ return _partition->getMonitor() == 0
+ || _partition->getMonitor()->isFull(afterAdding, maxFillRate);
+ }
+
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp
new file mode 100644
index 00000000000..4e207d326ed
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/disk.h>
+
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+
+LOG_SETUP(".persistence.device.disk");
+
+namespace storage {
+
+namespace memfile {
+
+Disk::Disk(DeviceManager& manager, uint64_t id)
+ : Device(manager),
+ _id(id)
+{
+}
+
+void Disk::addEvent(const IOEvent& e)
+{
+ if (!e.isGlobal()) {
+ _events.push_back(e);
+ }
+ _manager.notifyDiskEvent(*this, e);
+}
+
+const IOEvent*
+Disk::getLastEvent() const
+{
+ if (getEvents().size() > 0)
+ return &getEvents().back();
+ return 0;
+}
+
+void
+Disk::print(std::ostream& out, bool verbose, const std::string& indent) const
+{
+ out << "Disk id: " << _id << " ";
+ Device::print(out, verbose, indent);
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.h b/memfilepersistence/src/vespa/memfilepersistence/device/disk.h
new file mode 100644
index 00000000000..77549a12470
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/disk.h
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::Disk
+ * \ingroup persistence
+ *
+ * \brief Class representing a storage unit on a node.
+ *
+ * Class representing a storage unit on a node, which can be a physical disk, or
+ * a device set up by a RAID controller or similar.
+ *
+ * IMPORTANT: Disk objects may be generated for faulty disks too, thus creating
+ * the object must not result in a disk operation.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/device/device.h>
+#include <vespa/vespalib/util/linkedptr.h>
+
+namespace storage {
+
+namespace memfile {
+
+class Disk : public Device {
+ uint64_t _id;
+
+ Disk(DeviceManager&, uint64_t id);
+
+ friend class DeviceManager;
+
+public:
+ typedef vespalib::LinkedPtr<Disk> LP;
+
+ uint64_t getId() const { return _id; }
+
+ virtual void addEvent(const IOEvent& e);
+ const IOEvent* getLastEvent() const;
+
+ bool operator==(const Disk& disk) const { return (_id == disk._id); }
+ bool operator!=(const Disk& disk) const { return (_id != disk._id); }
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp
new file mode 100644
index 00000000000..a85d66d1cbb
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp
@@ -0,0 +1,125 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/memfilepersistence/device/ioevent.h>
+#include <vespa/memfilepersistence/device/device.h>
+#include <cerrno>
+
+namespace storage {
+
+namespace memfile {
+
+IOEvent::IOEvent()
+ : _state(Device::OK),
+ _description(),
+ _location(),
+ _global(false),
+ _timestamp(0)
+{
+}
+
+namespace {
+ vespalib::string stripBacktrace(const vespalib::string& s) {
+ vespalib::string::size_type pos = s.find("Backtrace:");
+ if (pos == vespalib::string::npos) return s;
+ while (pos > 0 && (s[pos - 1] == ' ' || s[pos - 1] == '\n')) {
+ --pos;
+ }
+ return s.substr(0, pos);
+ }
+}
+
+IOEvent::IOEvent(uint32_t timestamp, Device::State s, const vespalib::string& description,
+ const vespalib::string& location, bool global)
+ : _state(s),
+ _description(stripBacktrace(description)),
+ _location(location),
+ _global(global),
+ _timestamp(timestamp)
+{
+}
+
+IOEvent
+IOEvent::createEventFromErrno(uint32_t timestamp,
+ int error, const vespalib::string& extraInfo,
+ const vespalib::string& location)
+{
+ vespalib::string err(vespalib::getErrorString(error));
+ err += ": " + extraInfo;
+ switch (error) {
+ case ENOENT:
+ return IOEvent(timestamp, Device::NOT_FOUND, err, location);
+ case ENOTDIR:
+ case ENAMETOOLONG:
+ case ELOOP:
+ case EISDIR: // Using directory as file
+ case EOPNOTSUPP: // Operation not supported by filesystem
+ case EROFS:
+ case EMLINK:
+ case ENXIO:
+ case ESPIPE: // Descriptor is a pipe/socket/fifo
+ return IOEvent(timestamp, Device::PATH_FAILURE, err, location);
+ case EACCES:
+ return IOEvent(timestamp, Device::NO_PERMISSION, err, location);
+ case EIO: // IO error occurred.
+ case EINTR: // Read from slow device interrupted before any data.
+ return IOEvent(timestamp, Device::IO_FAILURE, err, location);
+ case EMFILE:
+ return IOEvent(timestamp, Device::TOO_MANY_OPEN_FILES, err,
+ location, true);
+ case EAGAIN: // Non-blocking read but no data available
+ case EBADF: // Invalid file descriptor
+ case EFAULT: // Buffer pointer invalid
+ case EINVAL: // Faulty input parameter
+ case ENFILE:
+ default:
+ return IOEvent(timestamp, Device::INTERNAL_FAILURE, err, location);
+ }
+}
+
+IOEvent
+IOEvent::createEventFromIoException(vespalib::IoException& e, uint32_t timestamp)
+{
+ Device::State type = Device::INTERNAL_FAILURE;
+ switch (e.getType()) {
+ case vespalib::IoException::NOT_FOUND:
+ type = Device::NOT_FOUND; break;
+ case vespalib::IoException::ILLEGAL_PATH:
+ type = Device::PATH_FAILURE; break;
+ case vespalib::IoException::NO_PERMISSION:
+ type = Device::NO_PERMISSION; break;
+ case vespalib::IoException::DISK_PROBLEM:
+ type = Device::IO_FAILURE; break;
+ case vespalib::IoException::TOO_MANY_OPEN_FILES:
+ type = Device::TOO_MANY_OPEN_FILES; break;
+ case vespalib::IoException::INTERNAL_FAILURE:
+ case vespalib::IoException::NO_SPACE:
+ case vespalib::IoException::CORRUPT_DATA:
+ case vespalib::IoException::DIRECTORY_HAVE_CONTENT:
+ case vespalib::IoException::FILE_FULL:
+ case vespalib::IoException::ALREADY_EXISTS:
+ case vespalib::IoException::UNSPECIFIED:
+ type = Device::INTERNAL_FAILURE; break;
+ }
+ return IOEvent(timestamp, type, e.getMessage(), e.getLocation());
+}
+
+void
+IOEvent::print(std::ostream & os, bool verbose, const std::string& indent) const
+{
+ (void) indent;
+ os << "IOEvent(";
+ os << Device::getStateString(_state);
+ if (verbose) {
+ if (_description.size() > 0) {
+ os << ", " << _description;
+ }
+ if (_location.size() > 0) {
+ os << ", " << _location;
+ }
+ os << ", time " << _timestamp;
+ }
+ os << ")";
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h
new file mode 100644
index 00000000000..d30026c9f8c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::IOEvent
+ * \ingroup persistence
+ *
+ * \brief Class representing an IO event. An event tied to a device.
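+ *
+ * Example (illustrative only; the timestamp and message are assumptions):
+ * @code
+ * IOEvent event(IOEvent::createEventFromErrno(now, ENOENT, "opening slotfile"));
+ * // event.getState() == Device::NOT_FOUND
+ * @endcode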
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/device/device.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+namespace storage {
+
+namespace memfile {
+
+class IOEvent : public vespalib::Printable {
+public:
+ IOEvent();
+
+ IOEvent(uint32_t timestamp,
+ Device::State s,
+ const vespalib::string & description,
+ const vespalib::string & location,
+ bool global = false);
+
+ static IOEvent createEventFromErrno(uint32_t timestamp,
+ int error,
+ const vespalib::string& extraInfo = "",
+ const vespalib::string& location = "");
+ static IOEvent createEventFromIoException(vespalib::IoException& e,
+ uint32_t timestamp);
+
+ Device::State getState() const { return _state; }
+ const vespalib::string& getDescription() const { return _description; }
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const override;
+
+ /**
+ * Global events aren't tied to the device they were found on. They should not
+ * be saved on each device or be a reason to disable one.
+ */
+ bool isGlobal() const { return _global; }
+
+ uint32_t getTimestamp() const { return _timestamp; }
+
+private:
+ Device::State _state;
+ vespalib::string _description;
+ vespalib::string _location;
+ bool _global;
+ uint32_t _timestamp;
+};
+
+class Directory;
+class Partition;
+class Disk;
+
+/**
+ * \class storage::IOEventListener
+ * \ingroup persistence
+ *
+ * \brief Interface to implement if you want IO events. Register at manager.
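+ *
+ * A minimal listener sketch (illustrative only, not part of the original code):
+ * @code
+ * struct LoggingListener : public IOEventListener {
+ *     virtual void handleDirectoryEvent(Directory&, const IOEvent&) {}
+ *     virtual void handlePartitionEvent(Partition&, const IOEvent&) {}
+ *     virtual void handleDiskEvent(Disk&, const IOEvent&) {}
+ * };
+ * // Register it with the device manager: manager.addIOEventListener(listener);
+ * @endcode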
+ */
+struct IOEventListener {
+ virtual void handleDirectoryEvent(Directory& dir, const IOEvent& e) = 0;
+ virtual void handlePartitionEvent(Partition& part, const IOEvent& e) = 0;
+ virtual void handleDiskEvent(Disk& disk, const IOEvent& e) = 0;
+
+ virtual ~IOEventListener() {}
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp
new file mode 100644
index 00000000000..0f5dbb288f1
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp
@@ -0,0 +1,651 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/mountpointlist.h>
+
+#include <vespa/config/helper/configfetcher.h>
+#include <vespa/vespalib/util/guard.h>
+#include <vespa/vespalib/text/stringtokenizer.h>
+#include <errno.h>
+#include <fstream>
+#include <vespa/log/log.h>
+#include <vespa/persistence/spi/exceptions.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vdslib/state/nodestate.h>
+#include <vespa/vespalib/util/xmlserializable.h>
+
+LOG_SETUP(".persistence.mountpointlist");
+
+namespace storage {
+
+namespace memfile {
+
+using vespalib::getLastErrorString;
+using vespalib::DirPointer;
+
+MountPointList::MountPointList(const std::string& vdsRoot,
+ const std::vector<vespalib::string>& diskPath,
+ DeviceManager::LP manager)
+ : framework::XmlStatusReporter("mountpointlist", "Disk directories"),
+ _deviceManager(manager),
+ _vdsRoot(vdsRoot),
+ _diskPath(diskPath),
+ _mountPoints(0)
+{
+}
+
+spi::PartitionStateList
+MountPointList::getPartitionStates() const
+{
+ spi::PartitionStateList list(_mountPoints.size());
+ for (uint32_t i=0; i<_mountPoints.size(); ++i) {
+ if (!(_mountPoints[i]->isOk())) {
+ const IOEvent* event = _mountPoints[i]->getLastEvent();
+
+ list[i] = spi::PartitionState(spi::PartitionState::DOWN,
+ event->getDescription());
+ }
+ }
+
+ return list;
+}
+
+void
+MountPointList::init(uint16_t diskCount)
+{
+ initDisks();
+ scanForDisks();
+ readFromFile();
+ if (verifyHealthyDisks(diskCount == 0 ? -1 : diskCount)) {
+ // Initialize monitors after having initialized the disks, so that we
+ // don't create them for invalid disks.
+ initializePartitionMonitors();
+ }
+ if (diskCount != 0 && _mountPoints.size() != diskCount) {
+ std::ostringstream ost;
+ ost << _mountPoints.size()
+ << " mount points found. Expected " << diskCount
+ << " mount points to exist.";
+ LOG(error, "%s", ost.str().c_str());
+ throw config::InvalidConfigException(ost.str(), VESPA_STRLOC);
+ }
+}
+
+void
+MountPointList::initDisks()
+{
+ if (_diskPath.empty()) return;
+
+ using vespalib::make_string;
+
+ vespalib::string vdsDisksPath = make_string("%s/disks", _vdsRoot.c_str());
+ vespalib::mkdir(vdsDisksPath);
+
+ for (size_t diskIndex = 0; diskIndex < _diskPath.size(); ++diskIndex) {
+ auto disk_path = make_string(
+ "%s/d%zu", vdsDisksPath.c_str(), diskIndex);
+ if (pathExists(disk_path)) continue;
+
+ vespalib::mkdir(_diskPath[diskIndex]);
+
+ try {
+ vespalib::symlink(_diskPath[diskIndex], disk_path);
+ } catch (vespalib::IoException& dummy) {
+ // The above mkdir() created disk_path as a directory, or a
+ // subdirectory of disk_path, which is OK.
+ (void) dummy;
+ }
+ }
+}
+
+void
+MountPointList::initializePartitionMonitors()
+{
+ std::set<Partition*> seen;
+ for (uint32_t i=0; i<_mountPoints.size(); ++i) {
+ if (!(_mountPoints[i]->isOk())) continue;
+ Partition* part = &_mountPoints[i]->getPartition();
+ std::set<Partition*>::const_iterator it(seen.find(part));
+ if (it == seen.end()) {
+ part->initializeMonitor();
+ seen.insert(part);
+ }
+ }
+}
+
+void
+MountPointList::scanForDisks()
+{
+ _mountPoints.clear();
+ std::vector<Directory::LP> entries;
+ DirPointer dir(opendir((_vdsRoot + "/disks").c_str()));
+ struct dirent* entry;
+ if (dir) while ((entry = readdir(dir))) {
+ if (entry == 0) {
+ std::ostringstream ost;
+ ost << "Failed to read directory \"" << _vdsRoot << "/disks\", "
+ << "errno " << errno << ": " << getLastErrorString();
+ throw vespalib::IoException(ost.str(),
+ vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC);
+ }
+ std::string name(reinterpret_cast<char*>(&entry->d_name));
+ assert(name.size() > 0);
+ if (name[0] == '.') continue;
+ // To be a valid d<digit> name, size must be at least 2
+ if (name.size() < 2 || name[0] != 'd') {
+ LOG(warning, "File %s in disks directory is faulty named for a "
+ "disk directory, ignoring it.", name.c_str());
+ continue;
+ }
+ char* endp;
+ uint32_t diskNr = strtoul(name.c_str()+1, &endp, 10);
+ // If rest of name is not a number, ignore
+ if (*endp != '\0') {
+ LOG(warning, "File %s in disks directory is faulty named for a "
+ "disk directory, ignoring it.", name.c_str());
+ continue;
+ }
+ // If number is out of range, ignore..
+ if (diskNr >= 254) {
+ LOG(warning, "Ignoring disk directory %s, as max directories have "
+ "been set to 254.", name.c_str());
+ continue;
+ }
+
+ // Valid disk directory.. Add entry..
+ if (entries.size() <= diskNr) {
+ entries.resize(diskNr + 1);
+ }
+ LOG(debug, "Found disk directory %u: %s", diskNr, name.c_str());
+ entries[diskNr] = _deviceManager->getDirectory(
+ _vdsRoot + "/disks/" + name, diskNr);
+
+ // We only care about directories (or symlinks). DT_UNKNOWN must be handled explicitly.
+ if (entry->d_type != DT_DIR && entry->d_type != DT_LNK && entry->d_type != DT_UNKNOWN) {
+ std::ostringstream ost;
+ ost << "File " << name << " in disks directory is not a directory.";
+ LOG(warning, "%s", ost.str().c_str());
+ entries[diskNr]->addEvent(Device::PATH_FAILURE,
+ ost.str(), VESPA_STRLOC);
+ }
+
+ // Not all filesystems support d_type. Have to stat if this equals DT_UNKNOWN.
+ if (entry->d_type == DT_UNKNOWN) {
+ struct stat st;
+ lstat(entries[diskNr]->getPath().c_str(), &st);
+ if (!S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ std::ostringstream ost;
+ ost << "File " << name << " in disks directory is not a directory.";
+ LOG(warning, "%s", ost.str().c_str());
+ entries[diskNr]->addEvent(Device::PATH_FAILURE,
+ ost.str(), VESPA_STRLOC);
+ }
+ }
+ } else if (errno == ENOENT) {
+ std::ostringstream ost;
+ ost << "Disk directory \"" << _vdsRoot << "/disks\" not created. VDS "
+ << "needs this to know which disks to use. See vespa doc.";
+ throw NoDisksException(ost.str(), VESPA_STRLOC);
+ } else {
+ std::ostringstream ost;
+ ost << "Failed to open directory \"" << _vdsRoot << "/disks\", errno "
+ << errno << ": " << getLastErrorString();
+ throw vespalib::IoException(ost.str(),
+ vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC);
+ }
+ // Assign found disks to the instance
+ _mountPoints.resize(entries.size());
+ for (uint32_t i=0; i<_mountPoints.size(); ++i) {
+ if (!entries[i].get()) {
+ if (!_mountPoints[i].get() ||
+ _mountPoints[i]->getState() == Device::OK)
+ {
+ std::ostringstream ost;
+ ost << _vdsRoot + "/disks/d" << i;
+ _mountPoints[i] = _deviceManager->getDirectory(ost.str(), i);
+ _mountPoints[i]->addEvent(
+ Device::NOT_FOUND,
+ "Disk not found during scanning of disks directory",
+ VESPA_STRLOC);
+ }
+ LOG(warning, "Disk %u was not found.", i);
+ } else if (!_mountPoints[i].get() ||
+ _mountPoints[i]->getState() == Device::NOT_FOUND)
+ {
+ _mountPoints[i] = entries[i];
+ }
+ }
+}
+
+namespace {
+ /**
+ * Get the disk number of the given mountpoint,
+ * or -1 if the mountpoint is illegal.
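+ * For example, a mountpoint path ending in "/d5" yields 5, while a path
+ * without a trailing "/d<number>" component yields -1.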
+ */
+ int getDiskNr(const std::string& mountPoint) {
+ std::string::size_type pos1 = mountPoint.rfind('/');
+ if (pos1 == std::string::npos ||
+ pos1 + 2 >= mountPoint.size() ||
+ mountPoint[pos1+1] != 'd')
+ {
+ return -1;
+ }
+ char* endp;
+ std::string digit(mountPoint.substr(pos1+2));
+ const char* digitptr = digit.c_str();
+ int diskNr = strtoul(digitptr, &endp, 10);
+ if (digitptr[0] == '\0' || *endp != '\0') return -1;
+ return diskNr;
+ }
+}
+
+void
+MountPointList::readFromFile()
+{
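+ // The disk status file contains one serialized Directory entry per line
+ // for disks that are not OK, terminated by a line containing only "EOF"
+ // (see writeToFile()).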
+ std::vector<Directory::LP> entries;
+ // Read entries from disk
+ std::ifstream is;
+ // Throw exception if failing to read file
+ is.exceptions(std::ifstream::badbit);
+ is.open(getDiskStatusFileName().c_str());
+ std::string line("EOF");
+ while (std::getline(is, line)) {
+ if (line == "EOF") { break; }
+ Directory::LP dir = _deviceManager->deserializeDirectory(line);
+ int diskNr = getDiskNr(dir->getPath());
+ if (diskNr == -1) {
+ LOG(warning, "Found illegal disk entry '%s' in vds disk file %s.",
+ line.c_str(), getDiskStatusFileName().c_str());
+ } else {
+ dir->setIndex(diskNr);
+ if (entries.size() <= static_cast<uint32_t>(diskNr)) {
+ entries.resize(diskNr + 1);
+ }
+ entries[diskNr] = dir;
+ }
+ }
+ if (line != "EOF" || std::getline(is, line)) {
+ LOG(warning, "Disk status file %s did not end in EOF.",
+ getDiskStatusFileName().c_str());
+ }
+ // Assign entries to this instance
+ if (_mountPoints.size() < entries.size()) {
+ _mountPoints.resize(entries.size());
+ }
+ for (uint32_t i=0; i<entries.size(); ++i) {
+ if (entries[i].get() &&
+ entries[i]->getState() != Device::OK &&
+ entries[i]->getState() != Device::NOT_FOUND)
+ {
+ _mountPoints[i] = entries[i];
+ }
+ }
+}
+
+void
+MountPointList::writeToFile() const
+{
+ try{
+ std::string filename(getDiskStatusFileName());
+ std::string tmpFilename(filename + ".tmp");
+ std::ofstream os(tmpFilename.c_str());
+ if (os.fail()) {
+ LOG(warning, "Failed to open %s.tmp for writing. Not writing "
+ "disks.status file.", filename.c_str());
+ return;
+ }
+ for (std::vector<Directory::LP>::const_iterator it
+ = _mountPoints.begin(); it != _mountPoints.end(); ++it)
+ {
+ if (it->get() &&
+ (*it)->getState() != Device::OK)
+ {
+ os << **it << "\n";
+ }
+ }
+ os << "EOF";
+ os.close();
+ if (os.fail()) {
+ LOG(warning, "Failed to write %s.tmp. disks.status file might now "
+ "be corrupt as we failed while writing it.",
+ filename.c_str());
+ return;
+ }
+ vespalib::rename(tmpFilename, filename, false, false);
+ LOG(debug, "Mount point list saved to file %s.", filename.c_str());
+ } catch (std::exception& e) {
+ LOG(warning, "Failed to write disk status file: %s", e.what());
+ }
+}
+
+namespace {
+ void testMountPoint(Directory& mountPoint) {
+ struct stat filestats;
+ if (stat(mountPoint.getPath().c_str(), &filestats) != 0) {
+ switch (errno) {
+ case ENOTDIR:
+ case ENAMETOOLONG:
+ case ENOENT:
+ case EACCES:
+ case ELOOP:
+ {
+ mountPoint.addEvent(Device::PATH_FAILURE,
+ getLastErrorString(),
+ VESPA_STRLOC);
+ return;
+ }
+ case EIO:
+ {
+ mountPoint.addEvent(Device::IO_FAILURE,
+ getLastErrorString(), VESPA_STRLOC);
+ return;
+ }
+ case EFAULT:
+ default:
+ assert(0); // Should never happen
+ }
+ }
+ // At this point we know the mount point exists.
+ if (!(S_ISDIR(filestats.st_mode))) {
+ mountPoint.addEvent(
+ Device::PATH_FAILURE,
+ "The path exist, but is not a directory.",
+ VESPA_STRLOC);
+ }
+ }
+
+ struct Chunk {
+ uint32_t nr;
+ uint32_t total;
+
+ Chunk() : nr(0), total(0) {} // Invalid
+ bool valid() const { return (nr < total); }
+ };
+
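+ // The chunkinfo file consists of three lines: a comment line, the
+ // mountpoint's own index, and the total number of mountpoints (see
+ // writeChunkDef() below). For example, disk 2 of 4 contains:
+ //   # This file tells VDS what data this mountpoint may contain.
+ //   2
+ //   4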
+ Chunk getChunkDef(const std::string& mountPoint) {
+ vespalib::File file(mountPoint + "/chunkinfo");
+ file.open(vespalib::File::READONLY);
+ std::string buffer;
+ buffer.resize(200, '\0');
+ size_t read(file.read(&buffer[0], buffer.size(), 0));
+ buffer.resize(read);
+ vespalib::StringTokenizer tokenizer(buffer, "\n", "");
+
+ Chunk chunk;
+ if (tokenizer.size() < 3) {
+ return chunk;
+ }
+
+ char *c;
+ chunk.nr = strtoul(tokenizer[1].c_str(), &c, 10);
+ if (tokenizer[1].c_str() + tokenizer[1].size() != c) return Chunk();
+ chunk.total = strtoul(tokenizer[2].c_str(), &c, 10);
+ if (tokenizer[2].c_str() + tokenizer[2].size() != c) return Chunk();
+ return chunk;
+ }
+
+ void writeChunkDef(Chunk c, const std::string& mountPoint) {
+ vespalib::File file(mountPoint + "/chunkinfo");
+ file.open(vespalib::File::CREATE | vespalib::File::TRUNC, true);
+ std::ostringstream ost;
+ ost << "# This file tells VDS what data this mountpoint may contain.\n"
+ << c.nr << "\n"
+ << c.total << "\n";
+ std::string content(ost.str());
+ file.write(&content[0], content.size(), 0);
+ }
+
+ Device::State getDeviceState(vespalib::IoException::Type type) {
+ using vespalib::IoException;
+ switch (type) {
+ case IoException::ILLEGAL_PATH: return Device::PATH_FAILURE;
+ case IoException::NO_PERMISSION: return Device::NO_PERMISSION;
+ case IoException::DISK_PROBLEM: return Device::IO_FAILURE;
+ case IoException::INTERNAL_FAILURE: return Device::INTERNAL_FAILURE;
+ default: ;
+ }
+ return Device::OK;
+ }
+
+ bool emptyDir(Directory& dir) {
+ const std::string& path(dir.getPath());
+ errno = 0;
+ DirPointer dirdesc(opendir(path.c_str()));
+ struct dirent* entry;
+ if (dirdesc) while ((entry = readdir(dirdesc))) {
+ if (errno) break;
+ std::string name(reinterpret_cast<char*>(&entry->d_name));
+ if (name == "." || name == "..") continue;
+ return false;
+ }
+ if (dirdesc == 0 || errno) {
+ std::ostringstream ost;
+ ost << "Failed to read directory \"" << path << "\", "
+ << "errno " << errno << ": " << getLastErrorString();
+ dir.addEvent(getDeviceState(vespalib::IoException::getErrorType(errno)),
+ ost.str(),
+ VESPA_STRLOC);
+ throw vespalib::IoException(ost.str(),
+ vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC);
+ }
+ return true;
+ }
+
+ struct WriteStatusFileIfFailing {
+ MountPointList& _list;
+ bool _failed;
+
+ WriteStatusFileIfFailing(MountPointList& list)
+ : _list(list), _failed(false) {}
+ ~WriteStatusFileIfFailing() {
+ if (_failed) _list.writeToFile();
+ }
+
+ void reportFailure() { _failed = true; }
+ };
+}
+
+bool
+MountPointList::verifyHealthyDisks(int mountPointCount)
+{
+ WriteStatusFileIfFailing statusWriter(*this);
+ int usable = 0, empty = 0;
+ std::map<uint32_t, Directory::LP> lackingChunkDef;
+ // Test disks and get chunkinfo
+ for (uint32_t i=0, n=_mountPoints.size(); i<n; ++i) {
+ Directory::LP dir(_mountPoints[i]);
+ // Insert NOT_FOUND disk if not found, such that operator[]
+ // can return only valid pointers
+ if (!dir.get()) {
+ std::ostringstream ost;
+ ost << _vdsRoot + "/disks/d" << i;
+ dir = _deviceManager->getDirectory(ost.str(), i);
+ dir->addEvent(Device::NOT_FOUND,
+ "Disk not found during scanning of disks directory",
+ VESPA_STRLOC);
+ _mountPoints[i] = dir;
+ statusWriter.reportFailure();
+ }
+ if (dir->isOk()) {
+ testMountPoint(*dir);
+ if (!dir->isOk()) statusWriter.reportFailure();
+ }
+ // Don't touch unhealthy or non-existing disks.
+ if (!dir->isOk()) {
+ std::ostringstream ost;
+ ost << "Not using disk " << i << " marked bad: ";
+ dir->getLastEvent()->print(ost, true, " ");
+ LOG(warning, "%s", ost.str().c_str());
+ continue;
+ }
+
+ // Read chunkinfo
+ using vespalib::IoException;
+ Chunk chunk;
+ try{
+ chunk = getChunkDef(dir->getPath());
+ } catch (IoException& e) {
+ chunk = Chunk();
+ if (e.getType() == IoException::NOT_FOUND) {
+ if (!emptyDir(*dir)) {
+ dir->addEvent(Device::INTERNAL_FAILURE,
+ "Foreign data in mountpoint. New "
+ "mountpoints added should be empty.", "");
+ }
+ } else {
+ LOG(warning, "Failed to read chunkinfo file from mountpoint %s",
+ dir->getPath().c_str());
+ Device::State newState(getDeviceState(e.getType()));
+ if (newState != Device::OK) {
+ dir->addEvent(newState, e.what(), VESPA_STRLOC);
+ }
+ }
+ } catch (std::exception& e) {
+ LOG(warning, "Failed to read chunkinfo file from mountpoint %s",
+ dir->getPath().c_str());
+ dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC);
+ }
+
+ // If disk was found unusable, don't use it.
+ if (!dir->isOk()) {
+ LOG(warning, "Unusable disk %d: %s",
+ i, dir->getLastEvent()->toString(true).c_str());
+ statusWriter.reportFailure();
+ continue;
+ }
+ ++usable;
+ // Ensure disk fits in with the already detected ones.
+ if (!chunk.valid()) {
+ ++empty;
+ lackingChunkDef[i] = dir;
+ } else if (chunk.nr != i) {
+ std::ostringstream ost;
+ ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr
+ << " (instead of " << i << ").";
+ LOG(error, "%s", ost.str().c_str());
+ throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC);
+ } else if (mountPointCount == -1) {
+ mountPointCount = chunk.total;
+ } else if (static_cast<uint32_t>(mountPointCount) != chunk.total) {
+ std::ostringstream ost;
+ ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr
+ << " of " << chunk.total << " (instead of " << i << " of "
+ << mountPointCount << ").";
+ LOG(error, "%s", ost.str().c_str());
+ throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC);
+ }
+ }
+ if (empty == usable && usable != mountPointCount && mountPointCount != -1) {
+ std::ostringstream ost;
+ ost << "Found " << usable << " disks and config says we're "
+ << "supposed to have " << mountPointCount << ". Not initializing "
+ << "disks.";
+ throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC);
+ }
+ bool retval = true;
+ // Handle case where no chunkinfo file present (none/unusable/new disks)
+ if (mountPointCount == -1) {
+ if (_mountPoints.size() == 0) {
+ LOG(error, "No disks configured for storage node. Disk "
+ "directories/symlinks for this node should be created "
+ "in %s/disks/. Please refer to VDS documentation to "
+ "learn how to add disks", _vdsRoot.c_str());
+ throw spi::HandledException("No disks configured", VESPA_STRLOC);
+ } else if (usable == 0) {
+ LOG(error, "All of the configured disks are unusable. "
+ "Please refer to previous warnings and the VDS "
+ "documentation for troubleshooting");
+ throw spi::HandledException("All disks unusable", VESPA_STRLOC);
+ } else {
+ mountPointCount = _mountPoints.size();
+ LOG(info, "All disks empty. Setting up node to run with the %u "
+ "found disks.", mountPointCount);
+ retval = false;
+ }
+ }
+ // Write chunkdef files where these are missing
+ for (std::map<uint32_t, Directory::LP>::const_iterator it
+ = lackingChunkDef.begin(); it != lackingChunkDef.end(); ++it)
+ {
+ const Directory::LP& dir = it->second;
+ Chunk c;
+ c.nr = it->first;
+ c.total = mountPointCount;
+ if (c.nr >= c.total) {
+ LOG(warning, "Can't use disk %u of %u as the index is too high. "
+ "(Disks are indexed from zero)", c.nr, c.total);
+ continue;
+ }
+ if (!emptyDir(*dir)) {
+ LOG(warning, "Not creating chunkinfo file on disk %u as it already "
+ "contains data. If you want to include the disk, "
+ "create chunkinfo file manually.", c.nr);
+ assert(!dir->isOk());
+ continue;
+ }
+ using vespalib::IoException;
+ try{
+ writeChunkDef(c, dir->getPath());
+ retval = true;
+ } catch (IoException& e) {
+ statusWriter.reportFailure();
+ LOG(warning, "Failed to write chunkinfo file to mountpoint %s.",
+ dir->getPath().c_str());
+ Device::State newState(getDeviceState(e.getType()));
+ if (newState != Device::OK) {
+ dir->addEvent(newState, e.what(), VESPA_STRLOC);
+ }
+ } catch (std::exception& e) {
+ statusWriter.reportFailure();
+ LOG(warning, "Failed to write chunkinfo file to mountpoint %s",
+ dir->getPath().c_str());
+ dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC);
+ }
+ }
+ // If we need more entries in mountpointlist, due to chunkinfo
+ // showing more indexes, add them.
+ for (int i = _mountPoints.size(); i < mountPointCount; ++i) {
+ std::ostringstream ost;
+ ost << _vdsRoot + "/disks/d" << i;
+ Directory::LP dir(_deviceManager->getDirectory(ost.str(), i));
+ dir->addEvent(Device::NOT_FOUND,
+ "Disk not found during scanning of disks directory",
+ VESPA_STRLOC);
+ _mountPoints.push_back(dir);
+ }
+ if (static_cast<int>(_mountPoints.size()) > mountPointCount) {
+ _mountPoints.resize(mountPointCount);
+ }
+ return retval;
+}
+
+uint16_t
+MountPointList::findIndex(const Directory& dir) const
+{
+ for (uint16_t i = 0; i < _mountPoints.size(); ++i) {
+ if (_mountPoints[i].get() != 0 && dir == *_mountPoints[i]) return i;
+ }
+ throw vespalib::IllegalArgumentException(
+ "Could not find directory " + dir.toString(), VESPA_STRLOC);
+}
+
+std::string
+MountPointList::getDiskStatusFileName() const
+{
+ return _vdsRoot + "/disks.status";
+}
+
+vespalib::string
+MountPointList::reportXmlStatus(vespalib::xml::XmlOutputStream& xos,
+ const framework::HttpUrlPath&) const
+{
+ xos << *_deviceManager;
+ return "";
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h
new file mode 100644
index 00000000000..33a9574682a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h
@@ -0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::MountPointList
+ * \ingroup persistence
+ *
+ * \brief Class holding information about the mount points used by storage
+ *
+ * We need to keep a list of mount points, to read and write the mount point
+ * file, and to know which mount points should be used and which should not.
+ *
+ * NOTE: A mountpoint is often referred to as a disk, even though you
+ * technically can have multiple mountpoints per partition and multiple
+ * partitions per disk.
+ *
+ * IMPORTANT: Remember to call verifyHealthyDisks() before starting to use them.
+ */
+
+#pragma once
+
+#include <vespa/vespalib/util/printable.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/memfilepersistence/device/directory.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vector>
+#include <vespa/vespalib/util/linkedptr.h>
+#include <vespa/persistence/spi/persistenceprovider.h>
+
+namespace storage {
+namespace lib {
+ class NodeState;
+}
+
+namespace memfile {
+
+struct MountPointList : public framework::XmlStatusReporter {
+ typedef std::unique_ptr<MountPointList> UP;
+
+ /** Create a mount point list. */
+ MountPointList(const std::string& vdsRoot,
+ const std::vector<vespalib::string>& diskPath,
+ vespalib::LinkedPtr<DeviceManager>);
+
+ DeviceManager& getDeviceManager() { return *_deviceManager; }
+
+ /**
+ * Call init to initialize the mount point list in the regular fashion.
+ * @param diskCount Number of disks to find, or 0 to auto-detect.
+ * @throws config::InvalidConfigException if diskCount is non-zero and does
+ *         not match the number of mount points found.
+ */
+ void init(uint16_t diskCount);
+
+ /**
+ * Initialize the disks, see description of diskPath config in
+ * stor-devices. Will be called as part of init().
+ */
+ void initDisks();
+
+ /**
+ * Scan the disks directory for disks. Entries found that do not already
+ * exist in this instance, or that are marked NOT_FOUND, are added.
+ *
+ * To avoid touching possibly bad disks, this does not access the disks
+ * themselves. Thus, in the case of symlinks, it assumes the symlink
+ * points to a directory.
+ */
+ void scanForDisks();
+
+ /**
+ * Read the disk status file and adjust the list.
+ * Importantly, any entry marking a disk bad (except NOT_FOUND entries,
+ * should any be in the file) overrides a disk marked OK in this instance.
+ *
+ * Similarly to scanForDisks(), this does not access the disks themselves.
+ */
+ void readFromFile();
+
+ /**
+ * Initialize the partition monitors within the partitions. Done after
+ * partition creation, as partition objects are generated for bad disks.
+ */
+ void initializePartitionMonitors();
+
+ /**
+ * Write the current state of disks to the disk status file.
+ * Disks that are OK or NOT_FOUND do not need to be written to the file.
+ */
+ void writeToFile() const;
+
+ /**
+ * Go through all the mountpoints marked ok, and check that they work.
+ * <ul>
+ * <li> Verify that symlinks point to a directory, not a file.
+ * <li> Read disk chunk files, which state that the mountpoint is number A of N.
+ * <li> Write disk chunk files on mountpoints missing these.
+ *
+ * IMPORTANT: This must be called before starting to use the disks.
+ * getSize() may not return the correct size before this has been called.
+ *
+ * @return True if at least one mountpoint appears healthy.
+ * @throws vespalib::IllegalStateException If the mountpoint chunk files
+ * disagree on how many mountpoints there are.
+ */
+ bool verifyHealthyDisks(int mountPointCount);
+
+ /** Get how many mountpoints exist. */
+ uint32_t getSize() const { return _mountPoints.size(); }
+
+ /** Get the given mountpoint. */
+ Directory& operator[](uint16_t i)
+ { assert(_mountPoints.size() > i); return *_mountPoints[i]; }
+ const Directory& operator[](uint16_t i) const
+ { assert(_mountPoints.size() > i); return *_mountPoints[i]; }
+
+ uint16_t findIndex(const Directory& dir) const;
+
+ // XmlStatusReporter implementation
+ vespalib::string reportXmlStatus(vespalib::xml::XmlOutputStream&,
+ const framework::HttpUrlPath&) const;
+
+ /**
+ * Returns the current state of the mountpoints.
+ */
+ spi::PartitionStateList getPartitionStates() const;
+
+private:
+ vespalib::LinkedPtr<DeviceManager> _deviceManager;
+ std::string _vdsRoot;
+ std::vector<vespalib::string> _diskPath;
+ std::vector<Directory::LP> _mountPoints;
+
+ /** Get the name used for the disk status file. */
+ std::string getDiskStatusFileName() const;
+};
+
+} // memfile
+
+} // storage
+
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp
new file mode 100644
index 00000000000..2829ae8a212
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/partition.h>
+
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".persistence.device.partition");
+
+namespace storage {
+
+namespace memfile {
+
+Partition::Partition(DeviceManager& manager,
+ uint64_t id,
+ const std::string& mountPoint)
+ : Device(manager),
+ _id(id),
+ _mountPoint(mountPoint),
+ _disk(manager.getDisk(mountPoint)),
+ _monitor()
+{
+ assert(_disk.get());
+}
+
+void Partition::initializeMonitor()
+{
+ try{
+ _monitor.reset(new PartitionMonitor(_mountPoint));
+ _monitor->setPolicy(_manager.getStatPolicy(), _manager.getStatPeriod());
+ } catch (vespalib::IoException& e) {
+ std::ostringstream error;
+ error << "Failed to create partition monitor for partition "
+ << _mountPoint << ": " << e.getMessage();
+ LOG(warning, "%s", error.str().c_str());
+ addEvent(IOEvent(_manager.getClock().getTimeInSeconds().getTime(),
+ Device::IO_FAILURE, error.str(), VESPA_STRLOC));
+ }
+}
+
+void Partition::addEvent(const IOEvent& e)
+{
+ // No events are yet defined that are partition specific
+ _disk->addEvent(e);
+}
+
+const IOEvent*
+Partition::getLastEvent() const
+{
+ if (!_events.empty()) return &_events.back();
+ return _disk->getLastEvent();
+}
+
+void
+Partition::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ out << "Partition: " << _id << " " << _mountPoint << " ";
+ Device::print(out, verbose, indent);
+}
+
+} // memfile
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partition.h b/memfilepersistence/src/vespa/memfilepersistence/device/partition.h
new file mode 100644
index 00000000000..eeedafb7a49
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/partition.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::Partition
+ * \ingroup persistence
+ *
+ * \brief Class representing a disk partition.
+ *
+ * IMPORTANT: Partition objects may be generated for faulty partitions too,
+ * thus creating the object must not result in a disk operation.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/device/disk.h>
+#include <vespa/memfilepersistence/device/partitionmonitor.h>
+
+namespace storage {
+
+namespace memfile {
+
+class Partition : public Device {
+ uint64_t _id;
+ std::string _mountPoint;
+ Disk::LP _disk;
+ PartitionMonitor::LP _monitor;
+
+ Partition(DeviceManager& manager, uint64_t id,
+ const std::string& mountPoint);
+
+ friend class DeviceManager;
+
+public:
+ typedef vespalib::LinkedPtr<Partition> LP;
+
+ void initializeMonitor();
+
+ uint64_t getId() const { return _id; }
+ const std::string& getMountPoint() const { return _mountPoint; }
+
+ Disk& getDisk() { return *_disk; }
+ const Disk& getDisk() const { return *_disk; }
+
+ PartitionMonitor* getMonitor() { return _monitor.get(); }
+ const PartitionMonitor* getMonitor() const { return _monitor.get(); }
+
+ virtual void addEvent(const IOEvent& e);
+ const IOEvent* getLastEvent() const;
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ bool operator==(const Partition& p) const { return (_id == p._id); }
+ bool operator!=(const Partition& p) const { return (_id != p._id); }
+
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp
new file mode 100644
index 00000000000..db1e61bc24e
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp
@@ -0,0 +1,392 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/device/partitionmonitor.h>
+
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".persistence.device.partition.monitor");
+
+namespace storage {
+
+namespace memfile {
+
+namespace {
+
+ uint32_t getBlockSize(struct statvfs& info) {
+ // f_bsize has a somewhat confusing description in the man page, but as
+ // far as we have seen on actual file systems it corresponds to the
+ // block size.
+ return info.f_bsize;
+ }
+
+ float calcRootOnlyRatio(struct statvfs& info) {
+ // Cast to float before dividing so integer division does not truncate
+ // the ratio to zero.
+ return (static_cast<float>(info.f_bfree)
+ - static_cast<float>(info.f_bavail))
+ / info.f_blocks;
+ }
+
+ struct RealStatter : public PartitionMonitor::Statter {
+ virtual void statFileSystem(const std::string& file,
+ struct statvfs& info)
+ {
+ if (statvfs(file.c_str(), &info) != 0) {
+ vespalib::asciistream ost;
+ ost << "Failed to run statvfs to find data on disk containing "
+ << "file " << file << ": errno(" << errno << ") - "
+ << vespalib::getLastErrorString() << ".";
+ throw vespalib::IoException(
+ ost.str(), vespalib::IoException::getErrorType(errno),
+ VESPA_STRLOC);
+ }
+ }
+ };
+
+}
+
+uint64_t
+PartitionMonitor::calcTotalSpace(struct statvfs& info) const {
+ // Ignore the part of the filesystem only root can write to.
+ uint64_t nonRootBlocksExisting(
+ static_cast<uint64_t>(info.f_blocks)
+ - static_cast<uint64_t>(info.f_bfree)
+ + static_cast<uint64_t>(info.f_bavail));
+ return nonRootBlocksExisting * _blockSize;
+}
+
+uint64_t
+PartitionMonitor::calcUsedSpace(struct statvfs& info) const {
+ return (_partitionSize - info.f_bavail * _blockSize);
+}
+
+float
+PartitionMonitor::calcInodeFillRatio(struct statvfs& info) const {
+ uint64_t freeForRootOnly = info.f_ffree - info.f_favail;
+ uint64_t nonRootInodes = info.f_files - freeForRootOnly;
+ float freeInodesRatio = static_cast<float>(info.f_favail) / nonRootInodes;
+ return float(1.0) - freeInodesRatio;
+}
+
+uint64_t
+PartitionMonitor::calcDynamicPeriod() const
+{
+ uint32_t lastFillRate = (100 * _usedSpace / _partitionSize);
+ uint32_t maxFillRate = static_cast<uint32_t>(100 * _maxFillRate);
+ if (lastFillRate >= maxFillRate) {
+ return 1;
+ } else {
+ uint32_t fillDiff = (maxFillRate - lastFillRate);
+ return _period * fillDiff * fillDiff;
+ }
+}
+
+PartitionMonitor::PartitionMonitor(const std::string& file)
+ : _fileOnPartition(file),
+ _fileSystemId(0),
+ _policy(STAT_PERIOD),
+ _blockSize(0),
+ _partitionSize(0),
+ _usedSpace(0),
+ _period(100),
+ _queriesSinceStat(0),
+ _maxFillRate(0.98),
+ _rootOnlyRatio(0),
+ _inodeFillRate(0),
+ _statter()
+{
+ setStatter(std::unique_ptr<Statter>(new RealStatter));
+ LOG(debug, "%s: Monitor created with default setting of period at 100.",
+ _fileOnPartition.c_str());
+}
+
+void
+PartitionMonitor::setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy policy,
+ uint32_t period)
+{
+ switch (policy) {
+ case vespa::config::storage::StorDevicesConfig::STAT_ALWAYS:
+ setAlwaysStatPolicy(); break;
+ case vespa::config::storage::StorDevicesConfig::STAT_ONCE:
+ setStatOncePolicy(); break;
+ case vespa::config::storage::StorDevicesConfig::STAT_PERIOD:
+ if (period == 0) {
+ setStatPeriodPolicy();
+ } else {
+ setStatPeriodPolicy(period);
+ }
+ break;
+ case vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC:
+ if (period == 0) {
+ setStatDynamicPolicy();
+ } else {
+ setStatDynamicPolicy(period);
+ }
+ break;
+ }
+}
+
+void
+PartitionMonitor::setAlwaysStatPolicy()
+{
+ _policy = ALWAYS_STAT;
+ LOG(debug, "%s: Set stat policy to always stat.", _fileOnPartition.c_str());
+}
+
+void
+PartitionMonitor::setStatOncePolicy()
+{
+ _policy = STAT_ONCE;
+ LOG(debug, "%s: Set stat policy to stat once.", _fileOnPartition.c_str());
+}
+
+void
+PartitionMonitor::setStatPeriodPolicy(uint32_t period)
+{
+ _policy = STAT_PERIOD;
+ _period = period;
+ LOG(debug, "%s: Set stat policy to stat every %u attempt.",
+ _fileOnPartition.c_str(), _period);
+}
+
+void
+PartitionMonitor::setStatDynamicPolicy(uint32_t basePeriod)
+{
+ _policy = STAT_DYNAMIC;
+ _period = basePeriod;
+ LOG(debug, "%s: Set stat policy to stat dynamicly with base %u.",
+ _fileOnPartition.c_str(), _period);
+}
+
+void
+PartitionMonitor::setStatter(std::unique_ptr<Statter> statter)
+{
+ vespalib::LockGuard lock(_updateLock);
+ _statter = std::move(statter);
+ struct statvfs info;
+ _statter->statFileSystem(_fileOnPartition, info);
+ _blockSize = getBlockSize(info);
+ _partitionSize = calcTotalSpace(info);
+ // Calculations further down assume the total size can be held within
+ // a signed 64-bit integer.
+ assert(_partitionSize
+ < static_cast<uint64_t>(std::numeric_limits<int64_t>::max()));
+ _usedSpace = calcUsedSpace(info);
+ _rootOnlyRatio = calcRootOnlyRatio(info);
+ _inodeFillRate = calcInodeFillRatio(info);
+ _fileSystemId = info.f_fsid;
+ LOG(debug, "FileSystem(%s): Total size: %" PRIu64 ", used: %" PRIu64
+ ", root only %f, max fill rate %f, fill rate %f.",
+ _fileOnPartition.c_str(),
+ _partitionSize,
+ _usedSpace,
+ _rootOnlyRatio,
+ _maxFillRate,
+ static_cast<double>(_usedSpace) / _partitionSize);
+}
+
+void
+PartitionMonitor::updateIfNeeded() const
+{
+ uint32_t period = 0;
+ switch (_policy) {
+ case STAT_ONCE: period = std::numeric_limits<uint32_t>::max(); break;
+ case ALWAYS_STAT: period = 1; break;
+ case STAT_PERIOD: period = _period; break;
+ case STAT_DYNAMIC: period = calcDynamicPeriod(); break;
+ }
+ if (++_queriesSinceStat >= period) {
+ struct statvfs info;
+ try{
+ _statter->statFileSystem(_fileOnPartition, info);
+ _usedSpace = calcUsedSpace(info);
+ _inodeFillRate = calcInodeFillRatio(info);
+ _queriesSinceStat = 0;
+ } catch (vespalib::Exception& e) {
+ LOG(warning, "Failed to stat filesystem with file %s. Using "
+ "last stored used space of %" PRIu64 ".",
+ _fileOnPartition.c_str(), _usedSpace);
+ }
+ }
+}
+uint64_t
+PartitionMonitor::getUsedSpace() const
+{
+ vespalib::LockGuard lock(_updateLock);
+ updateIfNeeded();
+ return _usedSpace;
+}
+
+float
+PartitionMonitor::getFillRate(int64_t afterAdding) const
+{
+ vespalib::LockGuard lock(_updateLock);
+ updateIfNeeded();
+ float fillRate;
+ if (static_cast<int64_t>(_usedSpace) + afterAdding
+ >= static_cast<int64_t>(_partitionSize))
+ {
+ fillRate = 1;
+ } else if (static_cast<int64_t>(_usedSpace) + afterAdding < 0) {
+ fillRate = 0;
+ } else {
+ fillRate = (static_cast<double>(_usedSpace) + afterAdding)
+ / _partitionSize;
+ }
+ if (fillRate < _inodeFillRate) {
+ fillRate = _inodeFillRate;
+ LOG(spam, "Inode fill rate is now %f. %u requests since last stat.",
+ fillRate, _queriesSinceStat);
+ } else {
+ LOG(spam, "Fill rate is now %f. %u requests since last stat.",
+ fillRate, _queriesSinceStat);
+ }
+ return fillRate;
+}
+
+void
+PartitionMonitor::setMaxFillness(float maxFill)
+{
+ if (maxFill <= 0 || maxFill > 1.0) {
+ vespalib::asciistream ost;
+ ost << "Max fill rate must be in the range <0,1]. Value of "
+ << maxFill << " is not legal.";
+ throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC);
+ }
+ _maxFillRate = maxFill;
+}
+
+void
+PartitionMonitor::addingData(uint64_t dataSize)
+{
+ vespalib::LockGuard lock(_updateLock);
+ _usedSpace = std::max(_usedSpace, _usedSpace + dataSize);
+}
+
+void
+PartitionMonitor::removingData(uint64_t dataSize)
+{
+ vespalib::LockGuard lock(_updateLock);
+ _usedSpace = (_usedSpace > dataSize ? _usedSpace - dataSize : 0);
+}
+
+uint64_t
+PartitionMonitor::getPartitionId(const std::string& fileOnPartition)
+{
+ RealStatter realStatter;
+ struct statvfs info;
+ realStatter.statFileSystem(fileOnPartition, info);
+ return info.f_fsid;
+}
+
+namespace {
+ void printSize(std::ostream& out, uint64_t size) {
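+ // Append a human-readable approximation of the size, e.g. a size of
+ // 1536000 bytes is printed as " (1500 kB)".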
+ const char* units[] = { "B", "kB", "MB", "GB", "TB" };
+ uint32_t unit = 0;
+ while (size >= 10 * 1024 && unit < 4) {
+ size /= 1024;
+ ++unit;
+ }
+ out << " (" << size << " " << units[unit] << ")";
+ }
+}
+
+void
+PartitionMonitor::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ vespalib::LockGuard lock(_updateLock);
+ out << "PartitionMonitor(" << _fileOnPartition;
+ if (verbose) {
+ out << ") {"
+ << "\n" << indent << " Fill rate: "
+ << (100.0 * _usedSpace / _partitionSize)
+ << " %"
+ << "\n" << indent << " Inode fill rate: " << (100 * _inodeFillRate)
+ << " %"
+ << "\n" << indent << " Detected block size: " << _blockSize
+ << "\n" << indent << " File system id: " << _fileSystemId
+ << "\n" << indent << " Total size: " << _partitionSize;
+ printSize(out, _partitionSize);
+ out << "\n" << indent << " Used size: " << _usedSpace;
+ printSize(out, _usedSpace);
+ out << "\n" << indent << " Queries since last stat: "
+ << _queriesSinceStat
+ << "\n" << indent << " Monitor policy: ";
+ } else {
+ out << ", ";
+ }
+ switch (_policy) {
+ case STAT_ONCE: out << "STAT_ONCE"; break;
+ case ALWAYS_STAT: out << "ALWAYS_STAT"; break;
+ case STAT_PERIOD: out << "STAT_PERIOD(" << _period << ")"; break;
+ case STAT_DYNAMIC: out << "STAT_DYNAMIC(" << calcDynamicPeriod() << ")";
+ break;
+ }
+ if (verbose) {
+ if (_policy == STAT_DYNAMIC) {
+ out << "\n" << indent << " Period at current fillrate "
+ << calcDynamicPeriod();
+ }
+ out << "\n" << indent << " Root only ratio " << _rootOnlyRatio
+ << "\n" << indent << " Max fill rate " << (100 * _maxFillRate)
+ << " %"
+ << "\n" << indent << "}";
+ } else {
+ bool inodesFill = false;
+ double fillRate = static_cast<double>(_usedSpace) / _partitionSize;
+ if (_inodeFillRate > fillRate) {
+ inodesFill = true;
+ fillRate = _inodeFillRate;
+ }
+
+ out << ", " << _usedSpace << "/" << _partitionSize << " used - "
+ << (100 * fillRate) << " % full" << (inodesFill ? " (inodes)" : "")
+ << ")";
+ }
+}
+
+void
+PartitionMonitor::printXml(vespalib::XmlOutputStream& xos) const
+{
+ using namespace vespalib::xml;
+ xos << XmlTag("partitionmonitor")
+ << XmlContent(toString(true))
+ << XmlEndTag();
+}
+
+void
+PartitionMonitor::overrideRealStat(uint32_t blockSize, uint32_t totalBlocks,
+ uint32_t blocksUsed, float inodeFillRate)
+{
+ vespalib::LockGuard lock(_updateLock);
+ if (_policy != STAT_ONCE) {
+ throw vespalib::IllegalStateException(
+ "Makes no sense to override real stat if policy isnt set to "
+ "STAT_ONCE. Values will just be set back to real values again.",
+ VESPA_STRLOC);
+ }
+ _blockSize = blockSize;
+ _partitionSize = totalBlocks * blockSize;
+ _usedSpace = blocksUsed * blockSize;
+ _inodeFillRate = inodeFillRate;
+}
+
+}
+
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h
new file mode 100644
index 00000000000..401a070389e
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::PartitionMonitor
+ * \ingroup persistence
+ *
+ * \brief Monitors how full a file system is.
+ *
+ * This class is used by the persistence layer to monitor how full a disk is.
+ * It remembers how full the disk is, and can also take hints, such that it
+ * can give reasonably correct answers cheaply.
+ */
+#pragma once
+
+#include <vespa/vespalib/util/printable.h>
+#include <sys/statvfs.h>
+#include <vespa/config-stor-devices.h>
+#include <vespa/vespalib/util/linkedptr.h>
+#include <vespa/vespalib/util/sync.h>
+#include <vespa/vespalib/util/xmlserializable.h>
+
+namespace storage {
+
+namespace memfile {
+
+class PartitionMonitorTest;
+
+class PartitionMonitor : public vespalib::Printable,
+ public vespalib::XmlSerializable
+{
+public:
+ typedef vespalib::LinkedPtr<PartitionMonitor> LP;
+
+ /**
+ * Use an object to stat through, such that unit tests can fake stat
+ * responses.
+ */
+ struct Statter {
+ virtual ~Statter() {}
+ virtual void statFileSystem(const std::string& file,
+ struct statvfs& info) = 0;
+ };
+
+private:
+ enum MonitorPolicy { ALWAYS_STAT, STAT_ONCE, STAT_PERIOD, STAT_DYNAMIC };
+
+ vespalib::Lock _updateLock;
+ std::string _fileOnPartition;
+ uint64_t _fileSystemId;
+ MonitorPolicy _policy;
+ uint32_t _blockSize;
+ uint64_t _partitionSize;
+ mutable uint64_t _usedSpace;
+ uint32_t _period;
+ mutable uint32_t _queriesSinceStat;
+ float _maxFillRate;
+ float _rootOnlyRatio;
+ mutable float _inodeFillRate;
+ std::unique_ptr<Statter> _statter;
+
+ void setStatter(std::unique_ptr<Statter> statter);
+ uint64_t calcTotalSpace(struct statvfs& info) const;
+ uint64_t calcUsedSpace(struct statvfs& info) const;
+ uint64_t calcDynamicPeriod() const;
+ float calcInodeFillRatio(struct statvfs& info) const;
+
+ friend class PartitionMonitorTest;
+
+public:
+ /** Default policy is STAT_PERIOD(100). Default max fill rate 0.98. */
+ PartitionMonitor(const std::string& fileOnFileSystem);
+
+ /** Set monitor policy from config. */
+ void setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy, uint32_t period);
+
+ /** Always stat on getFillRate() requests. */
+ void setAlwaysStatPolicy();
+ /**
+ * Stat only once, then depend on addingData/removingData hints to provide
+ * correct answers.
+ */
+ void setStatOncePolicy();
+ /**
+ * Run stat once every <period> getFillRate() requests. Depends on hints to
+ * keep the value sane within a period.
+ */
+ void setStatPeriodPolicy(uint32_t period = 100);
+ /**
+ * Run stat often when close to full, but seldom when there is lots of free
+ * space. With the current algorithm, the stat period is the base period
+ * multiplied by the square of the percentage-point distance from the
+ * maximum fill rate.
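+ * For example, with a base period of 10 and a maximum fill rate of 98 %,
+ * a disk at 88 % full is statted every 10 * 10 * 10 = 1000 requests, while
+ * at 97 % full it is statted every 10 requests.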
+ */
+ void setStatDynamicPolicy(uint32_t basePeriod = 10);
+
+ /** Get the file system id of this instance. */
+ uint64_t getFileSystemId() const { return _fileSystemId; }
+
+ float getRootOnlyRatio() const { return _rootOnlyRatio; }
+
+ uint64_t getPartitionSize() const { return _partitionSize; }
+
+ uint64_t getUsedSpace() const;
+
+ /**
+ * Get the fill rate of the file system, where 0 is empty and 1 is 100%
+ * full.
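+ *
+ * The afterAdding argument asks what the fill rate would be after adding
+ * (or, for negative values, removing) that many bytes of data.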
+ */
+ float getFillRate(int64_t afterAdding = 0) const;
+
+ /** Set the limit where the file system is considered full. (0-1) */
+ void setMaxFillness(float maxFill);
+
+ /** Query whether disk fill rate is high enough to be considered full. */
+ bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const
+ {
+ if (maxFillRate == -1) {
+ maxFillRate = _maxFillRate;
+ }
+ return (getFillRate(afterAdding) >= maxFillRate);
+ }
+
+ /**
+ * To keep the monitor more up to date without having to do additional stat
+ * commands, give clues when you add or remove data from the file system.
+ */
+ void addingData(uint64_t dataSize);
+
+ /**
+ * To keep the monitor more up to date without having to do additional stat
+ * commands, give clues when you add or remove data from the file system.
+ */
+ void removingData(uint64_t dataSize);
+
+ virtual void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ /**
+ * Calculate the file system id for a given file. Useful when you want a
+ * monitor for a new file but are unsure whether you already have one for
+ * that file system.
+ */
+ static uint64_t getPartitionId(const std::string& fileOnPartition);
+
+ /** Used in unit testing only. */
+ void overrideRealStat(uint32_t blockSize, uint32_t totalBlocks,
+ uint32_t blocksUsed, float inodeFillRate = 0.1);
+
+ virtual void printXml(vespalib::XmlOutputStream&) const;
+
+private:
+ void updateIfNeeded() const;
+
+};
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt
new file mode 100644
index 00000000000..8bf8eb37b04
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_init OBJECT
+ SOURCES
+ filescanner.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp
new file mode 100644
index 00000000000..74708bb36d8
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/init/filescanner.h>
+
+#include <vespa/document/bucket/bucketid.h>
+#include <iomanip>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+LOG_SETUP(".persistence.memfile.filescanner");
+
+namespace storage {
+namespace memfile {
+
+FileScanner::Metrics::Metrics(framework::Clock& clock)
+ : metrics::MetricSet("dbinit.filescan", "",
+ "Metrics for the memfile filescanner"),
+ _alienFiles(),
+ _alienFileCounter("alienfiles", "",
+ "Unknown files found during disk scanning.", this),
+ _temporaryFilesDeleted("tempfilesdeleted", "",
+ "Temporary files found and deleted during initialization.", this),
+ _multipleBucketsSameDisk("multiplebucketssamedisk", "",
+ "Multiple buckets found on same disk.", this),
+ _wrongDir("wrongdir", "",
+ "Number of buckets moved from wrong to right directory.", this),
+ _wrongDisk("wrongdisk", "",
+ "Number of buckets found on non-ideal disk.", this),
+ _dirsListed("dirslisted", "",
+ "Directories listed in list step of initialization.", this),
+ _startTime(clock),
+ _listLatency("listlatency", "",
+ "Time used until list phase is done. (in ms)", this)
+{
+}
+
+FileScanner::FileScanner(framework::ComponentRegister& reg,
+ const MountPointList& mountPoints,
+ uint32_t directoryLevels,
+ uint32_t directorySpread)
+ : framework::Component(reg, "filescanner"),
+ _directoryMapper(directoryLevels, directorySpread),
+ _mountPoints(mountPoints),
+ _dirLevels(directoryLevels),
+ _dirSpread(directorySpread),
+ _globalLock(),
+ _globalMetrics(getClock())
+{
+ registerMetric(_globalMetrics);
+}
+
+void
+FileScanner::buildBucketList(document::BucketId::List & list,
+ uint16_t partition,
+ uint16_t part, uint16_t totalParts)
+{
+ Context context(_mountPoints[partition], getClock());
+ std::vector<uint32_t> path(_dirLevels);
+ if (_dirLevels > 0) {
+ // If we have dirlevels, split into parts on top level only
+ for (uint32_t i=0, n=_dirSpread; i<n; ++i) {
+ if (i % totalParts == part) {
+ path[0] = i;
+ buildBucketList(list, context, path, 1);
+ }
+ }
+ } else if (part == 0) {
+ // If we don't have dirlevels, send all data in part 0
+ buildBucketList(list, context, path);
+ }
+ // Grab lock and update metrics
+ vespalib::LockGuard lock(_globalLock);
+ std::vector<metrics::Metric::LP> newMetrics;
+ context._metrics.addToSnapshot(_globalMetrics, newMetrics);
+ assert(newMetrics.empty());
+}
+
+void
+FileScanner::buildBucketList(document::BucketId::List & list,
+ Context& context,
+ std::vector<uint32_t>& path,
+ uint32_t dirLevel)
+{
+ if (dirLevel >= _dirLevels) {
+ buildBucketList(list, context, path);
+ return;
+ }
+ for (uint32_t i=0, n=_dirSpread; i<n; ++i) {
+ path[dirLevel] = i;
+ buildBucketList(list, context, path, dirLevel + 1);
+ }
+}
+
+std::string
+FileScanner::getPathName(Context& context, std::vector<uint32_t>& path,
+ const document::BucketId* bucket) const
+{
+ std::ostringstream ost;
+ ost << context._dir.getPath() << std::hex << std::setfill('0');
+ for (uint32_t i=0, n=path.size(); i<n; ++i) {
+ ost << '/' << std::setw(4) << path[i];
+ }
+ if (bucket != 0) {
+ ost << '/' << std::setw(16)
+ << bucket->stripUnused().getRawId() << ".0";
+ }
+ return ost.str();
+}
+
+void
+FileScanner::buildBucketList(document::BucketId::List & list,
+ Context& context,
+ std::vector<uint32_t>& path)
+{
+ std::string pathName(getPathName(context, path));
+ if (!vespalib::fileExists(pathName)) {
+ LOG(spam, "Directory %s does not exist.", pathName.c_str());
+ return;
+ }
+ LOG(spam, "Listing directory %s", pathName.c_str());
+ vespalib::DirectoryList dir(vespalib::listDirectory(pathName));
+ for (uint32_t i=0; i<dir.size(); ++i) {
+ if (!processFile(list, context, path, pathName, dir[i])) {
+ // To process alien files only once, we take the global lock rather
+ // than use the context object. There should be few (normally no)
+ // alien files, so this shouldn't matter performance-wise.
+ vespalib::LockGuard lock(_globalLock);
+ _globalMetrics._alienFileCounter.inc();
+ if (_globalMetrics._alienFiles.size()
+ <= _config._maxAlienFilesLogged)
+ {
+ LOG(spam, "Detected alien file %s/%s",
+ pathName.c_str(), dir[i].c_str());
+ _globalMetrics._alienFiles.push_back(pathName + "/" + dir[i]);
+ }
+ }
+ }
+ context._metrics._dirsListed.inc();
+}
+
+
+// Always called from lister thread (which might be worker thread)
+bool
+FileScanner::processFile(document::BucketId::List & list,
+ Context& context,
+ std::vector<uint32_t>& path,
+ const std::string& pathName,
+ const std::string& name)
+{
+ if (name == "." || name == ".."
+ || name == "chunkinfo" || name == "creationinfo")
+ {
+ LOG(spam, "Ignoring expected file that is not a slotfile '%s'.",
+ name.c_str());
+ return true;
+ }
+ document::BucketId bucket(extractBucketId(name));
+ if (bucket.getRawId() == 0) {
+ // Delete temporary files generated by storage
+ if (name.size() > 4 && name.substr(name.size() - 4) == ".tmp") {
+ context._metrics._temporaryFilesDeleted.inc();
+ LOG(debug, "Deleting temporary file found '%s'. Assumed it was "
+ "generated by storage temporarily while processing a "
+ "request and process or disk died before operation "
+ "completed.",
+ (pathName + "/" + name).c_str());
+ vespalib::unlink(pathName + "/" + name);
+ return true;
+ }
+ return false;
+ }
+ if (handleBadLocation(bucket, context, path)) {
+ LOG(spam, "Adding bucket %s.", bucket.toString().c_str());
+ list.push_back(bucket);
+ }
+ return true;
+}
+
+document::BucketId
+FileScanner::extractBucketId(const std::string& name) const
+{
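+ // Slotfile names are "<bucket id in hex>.<file nr in hex>"; the files
+ // written by storage (see getPathName()) look like "<16 hex digits>.0".
+ // Only file nr 0 is accepted; other values stem from a deprecated
+ // file splitting scheme.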
+ if (name.size() < 9) return document::BucketId();
+ std::string::size_type pos = name.find('.');
+ if (pos == std::string::npos || pos > 16) return document::BucketId();
+ char *endPtr;
+ document::BucketId::Type idnum = strtoull(&name[0], &endPtr, 16);
+ if (endPtr != &name[pos]) return document::BucketId();
+ uint32_t fileNr = strtol(&name[pos + 1], &endPtr, 16);
+ if (*endPtr != '\0') return document::BucketId();
+ // Check for deprecated name types
+ if (fileNr != 0) {
+ LOG(warning, "Found buckets split with old file splitting system. Have "
+ "you upgraded from VDS version < 3.1 to >= 3.1 ? This "
+ "requires a refeed as files stored are not backward "
+ "compatible.");
+ return document::BucketId();
+ }
+ return document::BucketId(idnum);
+}
+
+bool
+FileScanner::handleBadLocation(const document::BucketId& bucket,
+ Context& context,
+ std::vector<uint32_t>& path)
+{
+ std::vector<uint32_t> expectedPath(_directoryMapper.getPath(bucket));
+
+ // If in wrong directory on disk, do a rename to move it where VDS will
+ // access it.
+ if (expectedPath != path) {
+ std::string source(getPathName(context, path, &bucket));
+ std::string target(getPathName(context, expectedPath, &bucket));
+
+ if (vespalib::fileExists(target)) {
+ std::ostringstream err;
+ err << "Cannot move file from wrong directory " << source
+ << " to " << target << " as file already exist. Multiple "
+ << "instances of bucket on same disk. Should not happen. "
+ << "Ignoring file at in bad location.";
+ LOG(warning, "%s", err.str().c_str());
+ context._metrics._multipleBucketsSameDisk.inc();
+ return false;
+ }
+ if (!vespalib::rename(source, target, false, true)) {
+ std::ostringstream err;
+ err << "Cannot move file from " << source << " to " << target
+ << " as source file does not exist. Should not happen.";
+ LOG(error, "%s", err.str().c_str());
+ throw vespalib::IllegalStateException(err.str(), VESPA_STRLOC);
+ }
+ LOGBP(warning, "Found bucket in wrong directory. Moved %s to %s.",
+ source.c_str(), target.c_str());
+ context._metrics._wrongDir.inc();
+ }
+ return true;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h
new file mode 100644
index 00000000000..e83a01e932e
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::FileScanner
+ * \ingroup memfile
+ *
+ * \brief Scans a directory for memfiles.
+ *
+ * When storage starts up, we need to know what data already exists. This
+ * process identifies which buckets we have data for.
+ */
+
+#pragma once
+
+#include <vespa/metrics/metrics.h>
+#include <vespa/memfilepersistence/device/mountpointlist.h>
+#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h>
+#include <vespa/storageframework/storageframework.h>
+
+namespace document {
+ class BucketId;
+}
+
+namespace storage {
+namespace memfile {
+
+class FileScanner : private framework::Component {
+public:
+ typedef std::unique_ptr<FileScanner> UP;
+
+ struct Config {
+ uint32_t _maxAlienFilesLogged;
+ Config()
+ : _maxAlienFilesLogged(10) {}
+ };
+ struct Metrics : public metrics::MetricSet {
+ std::vector<std::string> _alienFiles;
+ metrics::LongCountMetric _alienFileCounter;
+ metrics::LongCountMetric _temporaryFilesDeleted;
+ metrics::LongCountMetric _multipleBucketsSameDisk;
+ metrics::LongCountMetric _wrongDir;
+ metrics::LongCountMetric _wrongDisk;
+ metrics::LongCountMetric _dirsListed;
+ framework::MilliSecTimer _startTime;
+ metrics::LongAverageMetric _listLatency;
+
+ Metrics(framework::Clock&);
+ };
+
+private:
+ struct Context {
+ const Directory& _dir;
+ Metrics _metrics;
+
+ Context(const Directory& d, framework::Clock& c)
+ : _dir(d), _metrics(c) {}
+ };
+
+ BucketDirectoryMapper _directoryMapper;
+ const MountPointList& _mountPoints;
+ Config _config;
+ uint32_t _dirLevels;
+ uint32_t _dirSpread;
+ // As there is only one FileScanner instance in storage, we need a
+ // lock to let multiple threads update global data in the scanner.
+ // Each operation will typically keep a Context object it can use
+ // without locking and then grab the lock to update global data after
+ // completion.
+ vespalib::Lock _globalLock;
+ Metrics _globalMetrics;
+
+public:
+ FileScanner(framework::ComponentRegister&, const MountPointList&,
+ uint32_t dirLevels, uint32_t dirSpread);
+
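+ /**
+ * Build the list of buckets found on the given partition (disk). The scan
+ * may be split between callers: with directory levels configured, the
+ * top-level directories are distributed so that the caller with index part
+ * (of totalParts) handles directory i when i % totalParts == part; with no
+ * directory levels, all work is done by part 0.
+ */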
+ void buildBucketList(document::BucketId::List & list,
+ uint16_t partition,
+ uint16_t part, uint16_t totalParts);
+
+ const Metrics& getMetrics() const { return _globalMetrics; }
+
+
+private:
+ void buildBucketList(document::BucketId::List & list,
+ Context&,
+ std::vector<uint32_t>& path,
+ uint32_t dirLevel);
+ std::string getPathName(Context&, std::vector<uint32_t>& path,
+ const document::BucketId* bucket = 0) const;
+ void buildBucketList(document::BucketId::List & list,
+ Context&,
+ std::vector<uint32_t>& path);
+ bool processFile(document::BucketId::List & list,
+ Context&,
+ std::vector<uint32_t>& path,
+ const std::string& pathName,
+ const std::string& name);
+ document::BucketId extractBucketId(const std::string& name) const;
+ bool handleBadLocation(const document::BucketId& bucket,
+ Context&,
+ std::vector<uint32_t>& path);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt
new file mode 100644
index 00000000000..166f8499725
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_mapper OBJECT
+ SOURCES
+ buffer.cpp
+ memfilemapper.cpp
+ memfile_v1_serializer.cpp
+ memfile_v1_verifier.cpp
+ locationreadplanner.cpp
+ simplememfileiobuffer.cpp
+ fileinfo.cpp
+ bufferedfilewriter.cpp
+ bucketdirectorymapper.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp
new file mode 100644
index 00000000000..956e806968d
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h>
+
+#include <vespa/document/bucket/bucketid.h>
+#include <vespa/vespalib/util/random.h>
+
+namespace storage {
+namespace memfile {
+
+BucketDirectoryMapper::BucketDirectoryMapper(uint32_t dirLevels,
+ uint32_t dirSpread)
+ : _dirLevels(dirLevels),
+ _dirSpread(dirSpread)
+{
+}
+
+std::vector<uint32_t>
+BucketDirectoryMapper::getPath(const document::BucketId& bucket)
+{
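+ // The directory path is derived deterministically from the bucket id: the
+ // id is folded to 32 bits and used to seed a PRNG, and each directory
+ // level draws one value modulo the spread. A given bucket thus always
+ // maps to the same path, independent of node or disk.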
+ document::BucketId::Type seed = bucket.getId();
+ seed = seed ^ (seed >> 32);
+ vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11);
+ std::vector<uint32_t> position(_dirLevels);
+ for (uint32_t i=0; i<_dirLevels; ++i) {
+ position[i] = randomizer.nextUint32() % _dirSpread;
+ }
+ return position;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h
new file mode 100644
index 00000000000..a12c0f9c7cb
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::BucketDirectoryMapper
+ * \ingroup memfile
+ *
+ * \brief Maps buckets to directories on disk.
+ *
+ * To avoid having too many files in one directory, we want to map buckets to
+ * different directories. As these are all on the same partition anyway, we
+ * don't really need the distribution to be different based on node indexes or
+ * disk indexes.
+ *
+ * This class hides a simple function for distributing buckets between
+ * directories.
+ */
+
+#pragma once
+
+#include <vector>
+
+namespace document {
+ class BucketId;
+}
+
+namespace storage {
+namespace memfile {
+
+class BucketDirectoryMapper {
+ uint32_t _dirLevels;
+ uint32_t _dirSpread;
+
+public:
+ BucketDirectoryMapper(uint32_t dirLevels, uint32_t dirSpread);
+
+ std::vector<uint32_t> getPath(const document::BucketId&);
+};
+
+}
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp
new file mode 100644
index 00000000000..5ecb439b3f0
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+#include <algorithm>
+#include <stdlib.h>
+
+namespace storage {
+namespace memfile {
+
+Buffer::Buffer(size_t size)
+ : _buffer(size),
+ _size(size)
+{
+}
+
+void
+Buffer::resize(size_t size)
+{
+ BackingType buffer(size);
+ size_t commonSize(std::min(size, _size));
+ memcpy(buffer.get(), _buffer.get(), commonSize);
+ _buffer.swap(buffer);
+ _size = size;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h
new file mode 100644
index 00000000000..2484209d23e
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::Buffer
+ * \ingroup memfile
+ *
+ * \brief Simple wrapper class to contain an aligned buffer.
+ *
+ * For direct IO operations, we need to use 512-byte aligned buffers. This is
+ * a simple wrapper class to get such a buffer.
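+ *
+ * Typical use is to construct the buffer with the needed size and pass
+ * getBuffer() (or rely on the implicit char* conversion) to read/write
+ * calls; resize() keeps the data that fits within both the old and new size.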
+ */
+
+#pragma once
+
+#include <boost/utility.hpp>
+#include <vespa/vespalib/util/alloc.h>
+#include <vespa/vespalib/util/linkedptr.h>
+
+namespace storage {
+namespace memfile {
+
+class Buffer : boost::noncopyable // Ensure no accidental copying of a buffer
+{
+ // Use AutoAlloc to transparently use mmap for large buffers.
+ // It is crucial that any backing buffer type returns an address that is
+ // 512-byte aligned, or direct IO will scream at us and fail everything.
+ static constexpr size_t MMapLimit = vespalib::MMapAlloc::HUGEPAGE_SIZE;
+ using BackingType = vespalib::AutoAlloc<MMapLimit, 512>;
+
+ BackingType _buffer;
+ // Actual, non-aligned size (as opposed to _buffer.size()).
+ size_t _size;
+
+public:
+ typedef vespalib::LinkedPtr<Buffer> LP;
+
+ Buffer(size_t size);
+
+ /**
+ * Resize buffer while keeping data that exists in the intersection of
+ * the old and new buffers' sizes.
+ */
+ void resize(size_t size);
+
+ char* getBuffer() noexcept {
+ return static_cast<char*>(_buffer.get());
+ }
+ const char* getBuffer() const noexcept {
+ return static_cast<const char*>(_buffer.get());
+ }
+ size_t getSize() const noexcept {
+ return _size;
+ }
+
+ operator char*() noexcept { return getBuffer(); }
+
+};
+
+} // memfile
+} // storage
+
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp
new file mode 100644
index 00000000000..369df0c1143
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp
@@ -0,0 +1,219 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h>
+
+#include <boost/scoped_array.hpp>
+#include <vespa/vespalib/util/guard.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+LOG_SETUP(".persistence.memfile.bufferedfilewriter");
+
+namespace storage {
+
+namespace memfile {
+
+using vespalib::ValueGuard;
+
+BufferedFileWriter::
+BufferedFileWriter(vespalib::File& file, char* buffer, uint32_t bufferSize)
+ : _file(file),
+ _buffer(buffer),
+ _bufferSize(bufferSize),
+ _bufferedData(0),
+ _filePosition(0),
+ _writeCount(0),
+ _cache(0),
+ _cacheDirtyUpTo(0),
+ _writing(false)
+{
+ // Since we normally use direct IO for writing, we want to have
+ // 512b aligned buffers
+ if (bufferSize < 512) {
+ // Only warn for this. Used in testing.
+ LOGBP(warning, "Using buffer smaller than 512b");
+ } else if (bufferSize % 512 != 0) {
+ std::ostringstream ost;
+ ost << "Buffered file writer got buffer of length " << bufferSize
+ << " (Not dividable by 512)";
+ throw vespalib::IllegalArgumentException(ost.str());
+ }
+ LOG(spam, "Using buffer in writer of %u bytes", bufferSize);
+}
+
+BufferedFileWriter::~BufferedFileWriter()
+{
+ if (LOG_WOULD_LOG(debug) && _bufferedData != 0) {
+ LOG(debug, "Discarding %u bytes of buffered, unflushed data",
+ _bufferedData);
+ }
+}
+
+void
+BufferedFileWriter::setMemoryCache(Cache* cache)
+{
+ _cache = cache;
+ _cacheDirtyUpTo = 0;
+ if (cache == 0) {
+ LOG(spam, "No longer using a memory cache");
+ } else {
+ LOG(spam, "Using memory cache of %u bytes", _cache->getCachedAmount());
+ }
+}
+
+void BufferedFileWriter::write(const char* data, uint32_t size, uint32_t pos)
+{
+ _writing = true;
+ // If at least parts of data written is cached in slotfileimage, update
+ // cache rather than write to file.
+ if (_cache != 0 && _cache->getCachedAmount() > pos) {
+ uint32_t len = std::min(size, _cache->getCachedAmount() - pos);
+ _cache->setData(data, len, pos);
+ if (_cache->duplicateCacheWrite()) {
+ len = 0;
+ }
+ if (len != size) { // Write remaining directly to disk
+ LOG(spam, "Writing remainder after cache, bypassing buffer. "
+ "%u bytes at pos %u.", size - len, pos + len);
+ _file.write(data + len, size - len, pos + len);
+ ++_writeCount;
+ } else {
+ LOG(spam, "Writing %u bytes to memory cache at position %u.",
+ size, pos);
+ }
+ _cacheDirtyUpTo = std::max(_cacheDirtyUpTo, pos + len);
+ } else {
+ LOG(spam, "Writing directly to file, bypassing buffer. %u"
+ " bytes at pos %u", size, pos);
+ _file.write(data, size, pos);
+ ++_writeCount;
+ }
+ _writing = false;
+}
+
+void BufferedFileWriter::flush()
+{
+ if (_bufferedData == 0) return;
+ LOG(spam, "Flushing buffer. Writing %u at pos %u.",
+ _bufferedData, _filePosition);
+ write(_buffer, _bufferedData, _filePosition);
+ _filePosition += _bufferedData;
+ _bufferedData = 0;
+}
+
+void BufferedFileWriter::write(const void *buffer, size_t size)
+{
+ LOG(spam, "Writing %" PRIu64 " bytes to buffer at position %u.",
+ size, _filePosition + _bufferedData);
+ if (!_buffer) { // If we don't use a buffer, just write to file.
+ write(static_cast<const char*>(buffer), size, _filePosition);
+ _filePosition += size;
+ return;
+ }
+ // In case of exception later, reset state to original state
+ ValueGuard<uint32_t> bufIndexGuard(_bufferedData);
+ ValueGuard<uint32_t> filePositionGuard(_filePosition);
+ // The buffer may contain data from before this write call. If that data
+ // is successfully flushed to disk below, the state the guards revert to
+ // must be updated so we don't lose that write.
+
+ if (_bufferedData + size >= _bufferSize) {
+ size_t part = _bufferSize - _bufferedData;
+ memcpy(_buffer + _bufferedData, buffer, part);
+ _bufferedData = _bufferSize;
+ buffer = static_cast<const char*>(buffer) + part;
+ flush();
+ bufIndexGuard = 0;
+ filePositionGuard = _filePosition + _bufferSize - part;
+ size -= part;
+ }
+
+ if (_bufferedData + size >= _bufferSize) {
+ if (reinterpret_cast<unsigned long>(buffer)%0x200 == 0) {
+ // Write the big part that is a multiple of _bufferSize to the file.
+ size_t part((size/_bufferSize)*_bufferSize);
+ write(static_cast<const char*>(buffer), part, _filePosition);
+ _filePosition += part;
+ buffer = static_cast<const char*>(buffer) + part;
+ size -= part;
+ } else {
+ for (; _bufferedData + size >= _bufferSize; size -= _bufferSize, buffer = static_cast<const char*>(buffer) + _bufferSize) {
+ memcpy(_buffer, buffer, _bufferSize);
+ _bufferedData = _bufferSize;
+ flush();
+ }
+ }
+ }
+
+ // We now have room for the rest of the data in buffer
+ assert(_bufferedData + size < _bufferSize);
+ memcpy(_buffer + _bufferedData, buffer, size);
+ _bufferedData += size;
+ // Finished successfully, deactivate guards
+ bufIndexGuard.deactivate();
+ filePositionGuard.deactivate();
+}
+
+void BufferedFileWriter::writeGarbage(uint32_t size) {
+ LOG(spam, "Writing %u bytes of garbage at position %u.",
+ size, _filePosition + _bufferedData);
+ if (!_buffer) {
+ ValueGuard<uint32_t> filePositionGuard(_filePosition);
+ uint32_t maxBufferSize = 0xFFFF;
+ uint32_t bufSize = (size > maxBufferSize ? maxBufferSize : size);
+ boost::scoped_array<char> buf(new char[bufSize]);
+ while (size > 0) {
+ uint32_t part = (size > bufSize ? bufSize : size);
+ write(&buf[0], part, _filePosition);
+ _filePosition += part;
+ size -= part;
+ }
+ filePositionGuard.deactivate();
+ return;
+ }
+ // In case of exception later, reset state to original state
+ ValueGuard<uint32_t> bufIndexGuard(_bufferedData);
+ ValueGuard<uint32_t> filePositionGuard(_filePosition);
+
+ if (_bufferedData + size >= _bufferSize) {
+ size_t part = _bufferSize - _bufferedData;
+ memset(_buffer + _bufferedData, 0xFF, part);
+ _bufferedData += part; // Use any garbage data already there.
+ flush();
+ bufIndexGuard = 0;
+ filePositionGuard = _filePosition + _bufferSize - part;
+ size -= part;
+ }
+
+ memset(_buffer + _bufferedData, 0xFF, std::min(_bufferSize-_bufferedData, size));
+
+ for (;_bufferedData + size >= _bufferSize; size -= _bufferSize) {
+ _bufferedData = _bufferSize;
+ flush();
+ }
+
+ // We now have room for the rest of the data in buffer
+ assert(_bufferedData + size < _bufferSize);
+ _bufferedData += size; // Use any garbage data already there.
+ // Finished successfully, deactivate guards
+ bufIndexGuard.deactivate();
+ filePositionGuard.deactivate();
+}
+
+void BufferedFileWriter::setFilePosition(uint32_t pos)
+{
+ if (pos != _filePosition + _bufferedData) {
+ flush();
+ _filePosition = pos;
+ }
+}
+
+uint32_t BufferedFileWriter::getFilePosition() const
+{
+ return _filePosition + _bufferedData;
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h
new file mode 100644
index 00000000000..e1101d94b58
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @class storage::BufferedFileWriter
+ * @ingroup filestorage
+ *
+ * @brief A utility class for buffered writing to a file.
+ *
+ * To minimize the number of system calls, and to minimize the chance of
+ * fragmentation, files should be written to disk in large chunks. Since
+ * it's easier to write algorithms which write files in smaller pieces, this
+ * class exists to buffer such writes and send them to disk at a later time.
+ *
+ * @author Håkon Humberset
+ * @date 2005-11-03
+ */
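+
+// Usage sketch (the buffer size and data below are arbitrary examples; the
+// file is assumed to already be open for writing):
+//
+//   std::vector<char> backing(4096);              // multiple of 512 bytes
+//   BufferedFileWriter writer(file, &backing[0], backing.size());
+//   writer.write(data, dataSize);                 // lands in the buffer
+//   writer.flush();                               // explicitly push to disk;
+//                                                 // the destructor never flushes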
+
+#pragma once
+
+#include <boost/utility.hpp>
+#include <vespa/fastos/types.h>
+#include <vector>
+
+namespace vespalib {
+ class File;
+}
+
+namespace storage {
+
+namespace memfile {
+
+class BufferedFileWriter : public boost::noncopyable {
+public:
+ struct Cache {
+ virtual ~Cache() {}
+ virtual uint32_t getCachedAmount() const = 0;
+ /** The given index must be within [0, getCachedAmount()). */
+ virtual char* getCache(uint32_t atIndex) = 0;
+ /** If true, write to both cache and file, else, write to cache only. */
+ virtual bool duplicateCacheWrite() const = 0;
+ /** Update content in the cache. The default implementation copies the
+ * data directly in; overriding implementations may ignore data outside
+ * the range they cache. */
+ virtual void setData(const char* data, size_t len, uint64_t pos)
+ { memcpy(getCache(pos), data, len); }
+ };
+
+private:
+ vespalib::File& _file;
+ char* _buffer;
+ uint32_t _bufferSize;
+ uint32_t _bufferedData;
+ uint32_t _filePosition;
+ uint32_t _writeCount;
+ Cache* _cache;
+ uint32_t _cacheDirtyUpTo;
+ bool _writing;
+
+public:
+ /**
+ * Create a new buffered file writer.
+ *
+ * @param file The file to write to. It should already be open for
+ * writing.
+ * @param buffer Pointer to the buffer to use in this writer. Note that
+ * if buffer is null, a fake mode is used where all writes
+ * are passed straight on to the OS. This mode can be used
+ * to measure the performance difference of using this class.
+ * @param bufferSize The size of the buffer to keep.
+ */
+ BufferedFileWriter(vespalib::File&, char* buffer, uint32_t bufferSize);
+ /**
+ * Destructor does not flush(). Make sure to call flush() manually.
+ * (flush() can fail, and destructors should not throw exceptions)
+ */
+ ~BufferedFileWriter();
+
+ uint32_t getBufferSize() const { return _bufferSize; }
+
+ /**
+ * If set, the part of a write that falls within the memory cache is
+ * written there instead of (or in addition to) the file.
+ */
+ void setMemoryCache(Cache* cache);
+
+ bool isMemoryCacheDirty() const { return (_cacheDirtyUpTo != 0); }
+
+ uint32_t getLastDirtyIndex() const { return _cacheDirtyUpTo; }
+
+ void tagCacheClean() { _cacheDirtyUpTo = 0; }
+
+ /** Write all buffered data to disk. */
+ void flush();
+
+ // Functions using the held file position.
+
+ /** Writes the given data to file and increases the file position. */
+ void write(const void *buffer, size_t size);
+
+ /** Writes undefined data of given size to file and increases position. */
+ void writeGarbage(uint32_t size);
+
+ /** Set the file position to the given value. (Flushes before changing) */
+ void setFilePosition(uint32_t pos);
+
+ /** Get the current file position. */
+ uint32_t getFilePosition() const;
+
+ uint32_t getBufferedSize() const { return _bufferedData; }
+
+ /** Get how many times this writer has flushed data to disk. */
+ uint32_t getWriteCount() const { return _writeCount; }
+
+private:
+ void write(const char* data, uint32_t size, uint32_t pos);
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp
new file mode 100644
index 00000000000..bda57a13aa7
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+
+namespace storage {
+
+namespace memfile {
+
+FileInfo::FileInfo()
+ : _metaDataListSize(0),
+ _headerBlockSize(0),
+ _bodyBlockSize(0)
+{
+}
+
+FileInfo::FileInfo(uint32_t metaDataListSize,
+ uint32_t headerBlockSize,
+ uint32_t bodyBlockSize)
+ : _metaDataListSize(metaDataListSize),
+ _headerBlockSize(headerBlockSize),
+ _bodyBlockSize(bodyBlockSize)
+{
+}
+
+
+FileInfo::FileInfo(const Header& header, size_t fileSize)
+ : _metaDataListSize(header._metaDataListSize),
+ _headerBlockSize(header._headerBlockSize),
+ _bodyBlockSize(
+ fileSize - header._headerBlockSize
+ - sizeof(MetaSlot) * header._metaDataListSize - sizeof(Header))
+{
+}
+
+uint32_t
+FileInfo::getHeaderBlockStartIndex() const
+{
+ return sizeof(Header) + _metaDataListSize * sizeof(MetaSlot);
+}
+
+uint32_t
+FileInfo::getBodyBlockStartIndex() const
+{
+ return getHeaderBlockStartIndex() + _headerBlockSize;
+}
+
+uint32_t
+FileInfo::getFileSize() const
+{
+ return getBodyBlockStartIndex() + _bodyBlockSize;
+}
+
+std::string
+FileInfo::toString() const
+{
+ std::ostringstream ost;
+ ost << "FileInfo("
+ << "meta_size " << _metaDataListSize
+ << " header_start " << getHeaderBlockStartIndex()
+ << " body_start " << getBodyBlockStartIndex()
+ << ")";
+ return ost.str();
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h
new file mode 100644
index 00000000000..973e1f2cd10
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/vespalib/util/crc.h>
+
+namespace storage {
+
+namespace memfile {
+
+struct MetaSlot : private Types {
+ Timestamp _timestamp;
+ GlobalId _gid;
+ uint32_t _headerPos;
+ uint32_t _headerSize;
+ uint32_t _bodyPos;
+ uint32_t _bodySize;
+ uint16_t _flags;
+ uint16_t _checksum;
+
+ MetaSlot() : _timestamp(0), _headerPos(0), _headerSize(0),
+ _bodyPos(0), _bodySize(0), _flags(0), _checksum(39859)
+ {
+ //_checksum = calcSlotChecksum();
+ //std::cerr << "Empty checksum " << _checksum << "\n";
+ }
+
+ uint16_t calcSlotChecksum() const {
+ static uint32_t size(sizeof(MetaSlot) - sizeof(_checksum));
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(this, size);
+ return calculator.checksum() & 0xffff;
+
+ }
+
+ bool inUse() const {
+ return (_flags & IN_USE);
+ }
+
+ void print(std::ostream & out) const {
+ vespalib::asciistream tmp;
+ print(tmp);
+ out << tmp.str();
+ }
+ void print(vespalib::asciistream & out) const {
+ out << "Slot(" << std::dec << _timestamp << ", " << _gid << ", "
+ << _headerPos << " - " << _headerSize << ", " << _bodyPos
+ << " - " << _bodySize << ", 0x" << std::hex << _flags << ", 0x"
+ << _checksum << ")" << std::dec;
+ }
+
+ // Functions used by unit tests (avoid renaming all old func usage)
+ void updateChecksum() { _checksum = calcSlotChecksum(); }
+ void setTimestamp(Timestamp ts) { _timestamp = ts; }
+ void setHeaderPos(uint32_t p) { _headerPos = p; }
+ void setHeaderSize(uint32_t sz) { _headerSize = sz; }
+ void setBodyPos(uint32_t p) { _bodyPos = p; }
+ void setBodySize(uint32_t sz) { _bodySize = sz; }
+ void setUseFlag(bool isInUse)
+ { _flags = (isInUse ? _flags | IN_USE : _flags & ~IN_USE); }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const MetaSlot& slot) {
+ vespalib::asciistream tmp;
+ slot.print(tmp);
+ return out << tmp.str();
+}
+inline vespalib::asciistream& operator<<(vespalib::asciistream & out, const MetaSlot& slot) {
+ slot.print(out); return out;
+}
+
+/**
+ * Represents a slotfile header.
+ */
+struct Header {
+ uint32_t _version;
+ uint32_t _metaDataListSize;
+ uint32_t _headerBlockSize;
+ uint32_t _checksum;
+ uint32_t _fileChecksum;
+ uint32_t _notInUse0; // Some reserved bits, which we can use later if
+ uint64_t _notInUse1; // needed without altering the file format.
+ uint64_t _notInUse2;
+ uint64_t _notInUse3;
+ uint64_t _notInUse4;
+ uint64_t _notInUse5;
+
+ Header()
+ : _version(Types::TRADITIONAL_SLOTFILE),
+ _metaDataListSize(0),
+ _headerBlockSize(0),
+ _checksum(0),
+ _fileChecksum(0),
+ _notInUse0(0), _notInUse1(0), _notInUse2(0),
+ _notInUse3(0), _notInUse4(0), _notInUse5(0)
+ {
+ }
+
+ uint32_t calcHeaderChecksum() const {
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(this, 12);
+ return calculator.checksum();
+ }
+ bool verify() const {
+ return (_version == Types::TRADITIONAL_SLOTFILE
+ && _checksum == calcHeaderChecksum());
+ }
+ // Functions used by unit tests (avoid renaming all old func usage)
+ void updateChecksum() { _checksum = calcHeaderChecksum(); }
+ void setVersion(uint32_t version) { _version = version; }
+ void setMetaDataListSize(uint32_t sz) { _metaDataListSize = sz; }
+ void setHeaderBlockSize(uint32_t sz) { _headerBlockSize = sz; }
+
+ void print(std::ostream& out, const std::string& indent = "") const {
+ out << indent << "SlotFileHeader(\n"
+ << indent << " version: " << std::hex << _version << std::dec << "\n"
+ << indent << " meta data list size: " << _metaDataListSize << "\n"
+ << indent << " header block size: " << _headerBlockSize << "b\n"
+ << indent << " checksum: " << std::hex << _checksum
+ << (verify() ? " (OK)\n" : " (MISMATCH)\n")
+ << indent << " file checksum: " << _fileChecksum << "\n"
+ << indent << ")";
+ }
+ };
+
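+// On-disk layout described by FileInfo (offsets as computed in fileinfo.cpp):
+//
+//   [ Header | MetaSlot x _metaDataListSize | header block | body block ]
+//
+//   getHeaderBlockStartIndex() = sizeof(Header) + _metaDataListSize * sizeof(MetaSlot)
+//   getBodyBlockStartIndex()   = getHeaderBlockStartIndex() + _headerBlockSize
+//   getFileSize()              = getBodyBlockStartIndex() + _bodyBlockSize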
+struct FileInfo {
+ typedef std::unique_ptr<FileInfo> UP;
+
+ uint32_t _metaDataListSize;
+ uint32_t _headerBlockSize;
+ uint32_t _bodyBlockSize;
+
+ // Cached header bytes to write in addition to metadata when
+ // needing to write back metadata 512 byte aligned
+ std::vector<char> _firstHeaderBytes;
+
+ FileInfo();
+ FileInfo(uint32_t metaDataListSize, uint32_t headerBlockSize, uint32_t bodyBlockSize);
+ FileInfo(const Header& header, size_t fileSize);
+
+ uint32_t getBlockSize(Types::DocumentPart part) const {
+ return (part == Types::BODY ? _bodyBlockSize : _headerBlockSize);
+ }
+ uint32_t getBlockIndex(Types::DocumentPart part) const {
+ return (part == Types::BODY ? getBodyBlockStartIndex()
+ : getHeaderBlockStartIndex());
+ }
+ uint32_t getHeaderBlockStartIndex() const;
+ uint32_t getBodyBlockStartIndex() const;
+ uint32_t getFileSize() const;
+ std::string toString() const;
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp
new file mode 100644
index 00000000000..e853e374943
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/locationreadplanner.h>
+#include <vespa/memfilepersistence/memfile/memfileiointerface.h>
+
+namespace storage {
+namespace memfile {
+
+LocationDiskIoPlanner::LocationDiskIoPlanner(
+ const MemFileIOInterface& io,
+ DocumentPart part,
+ const std::vector<DataLocation>& desiredLocations,
+ uint32_t maxGap,
+ uint32_t blockStartIndex)
+ : _io(io),
+ _operations(),
+ _part(part),
+ _blockStartIndex(blockStartIndex)
+{
+ processLocations(desiredLocations, maxGap);
+}
+
+namespace {
+ uint32_t alignDown(uint32_t value) {
+ uint32_t blocks = value / 512;
+ return blocks * 512;
+ }
+
+ uint32_t alignUp(uint32_t value) {
+ uint32_t blocks = (value + 512 - 1) / 512;
+ return blocks * 512;
+ }
+}
+
+void
+LocationDiskIoPlanner::scheduleLocation(DataLocation loc,
+ std::vector<DataLocation>& ops)
+{
+ if (!_io.isCached(loc, _part) && loc._size) {
+ // Convert the relative location from the buffer to an
+ // absolute location.
+ ops.push_back(DataLocation(loc._pos + _blockStartIndex,
+ loc._size));
+ }
+}
+
+void
+LocationDiskIoPlanner::processLocations(
+ const std::vector<DataLocation>& desiredLocations,
+ uint32_t maxGap)
+{
+ // Build list of disk read operations to do
+ std::vector<DataLocation> allOps;
+
+ // Create list of all locations we need to read
+ for (std::size_t i = 0; i < desiredLocations.size(); ++i) {
+ scheduleLocation(desiredLocations[i], allOps);
+ }
+
+ // Sort list, and join elements close together into single IO ops
+ std::sort(allOps.begin(), allOps.end());
+ for (size_t i = 0; i < allOps.size(); ++i) {
+ uint32_t start = alignDown(allOps[i]._pos);
+ uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size);
+ if (i != 0) {
+ uint32_t lastStop = _operations.back()._pos
+ + _operations.back()._size;
+ if (lastStop >= start || start - lastStop < maxGap) {
+ _operations.back()._size += (stop - lastStop);
+ continue;
+ }
+ }
+
+ _operations.push_back(DataLocation(start, stop - start));
+ }
+}
+
+uint32_t
+LocationDiskIoPlanner::getTotalBufferSize() const
+{
+ uint32_t totalSize = 0;
+ for (size_t i = 0; i < _operations.size(); ++i) {
+ totalSize += _operations[i]._size;
+ }
+ return totalSize;
+}
+
+void
+LocationDiskIoPlanner::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ (void) verbose; (void) indent;
+ for (std::size_t i = 0; i < _operations.size(); ++i) {
+ if (i > 0) out << ",";
+ out << "[" << _operations[i]._pos << ","
+ << (_operations[i]._size + _operations[i]._pos) << "]";
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h
new file mode 100644
index 00000000000..915f158eb85
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::LocationDiskIoPlanner
+ * \ingroup memfile
+ *
+ * \brief Creates list of minimal IO operations to do versus disk.
+ *
+ * When accessing many locations on disk, it is not necessarily ideal to do one
+ * disk access per location. This class merges nearby locations into a minimal
+ * set of read operations, reading through gaps of uninteresting data as long
+ * as they do not exceed a given maximum.
+ */
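+
+// Sketch of intended use (the maxGap value and location set are illustrative):
+//
+//   LocationDiskIoPlanner planner(io, Types::HEADER, desiredLocations,
+//                                 4096 /* maxGap */, headerBlockStartIndex);
+//   Buffer buf(planner.getTotalBufferSize());
+//   for (const DataLocation& op : planner.getIoOperations()) {
+//       // each op is 512-byte aligned and merges nearby desired locations
+//   }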
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class MemSlot;
+
+class MemFileIOInterface;
+
+class LocationDiskIoPlanner : public Types, public vespalib::Printable
+{
+public:
+ LocationDiskIoPlanner(const MemFileIOInterface& io,
+ DocumentPart part,
+ const std::vector<DataLocation>& desiredLocations,
+ uint32_t maxGap,
+ uint32_t blockStartIndex);
+
+ const std::vector<DataLocation>& getIoOperations() const {
+ return _operations;
+ }
+
+ /**
+ * Get the total amount of space needed to hold all the data from all
+ * locations identified to be accessed. Useful for creating a buffer of the
+ * correct size.
+ */
+ uint32_t getTotalBufferSize() const;
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+private:
+ const MemFileIOInterface& _io;
+ std::vector<DataLocation> _operations;
+ DocumentPart _part;
+ uint32_t _blockStartIndex;
+
+ void processLocations(
+ const std::vector<DataLocation>& desiredLocations,
+ uint32_t maxGap);
+
+ void scheduleLocation(DataLocation loc,
+ std::vector<DataLocation>&);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h
new file mode 100644
index 00000000000..e138e17480a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::MapperSlotOperation
+ * \ingroup memfile
+ *
+ * \brief Utility class to wrap const casting.
+ *
+ * The MemFile objects want to track all changes done to them by clients, so
+ * that they can track internally whether they have been altered in memory
+ * relative to the physical file. Thus, only const MemSlot objects are exposed,
+ * and altering the MemFile slots requires calling functions in MemFile.
+ *
+ * But the mapper code needs to alter some information in the MemFile and
+ * MemSlot objects. For instance, it has to clear the altered tag after flushing
+ * content to disk. The mappers thus need to alter the objects in a way regular
+ * clients should not be allowed to.
+ *
+ * To implement this, we use this class, which contains only the functionality
+ * needed by the mappers, and which uses const_cast to let the mapper change the
+ * state it needs to.
+ */
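+
+// Typical mapper-side use (a sketch; the flag and call sites match those in
+// memfile_v1_serializer.cpp):
+//
+//   // after flushing all dirty slots to disk:
+//   MapperSlotOperation::clearFlag(memFile, SLOTS_ALTERED);
+//   // after rewriting a slot's data at a new position:
+//   MapperSlotOperation::setLocation(slot, HEADER, newLocation);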
+
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+
+namespace storage {
+namespace memfile {
+
+struct MapperSlotOperation : protected Types {
+ static void setFlag(const MemFile& file, uint32_t flags)
+ {
+ const_cast<MemFile&>(file).setFlag(flags);
+ }
+ static void clearFlag(const MemFile& file, uint32_t flags)
+ {
+ const_cast<MemFile&>(file).clearFlag(flags);
+ }
+ static void setFlag(const MemSlot& slot, uint32_t flags)
+ {
+ const_cast<MemSlot&>(slot).setFlag(flags);
+ }
+ static void clearFlag(const MemSlot& slot, uint32_t flags)
+ {
+ const_cast<MemSlot&>(slot).clearFlag(flags);
+ }
+ static void setLocation(const MemSlot& slot, DocumentPart part,
+ const DataLocation& dl)
+ {
+ const_cast<MemSlot&>(slot).setLocation(part, dl);
+ }
+ static void setChecksum(const MemSlot& slot, uint16_t checksum)
+ {
+ const_cast<MemSlot&>(slot).setChecksum(checksum);
+ }
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp
new file mode 100644
index 00000000000..12f7219e2ca
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp
@@ -0,0 +1,1029 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/locationreadplanner.h>
+#include <vespa/memfilepersistence/mapper/uniqueslotgenerator.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h>
+#include <vespa/vespalib/util/crc.h>
+
+LOG_SETUP(".persistence.memfilev1");
+
+namespace storage {
+namespace memfile {
+
+namespace {
+
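+// Rounds value up so that (value + offset) becomes a multiple of block.
+// For example, alignUp(v) with v=700 yields 1024, while alignUp(v, 24)
+// with v=700 yields 1000 (since 1000 + 24 = 1024 is a multiple of 512).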
+void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) {
+ uint32_t blocks = (value + offset + block - 1) / block;
+ value = blocks * block - offset;
+}
+
+int32_t getBufferPos(
+ const DataLocation& location,
+ const std::vector<DataLocation>& locations)
+{
+ uint32_t posNow = 0;
+ for (uint32_t i = 0; i < locations.size(); ++i) {
+ if (locations[i].contains(location)) {
+ return posNow + location._pos - locations[i]._pos;
+ }
+
+ posNow += locations[i]._size;
+ }
+
+ return -1;
+}
+
+}
+
+MemFileV1Serializer::MemFileV1Serializer(ThreadMetricProvider& metricProvider)
+ : _metricProvider(metricProvider)
+{
+}
+
+namespace {
+
+class SlotValidator
+{
+public:
+ SlotValidator(uint32_t headerBlockOffset,
+ uint32_t bodyBlockOffset,
+ uint32_t fileSize)
+ : _headerBlockOffset(headerBlockOffset),
+ _bodyBlockOffset(bodyBlockOffset),
+ _fileSize(fileSize)
+ {
+ }
+
+ bool slotHasValidInformation(const MetaSlot& ms) const {
+ const uint16_t slotCrc(ms.calcSlotChecksum());
+ const bool checksumOk(slotCrc == ms._checksum);
+ return (checksumOk && slotLocationsWithinFileBounds(ms));
+ }
+
+private:
+ bool slotLocationsWithinFileBounds(const MetaSlot& ms) const {
+ // The reason for checking header location bounds against file size
+ // instead of body block offset is that the latter is computed from the
+ // file meta header information and will thus be entirely unaware of
+ // any file truncations.
+ return (_headerBlockOffset + ms._headerPos + ms._headerSize <= _fileSize
+ && _bodyBlockOffset + ms._bodyPos + ms._bodySize <= _fileSize);
+ }
+
+ const uint32_t _headerBlockOffset;
+ const uint32_t _bodyBlockOffset;
+ const uint32_t _fileSize;
+};
+
+}
+
+void
+MemFileV1Serializer::loadFile(MemFile& file, Environment& env,
+ Buffer& buffer, uint64_t bytesRead)
+{
+ SerializationMetrics& metrics(getMetrics().serialization);
+ SimpleMemFileIOBuffer& ioBuf(
+ static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+
+ vespalib::LazyFile* lf = &ioBuf.getFileHandle();
+
+ assert(file.getSlotCount() == 0);
+ assert(bytesRead >= 64);
+
+ const Header* header(reinterpret_cast<const Header*>(buffer.getBuffer()));
+ if (header->_checksum != header->calcHeaderChecksum()) {
+ std::ostringstream error;
+ error << "Header checksum mismatch. Stored checksum " << std::hex
+ << header->_checksum << " does not match calculated checksum "
+ << header->calcHeaderChecksum();
+ throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC);
+ }
+ uint32_t headerBlockIndex = sizeof(Header)
+ + header->_metaDataListSize * sizeof(MetaSlot);
+
+ // Read all we need including first header bytes until alignment
+ uint32_t firstAlignedHeaderByte = headerBlockIndex;
+ alignUp(firstAlignedHeaderByte);
+ if (firstAlignedHeaderByte > bytesRead) {
+ framework::MilliSecTimer timer(env._clock);
+ LOG(spam,
+ "Only read %zu of required %u header bytes. "
+ "Resizing buffer and reading remaining data",
+ bytesRead,
+ firstAlignedHeaderByte);
+ buffer.resize(firstAlignedHeaderByte);
+ header = reinterpret_cast<const Header*>(buffer.getBuffer());
+ off_t moreBytesRead = lf->read(
+ buffer + bytesRead,
+ firstAlignedHeaderByte - bytesRead,
+ bytesRead);
+ bytesRead += moreBytesRead;
+ if (bytesRead != firstAlignedHeaderByte) {
+ size_t fileSize = lf->getFileSize();
+ if (firstAlignedHeaderByte > fileSize) {
+ std::ostringstream error;
+ error << "Header indicates file is bigger than it "
+ << "physically is. First aligned byte in header block "
+ << "starts at byte " << firstAlignedHeaderByte
+ << " while file is " << fileSize << " bytes long.";
+ throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC);
+
+ }
+ assert(bytesRead == firstAlignedHeaderByte);
+ }
+ metrics.tooLargeMetaReadLatency.addValue(timer);
+ }
+
+ FileInfo::UP data(new FileInfo);
+ data->_metaDataListSize = header->_metaDataListSize;
+ data->_headerBlockSize = header->_headerBlockSize;
+ const uint32_t headerBlockOffset(
+ sizeof(Header) + data->_metaDataListSize * sizeof(MetaSlot));
+ const uint32_t bodyBlockOffset = headerBlockOffset + data->_headerBlockSize;
+ const uint32_t fileSize = lf->getFileSize();
+
+ // Avoid underflow in case of truncation.
+ const uint32_t bodyBlockSize(
+ fileSize > bodyBlockOffset ? fileSize - bodyBlockOffset : 0);
+
+ data->_bodyBlockSize = bodyBlockSize;
+ data->_firstHeaderBytes.resize(firstAlignedHeaderByte - headerBlockIndex);
+ memcpy(&data->_firstHeaderBytes[0], buffer.getBuffer() + headerBlockIndex,
+ data->_firstHeaderBytes.size());
+
+ LOG(debug,
+ "File %s header info: metaDataListSize=%u, "
+ "headerBlockSize=%u, bodyBlockSize=%u",
+ file.getFile().getPath().c_str(),
+ data->_metaDataListSize,
+ data->_headerBlockSize,
+ data->_bodyBlockSize);
+
+ ioBuf.setFileInfo(std::move(data));
+
+ uint32_t metaEntriesRead(header->_metaDataListSize);
+ bool foundBadSlot = false;
+ uint32_t lastBadSlot = 0;
+ SlotValidator validator(headerBlockOffset, bodyBlockOffset, fileSize);
+
+ for (uint32_t i = 0; i < metaEntriesRead; ++i) {
+ const MetaSlot* ms(reinterpret_cast<const MetaSlot*>(
+ buffer + sizeof(Header) + i * sizeof(MetaSlot)));
+
+ if (!validator.slotHasValidInformation(*ms)) {
+ foundBadSlot = true;
+ lastBadSlot = i;
+ continue; // Don't add bad slots.
+ }
+
+ if (!ms->inUse()) {
+ break;
+ }
+
+ MemSlot slot(ms->_gid,
+ ms->_timestamp,
+ DataLocation(ms->_headerPos, ms->_headerSize),
+ DataLocation(ms->_bodyPos, ms->_bodySize),
+ ms->_flags,
+ ms->_checksum);
+
+ file.addSlot(slot);
+ }
+
+ // We bail here instead of doing so inside the loop because this allows us
+ // to add all healthy slots to the file prior to throwing the exception.
+ // Any caller code that wants/need to inspect the good slots is then able
+ // to do so. It is not a given that this is a strong requirement; the check
+ // may be moved inside the loop if it can be established that no caller code
+ // expects the good slots to be present after a loadFile exception.
+ if (foundBadSlot) {
+ std::ostringstream error;
+ error << "Found bad slot in file '"
+ << file.getFile().getPath()
+ << "' at slot index " << lastBadSlot
+ << ", forcing repair of file. Details of file "
+ "corruption to follow.";
+ throw CorruptMemFileException(error.str(), file.getFile(),
+ VESPA_STRLOC);
+ }
+
+ file.clearFlag(SLOTS_ALTERED);
+
+ LOG(spam, "After loading file, its state is %s", file.toString(true).c_str());
+}
+
+void
+MemFileV1Serializer::cacheLocationsForPart(SimpleMemFileIOBuffer& cache,
+ DocumentPart part,
+ uint32_t blockIndex,
+ const std::vector<DataLocation>& locationsToCache,
+ const std::vector<DataLocation>& locationsRead,
+ SimpleMemFileIOBuffer::BufferAllocation& buf)
+{
+ vespalib::asciistream error;
+ for (uint32_t i = 0; i < locationsToCache.size(); ++i) {
+ DataLocation loc(locationsToCache[i]);
+ assert(loc.valid());
+
+ if (loc._size == 0) {
+ LOG(spam, "Bailing since location size is 0");
+ continue;
+ }
+
+ loc._pos += blockIndex;
+ int32_t bufferPos = getBufferPos(loc, locationsRead);
+
+ assert(bufferPos != -1);
+
+ MemFileV1Verifier verifier;
+ if (!verifier.verifyBlock(part, locationsToCache[i]._pos,
+ error,
+ buf.getBuffer() + bufferPos,
+ loc._size))
+ {
+ throw CorruptMemFileException(
+ error.str(), cache.getFileSpec(), VESPA_STRLOC);
+ }
+
+ cache.cacheLocation(part,
+ locationsToCache[i],
+ buf.getSharedBuffer(),
+ buf.getBufferPosition() + bufferPos);
+ }
+}
+
+void
+MemFileV1Serializer::cacheLocations(MemFileIOInterface& io,
+ Environment& env,
+ const Options& options,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations)
+{
+ SimpleMemFileIOBuffer& cache(static_cast<SimpleMemFileIOBuffer&>(io));
+
+ const FileInfo& data(cache.getFileInfo());
+ uint32_t blockStartIndex(part == HEADER
+ ? data.getHeaderBlockStartIndex()
+ : data.getBodyBlockStartIndex());
+
+ LOG(spam, "%s: cacheLocations for %s with %zu locations. "
+ "max read-through gap is %u",
+ cache.getFileHandle().getFilename().c_str(),
+ getDocumentPartName(part),
+ locations.size(),
+ options._maximumGapToReadThrough);
+
+ LocationDiskIoPlanner planner(
+ cache,
+ part,
+ locations,
+ options._maximumGapToReadThrough,
+ blockStartIndex);
+
+ if (planner.getIoOperations().empty()) {
+ LOG(spam, "%s: no disk read operations required for %zu %s locations",
+ cache.getFileHandle().getFilename().c_str(),
+ locations.size(),
+ getDocumentPartName(part));
+ return;
+ }
+
+ const std::vector<DataLocation>& readLocations(planner.getIoOperations());
+
+ const size_t bufferSize = planner.getTotalBufferSize();
+ assert(bufferSize % 512 == 0);
+ const SimpleMemFileIOBuffer::SharedBuffer::Alignment align512(
+ SimpleMemFileIOBuffer::SharedBuffer::ALIGN_512_BYTES);
+
+ SimpleMemFileIOBuffer::BufferAllocation buf(
+ cache.allocateBuffer(part, bufferSize, align512));
+ assert(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0);
+ LOG(spam,
+ "Allocated %u bytes with offset %u from shared buffer %p "
+ "(of total %zu bytes, %zu bytes used, %zu bytes free)",
+ buf.getSize(),
+ buf.getBufferPosition(),
+ buf.getSharedBuffer().get(),
+ buf.getSharedBuffer()->getSize(),
+ buf.getSharedBuffer()->getUsedSize(),
+ buf.getSharedBuffer()->getFreeSize());
+
+ framework::MilliSecTime readStart(env._clock.getTimeInMillis());
+ SerializationMetrics& metrics(getMetrics().serialization);
+
+ uint64_t total(read(cache.getFileHandle(), buf.getBuffer(), readLocations));
+
+ metrics::LongAverageMetric& latency(part == HEADER ? metrics.headerReadLatency
+ : metrics.bodyReadLatency);
+ metrics::LongAverageMetric& sz(part == HEADER ? metrics.headerReadSize
+ : metrics.bodyReadSize);
+ framework::MilliSecTime readDone(env._clock.getTimeInMillis());
+ latency.addValue((readDone - readStart).getTime());
+ sz.addValue(total);
+
+ cacheLocationsForPart(cache, part, blockStartIndex, locations,
+ readLocations, buf);
+
+ framework::MilliSecTime timeNow(env._clock.getTimeInMillis());
+ metrics.cacheUpdateAndImplicitVerifyLatency.addValue(
+ (timeNow - readDone).getTime());
+}
+
+uint64_t
+MemFileV1Serializer::read(vespalib::LazyFile& file,
+ char* buf,
+ const std::vector<DataLocation>& readOps)
+{
+ uint32_t currPos = 0;
+ uint64_t totalRead = 0;
+
+ for (uint32_t i = 0; i < readOps.size(); i++) {
+ file.read(buf + currPos, readOps[i]._size, readOps[i]._pos);
+ currPos += readOps[i]._size;
+ totalRead += readOps[i]._size;
+ }
+ return totalRead;
+}
+
+void
+MemFileV1Serializer::ensureFormatSpecificDataSet(const MemFile& )
+{
+/*
+ if (file.getFormatSpecificData() == 0) {
+ assert(!file.fileExists());
+ file.setFormatSpecificData(MemFile::FormatSpecificData::UP(new Data));
+ }
+*/
+}
+
+uint32_t
+MemFileV1Serializer::writeMetaData(BufferedFileWriter& writer,
+ const MemFile& file)
+{
+ const SimpleMemFileIOBuffer& ioBuf(
+ static_cast<const SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+ uint32_t lastPos = writer.getFilePosition();
+ const FileInfo& data(ioBuf.getFileInfo());
+
+ // Create the header
+ Header header;
+ header._version = file.getCurrentVersion();
+ header._metaDataListSize = data._metaDataListSize;
+ header._headerBlockSize = data._headerBlockSize;
+ header.updateChecksum();
+ header._fileChecksum = file.getBucketInfo().getChecksum();
+ writer.write(&header, sizeof(Header));
+ for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) {
+ MetaSlot meta;
+ if (i < file.getSlotCount()) {
+ const MemSlot& slot(file[i]);
+ assert(i == 0 || (file[i].getTimestamp()
+ > file[i-1].getTimestamp()));
+ meta._timestamp = slot.getTimestamp();
+ meta._gid = slot.getGlobalId();
+ meta._flags = slot.getPersistedFlags();
+ meta._headerPos = slot.getLocation(HEADER)._pos;
+ meta._headerSize = slot.getLocation(HEADER)._size;
+ meta._bodyPos = slot.getLocation(BODY)._pos;
+ meta._bodySize = slot.getLocation(BODY)._size;
+ meta.updateChecksum();
+ }
+ writer.write(&meta, sizeof(MetaSlot));
+ }
+ return (writer.getFilePosition() - lastPos);
+}
+
+// TODO: make exception safe
+MemFileV1Serializer::FlushResult
+MemFileV1Serializer::flushUpdatesToFile(MemFile& file, Environment& env)
+{
+ framework::MilliSecTime startTime(env._clock.getTimeInMillis());
+ MemFilePersistenceThreadMetrics& metrics(getMetrics());
+ SerializationWriteMetrics& writeMetrics(metrics.serialization.partialWrite);
+ SimpleMemFileIOBuffer& ioBuf(
+ static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+ const FileInfo& data(ioBuf.getFileInfo());
+ BucketId bid(file.getFile().getBucketId());
+
+ LOG(spam,
+ "Attempting partial write of file %s",
+ file.getFile().getPath().c_str());
+
+ if (file.getSlotCount() > data._metaDataListSize) {
+ LOG(debug,
+ "Cannot do partial write of file %s as its "
+ "in-memory slot count (%u) is greater than its "
+ "persisted metadata list size (%u)",
+ file.getFile().getPath().c_str(),
+ file.getSlotCount(), data._metaDataListSize);
+ return FlushResult::TooFewMetaEntries;
+ }
+
+ // TODO: replace this with multimap to avoid vector allocations
+ // for every single unique location? Could potentially also use
+ // a Boost.Intrusive rbtree with a pool-based allocation scheme
+ // to avoid multiple allocations even for the nodes themselves.
+ typedef MemFile::LocationMap LocationMap;
+ LocationMap headersToWrite, bodiesToWrite;
+ LocationMap existingHeaders, existingBodies;
+
+ file.getLocations(headersToWrite, bodiesToWrite,
+ NON_PERSISTED_LOCATIONS);
+
+ // We don't need the slot list for this, just using it to find a
+ // gap in the file
+ file.getLocations(existingHeaders, existingBodies,
+ PERSISTED_LOCATIONS | NO_SLOT_LIST);
+
+ // Figure out total size of unwritten data for each part and
+ // whether or not there exists a single continuous gap in the
+ // part's block in which we can fit the data. Also keep track
+ // of the total amount of data we actually use so we can check
+ // if file should be downsized afterwards.
+ uint32_t totalSpaceUsed[2] = { 0 };
+ uint32_t maxUsedExtent[2] = { 0 };
+ uint32_t bytesToWrite[2] = { 0 };
+
+ for (uint32_t partId = 0; partId < 2; ++partId) {
+ DocumentPart part(static_cast<DocumentPart>(partId));
+ LocationMap& unwritten(part == HEADER ? headersToWrite : bodiesToWrite);
+ LocationMap& existing(part == HEADER ? existingHeaders : existingBodies);
+
+ for (LocationMap::iterator it(unwritten.begin()), e(unwritten.end());
+ it != e; ++it)
+ {
+ bytesToWrite[partId] += it->first._size;
+ }
+ alignUp(bytesToWrite[partId]);
+ for (LocationMap::iterator it(existing.begin()), e(existing.end());
+ it != e; ++it)
+ {
+ totalSpaceUsed[partId] += it->first._size;
+ maxUsedExtent[partId] = std::max(maxUsedExtent[partId],
+ it->first._pos + it->first._size);
+ }
+ LOG(spam, "Max used %s extent before align: %u",
+ getDocumentPartName(part),
+ maxUsedExtent[partId]);
+
+ assert(maxUsedExtent[partId] <= data.getBlockSize(part));
+ alignUp(maxUsedExtent[partId]);
+
+ if (maxUsedExtent[partId] > data.getBlockSize(part)
+ || (bytesToWrite[partId]
+ > (data.getBlockSize(part) - maxUsedExtent[partId])))
+ {
+ LOG(debug, "Could not find sufficient free space in %s to "
+ "perform a partial write for %s. Only %u bytes available, "
+ "but need at least %u bytes; rewriting entire file.",
+ getDocumentPartName(part),
+ file.getFile().getPath().c_str(),
+ (data.getBlockSize(part) >= maxUsedExtent[partId]
+ ? data.getBlockSize(part) - maxUsedExtent[partId]
+ : 0),
+ bytesToWrite[partId]);
+ return FlushResult::TooSmall;
+ }
+ }
+ if (LOG_WOULD_LOG(debug)) {
+ for (int partId = 0; partId < 2; ++partId) {
+ DocumentPart part(static_cast<DocumentPart>(partId));
+ LOG(debug,
+ "%s: block %s has totalSpaceUsed=%u, maxUsedExtent=%u "
+ "bytesToWrite=%u blockIndex=%u blockSize=%u",
+ bid.toString().c_str(),
+ getDocumentPartName(part),
+ totalSpaceUsed[part],
+ maxUsedExtent[part],
+ bytesToWrite[part],
+ data.getBlockIndex(part),
+ data.getBlockSize(part));
+ }
+ }
+ // Verify not too much free space. Remember to include bytes to write
+ // currently, and count free space forced added for alignment and to
+ // overrepresent blocks as used.
+ // TODO: are the overrepresent factors correct wrt. new data added?
+ std::shared_ptr<const MemFilePersistenceConfig> memFileCfg;
+ {
+ auto guard = env.acquireConfigReadLock();
+ memFileCfg = guard.memFilePersistenceConfig();
+ }
+ {
+ uint32_t usedSpace = static_cast<uint32_t>(
+ sizeof(Header)
+ + sizeof(MetaSlot) * file.getSlotCount()
+ * memFileCfg->overrepresentMetaDataFactor
+ + totalSpaceUsed[HEADER]
+ * memFileCfg->overrepresentHeaderBlockFactor
+ + totalSpaceUsed[BODY]
+ + bytesToWrite[HEADER]
+ + bytesToWrite[BODY]);
+ alignUp(usedSpace, 0, memFileCfg->fileBlockSize);
+ alignUp(usedSpace, 0, memFileCfg->minimumFileSize);
+ if (double(usedSpace) / data.getFileSize() < memFileCfg->minFillRate) {
+ LOG(debug, "File %s only uses %u of %u bytes (%f %%), which is "
+ "less than min fill rate of %f %%. "
+ "Resizing file to become smaller.",
+ file.getFile().getPath().c_str(),
+ usedSpace, data.getFileSize(),
+ 100.0 * usedSpace / data.getFileSize(),
+ 100.0 * memFileCfg->minFillRate);
+ return FlushResult::TooLarge;
+ }
+ }
+ // At this point, we've checked if we can downsize the file with
+ // a no-go outcome. If there are no altered slots, we can safely
+ // do an early exit here to avoid rewriting metadata needlessly.
+ if (!file.slotsAltered()) {
+ LOG(spam,
+ "No slots in %s altered, returning without writing anything.",
+ bid.toString().c_str());
+ assert(bytesToWrite[HEADER] == 0);
+ assert(bytesToWrite[BODY] == 0);
+ return FlushResult::UnAltered;
+ }
+
+ // Persist dirty locations to disk, updating all slots as we go.
+ // NOTE: it is assumed that the buffered data blocks contain pre-
+ // serialized checksums, document ids etc as appropriate since
+ // we only write the raw data to disk.
+ Buffer buffer(1024 * 1024);
+ BufferedFileWriter writer(ioBuf.getFileHandle(), buffer, buffer.getSize());
+ framework::MilliSecTime locationWriteTime(env._clock.getTimeInMillis());
+
+ for (uint32_t partId = 0; partId < 2; ++partId) {
+ DocumentPart part(static_cast<DocumentPart>(partId));
+ LocationMap& locations(part == HEADER ? headersToWrite : bodiesToWrite);
+
+ uint32_t realPos = data.getBlockIndex(part) + maxUsedExtent[partId];
+ alignUp(realPos);
+ uint32_t pos = realPos - data.getBlockIndex(part);
+
+ LOG(spam,
+ "%s: writing data for part %d, index %d, max "
+ "used extent %d, block size %d",
+ bid.toString().c_str(),
+ part,
+ data.getBlockIndex(part),
+ maxUsedExtent[partId],
+ data.getBlockSize(part));
+
+ writer.setFilePosition(realPos);
+ for (LocationMap::iterator it(locations.begin()), e(locations.end());
+ it != e; ++it)
+ {
+ uint32_t size = it->first._size;
+ writer.write(ioBuf.getBuffer(it->first, part), size);
+ DataLocation newSlotLocation(pos, size);
+ ioBuf.persist(part, it->first, newSlotLocation);
+
+ LOG(spam,
+ "%s: wrote location %d,%d to disk, resulting location was %d,%d",
+ bid.toString().c_str(),
+ it->first._pos,
+ it->first._size,
+ newSlotLocation._pos,
+ newSlotLocation._size);
+
+ std::vector<const MemSlot*>& slots(it->second.slots);
+ for (uint32_t j = 0; j < slots.size(); ++j) {
+ LOG(spam, "%s: setting %s location for slot %s to %u,%u",
+ bid.toString().c_str(),
+ getDocumentPartName(part),
+ slots[j]->toString().c_str(),
+ newSlotLocation._pos,
+ newSlotLocation._size);
+ MapperSlotOperation::setLocation(*slots[j], part, newSlotLocation);
+ }
+ pos += size;
+ }
+ pos = writer.getFilePosition();
+ alignUp(pos);
+ assert(part == BODY || pos <= data.getBlockIndex(BODY));
+ writer.writeGarbage(pos - writer.getFilePosition());
+
+ framework::MilliSecTime timeNow(env._clock.getTimeInMillis());
+ metrics::LongAverageMetric& latency(part == HEADER ? writeMetrics.headerLatency
+ : writeMetrics.bodyLatency);
+ metrics::LongAverageMetric& sz(part == HEADER ? writeMetrics.headerSize
+ : writeMetrics.bodySize);
+ latency.addValue((timeNow - locationWriteTime).getTime());
+ sz.addValue(bytesToWrite[part]);
+ locationWriteTime = timeNow;
+ }
+
+ // Write metadata back to file
+ writer.setFilePosition(0);
+ writeMetaData(writer, file);
+ writer.write(&data._firstHeaderBytes[0], data._firstHeaderBytes.size());
+ writer.flush();
+ MapperSlotOperation::clearFlag(file, SLOTS_ALTERED);
+
+ framework::MilliSecTime finishTime(env._clock.getTimeInMillis());
+ writeMetrics.metaLatency.addValue((finishTime - locationWriteTime).getTime());
+ writeMetrics.metaSize.addValue(writer.getFilePosition());
+ writeMetrics.totalLatency.addValue((finishTime - startTime).getTime());
+ return FlushResult::ChangesWritten;
+}
+
+namespace {
+ uint32_t
+ getMetaSlotCount(uint32_t usedSlotCount,
+ const FileSpecification& file,
+ const MemFilePersistenceConfig& cfg,
+ const Options& options)
+ {
+ uint32_t wanted = static_cast<uint32_t>(
+ usedSlotCount * options._growFactor
+ * options._overrepresentMetaDataFactor);
+ if (wanted < uint32_t(cfg.minimumFileMetaSlots)) {
+ wanted = cfg.minimumFileMetaSlots;
+ }
+ if (wanted > uint32_t(cfg.maximumFileMetaSlots)) {
+ if (uint32_t(cfg.maximumFileMetaSlots) >= usedSlotCount) {
+ wanted = cfg.maximumFileMetaSlots;
+ } else {
+ std::ostringstream ost;
+ ost << "Need " << usedSlotCount << " slots and want "
+ << wanted << " slots in file, but max slots is "
+ << cfg.maximumFileMetaSlots;
+ throw MemFileIoException(
+ ost.str(), file, MemFileIoException::FILE_FULL,
+ VESPA_STRLOC);
+ }
+ }
+ return wanted;
+ }
+
+ uint32_t
+ getHeaderBlockSize(uint32_t minBytesNeeded,
+ uint32_t startBlockIndex,
+ const FileSpecification& file,
+ const MemFilePersistenceConfig& cfg,
+ const Options& options)
+ {
+ uint32_t wanted = static_cast<uint32_t>(
+ minBytesNeeded * options._growFactor
+ * options._overrepresentHeaderBlockFactor);
+ if (wanted < uint32_t(cfg.minimumFileHeaderBlockSize)) {
+ wanted = cfg.minimumFileHeaderBlockSize;
+ }
+ if (wanted > uint32_t(cfg.maximumFileHeaderBlockSize)) {
+ if (uint32_t(cfg.maximumFileHeaderBlockSize)
+ >= minBytesNeeded)
+ {
+ wanted = cfg.maximumFileHeaderBlockSize;
+ } else {
+ std::ostringstream ost;
+ ost << "Need " << minBytesNeeded << " header bytes and want "
+ << wanted << " header bytes in file, but max is "
+ << cfg.maximumFileHeaderBlockSize;
+ throw MemFileIoException(
+ ost.str(), file, MemFileIoException::FILE_FULL,
+ VESPA_STRLOC);
+ }
+ }
+ alignUp(wanted, startBlockIndex);
+ return wanted;
+ }
+
+ uint32_t
+ getBodyBlockSize(uint32_t minBytesNeeded,
+ uint32_t startBlockIndex,
+ const FileSpecification& file,
+ const MemFilePersistenceConfig& cfg,
+ const Options& options)
+ {
+ assert(startBlockIndex % 512 == 0);
+ uint32_t wanted = static_cast<uint32_t>(
+ minBytesNeeded * options._growFactor);
+ if (wanted + startBlockIndex < uint32_t(cfg.minimumFileSize)) {
+ wanted = cfg.minimumFileSize - startBlockIndex;
+ }
+ if (wanted + startBlockIndex > uint32_t(cfg.maximumFileSize)) {
+ if (uint32_t(cfg.maximumFileSize)
+ >= minBytesNeeded + startBlockIndex)
+ {
+ wanted = cfg.maximumFileSize - startBlockIndex;
+ } else {
+ std::ostringstream ost;
+ ost << "Need " << minBytesNeeded << " body bytes and want "
+ << wanted << " body bytes in file, but max is "
+ << (cfg.maximumFileSize - startBlockIndex)
+ << " as the body block starts at index " << startBlockIndex;
+ throw MemFileIoException(
+ ost.str(), file, MemFileIoException::FILE_FULL,
+ VESPA_STRLOC);
+ }
+ }
+ alignUp(wanted, startBlockIndex, cfg.fileBlockSize);
+ return wanted;
+ }
+
+ struct TempCache : public BufferedFileWriter::Cache {
+ uint32_t _headerBlockIndex;
+ std::vector<char> _buffer;
+
+ TempCache(uint32_t headerBlockIndex)
+ : _headerBlockIndex(headerBlockIndex),
+ _buffer()
+ {
+ uint32_t firstAligned = _headerBlockIndex;
+ alignUp(firstAligned);
+ _buffer.resize(firstAligned - _headerBlockIndex);
+ }
+
+ virtual uint32_t getCachedAmount() const
+ { return _buffer.size() + _headerBlockIndex; }
+
+ virtual char* getCache(uint32_t pos) {
+ // We should never get requests to write prior to header block
+ // index.
+ assert(pos >= _headerBlockIndex);
+ return (&_buffer[0] + (pos - _headerBlockIndex));
+ }
+
+ virtual bool duplicateCacheWrite() const { return true; }
+
+ virtual void setData(const char* data, size_t len, uint64_t pos) {
+ if (pos < _headerBlockIndex) {
+ if (len <= _headerBlockIndex - pos) return;
+ uint32_t diff = (_headerBlockIndex - pos);
+ len -= diff;
+ pos += diff;
+ data += diff;
+ }
+ Cache::setData(data, len, pos);
+ }
+ };
+
+}
+
+// Iterate and write locations in timestamp order. Keep track of what
+// locations have already been written and what their new location
+// is in the rewritten file. Returns total number of bytes written
+// for all unique locations. Modifies slot locations in-place in MemFile.
+uint32_t
+MemFileV1Serializer::writeAndUpdateLocations(
+ MemFile& file,
+ SimpleMemFileIOBuffer& ioBuf,
+ BufferedFileWriter& writer,
+ DocumentPart part,
+ const MemFile::LocationMap& locationsToWrite,
+ const Environment& env)
+{
+ framework::MilliSecTimer timer(env._clock);
+ BucketId bid(file.getFile().getBucketId());
+ std::map<DataLocation, DataLocation> writtenLocations;
+ uint32_t index = 0;
+ for (uint32_t i = 0; i < file.getSlotCount(); ++i) {
+ const MemSlot& slot(file[i]);
+
+ DataLocation originalLoc(slot.getLocation(part));
+ if (originalLoc._size == 0) {
+ LOG(spam, "Slot %s has empty %s, not writing anything",
+ slot.toString().c_str(),
+ getDocumentPartName(part));
+ assert(originalLoc._pos == 0);
+ continue;
+ }
+
+ MemFile::LocationMap::const_iterator it(
+ locationsToWrite.find(originalLoc));
+ assert(it != locationsToWrite.end());
+ std::map<DataLocation, DataLocation>::iterator written(
+ writtenLocations.find(originalLoc));
+
+ DataLocation loc;
+ if (written == writtenLocations.end()) {
+ uint32_t size = it->first._size;
+ loc = DataLocation(index, size);
+
+ LOG(spam, "%s: writing %s for slot %s to location (%u, %u)",
+ file.getFile().getBucketId().toString().c_str(),
+ getDocumentPartName(part),
+ slot.toString().c_str(),
+ index, size);
+
+ writer.write(ioBuf.getBuffer(originalLoc, part), size);
+ index += size;
+ writtenLocations[originalLoc] = loc;
+ } else {
+ LOG(spam, "%s: %s already written for slot %s; "
+ "updating to location (%u, %u)",
+ file.getFile().getBucketId().toString().c_str(),
+ getDocumentPartName(part),
+ slot.toString().c_str(),
+ written->second._pos, written->second._size);
+ loc = written->second;
+ }
+ assert(loc.valid());
+ MapperSlotOperation::setLocation(slot, part, loc);
+ }
+ // Move in cache. Cannot be done inside loop.
+ ioBuf.remapAndPersistAllLocations(part, writtenLocations);
+
+ SerializationWriteMetrics& writeMetrics(
+ getMetrics().serialization.fullWrite);
+ metrics::LongAverageMetric& latency(part == HEADER ? writeMetrics.headerLatency
+ : writeMetrics.bodyLatency);
+ metrics::LongAverageMetric& sz(part == HEADER ? writeMetrics.headerSize
+ : writeMetrics.bodySize);
+ latency.addValue(timer);
+ sz.addValue(index); // Equal to written size.
+
+ return index;
+}
+
+void
+MemFileV1Serializer::rewriteFile(MemFile& file, Environment& env)
+{
+ framework::MilliSecTime startTime(env._clock.getTimeInMillis());
+ SerializationWriteMetrics& writeMetrics(
+ getMetrics().serialization.fullWrite);
+ file.ensureHeaderAndBodyBlocksCached();
+
+ SimpleMemFileIOBuffer& ioBuf(
+ static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+
+ const FileSpecification& oldSpec(file.getFile());
+ std::string newPath = oldSpec.getPath() + ".new";
+
+ LOG(debug, "Rewriting entire file %s", oldSpec.getPath().c_str());
+ ioBuf.getFileHandle().close();
+ vespalib::LazyFile::UP newFile = env.createFile(newPath);
+ newFile->open(ioBuf.getFileHandle().getFlags()
+ | vespalib::File::CREATE | vespalib::File::TRUNC, true);
+ MapperSlotOperation::setFlag(file, FILE_EXIST);
+
+ FileInfo::UP data(new FileInfo);
+ Buffer buffer(32 * 1024 * 1024);
+ BufferedFileWriter writer(*newFile, buffer, buffer.getSize());
+
+ std::shared_ptr<const MemFilePersistenceConfig> memFileCfg;
+ std::shared_ptr<const Options> options;
+ {
+ auto guard = env.acquireConfigReadLock();
+ memFileCfg = guard.memFilePersistenceConfig();
+ options = guard.options();
+ }
+
+ // Create the header
+ Header header;
+ header._version = getFileVersion();
+ header._metaDataListSize = getMetaSlotCount(
+ file.getSlotCount(), file.getFile(), *memFileCfg, *options);
+ data->_metaDataListSize = header._metaDataListSize;
+ header._fileChecksum = file.getBucketInfo().getChecksum();
+
+ // Dump header and metadata to writer, so we can start writing header
+ // and bodies. If buffer is too small causing this to be written, we
+ // need to write it again after updating it.
+ writer.write(&header, sizeof(Header));
+ LOG(spam, "Writing garbage for %u meta entries",
+ header._metaDataListSize);
+ writer.writeGarbage(sizeof(MetaSlot) * header._metaDataListSize);
+
+ TempCache tempCache(writer.getFilePosition());
+ writer.setMemoryCache(&tempCache);
+
+ typedef MemFile::LocationMap LocationMap;
+ LocationMap headersToWrite, bodiesToWrite;
+ // Don't need the slot list, we update that implicitly
+ file.getLocations(headersToWrite, bodiesToWrite,
+ PERSISTED_LOCATIONS
+ | NON_PERSISTED_LOCATIONS
+ | NO_SLOT_LIST);
+
+ uint32_t headerIndex = writeAndUpdateLocations(
+ file, ioBuf, writer, HEADER, headersToWrite, env);
+
+ header._headerBlockSize = getHeaderBlockSize(
+ headerIndex,
+ data->getHeaderBlockStartIndex(),
+ file.getFile(),
+ *memFileCfg,
+ *options);
+ header._checksum = header.calcHeaderChecksum();
+ data->_headerBlockSize = header._headerBlockSize;
+
+ if (headerIndex < header._headerBlockSize) {
+ LOG(spam, "Writing %u bytes of header garbage filler",
+ header._headerBlockSize - headerIndex);
+ writer.writeGarbage(header._headerBlockSize - headerIndex);
+ }
+
+ uint32_t bodyIndex = writeAndUpdateLocations(
+ file, ioBuf, writer, BODY, bodiesToWrite, env);
+
+ data->_bodyBlockSize = getBodyBlockSize(
+ bodyIndex,
+ data->getBodyBlockStartIndex(),
+ file.getFile(),
+ *memFileCfg,
+ *options);
+ if (bodyIndex < data->_bodyBlockSize) {
+ writer.writeGarbage(data->_bodyBlockSize - bodyIndex);
+ }
+
+ framework::MilliSecTime timeBeforeMetaWrite(env._clock.getTimeInMillis());
+ // Update meta entries
+ std::vector<MetaSlot> writeSlots(header._metaDataListSize);
+
+ for (uint32_t i = 0; i < file.getSlotCount(); ++i) {
+ const MemSlot& slot(file[i]);
+ MetaSlot& meta(writeSlots[i]);
+
+ DataLocation headerLoc = slot.getLocation(HEADER);
+ assert(headerLoc.valid());
+ DataLocation bodyLoc = slot.getLocation(BODY);
+ assert(bodyLoc.valid());
+ assert(i == 0 || (file[i].getTimestamp() > file[i - 1].getTimestamp()));
+
+ meta._timestamp = slot.getTimestamp();
+ meta._gid = slot.getGlobalId();
+ meta._flags = slot.getPersistedFlags();
+ meta._headerPos = headerLoc._pos;
+ meta._headerSize = headerLoc._size;
+ meta._bodyPos = bodyLoc._pos;
+ meta._bodySize = bodyLoc._size;
+ assert(meta.inUse());
+
+ meta.updateChecksum();
+ MapperSlotOperation::setChecksum(slot, meta._checksum);
+ }
+
+ if (writer.getWriteCount() != 0) {
+        // If the buffer was not large enough to hold the entire file, reposition
+        // to the start and write the updated metadata from there.
+ writer.setFilePosition(0);
+ writer.write(&header, sizeof(Header));
+ writer.write(&writeSlots[0], writeSlots.size() * sizeof(MetaSlot));
+ writer.write(&tempCache._buffer[0], tempCache._buffer.size());
+ } else {
+ // Otherwise, just update the content in the write buffer.
+ memcpy(buffer, &header, sizeof(Header));
+ memcpy(buffer + sizeof(Header),
+ &writeSlots[0], writeSlots.size() * sizeof(MetaSlot));
+ }
+
+ writer.flush();
+ data->_firstHeaderBytes.swap(tempCache._buffer);
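+    // Compute how much the on-disk size changed: the new file size minus the
+    // old one (the old size only counts if the file already had
+    // format-specific data, i.e. existed before this rewrite).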
+ int64_t sizeDiff = 0;
+ if (file.getFormatSpecificData() != 0) {
+ sizeDiff = ioBuf.getFileInfo().getFileSize();
+ }
+ sizeDiff = static_cast<int64_t>(data->getFileSize()) - sizeDiff;
+
+ //file.setFormatSpecificData(MemFile::FormatSpecificData::UP(data.release()));
+ ioBuf.setFileInfo(std::move(data));
+ file.setCurrentVersion(TRADITIONAL_SLOTFILE);
+ newFile->close();
+ vespalib::rename(newPath, oldSpec.getPath());
+
+ ioBuf.getFileHandle().open(
+ ioBuf.getFileHandle().getFlags(),
+ true);
+
+ // Update partitionmonitor with size usage.
+ PartitionMonitor* partitionMonitor(
+ file.getFile().getDirectory().getPartition().getMonitor());
+ if (partitionMonitor == 0) {
+        // Only report if the monitor exists.
+ } else if (sizeDiff > 0) {
+ partitionMonitor->addingData(static_cast<uint32_t>(sizeDiff));
+ } else if (sizeDiff < 0) {
+ partitionMonitor->removingData(static_cast<uint32_t>(-1 * sizeDiff));
+ }
+ MapperSlotOperation::clearFlag(file, SLOTS_ALTERED);
+
+ framework::MilliSecTime timeAfterMetaWrite(env._clock.getTimeInMillis());
+ writeMetrics.metaLatency.addValue((timeAfterMetaWrite - timeBeforeMetaWrite).getTime());
+ writeMetrics.metaSize.addValue(sizeof(MetaSlot) * header._metaDataListSize);
+ writeMetrics.totalLatency.addValue((timeAfterMetaWrite - startTime).getTime());
+}
+
+bool
+MemFileV1Serializer::verify(MemFile& file, Environment& env,
+ std::ostream& reportStream,
+ bool repairErrors, uint16_t fileVerifyFlags)
+{
+ MemFileV1Verifier verifier;
+ SerializationMetrics& metrics(getMetrics().serialization);
+ framework::MilliSecTimer timer(env._clock);
+
+ bool ok(verifier.verify(file, env, reportStream, repairErrors, fileVerifyFlags));
+
+ metrics.verifyLatency.addValue(timer);
+ return ok;
+}
+
+}
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h
new file mode 100644
index 00000000000..bc1bdd902b9
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h>
+#include <vespa/memfilepersistence/mapper/versionserializer.h>
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/memfilepersistence/spi/threadmetricprovider.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFileV1Serializer : public VersionSerializer
+{
+ ThreadMetricProvider& _metricProvider;
+ MemFilePersistenceThreadMetrics& getMetrics() {
+ return _metricProvider.getMetrics();
+ }
+public:
+ typedef vespalib::LinkedPtr<MemFileV1Serializer> LP;
+
+ MemFileV1Serializer(ThreadMetricProvider&);
+
+ virtual FileVersion getFileVersion() { return TRADITIONAL_SLOTFILE; }
+
+ virtual void loadFile(MemFile& file, Environment&,
+ Buffer& buffer, uint64_t bytesRead);
+
+ void cacheLocationsForPart(SimpleMemFileIOBuffer& cache,
+ DocumentPart part,
+ uint32_t blockIndex,
+ const std::vector<DataLocation>& locationsToCache,
+ const std::vector<DataLocation>& locationsRead,
+ SimpleMemFileIOBuffer::BufferAllocation& buf);
+
+ virtual void cacheLocations(MemFileIOInterface& cache,
+ Environment& env,
+ const Options& options,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations);
+
+ virtual FlushResult flushUpdatesToFile(MemFile&, Environment&);
+
+ virtual void rewriteFile(MemFile&, Environment&);
+
+ virtual bool verify(MemFile&, Environment&,
+ std::ostream& errorReport, bool repairErrors,
+ uint16_t fileVerifyFlags);
+
+ uint64_t read(vespalib::LazyFile& file,
+ char* buf,
+ const std::vector<DataLocation>& readOps);
+
+ void ensureFormatSpecificDataSet(const MemFile& file);
+
+ uint32_t writeMetaData(BufferedFileWriter& writer,
+ const MemFile& file);
+
+ uint32_t writeAndUpdateLocations(
+ MemFile& file,
+ SimpleMemFileIOBuffer& ioBuf,
+ BufferedFileWriter& writer,
+ DocumentPart part,
+ const MemFile::LocationMap& locationsToWrite,
+ const Environment& env);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp
new file mode 100644
index 00000000000..223292ada2c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp
@@ -0,0 +1,698 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".persistence.memfilev1.verifier");
+
+namespace storage {
+
+namespace memfile {
+
+namespace {
+
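+// Rounds value up so that (value + offset) becomes a multiple of block.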
+void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) {
+ uint32_t blocks = (value + offset + block - 1) / block;
+ value = blocks * block - offset;
+}
+
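+// Strict weak orderings for sorting metadata slot pointers by timestamp,
+// header location or body location, respectively.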
+struct TimestampSlotOrder
+ : public std::binary_function<MetaSlot*,
+ MetaSlot*, bool>
+{
+ bool operator()(const MetaSlot* slot1,
+ const MetaSlot* slot2) const
+ {
+ return (slot1->_timestamp < slot2->_timestamp);
+ }
+};
+
+struct HeaderSlotOrder
+ : public std::binary_function<MetaSlot*,
+ MetaSlot*, bool>
+{
+ bool operator()(const MetaSlot* slot1,
+ const MetaSlot* slot2) const
+ {
+ if (slot1->_headerPos == slot2->_headerPos) {
+ return (slot1->_headerSize < slot2->_headerSize);
+ }
+ return (slot1->_headerPos < slot2->_headerPos);
+ }
+};
+
+struct BodySlotOrder
+ : public std::binary_function<MetaSlot*,
+ MetaSlot*, bool>
+{
+ bool operator()(const MetaSlot* slot1,
+ const MetaSlot* slot2) const
+ {
+ if (slot1->_bodyPos == slot2->_bodyPos) {
+ return (slot1->_bodySize < slot2->_bodySize);
+ }
+ return (slot1->_bodyPos < slot2->_bodyPos);
+ }
+};
+
+uint32_t calculateChecksum(const void* pos, uint32_t size) {
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(pos, size);
+ return calculator.checksum();
+}
+
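+// A body blob ends with a CRC32 of the preceding bytes; recompute it and
+// compare against the stored value.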
+template<typename T>
+bool verifyBodyBlock(const T& id, vespalib::asciistream & error,
+ const char* data, uint32_t size)
+{
+ uint32_t bodyLen = size - sizeof(uint32_t);
+ const char* bodyCrcPos = data + bodyLen;
+ const uint32_t bodyCrc = *reinterpret_cast<const uint32_t*>(bodyCrcPos);
+ uint32_t calculatedChecksum = calculateChecksum(data, bodyLen);
+ if (calculatedChecksum != bodyCrc) {
+ error << "Body checksum mismatch for " << id
+ << ": Stored checksum is 0x" << std::hex << bodyCrc
+ << " while calculated one is 0x" << calculatedChecksum << ".";
+ return false;
+ }
+ return true;
+}
+
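+// A header blob is laid out as: serialized document header, CRC32 of that
+// blob, the document id string, the id length, and finally a CRC32 covering
+// the id and its length (see HeaderChunkEncoder::writeTo). Verify both
+// checksums and optionally extract the document id.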
+template<typename T>
+bool verifyHeaderBlock(const T& id, vespalib::asciistream & error,
+ const char* data, uint32_t size,
+ Types::DocumentId* documentId = 0)
+{
+ if (size <= 3 * sizeof(uint32_t)) {
+ error << "Error in header for " << id << ": " << size << " byte "
+ << "header is too small to contain required data.";
+ return false;
+ }
+ const char* nameCrcPos = data + size - sizeof(uint32_t);
+ const uint32_t nameCrc = *reinterpret_cast<const uint32_t*>(nameCrcPos);
+ const char* nameLenPos = nameCrcPos - sizeof(uint32_t);
+ const uint32_t nameLen = *reinterpret_cast<const uint32_t*>(nameLenPos);
+ if (size < 3 * sizeof(uint32_t) + nameLen) {
+ error << "Error in header for " << id << ": " << size << " byte "
+ << "header is not big enough to contain a document "
+ << "identifier " << nameLen << " bytes long.";
+ return false;
+ }
+ const char *namePos = nameLenPos - nameLen;
+ uint32_t calculatedNameCrc(
+ calculateChecksum(namePos, nameLen + sizeof(uint32_t)));
+ if (calculatedNameCrc != nameCrc) {
+ error << "Document identifier checksum mismatch for " << id
+ << ": Stored checksum is 0x" << std::hex << nameCrc
+ << " while calculated one is 0x" << calculatedNameCrc << ".";
+ return false;
+ }
+ const char* blobCrcPos = namePos - sizeof(uint32_t);
+ const uint32_t blobCrc = *reinterpret_cast<const uint32_t*>(blobCrcPos);
+ uint32_t blobLen = size - nameLen - 3 * sizeof(uint32_t);
+ uint32_t calculatedChecksum = calculateChecksum(data, blobLen);
+ if (calculatedChecksum != blobCrc) {
+ error << "Header checksum mismatch for " << id
+ << ": Stored checksum is 0x" << std::hex << blobCrc
+ << " while calculated one is 0x" << calculatedChecksum << ".";
+ return false;
+ }
+ if (documentId != 0) {
+ *documentId = Types::DocumentId(Types::String(namePos, nameLen));
+ }
+ return true;
+}
+
+}
+
+// Utility classes to simplify creating a report from verify
+struct MemFileV1Verifier::ReportCreator {
+ bool _ok;
+ const MemFile& _file;
+ std::ostream& _report;
+
+ ReportCreator(const MemFile& file, std::ostream& out)
+ : _ok(true), _file(file), _report(out) {}
+
+ void addMessage(const std::string& msg) {
+ LOG(warning, "verify(%s): %s",
+ _file.getFile().getPath().c_str(), msg.c_str());
+ _report << msg << "\n";
+ _ok = false;
+ }
+};
+
+namespace {
+ struct ReportMessage {
+ MemFileV1Verifier::ReportCreator& _report;
+ mutable std::ostringstream _ost;
+
+ ReportMessage(MemFileV1Verifier::ReportCreator& rc)
+ : _report(rc), _ost() {}
+ ~ReportMessage() {
+ _report.addMessage(_ost.str());
+ }
+ // Copy constructor must exist for compiler not to complain
+ ReportMessage(const ReportMessage& o) : _report(o._report), _ost() {}
+ };
+
+ std::ostream& getReportStream(const ReportMessage& m) { return m._ost; }
+}
+
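+// REPORT streams into a temporary ReportMessage whose destructor forwards the
+// accumulated text to the ReportCreator, so callers can simply write
+// REPORT(report) << "message";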
+#define REPORT(report) getReportStream(ReportMessage(report))
+
+bool
+MemFileV1Verifier::verifyBlock(Types::DocumentPart part,
+ uint32_t id,
+ vespalib::asciistream & error,
+ const char* data, uint32_t size)
+{
+ return (part == Types::HEADER
+ ? verifyHeaderBlock(id, error, data, size)
+ : verifyBodyBlock(id, error, data, size));
+}
+
+const Header*
+MemFileV1Verifier::verifyHeader(ReportCreator& report,
+ const Buffer& buffer, size_t fileSize) const
+{
+ const Header& header = *reinterpret_cast<const Header*>(buffer.getBuffer());
+ if (header._checksum != header.calcHeaderChecksum()) {
+ REPORT(report) << "Header checksum mismatch. Was " << std::hex
+ << header.calcHeaderChecksum() << ", stored "
+ << header._checksum;
+ return 0;
+ }
+ FileInfo data(header, fileSize);
+ if (data.getBodyBlockStartIndex() > fileSize) {
+ REPORT(report) << "Header indicates file is bigger than it physically "
+ << "is. File size is " << fileSize << " bytes, but "
+ << "header reports that it contains "
+ << header._metaDataListSize
+ << " meta data entries and a headerblock of "
+ << header._headerBlockSize << " bytes, thus the minimum "
+ << "file size is "
+ << (header._metaDataListSize * sizeof(MetaSlot)
+ + sizeof(Header) + header._headerBlockSize);
+ return 0;
+ }
+ return &header;
+}
+
+bool
+MemFileV1Verifier::verifyDocumentBody(
+ ReportCreator& report, const MetaSlot& slot, const Buffer& buffer,
+ uint32_t blockIndex, uint32_t blockSize) const
+{
+ if (slot._bodySize == 0) return true;
+ if (slot._bodyPos > blockSize ||
+ slot._bodyPos + slot._bodySize > blockSize ||
+ slot._bodyPos + slot._bodySize < slot._bodyPos)
+ {
+ REPORT(report) << slot << " has body size/pos not contained within "
+ << "body block of size " << blockSize << ".";
+ return false;
+ }
+ if (slot._bodySize <= sizeof(uint32_t)) {
+ REPORT(report) << slot << " body is not big enough to possibly "
+ << "contain a body.";
+ return false;
+ }
+ vespalib::asciistream error;
+ if (!verifyBodyBlock(slot, error,
+ buffer.getBuffer() + blockIndex + slot._bodyPos,
+ slot._bodySize))
+ {
+ REPORT(report) << error.str();
+ return false;
+ }
+ return true;
+}
+
+void
+MemFileV1Verifier::verifyMetaDataBlock(
+ ReportCreator& report, const Buffer& buffer,
+ const Header& header, const BucketInfo& info,
+ std::vector<const MetaSlot*>& slots) const
+{
+ assert(slots.size() == 0);
+ slots.reserve(header._metaDataListSize);
+ Timestamp lastTimestamp(0);
+ bool foundNotInUse = false;
+ bool foundUsedAfterUnused = false;
+ bool wrongOrder = false;
+ for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) {
+ const MetaSlot& slot(*reinterpret_cast<const MetaSlot*>(
+ buffer.getBuffer() + sizeof(Header) + i * sizeof(MetaSlot)));
+ if (slot._checksum != slot.calcSlotChecksum()) {
+ REPORT(report) << "Slot " << i << " at timestamp "
+ << slot._timestamp << " failed checksum "
+ << "verification. Was " << std::hex
+ << slot.calcSlotChecksum()
+ << ", stored " << slot._checksum;
+ continue;
+ }
+ if (!slot.inUse()) {
+ foundNotInUse = true;
+ continue;
+ }
+ if (foundNotInUse) {
+ if (!foundUsedAfterUnused) {
+ REPORT(report) << "Slot " << i << " found after unused entries";
+ }
+ foundUsedAfterUnused = true;
+ }
+ // Handle timestamp collisions later
+ if (slot._timestamp < lastTimestamp) {
+ wrongOrder = true;
+ REPORT(report) << "Slot " << i << " is out of timestamp order. ("
+ << slot._timestamp << " <= " << lastTimestamp
+ << ")";
+ }
+ slots.push_back(&slot);
+ lastTimestamp = slot._timestamp;
+ }
+ if (info.getChecksum() != header._fileChecksum) {
+ REPORT(report) << "File checksum should have been 0x" << std::hex
+ << info.getChecksum() << " according to metadata found, but is set "
+ << "to 0x" << header._fileChecksum << ".";
+ }
+ if (wrongOrder) {
+ std::sort(slots.begin(), slots.end(), TimestampSlotOrder());
+ }
+}
+
+void
+MemFileV1Verifier::verifyInBounds(
+ ReportCreator& report, const Header& header, bool doHeader,
+ const FileInfo& data, std::vector<const MetaSlot*>& slots) const
+{
+ // Gather all information different for header and body parts,
+ // to avoid differences further down.
+ uint32_t blockSize = (doHeader ? header._headerBlockSize
+ : data._bodyBlockSize);
+ uint32_t minSize = (doHeader ? 3*sizeof(uint32_t) : 0);
+ std::string part(doHeader ? "Header" : "Body");
+ std::vector<const MetaSlot*> okSlots;
+ okSlots.reserve(slots.size());
+ // Go through all slots ordered, and remove illegal ones.
+ for (uint32_t i=0, n=slots.size(); i<n; ++i) {
+ uint32_t pos(doHeader ? slots[i]->_headerPos : slots[i]->_bodyPos);
+ uint32_t size(doHeader ? slots[i]->_headerSize : slots[i]->_bodySize);
+ if (size < minSize) {
+ REPORT(report) << part << " of slot (" << *slots[i] << ") "
+ << "is too small to be valid";
+ } else if (size != 0 &&
+ (pos >= blockSize || pos + size > blockSize ||
+ pos + size < pos)) // 3 checks as + can overflow
+ {
+ REPORT(report) << part << " of slot (" << *slots[i] << ") goes out "
+ << "of bounds. (Blocksize " << blockSize << ")";
+ } else if (size == 0 && pos != 0) {
+ REPORT(report) << part << " of slot (" << *slots[i] << ") "
+ << "has size 0 but is not positioned at pos 0 "
+ << "as zero sized blocks should be";
+ } else {
+ okSlots.push_back(slots[i]);
+ }
+ }
+ okSlots.swap(slots);
+}
+
+void
+MemFileV1Verifier::verifyDataBlock(
+ ReportCreator& report, Environment& env, const Buffer& buffer,
+ const FileInfo& data, const BucketId& bucket,
+ std::vector<const MetaSlot*>& slots, bool doHeader) const
+{
+ std::vector<const MetaSlot*> okSlots;
+ okSlots.reserve(slots.size());
+ for (uint32_t i=0, n=slots.size(); i<n; ++i) {
+ if (!doHeader && slots[i]->_bodySize == 0) {
+ okSlots.push_back(slots[i]);
+ continue;
+ }
+ if (doHeader) {
+ DocumentId id;
+ if (!verifyDocumentHeader(report, *slots[i], buffer, id,
+ data.getHeaderBlockStartIndex(),
+ data._headerBlockSize))
+ {
+ continue;
+ }
+ BucketId foundBucket(env._bucketFactory.getBucketId(id));
+ foundBucket.setUsedBits(bucket.getUsedBits());
+ foundBucket = foundBucket.stripUnused();
+ if (id.getGlobalId() != slots[i]->_gid) {
+ REPORT(report) << *slots[i]
+ << " has gid " << slots[i]->_gid
+ << " but its header block contains document id "
+ << id << " with " << id.getGlobalId();
+ }
+ else if (bucket == foundBucket) {
+ okSlots.push_back(slots[i]);
+ } else {
+ REPORT(report) << "Slot " << *slots[i]
+ << " belongs to bucket " << foundBucket
+ << " not in bucket " << bucket;
+ }
+ } else {
+ if (!verifyDocumentBody(report, *slots[i], buffer,
+ data.getBodyBlockStartIndex(),
+ data._bodyBlockSize))
+ {
+ continue;
+ }
+ okSlots.push_back(slots[i]);
+ }
+ }
+ slots.swap(okSlots);
+}
+
+bool
+MemFileV1Verifier::verifyDocumentHeader(
+ ReportCreator& report, const MetaSlot& slot, const Buffer& buffer,
+ DocumentId& did, uint32_t blockIndex, uint32_t blockSize) const
+{
+ if (slot._headerPos > blockSize ||
+ slot._headerPos + slot._headerSize > blockSize ||
+ slot._headerPos + slot._headerSize < slot._headerPos)
+ {
+ REPORT(report) << slot << " has header size/pos not contained within "
+ << "header block of size " << blockSize << ".";
+ return false;
+ }
+ vespalib::asciistream error;
+ if (!verifyHeaderBlock(slot, error,
+ buffer.getBuffer() + blockIndex + slot._headerPos,
+ slot._headerSize, &did))
+ {
+ REPORT(report) << error.str();
+ return false;
+ }
+ return true;
+}
+
+namespace {
+// Helper function for verifyNonOverlap
+ void verifySlotsAtSamePosition(
+ MemFileV1Verifier::ReportCreator& report,
+ bool header,
+ std::vector<const MetaSlot*>& slots,
+ vespalib::hash_set<const MetaSlot*,
+ vespalib::hash<void *> >& faultySlots)
+ {
+ const Types::GlobalId& gid(slots[0]->_gid);
+ for (uint32_t i=1; i<slots.size(); ++i) {
+ if (slots[i]->_gid != gid) {
+ REPORT(report) << "Multiple slots with different gids use same "
+ << (header ? "header" : "body")
+ << " position. For instance slot "
+ << *slots[0] << " and " << *slots[i]
+ << ". Repairing will delete all " << slots.size()
+ << " slots using this position, as we don't "
+ << "know who is correct.";
+ for (uint32_t j=0; j<slots.size(); ++j) {
+ faultySlots.insert(slots[j]);
+ }
+ break;
+ }
+ }
+ }
+}
+
+void
+MemFileV1Verifier::verifyNonOverlap(
+ ReportCreator& report, bool doHeader,
+ std::vector<const MetaSlot*>& slots) const
+{
+ // Gather all information different for header and body parts,
+ // to avoid differences further down.
+ std::string part(doHeader ? "Header" : "Body");
+ std::vector<const MetaSlot*> order(slots);
+    // Using a stable sort keeps slots at the same position in timestamp
+    // order. (Thus we can rely on that ordering if we want to remove the
+    // oldest or newest of slots illegally sharing a position.)
+ if (doHeader) {
+ std::stable_sort(order.begin(), order.end(), HeaderSlotOrder());
+ } else {
+ std::stable_sort(order.begin(), order.end(), BodySlotOrder());
+ }
+    // Temporary store for slots that need to be removed
+ vespalib::hash_set<const MetaSlot*, vespalib::hash<void *> > failedSlots;
+    // Slots that point to the same area within a block.
+ std::vector<const MetaSlot*> local;
+ uint32_t lastPos = 0, lastSize = 0;
+ // Go through all slots ordered, and remove illegal ones.
+ for (uint32_t i=0, n=order.size(); i<n; ++i) {
+ uint32_t pos(doHeader ? order[i]->_headerPos : order[i]->_bodyPos);
+ uint32_t size(doHeader ? order[i]->_headerSize : order[i]->_bodySize);
+ if (size == 0) {
+ // Ignore zero sized entries
+ } else if (pos == lastPos && size == lastSize) {
+ local.push_back(order[i]);
+ } else if (pos < lastPos + lastSize) {
+ std::ostringstream ost;
+ if (!local.empty()) {
+ for (uint32_t j=0; j<local.size(); ++j) {
+ failedSlots.insert(local[j]);
+ if (j != 0) ost << ", ";
+ ost << *local[j];
+ }
+ }
+ failedSlots.insert(order[i]);
+ if (local.empty()) {
+ REPORT(report) << part << " of slot(" << *order[i] << ") "
+ << "overlaps with previously removed slots.";
+ } else {
+ REPORT(report) << part << " of slot (" << *order[i] << ") "
+ << "overlaps with "
+ << (local.size() == 1 ? "slot"
+ : "the following slots")
+ << " " << ost.str() << ".";
+ }
+ local.clear();
+ lastPos = pos;
+ lastSize = size;
+ } else {
+ if (local.size() > 1) {
+ verifySlotsAtSamePosition(report, doHeader, local, failedSlots);
+ }
+ local.clear();
+ local.push_back(order[i]);
+ lastPos = pos;
+ lastSize = size;
+ }
+ }
+ if (local.size() > 1) {
+ verifySlotsAtSamePosition(report, doHeader, local, failedSlots);
+ }
+ if (failedSlots.size() == 0) return;
+ std::vector<const MetaSlot*> okSlots;
+ okSlots.reserve(slots.size() - failedSlots.size());
+ for (uint32_t i=0, n=slots.size(); i<n; ++i) {
+ if (failedSlots.find(slots[i]) == failedSlots.end()) {
+ okSlots.push_back(slots[i]);
+ }
+ }
+ okSlots.swap(slots);
+}
+
+
+
+bool
+MemFileV1Verifier::verify(MemFile& file, Environment& env,
+ std::ostream& reportStream,
+ bool repairErrors, uint16_t fileVerifyFlags)
+{
+ bool verifyHeaderData = ((fileVerifyFlags & DONT_VERIFY_HEADER) == 0);
+ bool verifyBodyData = ((fileVerifyFlags & DONT_VERIFY_BODY) == 0);
+
+ LOG(debug, "verify(%s%s%s%s)",
+ file.getFile().toString().c_str(),
+ repairErrors ? ", repairing errors" : "",
+ verifyHeaderData ? ", verifying header block" : "",
+ verifyBodyData ? ", verifying body block" : "");
+
+ SimpleMemFileIOBuffer& ioBuf(
+ static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+
+ framework::MilliSecTimer startTimer(env._clock);
+ ReportCreator report(file, reportStream);
+ file.verifyConsistent();
+ if (!file.fileExists()) return report._ok;
+
+ // First read at least the header from disk
+ size_t fileSize = ioBuf.getFileHandle().getFileSize();
+ if (fileSize < sizeof(Header)) {
+ REPORT(report) << "File was only " << fileSize
+ << " B long and cannot be valid. Delete file to repair.";
+ if (repairErrors) {
+ env._memFileMapper.deleteFile(file, env);
+ }
+ return report._ok;
+ }
+ const size_t initialIndexRead(
+ env.acquireConfigReadLock().options()->_initialIndexRead);
+ Buffer buffer(std::min(fileSize, initialIndexRead));
+ size_t readBytes = ioBuf.getFileHandle().read(buffer, buffer.getSize(), 0);
+
+ // Exception should have been thrown by read if mismatch here.
+ assert(readBytes == buffer.getSize());
+
+    // Ensure the slotfile header is ok. If not, just delete the whole file.
+ const Header* header = verifyHeader(report, buffer, fileSize);
+ if (header == 0) {
+ if (repairErrors) {
+ env._memFileMapper.deleteFile(file, env);
+ }
+ return report._ok;
+ }
+
+ FileInfo data(*header, fileSize);
+
+ // Read remaining data needed in check, if any
+ size_t lastNeededByte = sizeof(Header)
+ + sizeof(MetaSlot) * header->_metaDataListSize;
+ if (verifyBodyData) {
+ lastNeededByte = fileSize;
+ } else if (verifyHeaderData) {
+ lastNeededByte += header->_headerBlockSize;
+ }
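+    // Grow the buffer if the initial read did not cover everything we need,
+    // rebasing the header pointer since the resize may reallocate, then read
+    // the remainder from disk.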
+ if (buffer.getSize() < lastNeededByte) {
+ buffer.resize(lastNeededByte);
+ header = reinterpret_cast<const Header*>(buffer.getBuffer());
+ }
+ if (lastNeededByte > readBytes) {
+ readBytes += ioBuf.getFileHandle().read(
+ buffer + readBytes, buffer.getSize() - readBytes, readBytes);
+ }
+
+ // Exception should have been thrown by read if mismatch here.
+ assert(readBytes == buffer.getSize());
+
+ // Build list of slots. Do simple checking.
+ std::vector<const MetaSlot*> slots;
+ verifyMetaDataBlock(report, buffer, *header, file.getBucketInfo(), slots);
+ verifyInBounds(report, *header, true, data, slots);
+ verifyInBounds(report, *header, false, data, slots);
+
+ // Check header and body blocks if wanted
+ if (verifyHeaderData) {
+ verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(),
+ slots, true);
+ }
+ if (verifyBodyData) {
+ verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(),
+ slots, false);
+ }
+    // Check for overlapping slots last; if only one of the overlapping slots
+    // pointed to a legal document, we may already have removed the problem.
+ verifyNonOverlap(report, true, slots);
+ verifyNonOverlap(report, false, slots);
+ verifyUniqueTimestamps(report, slots);
+    // If the slot list differs from what we read from disk, we need to write
+    // it back when repairing the errors.
+ if (!report._ok && repairErrors) {
+ // Remove bad entries from the memfile instance
+        // Entries that are fully cached may be removed from the file and simply
+        // tagged in the cache as no longer present in the file.
+ std::vector<Timestamp> keep;
+ for (uint32_t i=0; i<slots.size(); ++i) {
+ keep.push_back(slots[i]->_timestamp);
+ }
+ env._memFileMapper.removeAllSlotsExcept(
+ const_cast<MemFile&>(file), keep);
+
+        // Edit the header and metadata part of the buffer to keep only the
+        // wanted data. Since source and target are the same buffer, create the
+        // new metadata in a separate buffer and memcpy it back afterwards.
+ Buffer metaData(header->_metaDataListSize * sizeof(MetaSlot));
+ BucketInfo info(file.getBucketInfo());
+ const_cast<Header*>(header)->_fileChecksum = info.getChecksum();
+ for (uint32_t i=0; i<header->_metaDataListSize; ++i) {
+ MetaSlot* slot(reinterpret_cast<MetaSlot*>(
+ metaData.getBuffer() + i * sizeof(MetaSlot)));
+ if (i >= slots.size()) {
+ *slot = MetaSlot();
+ } else if (slot != slots[i]) {
+ *slot = *slots[i];
+ }
+ }
+ memcpy(buffer.getBuffer() + sizeof(Header), metaData.getBuffer(),
+ metaData.getSize());
+ // Then rewrite metadata section to disk leaving out bad entries
+ uint32_t dataToWrite(sizeof(Header)
+ + sizeof(MetaSlot) * header->_metaDataListSize);
+ alignUp(dataToWrite);
+ ioBuf.getFileHandle().write(buffer, dataToWrite, 0);
+
+ // Tag memfile up to date
+ uint32_t memFileFlags = FILE_EXIST
+ | HEADER_BLOCK_READ
+ | BODY_BLOCK_READ;
+ for (MemFile::const_iterator it = file.begin(ITERATE_REMOVED);
+ it != file.end(); ++it)
+ {
+ if (!ioBuf.isCached(it->getLocation(BODY), BODY)) {
+ memFileFlags &= ~BODY_BLOCK_READ;
+ }
+ if (!ioBuf.isCached(it->getLocation(HEADER), HEADER)) {
+ memFileFlags &= ~HEADER_BLOCK_READ;
+ }
+
+ if (!ioBuf.isPersisted(it->getLocation(BODY), BODY)
+ || !ioBuf.isPersisted(it->getLocation(HEADER), HEADER))
+ {
+ memFileFlags |= SLOTS_ALTERED;
+ }
+
+ if (it->alteredInMemory()) {
+ memFileFlags |= SLOTS_ALTERED;
+ }
+ }
+ assert(file.fileExists());
+ const_cast<MemFile&>(file).clearFlag(LEGAL_MEMFILE_FLAGS);
+ const_cast<MemFile&>(file).setFlag(memFileFlags);
+ LOG(warning, "verify(%s): Errors repaired", file.toString().c_str());
+ } else if (report._ok) {
+ LOG(debug, "verify(%s): Ok", file.toString().c_str());
+ } else {
+ LOG(debug, "verify(%s): Not repairing errors", file.toString().c_str());
+ }
+
+// env._metrics.slotfileMetrics._verifyLatencyTotal.addValue(startTimer);
+ return report._ok;
+}
+
+void
+MemFileV1Verifier::verifyUniqueTimestamps(
+ ReportCreator& report, std::vector<const MetaSlot*>& slots) const
+{
+ std::vector<const MetaSlot*> okSlots;
+ okSlots.reserve(slots.size());
+ // Slots should already be in order as verifyMetaDataBlock has run
+ Timestamp last(0);
+ for (uint32_t i=0, n=slots.size(); i<n; ++i) {
+ if (slots[i]->_timestamp == last && i != 0) {
+ REPORT(report) << "Slot " << i << " (" << *slots[i]
+ << ") has same timestamp as slot " << (i-1)
+ << " (" << *slots[i-1] << ").";
+ } else {
+ okSlots.push_back(slots[i]);
+ last = slots[i]->_timestamp;
+ }
+ }
+ okSlots.swap(slots);
+}
+
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h
new file mode 100644
index 00000000000..27d663a82b9
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/common/environment.h>
+
+namespace storage {
+
+
+namespace memfile {
+
+class MemFile;
+class Environment;
+class Buffer;
+
+class MemFileV1Verifier : public Types
+{
+public:
+ bool verify(MemFile&,
+ Environment&,
+ std::ostream& errorReport,
+ bool repairErrors,
+ uint16_t fileVerifyFlags);
+
+ bool verifyBlock(Types::DocumentPart part,
+ uint32_t id,
+ vespalib::asciistream & error,
+ const char* data,
+ uint32_t size);
+
+
+ class ReportCreator;
+
+private:
+ const Header* verifyHeader(ReportCreator& report,
+ const Buffer& buffer,
+ size_t fileSize) const;
+
+ void verifyMetaDataBlock(ReportCreator& report,
+ const Buffer& buffer,
+ const Header& header,
+ const BucketInfo& info,
+ std::vector<const MetaSlot*>& slots) const;
+
+ void verifyInBounds(ReportCreator& report,
+ const Header& header,
+ bool doHeader,
+ const FileInfo& data,
+ std::vector<const MetaSlot*>& slots) const;
+
+ void verifyDataBlock(ReportCreator& report,
+ Environment& env,
+ const Buffer& buffer,
+ const FileInfo& data,
+ const BucketId& bucket,
+ std::vector<const MetaSlot*>& slots,
+ bool doHeader) const;
+
+ void verifyNonOverlap(ReportCreator& report,
+ bool doHeader,
+ std::vector<const MetaSlot*>& slots) const;
+
+ bool verifyDocumentHeader(ReportCreator& report,
+ const MetaSlot& slot,
+ const Buffer& buffer,
+ DocumentId& did,
+ uint32_t blockIndex,
+ uint32_t blockSize) const;
+
+ bool verifyDocumentBody(ReportCreator& report,
+ const MetaSlot& slot,
+ const Buffer& buffer,
+ uint32_t blockIndex,
+ uint32_t blockSize) const;
+
+ void verifyUniqueTimestamps(ReportCreator& report,
+ std::vector<const MetaSlot*>& slots) const;
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp
new file mode 100644
index 00000000000..9d9360a3d3a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp
@@ -0,0 +1,340 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h>
+#include <vespa/vdslib/distribution/distribution.h>
+
+LOG_SETUP(".persistence.memfile.mapper");
+
+namespace storage {
+namespace memfile {
+
+// Repair is defined as a macro so that log entries will be unique for the
+// various call sites (different file line numbers).
+#define VESPA_REPAIR_MEMFILE(file) \
+{ \
+ std::ostringstream memFileErrors; \
+ bool memFileRepairResult = repair(file, env, memFileErrors); \
+ if (!memFileRepairResult) { \
+ LOG(warning, "Repaired %s: %s", \
+ file.toString().c_str(), memFileErrors.str().c_str()); \
+ sendNotifyBucketCommand(file, env); \
+ } else { \
+ LOGBP(warning, "Repair for %s triggered but found nothing to repair.", \
+ file.toString().c_str()); \
+ } \
+}
+
+// To avoid duplicating code, this macro is used when autoRepair is on: it
+// invokes the given operation and, if a corruption is detected, repairs the
+// file and retries the operation.
+#define VESPA_HANDLE_AUTOREPAIR(file, func) { \
+ try{ \
+ func; \
+ } catch (CorruptMemFileException& e) { \
+ LOGBP(warning, "Corrupt file %s: %s", \
+ file.toString().c_str(), e.what()); \
+ VESPA_REPAIR_MEMFILE(file); \
+ func; \
+ } \
+ return; \
+}
+
+void
+MemFileMapper::sendNotifyBucketCommand(const MemFile&,
+ Environment&)
+{
+/* TODO: Move to service layer.
+ BucketInfo info(file.getBucketInfo());
+ // Send notify bucket change command to update distributor
+ api::NotifyBucketChangeCommand::SP msg(
+ new api::NotifyBucketChangeCommand(file.getFile().getBucketId(),
+ info));
+ uint16_t distributor(
+ env._storageServer.getDistribution()->getIdealDistributorNode(
+ *env._storageServer.getStateUpdater().getSystemState(),
+ file.getFile().getBucketId()));
+ msg->setAddress(api::StorageMessageAddress(
+ env._storageServer.getClusterName(),
+ lib::NodeType::DISTRIBUTOR,
+ distributor));
+ msg->setSourceIndex(env._nodeIndex);
+ env._fileStorHandler.sendCommand(msg);
+*/
+}
+
+void
+MemFileMapper::addVersionSerializer(VersionSerializer::LP serializer)
+{
+ FileVersion version = serializer->getFileVersion();
+ if (_serializers.find(version) != _serializers.end()) {
+ std::ostringstream error;
+ error << "A serializer for version " << version
+ << " is already registered.";
+ throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC);
+ }
+ _serializers[version] = serializer;
+}
+
+VersionSerializer&
+MemFileMapper::getVersionSerializer(const MemFile& file)
+{
+ std::map<FileVersion, VersionSerializer::LP>::iterator it(
+ _serializers.find(file.getCurrentVersion()));
+ if (it == _serializers.end()) {
+ std::ostringstream ost;
+ ost << "Unknown serialization version "
+ << getFileVersionName(file.getCurrentVersion())
+ << " (" << file.getCurrentVersion() << ")\n";
+ throw CorruptMemFileException(ost.str(), file.getFile(), VESPA_STRLOC);
+ }
+ return *it->second;
+}
+
+MemFileMapper::MemFileMapper(ThreadMetricProvider& metricProvider)
+ : _metricProvider(metricProvider)
+{
+ addVersionSerializer(VersionSerializer::LP(new MemFileV1Serializer(metricProvider)));
+}
+
+void
+MemFileMapper::setDefaultMemFileIO(MemFile& file,
+ vespalib::LazyFile::UP lf,
+ const Environment& env)
+{
+ std::map<FileVersion, VersionSerializer::LP>::iterator serializer(
+ _serializers.find(file.getFile().getWantedFileVersion()));
+ assert(serializer != _serializers.end());
+
+ file.setMemFileIO(
+ std::unique_ptr<MemFileIOInterface>(
+ new SimpleMemFileIOBuffer(
+ *serializer->second,
+ std::move(lf),
+ FileInfo::UP(new FileInfo()),
+ file.getFile(),
+ env)));
+}
+
+void
+MemFileMapper::loadFileImpl(MemFile& file, Environment& env)
+{
+ framework::MilliSecTimer timer(env._clock);
+
+ if (file.getSlotCount() != 0 || file.getCurrentVersion() != UNKNOWN) {
+ throw InvalidStateException("File is already loaded", file.getFile(),
+ VESPA_STRLOC);
+ }
+
+ vespalib::LazyFile::UP f = env.createFile(file.getFile().getPath());
+ vespalib::LazyFile* lf = f.get();
+
+ setDefaultMemFileIO(file, std::move(f), env);
+
+    // Early exit if the file is not found, to avoid using exceptions for a
+    // common control path
+ if (!vespalib::fileExists(file.getFile().getPath())) {
+ LOG(debug, "Cannot load file '%s' as it does not exist",
+ file.getFile().getPath().c_str());
+ file.setFlag(HEADER_BLOCK_READ | BODY_BLOCK_READ);
+ return;
+ }
+ file.setFlag(FILE_EXIST);
+
+ Buffer buffer(env.acquireConfigReadLock().options()->_initialIndexRead);
+ off_t readBytes = lf->read(buffer, buffer.getSize(), 0);
+
+ if (readBytes < 4) {
+ std::ostringstream err;
+ err << "Only " << readBytes << " bytes read from file. Not enough to "
+ << "get a file version.";
+ throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC);
+ }
+ SerializationMetrics& metrics(getMetrics().serialization);
+ metrics.initialMetaReadLatency.addValue(timer);
+
+ file.setFlag(BUCKET_INFO_OUTDATED);
+
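+    // The first 32 bits of the file hold the file version, which selects the
+    // serializer used to load the rest of the file.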
+ FileVersion version = static_cast<FileVersion>(
+ *reinterpret_cast<uint32_t*>(buffer.getBuffer()));
+ std::map<FileVersion, VersionSerializer::LP>::iterator serializer(
+ _serializers.find(version));
+ file.setCurrentVersion(version);
+ if (serializer == _serializers.end()) {
+ std::ostringstream err;
+ err << "Unknown file version " << std::hex << version;
+ throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC);
+ }
+ serializer->second->loadFile(file, env, buffer, readBytes);
+
+ metrics.totalLoadFileLatency.addValue(timer);
+}
+
+void
+MemFileMapper::loadFile(MemFile& file, Environment& env, bool autoRepair)
+{
+ try {
+ loadFileImpl(file, env);
+ } catch (CorruptMemFileException& e) {
+ LOGBP(warning, "Corrupt file %s: %s",
+ file.toString().c_str(), e.what());
+ if (autoRepair) {
+ VESPA_REPAIR_MEMFILE(file);
+ // Must reset version info, slots etc to avoid getting errors
+ // that file is already loaded.
+ file.resetMetaState();
+ loadFileImpl(file, env);
+ }
+ // Add bucket to set of modified buckets so service layer can request
+ // new bucket info.
+ env.addModifiedBucket(file.getFile().getBucketId());
+ }
+}
+
+void
+MemFileMapper::flush(MemFile& f, Environment& env, bool autoRepair)
+{
+ (void) autoRepair;
+ if (f.fileExists()) {
+ VersionSerializer& serializer(getVersionSerializer(f));
+ typedef VersionSerializer::FlushResult FlushResult;
+ FlushResult result = serializer.flushUpdatesToFile(f, env);
+ if (result == FlushResult::TooSmall) {
+ f.compact();
+ result = serializer.flushUpdatesToFile(f, env);
+ }
+ if (result == FlushResult::ChangesWritten
+ || result == FlushResult::UnAltered)
+ {
+ return;
+ }
+ MemFilePersistenceThreadMetrics& metrics(_metricProvider.getMetrics());
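+        // Track why a full rewrite became necessary; both TooFewMetaEntries and
+        // TooSmall are counted as "file too small" rewrites.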
+ switch (result) {
+ case FlushResult::TooFewMetaEntries:
+ metrics.serialization.fullRewritesDueToTooSmallFile.inc();
+ break;
+ case FlushResult::TooSmall:
+ metrics.serialization.fullRewritesDueToTooSmallFile.inc();
+ break;
+ case FlushResult::TooLarge:
+ metrics.serialization.fullRewritesDueToDownsizingFile.inc();
+ break;
+ default:
+ break;
+ }
+ } else {
+ // If a file does not yet exist, its content by definition exists
+ // entirely in memory. Consequently it costs next to nothing to run
+ // compaction since there is no need to read any meta/header blocks
+ // from disk. However, the gains from compacting may be significant if
+ // the bucket e.g. contains many versions of the same document.
+ f.compact();
+ }
+
+    // If we get here, writing only the updates was not possible, so rewrite the whole file
+ std::map<FileVersion, VersionSerializer::LP>::iterator serializer(
+ _serializers.find(f.getFile().getWantedFileVersion()));
+ assert(serializer != _serializers.end());
+
+ serializer->second->rewriteFile(f, env);
+}
+
+bool
+MemFileMapper::verify(MemFile& file, Environment& env,
+ std::ostream& errorReport, bool repairErrors,
+ uint16_t fileVerifyFlags)
+{
+ if (file.fileExists()) {
+ std::map<FileVersion, VersionSerializer::LP>::iterator serializer(
+ _serializers.find(file.getCurrentVersion()));
+ if (serializer != _serializers.end()) {
+ bool wasOk = serializer->second->verify(
+ file, env, errorReport, repairErrors, fileVerifyFlags);
+ if (!wasOk) sendNotifyBucketCommand(file, env);
+ return wasOk;
+ }
+ // If we get here, version is corrupted. Delete file if repairing.
+ errorReport << "Header read from " << file.getFile().getPath()
+ << " is of wrong version "
+ << getFileVersionName(file.getCurrentVersion())
+ << "(0x" << std::hex << file.getCurrentVersion() << std::dec
+ << "). Corrupt file or unsupported format.";
+ if (repairErrors) {
+ deleteFile(file, env);
+ }
+ sendNotifyBucketCommand(file, env);
+ return false;
+ }
+ return true;
+}
+
+void
+MemFileMapper::deleteFile(const MemFile& constFile, Environment& env)
+{
+ MemFile& file(const_cast<MemFile&>(constFile));
+ framework::MilliSecTimer timer(env._clock);
+ std::vector<Timestamp> keep;
+ file.clearFlag(FILE_EXIST);
+ file.setCurrentVersion(UNKNOWN);
+
+ SimpleMemFileIOBuffer& ioBuf(
+ static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO()));
+
+ uint32_t fileSize = ioBuf.getFileHandle().getFileSize();
+ ioBuf.getFileHandle().unlink();
+
+    // Tell the partition monitor that space has been freed
+ PartitionMonitor& partitionMonitor(
+ *constFile.getFile().getDirectory().getPartition().getMonitor());
+ partitionMonitor.removingData(fileSize);
+ getMetrics().serialization.deleteFileLatency.addValue(timer);
+}
+
+void
+MemFileMapper::removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep)
+{
+ std::vector<const MemSlot*> slotsToRemove;
+ MemFile::const_iterator orgIt(file.begin(ITERATE_REMOVED));
+ std::vector<Timestamp>::reverse_iterator keepIt(keep.rbegin());
+
+ // Linear merge of vectors to extract inverse set of `keep`; these will
+ // be the slots we should remove. The output of this is pretty much what
+ // std::set_symmetric_difference would've given us, but can't use that
+ // algorithm directly due to our non-implicitly convertible mixing of
+ // iterator value types.
+ // Note that iterator ranges are sorted in _descending_ order.
+ while (orgIt != file.end()) {
+ if (keepIt == keep.rend() || orgIt->getTimestamp() > *keepIt) {
+ slotsToRemove.push_back(&*orgIt);
+ ++orgIt;
+ } else if (orgIt->getTimestamp() == *keepIt) {
+ ++orgIt;
+ ++keepIt;
+ } else {
+ // The case where the verifier knows of a slot that the MemFile
+ // does not _may_ happen in the case of corruptions causing apparent
+ // timestamp collisions. In this case, sending in timestamps to
+ // keep could lead to ambiguities, but in general we can assume that
+ // one of the slots will be removed before this due to a mismatching
+ // checksum.
+ LOG(warning,
+ "Verifier code requested to keep slot at time %zu in "
+ "file %s, but that slot does not exist in the internal state. "
+ "Assuming this is due to corruption which will be fixed "
+ "automatically.",
+ keepIt->getTime(),
+ file.getFile().getPath().c_str());
+ ++keepIt;
+ }
+ }
+ std::reverse(slotsToRemove.begin(), slotsToRemove.end());
+ file.removeSlots(slotsToRemove);
+}
+
+} // storage
+} // memfile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h
new file mode 100644
index 00000000000..30f483fc582
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::MemFileMapper
+ * \ingroup memfile
+ *
+ * \brief Maps memory representation of files to and from physical files.
+ *
+ * The mapper can map to and from all supported file formats. It keeps track
+ * of all possible formats and calls the implementation of each as needed.
+ * This global class is needed so that files can seamlessly change file format
+ * when one wants to start using a different format than the one used before.
+ *
+ * Note that there will be one MemFileMapper instance per disk thread, so the
+ * mapper doesn't have to worry about being thread-safe with multiple threads
+ * calling it at the same time.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/mapper/versionserializer.h>
+#include <vespa/memfilepersistence/spi/threadmetricprovider.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFileMapper : private Types {
+private:
+ std::map<FileVersion, VersionSerializer::LP> _serializers;
+ ThreadMetricProvider& _metricProvider;
+ void setDefaultMemFileIO(MemFile& file,
+ vespalib::LazyFile::UP lf,
+ const Environment& env);
+
+public:
+ MemFileMapper(ThreadMetricProvider&);
+
+ /**
+     * Initialize a MemFile entry with the data found in the corresponding file.
+     * This sets:
+     * - A flag for whether the file exists or not.
+     * - If the file exists, header data in the MemFile, such as:
+ * - File version
+ * - Meta entry count
+ * - Header block size
+ * - Body block size
+ * - File checksum
+ */
+ void loadFile(MemFile&, Environment&, bool autoRepair = true);
+
+ /**
+ * Flushes all content in MemFile that is not already persisted to disk.
+     * This might require a rewrite of the file if the size of the file needs
+     * to change. Flush updates the following in the MemFile:
+     * - Updates state saying all is persisted.
+     * - If the file was rewritten and was in an unwanted version, the file
+     *   version may have changed to the wanted version.
+     * - Sizes of blocks in the file may have changed.
+     * - Rewrites the file if the changes would leave it too empty. (Thus, the
+     *   memfile given might not be dirty, yet a write may still be needed.)
+ */
+ void flush(MemFile&, Environment&, bool autoRepair = true);
+
+ /**
+ * Verify that file is not corrupt.
+ * @return True if file is fine.
+ */
+ bool verify(MemFile& file, Environment& env,
+ std::ostream& errorReport, uint16_t fileVerifyFlags = 0)
+ { return verify(file, env, errorReport, false, fileVerifyFlags); }
+
+ /**
+ * Verify that file is not corrupt and repair it if it is.
+ * @return True if file was fine. False if any errors were fixed.
+ */
+ bool repair(MemFile& file, Environment& env,
+ std::ostream& errorReport, uint16_t fileVerifyFlags = 0)
+ { return verify(file, env, errorReport, true, fileVerifyFlags); }
+
+ /**
+     * Utility functions used by verify to remove data from the memfile that no
+     * longer points to valid data.
+ */
+ void deleteFile(const MemFile& file, Environment& env);
+ void removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep);
+
+private:
+ void addVersionSerializer(VersionSerializer::LP);
+ VersionSerializer& getVersionSerializer(const MemFile& file);
+
+ void loadFileImpl(MemFile&, Environment&);
+
+ /**
+     * Check the file for errors and generate a report of them. Fix them if
+     * repairErrors is set. Returns true if no failures were found.
+ */
+ bool verify(MemFile& file, Environment&,
+ std::ostream& errorReport, bool repairErrors,
+ uint16_t fileVerifyFlags);
+
+ MemFilePersistenceThreadMetrics& getMetrics() const {
+ return _metricProvider.getMetrics();
+ }
+
+ void sendNotifyBucketCommand(const MemFile&, Environment&);
+};
+
+} // storage
+} // memfile
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h
new file mode 100644
index 00000000000..0eac46a1065
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/metrics/metrics.h>
+
+namespace storage {
+namespace memfile {
+
+class SerializationWriteMetrics : public metrics::MetricSet
+{
+public:
+ metrics::LongAverageMetric headerLatency;
+ metrics::LongAverageMetric headerSize;
+ metrics::LongAverageMetric bodyLatency;
+ metrics::LongAverageMetric bodySize;
+ metrics::LongAverageMetric metaLatency;
+ metrics::LongAverageMetric metaSize;
+ metrics::LongAverageMetric totalLatency;
+
+ SerializationWriteMetrics(const std::string& name, metrics::MetricSet& owner)
+ : metrics::MetricSet(name, "",
+ "Write metrics for memfile persistence engine",
+ &owner),
+ headerLatency("header_latency", "",
+ "Time spent writing a single contiguous header location "
+ "on the disk.", this),
+ headerSize("header_size", "",
+ "Average size of contiguous header disk writes", this),
+ bodyLatency("body_latency", "",
+ "Time spent writing a single contiguous body location "
+ "on the disk.", this),
+ bodySize("body_size", "",
+ "Average size of contiguous body disk writes", this),
+ metaLatency("meta_latency", "",
+ "Time spent writing file header and slot metadata", this),
+ metaSize("meta_size", "",
+ "Size of file header and metadata writes", this),
+ totalLatency("total_latency", "",
+ "Total time spent performing slot file writing", this)
+ {
+ }
+};
+
+class SerializationMetrics : public metrics::MetricSet
+{
+public:
+ metrics::LongAverageMetric initialMetaReadLatency;
+ metrics::LongAverageMetric tooLargeMetaReadLatency;
+ metrics::LongAverageMetric totalLoadFileLatency;
+ metrics::LongAverageMetric verifyLatency;
+ metrics::LongAverageMetric deleteFileLatency;
+ metrics::LongAverageMetric headerReadLatency;
+ metrics::LongAverageMetric headerReadSize;
+ metrics::LongAverageMetric bodyReadLatency;
+ metrics::LongAverageMetric bodyReadSize;
+ metrics::LongAverageMetric cacheUpdateAndImplicitVerifyLatency;
+ metrics::LongCountMetric fullRewritesDueToDownsizingFile;
+ metrics::LongCountMetric fullRewritesDueToTooSmallFile;
+ SerializationWriteMetrics partialWrite;
+ SerializationWriteMetrics fullWrite;
+
+ SerializationMetrics(const std::string& name,
+ metrics::MetricSet* owner = 0)
+ : metrics::MetricSet(name, "",
+ "(De-)serialization I/O metrics for memfile "
+ "persistence engine", owner),
+ initialMetaReadLatency(
+ "initial_meta_read_latency", "",
+ "Time spent doing the initial read of "
+ "the file header and most (or all) of metadata",
+ this),
+ tooLargeMetaReadLatency(
+ "too_large_meta_read_latency", "",
+ "Time spent doing additional read for "
+ "metadata too large to be covered by initial "
+ "read", this),
+ totalLoadFileLatency(
+ "total_load_file_latency", "",
+ "Total time spent initially loading a "
+ "file from disk", this),
+ verifyLatency(
+ "verify_latency", "",
+ "Time spent performing file verification", this),
+ deleteFileLatency(
+ "delete_file_latency", "",
+ "Time spent deleting a file from disk", this),
+ headerReadLatency(
+ "header_read_latency", "",
+ "Time spent reading a single contiguous header location "
+ "on the disk (may span many document blobs)", this),
+ headerReadSize(
+ "header_read_size", "",
+ "Size of contiguous header disk location reads", this),
+ bodyReadLatency(
+ "body_read_latency", "",
+ "Time spent reading a single contiguous body location "
+ "on the disk (may span many document blobs)", this),
+ bodyReadSize(
+ "body_read_size", "",
+ "Size of contiguous body disk location reads", this),
+ cacheUpdateAndImplicitVerifyLatency(
+ "cache_update_and_implicit_verify_latency", "",
+ "Time spent updating memory cache structures and verifying "
+ "read data blocks for corruptions", this),
+ fullRewritesDueToDownsizingFile(
+ "full_rewrites_due_to_downsizing_file", "",
+ "Number of times a file was rewritten fully because the "
+ "original file had too low fill rate", this),
+ fullRewritesDueToTooSmallFile(
+ "full_rewrites_due_to_too_small_file", "",
+ "Number of times a file was rewritten fully because the "
+ "original file did not have sufficient free space for a "
+ "partial write", this),
+ partialWrite("partialwrite", *this),
+ fullWrite("fullwrite", *this)
+ {
+ }
+};
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp
new file mode 100644
index 00000000000..505e9c32f3b
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp
@@ -0,0 +1,538 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/vespalib/util/crc.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".memfile.simpleiobuffer");
+
+namespace storage {
+
+namespace memfile {
+
+namespace {
+
+uint32_t calculateChecksum(const void* pos, uint32_t size) {
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(pos, size);
+ return calculator.checksum();
+}
+
+}
+
+SimpleMemFileIOBuffer::SimpleMemFileIOBuffer(
+ VersionSerializer& reader,
+ vespalib::LazyFile::UP file,
+ FileInfo::UP info,
+ const FileSpecification& fileSpec,
+ const Environment& env)
+ : _reader(reader),
+ _data(2),
+ _workingBuffers(2),
+ _file(std::move(file)),
+ _fileInfo(std::move(info)),
+ _fileSpec(fileSpec),
+ _env(env),
+ _options(env.acquireConfigReadLock().options())
+{
+}
+
+void
+SimpleMemFileIOBuffer::close()
+{
+ if (_file->isOpen()) {
+ _file->close();
+ }
+}
+
+const SimpleMemFileIOBuffer::Data&
+SimpleMemFileIOBuffer::getData(DocumentPart part, DataLocation loc) const
+{
+ DataMap::const_iterator iter = _data[part].find(loc);
+
+ if (iter == _data[part].end()) {
+ std::ostringstream ost;
+ ost << "Location " << loc
+ << " was not found for " << (part == HEADER ? "Header" : "Body");
+ throw PartNotCachedException(ost.str(), VESPA_STRLOC);
+ }
+
+ return iter->second;
+}
+
+document::Document::UP
+SimpleMemFileIOBuffer::getDocumentHeader(
+ const document::DocumentTypeRepo& repo,
+ DataLocation loc) const
+{
+ const Data& data = getData(HEADER, loc);
+
+ Document::UP doc(new Document());
+ document::ByteBuffer buf(data.buf->getBuffer() + data.pos,
+ data.buf->getSize() - data.pos);
+
+ doc->deserializeHeader(repo, buf, false);
+ return doc;
+}
+
+document::DocumentId
+SimpleMemFileIOBuffer::getDocumentId(DataLocation loc) const
+{
+ const Data& data = getData(HEADER, loc);
+
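+    // The document id sits just before the trailing (id length, CRC32) pair of
+    // the header chunk: step back past those two uint32s, read the length, and
+    // then step back over the id itself.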
+ const char* buf = data.buf->getBuffer() + data.pos + loc._size;
+ buf -= 2 * sizeof(uint32_t);
+
+ uint32_t nameLen = *(const uint32_t*)(buf);
+ buf -= nameLen;
+
+ return document::DocumentId(vespalib::stringref(buf, nameLen));
+}
+
+void
+SimpleMemFileIOBuffer::readBody(
+ const document::DocumentTypeRepo& repo,
+ DataLocation loc,
+ Document& doc) const
+{
+ const Data& data = getData(BODY, loc);
+
+ document::ByteBuffer buf(data.buf->getBuffer() + data.pos,
+ data.buf->getSize() - data.pos);
+
+ doc.deserializeBody(repo, buf, false);
+}
+
+DataLocation
+SimpleMemFileIOBuffer::addLocation(DocumentPart part,
+ BufferAllocation newData)
+{
+ if (!newData.getSharedBuffer().get()) {
+ LOG(spam, "Not adding location since data is null");
+ return DataLocation(0, 0);
+ }
+
+ DataMap& target = _data[part];
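+    // Place the new (not yet persisted) data at the end of the file's existing
+    // block, or directly after the last cached location if that extends further.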
+ DataLocation loc = DataLocation(_fileInfo->getBlockSize(part), newData.getSize());
+
+ DataMap::reverse_iterator iter = target.rbegin();
+ if (iter != target.rend() && iter->first.endPos() > loc._pos) {
+ loc = DataLocation(iter->first.endPos(), newData.getSize());
+ }
+
+ std::pair<DataMap::iterator, bool> existing(
+ target.insert(std::make_pair(loc, Data(newData.getSharedBuffer(),
+ newData.getBufferPosition(),
+ false))));
+ if (!existing.second) {
+ LOG(error, "%s: addLocation attempted %s insert with location %u,%u, "
+ "but that location already exists",
+ _fileSpec.toString().c_str(),
+ getDocumentPartName(part),
+ loc._pos,
+ loc._size);
+ assert(false);
+ }
+
+ LOG(spam, "%s: added %s at location %u,%u (buffer %p, position %u)",
+ _fileSpec.getBucketId().toString().c_str(),
+ getDocumentPartName(part),
+ loc._pos,
+ loc._size,
+ newData.getSharedBuffer().get(),
+ newData.getBufferPosition());
+ return loc;
+}
+
+void
+SimpleMemFileIOBuffer::HeaderChunkEncoder::bufferDocument(const Document& doc)
+{
+ assert(_serializedDoc.empty());
+ doc.serializeHeader(_serializedDoc);
+}
+
+/**
+ * Buffer is comprised of the following:
+ * - Document header blob (n bytes)
+ * - CRC32 of header blob (4 bytes)
+ * - Document Id (n bytes)
+ * - Length of document id (4 bytes)
+ * - CRC32 of document id and length (4 bytes)
+ *
+ * To a reader, the length of the header blob is inferred from length of
+ * total buffer chunk minus the overhead by the doc id string and metadata in
+ * the chunk trailer.
+ */
+void
+SimpleMemFileIOBuffer::HeaderChunkEncoder::writeTo(BufferAllocation& buf) const
+{
+ assert(buf.getSize() >= encodedSize());
+ // Note that docSize may be zero throughout this function.
+ const uint32_t docSize = _serializedDoc.size();
+ const uint32_t docChecksum = calculateChecksum(
+ _serializedDoc.peek(), docSize);
+ const uint32_t idLen = _docId.size();
+
+ vespalib::crc_32_type nameChecksum;
+ nameChecksum.process_bytes(_docId.c_str(), idLen);
+ nameChecksum.process_bytes(reinterpret_cast<const char*>(&idLen),
+ sizeof(uint32_t));
+ const uint32_t trailerChecksum = nameChecksum.checksum();
+
+ memcpy(buf.getBuffer(), _serializedDoc.peek(), docSize);
+ char* trailer = buf.getBuffer() + docSize;
+ memcpy(trailer, &docChecksum, sizeof(uint32_t));
+ trailer += sizeof(uint32_t);
+ memcpy(trailer, _docId.c_str(), idLen);
+ trailer += idLen;
+ memcpy(trailer, &idLen, sizeof(uint32_t));
+ trailer += sizeof(uint32_t);
+ memcpy(trailer, &trailerChecksum, sizeof(uint32_t));
+}
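+
+// Illustrative example of the layout above: for a document id of 20 bytes
+// and a serialized document header of 100 bytes, the encoded chunk occupies
+// 100 + 4 (header CRC32) + 20 (id) + 4 (id length) + 4 (trailer CRC32)
+// = 132 bytes, which is exactly what encodedSize() reports.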
+
+bool
+SimpleMemFileIOBuffer::writeBackwardsCompatibleRemoves() const
+{
+ return !_options->_defaultRemoveDocType.empty();
+}
+
+document::Document::UP
+SimpleMemFileIOBuffer::generateBlankDocument(
+ const DocumentId& id,
+ const document::DocumentTypeRepo& repo) const
+{
+ vespalib::string typeName(
+ id.hasDocType() ? id.getDocType()
+ : _options->_defaultRemoveDocType);
+ const document::DocumentType* docType(repo.getDocumentType(typeName));
+ if (!docType) {
+ throw vespalib::IllegalArgumentException(
+ "Could not serialize document for remove with unknown "
+ "doctype '" + typeName + "'");
+ }
+ return std::unique_ptr<Document>(new Document(*docType, id));
+}
+
+SimpleMemFileIOBuffer::BufferAllocation
+SimpleMemFileIOBuffer::serializeHeader(const Document& doc)
+{
+ HeaderChunkEncoder encoder(doc.getId());
+ encoder.bufferDocument(doc);
+ BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize()));
+ encoder.writeTo(buf);
+
+ return buf;
+}
+
+SimpleMemFileIOBuffer::BufferAllocation
+SimpleMemFileIOBuffer::serializeDocumentIdOnlyHeader(
+ const DocumentId& id,
+ const document::DocumentTypeRepo& repo)
+{
+ HeaderChunkEncoder encoder(id);
+ if (writeBackwardsCompatibleRemoves()) {
+ Document::UP blankDoc(generateBlankDocument(id, repo));
+ encoder.bufferDocument(*blankDoc);
+ }
+ BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize()));
+ encoder.writeTo(buf);
+
+ return buf;
+}
+
+DataLocation
+SimpleMemFileIOBuffer::addDocumentIdOnlyHeader(
+ const DocumentId& docId,
+ const document::DocumentTypeRepo& repo)
+{
+ return addLocation(HEADER, serializeDocumentIdOnlyHeader(docId, repo));
+}
+
+DataLocation
+SimpleMemFileIOBuffer::addHeader(const Document& doc)
+{
+ return addLocation(HEADER, serializeHeader(doc));
+}
+
+SimpleMemFileIOBuffer::BufferAllocation
+SimpleMemFileIOBuffer::serializeBody(const Document& doc)
+{
+ vespalib::nbostream output(5 * 1024);
+ doc.serializeBody(output);
+
+ if (output.empty()) {
+ return BufferAllocation();
+ }
+
+ BufferAllocation val(allocateBuffer(BODY, output.size() + sizeof(uint32_t)));
+ memcpy(val.getBuffer(), output.peek(), output.size());
+
+ // Also append CRC32 of body block to buffer
+ uint32_t checksum = calculateChecksum(output.peek(), output.size());
+ char* trailer = val.getBuffer() + output.size();
+ memcpy(trailer, &checksum, sizeof(uint32_t));
+
+ return val;
+}
+
+SimpleMemFileIOBuffer::BufferAllocation
+SimpleMemFileIOBuffer::allocateBuffer(DocumentPart part,
+ uint32_t sz,
+ SharedBuffer::Alignment align)
+{
+ // If the requested size is greater than or equal to our working buffer
+ // size, simply allocate a separate buffer for it.
+ if (sz >= WORKING_BUFFER_SIZE) {
+ return BufferAllocation(SharedBuffer::LP(new SharedBuffer(sz)), 0, sz);
+ }
+
+ SharedBuffer::LP& bufLP(_workingBuffers[part]);
+ bool requireNewBlock = false;
+ if (!bufLP.get()) {
+ requireNewBlock = true;
+ } else if (!bufLP->hasRoomFor(sz, align)) {
+ requireNewBlock = true;
+ }
+
+ if (!requireNewBlock) {
+ return BufferAllocation(bufLP,
+ static_cast<uint32_t>(bufLP->allocate(sz, align)),
+ sz);
+ } else {
+ SharedBuffer::LP newBuf(new SharedBuffer(WORKING_BUFFER_SIZE));
+ bufLP = newBuf;
+ return BufferAllocation(newBuf,
+ static_cast<uint32_t>(newBuf->allocate(sz, align)),
+ sz);
+ }
+}
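+
+// Illustrative behaviour of the allocation strategy above: a 20 KiB request
+// gets its own dedicated SharedBuffer, whereas two subsequent 1 KiB requests
+// for the same part are packed into one 16 KiB working buffer at offsets 0
+// and 1024 (allocations advance the used size in 8-byte aligned steps).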
+
+DataLocation
+SimpleMemFileIOBuffer::addBody(const Document& doc)
+{
+ return addLocation(BODY, serializeBody(doc));
+}
+
+void
+SimpleMemFileIOBuffer::clear(DocumentPart part)
+{
+ LOG(debug, "%s: cleared all data for part %s",
+ _fileSpec.getBucketId().toString().c_str(),
+ getDocumentPartName(part));
+ _data[part].clear();
+}
+
+bool
+SimpleMemFileIOBuffer::verifyConsistent() const
+{
+ return true;
+}
+
+void
+SimpleMemFileIOBuffer::move(const FileSpecification& target)
+{
+ LOG(debug, "Moving %s -> %s",
+ _file->getFilename().c_str(),
+ target.getPath().c_str());
+ _file->close();
+
+ if (vespalib::fileExists(_file->getFilename())) {
+ vespalib::rename(_file->getFilename(), target.getPath(), true, true);
+ }
+
+ _file.reset(
+ new vespalib::LazyFile(target.getPath(), vespalib::File::DIRECTIO, true));
+}
+
+DataLocation
+SimpleMemFileIOBuffer::copyCache(const MemFileIOInterface& source,
+ DocumentPart part,
+ DataLocation loc)
+{
+ if (loc._size == 0) {
+ return loc;
+ }
+
+ const SimpleMemFileIOBuffer& srcBuf(
+ static_cast<const SimpleMemFileIOBuffer&>(source));
+ Data data = srcBuf.getData(part, loc);
+
+ BufferAllocation val(allocateBuffer(part, loc._size));
+ memcpy(val.getBuffer(), data.buf->getBuffer() + data.pos, loc._size);
+
+ LOG(spam,
+ "Copied cached data from %s to %s for location %u,%u buffer pos=%u",
+ srcBuf._fileSpec.getBucketId().toString().c_str(),
+ _fileSpec.getBucketId().toString().c_str(),
+ loc._pos,
+ loc._size,
+ data.pos);
+
+ return addLocation(part, val);
+}
+
+
+void
+SimpleMemFileIOBuffer::cacheLocation(DocumentPart part,
+ DataLocation loc,
+ BufferType::LP& buf,
+ uint32_t bufferPos)
+{
+ LOG(spam,
+ "%s: added existing %s buffer at location %u,%u "
+ "buffer=%p buffer pos=%u",
+ _fileSpec.toString().c_str(),
+ getDocumentPartName(part),
+ loc._pos,
+ loc._size,
+ buf.get(),
+ bufferPos);
+ _data[part][loc] = Data(buf, bufferPos, true);
+}
+
+bool
+SimpleMemFileIOBuffer::isCached(DataLocation loc,
+ DocumentPart type) const
+{
+ if (loc._size == 0) {
+ // Count zero-sized locations as cached
+ return true;
+ }
+
+ return _data[type].find(loc) != _data[type].end();
+}
+
+bool
+SimpleMemFileIOBuffer::isPersisted(DataLocation loc,
+ DocumentPart type) const
+{
+ DataMap::const_iterator iter = _data[type].find(loc);
+
+ // If the buffer doesn't know about the data at all,
+ // we must assume it is already persisted. How else would the file
+ // know about the location?
+ if (iter == _data[type].end()) {
+ return true;
+ }
+
+ return iter->second.persisted;
+}
+
+void
+SimpleMemFileIOBuffer::ensureCached(Environment& env,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations)
+{
+ std::vector<DataLocation> nonCached;
+ nonCached.reserve(locations.size());
+
+ for (uint32_t i = 0; i < locations.size(); ++i) {
+ if (_data[part].find(locations[i]) == _data[part].end()) {
+ nonCached.push_back(locations[i]);
+ }
+ }
+
+ _reader.cacheLocations(*this, env, *_options, part, nonCached);
+}
+
+void
+SimpleMemFileIOBuffer::persist(DocumentPart part,
+ DataLocation oldLoc,
+ DataLocation newLoc)
+{
+ Data newData = getData(part, oldLoc);
+ newData.persisted = true;
+ size_t erased = _data[part].erase(oldLoc);
+ assert(erased > 0);
+ (void) erased;
+ _data[part][newLoc] = newData;
+
+ LOG(spam, "%s: persisted %s for %u,%u -> %u,%u",
+ _fileSpec.getBucketId().toString().c_str(),
+ getDocumentPartName(part),
+ oldLoc._pos, oldLoc._size,
+ newLoc._pos, newLoc._size);
+}
+
+void
+SimpleMemFileIOBuffer::remapAndPersistAllLocations(
+ DocumentPart part,
+ const std::map<DataLocation, DataLocation>& locs)
+{
+ DataMap remappedData;
+
+ typedef std::map<DataLocation, DataLocation>::const_iterator Iter;
+ for (Iter it(locs.begin()), e(locs.end()); it != e; ++it) {
+ DataLocation oldLoc = it->first;
+ DataLocation newLoc = it->second;
+
+ LOG(spam, "%s: remapping %u,%u -> %u,%u",
+ _fileSpec.getBucketId().toString().c_str(),
+ oldLoc._pos, oldLoc._size,
+ newLoc._pos, newLoc._size);
+
+ Data newData = getData(part, oldLoc);
+ newData.persisted = true;
+ std::pair<DataMap::iterator, bool> inserted(
+ remappedData.insert(std::make_pair(newLoc, newData)));
+ assert(inserted.second);
+ }
+ _data[part].swap(remappedData);
+
+ // Note that remappedData holds the pre-remap contents after the swap above.
+ LOG(debug,
+ "%s: remapped %zu locations. Discarded %zu locations that "
+ "had no new mapping",
+ _fileSpec.getBucketId().toString().c_str(),
+ locs.size(),
+ remappedData.size() - locs.size());
+}
+
+const char*
+SimpleMemFileIOBuffer::getBuffer(DataLocation loc, DocumentPart part) const
+{
+ const Data& data = getData(part, loc);
+ return data.buf->getBuffer() + data.pos;
+}
+
+uint32_t
+SimpleMemFileIOBuffer::getSerializedSize(DocumentPart part,
+ DataLocation loc) const
+{
+ if (part == HEADER) {
+ const Data& data = getData(part, loc);
+ assert(loc._size > sizeof(uint32_t)*3);
+ const char* bufEnd = data.buf->getBuffer() + data.pos + loc._size;
+ uint32_t docIdLen = *reinterpret_cast<const uint32_t*>(
+ bufEnd - sizeof(uint32_t)*2);
+ return loc._size - sizeof(uint32_t)*3 - docIdLen;
+ } else {
+ return loc._size - sizeof(uint32_t);
+ }
+}
+
+size_t
+SimpleMemFileIOBuffer::getCachedSize(DocumentPart part) const
+{
+ const DataMap& dm(_data[part]);
+ vespalib::hash_set<const void*> seenBufs(dm.size());
+ size_t ret = 0;
+ for (DataMap::const_iterator it(dm.begin()), e(dm.end()); it != e; ++it) {
+ if (seenBufs.find(it->second.buf->getBuffer()) != seenBufs.end()) {
+ continue;
+ }
+
+ size_t bufSize = it->second.buf->getSize();
+ // Account for (approximate) mmap overhead.
+ bufSize = util::alignUpPow2<4096>(bufSize);
+ ret += bufSize;
+ seenBufs.insert(it->second.buf->getBuffer());
+ }
+ return ret;
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h
new file mode 100644
index 00000000000..8dbffcaf795
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h
@@ -0,0 +1,365 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/memfile/memfileiointerface.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/memfilepersistence/mapper/fileinfo.h>
+#include <vespa/vespalib/util/exception.h>
+#include <vespa/vespalib/util/alloc.h>
+#include <vespa/memfilepersistence/mapper/versionserializer.h>
+
+namespace storage
+{
+namespace memfile
+{
+
+namespace util {
+
+/**
+ * @param Alignment (template) must be a power of two.
+ * @return val aligned up so that retval >= val && retval % Alignment == 0
+ */
+template <size_t Alignment>
+size_t
+alignUpPow2(const size_t val)
+{
+ const size_t mask = Alignment - 1;
+ return (val + mask) & ~mask;
+}
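+
+// Illustrative examples: alignUpPow2<512>(700) == 1024, while
+// alignUpPow2<512>(512) == 512 since already-aligned values are unchanged.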
+
+/**
+ * Round a value up to the nearest power of two; values that are already
+ * powers of two are returned unchanged. E.g:
+ * nextPow2(3) -> 4
+ * nextPow2(15) -> 16
+ * nextPow2(40) -> 64
+ * nextPow2(64) -> 64
+ *
+ * From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+ */
+inline uint32_t
+nextPow2(uint32_t v)
+{
+ --v;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ ++v;
+ return v;
+}
+
+}
+
+/**
+ * Implements a simple buffered cache for a memfile.
+ */
+class SimpleMemFileIOBuffer : public MemFileIOInterface
+{
+public:
+ /**
+ * Any buffer request greater than or equal to this size gets its own,
+ * separately allocated buffer. For all other cases, we attempt to squeeze
+ * as many documents as possible into the same (shared) buffer.
+ */
+ static const size_t WORKING_BUFFER_SIZE = 16*1024; // TODO(vekterli): make configurable
+
+ class SharedBuffer
+ {
+ public:
+ static const size_t ALLOC_ALIGNMENT = 8;
+ enum Alignment {
+ NO_ALIGN,
+ ALIGN_512_BYTES
+ };
+
+ typedef vespalib::LinkedPtr<SharedBuffer> LP;
+ explicit SharedBuffer(size_t totalSize)
+ : _buf(totalSize),
+ _usedSize(0)
+ {
+ }
+
+ size_t getSize() const { return _buf.size(); }
+ size_t getUsedSize() const { return _usedSize; }
+ size_t getFreeSize() const { return getSize() - getUsedSize(); }
+ bool hasRoomFor(size_t sz, Alignment align = NO_ALIGN) const {
+ return (align == ALIGN_512_BYTES
+ ? util::alignUpPow2<512>(_usedSize)
+ : _usedSize) + sz <= getSize();
+ }
+
+ /**
+ * Returns an offset into the shared buffer which is valid to use for
+ * sz bytes. If align is ALIGN_512_BYTES, the returned offset will be
+ * aligned on a 512-byte boundary. It is the responsibility of the
+ * caller to ensure that buffers used for Direct I/O are allocated with
+ * a size that is also evenly divisible by 512.
+ */
+ size_t allocate(size_t sz, Alignment align = NO_ALIGN) {
+ if (align == ALIGN_512_BYTES) {
+ _usedSize = util::alignUpPow2<512>(_usedSize);
+ }
+ assert(hasRoomFor(sz));
+ size_t ret = _usedSize;
+ _usedSize += util::alignUpPow2<ALLOC_ALIGNMENT>(sz);
+ return ret;
+ }
+
+ char* getBuffer() {
+ return static_cast<char*>(_buf.get());
+ }
+ const char* getBuffer() const {
+ return static_cast<const char*>(_buf.get());
+ }
+ private:
+ vespalib::MMapAlloc _buf;
+ size_t _usedSize;
+ };
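+
+ /*
+ * Illustrative use of SharedBuffer (a sketch, not a usage requirement):
+ *
+ * SharedBuffer::LP buf(new SharedBuffer(16 * 1024));
+ * size_t a = buf->allocate(100); // a == 0
+ * size_t b = buf->allocate(100); // b == 104 (8-byte aligned)
+ * size_t c = buf->allocate(100, SharedBuffer::ALIGN_512_BYTES); // c == 512
+ */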
+
+ struct BufferAllocation
+ {
+ BufferAllocation() : pos(0), size(0) {}
+
+ BufferAllocation(const SharedBuffer::LP& b, uint32_t p, uint32_t sz)
+ : buf(b), pos(p), size(sz) {}
+
+ /**
+ * Get buffer area available to this specific allocation
+ */
+ char* getBuffer() { return buf->getBuffer() + pos; }
+ const char* getBuffer() const { return buf->getBuffer() + pos; }
+
+ /**
+ * Get buffer that is (potentially) shared between many individual
+ * allocations.
+ */
+ SharedBuffer::LP& getSharedBuffer() { return buf; }
+ uint32_t getBufferPosition() const { return pos; }
+ uint32_t getSize() const { return size; }
+
+ SharedBuffer::LP buf;
+ uint32_t pos;
+ uint32_t size;
+ };
+
+ /**
+ * Utility class for fully encoding a chunk of file data for a document
+ * header in a slotfile. Supports writing header chunks with and without
+ * a document payload.
+ */
+ class HeaderChunkEncoder
+ {
+ vespalib::nbostream _serializedDoc;
+ vespalib::string _docId;
+ public:
+ static const size_t DEFAULT_STREAM_ALLOC_SIZE = 5 * 1024;
+
+ HeaderChunkEncoder(const document::DocumentId& docId)
+ : _serializedDoc(DEFAULT_STREAM_ALLOC_SIZE),
+ _docId(docId.toString())
+ {
+ }
+
+ /**
+ * Serializes header chunk to buf, which must have at least a size
+ * of encodedSize() bytes available.
+ */
+ void writeTo(BufferAllocation& buf) const;
+
+ /**
+ * Assign (and buffer) document that should be written to the chunk.
+ * If this method is not called on an encoder prior to writeTo(), the
+ * chunk will contain only a document ID but no payload. This is
+ * perfectly fine for 5.1+, but is not supported by 5.0 readers.
+ * It is safe for the provided document to go out of scope after having
+ * called this method.
+ * Since this method buffers it may only be called once per encoder.
+ */
+ void bufferDocument(const document::Document&);
+
+ /**
+ * Compute total size of chunk as it will reside on disk, including
+ * document blob/id payload and metadata overhead.
+ * Max doc size is <=64M so we cannot possibly exceed 32 bits.
+ */
+ uint32_t encodedSize() const {
+ return (_serializedDoc.size() + trailerLength());
+ }
+ private:
+ static constexpr uint32_t fixedTrailerLength() {
+ // CRC32 of doc blob + u32 doc id length + CRC32 of doc id.
+ return (sizeof(uint32_t) * 3);
+ }
+ uint32_t trailerLength() const {
+ return (fixedTrailerLength() + _docId.size());
+ }
+ };
+
+ typedef SharedBuffer BufferType;
+
+ class PartNotCachedException : public vespalib::Exception {
+ public:
+ PartNotCachedException(const std::string& msg,
+ const std::string& location)
+ : vespalib::Exception(msg, location) {}
+ };
+
+ SimpleMemFileIOBuffer(
+ VersionSerializer& reader,
+ vespalib::LazyFile::UP file,
+ FileInfo::UP fileInfo,
+ const FileSpecification& fileSpec,
+ const Environment& env);
+
+ virtual Document::UP getDocumentHeader(
+ const document::DocumentTypeRepo& repo,
+ DataLocation loc) const;
+
+ virtual document::DocumentId getDocumentId(DataLocation loc) const;
+
+ virtual void readBody(
+ const document::DocumentTypeRepo& repo,
+ DataLocation loc,
+ Document& doc) const;
+
+ virtual DataLocation addDocumentIdOnlyHeader(
+ const DocumentId& id,
+ const document::DocumentTypeRepo& repo);
+
+ virtual DataLocation addHeader(const Document& doc);
+
+ virtual DataLocation addBody(const Document& doc);
+
+ virtual void clear(DocumentPart type);
+
+ virtual bool verifyConsistent() const;
+
+ /**
+ * Moves the underlying file to another location.
+ */
+ virtual void move(const FileSpecification& target);
+
+ virtual void close();
+
+ virtual DataLocation copyCache(const MemFileIOInterface& source,
+ DocumentPart part,
+ DataLocation loc);
+
+ /**
+ * Add a location -> buffer mapping
+ */
+ void cacheLocation(DocumentPart part,
+ DataLocation loc,
+ BufferType::LP& buf,
+ uint32_t bufferPos);
+
+ /**
+ * @return Returns true if the given location is cached.
+ */
+ virtual bool isCached(DataLocation loc, DocumentPart type) const;
+
+ /**
+ * @return Returns true if the given location has been persisted to disk.
+ */
+ virtual bool isPersisted(DataLocation loc, DocumentPart type) const;
+
+ virtual uint32_t getSerializedSize(DocumentPart part,
+ DataLocation loc) const;
+
+ virtual void ensureCached(Environment& env,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations);
+
+ /**
+ * Moves the given location into the persisted data area.
+ * oldLoc must be outside the persisted data area, and newLoc must be within.
+ */
+ void persist(DocumentPart part, DataLocation oldLoc, DataLocation newLoc);
+
+ /**
+ * Remaps every single location for the given part.
+ * WARNING: All existing locations that are not remapped will be discarded!
+ */
+ void remapAndPersistAllLocations(DocumentPart part,
+ const std::map<DataLocation, DataLocation>& locs);
+
+ vespalib::LazyFile& getFileHandle() { return *_file; }
+ const vespalib::LazyFile& getFileHandle() const { return *_file; }
+
+ const FileInfo& getFileInfo() const { return *_fileInfo; }
+ void setFileInfo(FileInfo::UP fileInfo) { _fileInfo = std::move(fileInfo); }
+
+ const FileSpecification& getFileSpec() const { return _fileSpec; }
+
+ const char* getBuffer(DataLocation loc, DocumentPart part) const;
+
+ size_t getCachedSize(DocumentPart part) const;
+
+ BufferAllocation allocateBuffer(DocumentPart part,
+ uint32_t sz,
+ SharedBuffer::Alignment align
+ = SharedBuffer::NO_ALIGN);
+
+ /**
+ * Whether removes should be written with a document header payload in
+ * order to be backwards-compatible with VDS 5.0. This is in order to
+ * support a scenario where a cluster is downgraded from 5.1+ -> 5.0.
+ */
+ bool writeBackwardsCompatibleRemoves() const;
+
+ /**
+ * Generate a document with no content which stores the given document ID
+ * and is of the type inferred by the ID. If the ID is of legacy format
+ * (and thus without a type), the default configured type will be used.
+ */
+ Document::UP generateBlankDocument(const DocumentId&,
+ const document::DocumentTypeRepo&) const;
+
+private:
+ struct Data {
+ Data() : pos(0), persisted(false) {}
+
+ Data(const BufferType::LP& b, uint32_t p, bool isPersisted)
+ : buf(b), pos(p), persisted(isPersisted) {}
+
+ BufferType::LP buf;
+ uint32_t pos;
+ bool persisted;
+ };
+
+ typedef std::map<DataLocation, Data> DataMap;
+
+ VersionSerializer& _reader;
+ std::vector<DataMap> _data;
+ std::vector<SharedBuffer::LP> _workingBuffers;
+ vespalib::LazyFile::UP _file;
+ FileInfo::UP _fileInfo;
+ FileSpecification _fileSpec;
+ const Environment& _env;
+ // Same memfile config is used during entire lifetime of buffer object.
+ // This makes live reconfigs kick in for all files only when all buckets
+ // have been evicted from the cache post-reconfig, but greatly simplifies
+ // the reasoning about a given bucket in the face of such actions.
+ std::shared_ptr<const Options> _options;
+
+ DataLocation addLocation(DocumentPart part,
+ BufferAllocation newData);
+
+ const Data& getData(DocumentPart part, DataLocation loc) const;
+
+ BufferAllocation serializeDocumentIdOnlyHeader(
+ const DocumentId& id,
+ const document::DocumentTypeRepo&);
+ BufferAllocation serializeHeader(const Document& doc);
+ BufferAllocation serializeBody(const Document& doc);
+
+ friend class SimpleMemFileIOBufferTest;
+};
+
+}
+}
+
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp
new file mode 100644
index 00000000000..fecdfb1b1c8
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/slotreadplanner.h>
+
+#include <vespa/memfilepersistence/memfile/memslot.h>
+#include <algorithm>
+
+namespace storage {
+namespace memfile {
+
+SlotDiskIoPlanner::SlotDiskIoPlanner(
+ const std::vector<const MemSlot*> desiredSlots,
+ DocumentPart highestPartNeeded,
+ uint32_t maxGap,
+ uint32_t headerBlockStartIndex,
+ uint32_t bodyBlockStartIndex)
+ : _operations(),
+ _startIndexes(2, 0)
+{
+ _startIndexes[HEADER] = headerBlockStartIndex;
+ _startIndexes[BODY] = bodyBlockStartIndex;
+ processSlots(desiredSlots, highestPartNeeded, maxGap);
+}
+
+namespace {
+ uint32_t alignDown(uint32_t value) {
+ uint32_t blocks = value / 512;
+ return blocks * 512;
+ }
+
+ uint32_t alignUp(uint32_t value) {
+ uint32_t blocks = (value + 512 - 1) / 512;
+ return blocks * 512;
+ }
+}
+
+void
+SlotDiskIoPlanner::scheduleLocation(const MemSlot& slot,
+ DocumentPart type,
+ std::vector<DataLocation>& ops)
+{
+ if (!slot.partAvailable(type) && slot.getLocation(type)._size) {
+ ops.push_back(DataLocation(
+ slot.getLocation(type)._pos + _startIndexes[type],
+ slot.getLocation(type)._size));
+ }
+}
+
+void
+SlotDiskIoPlanner::processSlots(
+ const std::vector<const MemSlot*> desiredSlots,
+ DocumentPart highestPartNeeded,
+ uint32_t maxGap)
+{
+ // Build list of disk read operations to do
+ std::vector<DataLocation> allOps;
+ // Create list of all locations we need to read
+ for (std::size_t i = 0; i < desiredSlots.size(); ++i) {
+ for (uint32_t p = 0; p <= uint32_t(highestPartNeeded); ++p) {
+ scheduleLocation(*desiredSlots[i], (DocumentPart) p, allOps);
+ }
+ }
+ // Sort list, and join elements close together into single IO ops
+ std::sort(allOps.begin(), allOps.end());
+ for (size_t i = 0; i < allOps.size(); ++i) {
+ uint32_t start = alignDown(allOps[i]._pos);
+ uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size);
+ if (i != 0) {
+ uint32_t lastStop = _operations.back()._pos
+ + _operations.back()._size;
+ if (lastStop >= start || start - lastStop < maxGap) {
+ // Guard against unsigned underflow if the new range is fully
+ // contained within the previous operation.
+ if (stop > lastStop) {
+ _operations.back()._size += (stop - lastStop);
+ }
+ continue;
+ }
+ }
+ _operations.push_back(DataLocation(start, stop - start));
+ }
+}
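+
+// Illustrative example of the merging above: with maxGap = 1024, locations
+// (100,200) and (700,100) align to [0,512) and [512,1024) respectively; the
+// gap between them is 0, so they are joined into the single read operation
+// (0,1024). A location starting at 8192 would begin a new operation, since
+// the gap of 7168 bytes exceeds maxGap.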
+
+uint32_t
+SlotDiskIoPlanner::getTotalBufferSize() const
+{
+ uint32_t totalSize = 0;
+ for (size_t i = 0; i < _operations.size(); ++i) {
+ totalSize += _operations[i]._size;
+ }
+ return totalSize;
+}
+
+void
+SlotDiskIoPlanner::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ (void) verbose; (void) indent;
+ for (std::size_t i = 0; i < _operations.size(); ++i) {
+ if (i > 0) out << ",";
+ out << "[" << _operations[i]._pos << ","
+ << (_operations[i]._size + _operations[i]._pos) << "]";
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h
new file mode 100644
index 00000000000..a2e17debcdf
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::SlotDiskIoPlanner
+ * \ingroup memfile
+ *
+ * \brief Creates a minimal list of IO operations to perform against the disk.
+ *
+ * When accessing many locations on disk, it is not necessarily ideal to do a
+ * disk access per location. This class merges nearby locations into a minimal
+ * set of read operations, never reading across a gap of uninteresting data
+ * larger than a given maximum.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class MemSlot;
+
+class SlotDiskIoPlanner : public Types, public vespalib::Printable
+{
+public:
+ SlotDiskIoPlanner(const std::vector<const MemSlot*> desiredSlots,
+ DocumentPart highestPartNeeded,
+ uint32_t maxGap,
+ uint32_t headerBlockStartIndex,
+ uint32_t bodyBlockStartIndex);
+
+ const std::vector<DataLocation>& getIoOperations() const {
+ return _operations;
+ }
+
+ /**
+ * Get the total amount of space needed to hold all the data from all
+ * locations identified to be accessed. Useful for creating a buffer of the
+ * correct size.
+ */
+ uint32_t getTotalBufferSize() const;
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+private:
+ std::vector<DataLocation> _operations;
+ std::vector<uint32_t> _startIndexes;
+
+ void processSlots(
+ const std::vector<const MemSlot*> desiredSlots,
+ DocumentPart highestPartNeeded,
+ uint32_t maxGap);
+
+ void scheduleLocation(const MemSlot&, DocumentPart,
+ std::vector<DataLocation>&);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp
new file mode 100644
index 00000000000..52408067977
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/mapper/uniqueslotgenerator.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/memfilepersistence/memfile/doccache.h>
+
+namespace storage {
+
+namespace memfile {
+
+bool
+UniqueSlotGenerator::ContentLocation::operator==(
+ const ContentLocation& other) const
+{
+ if (_loc.valid() && other._loc.valid()) return _loc == other._loc;
+ return _content == other._content;
+}
+
+bool
+UniqueSlotGenerator::ContentLocation::operator<(
+ const ContentLocation& other) const
+{
+ if (_loc.valid() && other._loc.valid()) return _loc < other._loc;
+ if (other._loc.valid()) return false;
+ if (_loc.valid()) return true;
+ return _content < other._content;
+}
+
+void
+UniqueSlotGenerator::ContentLocation::print(std::ostream& out, bool,
+ const std::string&) const
+{
+ out << "ContentLocation(" << _loc << ", "
+ << std::hex << _content << std::dec << ")";
+}
+
+UniqueSlotGenerator::UniqueSlotGenerator(const MemFile& memFile)
+ : _slots(2),
+ _slotsInOrder(2)
+{
+ for (uint32_t i = 0; i < memFile.getSlotCount(); i++) {
+ const MemSlot& slot = memFile[i];
+ addSlot(HEADER, slot);
+ if (slot.hasBodyContent()) addSlot(BODY, slot);
+ }
+}
+
+void
+UniqueSlotGenerator::addSlot(DocumentPart part, const MemSlot& slot)
+{
+ ContentLocation contentLoc(slot.getLocation(part));
+ if (slot.getDocCache() != NULL) {
+ contentLoc._content = slot.getDocCache()->getPart(part).get();
+ }
+ SlotList& loc = _slots[part][contentLoc];
+ loc.push_back(&slot);
+ if (loc.size() == 1) {
+ _slotsInOrder[part].push_back(&loc);
+ }
+}
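+
+// Illustrative consequence of the mapping above: if two slots share the same
+// persisted header location, both end up in the same SlotList, so
+// getNumUnique(HEADER) counts that location once while getSlots(HEADER, index)
+// returns both slots.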
+
+void
+UniqueSlotGenerator::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ (void) verbose;
+ for (uint32_t i=0; i<2; ++i) {
+ DocumentPart part(static_cast<DocumentPart>(i));
+ out << getDocumentPartName(part) << ":";
+ const OrderedSlotList& list = _slotsInOrder[part];
+ for (uint32_t j = 0; j < list.size(); ++j) {
+ const SlotList& slotList = *list[j];
+ out << "\n" << indent << slotList[0]->getLocation(part) << ": ";
+ for (uint32_t k = 0; k < slotList.size(); ++k) {
+ if (k > 0) out << ", ";
+ out << slotList[k]->getTimestamp();
+ }
+ }
+ if (i == 0) out << "\n";
+ }
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h
new file mode 100644
index 00000000000..c2ce3adede8
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::UniqueSlotGenerator
+ * \ingroup memfile
+ *
+ * Generates a mapping from unique content locations on disk
+ * (or unique documents if not persisted) to a list of slots.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class MemSlot;
+class MemFile;
+
+class UniqueSlotGenerator : private Types, public vespalib::Printable
+{
+public:
+ typedef std::vector<const MemSlot*> SlotList;
+
+private:
+ struct ContentLocation : public vespalib::Printable {
+ DataLocation _loc;
+ const document::StructFieldValue* _content;
+
+ ContentLocation(const DataLocation& loc) : _loc(loc), _content(0) {}
+
+ ContentLocation(const DataLocation& loc,
+ const document::StructFieldValue* content)
+ : _loc(loc), _content(content) {}
+
+ bool operator<(const ContentLocation& other) const;
+ bool operator==(const ContentLocation& other) const;
+
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ };
+
+ void addSlot(DocumentPart, const MemSlot&);
+
+ typedef std::map<ContentLocation, SlotList> LocationToSlotMap;
+ typedef std::vector<SlotList*> OrderedSlotList;
+
+ std::vector<LocationToSlotMap> _slots;
+ std::vector<OrderedSlotList> _slotsInOrder;
+
+public:
+ UniqueSlotGenerator(const MemFile& memFile);
+
+ uint32_t getNumUnique(DocumentPart part) const {
+ return _slotsInOrder[part].size();
+ }
+
+ const SlotList& getSlots(DocumentPart part, uint32_t uniqueIndex) const {
+ return *_slotsInOrder[part][uniqueIndex];
+ }
+
+ void print(std::ostream&, bool verbose, const std::string& indent) const;
+
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h
new file mode 100644
index 00000000000..b57734c2b24
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::VersionSerializer
+ * \ingroup memfile
+ *
+ * \brief Superclass for file mappers implementing a file format.
+ *
+ * An implementation of this handles all specifics of reading and writing
+ * a file format.
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/mapper/buffer.h>
+#include <vespa/memfilepersistence/mapper/mapperslotoperation.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/vespalib/util/linkedptr.h>
+#include <vespa/memfilepersistence/memfile/memfileiointerface.h>
+
+namespace storage {
+namespace memfile {
+
+// Avoid circular dependencies
+class MemFileEnvironment;
+class Options;
+
+struct VersionSerializer : protected Types {
+ typedef vespalib::LinkedPtr<VersionSerializer> LP;
+
+ virtual ~VersionSerializer() {}
+
+ /** Returns the file version this implementation handles. */
+ virtual FileVersion getFileVersion() = 0;
+
+ /**
+ * The MemFileMapper main class reads the file header to figure out which
+ * version the file is in. loadFile is then called on the correct
+ * implementation to interpret the file. The part of the file that has
+ * already been read is passed to loadFile to avoid re-reading the initial
+ * data.
+ */
+ virtual void loadFile(MemFile& file, Environment&,
+ Buffer& buffer, uint64_t bytesRead) = 0;
+
+ /**
+ * Flushes all content in the MemFile that is altered or not yet persisted
+ * to the physical file. This function should not handle file rewriting; if
+ * the updates cannot be applied to the existing file it must return, so
+ * that the caller can rewrite the file, possibly in another format.
+ *
+ * Flush must update the following in the MemFile:
+ * - Update state saying all is persisted and nothing is altered.
+ * - All block positions and sizes must be correct after the flush.
+ *
+ * @return A FlushResult indicating whether the changes were written or
+ * whether a full file rewrite is required.
+ */
+ enum class FlushResult {
+ ChangesWritten,
+ TooFewMetaEntries,
+ TooSmall,
+ TooLarge,
+ UnAltered
+ };
+ virtual FlushResult flushUpdatesToFile(MemFile&, Environment&) = 0;
+
+ /**
+ * This function is typically called when the file doesn't already exist, or
+ * when flushUpdatesToFile indicates that the file needs a total rewrite.
+ * Before calling this function, all data must be cached in the MemFile
+ * instance.
+ */
+ virtual void rewriteFile(MemFile&, Environment&) = 0;
+
+ /**
+ * Check the file for errors and generate an error report. Fix the errors
+ * if repairErrors is set. Returns true if no failures were found or no
+ * errors were fixed.
+ */
+ virtual bool verify(MemFile&, Environment&,
+ std::ostream& errorReport, bool repairErrors,
+ uint16_t fileVerifyFlags) = 0;
+
+
+ /**
+ * Cache locations into the given buffer.
+ */
+ virtual void cacheLocations(MemFileIOInterface& buffer,
+ Environment& env,
+ const Options& options,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations) = 0;
+
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt
new file mode 100644
index 00000000000..798b0b873e5
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_memfile OBJECT
+ SOURCES
+ memslot.cpp
+ memfile.cpp
+ slotiterator.cpp
+ memfilecompactor.cpp
+ memfilecache.cpp
+ shared_data_location_tracker.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp
new file mode 100644
index 00000000000..281ada62a89
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp
@@ -0,0 +1,1116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+
+#include <ext/algorithm>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
+#include <vespa/memfilepersistence/memfile/memfilecompactor.h>
+#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h>
+#include <vespa/vespalib/util/crc.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <iomanip>
+
+namespace {
+
+template<class A>
+std::vector<A> toVector(A entry) {
+ std::vector<A> entries;
+ entries.push_back(entry);
+ return entries;
+}
+
+}
+
+LOG_SETUP(".persistence.memfile.memfile");
+
+#define FAIL_INCONSISTENT(msg, slot) \
+{ \
+ std::ostringstream error; \
+ error << msg; \
+ throw InconsistentSlotException(slot.toString() + ": " + error.str(), \
+ _file, slot, VESPA_STRLOC); \
+}
+#define FAIL_INCONSISTENT_FILE(msg) \
+{ \
+ std::ostringstream error; \
+ error << msg; \
+ throw InconsistentException(error.str(), _file, VESPA_STRLOC); \
+}
+
+#define RETHROW_NON_MEMFILE_EXCEPTIONS \
+ catch (MemFileException& exceptionToRethrow) { \
+ throw; \
+ } catch (vespalib::IoException& exceptionToRethrow) { \
+ std::ostringstream wrappedMessage; \
+ wrappedMessage << "Got IO exception while processing within " \
+ << "memfile. Wrapping in memfile exception: "; \
+ const std::string& sourceExceptionMessage( \
+ exceptionToRethrow.getMessage()); \
+ size_t pos = sourceExceptionMessage.find(':'); \
+ wrappedMessage << sourceExceptionMessage.substr(pos + 2); \
+ throw MemFileIoException(wrappedMessage.str(), _file, \
+ exceptionToRethrow.getType(), VESPA_STRLOC) \
+ .setCause(exceptionToRethrow); \
+ } catch (vespalib::Exception& exceptionToRethrow) { \
+ throw MemFileWrapperException( \
+ "Got generic exception while processing within " \
+ "memfile. Wrapping in memfile exception: " \
+ + std::string(exceptionToRethrow.getMessage()), \
+ _file, VESPA_STRLOC).setCause(exceptionToRethrow); \
+ }
+
+namespace storage {
+namespace memfile {
+
+MemFile::MemFile(const FileSpecification& file,
+ Environment& env,
+ const LoadOptions& opts)
+ : _flags(BUCKET_INFO_OUTDATED),
+ _info(),
+ _entries(),
+ _file(file),
+ _currentVersion(UNKNOWN),
+ _env(env)
+{
+ try{
+ env._memFileMapper.loadFile(*this, env, opts.autoRepair);
+ } RETHROW_NON_MEMFILE_EXCEPTIONS;
+}
+
+MemFile::MemFile(const FileSpecification& file, Environment& env,
+ bool callLoadFile)
+ : _flags(BUCKET_INFO_OUTDATED),
+ _info(),
+ _entries(),
+ _file(file),
+ _currentVersion(UNKNOWN),
+ _env(env)
+{
+ if (callLoadFile) {
+ env._memFileMapper.loadFile(*this, env, false);
+ }
+}
+
+void
+MemFile::verifyConsistent() const
+{
+ _buffer->verifyConsistent();
+}
+
+uint16_t
+MemFile::getDisk() const
+{
+ return _file.getDirectory().getIndex();
+}
+
+void
+MemFile::move(const FileSpecification& file)
+{
+ // Any given bucket can either be moved to a more specific or less
+ // specific bucket in the same subtree.
+ assert(file.getBucketId().contains(_file.getBucketId())
+ || _file.getBucketId().contains(file.getBucketId()));
+ _buffer->move(file);
+ _file = file;
+}
+
+uint32_t
+MemFile::getSlotCount() const
+{
+ return _entries.size();
+}
+
+const MemSlot*
+MemFile::getSlotWithId(const document::DocumentId& id,
+ framework::MicroSecTime maxTimestamp) const
+{
+ for (uint32_t n=_entries.size(), i=n-1; i<n; --i) {
+ if (_entries[i].getTimestamp() > maxTimestamp) continue;
+ if (id.getGlobalId() != _entries[i].getGlobalId()) continue;
+ if (getDocumentId(_entries[i]) == id) return &_entries[i];
+ }
+ return 0;
+}
+
+namespace {
+
+struct MemSlotTimestampPredicate
+{
+ bool operator()(const MemSlot& a, Types::Timestamp time) const
+ {
+ return a.getTimestamp() < time;
+ }
+};
+
+}
+
+const MemSlot*
+MemFile::getSlotAtTime(Timestamp time) const
+{
+ std::vector<MemSlot>::const_iterator it(
+ std::lower_bound(_entries.begin(), _entries.end(),
+ time, MemSlotTimestampPredicate()));
+ if (it != _entries.end() && it->getTimestamp() == time) {
+ return &*it;
+ }
+ return 0;
+}
+
+void
+MemFile::getSlotsByTimestamp(
+ const std::vector<Timestamp>& timestamps,
+ std::vector<const MemSlot*>& returned) const
+{
+ assert(__gnu_cxx::is_sorted(timestamps.begin(), timestamps.end()));
+
+ std::size_t source = 0;
+ std::size_t target = 0;
+
+ while (source < _entries.size() && target < timestamps.size()) {
+ if (_entries[source].getTimestamp() == timestamps[target]) {
+ returned.push_back(&_entries[source]);
+ ++source;
+ ++target;
+ } else if (_entries[source].getTimestamp() < timestamps[target]) {
+ ++source;
+ } else {
+ ++target;
+ }
+ }
+}
+
+document::Document::UP
+MemFile::getDocument(const MemSlot& slot, GetFlag getFlag) const
+{
+ LOG(spam,
+ "%s: getDocument(%s, %s)",
+ _file.getBucketId().toString().c_str(),
+ slot.toString().c_str(),
+ getFlag == HEADER_ONLY ? "header only" : "full document");
+ ensureDocumentCached(slot, getFlag == HEADER_ONLY);
+
+ auto& repo = _env.repo();
+ Document::UP doc = _buffer->getDocumentHeader(
+ repo, slot.getLocation(HEADER));
+
+ if (doc.get() && getFlag == ALL && slot.getLocation(BODY)._size > 0) {
+ _buffer->readBody(repo, slot.getLocation(BODY), *doc);
+ }
+
+ return doc;
+}
+
+document::DocumentId
+MemFile::getDocumentId(const MemSlot& slot) const
+{
+ LOG(spam,
+ "%s: getDocumentId(%s)",
+ _file.getBucketId().toString().c_str(),
+ slot.toString().c_str());
+ ensureDocumentCached(slot, true);
+
+ return _buffer->getDocumentId(slot.getLocation(HEADER));
+}
+
+void
+MemFile::assertSlotContainedInThisBucket(const MemSlot& slot) const
+{
+ document::BucketId fileBucket(getBucketId());
+ // Non-orderdoc documents should pass this first (very cheap) test.
+ if (slot.getGlobalId().containedInBucket(fileBucket)) {
+ return;
+ }
+ // Expensive path: get doc id and check against it instead.
+ DocumentId id(getDocumentId(slot));
+ document::BucketIdFactory factory;
+ document::BucketId slotBucket(factory.getBucketId(id));
+
+ LOG(spam,
+ "%s: slot %s has GID not contained in bucket, checking against id %s",
+ fileBucket.toString().c_str(),
+ slot.toString().c_str(),
+ id.toString().c_str());
+
+ if (!fileBucket.contains(slotBucket)) {
+ LOG(error,
+ "Slot %s with document ID %s is not contained in %s. Terminating "
+ "in order to avoid bucket corruption.",
+ slot.toString().c_str(),
+ id.toString().c_str(),
+ fileBucket.toString().c_str());
+ assert(false);
+ }
+}
+
+void
+MemFile::addPutSlot(const Document& doc, Timestamp time)
+{
+ DataLocation headerLoc = _buffer->addHeader(doc);
+ DataLocation bodyLoc = _buffer->addBody(doc);
+
+ addSlot(MemSlot(doc.getId().getGlobalId(),
+ time,
+ headerLoc,
+ bodyLoc,
+ IN_USE | CHECKSUM_OUTDATED,
+ 0));
+}
+
+void
+MemFile::addUpdateSlot(const Document& header, const MemSlot& body, Timestamp time)
+{
+ if (!body.getLocation(BODY).valid()) {
+ LOG(error,
+ "Slot %s has invalid body location while not "
+ "having body cached. This is an invalid state.",
+ body.toString().c_str());
+ assert(false);
+ }
+
+ DataLocation headerLoc = _buffer->addHeader(header);
+ DataLocation bodyLoc = body.getLocation(BODY);
+
+ addSlot(MemSlot(header.getId().getGlobalId(),
+ time,
+ headerLoc,
+ bodyLoc,
+ IN_USE | CHECKSUM_OUTDATED,
+ 0));
+}
+
+void
+MemFile::addRemoveSlot(const MemSlot& header, Timestamp time)
+{
+ addSlot(MemSlot(header.getGlobalId(),
+ time,
+ header.getLocation(HEADER),
+ DataLocation(0,0),
+ DELETED | IN_USE | CHECKSUM_OUTDATED,
+ 0));
+}
+
+void
+MemFile::addRemoveSlotForNonExistingEntry(const DocumentId& docId,
+ Timestamp time,
+ RemoveType removeType)
+{
+ addSlot(MemSlot(docId.getGlobalId(),
+ time,
+ _buffer->addDocumentIdOnlyHeader(docId, _env.repo()),
+ DataLocation(0,0),
+ DELETED
+ | IN_USE
+ | CHECKSUM_OUTDATED
+ | (removeType == UNREVERTABLE_REMOVE ? DELETED_IN_PLACE : 0),
+ 0));
+}
+
+void
+MemFile::addSlot(const MemSlot& slot)
+{
+ LOG(spam,
+ "%s: adding %s to memfile",
+ _file.getBucketId().toString().c_str(),
+ slot.toString().c_str());
+ // TODO: Add exception here?
+ //assert(slot.partAvailable(BODY));
+ assert(slot.getLocation(HEADER).valid());
+ assert(slot.getLocation(BODY).valid());
+ // Don't let full disk block remove entries or entries that
+ // are already fully persisted
+
+ if (!slot.deleted()
+ && !slot.deletedInPlace()
+ && !(partPersisted(slot, HEADER)
+ && partPersisted(slot, BODY)))
+ {
+ verifyDiskNotFull();
+ }
+
+ // Optimize common case where slot we're adding has a higher
+ // timestamp than the last slot already stored.
+ if (!_entries.empty()
+ && slot.getTimestamp() > _entries.back().getTimestamp())
+ {
+ _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED;
+ _entries.push_back(slot);
+ return;
+ }
+
+ std::vector<MemSlot> entries;
+ entries.reserve(_entries.size() + 1);
+ bool inserted = false;
+ for (uint32_t i=0; i<_entries.size(); ++i) {
+ if (_entries[i].getTimestamp() == slot.getTimestamp()) {
+ std::ostringstream err;
+ err << "Attempt of adding slot at timestamp "
+ << slot.getTimestamp() << " which already exist in file. "
+ << "Call modifySlot instead.";
+ LOG(error, "%s", err.str().c_str());
+ assert(false);
+ }
+ if (!inserted && _entries[i].getTimestamp() > slot.getTimestamp()) {
+ inserted = true;
+ entries.push_back(slot);
+ }
+ entries.push_back(_entries[i]);
+ }
+ if (!inserted) {
+ entries.push_back(slot);
+ }
+ _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED;
+ _entries.swap(entries);
+}
+
+void
+MemFile::copySlot(const MemFile& source, const MemSlot& slot)
+{
+ addSlot(MemSlot(slot.getGlobalId(),
+ slot.getTimestamp(),
+ _buffer->copyCache(*source._buffer, HEADER, slot.getLocation(HEADER)),
+ _buffer->copyCache(*source._buffer, BODY, slot.getLocation(BODY)),
+ slot.getFlags(),
+ slot.getChecksum()));
+}
+
+class MemFile::MemFileBufferCacheCopier : public BufferCacheCopier
+{
+public:
+ MemFileBufferCacheCopier(MemFile& target, const MemFile& source)
+ : _target(target),
+ _source(source)
+ {
+ }
+
+private:
+ DataLocation doCopyFromSourceToLocal(
+ Types::DocumentPart part,
+ DataLocation sourceLocation) override
+ {
+ return _target._buffer->copyCache(
+ *_source._buffer, part, sourceLocation);
+ }
+
+ MemFile& _target;
+ const MemFile& _source;
+};
+
+void
+MemFile::copySlotsFrom(
+ const MemFile& source,
+ const std::vector<const MemSlot*>& sourceSlots)
+{
+ // TODO we probably want a pre-allocation hint here to avoid many mmaps
+ MemFileBufferCacheCopier cacheCopier(*this, source);
+ SharedDataLocationTracker headerTracker(cacheCopier, HEADER);
+ SharedDataLocationTracker bodyTracker(cacheCopier, BODY);
+
+ for (auto slot : sourceSlots) {
+ auto headerLoc = headerTracker.getOrCreateSharedLocation(
+ slot->getLocation(HEADER));
+ auto bodyLoc = bodyTracker.getOrCreateSharedLocation(
+ slot->getLocation(BODY));
+ addSlot(MemSlot(slot->getGlobalId(),
+ slot->getTimestamp(),
+ headerLoc,
+ bodyLoc,
+ slot->getFlags(),
+ slot->getChecksum()));
+ }
+}
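+
+// Note: the intent of the trackers above (as suggested by
+// getOrCreateSharedLocation) is that source slots sharing a header or body
+// location are copied into the target buffer only once, with all resulting
+// slots pointing at the same copied location.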
+
+void
+MemFile::removeSlot(const MemSlot& slot)
+{
+ _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED;
+ std::vector<MemSlot>::iterator it(
+ std::lower_bound(_entries.begin(), _entries.end(),
+ slot.getTimestamp(),
+ MemSlotTimestampPredicate()));
+ if (it != _entries.end()
+ && it->getTimestamp() == slot.getTimestamp())
+ {
+ _entries.erase(it);
+ } else {
+ LOG(error,
+ "Attempted to remove a slot that does not exist: %s",
+ slot.toString().c_str());
+ assert(false);
+ }
+}
+
+void
+MemFile::removeSlots(const std::vector<const MemSlot*>& slotsToRemove)
+{
+ if (slotsToRemove.empty()) return;
+ // Optimized way of removing slots. Should not throw exceptions,
+ // (and is not exception safe)
+ std::vector<MemSlot> slots(
+ _entries.size() - slotsToRemove.size(),
+ MemSlot(GlobalId(), Timestamp(0), DataLocation(), DataLocation(),
+ 0, 0));
+ uint32_t r=0;
+ for (uint32_t i=0,j=0; i<_entries.size(); ++i) {
+ if (r >= slotsToRemove.size() || slotsToRemove[r] != &_entries[i]) {
+ _entries[i].swap(slots[j]);
+ ++j;
+ } else {
+ ++r;
+ }
+ }
+ _entries.swap(slots);
+ if (_entries.size() != slots.size()) {
+ _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED;
+ }
+ // Verify that we found all slots to remove
+ if (r < slotsToRemove.size()) {
+ Timestamp ts(0);
+ for (uint32_t i=0; i<slotsToRemove.size(); ++i) {
+ assert(slotsToRemove[i]->getTimestamp() > ts);
+ ts = slotsToRemove[i]->getTimestamp();
+ }
+ LOG(error,
+ "Slot %s wasn't in the file. Only existing slots may be "
+ "given to removeSlots as non-existing slot stops other "
+ "slots from being removed.",
+ slotsToRemove[r]->toString().c_str());
+ assert(false);
+ }
+}
+
+void
+MemFile::modifySlot(const MemSlot& slot)
+{
+ _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED;
+ // MemSlot actually pointed to by const MemSlot* is non-const
+ // in entries-vector, so this should be well defined according
+ // to the C++ ISO standard
+ MemSlot* slotToModify = const_cast<MemSlot*>(
+ getSlotAtTime(slot.getTimestamp()));
+
+ assert(slotToModify != NULL);
+
+ LOG(spam, "Modifying %s -> %s",
+ slotToModify->toString().c_str(),
+ slot.toString().c_str());
+ *slotToModify = slot;
+}
+
+void
+MemFile::matchLocationWithFlags(LocationMap& result,
+ DocumentPart part,
+ const MemSlot* slot,
+ uint32_t flags) const
+{
+ DataLocation loc = slot->getLocation(part);
+ bool isPersisted = _buffer->isPersisted(loc, part);
+
+ if ((flags & NON_PERSISTED_LOCATIONS) && !isPersisted) {
+ result[loc].slots.push_back(slot);
+ } else if ((flags & PERSISTED_LOCATIONS) && isPersisted) {
+ result[loc].slots.push_back(slot);
+ }
+}
+
+void
+MemFile::getLocations(LocationMap& headers,
+ LocationMap& bodies,
+ uint32_t flags) const
+{
+ for (uint32_t i = 0; i < _entries.size(); ++i) {
+ matchLocationWithFlags(headers, HEADER, &_entries[i], flags);
+ matchLocationWithFlags(bodies, BODY, &_entries[i], flags);
+ }
+}
+
+bool
+MemFile::compact()
+{
+ auto options = _env.acquireConfigReadLock().options();
+ MemFileCompactor compactor(
+ _env._clock.getTimeInMicros(),
+ CompactionOptions()
+ .revertTimePeriod(options->_revertTimePeriod)
+ .keepRemoveTimePeriod(options->_keepRemoveTimePeriod)
+ .maxDocumentVersions(options->_maxDocumentVersions));
+ std::vector<const MemSlot*> slotsToRemove(
+ compactor.getSlotsToRemove(*this));
+ removeSlots(slotsToRemove);
+ return !slotsToRemove.empty();
+}
+
+MemFile::const_iterator
+MemFile::begin(uint32_t iteratorFlags,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp) const
+{
+ if (iteratorFlags & ITERATE_GID_UNIQUE) {
+ return const_iterator(SlotIterator::CUP(new GidUniqueSlotIterator(
+ *this, iteratorFlags & ITERATE_REMOVED,
+ fromTimestamp, toTimestamp)));
+ } else {
+ return const_iterator(SlotIterator::CUP(new AllSlotsIterator(
+ *this, iteratorFlags & ITERATE_REMOVED,
+ fromTimestamp, toTimestamp)));
+ }
+}
+
+void
+MemFile::ensureDocumentIdCached(const MemSlot& slot) const
+{
+ _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER)));
+}
+
+void
+MemFile::ensureDocumentCached(const MemSlot& slot, bool headerOnly) const
+{
+ _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER)));
+ if (!headerOnly) {
+ _buffer->ensureCached(_env, BODY, toVector(slot.getLocation(BODY)));
+ }
+}
+
+void
+MemFile::ensureDocumentCached(const std::vector<Timestamp>& timestamps,
+ bool headerOnly) const
+{
+ LOG(spam, "ensureDocumentCached with %zu timestamps",
+ timestamps.size());
+ if (!fileExists()) {
+ return;
+ }
+ try{
+ std::vector<const MemSlot*> slots;
+ getSlotsByTimestamp(timestamps, slots);
+
+ std::vector<DataLocation> headerLocations;
+ headerLocations.reserve(timestamps.size());
+ std::vector<DataLocation> bodyLocations;
+ if (!headerOnly) {
+ bodyLocations.reserve(timestamps.size());
+ }
+ for (uint32_t i = 0; i < slots.size(); ++i) {
+ headerLocations.push_back(slots[i]->getLocation(HEADER));
+
+ if (!headerOnly) {
+ bodyLocations.push_back(slots[i]->getLocation(BODY));
+ }
+ }
+
+ _buffer->ensureCached(_env, HEADER, headerLocations);
+ if (!headerOnly) {
+ _buffer->ensureCached(_env, BODY, bodyLocations);
+ }
+ } RETHROW_NON_MEMFILE_EXCEPTIONS;
+}
+
+void
+MemFile::ensureEntriesCached(bool includeBody) const
+{
+ if (!fileExists()) {
+ return;
+ }
+
+ try{
+ std::vector<DataLocation> headerLocations;
+ std::vector<DataLocation> bodyLocations;
+
+ for (uint32_t i = 0; i < _entries.size(); ++i) {
+ headerLocations.push_back(_entries[i].getLocation(HEADER));
+
+ if (includeBody) {
+ bodyLocations.push_back(_entries[i].getLocation(BODY));
+ }
+ }
+
+ _buffer->ensureCached(_env, HEADER, headerLocations);
+ if (includeBody) {
+ _buffer->ensureCached(_env, BODY, bodyLocations);
+ }
+ } RETHROW_NON_MEMFILE_EXCEPTIONS;
+}
+
+void
+MemFile::ensureHeaderBlockCached() const
+{
+ ensureEntriesCached(false);
+}
+
+void
+MemFile::ensureBodyBlockCached() const
+{
+ ensureEntriesCached(true);
+}
+
+/**
+ * Functionally this is the same as ensureBodyBlockCached, but with
+ * clearer semantics.
+ */
+void
+MemFile::ensureHeaderAndBodyBlocksCached() const
+{
+ ensureEntriesCached(true);
+}
+
+bool
+MemFile::documentIdAvailable(const MemSlot& slot) const
+{
+ return partAvailable(slot, HEADER);
+}
+
+bool
+MemFile::partAvailable(const MemSlot& slot, DocumentPart part) const
+{
+ return _buffer->isCached(slot.getLocation(part), part);
+}
+
+bool
+MemFile::partPersisted(const MemSlot& slot, DocumentPart part) const
+{
+ assert(_buffer.get());
+
+ return _buffer->isPersisted(slot.getLocation(part), part);
+}
+
+uint32_t
+MemFile::getSerializedSize(const MemSlot& slot, DocumentPart part) const {
+ DataLocation loc = slot.getLocation(part);
+ return _buffer->getSerializedSize(part, loc);
+}
+
+const Types::BucketInfo&
+MemFile::getBucketInfo() const
+{
+ if (_flags & BUCKET_INFO_OUTDATED) {
+ uint32_t uniqueCount = 0, uniqueSize = 0, usedSize = 0;
+ uint32_t checksum = 0;
+
+ typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap;
+ SeenMap seen(_entries.size() * 2);
+ uint32_t maxHeaderExtent = 0, maxBodyExtent = 0;
+
+ MemSlotVector::const_reverse_iterator e(_entries.rend());
+ for (MemSlotVector::const_reverse_iterator it(_entries.rbegin());
+ it != e; ++it)
+ {
+ const MemSlot& slot(*it);
+ // We now always write sequentially within the blocks, so used size
+ // for one block is effectively the max location extent seen within
+ // it.
+ maxHeaderExtent = std::max(maxHeaderExtent,
+ slot.getLocation(HEADER)._pos
+ + slot.getLocation(HEADER)._size);
+ maxBodyExtent = std::max(maxBodyExtent,
+ slot.getLocation(BODY)._pos
+ + slot.getLocation(BODY)._size);
+
+ SeenMap::insert_result inserted(seen.insert(slot.getGlobalId()));
+ if (!inserted.second) {
+ continue;
+ }
+ if (slot.deleted()) continue;
+
+ const uint32_t slotSize = slot.getLocation(HEADER)._size
+ + slot.getLocation(BODY)._size;
+ uniqueSize += slotSize;
+ ++uniqueCount;
+
+ vespalib::crc_32_type calculator;
+ calculator.process_bytes(slot.getGlobalId().get(),
+ GlobalId::LENGTH);
+ Timestamp time = slot.getTimestamp();
+ calculator.process_bytes(&time, sizeof(Timestamp));
+ checksum ^= calculator.checksum();
+ }
+
+ if (uniqueCount > 0 && checksum < 2) {
+ checksum += 2;
+ }
+
+ // Only set used size if we have any entries at all.
+ if (!_entries.empty()) {
+ usedSize = 64 + 40 * _entries.size()
+ + maxHeaderExtent + maxBodyExtent;
+ }
+
+ spi::BucketInfo info(spi::BucketChecksum(checksum),
+ uniqueCount,
+ uniqueSize,
+ _entries.size(),
+ usedSize,
+ BucketInfo::READY,
+ BucketInfo::NOT_ACTIVE);
+
+ _info = info;
+ _flags &= ~BUCKET_INFO_OUTDATED;
+ }
+ return _info;
+}
+
+void
+MemFile::flushToDisk(FlushFlag flag)
+{
+ if ((flag == CHECK_NON_DIRTY_FILE_FOR_SPACE) || (_flags & SLOTS_ALTERED)) {
+ LOG(spam, "Flushing %s to disk since flags is %x", toString().c_str(), _flags);
+ try{
+ _env._memFileMapper.flush(*this, _env);
+ } RETHROW_NON_MEMFILE_EXCEPTIONS;
+ } else {
+ LOG(spam, "Not flushing %s as it is not altered", toString().c_str());
+ }
+
+ // For now, close all files when done flushing, to avoid having too
+ // many open at the same time. Later, the cache may keep a limited
+ // number of file handles open.
+ getMemFileIO().close();
+}
+
+void
+MemFile::clearCache(DocumentPart part)
+{
+ _buffer->clear(part);
+ if (part == HEADER) {
+ _cacheSizeOverride.headerSize = 0;
+ } else {
+ _cacheSizeOverride.bodySize = 0;
+ }
+}
+
+bool
+MemFile::repair(std::ostream& errorReport, uint32_t verifyFlags)
+{
+ try{
+ return _env._memFileMapper.repair(
+ *this, _env, errorReport, verifyFlags);
+ } RETHROW_NON_MEMFILE_EXCEPTIONS;
+}
+
+void
+MemFile::resetMetaState()
+{
+ LOG(debug, "Resetting meta state for MemFile");
+ _flags = BUCKET_INFO_OUTDATED;
+ _currentVersion = UNKNOWN;
+ _info = BucketInfo();
+ _entries.clear();
+}
+
+MemSlot::MemoryUsage
+MemFile::getCacheSize() const
+{
+ assert(_buffer.get());
+
+ if (_cacheSizeOverride.sum() > 0) {
+ return _cacheSizeOverride;
+ }
+
+ MemSlot::MemoryUsage retVal;
+ retVal.metaSize = sizeof(MemSlot) * _entries.size();
+ retVal.headerSize += _buffer->getCachedSize(HEADER);
+ retVal.bodySize += _buffer->getCachedSize(BODY);
+ return retVal;
+}
+
+void
+MemFile::verifyDiskNotFull()
+{
+ const double maxFillRate(
+ _env.acquireConfigReadLock().options()->_diskFullFactor);
+
+ Directory& dir = _file.getDirectory();
+
+ if (dir.getPartition().getMonitor() == 0) {
+ LOG(warning, "No partition monitor found for directory %s. Skipping "
+ "disk full test.", dir.toString(true).c_str());
+ } else if (dir.isFull(0, maxFillRate)) {
+ std::ostringstream token;
+ token << dir << " is full";
+ std::ostringstream ost;
+ ost << "Disallowing operation on file " << getFile().getPath()
+ << " because disk is or would be "
+ << (100 * dir.getPartition().getMonitor()
+ ->getFillRate()) << " % full, which is "
+ << "more than the max setting of "
+ << 100 * maxFillRate << " % full."
+ << " (Note that this may be both due to space or inodes. "
+ << "Check \"df -i\" too if manually checking)"
+ << " (" << dir.toString(true) << ")";
+ LOGBT(warning, token.str(), "%s", ost.str().c_str());
+ throw vespalib::IoException(
+ ost.str(), vespalib::IoException::NO_SPACE, VESPA_STRLOC);
+ } else {
+ LOG(spam, "Disk will only be %f %% full after operation, which "
+ "is below limit of %f %%; allowing it to go through.",
+ 100.0 * dir.getPartition().getMonitor()
+ ->getFillRate(),
+ 100.0 * maxFillRate);
+ }
+}
+
+bool
+MemFile::operator==(const MemFile& other) const
+{
+ if (_info == other._info &&
+ _entries.size() == other._entries.size() &&
+ _file == other._file &&
+ _currentVersion == other._currentVersion)
+ {
+ for (uint32_t i=0, n=_entries.size(); i<n; ++i) {
+ if (_entries[i] != other._entries[i]) return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+namespace {
+ void printMemFlags(std::ostream& out, uint32_t flags) {
+ bool anyPrinted = false;
+ for (uint32_t val=1,i=1; i<=32; ++i, val *= 2) {
+ if (flags & val) {
+ if (anyPrinted) { out << "|"; }
+ anyPrinted = true;
+ const char* name = Types::getMemFileFlagName(
+ static_cast<Types::MemFileFlag>(val));
+ if (strcmp(name, "INVALID") == 0) {
+ out << "INVALID(" << std::hex << val << std::dec << ")";
+ } else {
+ out << name;
+ }
+ }
+ }
+ if (!anyPrinted) out << "none";
+ }
+}
+
+void
+MemFile::printHeader(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ if (!verbose) {
+ out << "MemFile(" << _file.getBucketId() << ", dir "
+ << _file.getDirectory().getIndex();
+ } else {
+ out << "MemFile(" << _file.getBucketId()
+ << "\n" << indent << " Path(\""
+ << _file.getPath() << "\")"
+ << "\n" << indent << " Wanted version("
+ << Types::getFileVersionName(_file.getWantedFileVersion())
+ << "(" << std::hex << _file.getWantedFileVersion() << "))"
+ << "\n" << indent << " Current version("
+ << Types::getFileVersionName(_currentVersion)
+ << "(" << std::hex << _currentVersion << "))"
+ << "\n" << indent << " " << getBucketInfo()
+ << "\n" << indent << " Flags ";
+ printMemFlags(out, _flags);
+
+ if (_formatData.get()) {
+ out << "\n" << indent << " " << _formatData->toString();
+ }
+ }
+}
+
+void
+MemFile::printEntries(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ if (verbose && !_entries.empty()) {
+ out << ") {";
+ for (uint32_t i=0; i<_entries.size(); ++i) {
+ out << "\n" << indent << " ";
+ print(_entries[i], out, false, indent + " ");
+ }
+ out << "\n" << indent << "}";
+ } else {
+ out << ", " << _entries.size() << " entries)";
+ }
+}
+
+void
+MemFile::printEntriesState(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ for (uint32_t i=0; i<_entries.size(); ++i) {
+ if (verbose) {
+ printUserFriendly(_entries[i], out, indent);
+ } else {
+ print(_entries[i], out, false, indent);
+ }
+ out << "\n" << indent;
+ }
+ const SimpleMemFileIOBuffer& ioBuf(
+ static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO()));
+ const FileInfo& fileInfo(ioBuf.getFileInfo());
+
+ unsigned int emptyCount = fileInfo._metaDataListSize - _entries.size();
+ if (emptyCount > 0) {
+ out << std::dec << emptyCount << " empty entries.\n" << indent;
+ }
+}
+
+void
+MemFile::print(std::ostream& out, bool verbose,
+ const std::string& indent) const
+{
+ printHeader(out, verbose, indent);
+ printEntries(out, verbose, indent);
+}
+
+void
+MemFile::printUserFriendly(const MemSlot& slot,
+ std::ostream& out,
+ const std::string& indent) const
+{
+ out << "MemSlot(" << slot.getGlobalId()
+ << std::setfill(' ')
+ << std::dec << "\n"
+ << indent << " Header pos: "
+ << std::setw(10) << slot.getLocation(HEADER)._pos
+ << " - " << std::setw(10) << slot.getLocation(HEADER)._size
+ << ", Body pos: " << std::setw(10) << slot.getLocation(BODY)._pos
+ << " - " << std::setw(10) << slot.getLocation(BODY)._size << "\n" << indent
+ << " Timestamp: " << slot.getTimestamp().toString()
+ << " (" << slot.getTimestamp().getTime() << ")\n"
+ << indent << " Checksum: 0x"
+ << std::hex << std::setw(4) << slot.getChecksum()
+ << std::setfill(' ') << "\n" << indent << " Flags: 0x"
+ << std::setw(4) << slot.getFlags();
+ std::list<std::string> flags;
+
+ if ((slot.getFlags() & IN_USE) == 0) flags.push_back("NOT IN USE");
+ if ((slot.getFlags() & DELETED) != 0) flags.push_back("DELETED");
+ if ((slot.getFlags() & DELETED_IN_PLACE) != 0) flags.push_back("DELETED_IN_PLACE");
+ if ((slot.getFlags() & CHECKSUM_OUTDATED) != 0) flags.push_back("CHECKSUM_OUTDATED");
+
+ for (std::list<std::string>::iterator it = flags.begin();
+ it != flags.end(); ++it)
+ {
+ out << ", " << *it;
+ }
+
+ const document::DocumentId id = getDocumentId(slot);
+
+ out << "\n" << indent << " Name: " << id;
+ document::BucketIdFactory factory;
+ document::BucketId bucket(
+ factory.getBucketId(
+ document::DocumentId(id)));
+ out << "\n" << indent << " Bucket: " << bucket;
+ out << ")";
+}
+
+void
+MemFile::print(const MemSlot& slot,
+ std::ostream& out,
+ bool verbose,
+ const std::string& indent) const
+{
+ if (verbose) {
+ out << "MemSlot(";
+ }
+ out << std::dec << slot.getTimestamp() << ", " << slot.getGlobalId() << ", h "
+ << slot.getLocation(HEADER)._pos << " - " << slot.getLocation(HEADER)._size << ", b "
+ << slot.getLocation(BODY)._pos << " - " << slot.getLocation(BODY)._size << ", f "
+
+ << std::hex << slot.getFlags() << ", c " << slot.getChecksum()
+ << ", C(" << (documentIdAvailable(slot) ? "D" : "")
+ << (partAvailable(slot, HEADER) ? "H" : "")
+ << (partAvailable(slot, BODY) ? "B" : "")
+ << ")";
+ if (verbose) {
+ out << ") {";
+ if (documentIdAvailable(slot)) {
+ out << "\n" << indent << " ";
+
+ getDocument(slot, ALL)
+ ->print(out, true, indent + " ");
+ } else {
+ out << "\n" << indent << " Nothing cached beyond metadata.";
+ }
+ out << "\n" << indent << "}";
+ }
+}
+
+void
+MemFile::printState(std::ostream& out, bool userFriendlyOutput,
+ bool printBody, bool printHeader2,
+ //SlotFile::MetaDataOrder order,
+ const std::string& indent) const
+{
+ const SimpleMemFileIOBuffer& ioBuf(
+ static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO()));
+ const FileInfo& fileInfo(ioBuf.getFileInfo());
+
+ out << "\n" << indent << "Filename: '" << getFile().getPath() << "'";
+ if (!fileExists()) {
+ out << " (non-existing)";
+ return;
+ } else if (ioBuf.getFileHandle().isOpen()) {
+ out << " (fd " << ioBuf.getFileHandle().getFileDescriptor() << ")";
+ }
+ out << "\n";
+
+ uint32_t filesize = ioBuf.getFileHandle().getFileSize();
+ out << "Filesize: " << filesize << "\n";
+ Buffer buffer(filesize);
+ char* buf = buffer.getBuffer();
+ uint32_t readBytes = ioBuf.getFileHandle().read(buf, filesize, 0);
+ if (readBytes != filesize) {
+ out << "Failed to read whole file of size " << filesize
+ << ". Adjusting file size to " << readBytes
+ << " we managed to read.";
+ filesize = readBytes;
+ }
+
+ const Header* header(reinterpret_cast<const Header*>(buf));
+ header->print(out);
+ out << "\n" << indent;
+
+ if (filesize < fileInfo.getHeaderBlockStartIndex())
+ {
+ out << "File not big enough to contain all "
+ << fileInfo._metaDataListSize << " meta data entries.\n"
+ << indent;
+ } else {
+ printEntriesState(out, userFriendlyOutput, indent);
+ }
+
+ if (filesize < fileInfo.getBodyBlockStartIndex())
+ {
+ out << "File not big enough to contain the whole "
+ << fileInfo._headerBlockSize << " byte header block.\n" << indent;
+ } else {
+ out << "Header block: (" << std::dec << fileInfo._headerBlockSize
+ << "b)";
+ if (printHeader2) {
+ const char* start = &buf[0] + fileInfo.getHeaderBlockStartIndex();
+ out << "\n" << indent;
+ document::StringUtil::printAsHex(
+ out, start, fileInfo._headerBlockSize, 16, false);
+ }
+ out << "\n" << indent;
+ }
+
+ if (filesize < fileInfo.getFileSize())
+ {
+ out << "File not big enough to contain the whole "
+ << fileInfo._bodyBlockSize << " byte content block.\n" << indent;
+ } else {
+ out << "Content block: (" << std::dec << fileInfo._bodyBlockSize << "b)";
+ if (printBody) {
+ const char* start = &buf[0] + fileInfo.getBodyBlockStartIndex();
+ out << "\n" << indent;
+ document::StringUtil::printAsHex(
+ out, start, fileInfo._bodyBlockSize, 16, false);
+ }
+ out << "\n" << indent;
+ }
+}
+
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h
new file mode 100644
index 00000000000..ccd8a306332
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h
@@ -0,0 +1,316 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::MemFile
+ * \ingroup memfile
+ *
+ * \brief Class representing a file storing documents in slots.
+ *
+ * This is an in-memory representation of the file that isn't constrained by
+ * the actual file format.
+ *
+ * A memfile must contain:
+ * - A header with generic information about the file, including the version
+ * of the file format.
+ *
+ * A memfile may also contain:
+ * - Cached meta data describing contents.
+ * - Cached document header content.
+ * - Cached document body content.
+ *
+ * The representation knows which parts of it are persisted in a file, and
+ * which parts exist only in memory.
+ *
+ * For ease of use, information is loaded into the cache automatically by the
+ * MemFile implementation. Thus, the memfile needs a pointer to the file mapper
+ * implementation.
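+ *
+ * Rough usage sketch, assuming a MemFile reference 'file' (typically obtained
+ * through the cache) and a Document 'doc' to store; the timestamp value is a
+ * placeholder:
+ *
+ *   file.getBucketInfo();                  // recalculated if metadata changed
+ *   file.addPutSlot(doc, Timestamp(1234)); // stage a new slot in memory
+ *   file.flushToDisk();                    // persist altered slots to disk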
+ */
+
+#pragma once
+
+#include <vespa/memfilepersistence/common/filespecification.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/memfile/memslot.h>
+#include <vespa/memfilepersistence/memfile/slotiterator.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/memfilepersistence/memfile/memfileiointerface.h>
+
+namespace storage {
+namespace memfile {
+
+class Environment;
+//class SlotFileV1SerializerTest;
+
+class MemFile : private Types,
+ public vespalib::Printable
+{
+public:
+ struct FormatSpecificData {
+ typedef std::unique_ptr<FormatSpecificData> UP;
+
+ virtual ~FormatSpecificData() {}
+
+ virtual std::string toString() const = 0;
+ };
+
+ typedef IteratorWrapper const_iterator;
+
+ struct LocationContent {
+ std::vector<const MemSlot*> slots;
+ };
+ typedef std::map<DataLocation, LocationContent> LocationMap;
+ typedef std::vector<MemSlot> MemSlotVector;
+
+private:
+ void verifyDiskNotFull();
+
+ mutable uint32_t _flags;
+ mutable BucketInfo _info;
+ MemFileIOInterface::UP _buffer;
+ MemSlotVector _entries;
+ FileSpecification _file;
+ mutable FileVersion _currentVersion;
+ Environment& _env;
+ mutable FormatSpecificData::UP _formatData;
+ MemSlot::MemoryUsage _cacheSizeOverride;
+
+ friend class MemFilePtr;
+ friend class MemCacheTest;
+ class MemFileBufferCacheCopier;
+
+ /**
+ * Verifier tests need to be able to create memfiles that haven't called
+ * loadFile, and possibly to call loadFile without autorepair set. Such
+ * memfiles are invalid, as many functions require header+metadata to be
+ * cached. Should only be used for unit tests.
+ */
+ friend class MemFileV1VerifierTest;
+ MemFile(const FileSpecification&, Environment&, bool callLoadFile);
+
+ // Ensures that all entries are cached.
+ // If includeBody is true, caches the body as well.
+ void ensureEntriesCached(bool includeBody) const;
+
+ // Put the given location in the result map if the
+ // location is persisted according to the given flags.
+ void matchLocationWithFlags(LocationMap& result,
+ DocumentPart part,
+ const MemSlot* slot,
+ uint32_t flags) const;
+
+public:
+ struct LoadOptions {
+ bool autoRepair;
+ LoadOptions() : autoRepair(true)
+ {}
+ };
+
+ MemFile(const FileSpecification& spec,
+ Environment& env,
+ const LoadOptions& opts = LoadOptions());
+
+ const FileSpecification& getFile() const { return _file; }
+ const document::BucketId& getBucketId() const noexcept {
+ return _file.getBucketId();
+ }
+ FileVersion getCurrentVersion() const { return _currentVersion; }
+
+ bool empty() const { return _entries.empty(); }
+ bool fileExists() const { return (_flags & FILE_EXIST); }
+ bool headerBlockCached() const { return (_flags & HEADER_BLOCK_READ); }
+ bool bodyBlockCached() const { return (_flags & BODY_BLOCK_READ); }
+ bool slotsAltered() const { return _flags & SLOTS_ALTERED; }
+
+ /**
+ * Called by the mapper when it has to call loadFile a second
+ * time due to corruption repairs. Must NOT be called by anyone
+ * else!
+ */
+ void resetMetaState();
+
+ void verifyConsistent() const;
+
+ /** Moves the physical file on disk (if any) to the new file name. */
+ void move(const FileSpecification& newFileName);
+
+ uint16_t getDisk() const;
+
+ FormatSpecificData* getFormatSpecificData() const
+ { return _formatData.get(); }
+ void setFormatSpecificData(FormatSpecificData::UP d) const
+ { _formatData = std::move(d); }
+ void setCurrentVersion(FileVersion ver) const { _currentVersion = ver; }
+
+ uint32_t getSlotCount() const;
+ const MemSlot& operator[](uint32_t index) const { return _entries[index]; }
+ const MemSlot* getSlotWithId(const document::DocumentId&,
+ Timestamp maxTimestamp = MAX_TIMESTAMP) const;
+ const MemSlot* getSlotAtTime(Timestamp) const;
+
+ void getSlotsByTimestamp(const std::vector<Timestamp>&,
+ std::vector<const MemSlot*>& returned) const;
+
+ // Get flags are defined in types.h (GetFlag)
+ Document::UP getDocument(const MemSlot& slot, GetFlag getFlag) const;
+
+ document::DocumentId getDocumentId(const MemSlot& slot) const;
+
+ /**
+ * Returns the number of bytes required by this memfile while
+ * in cache.
+ *
+ * @return Returns the cache size.
+ */
+ MemSlot::MemoryUsage getCacheSize() const;
+
+ void addPutSlot(const Document& doc, Timestamp time);
+
+ void addUpdateSlot(const Document& header,
+ const MemSlot& body,
+ Timestamp time);
+
+ void addRemoveSlot(const MemSlot& header, Timestamp time);
+
+ enum RemoveType
+ {
+ REGULAR_REMOVE,
+ UNREVERTABLE_REMOVE
+ };
+
+ void addRemoveSlotForNonExistingEntry(const DocumentId& docId,
+ Timestamp time,
+ RemoveType removeType);
+
+ void addSlot(const MemSlot&);
+ void removeSlot(const MemSlot&);
+
+ void setMemFileIO(MemFileIOInterface::UP buffer) {
+ _buffer = std::move(buffer);
+ }
+ MemFileIOInterface& getMemFileIO() { return *_buffer; }
+ const MemFileIOInterface& getMemFileIO() const { return *_buffer; }
+
+ void getLocations(LocationMap& headers,
+ LocationMap& bodies,
+ uint32_t flags) const;
+
+ /**
+ * Copies a slot from another memfile.
+ */
+ void copySlot(const MemFile& source, const MemSlot&);
+
+ void copySlotsFrom(const MemFile& source,
+ const std::vector<const MemSlot*>& sourceSlots);
+
+ /** Remove given slots. Slots must exist and be in rising timestamp order */
+ void removeSlots(const std::vector<const MemSlot*>&);
+ void modifySlot(const MemSlot&);
+
+ void setFlag(uint32_t flags) {
+ verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::setFlag");
+ _flags |= flags;
+ }
+
+ void clearFlag(uint32_t flags) {
+ verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::clearFlags");
+ _flags &= ~flags;
+ }
+
+ /**
+ * Removes entries that have been overwritten and are outside the revert
+ * time period, as well as remove entries (tombstones) older than the
+ * keep-remove period.
+ *
+ * @return True if anything was compacted
+ */
+ bool compact();
+
+ const_iterator begin(uint32_t iteratorFlags = 0,
+ Timestamp fromTimestamp = UNSET_TIMESTAMP,
+ Timestamp toTimestamp = UNSET_TIMESTAMP) const;
+
+ const_iterator end() const { return const_iterator(); }
+
+ void ensureDocumentIdCached(const MemSlot&) const;
+ void ensureDocumentCached(const MemSlot&, bool headerOnly) const;
+ void ensureHeaderBlockCached() const;
+ void ensureBodyBlockCached() const;
+ void ensureHeaderAndBodyBlocksCached() const;
+ void ensureDocumentCached(const std::vector<Timestamp>&,
+ bool headerOnly) const;
+
+ /**
+ * Assert that a given slot is contained in the bucket this MemFile has
+ * been created for (i.e. output of getBucketId()). In the common case,
+ * only the slot GID will be consulted, but in the case of orderdoc docs
+ * the document ID may have to be fetched.
+ *
+ * Precondition: `slot` must have its data blocks already added to the
+ * file's buffer cache. This means any fetches of the document ID should
+ * not require disk access, but will incur cache lookup and heap
+ * allocation overhead.
+ * Postcondition: no side effects if `slot` is contained in bucket. Logs
+ * error and dumps core otherwise.
+ */
+ void assertSlotContainedInThisBucket(const MemSlot& slot) const;
+
+ bool documentIdAvailable(const MemSlot&) const;
+ bool partAvailable(const MemSlot&, DocumentPart part) const;
+ bool partPersisted(const MemSlot&, DocumentPart) const;
+
+ uint32_t getSerializedSize(const MemSlot&, DocumentPart part) const;
+
+ /**
+ * Fetches the bucket info. If metadata is altered, info will be
+ * recalculated, and bucket database updated.
+ */
+ const BucketInfo& getBucketInfo() const;
+
+ void flushToDisk(FlushFlag flags = NONE);
+
+ void clearCache(DocumentPart part);
+
+ /**
+ * Repair any errors found in this slotfile.
+ * If given, stuff error report into given ostream.
+ *
+ * @return True if file was fine. False if any errors were repaired.
+ */
+ bool repair(std::ostream& errorReport, uint32_t fileVerifyFlags = 0);
+
+ /**
+ * Tests for equality of memfiles. Equality requires MemFile to look equal
+ * for clients. It will not read data from file, so the same parts of the
+ * file must be cached for objects to be equal. Non-persistent flags need
+ * not be equal (the same parts need not be persisted to backend files).
+ *
+ * Used in unit testing only.
+ */
+ bool operator==(const MemFile& other) const;
+
+ /** Stat wants control of printing of slots. */
+ void printHeader(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ void printEntries(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ void printEntriesState(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ /** Stat wants control of printing of slots. */
+ void printUserFriendly(const MemSlot& slot,
+ std::ostream& out,
+ const std::string& indent) const;
+ void print(const MemSlot& slot,
+ std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ /** Debug function to print state. */
+ void printState(std::ostream& out, bool userFriendlyOutput = false,
+ bool printBody = true, bool printHeader = true,
+ //MetaDataOrder order = DEFAULT,
+ const std::string& indent = "") const;
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp
new file mode 100644
index 00000000000..722fef80103
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp
@@ -0,0 +1,529 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/log/log.h>
+#include <vespa/vespalib/util/vstringfmt.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h>
+
+LOG_SETUP(".persistence.memfile.cache");
+
+namespace storage {
+namespace memfile {
+
+
+void
+MemFileCache::Entry::setInUse(bool inUse) {
+ LOG(debug, "Setting in use to %d for file %s", inUse, _file.toString().c_str());
+ _inUse = inUse;
+}
+
+void
+MemFileCache::returnToCache(MemFileCache::Entry& entry)
+{
+ // Ensure file descriptor is closed before returning to cache
+ entry._file.getMemFileIO().close();
+ vespalib::LockGuard lock(_cacheLock);
+
+ BucketInfo info(entry._file.getBucketInfo());
+ BucketId id(entry._file.getFile().getBucketId());
+
+ LOG(debug, "%s being returned to cache", id.toString().c_str());
+
+ MemoryUsage newUsage = entry._file.getCacheSize();
+
+ if (_memoryToken->getSize() == 0 || newUsage.sum() == 0) {
+ entry._file.flushToDisk();
+ eraseNoLock(id);
+ return;
+ }
+
+ // File must be flushed before being returned to the cache.
+ assert(!entry._file.slotsAltered());
+ entry.setInUse(false);
+
+ Entry* ptr = 0;
+ {
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ BucketIdx::iterator it(bucketIdx.find(id));
+ assert(it != bucketIdx.end());
+ ptr = it->_ptr.get();
+
+ if (entry._returnToCacheWhenFinished) {
+ EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id);
+ _entries.replace(it, wrp);
+ _memoryUsage.add(newUsage);
+ entry._cacheSize = newUsage;
+ } else {
+ _entries.erase(it);
+ }
+ }
+
+ LOG(spam,
+ "Bucket %s, ptr %p returned to cache: %s with %s. "
+ "Total cache size after return: %s",
+ id.toString().c_str(),
+ ptr,
+ info.toString().c_str(),
+ newUsage.toString().c_str(),
+ _memoryUsage.toString().c_str());
+
+ evictWhileFull();
+}
+
+void
+MemFileCache::done(MemFileCache::Entry& entry)
+{
+ LOG(spam, "Finished with file %s",
+ entry._file.getFile().toString().c_str());
+
+ try {
+ entry._file.verifyConsistent();
+ } catch (const vespalib::Exception& e) {
+ LOG(debug,
+ "Verification of cache entry %s failed: %s",
+ entry._file.getFile().toString().c_str(),
+ e.getMessage().c_str());
+
+ entry.setInUse(false);
+ throw;
+ }
+
+ assert(entry.isInUse());
+
+ returnToCache(entry);
+}
+
+struct MemFileCache::CacheEntryGuard : public MemFilePtr::EntryGuard {
+ MemFileCache& _cache;
+ Environment& _env;
+ MemFileCache::Entry* _entry;
+
+ CacheEntryGuard(
+ MemFileCache& cache,
+ Environment& env,
+ MemFileCache::Entry& entry)
+ : MemFilePtr::EntryGuard(entry._file),
+ _cache(cache),
+ _env(env),
+ _entry(&entry)
+ {
+ }
+ virtual ~CacheEntryGuard() {
+ if (_entry) {
+ _cache.done(*_entry);
+ }
+ }
+
+ MemFile& getFile() {
+ return _entry->_file;
+ }
+
+ virtual void deleteFile() {
+ LOG(debug, "Cache entry guard deleting %s", _file->toString().c_str());
+ _env._memFileMapper.deleteFile(*_file, _env);
+ erase();
+ }
+
+ virtual void erase() {
+ LOG(debug, "Cache entry guard erasing %s from cache",
+ _file->toString().c_str());
+ _cache.erase(document::BucketId(_entry->_file.getFile().getBucketId()));
+ _entry = 0;
+ }
+
+ virtual void move(EntryGuard& target) {
+ LOG(debug, "Cache entry guard moving %s", _file->toString().c_str());
+ _cache.move(*this, static_cast<CacheEntryGuard&>(target));
+ }
+
+ void moveState(CacheEntryGuard& target) {
+ // Move state over to target.
+ target._entry = _entry;
+ target._file = _file;
+
+ // Invalidate this.
+ _entry = NULL;
+ _file = NULL;
+ }
+
+ MemFile* operator->() {
+ return &_entry->_file;
+ }
+};
+
+MemFileCache::MemFileCache(framework::ComponentRegister& componentRegister,
+ MemFilePersistenceCacheMetrics& metrics)
+ : Component(componentRegister, "memfilecache"),
+ _lastUsedCounter(0),
+ _allocationType(getMemoryManager().registerAllocationType(
+ framework::MemoryAllocationType(
+ "memfilecache", framework::MemoryAllocationType::CACHE))),
+ _memoryToken(getMemoryManager().allocate(_allocationType, 0, 0, 200)),
+ _metrics(metrics),
+ _bodyEvicter(_metrics.body_evictions),
+ _headerEvicter(_metrics.header_evictions),
+ _metaDataEvicter(_metrics.meta_evictions)
+{
+}
+
+void
+MemFileCache::setCacheSize(MemoryUsage cacheSize)
+{
+ vespalib::LockGuard lock(_cacheLock);
+
+ _cacheLimit = cacheSize;
+
+ _memoryToken->resize(std::min(_memoryToken->getSize(), _cacheLimit.sum()),
+ _cacheLimit.sum());
+
+ evictWhileFull();
+}
+
+MemFilePtr
+MemFileCache::get(const BucketId& id, Environment& env, Directory& dir,
+ bool createIfNotExisting)
+{
+ vespalib::LockGuard lock(_cacheLock);
+
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+
+ BucketIdx::iterator it(bucketIdx.find(id));
+ if (it == bucketIdx.end()) {
+ LOG(debug,
+ "Bucket %s was not in cache. Creating cache entry.",
+ id.toString().c_str());
+
+ FileSpecification file(id, dir, env.calculatePathInDir(id, dir));
+ const uint64_t counter(++_lastUsedCounter);
+ lock.unlock();
+ // Create memfile outside lock, since this will involve disk reads
+ // in the common case that there's a bucket file on the disk. The
+ // content layer shall guarantee that no concurrent operations happen
+ // for a single bucket, so this should be fully thread safe.
+ Entry::LP entry(new Entry(file, env, createIfNotExisting));
+
+ vespalib::LockGuard reLock(_cacheLock);
+ std::pair<LRUCache::iterator, bool> inserted(
+ _entries.insert(EntryWrapper(entry, counter, id)));
+ assert(inserted.second);
+ _metrics.misses.inc();
+
+ return MemFilePtr(MemFilePtr::EntryGuard::LP(
+ new CacheEntryGuard(*this, env, *entry)));
+ } else {
+ if (it->_ptr->isInUse()) {
+ LOG(error,
+ "Bug! File %s, ptr %p was in use while in the file cache",
+ it->_ptr->_file.toString(true).c_str(), it->_ptr.get());
+ assert(false);
+ }
+
+ it->_ptr->setInUse(true);
+ _memoryUsage.sub(it->_ptr->_cacheSize);
+ EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id);
+ _entries.replace(it, wrp);
+ _metrics.hits.inc();
+ }
+ LOG(debug,
+ "Bucket %s was already in cache. Returning cache entry with "
+ "memory usage %s, new total memory usage: %s",
+ id.toString().c_str(),
+ it->_ptr->_cacheSize.toString().c_str(),
+ _memoryUsage.toString().c_str());
+
+ return MemFilePtr(MemFilePtr::EntryGuard::LP(
+ new CacheEntryGuard(*this, env, *it->_ptr)));
+}
+
+// TODO: can this be removed??
+MemFileCache::BucketInfoMap
+MemFileCache::flushDirtyEntries()
+{
+ vespalib::LockGuard lock(_cacheLock);
+ BucketInfoMap retVal;
+
+ uint32_t total = 0, count = 0;
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ for (BucketIdx::iterator it = bucketIdx.begin(); it != bucketIdx.end(); ++it) {
+ ++total;
+ if (!it->_ptr->isInUse()) {
+ retVal[it->_ptr->_file.getFile().getBucketId()] =
+ it->_ptr->_file.getBucketInfo();
+
+ it->_ptr->_file.flushToDisk();
+ // For now, close all files when done flushing, to avoid having too
+ // many open at the same time. Later, the cache may keep a limited
+ // number of file handles cached.
+ it->_ptr->_file.getMemFileIO().close();
+
+ ++count;
+ }
+ }
+ LOG(debug, "Flushed %u of %u entries in cache. Rest are in use", count, total);
+
+ return retVal;
+}
+
+void
+MemFileCache::clear()
+{
+ vespalib::LockGuard lock(_cacheLock);
+
+ uint32_t total = 0, count = 0;
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ for (BucketIdx::iterator it = bucketIdx.begin();
+ it != bucketIdx.end();)
+ {
+ ++total;
+ if (!it->_ptr->isInUse()) {
+ // Any file not in use should have been flushed to disk already.
+ assert(!it->_ptr->_file.slotsAltered());
+ _memoryUsage.sub(it->_ptr->_cacheSize);
+ it = bucketIdx.erase(it);
+ ++count;
+ } else {
+ ++it;
+ }
+ }
+ LOG(debug, "Flushed and cleared %u of %u entries in cache. Rest are in use",
+ count, total);
+}
+
+void
+MemFileCache::eraseNoLock(const document::BucketId& id)
+{
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ BucketIdx::iterator iter = bucketIdx.find(id);
+
+ assert(iter != bucketIdx.end());
+ assert(iter->_ptr->isInUse());
+ //assert(!iter->_ptr->_file.slotsAltered());
+ LOG(debug, "Removing %s from cache", id.toString().c_str());
+ bucketIdx.erase(iter);
+}
+
+void
+MemFileCache::erase(const document::BucketId& id) {
+ vespalib::LockGuard lock(_cacheLock);
+ eraseNoLock(id);
+}
+
+void
+MemFileCache::move(CacheEntryGuard& source, CacheEntryGuard& target)
+{
+ vespalib::LockGuard lock(_cacheLock);
+ assert(target->empty());
+
+ document::BucketId sourceId = source->getFile().getBucketId();
+ document::BucketId targetId = target->getFile().getBucketId();
+
+ LOG(debug, "Renaming file %s to %s",
+ source->toString().c_str(),
+ target->toString().c_str());
+ source->move(target->getFile());
+ source.moveState(target);
+
+ BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ BucketIdx::iterator sourceIt(bucketIdx.find(sourceId));
+ BucketIdx::iterator targetIt(bucketIdx.find(targetId));
+ assert(sourceIt != bucketIdx.end());
+ assert(targetIt != bucketIdx.end());
+
+ EntryWrapper wrp(sourceIt->_ptr, sourceIt->_lastUsed, targetId);
+ bucketIdx.erase(sourceIt);
+ _entries.replace(targetIt, wrp);
+}
+
+MemFileCache::TimeIdx::iterator
+MemFileCache::getLeastRecentlyUsedBucket()
+{
+ return boost::multi_index::get<1>(_entries).begin();
+
+}
+
+uint64_t
+MemFileCache::size() const
+{
+ LOG(spam, "memory usage is now %s (total is %zu)",
+ _memoryUsage.toString().c_str(), _memoryUsage.sum());
+ return _memoryUsage.sum();
+}
+
+bool
+MemFileCache::contains(const document::BucketId& bucketId) const
+{
+ vespalib::LockGuard lock(_cacheLock);
+ const BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries);
+ return bucketIdx.find(bucketId) != bucketIdx.end();
+}
+
+MemFileCache::TimeIdx::iterator
+MemFileCache::MetaDataEvictionPolicy::evict(
+ MemFileCache::TimeIdx& lruIndex,
+ MemFileCache::TimeIdx::iterator& it,
+ MemFileCache::MemoryUsage& curUsage)
+{
+ LOG(debug, "Evicting entire memfile for %s from cache. %s held",
+ it->_bid.toString().c_str(),
+ it->_ptr->_cacheSize.toString().c_str());
+ curUsage.sub(it->_ptr->_cacheSize);
+ _evictionMetric.inc();
+ return lruIndex.erase(it);
+}
+
+MemFileCache::TimeIdx::iterator
+MemFileCache::BodyEvictionPolicy::evict(
+ MemFileCache::TimeIdx& /*lruIndex*/,
+ MemFileCache::TimeIdx::iterator& it,
+ MemFileCache::MemoryUsage& curUsage)
+{
+ LOG(debug, "Removing body of %s from cache. %s held",
+ it->_bid.toString().c_str(),
+ it->_ptr->_cacheSize.toString().c_str());
+
+ if (it->_ptr->_cacheSize.bodySize) {
+ it->_ptr->_file.clearCache(BODY);
+ curUsage.bodySize -= it->_ptr->_cacheSize.bodySize;
+ it->_ptr->_cacheSize.bodySize = 0;
+ _evictionMetric.inc();
+ }
+ return ++it;
+}
+
+MemFileCache::TimeIdx::iterator
+MemFileCache::HeaderEvictionPolicy::evict(
+ MemFileCache::TimeIdx& /*lruIndex*/,
+ MemFileCache::TimeIdx::iterator& it,
+ MemFileCache::MemoryUsage& curUsage)
+{
+ LOG(debug, "Removing header and body of %s from cache. %s held",
+ it->_bid.toString().c_str(),
+ it->_ptr->_cacheSize.toString().c_str());
+
+ if (it->_ptr->_cacheSize.headerSize) {
+ it->_ptr->_file.clearCache(HEADER);
+ it->_ptr->_file.clearCache(BODY);
+ curUsage.headerSize -= it->_ptr->_cacheSize.headerSize;
+ curUsage.bodySize -= it->_ptr->_cacheSize.bodySize;
+ it->_ptr->_cacheSize.headerSize = 0;
+ it->_ptr->_cacheSize.bodySize = 0;
+ _evictionMetric.inc();
+ }
+ return ++it;
+}
+
+template <typename EvictionPolicy>
+void
+MemFileCache::executeCacheEvictionPolicy(EvictionPolicy& policy)
+{
+ MemFileCache::TimeIdx& timeIdx = boost::multi_index::get<1>(_entries);
+ for (MemFileCache::TimeIdx::iterator
+ i(timeIdx.upper_bound(policy.getEvictionCursor())),
+ e(timeIdx.end());
+ i != e;)
+ {
+ if (_memoryUsage.sum() <= _cacheLimit.sum()
+ || (policy.getValue(_memoryUsage)
+ <= policy.getValue(_cacheLimit)))
+ {
+ LOG(spam, "Aborting current policy because "
+ "memory usage %s is less than soft limit %s",
+ _memoryUsage.toString().c_str(),
+ _cacheLimit.toString().c_str());
+
+ return;
+ }
+
+ LOG(spam, "Need to evict more data as memory usage is %zu, hard limit is %zu",
+ _memoryUsage.sum(), _cacheLimit.sum());
+
+ // If the memfile is in use, skip it. It will be re-added with a new
+ // timestamp once it is done being used, which maintains the invariant
+ // that no file below the cursor holds cached data of the policy's type.
+ if (i->_ptr->isInUse()) {
+ LOG(spam, "Not evicting %s as it is currently active",
+ i->_bid.toString().c_str());
+ ++i;
+ continue;
+ }
+ policy.setEvictionCursor(i->_lastUsed);
+ i = policy.evict(timeIdx, i, _memoryUsage);
+ }
+}
+
+void
+MemFileCache::executeEvictionPolicies()
+{
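+ // Escalating eviction: drop cached bodies first, then headers (plus
+ // bodies), and finally whole metadata entries, stopping as soon as
+ // memory usage is back under the configured limit.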
+ executeCacheEvictionPolicy(_bodyEvicter);
+ if (_memoryUsage.sum() <= _cacheLimit.sum()) {
+ return;
+ }
+ executeCacheEvictionPolicy(_headerEvicter);
+ if (_memoryUsage.sum() <= _cacheLimit.sum()) {
+ return;
+ }
+ executeCacheEvictionPolicy(_metaDataEvicter);
+}
+
+void
+MemFileCache::evictWhileFull()
+{
+ if (size() > _cacheLimit.sum()) {
+ LOG(debug, "Before cache eviction, cache usage was %s"
+ ", new max size is %" PRIu64,
+ _memoryUsage.toString().c_str(), _cacheLimit.sum());
+
+ executeEvictionPolicies();
+
+ LOG(spam, "After cache eviction, memory usage is %s",
+ _memoryUsage.toString().c_str());
+ } else {
+ LOG(spam, "Max cache size is %" PRIu64 " bytes, but cache "
+ "only using %" PRIu64 " bytes, so not evicting anything",
+ _cacheLimit.sum(), _memoryUsage.sum());
+ }
+
+ _metrics.files.set(_entries.size());
+ _metrics.meta.set(_memoryUsage.metaSize);
+ _metrics.header.set(_memoryUsage.headerSize);
+ _metrics.body.set(_memoryUsage.bodySize);
+}
+
+MemFileCache::Statistics
+MemFileCache::getCacheStats() const
+{
+ vespalib::LockGuard lock(_cacheLock);
+ return Statistics(_memoryUsage, _memoryToken->getSize(), _entries.size());
+}
+
+void
+MemFileCache::printCacheEntriesHtml(std::ostream& out) const
+{
+ vespalib::LockGuard lock(_cacheLock);
+ out << "<p>Cache entries (most recently used first):</p>\n"
+ << "<ol>\n";
+ const MemFileCache::TimeIdx& timeIdx(boost::multi_index::get<1>(_entries));
+ for (MemFileCache::TimeIdx::const_reverse_iterator
+ it(timeIdx.rbegin()), e(timeIdx.rend());
+ it != e; ++it)
+ {
+ out << "<li>";
+ out << it->_bid << ": ";
+ if (!it->_ptr->isInUse()) {
+ out << it->_ptr->_cacheSize.toString();
+ } else {
+ out << "<em>(in use)</em>";
+ }
+ out << "</li>\n";
+ }
+ out << "</ol>\n";
+}
+
+} // memfile
+
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h
new file mode 100644
index 00000000000..cc25bd5f7a8
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h
@@ -0,0 +1,301 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::slotfile::MemFileCache
+ * \ingroup memfile
+ *
+ * \brief Cache holding onto all mem file objects in memory.
+ *
+ * This is the global memory file cache keeping track of all the memory files
+ * in memory.
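+ *
+ * Rough usage sketch (bucket id, environment and directory setup is assumed
+ * to exist elsewhere):
+ *
+ *   MemFilePtr file(cache.get(bucketId, env, dir));
+ *   // ... read or modify *file ...
+ *   // When 'file' goes out of scope, the entry is handed back to the cache
+ *   // and becomes subject to the eviction policies.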
+ */
+
+#pragma once
+
+#include <vespa/metrics/metrics.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/memfilepersistence/memfile/memfileptr.h>
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/identity.hpp>
+#include <boost/multi_index/member.hpp>
+#include <boost/multi_index/mem_fun.hpp>
+#include <boost/multi_index/ordered_index.hpp>
+#include <boost/multi_index/sequenced_index.hpp>
+#include <vespa/storageframework/generic/memory/memorymanagerinterface.h>
+
+namespace storage {
+
+namespace memfile {
+
+class MemFilePersistenceCacheMetrics;
+class Environment; // Avoid cyclic dependency with environment
+
+class MemFileCache : private framework::Component,
+ private Types
+{
+public:
+ typedef MemSlot::MemoryUsage MemoryUsage;
+
+ struct Statistics
+ {
+ MemoryUsage _memoryUsage;
+ size_t _cacheSize;
+ size_t _numEntries;
+
+ Statistics(const MemoryUsage& memoryUsage,
+ size_t cacheSize,
+ size_t numEntries)
+ : _memoryUsage(memoryUsage),
+ _cacheSize(cacheSize),
+ _numEntries(numEntries)
+ {}
+ };
+private:
+ class Entry : boost::noncopyable {
+ public:
+ typedef vespalib::LinkedPtr<Entry> LP;
+
+ MemFile _file;
+ MemoryUsage _cacheSize;
+ Environment& _env;
+ bool _inUse;
+ bool _returnToCacheWhenFinished;
+
+ Entry(FileSpecification& file, Environment& env,
+ bool returnToCacheWhenFinished = true)
+ : _file(file, env), _env(env), _inUse(true),
+ _returnToCacheWhenFinished(returnToCacheWhenFinished)
+ {}
+
+ bool isInUse() const {
+ return _inUse;
+ }
+
+ void setInUse(bool inUse);
+ };
+
+ struct EntryWrapper {
+ EntryWrapper(
+ Entry::LP ptr,
+ uint64_t lastUsed,
+ const document::BucketId& bid)
+ : _ptr(ptr), _lastUsed(lastUsed), _bid(bid) {}
+
+ const Entry* operator->() const {
+ return _ptr.get();
+ };
+
+ Entry* operator->() {
+ return _ptr.get();
+ };
+
+ Entry::LP _ptr;
+ uint64_t _lastUsed;
+ document::BucketId _bid;
+ };
+
+ struct CacheEntryGuard;
+
+ vespalib::Lock _cacheLock;
+
+ typedef boost::multi_index::ordered_unique<
+ boost::multi_index::member<EntryWrapper, BucketId, &EntryWrapper::_bid>
+ > BucketIdOrder;
+
+ typedef boost::multi_index::ordered_non_unique<
+ boost::multi_index::member<EntryWrapper, uint64_t, &EntryWrapper::_lastUsed>
+ > TimeOrder;
+
+ typedef boost::multi_index::multi_index_container<
+ EntryWrapper,
+ boost::multi_index::indexed_by<
+ BucketIdOrder,
+ TimeOrder
+ >
+ > LRUCache;
+
+ typedef boost::multi_index::nth_index<LRUCache, 0>::type BucketIdx;
+ typedef boost::multi_index::nth_index<LRUCache, 1>::type TimeIdx;
+
+ class CacheEvictionPolicy
+ {
+ uint64_t _evictionCursor;
+ protected:
+ metrics::LongCountMetric& _evictionMetric;
+ public:
+ CacheEvictionPolicy(metrics::LongCountMetric& evictionMetric)
+ : _evictionCursor(0),
+ _evictionMetric(evictionMetric)
+ {}
+
+ uint64_t getEvictionCursor() const {
+ return _evictionCursor;
+ }
+ void setEvictionCursor(uint64_t cursor) {
+ _evictionCursor = cursor;
+ }
+ };
+
+ class MetaDataEvictionPolicy : public CacheEvictionPolicy
+ {
+ public:
+ MetaDataEvictionPolicy(metrics::LongCountMetric& evictionMetric)
+ : CacheEvictionPolicy(evictionMetric) {}
+
+ TimeIdx::iterator evict(
+ TimeIdx& lruIndex,
+ TimeIdx::iterator& it,
+ MemoryUsage& curUsage);
+
+ uint64_t getValue(const MemoryUsage& usage) const {
+ return usage.sum();
+ }
+ };
+
+ class BodyEvictionPolicy : public CacheEvictionPolicy
+ {
+ public:
+ BodyEvictionPolicy(metrics::LongCountMetric& evictionMetric)
+ : CacheEvictionPolicy(evictionMetric) {}
+
+ TimeIdx::iterator evict(
+ TimeIdx& lruIndex,
+ TimeIdx::iterator& it,
+ MemoryUsage& curUsage);
+
+ uint64_t getValue(const MemoryUsage& usage) const {
+ return usage.bodySize;
+ }
+ };
+
+ class HeaderEvictionPolicy : public CacheEvictionPolicy
+ {
+ public:
+ HeaderEvictionPolicy(metrics::LongCountMetric& evictionMetric)
+ : CacheEvictionPolicy(evictionMetric) {}
+
+ TimeIdx::iterator evict(
+ TimeIdx& lruIndex,
+ TimeIdx::iterator& it,
+ MemoryUsage& curUsage);
+
+ uint64_t getValue(const MemoryUsage& usage) const {
+ return usage.headerSize + usage.bodySize;
+ }
+ };
+
+
+ MemoryUsage _memoryUsage;
+
+ LRUCache _entries;
+ uint64_t _lastUsedCounter;
+ const framework::MemoryAllocationType& _allocationType;
+ framework::MemoryToken::UP _memoryToken;
+
+ MemFilePersistenceCacheMetrics& _metrics;
+
+ BodyEvictionPolicy _bodyEvicter;
+ HeaderEvictionPolicy _headerEvicter;
+ MetaDataEvictionPolicy _metaDataEvicter;
+
+ void done(Entry&);
+ void move(CacheEntryGuard& source, CacheEntryGuard& target);
+ void evictWhileFull();
+ void executeEvictionPolicies();
+ void returnToCache(MemFileCache::Entry& entry);
+
+ TimeIdx::iterator getLeastRecentlyUsedBucket();
+
+ /**
+ * @return Returns the current size of the cache.
+ */
+ uint64_t size() const;
+
+ void eraseNoLock(const document::BucketId& id);
+
+ friend class CacheEntryGuard;
+ friend class MemCacheTest;
+
+ template <typename EvictionPolicy>
+ void
+ executeCacheEvictionPolicy(EvictionPolicy& policy);
+
+ MemoryUsage _cacheLimit;
+
+public:
+ typedef std::unique_ptr<MemFileCache> UP;
+
+ MemFileCache(framework::ComponentRegister& componentRegister,
+ MemFilePersistenceCacheMetrics& metrics);
+
+ /**
+ * Get a memfile for the given bucket on the given disk.
+ * @param env Needed for cache to be able to create non-existing entries.
+ * @param dir Directory to locate or create the bucket's file in.
+ * @param createIfNotInCache If false, the bucket won't be kept in the
+ * cache afterwards, unless it was already cached before this operation.
+ */
+ MemFilePtr get(const BucketId&,
+ Environment& env,
+ Directory& dir,
+ bool createIfNotInCache = true);
+
+ /**
+ * Removes the given bucket id from the cache. The bucket must be in use;
+ * erase() therefore does not subtract the bucket's cache usage from the
+ * total cache usage, as that was already done when the bucket was
+ * retrieved in the first place.
+ */
+ void erase(const document::BucketId& id);
+
+ typedef std::map<document::BucketId, BucketInfo> BucketInfoMap;
+
+ /**
+ * This function exists only temporarily, allowing the memfile layer to
+ * flush all dirty entries found after each operation. It will be removed
+ * in favor of another mechanism later.
+ */
+ BucketInfoMap flushDirtyEntries();
+
+ /**
+ * Clears the cache of all non-active entries (flushing dirty entries
+ * as necessary).
+ */
+ void clear();
+
+ /**
+ * @return Returns true if the given bucket exists in the cache.
+ */
+ bool contains(const document::BucketId& bucketId) const;
+
+ /**
+ * Used for unit testing only.
+ */
+ framework::MemoryToken& getMemoryToken() { return *_memoryToken; }
+ const MemFilePersistenceCacheMetrics& getMetrics() const {
+ return _metrics;
+ }
+
+ /**
+ * Set maximum cache size.
+ */
+ void setCacheSize(MemoryUsage limits);
+
+ uint64_t getCacheSize() { return _memoryToken->getSize(); }
+
+ /**
+ * NOTE: takes lock, never call from within memfilecache code.
+ * @return Statistics over cache memory usage and entry counts
+ */
+ Statistics getCacheStats() const;
+
+ /**
+ * Dump all cache entries as a most recently used-ordered list.
+ * Used for verbose status page printing.
+ */
+ void printCacheEntriesHtml(std::ostream& out) const;
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp
new file mode 100644
index 00000000000..17bf530d450
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp
@@ -0,0 +1,208 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/memfilepersistence/memfile/memfilecompactor.h>
+#include <algorithm>
+
+LOG_SETUP(".persistence.memfile.compactor");
+
+namespace storage {
+namespace memfile {
+
+struct DocumentVersionInfo {
+ document::DocumentId _id;
+ uint32_t _versions;
+ bool _tombstoned;
+
+ DocumentVersionInfo(const document::DocumentId& docId, bool tombstoned)
+ : _id(docId),
+ _versions(1),
+ _tombstoned(tombstoned)
+ {
+ }
+
+ bool newerVersionExists() const noexcept {
+ return (_versions != 1);
+ }
+};
+
+namespace {
+
+bool
+isTombstone(const MemSlot& slot)
+{
+ return slot.deleted();
+}
+
+// Deduct with underflow protection
+template<typename T>
+T deduct(T a, T b) {
+ return (a > b ? a - b : T(0));
+}
+
+struct CompactSlotInfo : private Types {
+ typedef std::list<DocumentVersionInfo> DocList;
+ typedef vespalib::hash_map<GlobalId, DocList, GlobalId::hash> Map;
+ Map _info;
+ const MemFile& _memFile;
+
+ CompactSlotInfo(const MemFile& memFile)
+ : _info(2 * memFile.getSlotCount()),
+ _memFile(memFile)
+ {
+ }
+
+ /**
+ * Registers a particular document version as having been seen in the file,
+ * keeping track of how many newer versions have been observed thus far and
+ * whether at least one of these was a tombstone (remove entry).
+ *
+ * Potential GID collisions are handled by utilizing the actual document
+ * ID to track specific documents.
+ *
+ * Returns a reference to the currently tracked version state for the
+ * document the slot is for. Returned reference is valid until the next
+ * invocation of registerSeen() or the owning CompactSlotInfo instance
+ * is destructed.
+ */
+ DocumentVersionInfo& registerSeen(const MemSlot& slot) {
+ document::DocumentId id = _memFile.getDocumentId(slot);
+ DocList& gidDocs(_info[slot.getGlobalId()]);
+ auto matchesId = [&](const DocumentVersionInfo& doc) {
+ return (id == doc._id);
+ };
+ auto existing = std::find_if(
+ gidDocs.begin(), gidDocs.end(), matchesId);
+
+ if (existing == gidDocs.end()) { // (Very) common case
+ gidDocs.emplace_back(id, isTombstone(slot));
+ return gidDocs.back();
+ } else {
+ ++existing->_versions;
+ if (isTombstone(slot)) {
+ existing->_tombstoned = true;
+ }
+ return *existing;
+ }
+ }
+};
+
+class DecreasingTimestampSlotRange
+{
+public:
+ DecreasingTimestampSlotRange(const MemFile& memFile)
+ : _memFile(memFile)
+ {
+ }
+ MemFile::const_iterator begin() const {
+ return _memFile.begin(Types::ITERATE_REMOVED);
+ }
+ MemFile::const_iterator end() const {
+ return _memFile.end();
+ }
+private:
+ const MemFile& _memFile;
+};
+
+DecreasingTimestampSlotRange
+allSlotsInDecreasingTimestampOrder(const MemFile& memFile)
+{
+ return {memFile};
+}
+
+}
+
+MemFileCompactor::MemFileCompactor(
+ framework::MicroSecTime currentTime,
+ const CompactionOptions& options)
+ : _options(options),
+ _currentTime(currentTime),
+ _revertTimePoint(deduct(currentTime, options._revertTimePeriod)),
+ _keepRemoveTimePoint(deduct(currentTime, options._keepRemoveTimePeriod))
+{
+ assert(_options._maxDocumentVersions != 0);
+}
+
+/*
+ * Cases to handle:
+ * - Document has too many versions; always remove slot
+ * - But otherwise, only remove if older than revert time.
+ * - Remove entry is too old; remove slot if older than revert time AND keep
+ * remove time.
+ * - Tombstoned entries are not resurrected as they are either compacted
+ * away due to being outside the revert time period or their tombstone
+ * survives by being inside the revert time period. The "keep remove
+ * time" period is also forced to be at least as high as the revert time
+ * period at configuration time.
+ * - Otherwise, keep the slot.
+ */
+MemFileCompactor::SlotList
+MemFileCompactor::getSlotsToRemove(const MemFile& memFile)
+{
+ memFile.ensureHeaderBlockCached();
+
+ std::vector<const MemSlot*> removeSlots;
+ CompactSlotInfo slots(memFile);
+
+ LOG(spam,
+ "Running compact on %s. Using revertTime=%zu, "
+ "keepRemoveTime=%zu, maxDocumentVersions=%u",
+ memFile.toString(true).c_str(),
+ _revertTimePoint.getTime(),
+ _keepRemoveTimePoint.getTime(),
+ _options._maxDocumentVersions);
+
+ for (auto& slot : allSlotsInDecreasingTimestampOrder(memFile)) {
+ DocumentVersionInfo& info(slots.registerSeen(slot));
+
+ if (exceededVersionCount(info)) {
+ alwaysCompact(slot, removeSlots);
+ } else if (info.newerVersionExists()) {
+ // A tombstone also counts as a newer version.
+ compactIfNotRevertible(slot, removeSlots);
+ } else if (isTombstone(slot) && keepRemoveTimeExpired(slot)) {
+ compactIfNotRevertible(slot, removeSlots);
+ } // else: keep slot since it's the newest or within revert period.
+ }
+
+ std::reverse(removeSlots.begin(), removeSlots.end());
+ return removeSlots;
+}
+
+bool
+MemFileCompactor::exceededVersionCount(
+ const DocumentVersionInfo& info) const noexcept
+{
+ return (info._versions > _options._maxDocumentVersions);
+}
+
+bool
+MemFileCompactor::keepRemoveTimeExpired(const MemSlot& slot) const noexcept
+{
+ return (slot.getTimestamp() < _keepRemoveTimePoint);
+}
+
+void
+MemFileCompactor::compactIfNotRevertible(
+ const MemSlot& slot,
+ SlotList& slotsToRemove) const
+{
+ // May compact slot away if its timestamp is older than the point in time
+ // where we expect reverts may be sent.
+ if (slot.getTimestamp() < _revertTimePoint) {
+ alwaysCompact(slot, slotsToRemove);
+ }
+}
+
+void
+MemFileCompactor::alwaysCompact(const MemSlot& slot,
+ SlotList& slotsToRemove) const
+{
+ LOG(spam, "Compacting slot %s", slot.toString().c_str());
+ slotsToRemove.push_back(&slot);
+}
+
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h
new file mode 100644
index 00000000000..f402489e627
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::MemFileCompactor
+ * \ingroup memfile
+ *
+ * \brief Class containing logic to find what slots in a memfile can be removed.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+#include <limits>
+
+namespace storage {
+namespace memfile {
+
+class MemFile;
+class MemSlot;
+
+struct CompactionOptions
+{
+ framework::MicroSecTime _revertTimePeriod;
+ framework::MicroSecTime _keepRemoveTimePeriod;
+ uint32_t _maxDocumentVersions {std::numeric_limits<uint32_t>::max()};
+
+ CompactionOptions& revertTimePeriod(framework::MicroSecTime t) {
+ _revertTimePeriod = t;
+ return *this;
+ }
+
+ CompactionOptions& keepRemoveTimePeriod(framework::MicroSecTime t) {
+ _keepRemoveTimePeriod = t;
+ return *this;
+ }
+
+ CompactionOptions& maxDocumentVersions(uint32_t maxVersions) {
+ _maxDocumentVersions = maxVersions;
+ return *this;
+ }
+};
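+
+/*
+ * Sketch of how the fluent setters above may be combined with the compactor
+ * declared below; the time and version values are placeholders:
+ *
+ *   CompactionOptions opts = CompactionOptions()
+ *       .revertTimePeriod(revertPeriod)
+ *       .keepRemoveTimePeriod(keepRemovePeriod)
+ *       .maxDocumentVersions(5);
+ *   MemFileCompactor compactor(currentTime, opts);
+ *   MemFileCompactor::SlotList toRemove = compactor.getSlotsToRemove(memFile);
+ */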
+
+class DocumentVersionInfo;
+
+class MemFileCompactor : public Types
+{
+public:
+ using SlotList = std::vector<const MemSlot*>;
+
+ MemFileCompactor(framework::MicroSecTime currentTime,
+ const CompactionOptions& options);
+
+ SlotList getSlotsToRemove(const MemFile& memFile);
+private:
+ bool exceededVersionCount(const DocumentVersionInfo&) const noexcept;
+ bool keepRemoveTimeExpired(const MemSlot& slot) const noexcept;
+ void compactIfNotRevertible(const MemSlot& slot,
+ SlotList& slotsToRemove) const;
+ void alwaysCompact(const MemSlot& slot, SlotList& slotsToRemove) const;
+
+ CompactionOptions _options;
+ framework::MicroSecTime _currentTime;
+ framework::MicroSecTime _revertTimePoint;
+ framework::MicroSecTime _keepRemoveTimePoint;
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h
new file mode 100644
index 00000000000..fbe06d2c4df
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/common/filespecification.h>
+
+namespace storage {
+
+namespace memfile {
+
+class Environment;
+
+class MemFileIOInterface : public Types {
+public:
+ virtual ~MemFileIOInterface() {}
+
+ typedef std::unique_ptr<MemFileIOInterface> UP;
+
+ /**
+ * Deserializes the data at the given location into a document object.
+ * If the data has not already been read from disk, returns NULL.
+ */
+ virtual Document::UP getDocumentHeader(
+ const document::DocumentTypeRepo&,
+ DataLocation loc) const = 0;
+
+ virtual document::DocumentId getDocumentId(DataLocation loc) const = 0;
+
+ /**
+ * Deserializes the given document's body part from the data at the given
+ * location.
+ */
+ virtual void readBody(
+ const document::DocumentTypeRepo&,
+ DataLocation loc,
+ Document& doc) const = 0;
+
+ virtual DataLocation addDocumentIdOnlyHeader(
+ const DocumentId&,
+ const document::DocumentTypeRepo&) = 0;
+
+ virtual DataLocation addHeader(const Document& doc) = 0;
+
+ virtual DataLocation addBody(const Document& doc) = 0;
+
+ virtual void clear(DocumentPart part) = 0;
+
+ virtual bool verifyConsistent() const = 0;
+
+ virtual void move(const FileSpecification& target) = 0;
+
+ virtual DataLocation copyCache(const MemFileIOInterface& source,
+ DocumentPart part,
+ DataLocation loc) = 0;
+
+ virtual void ensureCached(Environment& env,
+ DocumentPart part,
+ const std::vector<DataLocation>& locations) = 0;
+
+ virtual bool isCached(DataLocation loc, DocumentPart part) const = 0;
+
+ virtual bool isPersisted(DataLocation loc, DocumentPart part) const = 0;
+
+ virtual uint32_t getSerializedSize(DocumentPart part,
+ DataLocation loc) const = 0;
+
+ virtual void close() = 0;
+
+ virtual size_t getCachedSize(DocumentPart part) const = 0;
+
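+ /** Convenience helper that clears both the header and the body cache. */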
+ void clear() {
+ clear(HEADER);
+ clear(BODY);
+ }
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h
new file mode 100644
index 00000000000..545686e5f2f
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h
@@ -0,0 +1,90 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::slotfile::MemFilePtr
+ * \ingroup memfile
+ *
+ * \brief Utility class for managing an entry taken from cache.
+ *
+ * To be able to seamlessly return copy-by-value objects from the cache that
+ * can be used, and automatically return them to the cache on destruction,
+ * this wrapper class exists to ensure that when the last user stops using
+ * it, it will be released.
+ *
+ * This object is created by the cache and returned to the disk thread using it.
+ * A linked pointer should thus be safe as we assume all users of it will be in
+ * the same thread. It assumes the cache itself has a lifetime longer than this
+ * object.
+ */
+
+#pragma once
+
+#include <vespa/vespalib/util/linkedptr.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFile;
+
+class MemFilePtr {
+public:
+ /**
+ * Utility class to ensure we call done() on the cache after all cache
+ * pointers are deleted. The cache implements a subclass of this class that
+ * does this, to avoid a cyclic dependency with the cache.
+ */
+ struct EntryGuard {
+ typedef vespalib::LinkedPtr<EntryGuard> LP;
+
+ MemFile* _file;
+
+ EntryGuard(MemFile& file) : _file(&file) {}
+ virtual ~EntryGuard() {}
+
+ virtual void erase() = 0;
+ virtual void deleteFile() = 0;
+ virtual void move(EntryGuard& target) = 0;
+ };
+
+private:
+ EntryGuard::LP _entry;
+
+public:
+ MemFilePtr() {}
+ MemFilePtr(EntryGuard::LP entry) : _entry(entry) {}
+
+ // Behave like pointer to MemFile for ease of use.
+ MemFile* operator->() { return _entry->_file; }
+ MemFile& operator*() { return *_entry->_file; }
+ MemFile* get() {
+ return (_entry.get() != 0 ? _entry->_file : 0);
+ }
+ const MemFile* operator->() const { return _entry->_file; }
+ const MemFile& operator*() const { return *_entry->_file; }
+ const MemFile* get() const {
+ return (_entry.get() != 0 ? _entry->_file : 0);
+ }
+
+ /** Removes the entry from cache and deletes the underlying file. */
+ void deleteFile() { _entry->deleteFile(); }
+
+ /**
+ * Erases the entry from the cache. Does not touch the underlying file so
+ * therefore requires the memfile's alteredSlots() to return false.
+ */
+ void eraseFromCache() { _entry->erase(); }
+
+ /**
+ * Removes the entry from cache and renames the underlying file.
+ * The end result is that this mem file now points to the renamed file.
+ * The target MemFilePtr is invalid after this operation.
+ */
+ void move(MemFilePtr& target) {
+ _entry->move(*target._entry);
+ }
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp
new file mode 100644
index 00000000000..b7b88682fce
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/memslot.h>
+#include <iostream>
+#include <iomanip>
+
+#define ASSERT_FLAG(a) \
+{ \
+ if (!(_flags & a)) { \
+ std::ostringstream error; \
+ error << "Expected one of flags " << std::hex << a << " to be set at " \
+ << "this point, but only the given flags are set: " << _flags \
+ << ", " << toString(true); \
+ throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC); \
+ } \
+}
+
+namespace storage {
+namespace memfile {
+
+MemSlot::MemSlot(const MemSlot& other)
+ : _timestamp(other._timestamp),
+ _header(other._header),
+ _body(other._body),
+ _gid(other._gid),
+ _flags(other._flags),
+ _checksum(other._checksum)
+{
+}
+
+MemSlot::MemSlot(const GlobalId& gid, Timestamp time,
+ DataLocation header, DataLocation body,
+ uint16_t flags, uint16_t checksum)
+ : _timestamp(time),
+ _header(header),
+ _body(body),
+ _gid(gid),
+ _flags(flags),
+ _checksum(checksum)
+{
+}
+
+MemSlot::~MemSlot()
+{
+}
+
+MemSlot::MemoryUsage
+MemSlot::getCacheSize() const
+{
+ MemoryUsage retVal;
+ retVal.metaSize = sizeof(MemSlot);
+ retVal.headerSize = _header._size;
+ retVal.bodySize = _body._size;
+ return retVal;
+}
+
+MemSlot&
+MemSlot::operator=(const MemSlot& other)
+{
+ _timestamp = other._timestamp;
+ _header = other._header;
+ _body = other._body;
+ _gid = other._gid;
+ _checksum = other._checksum;
+
+ // Flags must be copied after cache.
+ _flags = other._flags;
+ return *this;
+}
+
+void
+MemSlot::swap(MemSlot& other)
+{
+ std::swap(_timestamp, other._timestamp);
+ std::swap(_header, other._header);
+ std::swap(_body, other._body);
+ std::swap(_gid, other._gid);
+ std::swap(_checksum, other._checksum);
+ std::swap(_flags, other._flags);
+}
+
+bool
+MemSlot::hasBodyContent() const
+{
+ return _body._size > 0;
+}
+
+bool
+MemSlot::operator==(const MemSlot& other) const
+{
+ if (_checksum != other._checksum
+ || _timestamp != other._timestamp
+ || _header != other._header
+ || _body != other._body
+ || _flags != other._flags
+ || _gid != other._gid)
+ {
+ return false;
+ }
+ return true;
+}
+
+void
+MemSlot::print(std::ostream& out, bool verbose,
+ const std::string& /*indent*/) const
+{
+ if (verbose) {
+ out << "MemSlot(";
+ }
+    out << std::dec << _timestamp << ", " << _gid << ", h "
+        << _header._pos << " - " << _header._size << ", b "
+        << _body._pos << " - " << _body._size << ", f "
+        << std::hex << _flags << ", c " << _checksum;
+ if (verbose) {
+ out << ")";
+ }
+}
+
+std::string
+MemSlot::MemoryUsage::toString() const
+{
+ std::ostringstream ss;
+ ss << "MemoryUsage(meta=" << metaSize
+ << ", header=" << headerSize
+ << ", body=" << bodySize
+ << ")";
+ return ss.str();
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h
new file mode 100644
index 00000000000..53a20a86f8a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h
@@ -0,0 +1,189 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::MemSlot
+ * \ingroup memfile
+ *
+ * \brief Class representing a slot in a MemFile.
+ *
+ * The MemSlot class keeps all the data we need for a single entry in the
+ * slotfile.
+ *
+ * Note that a lot of these instances will be kept in the memory cache. It is
+ * important that the memory footprint of this class is really small, such that
+ * we can fit many entries in the cache. The layout of the class is thus a bit
+ * specialized to keep a low footprint.
+ *
+ * Currently, 40 bytes are used for metadata.
+ *
+ * A note about constness. The cached parts are considered mutable, such that
+ * all read access can be const. Only operations causing the slot to change on
+ * disk (given a flush) are non-const.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFile;
+
+class MemSlot : private Types,
+ private boost::operators<MemSlot>
+{
+ // Metadata for slot we need to keep.
+ Timestamp _timestamp; // 64 bit - 8 bytes timestamp
+ DataLocation _header; // 2x32 bit - 8 bytes header location
+ DataLocation _body; // 2x32 bit - 8 bytes body location
+ GlobalId _gid; // 96 bit - 12 bytes
+ uint16_t _flags; // 16 bit - 2 bytes flag
+ uint16_t _checksum; // 16 bit - 2 bytes checksum
+
+ friend class MemFileTest;
+
+    // Used by tests to simulate GID collisions.
+ void setGlobalId(const GlobalId& gid) {
+ _gid = gid;
+ }
+
+public:
+ struct MemoryUsage {
+ MemoryUsage() :
+ headerSize(0),
+ bodySize(0),
+ metaSize(0) {}
+
+ MemoryUsage(uint64_t metaSz, uint64_t headerSz, uint64_t bodySz)
+ : headerSize(headerSz),
+ bodySize(bodySz),
+ metaSize(metaSz)
+ {}
+
+ uint64_t headerSize;
+ uint64_t bodySize;
+ uint64_t metaSize;
+
+ uint64_t sum() const {
+ return headerSize + bodySize + metaSize;
+ }
+
+ void add(const MemoryUsage& usage) {
+ headerSize += usage.headerSize;
+ bodySize += usage.bodySize;
+ metaSize += usage.metaSize;
+ }
+
+ void sub(const MemoryUsage& usage) {
+ headerSize -= usage.headerSize;
+ bodySize -= usage.bodySize;
+ metaSize -= usage.metaSize;
+ }
+
+ std::string toString() const;
+ };
+
+ typedef vespalib::LinkedPtr<MemSlot> LP;
+
+ MemSlot(const MemSlot&);
+ /** Constructor used by mappers reading from file. */
+ MemSlot(const GlobalId& gid, Timestamp time,
+ DataLocation header, DataLocation body,
+ uint16_t flags, uint16_t checksum);
+ ~MemSlot();
+
+ MemSlot& operator=(const MemSlot&);
+ void swap(MemSlot&);
+
+ Timestamp getTimestamp() const { return _timestamp; }
+ const GlobalId& getGlobalId() const { return _gid; }
+
+ DataLocation getLocation(DocumentPart part) const
+ { return (part == HEADER ? _header : _body); }
+
+ bool inUse() const { return (_flags & IN_USE); }
+ bool deleted() const { return (_flags & DELETED); }
+ bool deletedInPlace() const { return (_flags & DELETED_IN_PLACE); }
+
+ bool checksumOutdated() const { return (_flags & CHECKSUM_OUTDATED); }
+
+ bool alteredInMemory() const { return (_flags & SLOTS_ALTERED); }
+
+ bool usingUnusedFlags() const { return (_flags & UNUSED); }
+
+ uint16_t getFlags() const { return _flags; }
+
+ bool hasBodyContent() const;
+
+ uint16_t getPersistedFlags() const
+ { return (_flags & LEGAL_PERSISTED_SLOT_FLAGS); }
+
+ /**
+ * Returns the number of bytes required to keep this slot
+ * in memory.
+ */
+ MemoryUsage getCacheSize() const;
+
+ void setFlag(uint32_t flags)
+ { _flags |= flags | (flags & 0xff ? CHECKSUM_OUTDATED : 0); }
+
+ void clearFlag(uint32_t flags) { _flags &= ~flags; }
+
+ void setLocation(DocumentPart part, DataLocation location) {
+ if (part == HEADER) {
+ _header = location;
+ } else {
+ _body = location;
+ }
+ _flags |= CHECKSUM_OUTDATED;
+ }
+
+ void setChecksum(uint16_t checksum)
+ { _checksum = checksum; _flags &= ~CHECKSUM_OUTDATED; }
+
+ uint16_t getChecksum() const { return _checksum; }
+
+ void clearPersistence() {
+ _header = DataLocation();
+ if (_body._size > 0) {
+ _body = DataLocation();
+ }
+ _flags |= CHECKSUM_OUTDATED;
+ }
+
+ void turnToUnrevertableRemove() {
+ if (_flags & DELETED_IN_PLACE) return;
+ _body = DataLocation(0, 0);
+ _flags |= DELETED | DELETED_IN_PLACE;
+ _flags |= ALTERED_IN_MEMORY | CHECKSUM_OUTDATED;
+ }
+
+    /**
+     * Tests two slots for equality. Equality requires the slots to look equal
+     * to clients. It will not read data from file, so the same parts of the
+     * file must be cached for the objects to compare equal. Non-persistent
+     * flags need not be equal (the same parts need not be persisted to
+     * backend files).
+     *
+     * Used in unit testing only.
+     */
+ bool operator==(const MemSlot& other) const;
+
+    // Implement print functions so we can be used as if we were a
+    // document::Printable (we don't want inheritance in this class).
+ void print(std::ostream& out, bool verbose,
+ const std::string& indent) const;
+
+ std::string toString(bool verbose = false) const {
+ std::ostringstream ost;
+ print(ost, verbose, "");
+ return ost.str();
+ }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const MemSlot& slot) {
+ slot.print(out, false, "");
+ return out;
+}
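+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows how MemSlot::MemoryUsage is intended to be aggregated when accounting
+// for the cache footprint of several slots; the `example` namespace and the
+// helper below are assumptions made for this sketch only.
+namespace example {
+
+inline MemSlot::MemoryUsage
+combinedCacheSize(const MemSlot& a, const MemSlot& b)
+{
+    MemSlot::MemoryUsage total(a.getCacheSize());  // meta + header + body bytes
+    total.add(b.getCacheSize());
+    return total;                                  // total.sum() gives the sum
+}
+
+} // example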
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp
new file mode 100644
index 00000000000..82ac8ac62d2
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h>
+
+namespace storage {
+namespace memfile {
+
+DataLocation
+SharedDataLocationTracker::getOrCreateSharedLocation(
+ DataLocation sourceLocation)
+{
+ DataLocation& bufferedLoc(_trackedLocations[sourceLocation]);
+ if (!bufferedLoc.valid()) {
+ bufferedLoc = _cacheCopier.copyFromSourceToLocal(_part, sourceLocation);
+ }
+ return bufferedLoc;
+}
+
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h
new file mode 100644
index 00000000000..e0b1a7b9a2a
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/types.h>
+
+namespace storage {
+namespace memfile {
+
+class BufferCacheCopier;
+
+/**
+ * Simple utility to track locations copied between files and to help
+ * ensure locations that were shared in the source file will also be shared
+ * in the destination file.
+ */
+class SharedDataLocationTracker
+{
+public:
+ SharedDataLocationTracker(BufferCacheCopier& cacheCopier,
+ Types::DocumentPart part)
+ : _cacheCopier(cacheCopier),
+ _part(part),
+ _trackedLocations()
+ {
+ }
+
+ /**
+ * Get a location to data contained in the destination which points at the
+ * exact same data as that given by sourceLocation in the source. Multiple
+ * requests to the same source location will return the same destination
+ * location.
+ */
+ DataLocation getOrCreateSharedLocation(DataLocation sourceLocation);
+private:
+ BufferCacheCopier& _cacheCopier;
+ Types::DocumentPart _part;
+ std::map<DataLocation, DataLocation> _trackedLocations;
+};
+
+/**
+ * Interface for copying data between individual MemFile buffer caches.
+ */
+class BufferCacheCopier
+{
+ virtual DataLocation doCopyFromSourceToLocal(
+ Types::DocumentPart part,
+ DataLocation sourceLocation) = 0;
+public:
+ virtual ~BufferCacheCopier() {}
+
+ /**
+ * Copy a given file part location from a source cache into a new location
+ * in the destination cache. Returns new location in destination cache.
+ * It is assumed that locations returned by this method will be unique.
+ */
+ DataLocation copyFromSourceToLocal(Types::DocumentPart part,
+ DataLocation sourceLocation)
+ {
+ return doCopyFromSourceToLocal(part, sourceLocation);
+ }
+};
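+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows how a BufferCacheCopier is wired into a SharedDataLocationTracker.
+// NullBufferCacheCopier is a hypothetical stand-in that pretends the data is
+// already present at the same location; a real copier would append the bytes
+// to the destination cache and return the new location there.
+namespace example {
+
+class NullBufferCacheCopier : public BufferCacheCopier
+{
+    virtual DataLocation doCopyFromSourceToLocal(
+            Types::DocumentPart,
+            DataLocation sourceLocation)
+    {
+        return sourceLocation;  // illustration only; nothing is copied
+    }
+};
+
+inline DataLocation
+shareHeaderLocation(BufferCacheCopier& copier, DataLocation source)
+{
+    SharedDataLocationTracker tracker(copier, Types::HEADER);
+    // Repeated calls with the same source location return the same
+    // destination location, preserving sharing across slots.
+    return tracker.getOrCreateSharedLocation(source);
+}
+
+} // example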
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp
new file mode 100644
index 00000000000..1780870c050
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/memfile/slotiterator.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+
+namespace storage {
+namespace memfile {
+
+GidUniqueSlotIterator::GidUniqueSlotIterator(const MemFile& file,
+ bool iterateRemoves,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp)
+ : _file(file),
+ _seen(2 * file.getSlotCount()),
+ _iterateRemoves(iterateRemoves),
+ _fromTimestamp(fromTimestamp),
+ _toTimestamp(toTimestamp),
+ _currentIndex(file.getSlotCount())
+{
+ iterate();
+}
+
+void
+GidUniqueSlotIterator::iterate() const
+{
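+    // Iterate backwards from _currentIndex - 1 towards 0. The unsigned
+    // comparison (i < _currentIndex) terminates the loop once i wraps around
+    // after index 0 has been visited.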
+ for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) {
+ // To avoid separate implementations for const and non-const
+ // iterators we do a const cast here. For const iterators, only
+ // const MemSlot entries will be exposed externally, so no
+ // modifications will be allowed for those.
+ MemSlot& slot(const_cast<MemSlot&>(_file[i]));
+ if (_fromTimestamp != Timestamp(0) &&
+ slot.getTimestamp() < _fromTimestamp) continue;
+ if (_toTimestamp != Timestamp(0) &&
+ slot.getTimestamp() > _toTimestamp) continue;
+
+ SeenMap::insert_result inserted(_seen.insert(slot.getGlobalId()));
+ if (!inserted.second) {
+ continue;
+ }
+ if (slot.deleted() && !_iterateRemoves) continue;
+ _current = &slot;
+ _currentIndex = i;
+ return;
+ }
+ _current = 0;
+ _currentIndex = 0;
+}
+
+SlotIterator*
+GidUniqueSlotIterator::clone() const {
+ GidUniqueSlotIterator* sit(
+ new GidUniqueSlotIterator(_file, _iterateRemoves,
+ _fromTimestamp, _toTimestamp));
+ sit->_seen = _seen;
+ sit->_currentIndex = _currentIndex;
+ sit->_current = _current;
+ return sit;
+}
+
+AllSlotsIterator::AllSlotsIterator(const MemFile& file,
+ bool iterateRemoves,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp)
+ : _file(file),
+ _iterateRemoves(iterateRemoves),
+ _fromTimestamp(fromTimestamp),
+ _toTimestamp(toTimestamp),
+ _currentIndex(file.getSlotCount())
+{
+ iterate();
+}
+
+SlotIterator*
+AllSlotsIterator::clone() const {
+ AllSlotsIterator* sit = new AllSlotsIterator(_file, _iterateRemoves,
+ _fromTimestamp, _toTimestamp);
+ sit->_currentIndex = _currentIndex;
+ sit->_current = _current;
+ return sit;
+}
+
+void
+AllSlotsIterator::iterate() const
+{
+ for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) {
+        // To avoid separate implementations for const and non-const
+ // iterators we do a const cast here. For const iterators, only
+ // const MemSlot entries will be exposed externally, so no
+ // modifications will be allowed for those.
+ MemSlot& slot(const_cast<MemSlot&>(_file[i]));
+ if (_fromTimestamp != Timestamp(0) &&
+ slot.getTimestamp() < _fromTimestamp) continue;
+ if (_toTimestamp != Timestamp(0) &&
+ slot.getTimestamp() > _toTimestamp) continue;
+ if (slot.deleted() && !_iterateRemoves) continue;
+ _current = &slot;
+ _currentIndex = i;
+ return;
+ }
+ _current = 0;
+ _currentIndex = 0;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h
new file mode 100644
index 00000000000..c10075ef143
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::SlotIterator
+ * \ingroup memfile
+ *
+ * \brief Utility class for iterating slots in a MemFile.
+ *
+ * When iterating the slots, sometimes one wants to iterate only unique slots
+ * and sometimes one also wants to iterate deleted slots. Iterating only
+ * unique slots adds a CPU cost, so that cost should be avoided when iterating
+ * all slots.
+ *
+ * To simplify code that iterates slots, a SlotIterator can be used, so the
+ * iteration logic does not have to be reimplemented everywhere.
+ *
+ * The typical way of creating such an iterator is to call MemFile's
+ * getSlotIterator function, which will give you an iterator of a suitable
+ * implementation. Do not instantiate the concrete iterator classes directly.
+ */
+
+#pragma once
+
+#include <boost/operators.hpp>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/vespalib/stllike/hash_set.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFile; // MemFile depends on this file. Don't want circular dependency
+class MemSlot;
+
+class SlotIterator : protected Types {
+protected:
+ mutable MemSlot* _current;
+
+ virtual void iterate() const = 0;
+ SlotIterator() : _current(0) {}
+
+public:
+ typedef std::unique_ptr<SlotIterator> UP;
+ typedef std::unique_ptr<const SlotIterator> CUP;
+
+ virtual ~SlotIterator() {}
+
+ virtual SlotIterator* clone() const = 0;
+
+ MemSlot* getCurrent() { return _current; }
+ const MemSlot* getCurrent() const { return _current; }
+
+ const MemSlot& operator++() const { iterate(); return *_current; }
+};
+
+class GidUniqueSlotIterator : public SlotIterator {
+ const MemFile& _file;
+ typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap;
+ mutable SeenMap _seen;
+ bool _iterateRemoves;
+ Timestamp _fromTimestamp;
+ Timestamp _toTimestamp;
+ mutable uint32_t _currentIndex;
+
+public:
+ GidUniqueSlotIterator(const MemFile& file,
+ bool iterateRemoves,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp);
+
+ virtual void iterate() const;
+ virtual SlotIterator* clone() const;
+};
+
+class AllSlotsIterator : public SlotIterator {
+ const MemFile& _file;
+ bool _iterateRemoves;
+ Timestamp _fromTimestamp;
+ Timestamp _toTimestamp;
+ mutable uint32_t _currentIndex;
+
+public:
+ AllSlotsIterator(const MemFile& file,
+ bool iterateRemoves,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp);
+
+ virtual void iterate() const;
+ virtual SlotIterator* clone() const;
+};
+
+/**
+ * \class storage::memfile::IteratorWrapper
+ * \ingroup memfile
+ *
+ * \brief Wrapper class for iterators, such that we can return them by value.
+ *
+ * Iterators use inheritance, so we need a wrapper class around the
+ * implementation in order to be able to return iterators by value, as one is
+ * accustomed to in the standard library.
+ */
+class IteratorWrapper : public boost::operators<IteratorWrapper> {
+ SlotIterator::CUP _it;
+
+public:
+ IteratorWrapper() {} // Creates end() iterator.
+ IteratorWrapper(SlotIterator::CUP it) : _it(std::move(it)) {}
+    // Copy construction clones the wrapped iterator implementation (an empty
+    // wrapper stays empty).
+    IteratorWrapper(const IteratorWrapper& o)
+        : _it(o._it.get() != 0 ? o._it->clone() : 0) {}
+ IteratorWrapper& operator=(const IteratorWrapper& o) {
+ _it.reset(0);
+ if (o._it.get() != 0) _it.reset(o._it->clone());
+ return *this;
+ }
+
+ bool operator==(const IteratorWrapper& o) const {
+ const MemSlot* slot(_it.get() == 0 ? 0 : _it->getCurrent());
+ const MemSlot* slot2(o._it.get() == 0 ? 0 : o._it->getCurrent());
+ return (slot == slot2);
+ }
+
+ const MemSlot& operator*() const { return *_it->getCurrent(); }
+ const MemSlot* operator->() const { return _it->getCurrent(); }
+ const MemSlot& operator++() const { return ++*_it; }
+};
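+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows the intended iteration pattern with IteratorWrapper; a begin iterator
+// is assumed to be obtained via MemFile::getSlotIterator() as described in
+// the class comments above.
+namespace example {
+
+inline uint32_t
+countSlots(IteratorWrapper it)
+{
+    uint32_t count = 0;
+    for (IteratorWrapper end; it != end; ++it) {
+        ++count;  // *it or it-> gives access to the current MemSlot
+    }
+    return count;
+}
+
+} // example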
+
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore
new file mode 100644
index 00000000000..7e7c0fe7fae
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore
@@ -0,0 +1,2 @@
+/.depend
+/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt
new file mode 100644
index 00000000000..e30807d99b2
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_spi OBJECT
+ SOURCES
+ memfilepersistence.cpp
+ memfilepersistenceprovider.cpp
+ operationhandler.cpp
+ iteratorhandler.cpp
+ joinoperationhandler.cpp
+ splitoperationhandler.cpp
+ visitorslotmatcher.cpp
+ threadlocals.cpp
+ cacheevictionguard.cpp
+ DEPENDS
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp
new file mode 100644
index 00000000000..ba9bc8669c6
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+
+LOG_SETUP(".persistence.memfile.cacheevictionguard");
+
+namespace storage {
+namespace memfile {
+
+MemFileCacheEvictionGuard::~MemFileCacheEvictionGuard()
+{
+ if (!_ok) {
+ LOG(debug,
+ "Clearing %s from cache to force reload "
+ "of file on next access.",
+ _ptr->getFile().getBucketId().toString().c_str());
+ // Throw away all non-persisted changes to file and clear it from the
+ // cache to force a full reload on next access. This is the safest
+ // option, as all operations that are not yet persisted should fail
+ // back to the client automatically.
+ _ptr->clearFlag(Types::SLOTS_ALTERED);
+ _ptr.eraseFromCache(); // nothrow
+ }
+}
+
+}
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h
new file mode 100644
index 00000000000..6df524a1c58
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/memfile/memfileptr.h>
+
+namespace storage {
+namespace memfile {
+
+/**
+ * Guard which will forcefully un-mark a file as being modified and evict
+ * it from the cache if an exception occurs before it is destructed (more
+ * specifically, if unguard() is never invoked on it).
+ *
+ * Any data not yet persisted when the memfile is evicted will be lost.
+ * It's up to the caller to ensure that this does not actually cause
+ * any true data loss.
+ */
+class MemFileCacheEvictionGuard
+{
+public:
+ MemFileCacheEvictionGuard(const MemFilePtr& ptr)
+ : _ptr(ptr),
+ _ok(false)
+ {
+ assert(_ptr.get());
+ }
+ ~MemFileCacheEvictionGuard();
+
+ MemFile* operator->() { return _ptr.get(); }
+ MemFile& operator*() { return *_ptr; }
+ const MemFile* operator->() const { return _ptr.get(); }
+ const MemFile& operator*() const { return *_ptr; }
+
+ const MemFilePtr& get() const { return _ptr; }
+ MemFilePtr& get() { return _ptr; }
+
+ void unguard() { _ok = true; }
+private:
+ MemFilePtr _ptr;
+ bool _ok;
+};
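+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows the intended usage pattern: wrap work on a cached MemFile in a guard
+// so that an exception thrown before unguard() evicts the (possibly
+// inconsistent) file from the cache. `Func` is a hypothetical callable type.
+namespace example {
+
+template <typename Func>
+void withEvictionOnFailure(const MemFilePtr& file, Func doWorkThatMayThrow)
+{
+    MemFileCacheEvictionGuard guard(file);
+    doWorkThatMayThrow(*guard);  // any exception here triggers eviction
+    guard.unguard();             // reached only on success; keeps the file
+}
+
+} // example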
+
+}
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp
new file mode 100644
index 00000000000..c95d59001f4
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp
@@ -0,0 +1,431 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <iterator>
+#include <vespa/document/fieldset/fieldsets.h>
+#include <vespa/document/select/bodyfielddetector.h>
+#include <vespa/memfilepersistence/spi/iteratorhandler.h>
+#include <vespa/memfilepersistence/spi/visitorslotmatcher.h>
+#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
+
+LOG_SETUP(".persistence.memfile.handler.iterator");
+
+namespace storage {
+namespace memfile {
+
+CachePrefetchRequirements
+CachePrefetchRequirements::createFromSelection(const document::DocumentTypeRepo& repo,
+ const document::select::Node& sel)
+{
+ CachePrefetchRequirements ret;
+ document::select::BodyFieldDetector bfd(repo);
+ sel.visit(bfd);
+ ret.setHeaderPrefetchRequired(bfd.foundHeaderField);
+ ret.setBodyPrefetchRequired(bfd.foundBodyField);
+ return ret;
+}
+
+IteratorHandler::IteratorHandler(Environment& env)
+ : OperationHandler(env)
+{
+}
+
+IteratorHandler::~IteratorHandler()
+{
+}
+
+void
+IteratorHandler::sanityCheckActiveIteratorCount()
+{
+ if (_sharedState._iterators.size()
+ >= SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT
+ && !_sharedState._hasWarnedLargeIteratorCount)
+ {
+ LOG(warning, "Number of active iterators has reached warn-limit "
+ "of %zu. Potential iterator leak? destroyIterator() must be "
+ "invoked for each successful createIterator() invocation.",
+ SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT);
+ _sharedState._hasWarnedLargeIteratorCount = true;
+ }
+}
+
+spi::CreateIteratorResult
+IteratorHandler::createIterator(const spi::Bucket& bucket,
+ const document::FieldSet& fields,
+ const spi::Selection& sel,
+ spi::IncludedVersions versions)
+{
+ uint64_t id;
+ // By default, no explicit prefetching is required.
+ CachePrefetchRequirements prefetcher;
+
+ vespalib::LinkedPtr<document::select::Node> docSelection;
+ if (!sel.getDocumentSelection().getDocumentSelection().empty()) {
+ docSelection.reset(
+ parseDocumentSelection(
+ sel.getDocumentSelection().getDocumentSelection(),
+ true).release());
+ if (!docSelection.get()) {
+ return spi::CreateIteratorResult(
+ spi::Result::PERMANENT_ERROR,
+ "Got invalid/unparseable document selection string");
+ }
+ prefetcher = CachePrefetchRequirements::createFromSelection(
+ _env.repo(), *docSelection);
+ // NOTE: Suboptimal behavior; since the field detector doesn't
+ // understand that ID-related selections require header reads,
+ // we take the safest route here and simply always require the
+ // header to be prefetched if we have _any_ kind of non-empty
+ // document selection.
+ prefetcher.setHeaderPrefetchRequired(true);
+ }
+ prefetcher.setFromTimestamp(Timestamp(sel.getFromTimestamp().getValue()));
+ prefetcher.setToTimestamp(Timestamp(sel.getToTimestamp().getValue()));
+
+ {
+ vespalib::LockGuard lock(_sharedState._stateLock);
+ id = _sharedState._nextId;
+
+ std::pair<IteratorStateMap::iterator, bool> inserted(
+ _sharedState._iterators.insert(
+ IteratorStateMap::value_type(
+ id,
+ IteratorState(
+ bucket,
+ sel,
+ document::FieldSet::UP(fields.clone()),
+ versions,
+ docSelection,
+ prefetcher))));
+
+ assert(inserted.second); // Should never have duplicates
+ ++_sharedState._nextId;
+ sanityCheckActiveIteratorCount();
+ }
+ LOG(debug, "Created new iterator state for bucket %s "
+ "with iterator id %zu",
+ bucket.getBucketId().toString().c_str(),
+ id);
+ return spi::CreateIteratorResult(spi::IteratorId(id));
+}
+
+spi::Result
+IteratorHandler::destroyIterator(spi::IteratorId id)
+{
+ vespalib::LockGuard lock(_sharedState._stateLock);
+ uint64_t iterId = id;
+ IteratorStateMap::iterator iter(
+ _sharedState._iterators.find(iterId));
+ if (iter == _sharedState._iterators.end()) {
+ std::ostringstream ss;
+ ss << "destroyIterator called with unknown iterator id ("
+ << iterId << ")";
+ LOG(error, "%s", ss.str().c_str());
+ return spi::Result();
+ }
+ LOG(debug, "Destroying iterator state for iterator id %zu", iterId);
+ assert(!iter->second.isActive());
+ _sharedState._iterators.erase(iter);
+ return spi::Result();
+}
+
+spi::DocEntry::SizeType
+IteratorHandler::getDocumentSize(const MemFile& file,
+ const MemSlot& slot,
+ bool headerOnly) const
+{
+ spi::DocEntry::SizeType size = file.getSerializedSize(slot, HEADER);
+ if (!headerOnly) {
+ size += file.getSerializedSize(slot, BODY);
+ }
+ return size;
+}
+
+spi::DocEntry::SizeType
+IteratorHandler::getEntrySize(spi::DocEntry::SizeType docSize) const
+{
+ return docSize + sizeof(spi::DocEntry);
+}
+
+void
+IteratorHandler::prefetch(const CachePrefetchRequirements& requirements,
+ MemFile& file) const
+{
+ if (requirements.noPrefetchRequired()) {
+ LOG(spam, "%s: no prefetching required",
+ file.getFile().getBucketId().toString().c_str());
+ return;
+ }
+ // Let body prefetching also imply header prefetching, at least for now.
+ // If this changes, so must the explicit caching of remaining timestamps
+ // in iterate().
+ bool headerOnly = !requirements.isBodyPrefetchRequired();
+ if (requirements.prefetchEntireBlocks()) {
+ LOG(spam, "%s: prefetching entire blocks for header: yes, body: %s",
+ file.getFile().getBucketId().toString().c_str(),
+ headerOnly ? "no" : "yes");
+ if (headerOnly) {
+ file.ensureHeaderBlockCached();
+ } else {
+ file.ensureHeaderAndBodyBlocksCached();
+ }
+ } else {
+ std::vector<Timestamp> timestamps;
+ for (size_t i = 0; i < file.getSlotCount(); ++i) {
+ const MemSlot& slot(file[i]);
+ // TODO(vekterli): replace this sub-optimal code with a lower bound search
+ if (slot.getTimestamp() < requirements.getFromTimestamp()) {
+ continue;
+ }
+ if (slot.getTimestamp() > requirements.getToTimestamp()) {
+ break;
+ }
+ timestamps.push_back(slot.getTimestamp());
+ }
+ LOG(spam, "%s: prefetching %zu slots in timestamp range [%zu, %zu]",
+ file.getFile().getBucketId().toString().c_str(),
+ timestamps.size(),
+ requirements.getFromTimestamp().getTime(),
+ requirements.getToTimestamp().getTime());
+ file.ensureDocumentCached(timestamps, headerOnly);
+ }
+}
+
+std::vector<Types::Timestamp>&
+IteratorHandler::getOrFillRemainingTimestamps(MemFile& file,
+ IteratorState& state)
+{
+ std::vector<Types::Timestamp>& remaining(state.getRemaining());
+ if (remaining.empty()) {
+ if (state.getSelection().getTimestampSubset().empty()) {
+ VisitorSlotMatcher matcher(
+ _env.repo(), state.getDocumentSelectionPtr());
+
+ int flags = 0;
+ switch (state.getIncludedVersions()) {
+ case spi::NEWEST_DOCUMENT_ONLY:
+ flags = ITERATE_GID_UNIQUE;
+ break;
+ case spi::NEWEST_DOCUMENT_OR_REMOVE:
+ flags = ITERATE_GID_UNIQUE | ITERATE_REMOVED;
+ break;
+ case spi::ALL_VERSIONS:
+ flags = ITERATE_REMOVED;
+ break;
+ }
+
+ remaining = select(
+ file,
+ matcher,
+ flags,
+ Timestamp(state.getSelection().getFromTimestamp()),
+ Timestamp(state.getSelection().getToTimestamp()));
+ } else {
+ const std::vector<spi::Timestamp>& subset(
+ state.getSelection().getTimestampSubset());
+ remaining.reserve(subset.size());
+ for (size_t i = 0; i < subset.size(); ++i) {
+ // Ensure timestamps are strictly increasing
+ assert(i == 0 || subset[i] > subset[i - 1]);
+ remaining.push_back(Types::Timestamp(subset[i]));
+ }
+
+ state.setIncludedVersions(spi::ALL_VERSIONS);
+ }
+ }
+ return remaining;
+}
+
+bool
+IteratorHandler::addMetaDataEntry(spi::IterateResult::List& result,
+ const MemSlot& slot,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const
+{
+ size_t entrySize = getEntrySize(0);
+ if (totalSize + entrySize >= maxByteSize && !result.empty()) {
+ return false;
+ }
+ totalSize += entrySize;
+
+ int metaFlags = (slot.deleted() || slot.deletedInPlace()) ? spi::REMOVE_ENTRY : 0;
+ spi::DocEntry::LP docEntry(
+ new spi::DocEntry(
+ spi::Timestamp(slot.getTimestamp().getTime()),
+ metaFlags));
+ result.push_back(docEntry);
+ return true;
+}
+
+bool
+IteratorHandler::addRemoveEntry(spi::IterateResult::List& results,
+ const MemFile& file,
+ const MemSlot& slot,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const
+{
+ DocumentId did = file.getDocumentId(slot);
+ size_t idSize = did.getSerializedSize();
+ size_t entrySize = getEntrySize(idSize);
+
+ if (totalSize + entrySize >= maxByteSize && !results.empty()) {
+ return false;
+ }
+ totalSize += entrySize;
+
+ spi::DocEntry::LP docEntry(
+ new spi::DocEntry(
+ spi::Timestamp(slot.getTimestamp().getTime()),
+ spi::REMOVE_ENTRY,
+ did));
+ results.push_back(docEntry);
+ return true;
+}
+
+bool
+IteratorHandler::addPutEntry(spi::IterateResult::List& results,
+ const MemFile& file,
+ const MemSlot& slot,
+ bool headerOnly,
+ const document::FieldSet& fieldsToKeep,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const
+{
+ size_t docSize = getDocumentSize(file, slot, headerOnly);
+ size_t entrySize = getEntrySize(docSize);
+ if (totalSize + entrySize >= maxByteSize && !results.empty()) {
+ return false;
+ }
+ Document::UP doc(
+ file.getDocument(slot, headerOnly ? HEADER_ONLY : ALL));
+ totalSize += entrySize;
+ // If we want either the full doc or just the header, don't waste time
+ // stripping unwanted document fields.
+ if (fieldsToKeep.getType() != document::FieldSet::ALL
+ && fieldsToKeep.getType() != document::FieldSet::HEADER)
+ {
+ document::FieldSet::stripFields(*doc, fieldsToKeep);
+ }
+ spi::DocEntry::LP docEntry(
+ new spi::DocEntry(spi::Timestamp(slot.getTimestamp().getTime()),
+ 0,
+ std::move(doc),
+ docSize));
+ results.push_back(docEntry);
+ return true;
+}
+
+spi::IterateResult
+IteratorHandler::iterate(spi::IteratorId id, uint64_t maxByteSize)
+{
+ spi::IterateResult::List results;
+
+ IteratorState* state;
+ {
+ vespalib::LockGuard lock(_sharedState._stateLock);
+ IteratorStateMap::iterator iter(
+ _sharedState._iterators.find(id));
+ if (iter == _sharedState._iterators.end()) {
+ LOG(error, "Invoked iterate(id=%zu, maxByteSize=%zu) "
+ "with unknown id",
+ uint64_t(id),
+ maxByteSize);
+
+ return spi::IterateResult(spi::Result::PERMANENT_ERROR,
+ "Unknown iterator ID");
+ }
+ assert(!iter->second.isActive());
+ state = &iter->second;
+ if (state->isCompleted()) {
+ return spi::IterateResult(results, true);
+ }
+ state->setActive(true);
+ }
+
+ ActiveGuard activeGuard(*state);
+ MemFileCacheEvictionGuard file(getMemFile(state->getBucket()));
+
+ const document::FieldSet& fields(state->getFields());
+ bool metaDataOnly = (fields.getType() == document::FieldSet::NONE);
+ bool headerOnly = true;
+
+ // Ensure we have relevant parts of the file prefetched if this is required.
+ const CachePrefetchRequirements& prefetchRequirements(
+ state->getCachePrefetchRequirements());
+ prefetch(prefetchRequirements, *file);
+
+ std::vector<Timestamp>& remaining(
+ getOrFillRemainingTimestamps(*file, *state));
+
+ if (!metaDataOnly) {
+ document::HeaderFields h;
+ headerOnly = h.contains(fields);
+ // Don't bother doing duplicate work if we've already prefetched
+ // everything we need.
+ if (!((headerOnly && prefetchRequirements.isHeaderPrefetchRequired())
+ || prefetchRequirements.isBodyPrefetchRequired()))
+ {
+ LOG(spam, "Caching %zu remaining slots from disk for %s",
+ remaining.size(),
+ state->getBucket().getBucketId().toString().c_str());
+ file->ensureDocumentCached(remaining, headerOnly);
+ }
+ } else {
+ LOG(spam, "Not caching any of the %zu remaining slots from disk "
+ "for %s since iteration is metadata only",
+ remaining.size(),
+ state->getBucket().getBucketId().toString().c_str());
+ }
+
+ size_t totalSize = 0;
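+    // `remaining` is consumed from the back so pop_back() stays cheap; any
+    // timestamp whose slot has disappeared in the meantime is simply skipped.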
+ while (!remaining.empty()) {
+ Timestamp ts = remaining.back();
+ const MemSlot* slot = file->getSlotAtTime(ts);
+
+ if (slot) {
+ if (metaDataOnly) {
+ if (!addMetaDataEntry(results, *slot, totalSize, maxByteSize)) {
+ break;
+ }
+ } else if (slot->deleted() || slot->deletedInPlace()) {
+ if (state->getIncludedVersions() == spi::NEWEST_DOCUMENT_ONLY) {
+ // Probably altered by unrevertable remove between time
+ // of timestamp gathering and actual iteration.
+ remaining.pop_back();
+ continue;
+ }
+ if (!addRemoveEntry(results, *file, *slot,
+ totalSize, maxByteSize))
+ {
+ break;
+ }
+ } else {
+ if (!addPutEntry(results, *file, *slot,
+ headerOnly, fields, totalSize, maxByteSize))
+ {
+ break;
+ }
+ }
+ }
+ remaining.pop_back();
+ }
+
+ file.unguard();
+
+ LOG(debug, "Iteration of bucket %s returned result with %zu entries "
+ "and %zu bytes. Remaining docs: %zu",
+ state->getBucket().getBucketId().toString().c_str(),
+ results.size(),
+ totalSize,
+ remaining.size());
+
+ if (remaining.empty()) {
+ state->setCompleted();
+ return spi::IterateResult(results, true);
+ }
+
+ return spi::IterateResult(results, false);
+}
+
+}
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h
new file mode 100644
index 00000000000..7b3ee9627e5
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h
@@ -0,0 +1,252 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::IteratorHandler
+ * \ingroup memfile
+ *
+ * \brief Class exposing iterators over a bucket
+ */
+#pragma once
+
+#include <map>
+#include <vespa/memfilepersistence/spi/operationhandler.h>
+#include <vespa/persistence/spi/persistenceprovider.h>
+#include <vespa/document/fieldset/fieldsetrepo.h>
+
+namespace document {
+
+class FieldSet;
+
+namespace select {
+class Node;
+}
+}
+
+namespace storage {
+
+class GetIterCommand;
+
+namespace memfile {
+
+class CachePrefetchRequirements : public Types
+{
+public:
+ CachePrefetchRequirements()
+ : _headerPrefetchRequired(false),
+ _bodyPrefetchRequired(false),
+ _fromTimestamp(0),
+ _toTimestamp(UINT64_MAX)
+ {
+ }
+
+ bool noPrefetchRequired() const {
+ return !_headerPrefetchRequired && !_bodyPrefetchRequired;
+ }
+
+ bool isHeaderPrefetchRequired() const { return _headerPrefetchRequired; }
+ void setHeaderPrefetchRequired(bool required) { _headerPrefetchRequired = required; }
+
+ bool isBodyPrefetchRequired() const { return _bodyPrefetchRequired; }
+ void setBodyPrefetchRequired(bool required) { _bodyPrefetchRequired = required; }
+
+ bool prefetchEntireBlocks() const {
+ return (_fromTimestamp == Timestamp(0)
+ && _toTimestamp == Timestamp(UINT64_MAX));
+ }
+
+ Timestamp getFromTimestamp() const { return _fromTimestamp; }
+ void setFromTimestamp(Timestamp fromTimestamp) { _fromTimestamp = fromTimestamp; }
+ Timestamp getToTimestamp() const { return _toTimestamp; }
+ void setToTimestamp(Timestamp toTimestamp) { _toTimestamp = toTimestamp; }
+
+ static CachePrefetchRequirements createFromSelection(
+ const document::DocumentTypeRepo& repo,
+ const document::select::Node& sel);
+private:
+ // Whether or not document selection requires header/body to be read
+ // beforehand to work efficiently.
+ bool _headerPrefetchRequired;
+ bool _bodyPrefetchRequired;
+
+ Timestamp _fromTimestamp;
+ Timestamp _toTimestamp;
+};
+
+class IteratorState
+{
+ spi::Bucket _bucket;
+ spi::Selection _selection;
+ vespalib::LinkedPtr<document::FieldSet> _fieldSet;
+ vespalib::LinkedPtr<document::select::Node> _documentSelection;
+ std::vector<Types::Timestamp> _remaining;
+ spi::IncludedVersions _versions;
+ CachePrefetchRequirements _prefetchRequirements;
+ bool _isActive;
+ bool _isCompleted;
+ std::map<std::string, bool> _headerOnlyForDocumentType;
+
+public:
+ IteratorState(const spi::Bucket& bucket,
+ const spi::Selection& sel,
+ document::FieldSet::UP fieldSet,
+ spi::IncludedVersions versions,
+ vespalib::LinkedPtr<document::select::Node> docSel,
+ const CachePrefetchRequirements& prefetchRequirements)
+ : _bucket(bucket),
+ _selection(sel),
+ _fieldSet(vespalib::LinkedPtr<document::FieldSet>(fieldSet.release())),
+ _documentSelection(docSel),
+ _remaining(),
+ _versions(versions),
+ _prefetchRequirements(prefetchRequirements),
+ _isActive(false),
+ _isCompleted(false)
+ {}
+
+ const spi::Bucket& getBucket() const { return _bucket; }
+
+ const CachePrefetchRequirements& getCachePrefetchRequirements() const {
+ return _prefetchRequirements;
+ }
+
+ bool isActive() const { return _isActive; }
+ void setActive(bool active) { _isActive = active; }
+
+ bool isCompleted() const { return _isCompleted; }
+ void setCompleted(bool completed = true) { _isCompleted = completed; }
+
+ const spi::Selection& getSelection() const { return _selection; }
+ spi::Selection& getSelection() { return _selection; }
+ const document::FieldSet& getFields() const { return *_fieldSet; }
+
+ spi::IncludedVersions getIncludedVersions() const { return _versions; }
+ void setIncludedVersions(spi::IncludedVersions versions) { _versions = versions; }
+ bool hasDocumentSelection() const { return _documentSelection.get() != 0; }
+
+ /**
+ * Can only be called if hasDocumentSelection() == true
+ */
+ const document::select::Node& getDocumentSelection() const
+ {
+ return *_documentSelection;
+ }
+ /**
+ * @return pointer to doc selection if one has been given, NULL otherwise.
+ */
+ const document::select::Node* getDocumentSelectionPtr() const
+ {
+ return _documentSelection.get();
+ }
+ const std::vector<Types::Timestamp>& getRemaining() const { return _remaining; }
+ std::vector<Types::Timestamp>& getRemaining() { return _remaining; }
+};
+
+class SharedIteratorHandlerState
+{
+public:
+ typedef std::map<uint64_t, IteratorState> IteratorStateMap;
+private:
+ IteratorStateMap _iterators;
+ uint64_t _nextId;
+ vespalib::Lock _stateLock;
+ // Debugging aid:
+ static const size_t WARN_ACTIVE_ITERATOR_COUNT = 2048;
+ bool _hasWarnedLargeIteratorCount;
+
+ friend class IteratorHandler;
+ friend class IteratorHandlerTest;
+public:
+ SharedIteratorHandlerState() : _nextId(1) {}
+};
+
+class IteratorHandler : public OperationHandler
+{
+private:
+ typedef SharedIteratorHandlerState::IteratorStateMap IteratorStateMap;
+
+ class ActiveGuard
+ {
+ IteratorState& _state;
+ public:
+ ActiveGuard(IteratorState& state) : _state(state) {}
+ ~ActiveGuard() {
+ _state.setActive(false);
+ }
+ };
+
+ /**
+ * Get the serialized size of a document, only counting the header if
+ * headerOnly is true.
+ */
+ spi::DocEntry::SizeType getDocumentSize(const MemFile&,
+ const MemSlot&,
+ bool headerOnly) const;
+ /**
+ * Get the in-memory size of a single DocEntry object to more accurately
+ * limit per-iteration memory usage.
+ */
+ spi::DocEntry::SizeType getEntrySize(spi::DocEntry::SizeType docSize) const;
+ /**
+ * Populate the state's remaining timestamps-vector, either from an
+ * explicitly specified timestamp subset in the selection, or from its
+ * document selection if no timestamp subset is given.
+ * @return mutable reference to the state's remaining-vector.
+ */
+ std::vector<Types::Timestamp>& getOrFillRemainingTimestamps(
+ MemFile& file,
+ IteratorState&);
+
+ /**
+ * If header/body precaching is required, cache _all_ documents in the
+ * required part(s) for the file. Otherwise, do nothing.
+ */
+ void prefetch(const CachePrefetchRequirements& requirements,
+ MemFile& file) const;
+
+ bool addMetaDataEntry(spi::IterateResult::List& result,
+ const MemSlot& slot,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const;
+ bool addRemoveEntry(spi::IterateResult::List& result,
+ const MemFile& file,
+ const MemSlot& slot,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const;
+ bool addPutEntry(spi::IterateResult::List& result,
+ const MemFile& file,
+ const MemSlot& slot,
+ bool headerOnly,
+ const document::FieldSet& fieldsToKeep,
+ uint64_t& totalSize,
+ uint64_t maxByteSize) const;
+
+ /**
+ * Sanity checking to ensure we don't leak iterators. Checks if the number
+ * of active iterators exceeds a predefined Large Number(tm) and warns
+ * if this is the case. Mutates shared state (sets a "has warned" flag),
+ * so must only be called when holding shared state mutex.
+ */
+ void sanityCheckActiveIteratorCount();
+
+public:
+ typedef std::unique_ptr<IteratorHandler> UP;
+
+ SharedIteratorHandlerState _sharedState;
+
+ IteratorHandler(Environment&);
+ ~IteratorHandler();
+
+ spi::CreateIteratorResult createIterator(const spi::Bucket& bucket,
+ const document::FieldSet& fieldSet,
+ const spi::Selection& sel,
+ spi::IncludedVersions versions);
+ spi::Result destroyIterator(spi::IteratorId id);
+ spi::IterateResult iterate(spi::IteratorId id, uint64_t maxByteSize);
+
+ const SharedIteratorHandlerState& getState() const {
+ return _sharedState;
+ }
+};
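+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows the createIterator()/iterate()/destroyIterator() lifecycle. The spi
+// accessor names getIteratorId() and isCompleted() are assumed to match the
+// persistence SPI; error handling is reduced to the bare minimum.
+namespace example {
+
+inline void
+drainBucket(IteratorHandler& handler,
+            const spi::Bucket& bucket,
+            const document::FieldSet& fields,
+            const spi::Selection& selection,
+            uint64_t maxBytesPerChunk)
+{
+    spi::CreateIteratorResult created(
+            handler.createIterator(bucket, fields, selection,
+                                   spi::NEWEST_DOCUMENT_ONLY));
+    if (created.getErrorCode() != spi::Result::NONE) {
+        return;  // e.g. unparseable document selection
+    }
+    spi::IteratorId id(created.getIteratorId());
+    bool completed = false;
+    while (!completed) {
+        spi::IterateResult chunk(handler.iterate(id, maxBytesPerChunk));
+        if (chunk.getErrorCode() != spi::Result::NONE) {
+            break;
+        }
+        // ... consume the entries carried by `chunk` here ...
+        completed = chunk.isCompleted();
+    }
+    handler.destroyIterator(id);  // must always be called (see warning above)
+}
+
+} // example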
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp
new file mode 100644
index 00000000000..449e3dedf85
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp
@@ -0,0 +1,159 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/joinoperationhandler.h>
+#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".persistence.memfile.handler.join");
+
+namespace storage {
+namespace memfile {
+
+JoinOperationHandler::JoinOperationHandler(Environment& env)
+ : OperationHandler(env),
+ _env(env)
+{
+}
+
+void
+JoinOperationHandler::copySlots(MemFile& sourceFile, MemFile& targetFile)
+{
+ sourceFile.ensureBodyBlockCached();
+ LOG(spam,
+ "Moving data from %s to %s",
+ sourceFile.toString().c_str(),
+ targetFile.toString().c_str());
+
+ std::vector<const MemSlot*> slotsToCopy;
+ slotsToCopy.reserve(sourceFile.getSlotCount());
+
+ for (uint32_t j = 0; j < sourceFile.getSlotCount(); j++) {
+ const MemSlot* slot(&sourceFile[j]);
+
+ if (!targetFile.getSlotAtTime(slot->getTimestamp())) {
+ slotsToCopy.push_back(slot);
+ }
+ }
+ targetFile.copySlotsFrom(sourceFile, slotsToCopy);
+ LOG(spam, "Moved data from %s to %s",
+ sourceFile.toString().c_str(), targetFile.toString().c_str());
+}
+
+spi::Result
+JoinOperationHandler::join(
+ const spi::Bucket& source1,
+ const spi::Bucket& source2,
+ const spi::Bucket& target)
+{
+ if ((source1.getBucketId() == source2.getBucketId())
+ && (target.getBucketId() == source1.getBucketId()))
+ {
+ return singleJoin(source1, target);
+ }
+
+ MemFileCacheEvictionGuard targetFile(
+ getMemFile(target.getBucketId(), target.getPartition(), false));
+
+ std::vector<spi::Bucket> sources;
+ sources.push_back(source1);
+ if (source1.getBucketId() != source2.getBucketId()) {
+ sources.push_back(source2);
+ }
+
+ for (uint32_t i = 0; i < sources.size(); i++) {
+ MemFileCacheEvictionGuard sourceFile(
+ getMemFile(sources[i].getBucketId(),
+ sources[i].getPartition(),
+ false));
+
+ if (targetFile->empty()) {
+ LOG(spam, "Renaming %s to %s",
+ sourceFile->toString().c_str(), targetFile->toString().c_str());
+ // It is assumed that if this fails, the nature of the exception is
+ // such that it will cause the disk to automatically be marked as
+ // down and for the process to restart, meaning we should not get
+ // out of sync between the service and persistence layers.
+ sourceFile.get().move(targetFile.get());
+ } else {
+ copySlots(*sourceFile, *targetFile);
+ targetFile->flushToDisk();
+ sourceFile.get().deleteFile();
+ }
+ sourceFile.unguard();
+ }
+ targetFile.unguard();
+
+ return spi::Result();
+}
+
+void
+JoinOperationHandler::clearBucketFromCache(const spi::Bucket& bucket)
+{
+ getMemFile(bucket.getBucketId(), bucket.getPartition(), false)
+ .eraseFromCache();
+}
+
+/*
+ * Moving same bucket between partitions, potentially joining data
+ * if target file already exists.
+ */
+spi::Result
+JoinOperationHandler::singleJoin(
+ const spi::Bucket& source,
+ const spi::Bucket& target)
+{
+ assert(source.getBucketId() == target.getBucketId());
+ assert(source.getPartition() != target.getPartition());
+ // Internal joins sidestep the cache completely, so we have to ensure
+ // the bucket is cleared from it before commencing. Otherwise, it's
+ // possible that the cached file offsets will not reflect what's actually
+ // stored on disk, leading to potential data corruption! The bucket shall
+ // not have been taken out of the cache before this point.
+ clearBucketFromCache(target);
+
+ Directory& toJoinDir = _env.getDirectory(source.getPartition());
+ FileSpecification toJoinSpec(
+ source.getBucketId(), toJoinDir,
+ _env.calculatePathInDir(source.getBucketId(), toJoinDir));
+
+ MemFile toJoin(toJoinSpec, _env);
+
+ Directory& toKeepDir = _env.getDirectory(target.getPartition());
+ FileSpecification toKeepSpec(
+ source.getBucketId(), toKeepDir,
+ _env.calculatePathInDir(source.getBucketId(), toKeepDir));
+ assert(toJoinDir != toKeepDir);
+
+ const double maxFillRate(
+ _env.acquireConfigReadLock().memFilePersistenceConfig()
+ ->diskFullFactorMove);
+ if (source.getPartition() != target.getPartition() &&
+ toKeepDir.isFull(0, maxFillRate))
+ {
+ std::string failure =
+            vespalib::make_string("Not moving bucket %s to directory %s because its "
+ "fill rate is %G (>%G)",
+ source.getBucketId().toString().c_str(),
+ toKeepDir.toString().c_str(),
+ toKeepDir.getPartition().getMonitor()->getFillRate(),
+ maxFillRate);
+
+ LOG(debug, "%s", failure.c_str());
+
+ return spi::Result(spi::Result::TRANSIENT_ERROR, failure);
+ }
+
+ MemFile toKeep(toKeepSpec, _env);
+
+ copySlots(toJoin, toKeep);
+ toKeep.flushToDisk();
+
+ // Delete original file.
+ _env._memFileMapper.deleteFile(toJoin, _env);
+
+ return spi::Result();
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h
new file mode 100644
index 00000000000..c310a9f6f71
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::JoinHandler
+ * \ingroup memfile
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/spi/operationhandler.h>
+#include <vespa/persistence/spi/persistenceprovider.h>
+
+namespace storage {
+
+namespace memfile {
+
+class JoinOperationHandler : public OperationHandler {
+public:
+ typedef std::unique_ptr<JoinOperationHandler> UP;
+
+ JoinOperationHandler(Environment&);
+
+ spi::Result join(const spi::Bucket& source1,
+ const spi::Bucket& source2,
+ const spi::Bucket& target);
+
+ spi::Result singleJoin(const spi::Bucket& source,
+ const spi::Bucket& target);
+
+private:
+ Environment& _env;
+
+ void copySlots(MemFile& source, MemFile& target);
+ void clearBucketFromCache(const spi::Bucket&);
+};
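+
+// --- Illustrative sketch (editor's addition, not part of the original patch).
+// Shows the two forms join() is expected to handle: joining two sibling
+// buckets into a target, and the degenerate form where source and target
+// share the same bucket id but live on different partitions, which join()
+// dispatches to singleJoin().
+namespace example {
+
+inline spi::Result
+joinSiblings(JoinOperationHandler& handler,
+             const spi::Bucket& source1,
+             const spi::Bucket& source2,
+             const spi::Bucket& target)
+{
+    return handler.join(source1, source2, target);
+}
+
+inline spi::Result
+moveAcrossPartitions(JoinOperationHandler& handler,
+                     const spi::Bucket& onSourcePartition,
+                     const spi::Bucket& onTargetPartition)
+{
+    // Same bucket id in both arguments, different partitions.
+    return handler.join(onSourcePartition, onSourcePartition,
+                        onTargetPartition);
+}
+
+} // example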
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp
new file mode 100644
index 00000000000..c369ee47391
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp
@@ -0,0 +1,12 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/memfilepersistence.h>
+
+namespace storage {
+namespace memfile {
+
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h
new file mode 100644
index 00000000000..d11673667f4
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::MemFilePersistence
+ * \ingroup memfile
+ *
+ * \brief Top class in memfile persistence actually implementing the SPI
+ */
+
+#pragma once
+
+namespace storage {
+namespace memfile {
+
+struct MemFilePersistence
+{
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp
new file mode 100644
index 00000000000..cec695423af
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp
@@ -0,0 +1,889 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldset/fieldsetrepo.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".memfilepersistenceprovider");
+
+#define TRACE(context, level, func, message) \
+{ \
+ if ((context).getTrace().shouldTrace(level)) { \
+ vespalib::string messageToTrace( \
+ vespalib::make_string("MemFilePP.%s: %s", func, message)); \
+ (context).getTrace().trace(level, messageToTrace); \
+ } \
+}
+#define TRACEGENERIC(context, type) \
+if ((context).getTrace().shouldTrace(9)) { \
+ vespalib::string messageToTrace( \
+ vespalib::make_string("MemFilePP.%s: Load type %s, priority %u.", \
+ type, (context).getLoadType().toString().c_str(), \
+ (uint32_t) (context).getPriority())); \
+ (context).getTrace().trace(9, messageToTrace); \
+}
+
+namespace storage {
+namespace memfile {
+
+namespace {
+
+Device::State
+mapIoExceptionToDeviceState(MemFileIoException::Type type)
+{
+ using vespalib::IoException;
+ switch (type) {
+ case IoException::ILLEGAL_PATH:
+ return Device::PATH_FAILURE;
+ case IoException::NO_PERMISSION:
+ return Device::NO_PERMISSION;
+ case IoException::DISK_PROBLEM:
+ return Device::IO_FAILURE;
+ case IoException::TOO_MANY_OPEN_FILES:
+ return Device::TOO_MANY_OPEN_FILES;
+ default:
+ return Device::OK;
+ }
+}
+
+} // end of anonymous namespace
+
+MemFilePtr&
+MemFilePersistenceProvider::getThreadLocalMemFile() const
+{
+ return _threadLocals.get()._memFile;
+}
+
+MemFilePersistenceThreadMetrics&
+MemFilePersistenceProvider::getMetrics() const
+{
+ ThreadContext& context = _threadLocals.get();
+ if (context._metrics == NULL) {
+ context._metrics = _metrics.addThreadMetrics();
+ }
+
+ return *context._metrics;
+}
+
+bool
+MemFilePersistenceProvider::hasCachedMemFile() const
+{
+ return _threadLocals.get()._memFile.get();
+}
+
+MemFilePtr
+MemFilePersistenceProvider::getMemFile(const spi::Bucket& b,
+ bool keepInCache) const
+{
+ MemFilePtr& ptr = getThreadLocalMemFile();
+
+ if (ptr.get()) {
+ assert(ptr->getFile().getBucketId() == b);
+
+ MemFilePtr retVal = ptr;
+ ptr = MemFilePtr();
+ return retVal;
+ }
+
+ return _env->_cache.get(b.getBucketId(),
+ *_env,
+ _env->getDirectory(b.getPartition()),
+ keepInCache);
+}
+
+void
+MemFilePersistenceProvider::setActiveMemFile(MemFilePtr ptr,
+ const char* user) const
+{
+ LOG(spam, "Inserting active memfile %s for user %s",
+ ptr->getFile().getBucketId().toString().c_str(),
+ user);
+ getThreadLocalMemFile() = ptr;
+}
+
+void
+MemFilePersistenceProvider::clearActiveMemFile(spi::Context* context) const
+{
+ LOG(spam, "Clearing active memfile");
+ MemFilePtr& ptr = getThreadLocalMemFile();
+ assert(ptr.get() == NULL || !ptr->slotsAltered());
+ ptr = MemFilePtr();
+ if (context != 0) {
+ TRACE(*context, 9, "clearActiveMemFile", "Done clearing");
+ }
+}
+
+enum MemFileAccessGuardScopeExitAction {
+ REINSERT_AS_ACTIVE = 0x1,
+};
+
+/**
+ * The MemFile access guard provides a simple scope guard for providing
+ * exception safety for operations toward MemFiles.
+ * Upon destruction, the guard will evict the file from the cache iff the
+ * guard has not been dismissed. This throws away all non-persisted changes
+ * to the file and clears it from the cache to force a full reload on next
+ * access. This is the safest option, as all operations that are not yet
+ * persisted should fail back to the client automatically.
+ *
+ * The current MemFile will be reinserted as the thread's active MemFile
+ * iff REINSERT_AS_ACTIVE has been specified as a guard construction flag and
+ * the guard was dismissed before destruction.
+ */
+class MemFileAccessGuard : public Types
+{
+ MemFileAccessGuard(const MemFileAccessGuard&);
+ MemFileAccessGuard& operator=(const MemFileAccessGuard&);
+public:
+ MemFileAccessGuard(const MemFilePersistenceProvider& spi,
+ const MemFilePtr& ptr,
+ const char* user,
+ uint32_t flags = 0)
+ : _spi(spi),
+ _ptr(ptr),
+ _user(user),
+ _flags(flags),
+ _dismissed(false)
+ {
+ assert(_ptr.get());
+ }
+
+ ~MemFileAccessGuard() {
+ if (!_dismissed) {
+ LOG(debug,
+ "Access guard in %s not dismissed on scope exit, clearing %s"
+ " from cache to force reload of file on next access.",
+ _user,
+ _ptr->getFile().getBucketId().toString().c_str());
+
+ _ptr->clearFlag(SLOTS_ALTERED);
+ _ptr.eraseFromCache(); // nothrow
+ }
+ if ((_flags & REINSERT_AS_ACTIVE) && _dismissed) {
+ _spi.setActiveMemFile(_ptr, _user);
+ } else {
+ _spi.clearActiveMemFile();
+ }
+ }
+
+ // Misc accessors
+ MemFile* operator->() {
+ return _ptr.get();
+ }
+ MemFile& operator*() {
+ return *_ptr;
+ }
+ const MemFile* operator->() const {
+ return _ptr.get();
+ }
+ const MemFile& operator*() const {
+ return *_ptr;
+ }
+ MemFilePtr& getMemFilePtr() {
+ return _ptr;
+ }
+ const MemFilePtr& getMemFilePtr() const {
+ return _ptr;
+ }
+
+ /**
+ * If all access towards the MemFile has been successfully performed,
+ * calling dismiss() will ensure that the specified cleanup actions
+ * are not taken upon scope exit.
+ */
+ void dismiss() {
+ _dismissed = true;
+ }
+
+private:
+ const MemFilePersistenceProvider& _spi;
+ MemFilePtr _ptr;
+ const char* _user;
+ const uint32_t _flags;
+ bool _dismissed;
+};
+
+void
+MemFilePersistenceProvider::handleBucketCorruption(const FileSpecification& file) const
+{
+ spi::Bucket fixBucket(file.getBucketId(),
+ spi::PartitionId(file.getDirectory().getIndex()));
+
+ // const_cast is nasty, but maintain() must necessarily be able to
+ // modify state...
+ MemFilePersistenceProvider& mutableSelf(
+ const_cast<MemFilePersistenceProvider&>(*this));
+
+ spi::Result maintainResult(mutableSelf.maintain(fixBucket, spi::HIGH));
+ if (maintainResult.getErrorCode() != spi::Result::NONE) {
+ LOG(warning,
+ "Failed to successfully repair %s after corruptions: %s",
+ fixBucket.toString().c_str(),
+ maintainResult.toString().c_str());
+ }
+
+ // Add bucket to set of modified buckets so service layer can request
+ // new bucket info.
+ _env->addModifiedBucket(file.getBucketId());
+}
+
+template<typename C>
+C MemFilePersistenceProvider::handleException(const std::exception& e,
+ bool canRepairBucket) const
+{
+ LOG(debug, "Handling exception caught during processing: %s", e.what());
+
+ const MemFileIoException* io = dynamic_cast<const MemFileIoException*>(&e);
+ if (io != NULL) {
+ std::ostringstream error;
+ error << "Exception caught processing operation for "
+ << io->getFile().getPath() << ": " << io->getMessage();
+
+ Device::State deviceState(
+ mapIoExceptionToDeviceState(io->getType()));
+
+ if (deviceState != Device::OK) {
+ io->getFile().getDirectory().addEvent(
+ deviceState,
+ io->getMessage(),
+ VESPA_STRLOC);
+
+ _env->_mountPoints->writeToFile();
+
+ return C(spi::Result::FATAL_ERROR, error.str());
+ }
+ if (io->getType() == vespalib::IoException::CORRUPT_DATA
+ && canRepairBucket)
+ {
+ handleBucketCorruption(io->getFile());
+ }
+
+ return C(spi::Result::TRANSIENT_ERROR, error.str());
+ }
+ const CorruptMemFileException* ce(
+ dynamic_cast<const CorruptMemFileException*>(&e));
+ if (ce != 0) {
+ std::ostringstream error;
+ error << "Exception caught processing operation for "
+ << ce->getFile().getPath() << ": " << ce->getMessage();
+ if (canRepairBucket) {
+ handleBucketCorruption(ce->getFile());
+ }
+ return C(spi::Result::TRANSIENT_ERROR, error.str());
+ }
+
+ const TimestampExistException* ts =
+ dynamic_cast<const TimestampExistException*>(&e);
+ if (ts != NULL) {
+ return C(spi::Result::TIMESTAMP_EXISTS, ts->getMessage());
+ }
+
+ return C(spi::Result::PERMANENT_ERROR, e.what());
+}
+
+MemFilePersistenceProvider::MemFilePersistenceProvider(
+ framework::ComponentRegister& compReg,
+ const config::ConfigUri & configUri)
+ : framework::Component(compReg, "memfilepersistenceprovider"),
+ framework::StatusReporter("memfilepersistenceprovider",
+ "VDS Persistence Provider"),
+ _componentRegister(compReg),
+ _configUri(configUri),
+ _config(*config::ConfigGetter<vespa::config::storage::StorMemfilepersistenceConfig>::getConfig(configUri.getConfigId(),
+ configUri.getContext())),
+ _memFileMapper(*this),
+ _repo(0),
+ _metrics(*this),
+ _threadLocals(1024)
+{
+ registerMetric(_metrics);
+ registerStatusPage(*this);
+}
+
+MemFilePersistenceProvider::~MemFilePersistenceProvider()
+{
+}
+
+void
+MemFilePersistenceProvider::setDocumentRepo(const document::DocumentTypeRepo& repo)
+{
+ _repo = &repo;
+ if (_env.get()) {
+ _env->setRepo(_repo);
+ }
+}
+
+using MemFilePersistenceConfig
+ = vespa::config::storage::StorMemfilepersistenceConfig;
+using PersistenceConfig = vespa::config::content::PersistenceConfig;
+
+namespace {
+
+MemFileCache::MemoryUsage
+getCacheLimits(const MemFilePersistenceConfig& cfg)
+{
+ MemFileCache::MemoryUsage cacheLimits;
+ cacheLimits.metaSize = cfg.cacheSize * cfg.cacheSizeMetaPercentage / 100;
+ cacheLimits.headerSize = cfg.cacheSize * cfg.cacheSizeHeaderPercentage / 100;
+ cacheLimits.bodySize = cfg.cacheSize * cfg.cacheSizeBodyPercentage / 100;
+ return cacheLimits;
+}
+
+std::unique_ptr<Options>
+makeOptions(const MemFilePersistenceConfig& memFileCfg,
+ const PersistenceConfig& persistenceCfg)
+{
+ return std::unique_ptr<Options>(new Options(memFileCfg, persistenceCfg));
+}
+
+}
+
+void
+MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> cfg)
+{
+ assert(cfg.get() != nullptr);
+ auto guard = _env->acquireConfigWriteLock();
+
+ guard.setMemFilePersistenceConfig(std::move(cfg));
+
+ if (guard.hasPersistenceConfig()) {
+ guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(),
+ *guard.persistenceConfig()));
+ }
+
+ // Data race free; acquires internal cache lock.
+ _cache->setCacheSize(getCacheLimits(*guard.memFilePersistenceConfig()));
+}
+
+void
+MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> cfg)
+{
+ assert(cfg.get() != nullptr);
+ auto guard = _env->acquireConfigWriteLock();
+
+ guard.setPersistenceConfig(std::move(cfg));
+
+ if (guard.hasMemFilePersistenceConfig()) {
+ guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(),
+ *guard.persistenceConfig()));
+ }
+}
+
+void
+MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> cfg)
+{
+ assert(cfg.get() != nullptr);
+ auto guard = _env->acquireConfigWriteLock();
+ guard.setDevicesConfig(std::move(cfg));
+}
+
+spi::PartitionStateListResult
+MemFilePersistenceProvider::getPartitionStates() const
+{
+ // Lazily initialize to ensure service layer has set up enough for us
+ // to use all we need (memory manager for instance)
+ if (_env.get() == 0) {
+ assert(_repo != 0);
+ _cache.reset(new MemFileCache(_componentRegister,
+ _metrics._cache));
+ _cache->setCacheSize(getCacheLimits(_config));
+ try {
+ _env.reset(new Environment(
+ _configUri, *_cache, _memFileMapper, *_repo, getClock()));
+ } catch (NoDisksException& e) {
+ return spi::PartitionStateListResult(spi::PartitionStateList(
+ spi::PartitionId::Type(0)));
+ }
+ _fileScanner.reset(new FileScanner(
+ _componentRegister, *_env->_mountPoints,
+ _config.dirLevels, _config.dirSpread));
+ _util.reset(new OperationHandler(*_env));
+ _iteratorHandler.reset(new IteratorHandler(*_env));
+ _joinOperationHandler.reset(new JoinOperationHandler(*_env));
+ _splitOperationHandler.reset(new SplitOperationHandler(*_env));
+ }
+ return _env->_mountPoints->getPartitionStates();
+}
+
+spi::BucketIdListResult
+MemFilePersistenceProvider::listBuckets(spi::PartitionId partition) const
+{
+ spi::BucketIdListResult::List buckets;
+ _fileScanner->buildBucketList(buckets, partition, 0, 1);
+ return spi::BucketIdListResult(buckets);
+}
+
+spi::BucketIdListResult
+MemFilePersistenceProvider::getModifiedBuckets() const
+{
+ document::BucketId::List modified;
+ _env->swapModifiedBuckets(modified); // Atomic op
+ return spi::BucketIdListResult(modified);
+}
+
+spi::BucketInfoResult
+MemFilePersistenceProvider::getBucketInfo(const spi::Bucket& bucket) const
+{
+ LOG(spam, "getBucketInfo(%s)", bucket.toString().c_str());
+ try {
+ bool retainMemFile = hasCachedMemFile();
+ MemFileAccessGuard file(*this,
+ getMemFile(bucket, false),
+ "getBucketInfo",
+ retainMemFile ? REINSERT_AS_ACTIVE : 0);
+
+ spi::BucketInfo info = file->getBucketInfo();
+
+ file.dismiss();
+ return spi::BucketInfoResult(info);
+ } catch (std::exception& e) {
+ return handleException<spi::BucketInfoResult>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::put(const spi::Bucket& bucket, spi::Timestamp ts,
+ const document::Document::SP& doc,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "put");
+ LOG(spam, "put(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts),
+ doc->getId().toString().c_str());
+ try {
+ TRACE(context, 9, "put", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "put",
+ REINSERT_AS_ACTIVE);
+ TRACE(context, 9, "put", "Altering file in memory");
+ _util->write(*file, *doc, Timestamp(ts));
+
+ TRACE(context, 9, "put", "Dismissing file");
+ file.dismiss();
+ return spi::Result();
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::RemoveResult
+MemFilePersistenceProvider::remove(const spi::Bucket& bucket, spi::Timestamp ts,
+ const DocumentId& id, spi::Context& context)
+{
+ TRACEGENERIC(context, "remove");
+ LOG(spam, "remove(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts),
+ id.toString().c_str());
+ try {
+ TRACE(context, 9, "remove", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "remove",
+ REINSERT_AS_ACTIVE);
+ TRACE(context, 9, "remove", "Altering file in memory");
+ spi::Timestamp oldTs(_util->remove(*file,
+ id, Timestamp(ts),
+ OperationHandler::ALWAYS_PERSIST_REMOVE).getTime());
+ TRACE(context, 9, "remove", "Dismissing file");
+ file.dismiss();
+ return spi::RemoveResult(oldTs > 0);
+ } catch (std::exception& e) {
+ return handleException<spi::RemoveResult>(e, true);
+ }
+}
+
+spi::RemoveResult
+MemFilePersistenceProvider::removeIfFound(const spi::Bucket& bucket,
+ spi::Timestamp ts,
+ const DocumentId& id,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "removeIfFound");
+ LOG(spam, "removeIfFound(%s, %zu, %s)", bucket.toString().c_str(),
+ uint64_t(ts), id.toString().c_str());
+ try {
+ TRACE(context, 9, "removeIfFound", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "removeiffound",
+ REINSERT_AS_ACTIVE);
+ TRACE(context, 9, "removeIfFound", "Altering file in memory");
+ spi::Timestamp oldTs(_util->remove(*file,
+ id, Timestamp(ts),
+ OperationHandler::PERSIST_REMOVE_IF_FOUND).getTime());
+ TRACE(context, 9, "removeIfFound", "Dismissing file");
+ file.dismiss();
+ return spi::RemoveResult(oldTs > 0);
+ } catch (std::exception& e) {
+ return handleException<spi::RemoveResult>(e, true);
+ }
+}
+
+spi::UpdateResult
+MemFilePersistenceProvider::update(
+ const spi::Bucket& bucket, spi::Timestamp ts,
+ const document::DocumentUpdate::SP& upd, spi::Context& context)
+{
+ TRACEGENERIC(context, "update");
+ LOG(spam, "update(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts),
+ upd->getId().toString().c_str());
+ try {
+ TRACE(context, 9, "update", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "update",
+ REINSERT_AS_ACTIVE);
+ TRACE(context, 9, "update", "Reading old entry");
+ bool headerOnly = !upd->affectsDocumentBody();
+ OperationHandler::ReadResult ret = _util->read(
+ *file,
+ upd->getId(),
+ Timestamp(ts),
+ headerOnly ? HEADER_ONLY : ALL);
+
+ Document::UP doc = ret.getDoc();
+ if (!doc.get()) {
+ if (upd->getCreateIfNonExistent()) {
+ TRACE(context, 9, "update", "Doc did not exist, creating one");
+ doc.reset(new Document(upd->getType(), upd->getId()));
+ upd->applyTo(*doc);
+ _util->write(*file, *doc, Timestamp(ts));
+ file.dismiss();
+ return spi::UpdateResult(spi::Timestamp(ts));
+ } else {
+ TRACE(context, 9, "update", "Doc did not exist");
+ file.dismiss();
+ return spi::UpdateResult();
+ }
+ }
+
+ if (Timestamp(ts) == ret._ts) {
+ file.dismiss();
+ if (doc->getId() == upd->getId()) {
+ TRACE(context, 9, "update", "Timestamp exist same doc");
+ return spi::UpdateResult(spi::Result::TRANSIENT_ERROR,
+ "Update was already performed.");
+ } else {
+ // TODO: Assert-fail if we ever get here??
+ TRACE(context, 9, "update", "Timestamp exist other doc");
+ std::ostringstream error;
+ error << "Update of " << upd->getId()
+ << ": There already exists a document"
+ << " with timestamp " << ts;
+
+ return spi::UpdateResult(spi::Result::TIMESTAMP_EXISTS, error.str());
+ }
+ }
+
+ TRACE(context, 9, "update", "Altering file in memory");
+ upd->applyTo(*doc);
+ if (headerOnly) {
+ TRACE(context, 9, "update", "Writing new header entry");
+ _util->update(*file, *doc, Timestamp(ts), Timestamp(ret._ts));
+ } else {
+ TRACE(context, 9, "update", "Writing new doc entry");
+ _util->write(*file, *doc, Timestamp(ts));
+ }
+ if (headerOnly) {
+ ++getMetrics().headerOnlyUpdates;
+ }
+
+ TRACE(context, 9, "update", "Dismissing file");
+ file.dismiss();
+ return spi::UpdateResult(spi::Timestamp(ret._ts.getTime()));
+ } catch (std::exception& e) {
+ return handleException<spi::UpdateResult>(e, true);
+ }
+}
+
+spi::GetResult
+MemFilePersistenceProvider::get(const spi::Bucket& bucket,
+ const document::FieldSet& fieldSet,
+ const DocumentId& id,
+ spi::Context& context) const
+{
+ TRACEGENERIC(context, "get");
+ LOG(spam, "get(%s, %s)", bucket.toString().c_str(), id.toString().c_str());
+ try {
+ TRACE(context, 9, "get", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "get");
+ document::HeaderFields headerFields;
+ bool headerOnly = headerFields.contains(fieldSet);
+
+ TRACE(context, 9, "get", "Reading from file.");
+ OperationHandler::ReadResult ret =
+ _util->read(*file, id, Timestamp(0),
+ headerOnly ? HEADER_ONLY : ALL);
+
+ file.dismiss();
+ if (!ret._doc.get()) {
+ TRACE(context, 9, "get", "Doc not found");
+ return spi::GetResult();
+ }
+ if (headerOnly) {
+ TRACE(context, 9, "get", "Retrieved doc header only");
+ ++getMetrics().headerOnlyGets;
+ }
+ // Don't create unnecessary copy if we want the full doc or header
+ if (fieldSet.getType() == document::FieldSet::ALL
+ || fieldSet.getType() == document::FieldSet::HEADER)
+ {
+ TRACE(context, 9, "get", "Returning doc");
+ return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime()));
+ } else {
+ TRACE(context, 9, "get", "Returning stripped doc");
+ document::FieldSet::stripFields(*ret._doc, fieldSet);
+ return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime()));
+ }
+ } catch (std::exception& e) {
+ return handleException<spi::GetResult>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::flush(const spi::Bucket& bucket,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "flush");
+ LOG(spam, "flush(%s)", bucket.toString().c_str());
+ try {
+ TRACE(context, 9, "flush", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "flush");
+
+ LOG(spam, "Attempting to auto-flush %s",
+ file->getFile().toString().c_str());
+ TRACE(context, 9, "flush", "Flushing to disk");
+ file->flushToDisk();
+
+ TRACE(context, 9, "flush", "Dismissing file");
+ file.dismiss();
+ return spi::Result();
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::CreateIteratorResult
+MemFilePersistenceProvider::createIterator(const spi::Bucket& b,
+ const document::FieldSet& fieldSet,
+ const spi::Selection& sel,
+ spi::IncludedVersions versions,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "createIterator");
+ LOG(spam, "createIterator(%s)", b.toString().c_str());
+ try {
+ clearActiveMemFile();
+ return _iteratorHandler->createIterator(b, fieldSet, sel, versions);
+ } catch (std::exception& e) {
+ return handleException<spi::CreateIteratorResult>(e, true);
+ }
+}
+
+spi::IterateResult
+MemFilePersistenceProvider::iterate(spi::IteratorId iterId,
+ uint64_t maxByteSize,
+ spi::Context& context) const
+{
+ TRACEGENERIC(context, "iterate");
+ try {
+ clearActiveMemFile(&context);
+ spi::IterateResult result(
+ _iteratorHandler->iterate(iterId, maxByteSize));
+ TRACE(context, 9, "iterate", "Done filling iterator");
+ return result;
+ } catch (std::exception& e) {
+ return handleException<spi::IterateResult>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::destroyIterator(spi::IteratorId iterId,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "destroyIterator");
+ try {
+ return _iteratorHandler->destroyIterator(iterId);
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::deleteBucket(const spi::Bucket& bucket,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "deleteBucket");
+ LOG(spam, "deleteBucket(%s)", bucket.toString().c_str());
+ try {
+ TRACE(context, 9, "deleteBucket", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "deleteBucket");
+ TRACE(context, 9, "deleteBucket", "Deleting it");
+ file.getMemFilePtr().deleteFile();
+ // It is assumed the guard only kicks in if deleteFile() failed _before_
+ // it erased the bucket from the cache (since that erase should be a
+ // nothrow op). Otherwise, this would crash trying to deref a null ptr.
+ TRACE(context, 9, "deleteBucket", "Dismissing file");
+ file.dismiss();
+ return spi::Result();
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::split(const spi::Bucket& source,
+ const spi::Bucket& target1,
+ const spi::Bucket& target2,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "split");
+ LOG(spam, "split(%s -> %s, %s)", source.toString().c_str(),
+ target1.toString().c_str(), target2.toString().c_str());
+ try {
+ clearActiveMemFile();
+ return _splitOperationHandler->split(source, target1, target2);
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::join(const spi::Bucket& source1,
+ const spi::Bucket& source2,
+ const spi::Bucket& target,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "join");
+ LOG(spam, "join(%s, %s -> %s)", source1.toString().c_str(),
+ source2.toString().c_str(), target.toString().c_str());
+ try {
+ clearActiveMemFile();
+ return _joinOperationHandler->join(source1, source2, target);
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::removeEntry(const spi::Bucket& bucket,
+ spi::Timestamp ts,
+ spi::Context& context)
+{
+ TRACEGENERIC(context, "removeEntry");
+ LOG(spam, "removeEntry(%s, %zu)", bucket.toString().c_str(), uint64_t(ts));
+ try {
+ TRACE(context, 9, "removeEntry", "Grabbing memfile");
+ MemFileAccessGuard file(*this, getMemFile(bucket), "revert",
+ REINSERT_AS_ACTIVE);
+ const MemSlot* slot = file->getSlotAtTime(Timestamp(ts));
+ if (slot) {
+ TRACE(context, 9, "removeEntry", "Removing slot");
+ file->removeSlot(*slot);
+ }
+
+ TRACE(context, 9, "removeEntry", "Dismissing file");
+ file.dismiss();
+ return spi::Result();
+ } catch (std::exception& e) {
+ return handleException<spi::Result>(e, true);
+ }
+}
+
+spi::Result
+MemFilePersistenceProvider::maintain(const spi::Bucket& bucket,
+ spi::MaintenanceLevel level)
+{
+ LOG(spam, "maintain(%s)", bucket.toString().c_str());
+ try {
+ MemFileAccessGuard file(*this, getMemFile(bucket, false), "maintain");
+ assert(!file->slotsAltered());
+ if (!file->fileExists()) {
+ LOG(debug,
+ "maintain(%s): file '%s' does not exist, nothing to maintain. "
+ "Assuming file was corrupted and auto-deleted.",
+ bucket.toString().c_str(),
+ file->getFile().getPath().c_str());
+ return spi::Result();
+ }
+
+ std::ostringstream report;
+ const uint32_t verifyFlags((level == spi::HIGH) ? 0 : DONT_VERIFY_BODY);
+ if (!file->repair(report, verifyFlags)) {
+ LOG(debug,
+ "repair() on %s indicated errors, evicting from cache to "
+ "force reload of file with altered metadata",
+ bucket.toString().c_str());
+ return spi::Result(); // No dismissal of guard; auto-evict.
+ }
+ assert(!file->slotsAltered());
+ file->compact();
+ file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE);
+
+ file.dismiss();
+ return spi::Result();
+ } catch (std::exception& e) {
+ // A failing maintain() must not trigger auto-repair, since auto-repair
+ // would in turn call maintain().
+ return handleException<spi::Result>(e, false);
+ }
+}
+
+vespalib::string
+MemFilePersistenceProvider::getReportContentType(const framework::HttpUrlPath&) const
+{
+ return "text/html";
+}
+
+namespace {
+
+void
+printMemoryUsage(std::ostream& out,
+ const char* part,
+ uint64_t usage,
+ uint64_t total)
+{
+ out << "<li>" << part << ": " << usage;
+ if (total > 0) {
+ out << " (" << ((static_cast<double>(usage) / total) * 100.0) << "%)";
+ }
+ out << "</li>\n";
+}
+
+}
+
+bool
+MemFilePersistenceProvider::reportStatus(std::ostream& out,
+ const framework::HttpUrlPath& path) const
+{
+ framework::PartlyHtmlStatusReporter htmlReporter(*this);
+ htmlReporter.reportHtmlHeader(out, path);
+
+ out << "<h1>Mem file persistence provider status page</h1>\n";
+ bool printVerbose = path.hasAttribute("verbose");
+ if (!printVerbose) {
+ out << "<p><a href=\"memfilepersistenceprovider?verbose\">"
+ "More verbose</a></p>\n";
+ } else {
+ out << "<p><a href=\"memfilepersistenceprovider\">"
+ "Less verbose</a></p>\n";
+ }
+
+ MemFileCache::Statistics cacheStats(_env->_cache.getCacheStats());
+ const MemFileCache::MemoryUsage& memUsage(cacheStats._memoryUsage);
+ out << "<p>Cache with " << cacheStats._numEntries
+ << " entries using " << memUsage.sum()
+ << " of max " << cacheStats._cacheSize
+ << " bytes</p>\n";
+ out << "<ul>\n";
+ printMemoryUsage(out, "Meta", memUsage.metaSize, memUsage.sum());
+ printMemoryUsage(out, "Header", memUsage.headerSize, memUsage.sum());
+ printMemoryUsage(out, "Body", memUsage.bodySize, memUsage.sum());
+ out << "</ul>\n";
+ out << "</p>\n";
+
+ if (printVerbose) {
+ _env->_cache.printCacheEntriesHtml(out);
+ }
+
+ htmlReporter.reportHtmlFooter(out, path);
+
+ return true;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h
new file mode 100644
index 00000000000..69edb680d5c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h
@@ -0,0 +1,164 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/init/filescanner.h>
+#include <vespa/persistence/spi/abstractpersistenceprovider.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vespa/memfilepersistence/spi/operationhandler.h>
+#include <vespa/memfilepersistence/spi/iteratorhandler.h>
+#include <vespa/memfilepersistence/spi/joinoperationhandler.h>
+#include <vespa/memfilepersistence/spi/splitoperationhandler.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h>
+#include <vespa/memfilepersistence/spi/threadmetricprovider.h>
+#include <vespa/storageframework/generic/status/httpurlpath.h>
+#include <vespa/memfilepersistence/spi/threadlocals.h>
+#include <vespa/config/config.h>
+
+namespace storage {
+
+namespace memfile {
+
+class ThreadContext {
+public:
+ MemFilePtr _memFile;
+ MemFilePersistenceThreadMetrics* _metrics;
+
+ ThreadContext()
+ : _metrics(NULL)
+ {}
+};
+
+class MemFilePersistenceProvider : public spi::AbstractPersistenceProvider,
+ public framework::Component,
+ public Types,
+ public framework::StatusReporter,
+ public ThreadMetricProvider
+{
+public:
+ typedef std::unique_ptr<MemFilePersistenceProvider> UP;
+
+ MemFilePersistenceProvider(
+ framework::ComponentRegister& reg,
+ const config::ConfigUri & configUri);
+
+ ~MemFilePersistenceProvider();
+
+ spi::PartitionStateListResult getPartitionStates() const;
+
+ spi::BucketIdListResult listBuckets(spi::PartitionId) const;
+
+ spi::BucketIdListResult getModifiedBuckets() const;
+
+ spi::BucketInfoResult getBucketInfo(const spi::Bucket&) const;
+
+ spi::Result put(const spi::Bucket&, spi::Timestamp,
+ const document::Document::SP&, spi::Context&);
+
+ spi::RemoveResult remove(const spi::Bucket&, spi::Timestamp,
+ const DocumentId&, spi::Context&);
+
+ spi::RemoveResult removeIfFound(const spi::Bucket&, spi::Timestamp,
+ const DocumentId&, spi::Context&);
+
+ spi::UpdateResult update(const spi::Bucket&, spi::Timestamp,
+ const document::DocumentUpdate::SP&, spi::Context&);
+
+ spi::GetResult get(const spi::Bucket&, const document::FieldSet&,
+ const spi::DocumentId&, spi::Context&) const;
+
+ spi::Result flush(const spi::Bucket&, spi::Context&);
+
+ spi::CreateIteratorResult createIterator(const spi::Bucket&,
+ const document::FieldSet&,
+ const spi::Selection&,
+ spi::IncludedVersions versions,
+ spi::Context&);
+
+ spi::IterateResult iterate(spi::IteratorId,
+ uint64_t maxByteSize, spi::Context&) const;
+
+ spi::Result destroyIterator(spi::IteratorId, spi::Context&);
+
+ spi::Result deleteBucket(const spi::Bucket&, spi::Context&);
+
+ spi::Result split(const spi::Bucket& source,
+ const spi::Bucket& target1,
+ const spi::Bucket& target2,
+ spi::Context&);
+
+ spi::Result join(const spi::Bucket& source1,
+ const spi::Bucket& source2,
+ const spi::Bucket& target,
+ spi::Context&);
+
+ spi::Result removeEntry(const spi::Bucket&,
+ spi::Timestamp, spi::Context&);
+
+ spi::Result maintain(const spi::Bucket&,
+ spi::MaintenanceLevel level);
+
+ Environment& getEnvironment() {
+ return *_env;
+ }
+
+ virtual vespalib::string getReportContentType(
+ const framework::HttpUrlPath&) const;
+ virtual bool reportStatus(std::ostream&,
+ const framework::HttpUrlPath&) const;
+
+ /**
+ Used by unit tests.
+ */
+ void clearActiveMemFile(spi::Context* = 0) const;
+ const IteratorHandler& getIteratorHandler() const { return *_iteratorHandler; }
+
+ MemFilePersistenceThreadMetrics& getMetrics() const;
+
+ void setDocumentRepo(const document::DocumentTypeRepo& repo);
+ void setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> config);
+ void setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> config);
+ void setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> config);
+private:
+ framework::ComponentRegister& _componentRegister;
+
+ config::ConfigUri _configUri;
+ vespa::config::storage::StorMemfilepersistenceConfig _config;
+ mutable MemFileMapper _memFileMapper;
+
+ const document::DocumentTypeRepo* _repo;
+ mutable MemFileCache::UP _cache;
+ mutable Environment::UP _env;
+ mutable FileScanner::UP _fileScanner;
+ mutable OperationHandler::UP _util;
+ mutable IteratorHandler::UP _iteratorHandler;
+ mutable JoinOperationHandler::UP _joinOperationHandler;
+ mutable SplitOperationHandler::UP _splitOperationHandler;
+ mutable MemFilePersistenceMetrics _metrics;
+
+ mutable ThreadLocals<ThreadContext> _threadLocals;
+
+ std::pair<spi::Result::ErrorType, vespalib::string>
+ getErrorFromException(const std::exception& e);
+
+ MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true) const;
+ void setActiveMemFile(MemFilePtr ptr, const char* user) const;
+ bool hasCachedMemFile() const;
+
+ template<typename C> C handleException(const std::exception& e,
+ bool canRepairBucket) const;
+
+ void handleBucketCorruption(const FileSpecification& file) const;
+
+ //void addBucketToNotifySet(const MemFile& file) const;
+
+ MemFilePtr& getThreadLocalMemFile() const;
+
+ friend class MemFileAccessGuard;
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h
new file mode 100644
index 00000000000..70c711e81fd
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/metrics/metrics.h>
+#include <vespa/memfilepersistence/mapper/serializationmetrics.h>
+
+namespace storage {
+namespace memfile {
+
+class MemFilePersistenceThreadMetrics : public metrics::MetricSet
+{
+public:
+ metrics::LongCountMetric headerOnlyGets;
+ metrics::LongCountMetric headerOnlyUpdates;
+ SerializationMetrics serialization;
+
+ MemFilePersistenceThreadMetrics(const std::string& name, metrics::MetricSet& owner)
+ : metrics::MetricSet(name, "partofsum thread",
+ "Metrics for a worker thread using memfile persistence "
+ "provider", &owner),
+ headerOnlyGets("headeronlygets", "",
+ "Number of gets that only read header", this),
+ headerOnlyUpdates("headeronlyupdates", "",
+ "Number of updates that only wrote header", this),
+ serialization("serialization", this)
+ {
+ }
+};
+
+class MemFilePersistenceCacheMetrics : public metrics::MetricSet
+{
+public:
+ metrics::LongValueMetric files;
+ metrics::LongValueMetric meta;
+ metrics::LongValueMetric header;
+ metrics::LongValueMetric body;
+ metrics::LongCountMetric hits;
+ metrics::LongCountMetric misses;
+ metrics::LongCountMetric meta_evictions;
+ metrics::LongCountMetric header_evictions;
+ metrics::LongCountMetric body_evictions;
+
+ MemFilePersistenceCacheMetrics(metrics::MetricSet& owner)
+ : metrics::MetricSet("cache", "",
+ "Metrics for the VDS persistence cache", &owner),
+ files("files", "", "Number of files cached", this),
+ meta("meta", "", "Bytes of file metadata cached", this),
+ header("header", "", "Bytes of file header parts cached", this),
+ body("body", "", "Bytes of file body parts cached", this),
+ hits("hits", "", "Number of times a bucket was attempted fetched "
+ "from the cache and it was already present", this),
+ misses("misses", "", "Number of times a bucket was attempted fetched "
+ "from the cache and it could not be found, requiring a load", this),
+ meta_evictions("meta_evictions", "", "Bucket meta data evictions", this),
+ header_evictions("header_evictions", "", "Bucket header (and "
+ "implicitly body, if present) data evictions", this),
+ body_evictions("body_evictions", "", "Bucket body data evictions", this)
+ {}
+};
+
+class MemFilePersistenceMetrics : public metrics::MetricSet
+{
+ framework::Component& _component;
+
+public:
+ vespalib::Lock _threadMetricsLock;
+ std::list<vespalib::LinkedPtr<MemFilePersistenceThreadMetrics> > _threadMetrics;
+
+ std::unique_ptr<metrics::SumMetric<MemFilePersistenceThreadMetrics> > _sumMetric;
+ MemFilePersistenceCacheMetrics _cache;
+
+ MemFilePersistenceMetrics(framework::Component& component)
+ : metrics::MetricSet("memfilepersistence", "",
+ "Metrics for the VDS persistence layer"),
+ _component(component),
+ _cache(*this)
+ {
+ }
+
+ MemFilePersistenceThreadMetrics* addThreadMetrics() {
+ metrics::MetricLockGuard metricLock(_component.getMetricManagerLock());
+ vespalib::LockGuard guard(_threadMetricsLock);
+
+ if (!_sumMetric.get()) {
+ _sumMetric.reset(new metrics::SumMetric<MemFilePersistenceThreadMetrics>
+ ("allthreads", "sum", "", this));
+ }
+
+ std::string name = vespalib::make_string("thread_%zu", _threadMetrics.size());
+
+ MemFilePersistenceThreadMetrics* metrics =
+ new MemFilePersistenceThreadMetrics(name, *this);
+
+ _threadMetrics.push_back(vespalib::LinkedPtr<MemFilePersistenceThreadMetrics>(
+ metrics));
+ _sumMetric->addMetricToSum(*metrics);
+ return metrics;
+ }
+};
+
+}
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp
new file mode 100644
index 00000000000..349663fb7c1
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp
@@ -0,0 +1,287 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/operationhandler.h>
+
+#include <vespa/log/log.h>
+
+LOG_SETUP(".persistence.memfile.handler.operation");
+
+namespace storage {
+namespace memfile {
+
+OperationHandler::OperationHandler(Environment& env)
+ : _env(env)
+{
+}
+
+OperationHandler::ReadResult
+OperationHandler::read(MemFile& file, const DocumentId& id,
+ Timestamp maxTimestamp, GetFlag getFlags) const
+{
+ if (maxTimestamp == Timestamp(0)) {
+ maxTimestamp = MAX_TIMESTAMP;
+ }
+ const MemSlot* slot(file.getSlotWithId(id, maxTimestamp));
+ if (slot == 0 || slot->deleted()) {
+ return ReadResult(Document::UP(), Timestamp(0));
+ }
+ return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp());
+}
+
+OperationHandler::ReadResult
+OperationHandler::read(MemFile& file, Timestamp timestamp,
+ GetFlag getFlags) const
+{
+ const MemSlot* slot(file.getSlotAtTime(timestamp));
+ if (slot == 0 || slot->deleted()) {
+ return ReadResult(Document::UP(), Timestamp(0));
+ }
+
+ return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp());
+}
+
+Types::Timestamp
+OperationHandler::remove(MemFile& file,
+ const DocumentId& id,
+ Timestamp timestamp,
+ RemoveType persistRemove)
+{
+ LOG(debug, "remove(%s, %s, %zu, %s)",
+ file.getFile().getPath().c_str(),
+ id.toString().c_str(),
+ timestamp.getTime(),
+ persistRemove == ALWAYS_PERSIST_REMOVE
+ ? "always persist" : "persist only if put is found");
+
+ const MemSlot* slotAtTime(file.getSlotAtTime(timestamp));
+ if (slotAtTime) {
+ if (slotAtTime->deleted()) {
+ LOG(spam,
+ "Slot %s already existed at timestamp %zu but was already "
+ "deleted; not doing anything",
+ slotAtTime->toString().c_str(),
+ timestamp.getTime());
+ return Timestamp(0);
+ }
+ LOG(spam,
+ "Slot %s already existed at timestamp %zu, delegating to "
+ "unrevertableRemove",
+ slotAtTime->toString().c_str(),
+ timestamp.getTime());
+ return unrevertableRemove(file, id, timestamp);
+ }
+
+ const MemSlot* slot(file.getSlotWithId(id));
+
+ if (slot == 0 || slot->getTimestamp() > timestamp) {
+ LOG(spam, "No slot existed, or timestamp was higher");
+
+ if (persistRemove == ALWAYS_PERSIST_REMOVE) {
+ file.addRemoveSlotForNonExistingEntry(
+ id, timestamp, MemFile::REGULAR_REMOVE);
+ }
+ return Timestamp(0);
+ }
+
+ if (slot->deleted()) {
+ LOG(spam, "Document %s was already deleted.",
+ id.toString().c_str());
+
+ if (persistRemove == ALWAYS_PERSIST_REMOVE) {
+ file.addRemoveSlot(*slot, timestamp);
+ }
+
+ return Timestamp(0);
+ }
+
+ Timestamp oldTs(slot->getTimestamp());
+ file.addRemoveSlot(*slot, timestamp);
+ return oldTs;
+}
+
+Types::Timestamp
+OperationHandler::unrevertableRemove(MemFile& file,
+ const DocumentId& id,
+ Timestamp timestamp)
+{
+ LOG(debug, "unrevertableRemove(%s, %s, %zu)",
+ file.getFile().getPath().c_str(),
+ id.toString().c_str(),
+ timestamp.getTime());
+
+ const MemSlot* slot(file.getSlotAtTime(timestamp));
+ if (slot == 0) {
+ file.addRemoveSlotForNonExistingEntry(
+ id, timestamp, MemFile::UNREVERTABLE_REMOVE);
+ return Timestamp(0);
+ }
+ if (slot->getGlobalId() != id.getGlobalId()) {
+ // Should Not Happen(tm) case: given timestamp+document id does not
+ // match the document ID stored on file for the timestamp. In this
+ // case we throw out the old slot and insert a new unrevertable remove
+ // slot with the new document ID.
+ LOG(error, "Unrevertable remove for timestamp %zu with document id %s "
+ "does not match the document id %s of the slot stored at this "
+ "timestamp! Existing slot: %s. Removing old slot to get in sync.",
+ timestamp.getTime(),
+ id.toString().c_str(),
+ file.getDocumentId(*slot).toString().c_str(),
+ slot->toString().c_str());
+ file.removeSlot(*slot);
+ file.addRemoveSlotForNonExistingEntry(
+ id, timestamp, MemFile::UNREVERTABLE_REMOVE);
+ return timestamp;
+ }
+
+ MemSlot newSlot(*slot);
+ newSlot.turnToUnrevertableRemove();
+ file.modifySlot(newSlot);
+ return timestamp;
+}
+
+void
+OperationHandler::write(MemFile& file, const Document& doc, Timestamp time)
+{
+ const MemSlot* slot(file.getSlotAtTime(time));
+ if (slot != 0) {
+ if (doc.getId().getGlobalId() == slot->getGlobalId() &&
+ !slot->deleted())
+ {
+ LOG(debug, "Tried to put already existing document %s at time "
+ "%zu into file %s. Probably sent here by merge from other "
+ "copy. Flagging put ok and doing nothing.",
+ doc.getId().toString().c_str(),
+ time.getTime(),
+ file.getFile().getPath().c_str());
+ return;
+ } else {
+ std::ostringstream ost;
+ ost << "Failed adding document " << doc.getId().toString()
+ << " to slotfile '" << file.getFile().getPath()
+ << "'. Entry " << *slot << " already exists at that timestamp";
+ LOG(warning, "%s", ost.str().c_str());
+ throw TimestampExistException(
+ ost.str(), file.getFile(), time, VESPA_STRLOC);
+ }
+ }
+
+ file.addPutSlot(doc, time);
+}
+
+bool
+OperationHandler::update(MemFile& file, const Document& header,
+ Timestamp newTime, Timestamp existingTime)
+{
+ const MemSlot* slot;
+ if (existingTime == Timestamp(0)) {
+ slot = file.getSlotWithId(header.getId());
+ } else {
+ slot = file.getSlotAtTime(existingTime);
+ if (slot == NULL) {
+ return false;
+ }
+
+ DocumentId docId = file.getDocumentId(*slot);
+ if (docId != header.getId()) {
+ std::ostringstream ost;
+ ost << "Attempted update of doc " << header.getId() << " with "
+ << "timestamp " << existingTime << " failed as non-matching "
+ << "doc " << docId << " existed at timestamp.";
+ throw MemFileIoException(ost.str(), file.getFile(),
+ MemFileIoException::INTERNAL_FAILURE, VESPA_STRLOC);
+ }
+ }
+ if (slot == 0 || slot->deleted()) return false;
+
+ file.addUpdateSlot(header, *slot, newTime);
+ return true;
+}
+
+std::vector<Types::Timestamp>
+OperationHandler::select(MemFile& file,
+ SlotMatcher& checker,
+ uint32_t iteratorFlags,
+ Timestamp fromTimestamp,
+ Timestamp toTimestamp)
+{
+ verifyLegalFlags(iteratorFlags, LEGAL_ITERATOR_FLAGS, "select");
+ checker.preload(file);
+ std::vector<Timestamp> result;
+ result.reserve(file.getSlotCount());
+ for (MemFile::const_iterator it = file.begin(iteratorFlags,
+ fromTimestamp,
+ toTimestamp);
+ it != file.end(); ++it)
+ {
+ if (checker.match(SlotMatcher::Slot(*it, file))) {
+ result.push_back(it->getTimestamp());
+ }
+ }
+ reverse(result.begin(), result.end());
+ return result;
+}
+
+void
+OperationHandler::verifyBucketMapping(const DocumentId& id,
+ const BucketId& bucket) const
+{
+ BucketId docBucket(_env._bucketFactory.getBucketId(id));
+ docBucket.setUsedBits(bucket.getUsedBits());
+ if (bucket != docBucket) {
+ docBucket = _env._bucketFactory.getBucketId(id);
+ throw vespalib::IllegalStateException("Document " + id.toString()
+ + " (bucket " + docBucket.toString() + ") does not belong in "
+ + "bucket " + bucket.toString() + ".", VESPA_STRLOC);
+ }
+}
+
+MemFilePtr
+OperationHandler::getMemFile(const spi::Bucket& b, bool keepInCache)
+{
+ return getMemFile(b.getBucketId(), b.getPartition(), keepInCache);
+}
+
+MemFilePtr
+OperationHandler::getMemFile(const document::BucketId& id, Directory& dir,
+ bool keepInCache) {
+ return _env._cache.get(id, _env, dir, keepInCache);
+}
+
+MemFilePtr
+OperationHandler::getMemFile(const document::BucketId& id, uint16_t diskIndex,
+ bool keepInCache)
+{
+ return getMemFile(id, _env.getDirectory(diskIndex), keepInCache);
+}
+
+document::FieldSet::UP
+OperationHandler::parseFieldSet(const std::string& fieldSet)
+{
+ document::FieldSetRepo fsr;
+ return fsr.parse(_env.repo(), fieldSet);
+}
+
+std::unique_ptr<document::select::Node>
+OperationHandler::parseDocumentSelection(
+ const std::string& documentSelection, bool allowLeaf)
+{
+ std::unique_ptr<document::select::Node> ret;
+ try {
+ document::select::Parser parser(
+ _env.repo(), _env._bucketFactory);
+ ret = parser.parse(documentSelection);
+ } catch (document::select::ParsingFailedException& e) {
+ LOG(debug, "Failed to parse document selection '%s': %s",
+ documentSelection.c_str(), e.getMessage().c_str());
+ return std::unique_ptr<document::select::Node>();
+ }
+ if (ret->isLeafNode() && !allowLeaf) {
+ LOG(debug, "Document selection results in a single leaf node: '%s'",
+ documentSelection.c_str());
+ return std::unique_ptr<document::select::Node>();
+ }
+ return ret;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h
new file mode 100644
index 00000000000..9a2700c4209
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::OperationHandler
+ * \ingroup memfile
+ *
+ * \brief Super class for operation handlers.
+ *
+ * The operation handler superclass provides common functionality needed by
+ * operation handlers.
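+ *
+ * A minimal usage sketch (illustrative only; `handler`, `file` and `docId`
+ * are assumptions, not defined in this header):
+ *
+ *   OperationHandler::ReadResult r(
+ *           handler.read(file, docId, Timestamp(0), ALL));
+ *   if (r._doc.get()) {
+ *       // Newest non-removed version of docId; r._ts is its timestamp.
+ *   }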
+ */
+#pragma once
+
+#include <boost/utility.hpp>
+#include <vespa/memfilepersistence/common/exceptions.h>
+#include <vespa/memfilepersistence/common/filespecification.h>
+#include <vespa/memfilepersistence/common/types.h>
+#include <vespa/memfilepersistence/memfile/memfile.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/memfilepersistence/memfile/memfileptr.h>
+#include <vespa/memfilepersistence/common/slotmatcher.h>
+#include <vespa/persistence/spi/bucketinfo.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/document/fieldset/fieldsetrepo.h>
+
+namespace storage {
+namespace memfile {
+
+class OperationHandler : protected Types,
+ private boost::noncopyable
+{
+protected:
+ Environment& _env;
+
+public:
+ typedef std::unique_ptr<OperationHandler> UP;
+
+ OperationHandler(Environment&);
+ virtual ~OperationHandler() {}
+
+ struct ReadResult : private Types {
+ ReadResult(Document::UP doc,
+ Timestamp ts)
+ : _doc(std::move(doc)),
+ _ts(ts) {};
+
+ ReadResult(ReadResult&& other)
+ : _doc(std::move(other._doc)),
+ _ts(other._ts) {};
+
+ Document::UP _doc;
+ Timestamp _ts;
+
+ Document::UP getDoc() { return std::move(_doc); }
+ };
+
+ ReadResult read(MemFile&,
+ const DocumentId&,
+ Timestamp maxTimestamp,
+ GetFlag getFlags) const;
+
+ ReadResult read(MemFile&, Timestamp timestamp, GetFlag getFlags) const;
+
+ enum RemoveType
+ {
+ ALWAYS_PERSIST_REMOVE,
+ PERSIST_REMOVE_IF_FOUND
+ };
+
+ Types::Timestamp remove(MemFile&,
+ const DocumentId&,
+ Timestamp,
+ RemoveType);
+
+ Types::Timestamp unrevertableRemove(MemFile&,
+ const DocumentId&,
+ Timestamp);
+
+ void write(MemFile&, const Document& doc, Timestamp);
+
+ bool update(MemFile&,
+ const Document& headerToOverwrite,
+ Timestamp newTime,
+ Timestamp existingTime = Timestamp(0));
+
+ /**
+ * Get the slots matching a given matcher.
+ *
+ * @return The timestamps of the matching slots, ordered in rising
+ * timestamp order.
+ */
+ std::vector<Timestamp> select(MemFile&, SlotMatcher&,
+ uint32_t iteratorFlags,
+ Timestamp fromTimestamp = Timestamp(0),
+ Timestamp toTimestamp = Timestamp(0));
+
+ /** Verify that a document id belongs to a given bucket. */
+ void verifyBucketMapping(const DocumentId&, const BucketId&) const;
+
+ MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true);
+
+ MemFilePtr getMemFile(const document::BucketId& id, Directory& dir,
+ bool keepInCache = true);
+
+ MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk,
+ bool keepInCache = true);
+
+ document::FieldSet::UP parseFieldSet(const std::string& fieldSet);
+
+ std::unique_ptr<document::select::Node>
+ parseDocumentSelection(const std::string& documentSelection,
+ bool allowLeaf);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp
new file mode 100644
index 00000000000..34d86259832
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/splitoperationhandler.h>
+#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".persistence.memfile.handler.split");
+
+namespace storage {
+namespace memfile {
+
+SplitOperationHandler::SplitOperationHandler(Environment& env)
+ : OperationHandler(env)
+{
+}
+
+namespace {
+
+struct BucketMatcher : public SlotMatcher {
+ const document::BucketIdFactory& _factory;
+ document::BucketId _bid;
+
+ BucketMatcher(const document::BucketIdFactory& factory, const document::BucketId& bid)
+ : SlotMatcher(PRELOAD_HEADER),
+ _factory(factory),
+ _bid(bid) {}
+
+ virtual bool match(const Slot& slot) {
+ document::DocumentId id(slot.getDocumentId());
+ document::BucketId bucket = _factory.getBucketId(id);
+ bucket.setUsedBits(_bid.getUsedBits());
+
+ if (bucket.stripUnused() == _bid.stripUnused()) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+};
+
+}
+
+void
+SplitOperationHandler::copyTimestamps(
+ const MemFile& source,
+ MemFile& target,
+ const std::vector<Timestamp>& timestamps)
+{
+ std::vector<const MemSlot*> slotsToCopy;
+ slotsToCopy.reserve(timestamps.size());
+ for (uint32_t i = 0; i < timestamps.size(); i++) {
+ const MemSlot* slot = source.getSlotAtTime(timestamps[i]);
+
+ if (!target.getSlotAtTime(timestamps[i])) {
+ slotsToCopy.push_back(slot);
+ }
+ }
+ target.copySlotsFrom(source, slotsToCopy);
+}
+
+uint32_t
+SplitOperationHandler::splitIntoFile(MemFile& source,
+ const spi::Bucket& target)
+{
+ BucketMatcher matcher(_env._bucketFactory, target.getBucketId());
+
+ std::vector<Timestamp> ts = select(source, matcher, ITERATE_REMOVED);
+
+ MemFileCacheEvictionGuard targetFile(getMemFile(target, false));
+
+ LOG(debug,
+ "Found %" PRIu64 " slots to move from file %s to file %s",
+ ts.size(),
+ source.getFile().toString().c_str(),
+ targetFile->getFile().toString().c_str());
+
+ copyTimestamps(source, *targetFile, ts);
+
+ targetFile->flushToDisk();
+ targetFile.unguard();
+ return ts.size();
+}
+
+spi::Result
+SplitOperationHandler::split(const spi::Bucket& source,
+ const spi::Bucket& target1,
+ const spi::Bucket& target2)
+{
+ MemFileCacheEvictionGuard file(getMemFile(source, false));
+ file->ensureBodyBlockCached();
+
+ uint32_t totalDocsMoved = 0;
+ totalDocsMoved += splitIntoFile(*file, target1);
+ if (target2.getBucketId().getRawId() != 0) {
+ totalDocsMoved += splitIntoFile(*file, target2);
+ }
+ if (file->getBucketInfo().getEntryCount() != totalDocsMoved) {
+ LOG(error, "Split(%s) code moved only %u of %u entries out of source "
+ "file.",
+ source.getBucketId().toString().c_str(),
+ totalDocsMoved, file->getBucketInfo().getEntryCount());
+ assert(false);
+ }
+ file.get().deleteFile();
+ file.unguard();
+ return spi::Result();
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h
new file mode 100644
index 00000000000..5bc1376d55f
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * \class storage::memfile::SplitHandler
+ * \ingroup memfile
+ *
+ * \brief Class used to do basic operations to memfiles.
+ */
+#pragma once
+
+#include <vespa/memfilepersistence/spi/operationhandler.h>
+#include <vespa/persistence/spi/persistenceprovider.h>
+
+namespace storage {
+
+namespace memfile {
+
+class SplitOperationHandler : public OperationHandler {
+public:
+ typedef std::unique_ptr<SplitOperationHandler> UP;
+
+ SplitOperationHandler(Environment&);
+
+ spi::Result split(const spi::Bucket& source,
+ const spi::Bucket& target1,
+ const spi::Bucket& target2);
+
+private:
+ /**
+ * Copies the slots designated by the given list of timestamps from one mem
+ * file to another. If the target already has a slot at any of the given
+ * timestamps, those timestamps aren't copied.
+ */
+ void copyTimestamps(const MemFile& source, MemFile& target,
+ const std::vector<Timestamp>& timestamps);
+
+ uint32_t splitIntoFile(MemFile& source, const spi::Bucket& target);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp
new file mode 100644
index 00000000000..b6c71165f28
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/threadlocals.h>
+
+namespace storage {
+
+namespace memfile {
+
+vespalib::Lock ThreadStatic::_threadLock;
+uint16_t ThreadStatic::_nextThreadIdx = 0;
+__thread int ThreadStatic::_threadIdx = -1;
+
+void ThreadStatic::initThreadIndex()
+{
+ if (_threadIdx == -1) {
+ vespalib::LockGuard guard(_threadLock);
+ _threadIdx = _nextThreadIdx;
+ ++_nextThreadIdx;
+ }
+}
+
+}
+
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h
new file mode 100644
index 00000000000..518eec7f909
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/util/sync.h>
+
+namespace storage {
+
+namespace memfile {
+
+class ThreadStatic {
+public:
+ static vespalib::Lock _threadLock;
+ static uint16_t _nextThreadIdx;
+ static __thread int _threadIdx;
+
+ void initThreadIndex();
+};
+
+/**
+ * This class owns a set of thread-local variables. The maximum number of
+ * unique threads that may use it must be given at construction time.
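+ *
+ * A minimal usage sketch (illustrative; the capacity of 64 is an assumed
+ * upper bound on worker threads):
+ *
+ *   ThreadLocals<uint64_t> counters(64);
+ *   ++counters.get(); // each calling thread gets its own padded slot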
+ */
+template<typename T>
+class ThreadLocals : public ThreadStatic {
+ static const size_t CACHE_LINE_SIZE = 64; // Architectural assumption.
+ struct CacheLinePaddedValue
+ {
+ T _data;
+ private:
+ // Ensure addressing the data of one entry does not touch the cache
+ // line of any following entries. Could make this an exact fit, but
+ // not very important since there are very few TLS entries in total.
+ char _padding[CACHE_LINE_SIZE];
+ };
+public:
+ mutable std::vector<CacheLinePaddedValue> _contexts;
+
+ ThreadLocals(uint32_t maxThreadCount)
+ : _contexts(maxThreadCount)
+ {
+ }
+
+ T& get() {
+ initThreadIndex();
+ assert(_threadIdx < (int)_contexts.size());
+ return _contexts[_threadIdx]._data;
+ }
+};
+
+}
+
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h
new file mode 100644
index 00000000000..5ffe259028c
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h
@@ -0,0 +1,18 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace storage {
+namespace memfile {
+
+class MemFilePersistenceThreadMetrics;
+
+class ThreadMetricProvider
+{
+public:
+ virtual ~ThreadMetricProvider() {}
+
+ virtual MemFilePersistenceThreadMetrics& getMetrics() const = 0;
+};
+
+}
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp
new file mode 100644
index 00000000000..37779feb60f
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/memfilepersistence/spi/visitorslotmatcher.h>
+#include <vespa/document/select/bodyfielddetector.h>
+
+namespace storage {
+namespace memfile {
+
+namespace {
+
+SlotMatcher::PreloadFlag
+getCacheRequirements(const document::select::Node* selection,
+ const document::DocumentTypeRepo& repo) {
+ if (!selection) {
+ return SlotMatcher::PRELOAD_META_DATA_ONLY;
+ }
+
+ document::select::BodyFieldDetector detector(repo);
+ selection->visit(detector);
+
+ if (detector.foundBodyField) {
+ return SlotMatcher::PRELOAD_BODY;
+ } else {
+ return SlotMatcher::PRELOAD_HEADER;
+ }
+}
+
+bool needDocument(const document::select::Node* selection)
+{
+ if (selection) {
+ document::select::NeedDocumentDetector detector;
+ selection->visit(detector);
+ return detector.needDocument();
+ } else {
+ return false;
+ }
+}
+
+} // namespace
+
+VisitorSlotMatcher::VisitorSlotMatcher(
+ const document::DocumentTypeRepo& repo,
+ const document::select::Node* selection)
+ : SlotMatcher(getCacheRequirements(selection, repo)),
+ _selection(selection),
+ _needDocument(needDocument(selection))
+{
+}
+
+bool
+VisitorSlotMatcher::match(const Slot& slot) {
+ if (_selection) {
+ if (!slot.isRemove() && _needDocument) {
+ document::Document::UP doc(
+ slot.getDocument(!(_preload == PRELOAD_BODY)));
+ return (_selection->contains(*doc)
+ == document::select::Result::True);
+ } else {
+ document::DocumentId docId(slot.getDocumentId());
+ return (_selection->contains(docId)
+ == document::select::Result::True);
+ }
+ }
+
+ return true;
+}
+
+}
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h
new file mode 100644
index 00000000000..9b1412da4b7
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/memfilepersistence/common/slotmatcher.h>
+
+namespace storage {
+namespace memfile {
+
+class VisitorSlotMatcher : public SlotMatcher
+{
+private:
+ const document::select::Node* _selection;
+ bool _needDocument;
+
+public:
+ VisitorSlotMatcher(const document::DocumentTypeRepo& repo,
+ const document::select::Node* selection);
+
+ virtual bool match(const Slot& slot);
+
+};
+
+}
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore
new file mode 100644
index 00000000000..c7687cb62d2
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore
@@ -0,0 +1,5 @@
+/.depend
+/Makefile
+/dumpslotfile
+/vdsdisktool
+vdsdisktool-bin
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt
new file mode 100644
index 00000000000..cc8ad0a2320
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(memfilepersistence_tools OBJECT
+ SOURCES
+ dumpslotfile.cpp
+ vdsdisktool.cpp
+ DEPENDS
+)
+vespa_add_executable(memfilepersistence_dumpslotfile_app
+ SOURCES
+ dumpslotfileapp.cpp
+ OUTPUT_NAME dumpslotfile
+ INSTALL bin
+ DEPENDS
+ memfilepersistence
+)
+vespa_add_executable(memfilepersistence_vdsdisktool_app
+ SOURCES
+ vdsdiskapp.cpp
+ OUTPUT_NAME vdsdisktool-bin
+ INSTALL bin
+ DEPENDS
+ memfilepersistence
+)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp
new file mode 100644
index 00000000000..a44aa81d3ab
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp
@@ -0,0 +1,361 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/config/helper/configgetter.h>
+#include <vespa/document/config/config-documenttypes.h>
+#include <vespa/document/repo/documenttyperepo.h>
+#include <vespa/document/document.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/common/environment.h>
+#include <vespa/memfilepersistence/device/devicemanager.h>
+#include <vespa/memfilepersistence/mapper/memfilemapper.h>
+#include <vespa/memfilepersistence/memfile/memfilecache.h>
+#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h>
+#include <vespa/memfilepersistence/tools/dumpslotfile.h>
+#include <string>
+#include <vespa/persistence/spi/bucketinfo.h>
+#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
+#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h>
+#include <vespa/storageframework/defaultimplementation/memory/nomemorymanager.h>
+#include <vespa/vespalib/util/programoptions.h>
+
+LOG_SETUP(".vds.dumpslotfile");
+
+using config::ConfigGetter;
+using document::DocumenttypesConfig;
+using config::FileSpec;
+using document::DocumentTypeRepo;
+
+namespace storage {
+namespace memfile {
+
+namespace {
+ std::ostream* cout;
+ std::ostream* cerr;
+
+ struct CmdOptions : public vespalib::ProgramOptions {
+ bool showSyntaxPage;
+ bool userFriendlyOutput;
+ bool printHeader;
+ bool printBody;
+ bool toXml;
+ bool toBinary;
+ bool includeRemovedDocs;
+ bool includeRemoveEntries;
+// std::string metaDataSort;
+ std::string documentManConfigId;
+ std::string filename;
+ uint64_t timestampToShow;
+ std::string docId;
+// bool useConstructor;
+
+ CmdOptions(int argc, const char* const* argv)
+ : vespalib::ProgramOptions(argc, argv),
+ showSyntaxPage(false)
+ {
+ setSyntaxMessage(
+ "Utility program for showing the contents of the slotfiles "
+ "used by Vespa Document Storage in a user readable format. "
+ "Intended for debugging purposes."
+ );
+ addOption("h help", showSyntaxPage, false,
+ "Shows this help page");
+ addOption("n noheader", printHeader, true,
+ "If given, the header block content is not shown");
+ addOption("N nobody", printBody, true,
+ "If given, the body block content is not shown");
+ addOption("f friendly", userFriendlyOutput, false,
+ "Gives less compact, but more user friendly output");
+ addOption("x toxml", toXml, false,
+ "Print document XML of contained documents");
+ addOption("b tobinary", toBinary, false,
+ "Print binary representations of contained documents");
+ addOption("includeremoveddocs", includeRemovedDocs, false,
+ "When showing XML, include documents that are still in "
+ "the file, but have been removed.");
+ addOption("includeremoveentries", includeRemoveEntries, false,
+ "When showing XML, include remove entries.");
+ addOption("c documentconfig", documentManConfigId,
+ std::string("client"),
+ "The document config to use, needed if deserializing "
+ "documents.");
+// addOption("s sort", metaDataSort, std::string("none"),
+// "How to sort metadatalist. Valid arguments: "
+// "bodypos, headerpos & none.");
+ addOption("t time", timestampToShow, uint64_t(0),
+ "If set, only present data related to this timestamp, "
+ "when outputting XML or binary data.");
+ addOption("docid", docId, std::string(""),
+ "Retrieve single document using get semantics");
+// addOption("useconstructor", useConstructor, false, "Debug option");
+ addArgument("slotfile", filename, "The slotfile to dump.");
+ }
+ };
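+
+    // Illustrative invocations (option names taken from the definitions
+    // above; the binary is installed as "dumpslotfile"):
+    //   dumpslotfile 4000000000000012.0
+    //   dumpslotfile --toxml --docid <document id> 4000000000000012.0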
+
+ void printDoc(document::Document& doc, CmdOptions& o) {
+ if (o.toXml) {
+ *cout << doc.toXml() << "\n";
+ } else {
+ document::ByteBuffer::UP bbuf(doc.serialize());
+ *cout << std::string(bbuf->getBuffer(), bbuf->getLength());
+ }
+ }
+
+ void printFailure(const std::string& failure) {
+ *cerr << failure << "\n";
+ }
+
+ uint64_t extractBucketId(const std::string& path) {
+        size_t slashPos = path.find_last_of('/');
+        bool foundSlash = (slashPos != std::string::npos);
+
+ size_t dotPos = path.find_last_of('.');
+ if (dotPos == std::string::npos
+ || (foundSlash && (slashPos > dotPos)))
+ {
+ dotPos = path.size();
+ }
+
+ std::string bucketIdAsHex;
+ if (foundSlash) {
+ bucketIdAsHex.assign(path.begin() + slashPos + 1,
+ path.begin() + dotPos);
+ } else {
+ bucketIdAsHex.assign(path.begin(),
+ path.begin() + dotPos);
+ }
+
+ char* endp;
+ uint64_t bucketId = strtoull(bucketIdAsHex.c_str(), &endp, 16);
+ if (*endp != '\0') {
+ return 0;
+ }
+ return bucketId;
+ }
+
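+    // Minimal environment wiring together a clock, memfile cache, mapper,
+    // device manager and document type repo so that a slotfile can be
+    // opened outside a running storage node.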
+ struct EnvironmentImpl : ThreadMetricProvider {
+ framework::defaultimplementation::ComponentRegisterImpl _compReg;
+ framework::Component _component;
+ framework::defaultimplementation::RealClock _clock;
+ framework::defaultimplementation::NoMemoryManager _memoryMan;
+ MemFilePersistenceMetrics _metrics;
+ MemFilePersistenceThreadMetrics* _threadMetrics;
+ std::unique_ptr<MemFileCache> _cache;
+ MemFileMapper _mapper;
+ DeviceManager _deviceManager;
+ document::DocumentType _docType;
+ DocumentTypeRepo::SP _repo;
+ vespa::config::storage::StorMemfilepersistenceConfigBuilder _memFileConfig;
+ vespa::config::content::PersistenceConfigBuilder _persistenceConfig;
+ vespa::config::storage::StorDevicesConfigBuilder _deviceConfig;
+ config::ConfigSet _configSet;
+ config::IConfigContext::SP _configContext;
+ std::unique_ptr<config::ConfigUri> _internalConfig;
+ std::unique_ptr<Environment> _env;
+
+ EnvironmentImpl(config::ConfigUri& externalConfig,
+ const char* documentConfigId)
+ : _compReg(),
+ _component(_compReg, "dumpslotfile"),
+ _clock(),
+ _metrics(_component),
+ _threadMetrics(_metrics.addThreadMetrics()),
+ _cache(),
+ _mapper(*this),
+ _deviceManager(DeviceMapper::UP(new SimpleDeviceMapper), _clock),
+ _docType("foo", 1)
+ {
+ _compReg.setClock(_clock);
+ _compReg.setMemoryManager(_memoryMan);
+ _cache.reset(new MemFileCache(_compReg, _metrics._cache));
+ LOG(debug, "Setting up document repo");
+ if (documentConfigId == 0) {
+ _repo.reset(new DocumentTypeRepo(_docType));
+ } else {
+ config::ConfigUri uri(
+ externalConfig.createWithNewId(documentConfigId));
+ std::unique_ptr<document::DocumenttypesConfig> config(
+ ConfigGetter<DocumenttypesConfig>::getConfig(
+ uri.getConfigId(), uri.getContext()));
+ _repo.reset(new DocumentTypeRepo(*config));
+ }
+ _deviceConfig.rootFolder = ".";
+ std::string configId("defaultId");
+ _configSet.addBuilder(configId, &_memFileConfig);
+ _configSet.addBuilder(configId, &_persistenceConfig);
+ _configSet.addBuilder(configId, &_deviceConfig);
+ _configContext.reset(new config::ConfigContext(_configSet));
+ _internalConfig.reset(
+ new config::ConfigUri(configId, _configContext));
+ _env.reset(new Environment(
+ *_internalConfig, *_cache, _mapper, *_repo, _clock, true));
+ }
+
+ MemFilePersistenceThreadMetrics& getMetrics() const {
+ return *_threadMetrics;
+ }
+
+ };
+
+}
+
+int SlotFileDumper::dump(int argc, const char * const * argv,
+ config::ConfigUri& config,
+ std::ostream& out, std::ostream& err)
+{
+ cout = &out;
+ cerr = &err;
+ CmdOptions o(argc, argv);
+    try {
+ o.parse();
+ } catch (vespalib::InvalidCommandLineArgumentsException& e) {
+ if (!o.showSyntaxPage) {
+ err << e.getMessage() << "\n\n";
+ o.writeSyntaxPage(err);
+ err << "\n";
+ return 1;
+ }
+ }
+ if (o.showSyntaxPage) {
+ o.writeSyntaxPage(err);
+ err << "\n";
+ return 0;
+ }
+ if (!o.toXml && (o.includeRemovedDocs || o.includeRemoveEntries)) {
+ err << "Options for what to include in XML makes no sense when "
+ "not printing XML content.\n\n";
+ o.writeSyntaxPage(err);
+ err << "\n";
+ return 1;
+ }
+ if (o.toBinary && o.timestampToShow == 0 && o.docId == "") {
+ err << "To binary option only works for a single document. "
+ "Use --time or --docid options.\n\n";
+ o.writeSyntaxPage(err);
+ err << "\n";
+ return 1;
+ }
+// if (o.metaDataSort != "none" && o.metaDataSort != "bodypos") {
+// err << "Illegal value for metadata sorting: '" << o.metaDataSort
+// << "'. Legal values are:\n"
+// << " none - Keep order on disk (currently timestamp)\n"
+// << " bodypos - Reorder metadata by position of body\n"
+// << " headerpos - Reorder metadata by position of header\n\n";
+// o.writeSyntaxPage(err);
+// err << "\n";
+// return 1;
+// }
+
+ EnvironmentImpl env(config, o.toXml ? o.documentManConfigId.c_str() : "");
+
+ document::BucketId bucket(extractBucketId(o.filename));
+ Directory::LP dir(env._deviceManager.getDirectory(o.filename, 0));
+ FileSpecification fileSpec(bucket, *dir, o.filename);
+
+ MemFile::LoadOptions opts;
+ opts.autoRepair = false;
+ MemFile memFile(fileSpec, *env._env, opts);
+
+ if (!o.toXml && !o.toBinary) {
+ spi::BucketInfo info;
+ info = memFile.getBucketInfo();
+ if (bucket.getRawId() == 0) {
+ out << "Failed to extract bucket id from filename\n";
+ } else {
+ out << bucket << " (extracted from filename)\n";
+ }
+ out << "Unique document count: " << info.getDocumentCount()
+ << "\nTotal document size: "
+ << info.getDocumentSize() << "\n";
+ out << "Used size: " << info.getUsedSize() << "\n";
+ out << "Entry count: " << info.getEntryCount() << "\n";
+
+/*
+ SlotFile::MetaDataOrder order = SlotFile::DEFAULT;
+ if (o.metaDataSort == "bodypos") {
+ order = SlotFile::BODYPOS;
+ } else if (o.metaDataSort == "headerpos") {
+ order = SlotFile::HEADERPOS;
+ }
+*/
+ memFile.printState(out, o.userFriendlyOutput, o.printBody,
+ o.printHeader/*, order*/);
+ out << "\n";
+ std::ostringstream ost;
+ uint16_t verifyFlags = 0; // May verify only header/body
+ if (env._mapper.verify(memFile, *env._env, ost, verifyFlags)) {
+ out << "Slotfile verified.\n";
+ } else {
+ out << "Slotfile failed verification.\n";
+ out << ost.str() << "\n";
+ }
+ } else {
+ std::ostringstream ost;
+ uint16_t verifyFlags = 0; // May verify only header/body
+ if (!env._mapper.verify(memFile, *env._env, ost, verifyFlags)) {
+ out << "Slotfile failed verification.\n";
+ out << ost.str() << "\n";
+ return 1;
+ }
+
+ if (o.toXml) {
+ out << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
+ out << "<vespafeed>\n";
+ }
+ if (o.docId != "") {
+ const MemSlot* slot(
+ memFile.getSlotWithId(document::DocumentId(o.docId)));
+ if (slot != 0 && !slot->deleted()) {
+ document::Document::UP doc(memFile.getDocument(*slot,
+ o.printBody ?
+ Types::ALL : Types::HEADER_ONLY));
+ if (doc.get()) {
+ printDoc(*doc, o);
+ } else {
+ printFailure("No document with id " + o.docId +
+ " found.");
+ }
+ } else {
+ printFailure("No document with id " + o.docId + " found.");
+ }
+ } else {
+ uint32_t iteratorFlags = o.includeRemoveEntries ?
+ Types::ITERATE_REMOVED : 0;
+ if (!o.includeRemovedDocs) {
+ iteratorFlags |= Types::ITERATE_GID_UNIQUE;
+ }
+ for (MemFile::const_iterator it = memFile.begin(iteratorFlags);
+ it != memFile.end(); ++it)
+ {
+ if (o.timestampToShow == 0
+ || (Types::Timestamp)o.timestampToShow
+ == it->getTimestamp())
+ {
+ if (it->deleted() || it->deletedInPlace()) {
+ printFailure("Found remove entry");
+ } else {
+ document::Document::UP doc(memFile.getDocument(*it,
+ o.printBody ?
+ Types::ALL : Types::HEADER_ONLY));
+ if (doc.get()) {
+ printDoc(*doc, o);
+ } else {
+ printFailure("Unable to get document in " +
+ it->toString(true));
+ }
+ }
+ }
+ }
+ }
+ if (o.toXml) {
+ out << "</vespafeed>\n";
+ }
+ }
+ return 0;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h
new file mode 100644
index 00000000000..698f3a5066b
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <iostream>
+
+namespace config {
+ class ConfigUri;
+}
+
+namespace storage {
+namespace memfile {
+
+struct SlotFileDumper {
+ static int dump(int argc, const char * const * argv,
+ config::ConfigUri& config,
+ std::ostream& out, std::ostream& err);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp
new file mode 100644
index 00000000000..c5300f53571
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/config/subscription/configuri.h>
+#include <vespa/memfilepersistence/tools/dumpslotfile.h>
+
+namespace {
+
+struct DumpSlotFileApp : public FastOS_Application {
+ int Main() {
+        try {
+ config::ConfigUri config("");
+ return storage::memfile::SlotFileDumper::dump(
+ _argc, _argv, config, std::cout, std::cerr);
+ } catch (std::exception& e) {
+ std::cerr << "Aborting due to exception:\n" << e.what() << "\n";
+ return 1;
+ }
+ }
+};
+
+} // anonymous
+
+int main(int argc, char **argv) {
+ DumpSlotFileApp app;
+ return app.Entry(argc, argv);
+}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp
new file mode 100644
index 00000000000..6fae206f15b
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/defaults.h>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/tools/vdsdisktool.h>
+
+LOG_SETUP(".vdsdisktool");
+
+namespace {
+ struct DiskApp : public FastOS_Application {
+ int Main() {
+ try {
+ std::string dir = vespa::Defaults::vespaHome();
+ dir.append("var/db/vespa/vds");
+ return storage::memfile::VdsDiskTool::run(
+ _argc, _argv, dir.c_str(),
+ std::cout, std::cerr);
+ } catch (std::exception& e) {
+ std::cerr << "Application aborted with exception:\n" << e.what()
+ << "\n";
+ return 1;
+ }
+ }
+ };
+} // anonymous
+
+int main(int argc, char **argv) {
+ DiskApp app;
+ return app.Entry(argc, argv);
+}
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp
new file mode 100644
index 00000000000..2a3f1d58134
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp
@@ -0,0 +1,518 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/util/stringutil.h>
+#include <vespa/fnet/frt/frt.h>
+#include <iostream>
+#include <vespa/log/log.h>
+#include <vespa/memfilepersistence/device/mountpointlist.h>
+#include <vespa/memfilepersistence/tools/vdsdisktool.h>
+#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
+#include <vespa/vespalib/io/fileutil.h>
+#include <vespa/vespalib/util/programoptions.h>
+
+LOG_SETUP(".vdsdiskapp");
+
+using std::vector;
+
+namespace storage {
+namespace memfile {
+
+using vespalib::getLastErrorString;
+
+namespace {
+
+ struct Sorter {
+ bool operator()(const std::pair<std::string, std::string>& first,
+ const std::pair<std::string, std::string>& second)
+ { return (first.first < second.first); }
+ };
+
+ /**
+     * Read the pid from a pid file. To allow the pid file to be extended
+     * with more information later, multiple lines are accepted as long as
+     * the pid is on the first line, optionally prefixed with "pid:".
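+     * For example, a file whose first line is "12345" or "pid:12345" is
+     * accepted.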
+ */
+ uint32_t readPid(const std::string& pidFile) {
+ vespalib::LazyFile lf(pidFile, vespalib::File::READONLY);
+ vector<char> data(32);
+ size_t read = lf.read(&data[0], 32, 0);
+ // If pid file has been extended to have more data, ignore it.
+ for (uint32_t i=0; i<32; ++i) {
+ if (data[i] == '\n') {
+ data[i] = '\0';
+ read = i;
+ break;
+ }
+ }
+ // Allow a "pid:" prefix if it exists.
+ int start = 0;
+ if (strncmp("pid:", &data[0], 4) == 0) {
+ start = 4;
+ }
+ // Fail unless the first line was just a number with the pid
+ char* endp;
+ uint32_t pid = strtoull(&data[start], &endp, 10);
+ if (*endp != '\0' || read >= 32) {
+ throw vespalib::IllegalStateException(
+ "Unexpected content in pid file " + pidFile,
+ VESPA_STRLOC);
+ }
+ if (pid == 0) {
+ throw vespalib::IllegalStateException(
+ "Read pid 0 from pidfile which is illegal.",
+ VESPA_STRLOC);
+ }
+ return pid;
+ }
+}
+
+struct CmdLineOptions : public vespalib::ProgramOptions {
+ std::ostream& _err;
+ std::string _rootpath;
+ bool _showSyntax;
+ std::string _cluster;
+ uint32_t _nodeIndex;
+ std::string _mode;
+ uint32_t _diskIndex;
+ std::string _message;
+ /*
+ std::string _slobrokConfigId;
+ std::string _slobrokConnectionSpec;
+ */
+
+ CmdLineOptions(int argc, const char * const * argv,
+ const std::string& rootpath, std::ostream& err)
+ : vespalib::ProgramOptions(argc, argv),
+ _err(err),
+ _rootpath(rootpath)
+ {
+ setSyntaxMessage(
+ "This tool is used to stop VDS from using a given partition "
+ "you no longer want it to use, or to reenable use of a partition "
+ "that previously have been disabled. Note that currently, this "
+ "requires a restart of the storage node, which this tool will "
+ "do automatically. Note that the tool must be run on the storage "
+ "node where you want to enable/disable a partition.\n\n"
+ "Examples:\n"
+ " vdsdisktool disable 2 \"Seeing a lot of smart warnings on this one\"\n"
+ " vdsdisktool -c mycluster -i 3 disable 0 \"Shouldn't have put this on OS drive\"\n"
+ " vdsdisktool enable 2\n"
+ );
+ addOption("h help", _showSyntax, false,
+ "Show this help page.");
+ addOption("c cluster", _cluster, std::string(""),
+ "Which cluster the storage node whose disks should be "
+ "adjusted. If only data from one cluster is detected "
+ "on the node, this does not have to be specified");
+ addOption("i index", _nodeIndex, uint32_t(0xffffffff),
+ "The node index of the storage node whose disks should be "
+ "adjusted. If only data from one storage node is detected "
+ "on the node, this does not have to be specified");
+ addArgument("Mode", _mode,
+ "There are three modes. They are status, enable and disable"
+ ". The status mode is used to just query current disk "
+ "status without. The enable and disable modes will enable "
+ "or disable a disk.");
+ addArgument("Disk Index", _diskIndex, uint32_t(0xffffffff),
+ "The disk index which you want to enable/disable. Not "
+ "specified in status mode, but required otherwise.");
+ addArgument("Reason", _message, std::string(""),
+ "Give a reason for why we're enabling or disabling a disk. "
+ "Required when disabling a disk, such that other "
+ "administrators can see why it has happened.");
+ }
+
+    vector<std::string> listDir(const std::string& dir) {
+        DIR* dirp = opendir(dir.c_str());
+        if (dirp == 0) {
+            std::ostringstream ost;
+            ost << "Failed to open directory '" << dir << "', errno "
+                << errno << ": " << getLastErrorString() << "\n";
+            throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC);
+        }
+        vector<std::string> result;
+        struct dirent* entry;
+        while ((entry = readdir(dirp)) != 0) {
+            // Skip hidden entries, including "." and "..".
+            std::string name(reinterpret_cast<char*>(&entry->d_name));
+            assert(name.size() > 0);
+            if (name[0] == '.') continue;
+            result.push_back(name);
+        }
+        int tmp = closedir(dirp);
+        assert(tmp == 0);
+        (void) tmp;
+        return result;
+    }
+
+ std::set<std::string> detectPossibleClusters() {
+ if (!vespalib::fileExists(_rootpath)) {
+ throw vespalib::IllegalStateException(
+ "No VDS installations found at all in " + _rootpath,
+ VESPA_STRLOC);
+ }
+ vector<std::string> files(listDir(_rootpath));
+ std::set<std::string> result(files.begin(), files.end());
+ return result;
+ }
+
+ std::set<uint16_t>
+ detectPossibleNodeIndexes(const std::string& cluster)
+ {
+ std::string dir = _rootpath + "/" + cluster + "/storage";
+ if (!vespalib::fileExists(dir)) {
+ throw vespalib::IllegalStateException(
+ "No VDS installations found at all in " + dir,
+ VESPA_STRLOC);
+ }
+ vector<std::string> files(listDir(dir));
+ std::set<uint16_t> result;
+ for (uint32_t i=0; i<files.size(); ++i) {
+ char* endp;
+ uint64_t index = strtoull(files[i].c_str(), &endp, 10);
+ if (*endp != '\0' || index > 0xffff) {
+ _err << "Found strange file in directory supposed to "
+ << "contain node indexes: '" << files[i] << "'.\n";
+ } else {
+ result.insert(index);
+ }
+ }
+ return result;
+ }
+
+ bool validate() {
+ // Validate that cluster was in fact found. Uses storage disk
+ // directories to scan for legal targets.
+ LOG(debug, "Detecting clusters");
+ std::set<std::string> clusters(detectPossibleClusters());
+ if (clusters.size() == 0) {
+ _err << "No VDS clusters at all detected on this node.\n";
+ return false;
+ }
+ bool clusterFound = false;
+ if (_cluster != "") {
+ if (clusters.find(_cluster) == clusters.end()) {
+ _err << "No cluster named '" << _cluster
+ << "' found.\n";
+ } else {
+ clusterFound = true;
+ }
+ } else if (clusters.size() != 1u) {
+ _err << "Cluster must be specified as there are multiple "
+ "targets.\n";
+ } else {
+ _cluster = *clusters.begin();
+ clusterFound = true;
+ }
+ if (!clusterFound) {
+ _err << "Detected cluster names on local node:\n";
+ for (std::set<std::string>::const_iterator it = clusters.begin();
+ it != clusters.end(); ++it)
+ {
+ _err << " " << *it << "\n";
+ }
+ return false;
+ }
+ // Validate that node index was in fact found. Uses storage disk
+ // directories to scan for legal targets.
+ LOG(debug, "Detecting node indexes");
+ std::set<uint16_t> nodeIndexes(
+ detectPossibleNodeIndexes(_cluster));
+ if (nodeIndexes.size() == 0) {
+ _err << "No node indexes at all detected on this node in "
+ "cluster '" << _cluster << ".\n";
+ return false;
+ }
+ bool indexFound = false;
+ if (_nodeIndex != uint32_t(0xffffffff)) {
+ if (_nodeIndex > 0xffff) {
+ _err << "Illegal node index " << _nodeIndex
+ << " specified. Nodes must be in the range of "
+ << "0-65535.\n";
+ return false;
+ }
+ if (nodeIndexes.find(_nodeIndex) == nodeIndexes.end()) {
+ _err << "No node with index " << _nodeIndex
+ << " found in cluster '" << _cluster
+ << "'.\n";
+ } else {
+ indexFound = true;
+ }
+ } else if (nodeIndexes.size() != 1u) {
+ _err << "Node index must be specified as there are multiple "
+ "targets.\n";
+ } else {
+ _nodeIndex = *nodeIndexes.begin();
+ indexFound = true;
+ }
+ if (!indexFound) {
+ _err << "Detected node indexes on local node in cluster '"
+ << _cluster << "':\n";
+ for (std::set<uint16_t>::const_iterator it = nodeIndexes.begin();
+ it != nodeIndexes.end(); ++it)
+ {
+ _err << " " << *it << "\n";
+ }
+ return false;
+ }
+ // Validate modes
+ if (_mode != "enable" && _mode != "disable" && _mode != "status") {
+ _err << "Illegal mode '" << _mode << "'.\n";
+ return false;
+ }
+ // Warn if senseless options are given in status mode
+ if (_mode == "status" && (_diskIndex != 0xffffffff || _message != "")) {
+ _err << "Warning: Disk index and/or reason makes no sense in "
+ << "status mode.\n";
+ }
+ if ((_mode == "enable" || _mode == "disable")
+ && _diskIndex == 0xffffffff)
+ {
+ _err << "A disk index must be given to specify which disk to "
+ << _mode << ".\n";
+ return false;
+ }
+ if (_mode == "disable" && _message == "") {
+ _err << "A reason must be given for why you are disabling the "
+ "disk.\n";
+ return false;
+ }
+ if (_mode == "enable" || _mode == "disable") {
+ std::ostringstream dir;
+ dir << _rootpath << "/" << _cluster << "/storage/" << _nodeIndex
+ << "/disks/d" << _diskIndex;
+ if (!vespalib::fileExists(dir.str())) {
+ _err << "Cannot " << _mode << " missing disk "
+ << _diskIndex << ". No disk detected at "
+ << dir.str() << "\n";
+ return false;
+ }
+ }
+ return true;
+ }
+
+ vector<uint16_t> getNodeIndexes() {
+ vector<uint16_t> indexes;
+ indexes.push_back(_nodeIndex);
+ return indexes;
+ }
+
+ std::string getNodePath(uint16_t nodeIndex) {
+ std::ostringstream ost;
+ ost << _rootpath << "/" << _cluster << "/storage/" << nodeIndex;
+ return ost.str();
+ }
+
+ std::string getPidFile(uint16_t nodeIndex) {
+ return getNodePath(nodeIndex) + "/pidfile";
+ }
+
+};
+
+
+int
+VdsDiskTool::run(int argc, const char * const * argv,
+ const std::string& rootPath,
+ std::ostream& out, std::ostream& err)
+{
+ CmdLineOptions options(argc, argv, rootPath, err);
+    try {
+ LOG(debug, "Parsing command line options");
+ options.parse();
+ } catch (vespalib::InvalidCommandLineArgumentsException& e) {
+ LOG(debug, "Failed parsing command line options");
+ if (!options._showSyntax) {
+ err << e.getMessage() << "\n";
+ options.writeSyntaxPage(err, false);
+ err << "\n";
+ return 1;
+ }
+ }
+ if (options._showSyntax) {
+ options.writeSyntaxPage(err, false);
+ err << "\n";
+ return 0;
+ }
+ LOG(debug, "Validating options");
+ if (!options.validate()) {
+ LOG(debug, "Options failed validation");
+ options.writeSyntaxPage(err, false);
+ return 1;
+ }
+ LOG(debug, "Iterate over all nodes to operate on");
+ // Iterate over all node indexes to operate on.
+ for (uint32_t indexIterator = 0;
+ indexIterator < options.getNodeIndexes().size(); ++indexIterator)
+ {
+ uint16_t nodeIndex = options.getNodeIndexes()[indexIterator];
+ std::string pidFile = options.getPidFile(nodeIndex);
+
+ // Read pid if process is running
+ uint32_t pid = 0;
+        try {
+ if (vespalib::fileExists(pidFile)) {
+ pid = readPid(pidFile);
+ if (kill(pid, 0) != 0) {
+ err << "Failed to signal process with pid "
+ << pid << " (" << errno << "): "
+                        << getLastErrorString() << ". If the storage node is "
+ << "running it needs to be manually restarted"
+ << " before changes take effect.\n";
+ } else if (options._mode == "status") {
+ out << "Storage node " << nodeIndex
+ << " in cluster " << options._cluster
+ << " is running with pid " << pid << ".\n";
+ }
+ }
+ } catch (vespalib::IoException& e) {
+ err << "Failed to read pid file: " << e.getMessage()
+ << "\n";
+ if (options._mode != "status") {
+ err << "Not restarting storage node after changes.\n";
+ }
+ }
+ framework::defaultimplementation::RealClock clock;
+ // Read the disk status file.
+ DeviceManager::LP devMan(new DeviceManager(
+ DeviceMapper::UP(new SimpleDeviceMapper),
+ clock));
+ MountPointList mountPointList(options.getNodePath(nodeIndex),
+ vector<vespalib::string>(),
+ devMan);
+ mountPointList.scanForDisks();
+ if (options._mode == "enable" || options._mode == "disable") {
+ if (mountPointList.getSize() <= options._diskIndex
+ || mountPointList[options._diskIndex].getState()
+ == Device::NOT_FOUND)
+ {
+ err << "Disk " << options._diskIndex << " on node "
+ << nodeIndex << " in cluster "
+ << options._cluster << " does not exist. "
+ << "Cannot enable or disable a non-existing "
+ << "disk.\n";
+ return 1;
+ }
+ if (mountPointList[options._diskIndex].getState()
+ != Device::OK)
+ {
+ err << "Disk " << options._diskIndex << " on node "
+ << nodeIndex << " in cluster "
+ << options._cluster << " fails pre-initialize "
+ << "routine. Cannot enable or disable disk with "
+ << "such a problem: "
+ << mountPointList[options._diskIndex] << "\n";
+ return 1;
+ }
+ }
+ vector<Device::State> preFileStates(
+ mountPointList.getSize());
+ for (uint32_t i=0; i<mountPointList.getSize(); ++i) {
+ preFileStates[i] = mountPointList[i].getState();
+ }
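+        // readFromFile() merges in disk events previously stored in the
+        // disk status file; the states captured above make it possible to
+        // tell such stored events apart from problems found by the scan.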
+ mountPointList.readFromFile();
+ if (options._mode == "enable") {
+ Directory& dir(mountPointList[options._diskIndex]);
+ if (dir.getState() == Device::OK) {
+ out << "Disk " << options._diskIndex << " on node "
+ << nodeIndex << " in cluster "
+ << options._cluster << " is already enabled. "
+ << "Nothing to do.\n";
+ continue;
+ }
+ // Shouldn't be null when state is not OK
+ assert(dir.getLastEvent() != 0);
+ IOEvent oldEvent(*dir.getLastEvent());
+ dir.clearEvents();
+ dir.getPartition().clearEvents();
+ dir.getPartition().getDisk().clearEvents();
+ if (preFileStates[options._diskIndex] != Device::OK) {
+ out << "Cannot enable disk " << options._diskIndex
+ << " on node " << nodeIndex << " in cluster "
+ << options._cluster << ", as it has a failure "
+ << "that must be fixed by an admin.\n";
+ if (preFileStates[options._diskIndex]
+ != oldEvent.getState())
+ {
+ out << "Clearing any stored state such that the "
+ << "disk will work once admin fixes\n"
+ << "the current error.\n";
+ }
+ } else {
+ out << "Reactivating disk " << options._diskIndex
+ << " on node " << nodeIndex << " in cluster "
+ << options._cluster << ". Removed stored event: "
+ << oldEvent << "\n";
+ }
+ } else if (options._mode == "disable") {
+ Directory& dir(mountPointList[options._diskIndex]);
+ if (dir.getState() != Device::OK) {
+ // Shouldn't be null when state is not OK
+ assert(dir.getLastEvent() != 0);
+ IOEvent oldEvent(*dir.getLastEvent());
+ out << "Disk " << options._diskIndex << " on node "
+ << nodeIndex << " in cluster "
+ << options._cluster << " is already disabled. "
+ << "Overriding old event: " << oldEvent << "\n";
+ }
+ dir.clearEvents();
+ dir.getPartition().clearEvents();
+ dir.getPartition().getDisk().clearEvents();
+ IOEvent newEvent(clock.getTimeInSeconds().getTime(),
+ Device::DISABLED_BY_ADMIN,
+ options._message, "vdsdisktool");
+ dir.addEvent(newEvent);
+ out << "Deactivated disk " << options._diskIndex
+ << " on node " << nodeIndex << " in cluster "
+ << options._cluster << ". Added event: "
+ << newEvent << "\n";
+ } else if (options._mode == "status") {
+ out << "Disks on storage node " << nodeIndex
+ << " in cluster " << options._cluster << ":\n";
+ if (mountPointList.getSize() == 0) {
+ out << " No disks at all are set up.\n";
+ }
+ for (uint32_t i=0; i<mountPointList.getSize(); ++i) {
+ out << " Disk " << i << ": ";
+ Directory& dir(mountPointList[i]);
+ if (dir.isOk()) {
+ out << "OK\n";
+ } else {
+ const IOEvent* event(dir.getLastEvent());
+                    assert(event != 0); // if there were no event, the disk would be OK
+ out << Device::getStateString(
+ event->getState())
+ << " - " << event->getDescription() << "\n";
+ }
+ }
+ }
+ if (options._mode == "enable" || options._mode == "disable") {
+ out << "Writing disk status file to disk\n";
+ mountPointList.writeToFile();
+ if (pid != 0) {
+ out << "Killing node such that it reads new data\n";
+ int result = kill(pid, SIGTERM);
+ if (result != 0) {
+ if (errno == EINVAL) {
+ err << "Signal SIGTERM not recognized.\n";
+ } else if (errno == EPERM) {
+ err << "No permission to send kill signal to "
+ "storage process\n";
+ } else if (errno == ESRCH) {
+ err << "No process or process group found "
+ "using pid " << pid << "\n";
+ }
+ }
+ }
+ out << "Done\n";
+ continue;
+ }
+ }
+ return 0;
+}
+
+} // memfile
+} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h
new file mode 100644
index 00000000000..f764db274ce
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <iostream>
+
+namespace storage {
+namespace memfile {
+
+struct VdsDiskTool {
+ static int run(int argc, const char * const * argv,
+ const std::string& rootPath,
+ std::ostream& out, std::ostream& err);
+};
+
+} // memfile
+} // storage
+
diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl
new file mode 100644
index 00000000000..7d7afcbc9d7
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl
@@ -0,0 +1,47 @@
+#!/usr/bin/perl -w
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+use strict;
+
+# Simple wrapper for executing vdsdisktool-bin
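+#
+# Pass --debug-perl-wrapper to print the command that would be run instead
+# of executing it. Illustrative invocations (mirroring the syntax help of
+# vdsdisktool-bin):
+#   vdsdisktool status
+#   vdsdisktool disable 2 "Seeing a lot of smart warnings on this one"
+#   vdsdisktool enable 2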
+
+my $args = &getArgs();
+&run("vdsdisktool-bin $args");
+
+exit(0);
+
+sub isHelpRequest {
+ foreach my $arg (@ARGV) {
+ if ($arg eq '-h' || $arg eq '--help') {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+sub getArgs {
+ my @args;
+ foreach my $arg (@ARGV) {
+ $arg =~ s/([ \t\f])/\\$1/g;
+ push @args, $arg;
+ }
+ return join(' ', @args);
+}
+
+sub isDebugRun {
+ foreach my $arg (@ARGV) {
+ if ($arg eq '--debug-perl-wrapper') {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+sub run {
+ my ($cmd) = @_;
+ if (&isDebugRun()) {
+ print "Debug: Would have executed '$cmd'.\n";
+ } else {
+ exec($cmd);
+ }
+}