diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /memfilepersistence |
Publish
Diffstat (limited to 'memfilepersistence')
169 files changed, 27172 insertions, 0 deletions
diff --git a/memfilepersistence/.gitignore b/memfilepersistence/.gitignore new file mode 100644 index 00000000000..a9b20e8992d --- /dev/null +++ b/memfilepersistence/.gitignore @@ -0,0 +1,2 @@ +Makefile +Testing diff --git a/memfilepersistence/CMakeLists.txt b/memfilepersistence/CMakeLists.txt new file mode 100644 index 00000000000..005a1373b75 --- /dev/null +++ b/memfilepersistence/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_define_module( + DEPENDS + vespadefaults + fastos + vespalog + vespalib + document + config_cloudconfig + configdefinitions + vdslib + persistence + storageframework + + LIBS + src/vespa/memfilepersistence + src/vespa/memfilepersistence/common + src/vespa/memfilepersistence/device + src/vespa/memfilepersistence/init + src/vespa/memfilepersistence/mapper + src/vespa/memfilepersistence/memfile + src/vespa/memfilepersistence/spi + src/vespa/memfilepersistence/tools + + TEST_DEPENDS + persistence_persistence_conformancetest + vdstestlib + + TESTS + src/tests + src/tests/conformance + src/tests/device + src/tests/init + src/tests/spi + src/tests/tools +) diff --git a/memfilepersistence/OWNERS b/memfilepersistence/OWNERS new file mode 100644 index 00000000000..dbcff24b338 --- /dev/null +++ b/memfilepersistence/OWNERS @@ -0,0 +1 @@ +vekterli diff --git a/memfilepersistence/README b/memfilepersistence/README new file mode 100644 index 00000000000..38a0e92febb --- /dev/null +++ b/memfilepersistence/README @@ -0,0 +1,4 @@ +This module contains Storage's persistence SPI implementation. + +It uses memory representation of files, which currently are persisted in +slotfiles on disk. 
diff --git a/memfilepersistence/src/.gitignore b/memfilepersistence/src/.gitignore new file mode 100644 index 00000000000..d6d89678e22 --- /dev/null +++ b/memfilepersistence/src/.gitignore @@ -0,0 +1,4 @@ +/Makefile.ini +/config_command.sh +/project.dsw +/memfilepersistence.mak diff --git a/memfilepersistence/src/Doxyfile b/memfilepersistence/src/Doxyfile new file mode 100644 index 00000000000..d40aff6f46c --- /dev/null +++ b/memfilepersistence/src/Doxyfile @@ -0,0 +1,994 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Doxyfile 1.2.18 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# General configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = Storage + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = ../doc + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. 
Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, +# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en +# (Japanese with english messages), Korean, Norwegian, Polish, Portuguese, +# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish and Ukrainian. + +OUTPUT_LANGUAGE = English + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these class will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. 
+ +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited +# members of a class in the documentation of that class as if those members were +# ordinary class members. Constructors, destructors and assignment operators of +# the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. It is allowed to use relative paths in the argument list. 
+ +STRIP_FROM_PATH = + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower case letters. If set to YES upper case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# users are adviced to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. 
If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explict @brief command for a brief description. + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# reimplements. + +INHERIT_DOCS = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. 
+ +TAB_SIZE = 4 + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consist of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. 
+ +MAX_INITIALIZER_LINES = 30 + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. +# For instance some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources +# only. Doxygen will then generate output that is more tailored for Java. +# For instance namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. 
+ +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = storage + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp +# *.h++ *.idl *.odl + +FILE_PATTERNS = *.h *.cpp + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories +# that are symbolic links (a Unix filesystem feature) are excluded from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. 
+ +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. + +INPUT_FILTER = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. 
Documented entities will be cross-referenced with these sources. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet + +HTML_STYLESHEET = ../cpp/vespa_link.css + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output dir. 
+ +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non empty doxygen will try to run +# the html help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the Html help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript and frames is required (for instance Mozilla, Netscape 4.0+, +# or Internet explorer 4.0+). Note that for large projects the tree generation +# can take a very long time. In such cases it is better to disable this feature. +# Windows users are probably better off using the HTML help feature. 
+ +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. 
Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimised for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assigments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_XML = NO + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_PREDEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_PREDEF_ONLY tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse the +# parser if not removed. 
+ +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::addtions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tagfiles. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in Html, RTF and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. Note that this +# option is superceded by the HAVE_DOT option below. This is only a fallback. It is +# recommended to install and use dot, since it yield more powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. 
This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. 
If left blank, it is assumed the dot tool can be found on the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermedate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::addtions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO + +# The CGI_NAME tag should be the name of the CGI script that +# starts the search engine (doxysearch) with the correct parameters. +# A script with this name will be generated by doxygen. 
+ +CGI_NAME = search.cgi + +# The CGI_URL tag should be the absolute URL to the directory where the +# cgi binaries are located. See the documentation of your http daemon for +# details. + +CGI_URL = + +# The DOC_URL tag should be the absolute URL to the directory where the +# documentation is located. If left blank the absolute path to the +# documentation, with file:// prepended to it, will be used. + +DOC_URL = + +# The DOC_ABSPATH tag should be the absolute path to the directory where the +# documentation is located. If left blank the directory on the local machine +# will be used. + +DOC_ABSPATH = + +# The BIN_ABSPATH tag must point to the directory where the doxysearch binary +# is installed. + +BIN_ABSPATH = /usr/local/bin/ + +# The EXT_DOC_PATHS tag can be used to specify one or more paths to +# documentation generated for other projects. This allows doxysearch to search +# the documentation for these projects as well. + +EXT_DOC_PATHS = diff --git a/memfilepersistence/src/tests/.gitignore b/memfilepersistence/src/tests/.gitignore new file mode 100644 index 00000000000..b8a959a31c5 --- /dev/null +++ b/memfilepersistence/src/tests/.gitignore @@ -0,0 +1,8 @@ +/.depend +/Makefile +/dirconfig.tmp +/test.vlog +/testfile.0 +/testrunner +/vdsroot +memfilepersistence_testrunner_app diff --git a/memfilepersistence/src/tests/CMakeLists.txt b/memfilepersistence/src/tests/CMakeLists.txt new file mode 100644 index 00000000000..ee0cea9e1a5 --- /dev/null +++ b/memfilepersistence/src/tests/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(memfilepersistence_testrunner_app + SOURCES + testhelper.cpp + testrunner.cpp + DEPENDS + memfilepersistence_testconformance + memfilepersistence_testdevices + memfilepersistence_testinit + memfilepersistence_testspi + memfilepersistence_testtools + memfilepersistence +) +vespa_add_test(NAME memfilepersistence_testrunner_app COMMAND memfilepersistence_testrunner_app) diff --git a/memfilepersistence/src/tests/conformance/.gitignore b/memfilepersistence/src/tests/conformance/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/tests/conformance/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/tests/conformance/CMakeLists.txt b/memfilepersistence/src/tests/conformance/CMakeLists.txt new file mode 100644 index 00000000000..378f5751931 --- /dev/null +++ b/memfilepersistence/src/tests/conformance/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_testconformance + SOURCES + memfileconformancetest.cpp + DEPENDS +) diff --git a/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp b/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp new file mode 100644 index 00000000000..18a12788945 --- /dev/null +++ b/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/spi/memfilepersistence.h> +#include <vespa/persistence/conformancetest/conformancetest.h> + +LOG_SETUP(".test.conformance"); + +using namespace storage::spi; + +namespace storage { +namespace memfile { + + /* +struct MemFileConformanceTest : public ConformanceTest { + struct Factory : public PersistenceFactory { + + PersistenceSPI::UP getPersistenceImplementation() { + return PersistenceSPI::UP(new MemFilePersistence); + } + }; + + MemFileConformanceTest() + : ConformanceTest(PersistenceFactory::UP(new Factory)) {} + + CPPUNIT_TEST_SUITE(MemFileConformanceTest); + DEFINE_CONFORMANCE_TESTS(); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemFileConformanceTest); +*/ + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/device/.gitignore b/memfilepersistence/src/tests/device/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/tests/device/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/tests/device/CMakeLists.txt b/memfilepersistence/src/tests/device/CMakeLists.txt new file mode 100644 index 00000000000..845c70ae8e3 --- /dev/null +++ b/memfilepersistence/src/tests/device/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_testdevices + SOURCES + mountpointlisttest.cpp + devicemanagertest.cpp + devicestest.cpp + devicemappertest.cpp + partitionmonitortest.cpp + DEPENDS +) diff --git a/memfilepersistence/src/tests/device/devicemanagertest.cpp b/memfilepersistence/src/tests/device/devicemanagertest.cpp new file mode 100644 index 00000000000..eeb5007f452 --- /dev/null +++ b/memfilepersistence/src/tests/device/devicemanagertest.cpp @@ -0,0 +1,129 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/util/exception.h> +#include <sys/errno.h> +#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> + +namespace storage { + +namespace memfile { + +class DeviceManagerTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(DeviceManagerTest); + CPPUNIT_TEST(testEventClass); + CPPUNIT_TEST(testEventSending); + CPPUNIT_TEST(testXml); + CPPUNIT_TEST_SUITE_END(); + +public: + void testEventClass(); + void testEventSending(); + void testXml(); + + framework::defaultimplementation::FakeClock _clock; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DeviceManagerTest); + +void DeviceManagerTest::testEventClass() +{ + // Test that creation various IO events through common errno errors + // generates understandable errors. + { + IOEvent e(IOEvent::createEventFromErrno(1, ENOTDIR, "/mypath")); + CPPUNIT_ASSERT_EQUAL( + std::string("IOEvent(PATH_FAILURE, Not a directory: /mypath, time 1)"), + e.toString(true)); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, e.getState()); + } + { + IOEvent e(IOEvent::createEventFromErrno(2, EACCES, "/mypath")); + CPPUNIT_ASSERT_EQUAL( + std::string("IOEvent(NO_PERMISSION, Permission denied: /mypath, time 2)"), + e.toString(true)); + CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, e.getState()); + } + { + IOEvent e(IOEvent::createEventFromErrno(3, EIO, "/mypath")); + CPPUNIT_ASSERT_EQUAL( + std::string("IOEvent(IO_FAILURE, Input/output error: /mypath, time 3)"), + e.toString(true)); + CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, e.getState()); + } + { + IOEvent e( + IOEvent::createEventFromErrno(4, EBADF, "/mypath", VESPA_STRLOC)); + CPPUNIT_ASSERT_PREFIX( + std::string("IOEvent(INTERNAL_FAILURE, Bad file descriptor: /mypath" + ", testEventClass in"), + e.toString(true)); + 
CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, e.getState()); + } +} + +namespace { + + struct Listener : public IOEventListener { + std::ostringstream ost; + + Listener() : ost() { ost << "\n"; } + virtual ~Listener() {} + + virtual void handleDirectoryEvent(Directory& dir, const IOEvent& e) { + ost << "Dir " << dir.getPath() << ": " << e.toString(true) << "\n"; + } + virtual void handlePartitionEvent(Partition& part, const IOEvent& e) { + ost << "Partition " << part.getMountPoint() << ": " + << e.toString(true) << "\n"; + } + virtual void handleDiskEvent(Disk& disk, const IOEvent& e) { + ost << "Disk " << disk.getId() << ": " << e.toString(true) << "\n"; + } + + }; + +} + +void DeviceManagerTest::testEventSending() +{ + // Test that adding events to directories in the manager actually sends + // these events on to listeners. + DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); + Listener l; + manager.addIOEventListener(l); + Directory::LP dir(manager.getDirectory("/home/foo/var", 0)); + // IO failures are disk events. 
Will mark all partitions and + // directories on that disk bad + dir->addEvent(IOEvent::createEventFromErrno(1, EIO, "/home/foo/var/foo")); + dir->addEvent(IOEvent::createEventFromErrno(2, EBADF, "/home/foo/var/bar")); + dir->addEvent(IOEvent::createEventFromErrno(3, EACCES, "/home/foo/var/car")); + dir->addEvent(IOEvent::createEventFromErrno(4, EISDIR, "/home/foo/var/var")); + std::string expected("\n" + "Disk 1: IOEvent(IO_FAILURE, Input/output error: " + "/home/foo/var/foo, time 1)\n" + "Dir /home/foo/var: IOEvent(INTERNAL_FAILURE, Bad file " + "descriptor: /home/foo/var/bar, time 2)\n" + "Dir /home/foo/var: IOEvent(NO_PERMISSION, Permission denied: " + "/home/foo/var/car, time 3)\n" + "Dir /home/foo/var: IOEvent(PATH_FAILURE, Is a directory: " + "/home/foo/var/var, time 4)\n" + ); + CPPUNIT_ASSERT_EQUAL(expected, l.ost.str()); +} + +void DeviceManagerTest::testXml() +{ + DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); + Directory::LP dir(manager.getDirectory("/home/", 0)); + dir->getPartition().initializeMonitor(); + std::string xml = manager.toXml(" "); + CPPUNIT_ASSERT_MSG(xml, + xml.find("<partitionmonitor>") != std::string::npos); +} + +} + +} diff --git a/memfilepersistence/src/tests/device/devicemappertest.cpp b/memfilepersistence/src/tests/device/devicemappertest.cpp new file mode 100644 index 00000000000..a78554a6342 --- /dev/null +++ b/memfilepersistence/src/tests/device/devicemappertest.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/devicemapper.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/util/exceptions.h> +#include <sys/errno.h> + +namespace storage { + +namespace memfile { + +class DeviceMapperTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(DeviceMapperTest); + CPPUNIT_TEST(testSimpleDeviceMapper); + CPPUNIT_TEST(testAdvancedDeviceMapper); + CPPUNIT_TEST_SUITE_END(); + +public: + void testSimpleDeviceMapper(); + void testAdvancedDeviceMapper(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DeviceMapperTest); + +void DeviceMapperTest::testSimpleDeviceMapper() +{ + SimpleDeviceMapper mapper; + CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�")); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�")); + CPPUNIT_ASSERT_EQUAL(uint64_t(2), mapper.getDeviceId("whatnot")); + std::string expected("Whatever& �=)/%#)="); + CPPUNIT_ASSERT_EQUAL(expected, mapper.getMountPoint(expected)); +} + +void DeviceMapperTest::testAdvancedDeviceMapper() +{ + AdvancedDeviceMapper mapper; + try{ + mapper.getDeviceId("/doesnotexist"); + CPPUNIT_FAIL("Expected exception"); + } catch (vespalib::Exception& e) { + std::string what(e.what()); + CPPUNIT_ASSERT_CONTAIN( + "Failed to run stat to find data on file /doesnotexist", what); + } +} + +} + +} // storage diff --git a/memfilepersistence/src/tests/device/devicestest.cpp b/memfilepersistence/src/tests/device/devicestest.cpp new file mode 100644 index 00000000000..bd6898cb7ac --- /dev/null +++ b/memfilepersistence/src/tests/device/devicestest.cpp @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/util/exceptions.h> +#include <sys/errno.h> +#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> + +namespace storage { + +namespace memfile { + +class DevicesTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(DevicesTest); + CPPUNIT_TEST(testDisk); + CPPUNIT_TEST(testPartition); + CPPUNIT_TEST(testDirectory); + CPPUNIT_TEST_SUITE_END(); + +public: + void testDisk(); + void testPartition(); + void testDirectory(); + + framework::defaultimplementation::FakeClock _clock; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DevicesTest); + +void DevicesTest::testDisk() +{ + DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); + Disk::LP disk1(manager.getDisk("/something/on/disk")); + Disk::LP disk2(manager.getDisk("/something/on/disk")); + CPPUNIT_ASSERT_EQUAL(disk1->getId(), disk2->getId()); + CPPUNIT_ASSERT_EQUAL(disk1.get(), disk2.get()); + Disk::LP disk3(manager.getDisk("/something/on/disk2")); + CPPUNIT_ASSERT(disk2->getId() != disk3->getId()); + disk3->toString(); // Add code coverage +} + +void DevicesTest::testPartition() +{ + DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); + Partition::LP part(manager.getPartition("/etc")); + CPPUNIT_ASSERT_EQUAL(std::string("/etc"), part->getMountPoint()); + part->toString(); // Add code coverage +} + +void DevicesTest::testDirectory() +{ + DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); + Directory::LP dir1(manager.getDirectory("/on/disk", 0)); + CPPUNIT_ASSERT_EQUAL(std::string("/on/disk"), dir1->getPath()); + CPPUNIT_ASSERT(dir1->getLastEvent() == 0); + CPPUNIT_ASSERT_EQUAL(Device::OK, dir1->getState()); + CPPUNIT_ASSERT(dir1->isOk()); + CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 0"), dir1->toString()); + + dir1->addEvent(Device::IO_FAILURE, "Ouch", ""); + 
CPPUNIT_ASSERT(!dir1->isOk()); + CPPUNIT_ASSERT(dir1->getLastEvent() != 0); + CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 5 0 Ouch"), dir1->toString()); + dir1->toString(); // Add code coverage +} + +} + +} // storage diff --git a/memfilepersistence/src/tests/device/mountpointlisttest.cpp b/memfilepersistence/src/tests/device/mountpointlisttest.cpp new file mode 100644 index 00000000000..4cb5822ceb7 --- /dev/null +++ b/memfilepersistence/src/tests/device/mountpointlisttest.cpp @@ -0,0 +1,255 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <fstream> +#include <vespa/memfilepersistence/device/mountpointlist.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> + +using vespalib::LinkedPtr; +using vespalib::fileExists; +using vespalib::isDirectory; +using vespalib::isSymLink; +using vespalib::readLink; + +namespace storage { + +namespace memfile { + +class MountPointList_Test : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(MountPointList_Test); + CPPUNIT_TEST(testScanning); + CPPUNIT_TEST(testStatusFile); + CPPUNIT_TEST(testInitDisks); + CPPUNIT_TEST_SUITE_END(); + + static const std::string _prefix; + +public: + void testScanning(); + void testStatusFile(); + void testInitDisks(); + + void init(); + void tearDown(); + + framework::defaultimplementation::FakeClock _clock; + +private: + LinkedPtr<DeviceManager> newDeviceManager() { + return LinkedPtr<DeviceManager>( + new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + _clock)); + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MountPointList_Test); + +const std::string MountPointList_Test::_prefix("./vdsroot"); + +namespace { + void run(const std::string& cmd) { + CPPUNIT_ASSERT_MESSAGE(cmd, system(cmd.c_str()) == 0); + } +} + +void MountPointList_Test::init() +{ + tearDown(); + 
run("rm -rf "+_prefix); + run("mkdir -p "+_prefix+"/disks"); + + run("mkdir "+_prefix+"/disks/d0"); // Regular dir + run("mkdir "+_prefix+"/disks/d1"); // Inaccessible dir + run("chmod 000 "+_prefix+"/disks/d1"); + run("mkdir "+_prefix+"/disks/D2"); // Wrongly named dir + run("mkdir "+_prefix+"/disks/d3"); // Regular non-empty dir + run("touch "+_prefix+"/disks/d3/foo"); + run("touch "+_prefix+"/disks/d4"); // Not a dir + run("ln -s D2 "+_prefix+"/disks/d5"); // Symlink to dir + run("ln -s d4 "+_prefix+"/disks/d6"); // Symlink to file +} + +void MountPointList_Test::tearDown() +{ + try{ + if (fileExists(_prefix+"/disks/d1")) { + run("chmod 755 "+_prefix+"/disks/d1"); + } + } catch (std::exception& e) { + std::cerr << "Failed to clean up: " << e.what() << "\n"; + } +} + +void MountPointList_Test::testScanning() +{ + init(); + MountPointList list(_prefix, + std::vector<vespalib::string>(), + vespalib::LinkedPtr<DeviceManager>( + new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + _clock))); + list.scanForDisks(); + + // Check that we got the expected entries. + CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); + + for (uint32_t i=0; i<7u; ++i) { + std::ostringstream ost; + ost << _prefix << "/disks/d" << i; + CPPUNIT_ASSERT_EQUAL(ost.str(), list[i].getPath()); + } + + // Note.. scanForDisks() should not in any circumstances access the + // disks. 
Thus it should not know that d1 is inaccessible, or that d6 + // is actually a symlink to a file + CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[1].getState()); + CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[3].getState()); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState()); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[6].getState()); + + list.verifyHealthyDisks(-1); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); + CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, list[1].getState()); + CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); + CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState()); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); + CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState()); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState()); +} + +void MountPointList_Test::testStatusFile() +{ + init(); + std::string statusFileName(_prefix + "/disks.status"); + + // Try reading non-existing file, and writing a file + { + MountPointList list(_prefix, + std::vector<vespalib::string>(), + vespalib::LinkedPtr<DeviceManager>( + new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + _clock))); + + _clock.setAbsoluteTimeInSeconds(5678); + list.scanForDisks(); + + // File does not currently exist, that should be ok though. + list.readFromFile(); + list.verifyHealthyDisks(-1); + CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); + list[5].addEvent(IOEvent(1234, Device::IO_FAILURE, "Argh", "Hmm")); + CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState()); + + // Write to file. + list.writeToFile(); + } + + // Check contents of file. 
+ { + std::ifstream in(statusFileName.c_str()); + std::string line; + CPPUNIT_ASSERT(std::getline(in, line)); + + CPPUNIT_ASSERT_PREFIX( + std::string(_prefix + "/disks/d1 3 5678 IoException: NO PERMISSION: " + "open(./vdsroot/disks/d1/chunkinfo, 0x1): Failed, " + "errno(13): Permission denied"), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_PREFIX( + std::string(_prefix +"/disks/d2 1 5678 Disk not found during scanning of " + "disks directory"), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_PREFIX( + std::string(_prefix + "/disks/d3 4 5678 Foreign data in mountpoint. New " + "mountpoints added should be empty."), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_PREFIX( + std::string(_prefix + "/disks/d4 2 5678 File d4 in disks directory is not " + "a directory."), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_PREFIX(std::string(_prefix + "/disks/d5 5 1234 Argh"), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_PREFIX( + std::string(_prefix + "/disks/d6 2 5678 The path exist, but is not a " + "directory."), + line); + CPPUNIT_ASSERT(std::getline(in, line)); + CPPUNIT_ASSERT_EQUAL(std::string("EOF"), line); + } + + // Starting over to get new device instances. + // Scan disk, read file, and check that erronious disks are not used. + { + MountPointList list(_prefix, + std::vector<vespalib::string>(), + vespalib::LinkedPtr<DeviceManager>( + new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + _clock))); + list.scanForDisks(); + list.readFromFile(); + // Check that we got the expected entries. + CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); + + // Note.. scanForDisks() should not under any circumstance access the + // disks. Thus it should not know that d1 is inaccessible. 
+ CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); + CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, list[1].getState()); + CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); + CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState()); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); + CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState()); + CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState()); + } +} + +void MountPointList_Test::testInitDisks() +{ + vespalib::string d3target = "d3target"; + vespalib::string foodev = _prefix + "/foodev"; + vespalib::string bardev = _prefix + "/bardev"; + + tearDown(); + run("rm -rf " + _prefix); + run("mkdir -p " + _prefix + "/disks/d2"); + run("ln -s " + d3target + " " + _prefix + "/disks/d3"); + + std::vector<vespalib::string> diskPaths { + // disks/d0 should become a regular directory + _prefix + "/disks/d0", + // disks/d1 should be a symlink to /foo + foodev, + // disks/d2 should already be a directory + "/ignored", + // disks/d3 should already be a symlink + "/ignored2" + }; + + MountPointList list(_prefix, diskPaths, newDeviceManager()); + list.initDisks(); + + CPPUNIT_ASSERT(isDirectory(_prefix + "/disks")); + CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d0")); + CPPUNIT_ASSERT(isSymLink(_prefix + "/disks/d1")); + CPPUNIT_ASSERT_EQUAL(foodev, readLink(_prefix + "/disks/d1")); + CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d2")); + CPPUNIT_ASSERT(isSymLink(_prefix + "/disks/d3")); + CPPUNIT_ASSERT_EQUAL(d3target, readLink(_prefix + "/disks/d3")); +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/tests/device/partitionmonitortest.cpp b/memfilepersistence/src/tests/device/partitionmonitortest.cpp new file mode 100644 index 00000000000..1a016edcc83 --- /dev/null +++ b/memfilepersistence/src/tests/device/partitionmonitortest.cpp @@ -0,0 +1,204 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/partitionmonitor.h> +#include <vespa/vdstestlib/cppunit/macros.h> + +namespace storage { + +namespace memfile { + +struct PartitionMonitorTest : public CppUnit::TestFixture +{ + void testNormalUsage(); + void testHighInodeFillrate(); + void testAlwaysStatPolicy(); + void testPeriodPolicy(); + void testStatOncePolicy(); + void testDynamicPolicy(); + void testIsFull(); + + CPPUNIT_TEST_SUITE(PartitionMonitorTest); + CPPUNIT_TEST(testNormalUsage); + CPPUNIT_TEST(testHighInodeFillrate); + CPPUNIT_TEST(testAlwaysStatPolicy); + CPPUNIT_TEST(testPeriodPolicy); + CPPUNIT_TEST(testStatOncePolicy); + CPPUNIT_TEST(testDynamicPolicy); + CPPUNIT_TEST(testIsFull); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(PartitionMonitorTest); + +struct FakeStatter : public PartitionMonitor::Statter { + struct statvfs _info; + + FakeStatter() { + _info.f_bsize = 4096; + _info.f_frsize = 4096; + _info.f_blocks = 1000; + _info.f_bfree = 500; + _info.f_bavail = 400; + _info.f_files = 64; + _info.f_ffree = 32; + _info.f_favail = 30; + _info.f_fsid = 13; + _info.f_namemax = 256; + } + void removeData(uint32_t size) { + _info.f_bavail += (size / _info.f_bsize); + _info.f_bfree += (size / _info.f_bsize); + } + void addData(uint32_t size) { + _info.f_bavail -= (size / _info.f_bsize); + _info.f_bfree -= (size / _info.f_bsize); + } + + virtual void statFileSystem(const std::string&, struct statvfs& info) { + info = _info; + } +}; + +void PartitionMonitorTest::testNormalUsage() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + std::string expected( + "PartitionMonitor(testrunner.cpp, STAT_PERIOD(100), " + "2048000/3686400 used - 55.5556 % full)"); + CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false)); + expected = + "PartitionMonitor(testrunner.cpp) 
{\n" + " Fill rate: 55.5556 %\n" + " Inode fill rate: 51.6129 %\n" + " Detected block size: 4096\n" + " File system id: 13\n" + " Total size: 3686400 (3600 kB)\n" + " Used size: 2048000 (2000 kB)\n" + " Queries since last stat: 0\n" + " Monitor policy: STAT_PERIOD(100)\n" + " Root only ratio 0\n" + " Max fill rate 98 %\n" + "}"; + CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true)); + CPPUNIT_ASSERT(monitor.getFillRate() > 0.55); +} + +void PartitionMonitorTest::testHighInodeFillrate() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + statter->_info.f_favail = 2; + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + std::string expected( + "PartitionMonitor(testrunner.cpp, STAT_PERIOD(100), " + "2048000/3686400 used - 94.1176 % full (inodes))"); + CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false)); + expected = + "PartitionMonitor(testrunner.cpp) {\n" + " Fill rate: 55.5556 %\n" + " Inode fill rate: 94.1176 %\n" + " Detected block size: 4096\n" + " File system id: 13\n" + " Total size: 3686400 (3600 kB)\n" + " Used size: 2048000 (2000 kB)\n" + " Queries since last stat: 0\n" + " Monitor policy: STAT_PERIOD(100)\n" + " Root only ratio 0\n" + " Max fill rate 98 %\n" + "}"; + CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true)); + CPPUNIT_ASSERT(monitor.getFillRate() > 0.94); +} + +void PartitionMonitorTest::testAlwaysStatPolicy() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + monitor.setAlwaysStatPolicy(); + for (uint32_t i=0; i<10; ++i) { + monitor.getFillRate(); + CPPUNIT_ASSERT_EQUAL(0u, monitor._queriesSinceStat); + } +} + +void PartitionMonitorTest::testPeriodPolicy() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + 
monitor.setStatPeriodPolicy(4); + for (uint32_t i=1; i<16; ++i) { + monitor.getFillRate(); + CPPUNIT_ASSERT_EQUAL(i % 4, monitor._queriesSinceStat); + } +} + +void PartitionMonitorTest::testStatOncePolicy() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + monitor.setStatOncePolicy(); + for (uint32_t i=1; i<16; ++i) { + monitor.getFillRate(); + CPPUNIT_ASSERT_EQUAL(i, monitor._queriesSinceStat); + } +} + +void PartitionMonitorTest::testDynamicPolicy() +{ + PartitionMonitor monitor("testrunner.cpp"); + FakeStatter* statter = new FakeStatter(); + monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + monitor.setStatDynamicPolicy(2); + // Add some data, such that we see that period goes down + CPPUNIT_ASSERT_EQUAL(uint64_t(3698), monitor.calcDynamicPeriod()); + CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate())); + monitor.addingData(256 * 1024); + CPPUNIT_ASSERT_EQUAL(uint64_t(2592), monitor.calcDynamicPeriod()); + CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate())); + monitor.addingData(512 * 1024); + CPPUNIT_ASSERT_EQUAL(uint64_t(968), monitor.calcDynamicPeriod()); + CPPUNIT_ASSERT_EQUAL(76, (int) (100 * monitor.getFillRate())); + // Add such that we hint that we have more data than possible on disk + monitor.addingData(1024 * 1024); + // Let fake stat just have a bit more data than before + statter->addData(256 * 1024); + // With high fill rate, we should check stat each time + CPPUNIT_ASSERT_EQUAL(uint64_t(1), monitor.calcDynamicPeriod()); + // As period is 1, we will now do a new stat, it should find we + // actually have less fill rate + CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate())); +} + +void PartitionMonitorTest::testIsFull() +{ + PartitionMonitor monitor("testrunner.cpp"); + monitor.setMaxFillness(0.85); + FakeStatter* statter = new FakeStatter(); + monitor.setStatOncePolicy(); + 
monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); + + CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate())); + CPPUNIT_ASSERT(!monitor.isFull()); + monitor.addingData(512 * 1024); + CPPUNIT_ASSERT_EQUAL(69, (int) (100 * monitor.getFillRate())); + CPPUNIT_ASSERT(!monitor.isFull()); + monitor.addingData(600 * 1024); + CPPUNIT_ASSERT_EQUAL(86, (int) (100 * monitor.getFillRate())); + CPPUNIT_ASSERT(monitor.isFull()); + monitor.removingData(32 * 1024); + CPPUNIT_ASSERT_EQUAL(85, (int) (100 * monitor.getFillRate())); + CPPUNIT_ASSERT(monitor.isFull()); + monitor.removingData(32 * 1024); + CPPUNIT_ASSERT_EQUAL(84, (int) (100 * monitor.getFillRate())); + CPPUNIT_ASSERT(!monitor.isFull()); +} + +} + +} // storage diff --git a/memfilepersistence/src/tests/init/.gitignore b/memfilepersistence/src/tests/init/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/tests/init/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/tests/init/CMakeLists.txt b/memfilepersistence/src/tests/init/CMakeLists.txt new file mode 100644 index 00000000000..ebc4738a8c4 --- /dev/null +++ b/memfilepersistence/src/tests/init/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_testinit + SOURCES + filescannertest.cpp + DEPENDS +) diff --git a/memfilepersistence/src/tests/init/filescannertest.cpp b/memfilepersistence/src/tests/init/filescannertest.cpp new file mode 100644 index 00000000000..8b49a21dad0 --- /dev/null +++ b/memfilepersistence/src/tests/init/filescannertest.cpp @@ -0,0 +1,492 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/document/bucket/bucketid.h> +#include <iomanip> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/memfilepersistence/init/filescanner.h> +#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h> +#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> +#include <vespa/storageframework/defaultimplementation/clock/realclock.h> +#include <vespa/vdslib/state/nodestate.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/random.h> +#include <sys/errno.h> + +namespace storage { +namespace memfile { + +struct FileScannerTest : public CppUnit::TestFixture { + struct TestParameters { + uint32_t filesPerDisk; + uint32_t diskCount; + uint32_t bucketSplitBits; + uint32_t dirLevels; + uint32_t dirSpread; + uint32_t parts; + std::set<uint32_t> disksDown; + bool diskDownWithBrokenSymlink; + bool bucketWrongDir; + bool bucketMultipleDirs; + bool bucketMultipleDisks; + bool addTemporaryFiles; + bool addAlienFiles; + bool dirWithNoListPermission; + bool dirWithNoWritePermission; + bool dirWithNoExecutePermission; + bool fileWithNoReadPermission; + bool fileWithNoWritePermission; + + TestParameters() + : filesPerDisk(10), diskCount(5), bucketSplitBits(20), + dirLevels(1), dirSpread(16), parts(1), disksDown(), + diskDownWithBrokenSymlink(false), + bucketWrongDir(false), bucketMultipleDirs(false), + bucketMultipleDisks(false), + addTemporaryFiles(false), addAlienFiles(false), + dirWithNoListPermission(false), + dirWithNoWritePermission(false), + dirWithNoExecutePermission(false), + fileWithNoReadPermission(false), + fileWithNoWritePermission(false) {} + void addAllComplexities() { + disksDown.insert(0); + disksDown.insert(2); + disksDown.insert(4); + bucketWrongDir = true; + bucketMultipleDirs = true; + bucketMultipleDisks = true; + parts = 7; + 
addTemporaryFiles = true; + addAlienFiles = true; + dirWithNoWritePermission = true; + fileWithNoWritePermission = true; + fileWithNoReadPermission = true; + } + }; + + void testNormalUsage() { + TestParameters params; + runTest(params); + } + void testMultipleParts() { + TestParameters params; + params.parts = 3; + runTest(params); + } + void testBucketInWrongDirectory() { + TestParameters params; + params.bucketWrongDir = true; + runTest(params); + } + void testBucketInMultipleDirectories() { + TestParameters params; + params.bucketMultipleDirs = true; + runTest(params); + } + void testZeroDirLevel() { + TestParameters params; + params.dirLevels = 0; + runTest(params); + } + void testSeveralDirLevels() { + TestParameters params; + params.dirLevels = 3; + runTest(params); + } + void testNonStandardDirSpread() { + TestParameters params; + params.dirSpread = 63; + runTest(params); + } + void testDiskDown() { + TestParameters params; + params.disksDown.insert(1); + runTest(params); + } + void testDiskDownBrokenSymlink() { + TestParameters params; + params.disksDown.insert(1); + params.disksDown.insert(3); + params.diskDownWithBrokenSymlink = true; + runTest(params); + } + void testRemoveTemporaryFile() { + TestParameters params; + params.addTemporaryFiles = true; + runTest(params); + } + void testAlienFile() { + TestParameters params; + params.addAlienFiles = true; + runTest(params); + } + void testUnlistableDirectory() { + TestParameters params; + params.dirWithNoListPermission = true; + runTest(params); + } + void testDirWithNoWritePermission() { + TestParameters params; + params.dirWithNoWritePermission = true; + runTest(params); + } + void testDirWithNoExecutePermission() { + TestParameters params; + params.dirWithNoWritePermission = true; + runTest(params); + } + void testFileWithNoReadPermission() { + TestParameters params; + params.bucketWrongDir = true; + params.fileWithNoReadPermission = true; + runTest(params); + } + void testFileWithNoWritePermission() { + 
TestParameters params; + params.bucketWrongDir = true; + params.fileWithNoWritePermission = true; + runTest(params); + } + void testAllFailuresCombined() { + TestParameters params; + params.addAllComplexities(); + runTest(params); + } + + CPPUNIT_TEST_SUITE(FileScannerTest); + CPPUNIT_TEST(testNormalUsage); + CPPUNIT_TEST(testMultipleParts); + CPPUNIT_TEST(testBucketInWrongDirectory); + CPPUNIT_TEST(testBucketInMultipleDirectories); + CPPUNIT_TEST(testZeroDirLevel); + CPPUNIT_TEST(testSeveralDirLevels); + CPPUNIT_TEST(testNonStandardDirSpread); + CPPUNIT_TEST(testDiskDown); + CPPUNIT_TEST(testDiskDownBrokenSymlink); + CPPUNIT_TEST(testRemoveTemporaryFile); + CPPUNIT_TEST(testAlienFile); + CPPUNIT_TEST(testUnlistableDirectory); + CPPUNIT_TEST(testDirWithNoWritePermission); + CPPUNIT_TEST(testDirWithNoExecutePermission); + CPPUNIT_TEST(testFileWithNoReadPermission); + CPPUNIT_TEST(testFileWithNoWritePermission); + CPPUNIT_TEST(testAllFailuresCombined); + CPPUNIT_TEST_SUITE_END(); + + // Actual implementation of the tests. + + /** Run a console command and fail test if it fails. */ + void run(std::string cmd); + + /** Struct containing metadata for a single bucket. */ + struct BucketData { + document::BucketId bucket; + uint32_t disk; + std::vector<uint32_t> directory; + bool shouldExist; // Set to false for buckets that won't exist due to + // some failure. + + BucketData() : shouldExist(true) {} + + bool sameDir(BucketData& other) const { + return (disk == other.disk && directory == other.directory); + } + }; + + /** + * Create an overview of the buckets we're gonna use in the test. + * (Without any failures introduced) + */ + std::vector<BucketData> createBuckets(const TestParameters& params); + + /** + * Create the data in the bucket map and introduce the failures specified + * in the test. Mark buckets in bucket list that won't exist due to the + * failures so we know how to verify result of test. 
+ */ + void createData(const TestParameters&, std::vector<BucketData>& buckets, + std::vector<std::string>& tempFiles, + std::vector<std::string>& alienFiles); + + /** + * Run a test with a given set of parameters, calling createData to set up + * the data, and then using a file scanner to actually list the files. + */ + void runTest(const TestParameters&); + +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(FileScannerTest); + +void +FileScannerTest::run(std::string cmd) +{ + int result = system(cmd.c_str()); + if (result != 0) { + CPPUNIT_FAIL("Failed to run command '" + cmd + "'."); + } +} + +std::vector<FileScannerTest::BucketData> +FileScannerTest::createBuckets(const TestParameters& params) +{ + std::vector<BucketData> buckets; + BucketDirectoryMapper dirMapper(params.dirLevels, params.dirSpread); + for (uint32_t i=0; i<params.diskCount; ++i) { + if (params.disksDown.find(i) != params.disksDown.end()) { + continue; + } + for (uint32_t j=0; j<params.filesPerDisk; ++j) { + BucketData data; + data.bucket = document::BucketId(params.bucketSplitBits, + params.filesPerDisk * i + j); + data.disk = i; + data.directory = dirMapper.getPath(data.bucket); + buckets.push_back(data); + } + } + return buckets; +} + +void +FileScannerTest::createData(const TestParameters& params, + std::vector<BucketData>& buckets, + std::vector<std::string>& tempFiles, + std::vector<std::string>& alienFiles) +{ + if (params.bucketWrongDir) { + CPPUNIT_ASSERT(params.dirLevels > 0); + buckets[0].directory[0] = (buckets[0].directory[0] + 1) + % params.dirSpread; + } + if (params.bucketMultipleDirs) { + CPPUNIT_ASSERT(params.dirLevels > 0); + BucketData copy(buckets[1]); + copy.directory[0] = (buckets[1].directory[0] + 1) % params.dirSpread; + buckets.push_back(copy); + } + if (params.bucketMultipleDisks && params.dirLevels > 0) { + BucketData copy(buckets[2]); + uint32_t disk = 0; + for (; disk<params.diskCount; ++disk) { + if (disk == copy.disk) continue; + if (params.disksDown.find(disk) == 
params.disksDown.end()) break; + } + CPPUNIT_ASSERT(disk < params.diskCount); + copy.disk = disk; + buckets.push_back(copy); + } + + run("mkdir -p vdsroot"); + run("chmod -R a+rwx vdsroot"); + run("rm -rf vdsroot"); + run("mkdir -p vdsroot/disks"); + vespalib::RandomGen randomizer; + uint32_t diskToHaveBrokenSymlink = (params.disksDown.empty() + ? 0 : randomizer.nextUint32(0, params.disksDown.size())); + uint32_t downIndex = 0; + for (uint32_t i=0; i<params.diskCount; ++i) { + if (params.disksDown.find(i) != params.disksDown.end()) { + if (downIndex++ == diskToHaveBrokenSymlink + && params.diskDownWithBrokenSymlink) + { + std::ostringstream path; + path << "vdsroot/disks/d" << i; + run("ln -s /non-existing-dir " + path.str()); + } + } else { + std::ostringstream path; + path << "vdsroot/disks/d" << i; + run("mkdir -p " + path.str()); + std::ofstream of((path.str() + "/chunkinfo").c_str()); + of << "#chunkinfo\n" << i << "\n" << params.diskCount << "\n"; + } + } + for (uint32_t i=0; i<buckets.size(); ++i) { + if (!buckets[i].shouldExist) continue; + std::ostringstream path; + path << "vdsroot/disks/d" << buckets[i].disk << std::hex; + for (uint32_t j=0; j<buckets[i].directory.size(); ++j) { + path << '/' << std::setw(4) << std::setfill('0') + << buckets[i].directory[j]; + } + run("mkdir -p " + path.str()); + if (params.dirWithNoListPermission && i == 8) { + run("chmod a-r " + path.str()); + // Scanner will abort with exception, so we don't really know + // how many docs will not be found due to this. + continue; + } + if (params.dirWithNoExecutePermission && i == 9) { + run("chmod a-x " + path.str()); + // Scanner will abort with exception, so we don't really know + // how many docs will not be found due to this. 
+ continue; + } + path << '/' << std::setw(16) << std::setfill('0') + << buckets[i].bucket.getId() << ".0"; + run("touch " + path.str()); + if (params.addTemporaryFiles && i == 4) { + run("touch " + path.str() + ".tmp"); + tempFiles.push_back(path.str() + ".tmp"); + } + if (params.addAlienFiles && i == 6) { + run("touch " + path.str() + ".alien"); + alienFiles.push_back(path.str() + ".alien"); + } + if (params.fileWithNoWritePermission && i == 0) { + // Overlapping with wrong dir so it would want to move file + run("chmod a-w " + path.str()); + } + if (params.fileWithNoReadPermission && i == 0) { + // Overlapping with wrong dir so it would want to move file + run("chmod a-r " + path.str()); + } + if (params.dirWithNoWritePermission && i == 9) { + run("chmod a-w " + path.str()); + } + } +} + +namespace { + struct BucketDataFound { + uint16_t _disk; + bool _checked; + + BucketDataFound() : _disk(65535), _checked(false) {} + BucketDataFound(uint32_t disk) : _disk(disk), _checked(false) {} + }; +} + +void +FileScannerTest::runTest(const TestParameters& params) +{ + std::vector<BucketData> buckets(createBuckets(params)); + std::vector<std::string> tempFiles; + std::vector<std::string> alienFiles; + createData(params, buckets, tempFiles, alienFiles); + + framework::defaultimplementation::RealClock clock; + framework::defaultimplementation::ComponentRegisterImpl compReg; + compReg.setClock(clock); + + MountPointList mountPoints("./vdsroot", + std::vector<vespalib::string>(), + vespalib::LinkedPtr<DeviceManager>( + new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + clock))); + mountPoints.init(params.diskCount); + + FileScanner scanner(compReg, mountPoints, + params.dirLevels, params.dirSpread); + std::map<document::BucketId, BucketDataFound> foundBuckets; + uint32_t extraBucketsSameDisk = 0; + uint32_t extraBucketsOtherDisk = 0; + for (uint32_t j=0; j<params.diskCount; ++j) { + // std::cerr << "Disk " << j << "\n"; + if (params.disksDown.find(j) != 
params.disksDown.end()) continue; + for (uint32_t i=0; i<params.parts; ++i) { + document::BucketId::List bucketList; + try{ + scanner.buildBucketList(bucketList, j, i, params.parts); + for (uint32_t k=0; k<bucketList.size(); ++k) { + if (foundBuckets.find(bucketList[k]) != foundBuckets.end()) + { + if (j == foundBuckets[bucketList[k]]._disk) { + ++extraBucketsSameDisk; + } else { + ++extraBucketsOtherDisk; + } +// std::cerr << "Bucket " << bucketList[k] +// << " on disk " << j << " is already found on disk " +// << foundBuckets[bucketList[k]]._disk << ".\n"; + } + foundBuckets[bucketList[k]] = BucketDataFound(j); + } + } catch (vespalib::IoException& e) { + if (!(params.dirWithNoListPermission + && e.getType() == vespalib::IoException::NO_PERMISSION)) + { + throw; + } + } + } + } + std::vector<BucketData> notFound; + std::vector<BucketData> wasFound; + std::vector<BucketDataFound> foundNonExisting; + // Verify that found buckets match buckets expected. + for (uint32_t i=0; i<buckets.size(); ++i) { + std::map<document::BucketId, BucketDataFound>::iterator found( + foundBuckets.find(buckets[i].bucket)); + if (buckets[i].shouldExist && found == foundBuckets.end()) { + notFound.push_back(buckets[i]); + } else if (!buckets[i].shouldExist && found != foundBuckets.end()) { + wasFound.push_back(buckets[i]); + } + if (found != foundBuckets.end()) { found->second._checked = true; } + } + for (std::map<document::BucketId, BucketDataFound>::iterator it + = foundBuckets.begin(); it != foundBuckets.end(); ++it) + { + if (!it->second._checked) { + foundNonExisting.push_back(it->second); + } + } + if (params.dirWithNoListPermission) { + CPPUNIT_ASSERT(!notFound.empty()); + } else if (!notFound.empty()) { + std::ostringstream ost; + ost << "Failed to find " << notFound.size() << " of " + << buckets.size() << " buckets. 
Including buckets:"; + for (uint32_t i=0; i<5 && i<notFound.size(); ++i) { + ost << " " << notFound[i].bucket; + } + CPPUNIT_FAIL(ost.str()); + } + CPPUNIT_ASSERT(wasFound.empty()); + CPPUNIT_ASSERT(foundNonExisting.empty()); + if (params.bucketMultipleDirs) { + // TODO: Test something else here? This is not correct test, as when + // there are two buckets on the same disk, one of them will be ignored by + // the bucket lister. + // CPPUNIT_ASSERT_EQUAL(1u, extraBucketsSameDisk); + } else { + CPPUNIT_ASSERT_EQUAL(0u, extraBucketsSameDisk); + } + if (params.bucketMultipleDisks) { + CPPUNIT_ASSERT_EQUAL(1u, extraBucketsOtherDisk); + } else { + CPPUNIT_ASSERT_EQUAL(0u, extraBucketsOtherDisk); + } + if (params.addTemporaryFiles) { + CPPUNIT_ASSERT_EQUAL( + 1, int(scanner.getMetrics()._temporaryFilesDeleted.getValue())); + } else { + CPPUNIT_ASSERT_EQUAL( + 0, int(scanner.getMetrics()._temporaryFilesDeleted.getValue())); + } + if (params.addAlienFiles) { + CPPUNIT_ASSERT_EQUAL( + 1, int(scanner.getMetrics()._alienFileCounter.getValue())); + } else { + CPPUNIT_ASSERT_EQUAL( + 0, int(scanner.getMetrics()._alienFileCounter.getValue())); + } + // We automatically delete temporary files (created by VDS, indicating + // an operation that only half finished. 
+ for (uint32_t i=0; i<tempFiles.size(); ++i) { + CPPUNIT_ASSERT_MSG(tempFiles[i], !vespalib::fileExists(tempFiles[i])); + } + // We don't automatically delete alien files + for (uint32_t i=0; i<alienFiles.size(); ++i) { + CPPUNIT_ASSERT_MSG(alienFiles[i], vespalib::fileExists(alienFiles[i])); + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/mapper/.gitignore b/memfilepersistence/src/tests/mapper/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/memfilepersistence/src/tests/mapper/.gitignore diff --git a/memfilepersistence/src/tests/spi/.gitignore b/memfilepersistence/src/tests/spi/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/tests/spi/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/tests/spi/CMakeLists.txt b/memfilepersistence/src/tests/spi/CMakeLists.txt new file mode 100644 index 00000000000..d5dade96f57 --- /dev/null +++ b/memfilepersistence/src/tests/spi/CMakeLists.txt @@ -0,0 +1,20 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_testspi + SOURCES + memfiletestutils.cpp + providerconformancetest.cpp + memfilev1serializertest.cpp + memfilev1verifiertest.cpp + basicoperationhandlertest.cpp + splitoperationhandlertest.cpp + joinoperationhandlertest.cpp + iteratorhandlertest.cpp + memfiletest.cpp + memcachetest.cpp + simplememfileiobuffertest.cpp + memfileautorepairtest.cpp + shared_data_location_tracker_test.cpp + buffered_file_writer_test.cpp + buffer_test.cpp + DEPENDS +) diff --git a/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp new file mode 100644 index 00000000000..2f7913b0e1f --- /dev/null +++ b/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp @@ -0,0 +1,735 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/vdstestlib/cppunit/macros.h>
#include <tests/spi/memfiletestutils.h>
#include <tests/spi/simulatedfailurefile.h>
#include <tests/spi/options_builder.h>
#include <vespa/document/fieldset/fieldsetrepo.h>
#include <vespa/document/fieldset/fieldsets.h>

namespace storage {
namespace memfile {
namespace {
    spi::LoadType defaultLoadType(0, "default");
}

/**
 * Tests of basic persistence operations (get/remove/update and cache
 * eviction behavior) against the memfile SPI, using the single-disk test
 * fixture. Each test puts documents, optionally removes/updates them,
 * flushes, and then inspects the resulting MemFile slots directly.
 */
class BasicOperationHandlerTest : public SingleDiskMemFileTestUtils
{
    CPPUNIT_TEST_SUITE(BasicOperationHandlerTest);
    CPPUNIT_TEST(testGetHeaderOnly);
    CPPUNIT_TEST(testGetFieldFiltering);
    CPPUNIT_TEST(testRemove);
    CPPUNIT_TEST(testRemoveWithNonMatchingTimestamp);
    CPPUNIT_TEST(testRemoveWithNonMatchingTimestampAlwaysPersist);
    CPPUNIT_TEST(testRemoveForExistingRemoveSameTimestamp);
    CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestamp);
    CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestampAlwaysPersist);
    CPPUNIT_TEST(testRemoveDocumentNotFound);
    CPPUNIT_TEST(testRemoveDocumentNotFoundAlwaysPersist);
    CPPUNIT_TEST(testRemoveExistingOlderDocumentVersion);
    CPPUNIT_TEST(testPutSameTimestampAsRemove);
    CPPUNIT_TEST(testUpdateBody);
    CPPUNIT_TEST(testUpdateHeaderOnly);
    CPPUNIT_TEST(testUpdateTimestampExists);
    CPPUNIT_TEST(testUpdateForNonExistentDocWillFail);
    CPPUNIT_TEST(testUpdateMayCreateDoc);
    CPPUNIT_TEST(testRemoveEntry);
    CPPUNIT_TEST(testEraseFromCacheOnFlushException);
    CPPUNIT_TEST(testEraseFromCacheOnMaintainException);
    CPPUNIT_TEST(testEraseFromCacheOnDeleteBucketException);
    CPPUNIT_TEST_SUITE_END();

    // Shared drivers parameterized on whether removes for missing/older
    // documents should always be persisted or only when a document is found.
    void doTestRemoveDocumentNotFound(
            OperationHandler::RemoveType persistRemove);
    void doTestRemoveWithNonMatchingTimestamp(
            OperationHandler::RemoveType persistRemove);
    void doTestRemoveForExistingRemoveNewTimestamp(
            OperationHandler::RemoveType persistRemove);
public:
    // NOTE(review): setupTestConfig and the testPutHeadersOnly* methods are
    // declared here but not registered in the suite above; their definitions
    // are not visible in this part of the file.
    void setupTestConfig();
    void testPutHeadersOnly();
    void testPutHeadersOnlyDocumentNotFound();
    void testPutHeadersOnlyTimestampNotFound();
    void testGetHeaderOnly();
    void testGetFieldFiltering();
    void testRemove();
    void testRemoveWithNonMatchingTimestamp();
    void testRemoveWithNonMatchingTimestampAlwaysPersist();
    void testRemoveForExistingRemoveSameTimestamp();
    void testRemoveForExistingRemoveNewTimestamp();
    void testRemoveForExistingRemoveNewTimestampAlwaysPersist();
    void testRemoveDocumentNotFound();
    void testRemoveDocumentNotFoundAlwaysPersist();
    void testRemoveExistingOlderDocumentVersion();
    void testPutSameTimestampAsRemove();
    void testUpdateBody();
    void testUpdateHeaderOnly();
    void testUpdateTimestampExists();
    void testUpdateForNonExistentDocWillFail();
    void testUpdateMayCreateDoc();
    void testRemoveEntry();
    void testEraseFromCacheOnFlushException();
    void testEraseFromCacheOnMaintainException();
    void testEraseFromCacheOnDeleteBucketException();
};

CPPUNIT_TEST_SUITE_REGISTRATION(BasicOperationHandlerTest);

/**
 * Test that doing a header-only get gives back a document containing
 * only the document header
 */
void
BasicOperationHandlerTest::testGetHeaderOnly()
{
    document::BucketId bucketId(16, 4);

    Document::SP doc(createRandomDocumentAtLocation(4));
    doc->setValue(doc->getField("hstringval"), document::StringFieldValue("hypnotoad"));
    doc->setValue(doc->getField("headerval"), document::IntFieldValue(42));

    doPut(doc, bucketId, Timestamp(4567), 0);
    flush(bucketId);

    spi::GetResult reply = doGet(bucketId, doc->getId(), document::HeaderFields());

    CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode());
    CPPUNIT_ASSERT(reply.hasDocument());
    // Only the two header fields must come back; body content is filtered out.
    CPPUNIT_ASSERT_EQUAL(std::string("headerval: 42\nhstringval: hypnotoad\n"),
                         stringifyFields(reply.getDocument()));
    CPPUNIT_ASSERT_EQUAL(
            size_t(1),
            getPersistenceProvider().getMetrics().headerOnlyGets.getValue());
}

// Test that a get with an explicit field set returns only the requested
// field, and that such a get is counted as a header-only get in the metrics.
void
BasicOperationHandlerTest::testGetFieldFiltering()
{
    document::BucketId bucketId(16, 4);
    Document::SP doc(createRandomDocumentAtLocation(4));
    doc->setValue(doc->getField("headerval"), document::IntFieldValue(42));
    doc->setValue(doc->getField("hstringval"),
                  document::StringFieldValue("groovy"));

    document::FieldSetRepo repo;

    doPut(doc, bucketId, Timestamp(4567), 0);
    flush(bucketId);
    spi::GetResult reply(doGet(bucketId,
                               doc->getId(),
                               *repo.parse(*getTypeRepo(), "testdoctype1:hstringval")));
    CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode());
    CPPUNIT_ASSERT(reply.hasDocument());
    CPPUNIT_ASSERT_EQUAL(std::string("hstringval: groovy\n"),
                         stringifyFields(reply.getDocument()));
    CPPUNIT_ASSERT_EQUAL(
            size_t(1),
            getPersistenceProvider().getMetrics().headerOnlyGets.getValue());
}

// Test that removing an existing document adds a remove slot that shares the
// put's header location but has an empty body location.
void
BasicOperationHandlerTest::testRemove()
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);

    document::Document::SP doc = doPut(4, Timestamp(1));

    CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
                                        doc->getId(),
                                        Timestamp(2),
                                        OperationHandler::PERSIST_REMOVE_IF_FOUND));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    // Clear the cache so the slots below are read back from disk.
    env()._cache.clear();

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));

    CPPUNIT_ASSERT_EQUAL(Timestamp(2), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT((*file)[1].deleted());
    CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[1].getLocation(BODY));
    CPPUNIT_ASSERT_EQUAL((*file)[0].getLocation(HEADER),
                         (*file)[1].getLocation(HEADER));
}

/**
 * Test that removing a document with a max timestamp for which there
 * is no matching document does not add a remove slot to the memfile
 */
void
BasicOperationHandlerTest::doTestRemoveWithNonMatchingTimestamp(
        OperationHandler::RemoveType persistRemove)
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    // Remove with a timestamp older than the put; no document matches.
    CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
                                         doc->getId(),
                                         Timestamp(1233),
                                         persistRemove));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    MemFilePtr file(getMemFile(bucketId));
    // ALWAYS_PERSIST_REMOVE still writes a remove slot; otherwise only the
    // original put slot remains.
    CPPUNIT_ASSERT_EQUAL(
            uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
                     ? 2 : 1),
            file->getSlotCount());

    int i = 0;
    if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
        CPPUNIT_ASSERT_EQUAL(Timestamp(1233), (*file)[0].getTimestamp());
        CPPUNIT_ASSERT((*file)[0].deleted());
        CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[0].getLocation(BODY));
        CPPUNIT_ASSERT((*file)[0].getLocation(HEADER)
                       != (*file)[1].getLocation(HEADER));
        CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0]));
        ++i;
    }

    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[i].getTimestamp());
    CPPUNIT_ASSERT(!(*file)[i].deleted());
    CPPUNIT_ASSERT(file->getDocument((*file)[i], ALL)->getValue("content").get());
}

/**
 * Test that removing a document with a max timestamp for which there
 * is no matching document does not add a remove slot to the memfile
 */
void
BasicOperationHandlerTest::testRemoveWithNonMatchingTimestamp()
{
    doTestRemoveWithNonMatchingTimestamp(
            OperationHandler::PERSIST_REMOVE_IF_FOUND);
}

void
BasicOperationHandlerTest::testRemoveWithNonMatchingTimestampAlwaysPersist()
{
    doTestRemoveWithNonMatchingTimestamp(
            OperationHandler::ALWAYS_PERSIST_REMOVE);
}

/**
 * Test that doing a remove with a timestamp for which there already
 * exists a remove does not add another remove slot
 */
void
BasicOperationHandlerTest::testRemoveForExistingRemoveSameTimestamp()
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
                                        doc->getId(),
                                        Timestamp(1235),
                                        OperationHandler::PERSIST_REMOVE_IF_FOUND));
    // Second remove at the same timestamp reports "not found".
    CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
                                         doc->getId(),
                                         Timestamp(1235),
                                         OperationHandler::PERSIST_REMOVE_IF_FOUND));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    // Should only be one remove entry still
    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());

    CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT((*file)[1].deleted());
}

// Driver: remove an already-removed document at a newer timestamp. Only
// ALWAYS_PERSIST_REMOVE writes an additional remove slot.
void
BasicOperationHandlerTest::doTestRemoveForExistingRemoveNewTimestamp(
        OperationHandler::RemoveType persistRemove)
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
                                        doc->getId(),
                                        Timestamp(1235),
                                        OperationHandler::PERSIST_REMOVE_IF_FOUND));
    CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId,
                                         doc->getId(),
                                         Timestamp(1236),
                                         persistRemove));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(
            uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
                     ? 3 : 2),
            file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());

    CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT((*file)[1].deleted());

    if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
        CPPUNIT_ASSERT_EQUAL(Timestamp(1236), (*file)[2].getTimestamp());
        CPPUNIT_ASSERT((*file)[2].deleted());
    }
}

/**
 * Test that doing a second remove with a newer timestamp does not add
 * another remove slot when PERSIST_REMOVE_IF_FOUND is specified
 */
void
BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestamp()
{
    doTestRemoveForExistingRemoveNewTimestamp(
            OperationHandler::PERSIST_REMOVE_IF_FOUND);
}

void
BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestampAlwaysPersist()
{
    doTestRemoveForExistingRemoveNewTimestamp(
            OperationHandler::ALWAYS_PERSIST_REMOVE);
}

/**
 * Test removing an older version of a document. Older version should be removed
 * in-place without attempting to add a new slot (which would fail).
 */
void
BasicOperationHandlerTest::testRemoveExistingOlderDocumentVersion()
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
                                        doc->getId(),
                                        Timestamp(1235),
                                        OperationHandler::ALWAYS_PERSIST_REMOVE));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    // Now remove at the put's own (older) timestamp.
    CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId,
                                        doc->getId(),
                                        Timestamp(1234),
                                        OperationHandler::ALWAYS_PERSIST_REMOVE));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    // Should now be two remove entries.
    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0]));
    CPPUNIT_ASSERT((*file)[0].deleted());

    CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[1]));
    CPPUNIT_ASSERT((*file)[1].deleted());
}

// Driver: remove a document id that was never put. Only ALWAYS_PERSIST_REMOVE
// writes a remove slot for the unknown id.
void
BasicOperationHandlerTest::doTestRemoveDocumentNotFound(
        OperationHandler::RemoveType persistRemove)
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::DocumentId docId("userdoc:test:4:0");
    doPut(4, Timestamp(1234));

    CPPUNIT_ASSERT_EQUAL(false,
                         doRemove(bucketId,
                                  docId,
                                  Timestamp(1235),
                                  persistRemove));

    getPersistenceProvider().flush(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(
            uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE
                     ? 2 : 1),
            file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) {
        CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp());
        CPPUNIT_ASSERT((*file)[1].deleted());
        CPPUNIT_ASSERT_EQUAL(docId, file->getDocumentId((*file)[1]));
    }
/* TODO: Test this in service layer tests.
+ CPPUNIT_ASSERT_EQUAL( + uint64_t(1), + env()._metrics.remove[documentapi::LoadType::DEFAULT].notFound.getValue()); +*/ +} + +/** + * Test that removing a non-existing document when PERSIST_EXISTING_ONLY is + * specified does not add a remove entry + */ +void +BasicOperationHandlerTest::testRemoveDocumentNotFound() +{ + doTestRemoveDocumentNotFound( + OperationHandler::PERSIST_REMOVE_IF_FOUND); +} + +void +BasicOperationHandlerTest::testRemoveDocumentNotFoundAlwaysPersist() +{ + doTestRemoveDocumentNotFound( + OperationHandler::ALWAYS_PERSIST_REMOVE); +} + +void +BasicOperationHandlerTest::testPutSameTimestampAsRemove() +{ + document::BucketId bucketId(16, 4); + + document::Document::SP doc = doPut(4, Timestamp(1234)); + + CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, + doc->getId(), + Timestamp(1235), + OperationHandler::PERSIST_REMOVE_IF_FOUND)); + + // Flush here to avoid put+remove being thrown away by duplicate timestamp + // exception evicting the cache and unpersisted changes. + flush(bucketId); + + doPut(4, Timestamp(1235)); + flush(bucketId); + + MemFilePtr file(getMemFile(bucketId)); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); + CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); + CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get()); + + CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); + CPPUNIT_ASSERT((*file)[1].deleted()); +} + +/** + * Test that updating body results in a new memfile slot containing + * an updated document + */ +void +BasicOperationHandlerTest::testUpdateBody() +{ + document::BucketId bucketId(16, 4); + document::StringFieldValue updateValue("foo"); + document::Document::SP doc = doPut(4, Timestamp(1234)); + document::Document originalDoc(*doc); + + document::DocumentUpdate::SP update = createBodyUpdate( + doc->getId(), updateValue); + + spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678)); + flush(bucketId); + CPPUNIT_ASSERT_EQUAL(1234, 
                         (int)result.getExistingTimestamp());

    // Old slot keeps the original body; new slot at 5678 holds the update.
    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get());
    CPPUNIT_ASSERT_EQUAL(*(originalDoc.getValue("content")),
                         *file->getDocument((*file)[0], ALL)->getValue("content"));

    CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("content").get());
    CPPUNIT_ASSERT_EQUAL(updateValue,
                         dynamic_cast<document::StringFieldValue&>(
                                 *file->getDocument((*file)[1], ALL)->getValue(
                                         "content")));
    // A body update must not count towards the header-only update metric.
    CPPUNIT_ASSERT_EQUAL(
            size_t(0),
            getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue());
}

// Header-only update: new slot gets the header field, metric is bumped.
void
BasicOperationHandlerTest::testUpdateHeaderOnly()
{
    document::BucketId bucketId(16, 4);
    document::IntFieldValue updateValue(42);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    document::DocumentUpdate::SP update = createHeaderUpdate(
            doc->getId(), updateValue);

    spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678));
    flush(bucketId);
    CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp());

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("headerval").get() ==
                   NULL);

    CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("headerval").get());
    CPPUNIT_ASSERT_EQUAL(updateValue,
                         dynamic_cast<document::IntFieldValue&>(
                                 *file->getDocument((*file)[1], ALL)->getValue(
                                         "headerval")));
    CPPUNIT_ASSERT_EQUAL(
            size_t(1),
            getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue());
}

// Updating at a timestamp that already has a slot is a transient error.
void
BasicOperationHandlerTest::testUpdateTimestampExists()
{
    document::BucketId bucketId(16, 4);
    document::IntFieldValue updateValue(42);
    document::Document::SP doc = doPut(4, Timestamp(1234));

    document::DocumentUpdate::SP update = createHeaderUpdate(
            doc->getId(), updateValue);

    spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(1234));
    flush(bucketId);
    CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, result.getErrorCode());
}

// Update of a document that was never put: existing timestamp 0, no slot.
void
BasicOperationHandlerTest::testUpdateForNonExistentDocWillFail()
{
    document::BucketId bucketId(16, 4);
    document::IntFieldValue updateValue(42);
    Timestamp timestamp(5678);

    // Is there an easier way to get a DocumentId?
    document::Document::UP doc(
            createRandomDocumentAtLocation(4, timestamp.getTime()));
    const DocumentId& documentId = doc->getId();

    document::DocumentUpdate::SP update = createHeaderUpdate(
            documentId, updateValue);

    spi::UpdateResult result = doUpdate(bucketId, update, timestamp);
    flush(bucketId);
    CPPUNIT_ASSERT_EQUAL(0, (int)result.getExistingTimestamp());

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(0), file->getSlotCount());
}

// With create-if-non-existent set, the same update creates a new document.
void
BasicOperationHandlerTest::testUpdateMayCreateDoc()
{
    document::BucketId bucketId(16, 4);
    document::IntFieldValue updateValue(42);
    Timestamp timestamp(5678);

    // Is there an easier way to get a DocumentId?
    document::Document::UP doc(
            createRandomDocumentAtLocation(4, timestamp.getTime()));
    const DocumentId& documentId = doc->getId();

    document::DocumentUpdate::SP update = createHeaderUpdate(
            documentId, updateValue);
    update->setCreateIfNonExistent(true);

    spi::UpdateResult result = doUpdate(bucketId, update, timestamp);
    flush(bucketId);
    CPPUNIT_ASSERT_EQUAL(timestamp.getTime(),
                         (uint64_t)result.getExistingTimestamp());

    MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(timestamp, (*file)[0].getTimestamp());

    auto headerval = file->getDocument((*file)[0], ALL)->getValue("headerval");
    CPPUNIT_ASSERT(headerval.get() != nullptr);
    CPPUNIT_ASSERT_EQUAL(updateValue,
                         dynamic_cast<document::IntFieldValue&>(*headerval));
}

// removeEntry() must drop the targeted slots entirely, leaving the rest.
void
BasicOperationHandlerTest::testRemoveEntry()
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);

    doPut(4, Timestamp(1234));
    Document::SP doc = doPut(4, Timestamp(2345));
    doPut(4, Timestamp(3456));

    getPersistenceProvider().removeEntry(
            spi::Bucket(bucketId, spi::PartitionId(0)),
            spi::Timestamp(1234), context);
    getPersistenceProvider().removeEntry(
            spi::Bucket(bucketId, spi::PartitionId(0)),
            spi::Timestamp(3456), context);
    flush(bucketId);

    memfile::MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));
}

// Shrink the file's meta/header block limits so the failure tests below
// force file growth (and thus writes) with small documents.
void
BasicOperationHandlerTest::setupTestConfig()
{
    using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig;
    using MemFileConfigBuilder
        = vespa::config::storage::StorMemfilepersistenceConfigBuilder;
    MemFileConfigBuilder builder(
            *env().acquireConfigReadLock().memFilePersistenceConfig());
    builder.minimumFileMetaSlots = 2;
    builder.minimumFileHeaderBlockSize = 3000;
    auto newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder));
    env().acquireConfigWriteLock().setMemFilePersistenceConfig(
            std::move(newConfig));
}

// A flush that fails with an I/O error must evict the bucket from the
// cache while keeping previously persisted data intact on disk.
void
BasicOperationHandlerTest::testEraseFromCacheOnFlushException()
{
    document::BucketId bucketId(16, 4);

    setupTestConfig();

    document::Document::SP doc(
            createRandomDocumentAtLocation(4, 2345, 1024, 1024));
    doPut(doc, bucketId, Timestamp(2345));
    flush(bucketId);
    // Must throw out cache to re-create lazyfile
    env()._cache.clear();

    env()._lazyFileFactory =
        std::unique_ptr<Environment::LazyFileFactory>(
                new SimulatedFailureLazyFile::Factory);

    // Try partial write, followed by full rewrite
    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < i+1; ++j) {
            document::Document::SP doc2(
                    createRandomDocumentAtLocation(4, 4000 + j, 1500, 1500));
            doPut(doc2, bucketId, Timestamp(4000 + j));
        }
        spi::Result result = flush(bucketId);
        CPPUNIT_ASSERT(result.hasError());
        CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
                       != vespalib::string::npos);

        CPPUNIT_ASSERT(!env()._cache.contains(bucketId));

        // Check that we still have first persisted put
        memfile::MemFilePtr file(getMemFile(bucketId));
        CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount());
        CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp());
        CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL));
    }
}

// maintain() failing mid-compaction must also evict the bucket from cache
// without losing the already-persisted slots.
void
BasicOperationHandlerTest::testEraseFromCacheOnMaintainException()
{
    document::BucketId bucketId(16, 4);

    setupTestConfig();

    getFakeClock()._absoluteTime = framework::MicroSecTime(2000 * 1000000);
    auto options = env().acquireConfigReadLock().options();
    env().acquireConfigWriteLock().setOptions(
            OptionsBuilder(*options)
                .revertTimePeriod(framework::MicroSecTime(100000ULL * 1000000))
                .build());
    // Put a doc twice to allow for revert time compaction to be done
    document::Document::SP doc1(
            createRandomDocumentAtLocation(4, 2345, 1024, 1024));
    document::Document::SP doc2(
            createRandomDocumentAtLocation(4, 2345, 1024, 1024));
    doPut(doc1, bucketId, Timestamp(1000 * 1000000));
    doPut(doc2, bucketId, Timestamp(1500 * 1000000));
    flush(bucketId);
    env()._cache.clear();

    // Shrink the revert period so maintain() will attempt compaction.
    options = env().acquireConfigReadLock().options();
    env().acquireConfigWriteLock().setOptions(
            OptionsBuilder(*options)
                .revertTimePeriod(framework::MicroSecTime(100ULL * 1000000))
                .build());

    env()._lazyFileFactory =
        std::unique_ptr<Environment::LazyFileFactory>(
                new SimulatedFailureLazyFile::Factory);

    spi::Result result = getPersistenceProvider().maintain(
            spi::Bucket(bucketId, spi::PartitionId(0)),
            spi::HIGH);
    CPPUNIT_ASSERT(result.hasError());
    CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write")
                   != vespalib::string::npos);

    CPPUNIT_ASSERT(!env()._cache.contains(bucketId));

    // Check that we still have both persisted puts
    memfile::MemFilePtr file(getMemFile(bucketId));
    CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount());
    CPPUNIT_ASSERT_EQUAL(Timestamp(1000 * 1000000), (*file)[0].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(*doc1, *file->getDocument((*file)[0], ALL));
    CPPUNIT_ASSERT_EQUAL(Timestamp(1500 * 1000000), (*file)[1].getTimestamp());
    CPPUNIT_ASSERT_EQUAL(*doc2, *file->getDocument((*file)[1], ALL));
}

// deleteBucket() hitting a simulated read failure (readOpsBeforeFailure = 0,
// so the very first read fails) must still evict the bucket from the cache.
void
BasicOperationHandlerTest::testEraseFromCacheOnDeleteBucketException()
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    document::BucketId bucketId(16, 4);
    document::Document::SP doc(
            createRandomDocumentAtLocation(4, 2345, 1024, 1024));
    doPut(doc, bucketId, Timestamp(2345));
    flush(bucketId);
    env()._cache.clear();

    SimulatedFailureLazyFile::Factory* factory(
            new SimulatedFailureLazyFile::Factory);
    factory->setReadOpsBeforeFailure(0);
    env()._lazyFileFactory =
        std::unique_ptr<Environment::LazyFileFactory>(factory);

    // loadFile will fail
    spi::Result result = getPersistenceProvider().deleteBucket(
            spi::Bucket(bucketId, spi::PartitionId(0)), context);
    CPPUNIT_ASSERT(result.hasError());
    CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read")
                   != vespalib::string::npos);

    CPPUNIT_ASSERT(!env()._cache.contains(bucketId));

}

}

}
diff --git a/memfilepersistence/src/tests/spi/buffer_test.cpp b/memfilepersistence/src/tests/spi/buffer_test.cpp
new file mode 100644
index 00000000000..a2d917301fc
--- /dev/null
+++ b/memfilepersistence/src/tests/spi/buffer_test.cpp
@@ -0,0 +1,75 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/vdstestlib/cppunit/macros.h>
#include <vespa/memfilepersistence/mapper/buffer.h>

namespace storage {
namespace memfile {

// Unit tests for the Buffer class used by the memfile mapper.
class BufferTest : public CppUnit::TestFixture
{
public:
    void getSizeReturnsInitiallyAllocatedSize();
    void getSizeReturnsUnAlignedSizeForMMappedAllocs();
    void resizeRetainsExistingDataWhenSizingUp();
    void resizeRetainsExistingDataWhenSizingDown();
    void bufferAddressIs512ByteAligned();

    CPPUNIT_TEST_SUITE(BufferTest);
    CPPUNIT_TEST(getSizeReturnsInitiallyAllocatedSize);
    CPPUNIT_TEST(getSizeReturnsUnAlignedSizeForMMappedAllocs);
    CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingUp);
    CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingDown);
    CPPUNIT_TEST(bufferAddressIs512ByteAligned);
    CPPUNIT_TEST_SUITE_END();
};

CPPUNIT_TEST_SUITE_REGISTRATION(BufferTest);

// getSize() reports the requested size, not a rounded-up allocation size.
void
BufferTest::getSizeReturnsInitiallyAllocatedSize()
{
    Buffer buf(1234);
    CPPUNIT_ASSERT_EQUAL(size_t(1234), buf.getSize());
}

// Same holds for sizes above the huge-page threshold (mmap-backed allocs).
void
BufferTest::getSizeReturnsUnAlignedSizeForMMappedAllocs()
{
    Buffer buf(vespalib::MMapAlloc::HUGEPAGE_SIZE + 1);
    CPPUNIT_ASSERT_EQUAL(size_t(vespalib::MMapAlloc::HUGEPAGE_SIZE + 1),
                         buf.getSize());
}

+void +BufferTest::resizeRetainsExistingDataWhenSizingUp() +{ + std::string src = "hello world"; + Buffer buf(src.size()); + memcpy(buf.getBuffer(), src.data(), src.size()); + buf.resize(src.size() * 2); + CPPUNIT_ASSERT_EQUAL(src.size() * 2, buf.getSize()); + CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size())); +} + +void +BufferTest::resizeRetainsExistingDataWhenSizingDown() +{ + std::string src = "hello world"; + Buffer buf(src.size()); + memcpy(buf.getBuffer(), src.data(), src.size()); + buf.resize(src.size() / 2); + CPPUNIT_ASSERT_EQUAL(src.size() / 2, buf.getSize()); + CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size() / 2)); +} + +void +BufferTest::bufferAddressIs512ByteAligned() +{ + Buffer buf(32); + CPPUNIT_ASSERT(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0); +} + +} // memfile +} // storage + diff --git a/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp b/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp new file mode 100644 index 00000000000..b59e8a32258 --- /dev/null +++ b/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h> +#include <vespa/memfilepersistence/mapper/buffer.h> +#include <vespa/vespalib/io/fileutil.h> + +namespace storage { +namespace memfile { + +class BufferedFileWriterTest : public CppUnit::TestFixture +{ +public: + void noImplicitFlushingWhenDestructing(); + + CPPUNIT_TEST_SUITE(BufferedFileWriterTest); + CPPUNIT_TEST(noImplicitFlushingWhenDestructing); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(BufferedFileWriterTest); + +namespace { + +// Partial mock of vespalib::File. 
Unfortunately, there's currently no +// base interface to implement so have to override a class that already has +// implementation code present. +class MockFile : public vespalib::File +{ +public: + bool _didWrite; + + MockFile(const std::string& filename) + : File(filename), + _didWrite(false) + { + } + + void open(int flags, bool autoCreateDirectories) override { + (void) flags; + (void) autoCreateDirectories; + // Don't do anything here to prevent us from actually opening a file + // on disk. + } + + off_t write(const void *buf, size_t bufsize, off_t offset) override { + (void) buf; + (void) bufsize; + (void) offset; + _didWrite = true; + return 0; + } +}; + +} + +void +BufferedFileWriterTest::noImplicitFlushingWhenDestructing() +{ + MockFile file("foo"); + { + Buffer buffer(1024); + BufferedFileWriter writer(file, buffer, buffer.getSize()); + // Do a buffered write. This fits well within the buffer and should + // consequently not be immediately written out to the backing file. + writer.write("blarg", 5); + // Escape scope without having flushed anything. + } + // Since BufferedFileWriter is meant to be used with O_DIRECT files, + // flushing just implies writing rather than syncing (this is a half truth + // since you still sync directories etc to ensure metadata is written, but + // this constrained assumption works fine in the context of this test). + CPPUNIT_ASSERT(!file._didWrite); +} + +} // memfile +} // storage + diff --git a/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp b/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp new file mode 100644 index 00000000000..6fea98e3c8e --- /dev/null +++ b/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp @@ -0,0 +1,940 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 

#include <vespa/fastos/fastos.h>
#include <set>
#include <vector>
#include <vespa/vdstestlib/cppunit/macros.h>
#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h>
#include <tests/spi/memfiletestutils.h>
#include <tests/spi/simulatedfailurefile.h>
#include <tests/spi/options_builder.h>
#include <vespa/document/fieldset/fieldsets.h>

namespace storage {
namespace memfile {
namespace {
    spi::LoadType defaultLoadType(0, "default");
}

// Tests for the persistence provider's createIterator/iterate/destroyIterator
// SPI implementation against memfile-backed buckets.
class IteratorHandlerTest : public SingleDiskMemFileTestUtils
{
    CPPUNIT_TEST_SUITE(IteratorHandlerTest);
    CPPUNIT_TEST(testCreateIterator);
    CPPUNIT_TEST(testSomeSlotsRemovedBetweenInvocations);
    CPPUNIT_TEST(testAllSlotsRemovedBetweenInvocations);
    CPPUNIT_TEST(testIterateMetadataOnly);
    CPPUNIT_TEST(testIterateHeadersOnly);
    CPPUNIT_TEST(testIterateLargeDocument);
    CPPUNIT_TEST(testDocumentsRemovedBetweenInvocations);
    CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocations);
    CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocationsIncludeRemoves);
    CPPUNIT_TEST(testMatchTimestampRangeDocAltered);
    CPPUNIT_TEST(testIterateAllVersions);
    CPPUNIT_TEST(testFieldSetFiltering);
    CPPUNIT_TEST(testIteratorInactiveOnException);
    CPPUNIT_TEST(testDocsCachedBeforeDocumentSelection);
    CPPUNIT_TEST(testTimestampRangeLimitedPrefetch);
    CPPUNIT_TEST(testCachePrefetchRequirements);
    CPPUNIT_TEST(testBucketEvictedFromCacheOnIterateException);
    CPPUNIT_TEST_SUITE_END();

public:
    void testCreateIterator();
    void testSomeSlotsRemovedBetweenInvocations();
    void testAllSlotsRemovedBetweenInvocations();
    void testIterateMetadataOnly();
    void testIterateHeadersOnly();
    void testIterateLargeDocument();
    void testDocumentsRemovedBetweenInvocations();
    void testUnrevertableRemoveBetweenInvocations();
    void testUnrevertableRemoveBetweenInvocationsIncludeRemoves();
    void testMatchTimestampRangeDocAltered();
    void testIterateAllVersions();
    void testFieldSetFiltering();
    void testIteratorInactiveOnException();
    void testDocsCachedBeforeDocumentSelection();
    void testTimestampRangeLimitedPrefetch();
    void testCachePrefetchRequirements();
    void testBucketEvictedFromCacheOnIterateException();

    void setUp();
    void tearDown();

    // One iterate() invocation's worth of returned entries.
    struct Chunk
    {
        std::vector<spi::DocEntry::LP> _entries;
    };

private:
    spi::Selection createSelection(const std::string& docSel) const;


    // Convenience wrapper around the SPI createIterator call with a default
    // context, versions and field set.
    spi::CreateIteratorResult create(
            const spi::Bucket& b,
            const spi::Selection& sel,
            spi::IncludedVersions versions = spi::NEWEST_DOCUMENT_ONLY,
            const document::FieldSet& fieldSet = document::AllFields())
    {
        spi::Context context(defaultLoadType, spi::Priority(0),
                             spi::Trace::TraceLevel(0));
        return getPersistenceProvider().createIterator(b, fieldSet, sel,
                                                       versions, context);
    }

    typedef std::pair<Document::SP, spi::Timestamp> DocAndTimestamp;

    std::vector<DocAndTimestamp> feedDocs(size_t numDocs,
                                          uint32_t minSize = 110,
                                          uint32_t maxSize = 110);

    std::vector<Chunk> doIterate(spi::IteratorId id,
                                 uint64_t maxByteSize,
                                 size_t maxChunks = 0,
                                 bool allowEmptyResult = false);

    void verifyDocs(const std::vector<DocAndTimestamp>& wanted,
                    const std::vector<IteratorHandlerTest::Chunk>& chunks,
                    const std::set<vespalib::string>& removes
                        = std::set<vespalib::string>()) const;

    void doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves);
};

CPPUNIT_TEST_SUITE_REGISTRATION(IteratorHandlerTest);

void
IteratorHandlerTest::setUp()
{
    SingleDiskMemFileTestUtils::setUp();
}

void
IteratorHandlerTest::tearDown()
{
    SingleDiskMemFileTestUtils::tearDown();
}

spi::Selection
IteratorHandlerTest::createSelection(const std::string& docSel) const
{
    return spi::Selection(spi::DocumentSelection(docSel));
}

// Iterator ids are handed out sequentially starting at 1.
void
IteratorHandlerTest::testCreateIterator()
{
    spi::Bucket b(BucketId(16, 1234), spi::PartitionId(0));

    spi::CreateIteratorResult iter1(create(b, createSelection("true")));

    CPPUNIT_ASSERT_EQUAL(spi::IteratorId(1), iter1.getIteratorId());

    spi::CreateIteratorResult iter2(create(b, createSelection("true")));
    CPPUNIT_ASSERT_EQUAL(spi::IteratorId(2), iter2.getIteratorId());
}

// Repeatedly invoke iterate() for the given iterator until it reports
// completion (or maxChunks invocations have been made), collecting each
// invocation's entries into its own Chunk.
std::vector<IteratorHandlerTest::Chunk>
IteratorHandlerTest::doIterate(spi::IteratorId id,
                               uint64_t maxByteSize,
                               size_t maxChunks,
                               bool allowEmptyResult)
{
    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    std::vector<Chunk> chunks;

    while (true) {
        std::vector<spi::DocEntry::LP> entries;

        spi::IterateResult result(getPersistenceProvider().iterate(
                id, maxByteSize, context));
        CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode());
        CPPUNIT_ASSERT(result.getEntries().size() > 0 || allowEmptyResult);

        for (size_t i = 0; i < result.getEntries().size(); ++i) {
            entries.push_back(result.getEntries()[i]);
        }
        chunks.push_back(Chunk());
        chunks.back()._entries.swap(entries);
        if (result.isCompleted()
            || (maxChunks != 0 && chunks.size() >= maxChunks))
        {
            break;
        }
    }
    return chunks;
}

namespace {

// Total number of entries across all chunks.
size_t
getDocCount(const std::vector<IteratorHandlerTest::Chunk>& chunks)
{
    size_t count = 0;
    for (size_t i=0; i<chunks.size(); ++i) {
        count += chunks[i]._entries.size();
    }
    return count;
}

// Number of entries flagged as removes.
size_t
getRemoveEntryCount(const std::vector<spi::DocEntry::LP>& entries)
{
    size_t ret = 0;
    for (size_t i = 0; i < entries.size(); ++i) {
        if (entries[i]->isRemove()) {
            ++ret;
        }
    }
    return ret;
}

// Orders DocEntry smart pointers by the pointee's timestamp.
struct DocEntryIndirectTimestampComparator
{
    bool operator()(const spi::DocEntry::LP& e1,
                    const spi::DocEntry::LP& e2) const
    {
        return e1->getTimestamp() < e2->getTimestamp();
    }
};

// Flatten all chunks into a single vector sorted by entry timestamp.
std::vector<spi::DocEntry::LP>
getEntriesFromChunks(const std::vector<IteratorHandlerTest::Chunk>& chunks)
{
    std::vector<spi::DocEntry::LP> ret;
    for (size_t chunk = 0; chunk < chunks.size(); ++chunk) {
        for (size_t i = 0; i < chunks[chunk]._entries.size(); ++i) {
            ret.push_back(chunks[chunk]._entries[i]);
        }
    }
    std::sort(ret.begin(),
              ret.end(),
              DocEntryIndirectTimestampComparator());
    return ret;
}

const vespalib::LazyFile&
getFileHandle(const MemFile& mf1)
{
    return static_cast<const SimpleMemFileIOBuffer&>(
            mf1.getMemFileIO()).getFileHandle();
}

const LoggingLazyFile&
getLoggerFile(const MemFile& file)
{
    return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file));
}

}

// Check that the iterated chunks contain exactly the wanted documents (in
// timestamp order) plus exactly the given set of remove entries, and that
// every entry's reported size matches its serialized size.
void
IteratorHandlerTest::verifyDocs(const std::vector<DocAndTimestamp>& wanted,
                                const std::vector<IteratorHandlerTest::Chunk>& chunks,
                                const std::set<vespalib::string>& removes) const
{
    std::vector<spi::DocEntry::LP> retrieved(
            getEntriesFromChunks(chunks));
    size_t removeCount = getRemoveEntryCount(retrieved);
    // Ensure that we've got the correct number of puts and removes
    CPPUNIT_ASSERT_EQUAL(removes.size(), removeCount);
    CPPUNIT_ASSERT_EQUAL(wanted.size(), retrieved.size() - removeCount);

    size_t wantedIdx = 0;
    for (size_t i = 0; i < retrieved.size(); ++i) {
        spi::DocEntry& entry(*retrieved[i]);
        if (entry.getDocument() != 0) {
            if (!(*wanted[wantedIdx].first == *entry.getDocument())) {
                std::ostringstream ss;
                ss << "Documents differ! Wanted:\n"
                   << wanted[wantedIdx].first->toString(true)
                   << "\n\nGot:\n"
                   << entry.getDocument()->toString(true);
                CPPUNIT_FAIL(ss.str());
            }
            CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].second, entry.getTimestamp());
            CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].first->serialize()->getLength()
                                 + sizeof(spi::DocEntry),
                                 size_t(entry.getSize()));
            ++wantedIdx;
        } else {
            // Remove-entry
            CPPUNIT_ASSERT(entry.getDocumentId() != 0);
            CPPUNIT_ASSERT_EQUAL(entry.getDocumentId()->getSerializedSize()
                                 + sizeof(spi::DocEntry),
                                 size_t(entry.getSize()));
            if (removes.find(entry.getDocumentId()->toString()) == removes.end()) {
                std::ostringstream ss;
                ss << "Got unexpected remove entry for document id "
                   << *entry.getDocumentId();
                CPPUNIT_FAIL(ss.str());
            }
        }
    }
}

// Feed numDocs documents, starting from timestamp 1000
std::vector<IteratorHandlerTest::DocAndTimestamp>
IteratorHandlerTest::feedDocs(size_t numDocs,
                              uint32_t minSize,
                              uint32_t maxSize)
{
    std::vector<DocAndTimestamp> docs;
    for (uint32_t i = 0; i < numDocs; ++i) {
        docs.push_back(
                DocAndTimestamp(
                        doPut(4,
                              framework::MicroSecTime(1000 + i),
                              minSize,
                              maxSize),
                        spi::Timestamp(1000 + i)));
    }
    flush(document::BucketId(16, 4));
    return docs;
}

// Slots removed from the memfile while an iterator is active must simply be
// skipped by later iterate() invocations; the rest are still returned.
void
IteratorHandlerTest::testSomeSlotsRemovedBetweenInvocations()
{
    std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096);

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));

    spi::CreateIteratorResult iter(create(b, sel));
    CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId()));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 10000, 25);
    CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());

    {
        MemFilePtr file(getMemFile(b.getBucketId()));

        // Drop the first two not-yet-iterated documents from the file.
        for (int i = 0 ; i < 2; ++i) {
            const MemSlot* slot = file->getSlotWithId(docs.front().first->getId());
            CPPUNIT_ASSERT(slot != 0);
            file->removeSlot(*slot);
            docs.erase(docs.begin());
        }

        file->flushToDisk();
    }

    std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 10000);
    CPPUNIT_ASSERT_EQUAL(size_t(24), chunks2.size());
    std::copy(chunks2.begin(),
              chunks2.end(),
              std::back_insert_iterator<std::vector<Chunk> >(chunks));

    verifyDocs(docs, chunks);

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);

    // Bucket should not be evicted from cache during normal operation.
    CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId()));
}

// If all the remaining (not yet iterated) slots are removed mid-iteration,
// a subsequent iterate() legitimately returns zero entries.
void
IteratorHandlerTest::testAllSlotsRemovedBetweenInvocations()
{
    std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096);

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));

    spi::CreateIteratorResult iter(create(b, sel));

    // maxByteSize 1 forces one document per chunk; iterate 25 of 100 docs.
    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
    CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());

    {
        MemFilePtr file(getMemFile(b.getBucketId()));

        // Remove the remaining 75 documents before resuming iteration.
        for (int i = 0 ; i < 75; ++i) {
            const MemSlot* slot = file->getSlotWithId(docs[i].first->getId());
            CPPUNIT_ASSERT(slot != 0);
            file->removeSlot(*slot);
        }
        file->flushToDisk();
        docs.erase(docs.begin(), docs.begin() + 75);
    }

    std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1, 0, true);
    CPPUNIT_ASSERT_EQUAL(size_t(0), getDocCount(chunks2));
    verifyDocs(docs, chunks);

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
}

// With NoFields, entries carry timestamps and remove flags but neither
// documents nor document ids.
void
IteratorHandlerTest::testIterateMetadataOnly()
{
    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    std::vector<DocAndTimestamp> docs = feedDocs(10);

    CPPUNIT_ASSERT(
            doUnrevertableRemove(b.getBucketId(),
                                 docs[docs.size() - 2].first->getId(),
                                 Timestamp(1008)));

    CPPUNIT_ASSERT(
            doRemove(b.getBucketId(),
                     docs[docs.size() - 1].first->getId(),
                     framework::MicroSecTime(3001),
                     OperationHandler::PERSIST_REMOVE_IF_FOUND));

    flush(b.getBucketId());

    spi::Selection sel(createSelection("true"));
    spi::CreateIteratorResult iter(
            create(b, sel, spi::NEWEST_DOCUMENT_OR_REMOVE, document::NoFields()));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096);
    std::vector<spi::DocEntry::LP> entries = getEntriesFromChunks(chunks);
    CPPUNIT_ASSERT_EQUAL(docs.size(), entries.size());
    std::vector<DocAndTimestamp>::const_iterator docIter(
            docs.begin());
    for (size_t i = 0; i < entries.size(); ++i, ++docIter) {
        const spi::DocEntry& entry = *entries[i];

        CPPUNIT_ASSERT(entry.getDocument() == 0);
        CPPUNIT_ASSERT(entry.getDocumentId() == 0);
        // Entries 8 and 9 are the two removes issued above (unrevertable at
        // 1008, regular at 3001); the rest are plain puts.
        if (i == 9) {
            CPPUNIT_ASSERT(entry.isRemove());
            CPPUNIT_ASSERT_EQUAL(spi::Timestamp(3001), entry.getTimestamp());
        } else if (i == 8) {
            CPPUNIT_ASSERT(entry.isRemove());
            CPPUNIT_ASSERT_EQUAL(spi::Timestamp(1008), entry.getTimestamp());
        } else {
            CPPUNIT_ASSERT(!entry.isRemove());
            CPPUNIT_ASSERT_EQUAL(docIter->second, entry.getTimestamp());
        }
    }

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
}

// HeaderFields iteration returns documents equal to the fed ones once the
// fed copies have had their bodies cleared.
void
IteratorHandlerTest::testIterateHeadersOnly()
{
    std::vector<DocAndTimestamp> docs = feedDocs(20);
    // Remove all bodies.
    for (size_t i = 0; i < docs.size(); ++i) {
        clearBody(*docs[i].first);
    }

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));

    spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY,
                                          document::HeaderFields()));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1024);
    verifyDocs(docs, chunks);

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
}

// A document larger than maxByteSize must still be returned (in a chunk of
// its own) rather than starving the iteration.
void
IteratorHandlerTest::testIterateLargeDocument()
{
    std::vector<DocAndTimestamp> docs = feedDocs(10, 10000, 10000);
    std::vector<DocAndTimestamp> largedoc;
    largedoc.push_back(docs.back());

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));

    spi::CreateIteratorResult iter(create(b, sel));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 100, 1);
    verifyDocs(largedoc, chunks);

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
}

void
IteratorHandlerTest::testDocumentsRemovedBetweenInvocations()
{
    int docCount = 100;
    std::vector<DocAndTimestamp> docs = feedDocs(docCount);

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));

    spi::CreateIteratorResult iter(create(b, sel));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
    CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());

    // Remove a subset of the documents. We should still get all the
    // original documents from the iterator, assuming no compactions.
    std::vector<DocumentId> removedDocs;
    std::vector<DocAndTimestamp> nonRemovedDocs;
    for (int i = 0; i < docCount; ++i) {
        // Remove every third document via ordinary (revertable) removes.
        if (i % 3 == 0) {
            removedDocs.push_back(docs[i].first->getId());
            CPPUNIT_ASSERT(doRemove(b.getBucketId(),
                                    removedDocs.back(),
                                    framework::MicroSecTime(2000 + i),
                                    OperationHandler::PERSIST_REMOVE_IF_FOUND));
        } else {
            nonRemovedDocs.push_back(docs[i]);
        }
    }
    flush(b.getBucketId());

    std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1);
    CPPUNIT_ASSERT_EQUAL(size_t(75), chunks2.size());
    std::copy(chunks2.begin(),
              chunks2.end(),
              std::back_insert_iterator<std::vector<Chunk> >(chunks));

    verifyDocs(docs, chunks);

    spi::Context context(defaultLoadType, spi::Priority(0),
                         spi::Trace::TraceLevel(0));
    getPersistenceProvider().destroyIterator(iter.getIteratorId(), context);
}

// Shared body for the unrevertable-remove tests; includeRemoves selects
// whether remove entries should be visible to the iterator.
void
IteratorHandlerTest::doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves)
{
    int docCount = 100;
    std::vector<DocAndTimestamp> docs = feedDocs(docCount);

    spi::Bucket b(BucketId(16, 4), spi::PartitionId(0));
    spi::Selection sel(createSelection("true"));
    spi::CreateIteratorResult iter(
            create(b, sel,
                   includeRemoves ?
                   spi::NEWEST_DOCUMENT_OR_REMOVE : spi::NEWEST_DOCUMENT_ONLY));

    std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25);
    CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size());

    // Remove a subset of the documents unrevertably.
+ std::vector<DocumentId> removedDocs; + std::vector<DocAndTimestamp> nonRemovedDocs; + for (int i = 0; i < docCount - 25; ++i) { + if (i < 10) { + removedDocs.push_back(docs[i].first->getId()); + CPPUNIT_ASSERT( + doUnrevertableRemove(b.getBucketId(), + removedDocs.back(), + Timestamp(1000+i))); + } else { + nonRemovedDocs.push_back(docs[i]); + } + } + flush(b.getBucketId()); + + std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1); + std::vector<spi::DocEntry::LP> entries = getEntriesFromChunks(chunks2); + if (!includeRemoves) { + CPPUNIT_ASSERT_EQUAL(nonRemovedDocs.size(), chunks2.size()); + verifyDocs(nonRemovedDocs, chunks2); + } else { + CPPUNIT_ASSERT_EQUAL(size_t(75), entries.size()); + for (int i = 0; i < docCount - 25; ++i) { + spi::DocEntry& entry(*entries[i]); + if (i < 10) { + CPPUNIT_ASSERT(entry.isRemove()); + } else { + CPPUNIT_ASSERT(!entry.isRemove()); + } + } + } + + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); +} + +void +IteratorHandlerTest::testUnrevertableRemoveBetweenInvocations() +{ + doTestUnrevertableRemoveBetweenInvocations(false); +} + +void +IteratorHandlerTest::testUnrevertableRemoveBetweenInvocationsIncludeRemoves() +{ + doTestUnrevertableRemoveBetweenInvocations(true); +} + +void +IteratorHandlerTest::testMatchTimestampRangeDocAltered() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + document::BucketId bucketId(16, 4); + document::StringFieldValue updateValue1("update1"); + document::StringFieldValue updateValue2("update2"); + + Document::SP originalDoc = doPut(4, Timestamp(1234)); + + { + document::DocumentUpdate::SP update = createBodyUpdate( + originalDoc->getId(), updateValue1); + + spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(2345)); + CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp()); + } + + { + document::DocumentUpdate::SP 
update = createBodyUpdate( + originalDoc->getId(), updateValue2); + + spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(3456)); + CPPUNIT_ASSERT_EQUAL(2345, (int)result.getExistingTimestamp()); + } + + CPPUNIT_ASSERT( + doRemove(bucketId, + originalDoc->getId(), + Timestamp(4567), + OperationHandler::PERSIST_REMOVE_IF_FOUND)); + flush(bucketId); + + spi::Bucket b(bucketId, spi::PartitionId(0)); + + { + spi::Selection sel(createSelection("true")); + sel.setFromTimestamp(spi::Timestamp(0)); + sel.setToTimestamp(spi::Timestamp(10)); + spi::CreateIteratorResult iter(create(b, sel)); + + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 4096, context)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size()); + CPPUNIT_ASSERT(result.isCompleted()); + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + } + + { + spi::Selection sel(createSelection("true")); + sel.setFromTimestamp(spi::Timestamp(10000)); + sel.setToTimestamp(spi::Timestamp(20000)); + spi::CreateIteratorResult iter(create(b, sel)); + + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 4096, context)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size()); + CPPUNIT_ASSERT(result.isCompleted()); + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + } + + { + spi::Selection sel(createSelection("true")); + sel.setFromTimestamp(spi::Timestamp(0)); + sel.setToTimestamp(spi::Timestamp(1234)); + spi::CreateIteratorResult iter(create(b, sel)); + + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 4096, context)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); + CPPUNIT_ASSERT(result.isCompleted()); + + const Document& 
receivedDoc(*result.getEntries()[0]->getDocument()); + if (!(*originalDoc == receivedDoc)) { + std::ostringstream ss; + ss << "Documents differ! Wanted:\n" + << originalDoc->toString(true) + << "\n\nGot:\n" + << receivedDoc.toString(true); + CPPUNIT_FAIL(ss.str()); + } + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + } + + { + spi::Selection sel(createSelection("true")); + sel.setFromTimestamp(spi::Timestamp(0)); + sel.setToTimestamp(spi::Timestamp(2345)); + spi::CreateIteratorResult iter(create(b, sel)); + + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 4096, context)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); + CPPUNIT_ASSERT(result.isCompleted()); + + const Document& receivedDoc(*result.getEntries()[0]->getDocument()); + CPPUNIT_ASSERT(receivedDoc.getValue("content").get()); + CPPUNIT_ASSERT_EQUAL(updateValue1, + dynamic_cast<document::StringFieldValue&>( + *receivedDoc.getValue( + "content"))); + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + } + + { + spi::Selection sel(createSelection("true")); + sel.setFromTimestamp(spi::Timestamp(0)); + sel.setToTimestamp(spi::Timestamp(3456)); + spi::CreateIteratorResult iter(create(b, sel)); + + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 4096, context)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); + CPPUNIT_ASSERT(result.isCompleted()); + + const Document& receivedDoc(*result.getEntries()[0]->getDocument()); + CPPUNIT_ASSERT(receivedDoc.getValue("content").get()); + CPPUNIT_ASSERT_EQUAL(updateValue2, + dynamic_cast<document::StringFieldValue&>( + *receivedDoc.getValue( + "content"))); + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + } +} + +void 
+IteratorHandlerTest::testIterateAllVersions() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + std::vector<DocAndTimestamp> docs; + + Document::SP originalDoc(createRandomDocumentAtLocation( + 4, 1001, 110, 110)); + + doPut(originalDoc, framework::MicroSecTime(1001), 0); + + document::StringFieldValue updateValue1("update1"); + { + document::DocumentUpdate::SP update = createBodyUpdate( + originalDoc->getId(), updateValue1); + + spi::UpdateResult result = doUpdate(b.getBucketId(), update, Timestamp(2345)); + CPPUNIT_ASSERT_EQUAL(1001, (int)result.getExistingTimestamp()); + } + flush(b.getBucketId()); + + Document::SP updatedDoc(new Document(*originalDoc)); + updatedDoc->setValue("content", document::StringFieldValue("update1")); + docs.push_back(DocAndTimestamp(originalDoc, spi::Timestamp(1001))); + docs.push_back(DocAndTimestamp(updatedDoc, spi::Timestamp(2345))); + + spi::Selection sel(createSelection("true")); + spi::CreateIteratorResult iter(create(b, sel, spi::ALL_VERSIONS)); + + std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); + verifyDocs(docs, chunks); + + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); +} + +void +IteratorHandlerTest::testFieldSetFiltering() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + Document::SP doc(createRandomDocumentAtLocation( + 4, 1001, 110, 110)); + doc->setValue(doc->getField("headerval"), document::IntFieldValue(42)); + doc->setValue(doc->getField("hstringval"), + document::StringFieldValue("groovy, baby!")); + doc->setValue(doc->getField("content"), + document::StringFieldValue("fancy content")); + doPut(doc, framework::MicroSecTime(1001), 0); + flush(b.getBucketId()); + + document::FieldSetRepo repo; + spi::Selection sel(createSelection("true")); + spi::CreateIteratorResult iter( + create(b, sel, spi::NEWEST_DOCUMENT_ONLY, + *repo.parse(*getTypeRepo(), 
"testdoctype1:hstringval,content"))); + std::vector<spi::DocEntry::LP> entries( + getEntriesFromChunks(doIterate(iter.getIteratorId(), 4096))); + CPPUNIT_ASSERT_EQUAL(size_t(1), entries.size()); + CPPUNIT_ASSERT_EQUAL(std::string("content: fancy content\n" + "hstringval: groovy, baby!\n"), + stringifyFields(*entries[0]->getDocument())); +} + +void +IteratorHandlerTest::testIteratorInactiveOnException() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + feedDocs(10); + + env()._cache.clear(); + + simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); + + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + spi::CreateIteratorResult iter(create(b, createSelection("true"))); + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 100000, context)); + CPPUNIT_ASSERT(result.hasError()); + // Check that iterator is marked as inactive + const SharedIteratorHandlerState& state( + getPersistenceProvider().getIteratorHandler().getState()); + CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue()) + != state._iterators.end()); + CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue()) + ->second.isActive() == false); + + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); +} + +void +IteratorHandlerTest::testDocsCachedBeforeDocumentSelection() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096); + + env()._cache.clear(); + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).maximumReadThroughGap(1024*1024).build()); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory()); + + spi::Selection sel(createSelection("id.user=4")); + spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY, + document::BodyFields())); + + 
std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + { + MemFilePtr file(getMemFile(b.getBucketId())); + // Should have 3 read ops; metadata, (precached) headers and bodies + CPPUNIT_ASSERT_EQUAL(size_t(3), + getLoggerFile(*file).operations.size()); + } +} + +void +IteratorHandlerTest::testTimestampRangeLimitedPrefetch() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + // Feed docs with timestamp range [1000, 1100) + feedDocs(100, 4096, 4096); + + env()._cache.clear(); + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).maximumReadThroughGap(512).build()); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory()); + + spi::Selection sel(createSelection("id.user=4")); + sel.setFromTimestamp(spi::Timestamp(1050)); + sel.setToTimestamp(spi::Timestamp(1059)); + spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY, + document::BodyFields())); + std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); + CPPUNIT_ASSERT_EQUAL(size_t(10), getDocCount(chunks)); + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); + // Iterate over all slots, ensuring that only those that fall within the + // timestamp range have actually been cached. 
+ { + MemFilePtr file(getMemFile(b.getBucketId())); + // Should have 3 read ops; metadata, (precached) headers and bodies + CPPUNIT_ASSERT_EQUAL(size_t(3), + getLoggerFile(*file).operations.size()); + for (size_t i = 0; i < file->getSlotCount(); ++i) { + const MemSlot& slot((*file)[i]); + if (slot.getTimestamp() >= Timestamp(1050) + && slot.getTimestamp() <= Timestamp(1059)) + { + CPPUNIT_ASSERT(file->partAvailable(slot, HEADER)); + CPPUNIT_ASSERT(file->partAvailable(slot, BODY)); + } else { + CPPUNIT_ASSERT(!file->partAvailable(slot, HEADER)); + CPPUNIT_ASSERT(!file->partAvailable(slot, BODY)); + } + } + } +} + +void +IteratorHandlerTest::testCachePrefetchRequirements() +{ + document::select::Parser parser( + env().repo(), env()._bucketFactory); + { + // No prefetch required. + // NOTE: since stuff like id.user=1234 won't work, we have to handle + // that explicitly in createIterator based on the assumption that a + // non-empty document selection at _least_ requires header to be read. + std::unique_ptr<document::select::Node> sel( + parser.parse("true")); + CachePrefetchRequirements req( + CachePrefetchRequirements::createFromSelection(env().repo(), + *sel)); + CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired()); + CPPUNIT_ASSERT(!req.isBodyPrefetchRequired()); + } + + { + // Header prefetch required. + std::unique_ptr<document::select::Node> sel( + parser.parse("testdoctype1.hstringval='blarg'")); + CachePrefetchRequirements req( + CachePrefetchRequirements::createFromSelection(env().repo(), + *sel)); + CPPUNIT_ASSERT(req.isHeaderPrefetchRequired()); + CPPUNIT_ASSERT(!req.isBodyPrefetchRequired()); + } + + { + // Body prefetch required. 
+ std::unique_ptr<document::select::Node> sel( + parser.parse("testdoctype1.content='foobar'")); + CachePrefetchRequirements req( + CachePrefetchRequirements::createFromSelection(env().repo(), + *sel)); + CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired()); + CPPUNIT_ASSERT(req.isBodyPrefetchRequired()); + } +} + +void +IteratorHandlerTest::testBucketEvictedFromCacheOnIterateException() +{ + spi::Bucket b(BucketId(16, 4), spi::PartitionId(0)); + feedDocs(10); + env()._cache.clear(); + + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + spi::CreateIteratorResult iter(create(b, createSelection("true"))); + simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); + spi::IterateResult result(getPersistenceProvider().iterate( + iter.getIteratorId(), 100000, context)); + CPPUNIT_ASSERT(result.hasError()); + + // This test is actually a bit disingenuous since calling iterate will + // implicitly invoke maintain() on an IO exception, which will subsequently + // evict the bucket due to the exception happening again in its context. + CPPUNIT_ASSERT(!env()._cache.contains(b.getBucketId())); +} + +} +} diff --git a/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp new file mode 100644 index 00000000000..78601b461ab --- /dev/null +++ b/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp @@ -0,0 +1,504 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> + +#include <vespa/document/datatype/documenttype.h> +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/simulatedfailurefile.h> +#include <vespa/vdstestlib/cppunit/macros.h> + +using document::DocumentType; + +namespace storage { +namespace memfile { +namespace { + spi::LoadType defaultLoadType(0, "default"); +} + +class JoinOperationHandlerTest : public MemFileTestUtils +{ + CPPUNIT_TEST_SUITE(JoinOperationHandlerTest); + CPPUNIT_TEST(testSimple); + CPPUNIT_TEST(testTargetExists); + CPPUNIT_TEST(testTargetWithOverlap); + CPPUNIT_TEST(testMultiDisk); + CPPUNIT_TEST(testMultiDiskFlushed); + CPPUNIT_TEST(testInternalJoin); + CPPUNIT_TEST(testInternalJoinDiskFull); + CPPUNIT_TEST(testTargetIoWriteExceptionEvictsTargetFromCache); + CPPUNIT_TEST(test1stSourceIoReadExceptionEvictsSourceFromCache); + CPPUNIT_TEST(test2ndSourceExceptionEvictsExistingTargetFromCache); + CPPUNIT_TEST_SUITE_END(); + +public: + void testSimple(); + void testTargetExists(); + void testTargetWithOverlap(); + void testMultiDisk(); + void testMultiDiskFlushed(); + void testInternalJoin(); + void testInternalJoinDiskFull(); + void testTargetIoWriteExceptionEvictsTargetFromCache(); + void test1stSourceIoReadExceptionEvictsSourceFromCache(); + void test2ndSourceExceptionEvictsExistingTargetFromCache(); + + void insertDocumentInBucket(uint64_t location, + Timestamp timestamp, + document::BucketId bucket); + +private: + void feedSingleDisk(); + void feedMultiDisk(); + std::string getStandardMemFileStatus(uint32_t disk = 0); + + spi::Result doJoin(const document::BucketId to, + const document::BucketId from1, + const document::BucketId from2); +}; + +namespace { + +document::BucketId TARGET = document::BucketId(15, 4); +document::BucketId SOURCE1 = document::BucketId(16, 4); +document::BucketId SOURCE2 = document::BucketId(16, (uint64_t)4 | ((uint64_t)1 << 15)); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(JoinOperationHandlerTest); + +void 
+JoinOperationHandlerTest::feedSingleDisk() +{ + for (uint32_t i = 0; i < 100; i++) { + std::ostringstream ost; + ost << "userdoc:storage_test:1234:" << i; + const DocumentType& type( + *getTypeRepo()->getDocumentType("testdoctype1")); + document::Document::SP doc( + new document::Document(type, document::DocumentId(ost.str()))); + + document::BucketId bucket( + getBucketIdFactory().getBucketId(doc->getId())); + bucket.setUsedBits(33); + doPut(doc, Timestamp(1000 + i), 0, 33); + flush(bucket); + } +} + +void +JoinOperationHandlerTest::feedMultiDisk() +{ + for (uint32_t i = 0; i < 100; i += 2) { + doPutOnDisk(7, 4 | (1 << 15), Timestamp(1000 + i)); + } + flush(SOURCE2); + + for (uint32_t i = 1; i < 100; i += 2) { + doPutOnDisk(4, 4, Timestamp(1000 + i)); + } + flush(SOURCE1); + + { + MemFilePtr file(getMemFile(SOURCE1, 4)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + CPPUNIT_ASSERT_EQUAL(4, (int)file->getDisk()); + } + + { + MemFilePtr file(getMemFile(SOURCE2, 7)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + CPPUNIT_ASSERT_EQUAL(7, (int)file->getDisk()); + } +} + +std::string +JoinOperationHandlerTest::getStandardMemFileStatus(uint32_t disk) +{ + std::ostringstream ost; + + ost << getMemFileStatus(TARGET, disk) << "\n" + << getMemFileStatus(SOURCE1, disk ) << "\n" + << getMemFileStatus(SOURCE2, disk) << "\n"; + + return ost.str(); +} + +void +JoinOperationHandlerTest::insertDocumentInBucket( + uint64_t location, + Timestamp timestamp, + document::BucketId bucket) +{ + Document::SP doc( + createRandomDocumentAtLocation( + location, timestamp.getTime(), 100, 100)); + doPut(doc, bucket, timestamp); +} + +spi::Result +JoinOperationHandlerTest::doJoin(const document::BucketId to, + const document::BucketId from1, + const document::BucketId from2) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + return getPersistenceProvider().join( + spi::Bucket(from1, spi::PartitionId(0)), + spi::Bucket(from2, 
spi::PartitionId(0)), + spi::Bucket(to, spi::PartitionId(0)), + context); +} + +void +JoinOperationHandlerTest::testSimple() +{ + setupDisks(1); + feedSingleDisk(); + + { + MemFilePtr file(getMemFile(document::BucketId(33, 1234))); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + } + + { + MemFilePtr file(getMemFile(document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32)))); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + } + + spi::Result result = + doJoin(document::BucketId(32, 1234), + document::BucketId(33, 1234), + document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32))); + + { + MemFilePtr file(getMemFile(document::BucketId(32, (uint64_t)1234))); + CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount()); + CPPUNIT_ASSERT(!file->slotsAltered()); + } +} + +void +JoinOperationHandlerTest::testTargetExists() +{ + setupDisks(1); + + for (uint32_t i = 0; i < 100; i += 2) { + doPut(4 | (1 << 15), Timestamp(1000 + i)); + } + flush(SOURCE2); + + for (uint32_t i = 1; i < 100; i += 2) { + doPut(4, Timestamp(1000 + i)); + } + flush(SOURCE1); + + for (uint32_t i = 0; i < 100; i++) { + uint32_t location = 4; + if (i % 2 == 0) { + location |= (1 << 15); + } + + insertDocumentInBucket(location, Timestamp(500 + i), TARGET); + } + flush(TARGET); + + doJoin(TARGET, SOURCE1, SOURCE2); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x3c00000000000004): 200,0\n" + "BucketId(0x4000000000000004): 0,0\n" + "BucketId(0x4000000000008004): 0,0\n"), + getStandardMemFileStatus()); +} + +void +JoinOperationHandlerTest::testTargetWithOverlap() +{ + setupDisks(1); + + for (uint32_t i = 0; i < 100; i += 2) { + doPut(4 | (1 << 15), Timestamp(1000 + i)); + } + flush(SOURCE2); + + for (uint32_t i = 1; i < 100; i += 2) { + doPut(4, Timestamp(1000 + i)); + } + flush(SOURCE1); + + for (uint32_t i = 0; i < 100; i++) { + uint32_t location = 4; + if (i % 2 == 0) { + location |= (1 << 15); + } + + insertDocumentInBucket(location, Timestamp(950 + i), TARGET); + } + 
flush(TARGET); + + doJoin(TARGET, SOURCE1, SOURCE2); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x3c00000000000004): 150,0\n" + "BucketId(0x4000000000000004): 0,0\n" + "BucketId(0x4000000000008004): 0,0\n"), + getStandardMemFileStatus()); +} + +void +JoinOperationHandlerTest::testMultiDisk() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(10); + feedMultiDisk(); + + getPersistenceProvider().join(spi::Bucket(SOURCE2, spi::PartitionId(7)), + spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(TARGET, spi::PartitionId(3)), + context); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x3c00000000000004): 100,3\n" + "BucketId(0x4000000000000004): 0,0\n" + "BucketId(0x4000000000008004): 0,0\n"), + getStandardMemFileStatus()); +} + +void +JoinOperationHandlerTest::testMultiDiskFlushed() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(10); + feedMultiDisk(); + + // Flush everything to disk, to check that we can join even + // if it's not in cache before. 
+ env()._cache.flushDirtyEntries(); + env()._cache.clear(); + + getPersistenceProvider().join(spi::Bucket(SOURCE2, spi::PartitionId(7)), + spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(TARGET, spi::PartitionId(3)), + context); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x3c00000000000004): 100,3\n" + "BucketId(0x4000000000000004): 0,3\n" + "BucketId(0x4000000000008004): 0,3\n"), + getStandardMemFileStatus(3)); +} + +void +JoinOperationHandlerTest::testInternalJoin() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(10); + + for (uint32_t i = 4; i < 6; i++) { + for (uint32_t j = 0; j < 10; j++) { + uint32_t location = 4; + doPutOnDisk(i, location, Timestamp(i * 1000 + j)); + } + flush(document::BucketId(16, 4), i); + env()._cache.clear(); + } + + std::string fileName1 = + env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]); + std::string fileName2 = + env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]); + + CPPUNIT_ASSERT(vespalib::stat(fileName1).get()); + vespalib::FileInfo::UP file2(vespalib::stat(fileName2)); + + CPPUNIT_ASSERT(file2.get()); + CPPUNIT_ASSERT(file2->_size > 0); + + PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor(); + // Set disk under 80% full. Over 80%, we shouldn't move buckets to the target. 
+ mon->setStatOncePolicy(); + mon->overrideRealStat(512, 100000, 50000); + CPPUNIT_ASSERT(!mon->isFull(0, .80f)); + + getPersistenceProvider().join(spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(SOURCE1, spi::PartitionId(5)), + context); + + env()._cache.clear(); + + CPPUNIT_ASSERT(!vespalib::stat(fileName1).get()); + CPPUNIT_ASSERT(vespalib::stat(fileName2).get()); +} + +void +JoinOperationHandlerTest::testInternalJoinDiskFull() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(10); + + for (uint32_t i = 4; i < 6; i++) { + for (uint32_t j = 0; j < 10; j++) { + uint32_t location = 4; + doPutOnDisk(i, location, Timestamp(i * 1000 + j)); + } + flush(document::BucketId(16, 4), i); + env()._cache.clear(); + } + + std::string fileName1 = + env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]); + std::string fileName2 = + env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]); + + CPPUNIT_ASSERT(vespalib::stat(fileName1).get()); + vespalib::FileInfo::UP file2(vespalib::stat(fileName2)); + + CPPUNIT_ASSERT(file2.get()); + CPPUNIT_ASSERT(file2->_size > 0); + + PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor(); + // Set disk to 81% full. Over 80%, we shouldn't move buckets to the target. 
+ mon->setStatOncePolicy(); + mon->overrideRealStat(512, 100000, 81000); + CPPUNIT_ASSERT(!mon->isFull()); + CPPUNIT_ASSERT(mon->isFull(0, .08f)); + + spi::Result result = + getPersistenceProvider().join(spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(SOURCE1, spi::PartitionId(4)), + spi::Bucket(SOURCE1, spi::PartitionId(5)), + context); + + CPPUNIT_ASSERT(result.hasError()); +} + +void +JoinOperationHandlerTest::testTargetIoWriteExceptionEvictsTargetFromCache() +{ + setupDisks(1); + feedSingleDisk(); + + document::BucketId src1(33, 1234); + document::BucketId src2(33, 1234ULL | (1ULL << 32)); + document::BucketId target(32, 1234); + + CPPUNIT_ASSERT(env()._cache.contains(src1)); + CPPUNIT_ASSERT(env()._cache.contains(src2)); + CPPUNIT_ASSERT(!env()._cache.contains(target)); + + // Reading existing (fully cached) files will go fine, but writing + // new file will not. + simulateIoErrorsForSubsequentlyOpenedFiles(); + + spi::Result result = doJoin(target, src1, src2); + CPPUNIT_ASSERT(result.hasError()); + CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") + != vespalib::string::npos); + + CPPUNIT_ASSERT(!env()._cache.contains(target)); + // NOTE: since we end up renaming src1 -> target during the first + // iteration of join, src1 will actually be empty. This should not + // matter since the service layer will query the bucket info for + // all these afterwards and will thus pick up on this automatically. 
+ unSimulateIoErrorsForSubsequentlyOpenedFiles(); + { + MemFilePtr file(getMemFile(src1)); + CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); + CPPUNIT_ASSERT(!file->slotsAltered()); + } + { + MemFilePtr file(getMemFile(src2)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + CPPUNIT_ASSERT(!file->slotsAltered()); + } + { + MemFilePtr file(getMemFile(target)); + // Renamed from src1 + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + CPPUNIT_ASSERT(!file->slotsAltered()); + } +} + +void +JoinOperationHandlerTest::test1stSourceIoReadExceptionEvictsSourceFromCache() +{ + setupDisks(1); + feedSingleDisk(); + + document::BucketId src1(33, 1234); + document::BucketId src2(33, 1234ULL | (1ULL << 32)); + document::BucketId target(32, 1234); + + env()._cache.clear(); + // Allow for reading in initial metadata so that loadFile itself doesn't + // fail. This could otherwise cause a false negative since that happens + // during initial cache lookup on a cache miss, at which point any + // exception will always stop a file from being added to the cache. Here + // we want to test the case where a file has been successfully hoisted + // out of the cache initially. + simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); + + spi::Result result = doJoin(target, src1, src2); + CPPUNIT_ASSERT(result.hasError()); + CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read") + != vespalib::string::npos); + + CPPUNIT_ASSERT(!env()._cache.contains(src1)); + CPPUNIT_ASSERT(!env()._cache.contains(src2)); + CPPUNIT_ASSERT(!env()._cache.contains(target)); +} + +/** + * It must be exception safe for any source bucket to throw an exception during + * processing. Otherwise the node will core due to cache sanity checks. + * + * See VESPA-674 for context. In this scenario, it was not possible to write + * to the target file when attempting to join in the 2nd source bucket due to + * the disk fill ratio exceeding configured limits. 
+ */ +void +JoinOperationHandlerTest::test2ndSourceExceptionEvictsExistingTargetFromCache() +{ + setupDisks(1); + feedSingleDisk(); + + constexpr uint64_t location = 1234; + + document::BucketId src1(33, location); + document::BucketId src2(33, location | (1ULL << 32)); + document::BucketId target(32, location); + + // Ensure target file is _not_ empty so that copySlots is triggered for + // each source bucket (rather than just renaming the file, which does not + // invoke the file read/write paths). + insertDocumentInBucket(location, Timestamp(100000), target); + flush(target); + + env()._cache.clear(); + // File rewrites are buffered before ever reaching the failure simulation + // layer, so only 1 actual write is used to flush the target file after + // the first source file has been processed. Attempting to flush the writes + // for the second source file should fail with an exception. + simulateIoErrorsForSubsequentlyOpenedFiles( + IoErrors().afterReads(INT_MAX).afterWrites(1)); + + spi::Result result = doJoin(target, src1, src2); + CPPUNIT_ASSERT(result.hasError()); + CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") + != vespalib::string::npos); + + CPPUNIT_ASSERT(!env()._cache.contains(src1)); + CPPUNIT_ASSERT(!env()._cache.contains(src2)); + CPPUNIT_ASSERT(!env()._cache.contains(target)); +} + +} + +} diff --git a/memfilepersistence/src/tests/spi/logginglazyfile.h b/memfilepersistence/src/tests/spi/logginglazyfile.h new file mode 100644 index 00000000000..e54753f7c3e --- /dev/null +++ b/memfilepersistence/src/tests/spi/logginglazyfile.h @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/vespalib/io/fileutil.h> +#include <iostream> + +namespace storage { + +namespace memfile { + +class LoggingLazyFile : public vespalib::LazyFile { +public: + class Factory : public Environment::LazyFileFactory { + public: + vespalib::LazyFile::UP createFile(const std::string& fileName) const { + return vespalib::LazyFile::UP( + new LoggingLazyFile(fileName, vespalib::File::DIRECTIO)); + } + }; + + enum OpType { + READ = 0, + WRITE + }; + + struct Entry { + OpType opType; + size_t bufsize; + off_t offset; + + std::string toString() const { + std::ostringstream ost; + ost << (opType == READ ? "Reading " : "Writing ") + << bufsize + << " bytes at " + << offset; + return ost.str(); + } + }; + + mutable std::vector<Entry> operations; + + LoggingLazyFile(const std::string& filename, int flags) + : LazyFile(filename, flags) {}; + + size_t getOperationCount() const { + return operations.size(); + } + + virtual off_t write(const void *buf, size_t bufsize, off_t offset) { + Entry e; + e.opType = WRITE; + e.bufsize = bufsize; + e.offset = offset; + + operations.push_back(e); + + return vespalib::LazyFile::write(buf, bufsize, offset); + } + + virtual size_t read(void *buf, size_t bufsize, off_t offset) const { + Entry e; + e.opType = READ; + e.bufsize = bufsize; + e.offset = offset; + + operations.push_back(e); + + return vespalib::LazyFile::read(buf, bufsize, offset); + } + + std::string toString() const { + std::ostringstream ost; + for (uint32_t i = 0; i < operations.size(); i++) { + ost << operations[i].toString() << "\n"; + } + + return ost.str(); + } + + + +}; + +} + +} + diff --git a/memfilepersistence/src/tests/spi/memcachetest.cpp b/memfilepersistence/src/tests/spi/memcachetest.cpp new file mode 100644 index 00000000000..d34159ce3f4 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memcachetest.cpp @@ -0,0 +1,412 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h> +#include <tests/spi/memfiletestutils.h> +#include <vespa/vdstestlib/cppunit/macros.h> + + +namespace storage { +namespace memfile { + +class MemCacheTest : public SingleDiskMemFileTestUtils +{ + CPPUNIT_TEST_SUITE(MemCacheTest); + CPPUNIT_TEST(testSimpleLRU); + CPPUNIT_TEST(testCacheSize); + CPPUNIT_TEST(testEvictBody); + CPPUNIT_TEST(testEvictHeader); + CPPUNIT_TEST(testKeepBodyWhenLessThanOneFourth); + CPPUNIT_TEST(testComplexEviction); + CPPUNIT_TEST(testEraseEmptyOnReturn); + CPPUNIT_TEST(testDeleteDoesNotReAddMemoryUsage); + CPPUNIT_TEST(testEraseDoesNotReAddMemoryUsage); + CPPUNIT_TEST(testGetWithNoCreation); + CPPUNIT_TEST_SUITE_END(); + +public: + void testSimpleLRU(); + void testCacheSize(); + void testReduceCacheSizeCallback(); + void testReduceCacheSizeCallbackWhileActive(); + void testEvictBody(); + void testEvictHeader(); + void testKeepBodyWhenLessThanOneFourth(); + void testComplexEviction(); + void testEraseEmptyOnReturn(); + void testDeleteDoesNotReAddMemoryUsage(); + void testEraseDoesNotReAddMemoryUsage(); + void testGetWithNoCreation(); + +private: + framework::defaultimplementation::ComponentRegisterImpl::UP _register; + framework::Component::UP _component; + FakeClock::UP _clock; + framework::defaultimplementation::MemoryManager::UP _memoryManager; + std::vector<framework::MemoryToken::LP> _stolenMemory; + std::unique_ptr<MemFilePersistenceMetrics> _metrics; + + std::unique_ptr<MemFileCache> _cache; + + void setSize(const document::BucketId& id, + uint64_t metaSize, + uint64_t headerSz = 0, + uint64_t bodySz = 0, + bool createIfNotInCache = true) + { + MemFilePtr file(_cache->get(id, env(), env().getDirectory(), + createIfNotInCache)); + CPPUNIT_ASSERT(file.get()); + + file->_cacheSizeOverride.metaSize = metaSize; + file->_cacheSizeOverride.headerSize = headerSz; + 
file->_cacheSizeOverride.bodySize = bodySz; + } + + std::string + getBucketStatus(uint32_t buckets) + { + std::ostringstream ost; + for (uint32_t i = 1; i < buckets + 1; i++) { + document::BucketId id(16, i); + ost << id << " "; + if (!_cache->contains(id)) { + ost << "<nil>\n"; + } else { + MemFilePtr file(_cache->get(id, env(), env().getDirectory())); + if (file->_cacheSizeOverride.bodySize > 0) { + ost << "body,"; + } + if (file->_cacheSizeOverride.headerSize > 0) { + ost << "header\n"; + } else { + ost << "meta only\n"; + } + } + } + + return ost.str(); + } + + uint64_t cacheSize() { + return _cache->size(); + } + + document::BucketId getLRU() { + return _cache->getLeastRecentlyUsedBucket()->_bid; + } + + void setCacheSize(uint64_t sz) { + MemFileCache::MemoryUsage usage; + usage.metaSize = sz / 3; + usage.headerSize = sz / 3; + usage.bodySize = sz - usage.metaSize - usage.headerSize; + + _cache->setCacheSize(usage); + } + + void stealMemory(uint64_t memToSteal) { + setCacheSize(_cache->getCacheSize() - memToSteal); + } + + void setup(uint64_t maxMemory) { + tearDown(); + _register.reset( + new framework::defaultimplementation::ComponentRegisterImpl); + _clock.reset(new FakeClock); + _register->setClock(*_clock); + _memoryManager.reset( + new framework::defaultimplementation::MemoryManager( + framework::defaultimplementation::AllocationLogic::UP( + new framework::defaultimplementation::SimpleMemoryLogic( + *_clock, maxMemory * 2)))); + _register->setMemoryManager(*_memoryManager); + _component.reset(new framework::Component(*_register, "testcomponent")); + _metrics.reset(new MemFilePersistenceMetrics(*_component)); + _cache.reset(new MemFileCache(*_register, _metrics->_cache)); + setCacheSize(maxMemory); + _memoryManager->registerAllocationType(framework::MemoryAllocationType( + "steal", framework::MemoryAllocationType::FORCE_ALLOCATE)); + } + +public: + void tearDown() { + _stolenMemory.clear(); + _cache.reset(0); + _metrics.reset(0); + _component.reset(0); + 
_register.reset(0); + _memoryManager.reset(0); + _clock.reset(0); + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemCacheTest); + +namespace { + FakeClock clock; +} + +void +MemCacheTest::testSimpleLRU() +{ + setup(2000); + + for (uint32_t i = 1; i < 4; i++) { + setSize(document::BucketId(16, i), 100); + } + + CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 1), getLRU()); + + setSize(document::BucketId(16, 1), 100); + + CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue()); + CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 2), getLRU()); +} + +void +MemCacheTest::testCacheSize() +{ + setup(400); + + setSize(document::BucketId(16, 2), 100); + setSize(document::BucketId(16, 1), 150); + + CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().hits.getValue()); + CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); + + CPPUNIT_ASSERT_EQUAL(250ul, cacheSize()); + + setSize(document::BucketId(16, 1), 200); + + CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue()); + CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); + + CPPUNIT_ASSERT_EQUAL(300ul, cacheSize()); + + CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 2))); + CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1))); + + setSize(document::BucketId(16, 1), 301); + + CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().hits.getValue()); + CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); + + CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 2))); + CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1))); + + _cache->clear(); + CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); +} + +void +MemCacheTest::testEvictBody() +{ + setup(1400); + + CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().body_evictions.getValue()); + + setSize(BucketId(16, 1), 150, 100, 0); + setSize(BucketId(16, 2), 100, 100, 900); + + CPPUNIT_ASSERT_EQUAL(1350ul, cacheSize()); + + stealMemory(150); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) header\n" + 
"BucketId(0x4000000000000002) header\n"), + getBucketStatus(2)); + CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().body_evictions.getValue()); +} + +void +MemCacheTest::testKeepBodyWhenLessThanOneFourth() +{ + setup(450); + + setSize(BucketId(16, 1), 150, 0, 0); + setSize(BucketId(16, 2), 100, 50, 50); + + stealMemory(150); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) <nil>\n" + "BucketId(0x4000000000000002) body,header\n"), + getBucketStatus(2)); +} + +void +MemCacheTest::testEvictHeader() +{ + setup(550); + + CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().header_evictions.getValue()); + + setSize(BucketId(16, 1), 150, 0, 0); + setSize(BucketId(16, 2), 100, 200, 100); + + stealMemory(150); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) meta only\n" + "BucketId(0x4000000000000002) meta only\n"), + getBucketStatus(2)); + CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().header_evictions.getValue()); +} + +#define ASSERT_CACHE_EVICTIONS(meta, header, body) \ + CPPUNIT_ASSERT_EQUAL(size_t(meta), _cache->getMetrics().body_evictions.getValue()); \ + CPPUNIT_ASSERT_EQUAL(size_t(header), _cache->getMetrics().header_evictions.getValue()); \ + CPPUNIT_ASSERT_EQUAL(size_t(body), _cache->getMetrics().meta_evictions.getValue()); + +void +MemCacheTest::testComplexEviction() +{ + setup(4200); + + setSize(BucketId(16, 1), 150, 0, 0); + setSize(BucketId(16, 2), 100, 200, 200); + setSize(BucketId(16, 3), 100, 200, 0); + setSize(BucketId(16, 4), 100, 400, 0); + setSize(BucketId(16, 5), 100, 200, 400); + setSize(BucketId(16, 6), 100, 200, 300); + setSize(BucketId(16, 7), 100, 0, 0); + setSize(BucketId(16, 8), 100, 200, 400); + setSize(BucketId(16, 9), 100, 200, 250); + + CPPUNIT_ASSERT_EQUAL(4100ul, cacheSize()); + + ASSERT_CACHE_EVICTIONS(0, 0, 0); + + stealMemory(600); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) meta only\n" + "BucketId(0x4000000000000002) header\n" + "BucketId(0x4000000000000003) 
header\n" + "BucketId(0x4000000000000004) header\n" + "BucketId(0x4000000000000005) header\n" + "BucketId(0x4000000000000006) body,header\n" + "BucketId(0x4000000000000007) meta only\n" + "BucketId(0x4000000000000008) body,header\n" + "BucketId(0x4000000000000009) body,header\n"), + getBucketStatus(9)); + + CPPUNIT_ASSERT_EQUAL(3500ul, cacheSize()); + + ASSERT_CACHE_EVICTIONS(2, 0, 0); + + stealMemory(500); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) meta only\n" + "BucketId(0x4000000000000002) meta only\n" + "BucketId(0x4000000000000003) meta only\n" + "BucketId(0x4000000000000004) header\n" + "BucketId(0x4000000000000005) header\n" + "BucketId(0x4000000000000006) body,header\n" + "BucketId(0x4000000000000007) meta only\n" + "BucketId(0x4000000000000008) body,header\n" + "BucketId(0x4000000000000009) body,header\n"), + getBucketStatus(9)); + + CPPUNIT_ASSERT_EQUAL(3100ul, cacheSize()); + + ASSERT_CACHE_EVICTIONS(2, 2, 0); + + stealMemory(1000); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) <nil>\n" + "BucketId(0x4000000000000002) meta only\n" + "BucketId(0x4000000000000003) meta only\n" + "BucketId(0x4000000000000004) meta only\n" + "BucketId(0x4000000000000005) meta only\n" + "BucketId(0x4000000000000006) header\n" + "BucketId(0x4000000000000007) meta only\n" + "BucketId(0x4000000000000008) body,header\n" + "BucketId(0x4000000000000009) body,header\n"), + getBucketStatus(9)); + + CPPUNIT_ASSERT_EQUAL(2050ul, cacheSize()); + + ASSERT_CACHE_EVICTIONS(3, 4, 1); + + stealMemory(1100); + + CPPUNIT_ASSERT_EQUAL( + std::string( + "BucketId(0x4000000000000001) <nil>\n" + "BucketId(0x4000000000000002) <nil>\n" + "BucketId(0x4000000000000003) <nil>\n" + "BucketId(0x4000000000000004) <nil>\n" + "BucketId(0x4000000000000005) <nil>\n" + "BucketId(0x4000000000000006) <nil>\n" + "BucketId(0x4000000000000007) meta only\n" + "BucketId(0x4000000000000008) header\n" + "BucketId(0x4000000000000009) body,header\n"), + 
getBucketStatus(9)); + + CPPUNIT_ASSERT_EQUAL(950ul, cacheSize()); +} + +#undef ASSERT_CACHE_EVICTIONS + +void +MemCacheTest::testEraseEmptyOnReturn() +{ + setup(4200); + setSize(BucketId(16, 1), 0, 0, 0); + CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 1))); +} + +void +MemCacheTest::testDeleteDoesNotReAddMemoryUsage() +{ + BucketId id(16, 1); + setup(1000); + setSize(id, 100, 200, 300); + CPPUNIT_ASSERT_EQUAL(600ul, cacheSize()); + { + MemFilePtr file(_cache->get(id, env(), env().getDirectory())); + file.deleteFile(); + } + CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); + +} + +void +MemCacheTest::testGetWithNoCreation() +{ + BucketId id(16, 1); + setup(1000); + setSize(id, 100, 200, 300, false); + CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); +} + + +void +MemCacheTest::testEraseDoesNotReAddMemoryUsage() +{ + BucketId id(16, 1); + setup(1000); + setSize(id, 100, 200, 300); + CPPUNIT_ASSERT_EQUAL(600ul, cacheSize()); + { + MemFilePtr file(_cache->get(id, env(), env().getDirectory())); + file.eraseFromCache(); + } + CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); + +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp b/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp new file mode 100644 index 00000000000..04d82741e67 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp @@ -0,0 +1,411 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> +#include <vespa/memfilepersistence/mapper/fileinfo.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <tests/spi/memfiletestutils.h> +#include <vespa/vdstestlib/cppunit/macros.h> + +namespace storage { +namespace memfile { + +class MemFileAutoRepairTest : public SingleDiskMemFileTestUtils +{ +public: + void setUp(); + void tearDown(); + + void testFileMetadataCorruptionIsAutoRepaired(); + void testDocumentContentCorruptionIsAutoRepaired(); + void testCorruptionEvictsBucketFromCache(); + void testRepairFailureInMaintainEvictsBucketFromCache(); + void testZeroLengthFileIsDeleted(); + void testTruncatedBodyLocationIsAutoRepaired(); + void testTruncatedHeaderLocationIsAutoRepaired(); + void testTruncatedHeaderBlockIsAutoRepaired(); + + void corruptBodyBlock(); + + CPPUNIT_TEST_SUITE(MemFileAutoRepairTest); + CPPUNIT_TEST(testFileMetadataCorruptionIsAutoRepaired); + CPPUNIT_TEST(testDocumentContentCorruptionIsAutoRepaired); + CPPUNIT_TEST(testCorruptionEvictsBucketFromCache); + CPPUNIT_TEST(testRepairFailureInMaintainEvictsBucketFromCache); + CPPUNIT_TEST(testZeroLengthFileIsDeleted); + CPPUNIT_TEST(testTruncatedBodyLocationIsAutoRepaired); + CPPUNIT_TEST(testTruncatedHeaderLocationIsAutoRepaired); + CPPUNIT_TEST(testTruncatedHeaderBlockIsAutoRepaired); + CPPUNIT_TEST_SUITE_END(); + +private: + void assertDocumentIsSilentlyRemoved( + const document::BucketId& bucket, + const document::DocumentId& docId); + + void reconfigureMinimumHeaderBlockSize(uint32_t newMinSize); + + document::BucketId _bucket; + std::unique_ptr<FileSpecification> _file; + std::vector<document::DocumentId> _slotIds; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemFileAutoRepairTest); + +namespace { + // A totall uncached memfile with content to use 
for verify testing + std::unique_ptr<MemFile> _memFile; + + // Clear old content. Create new file. Make sure nothing is cached. + void prepareBucket(SingleDiskMemFileTestUtils& util, + const FileSpecification& file) { + _memFile.reset(); + util.env()._cache.clear(); + vespalib::unlink(file.getPath()); + util.createTestBucket(file.getBucketId(), 0); + util.env()._cache.clear(); + _memFile.reset(new MemFile(file, util.env())); + _memFile->getMemFileIO().close(); + + } + + MetaSlot getSlot(uint32_t index) { + assert(_memFile.get()); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + MetaSlot result; + file.read(&result, sizeof(MetaSlot), + sizeof(Header) + sizeof(MetaSlot) * index); + return result; + } + + void setSlot(uint32_t index, MetaSlot slot, + bool updateFileChecksum = true) + { + (void)updateFileChecksum; + assert(_memFile.get()); + //if (updateFileChecksum) slot.updateFileChecksum(); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + file.write(&slot, sizeof(MetaSlot), + sizeof(Header) + sizeof(MetaSlot) * index); + } +} + +void +MemFileAutoRepairTest::setUp() +{ + SingleDiskMemFileTestUtils::setUp(); + _bucket = BucketId(16, 0xa); + createTestBucket(_bucket, 0); + + { + MemFilePtr memFilePtr(env()._cache.get(_bucket, env(), env().getDirectory())); + _file.reset(new FileSpecification(memFilePtr->getFile())); + CPPUNIT_ASSERT(memFilePtr->getSlotCount() >= 2); + for (size_t i = 0; i < memFilePtr->getSlotCount(); ++i) { + _slotIds.push_back(memFilePtr->getDocumentId((*memFilePtr)[i])); + } + } + env()._cache.clear(); +} + +void +MemFileAutoRepairTest::tearDown() +{ + _file.reset(0); + _memFile.reset(0); + SingleDiskMemFileTestUtils::tearDown(); +}; + +void +MemFileAutoRepairTest::testFileMetadataCorruptionIsAutoRepaired() +{ + // Test corruption detected in initial metadata load + prepareBucket(*this, *_file); + document::DocumentId id(_slotIds[1]); + MetaSlot slot(getSlot(1)); + CPPUNIT_ASSERT_EQUAL(slot._gid, + id.getGlobalId()); // 
Sanity checking... + { + MetaSlot s(slot); + s.setTimestamp(Timestamp(40)); + setSlot(1, s); + } + + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); + + // File not in cache; should be detected in initial load + spi::GetResult res(doGet(_bucket, id, document::AllFields())); + // FIXME: currently loadFile is silently fixing corruptions! + //CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode()); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); + CPPUNIT_ASSERT(!res.hasDocument()); + + CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets()); + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); + + // File should now have been repaired, so a subsequent get for + // the same document should just return an empty (but OK) result. + spi::GetResult res2(doGet(_bucket, id, document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode()); + CPPUNIT_ASSERT(!res2.hasDocument()); + + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); +} + +void +MemFileAutoRepairTest::corruptBodyBlock() +{ + CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); + // Corrupt body block of slot 1 + MetaSlot slot(getSlot(1)); + { + MetaSlot s(slot); + s.setBodyPos(52); + s.setBodySize(18); + s.updateChecksum(); + setSlot(1, s); + } +} + +void +MemFileAutoRepairTest::testDocumentContentCorruptionIsAutoRepaired() +{ + // Corrupt body block + prepareBucket(*this, *_file); + document::DocumentId id(_slotIds[1]); + corruptBodyBlock(); + + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); + + spi::GetResult res(doGet(_bucket, id, document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode()); + CPPUNIT_ASSERT(!res.hasDocument()); + + CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); + + CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets()); + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); + + // File should now have been 
repaired, so a subsequent get for + // the same document should just return an empty (but OK) result. + spi::GetResult res2(doGet(_bucket, id, document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode()); + CPPUNIT_ASSERT(!res2.hasDocument()); + + // File should now be in cache OK + CPPUNIT_ASSERT(env()._cache.contains(_bucket)); + CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); +} + +// Ideally we'd test this for each spi operation that accesses MemFiles, but +// they all use the same eviction+auto-repair logic... +void +MemFileAutoRepairTest::testCorruptionEvictsBucketFromCache() +{ + prepareBucket(*this, *_file); + corruptBodyBlock(); + + // Read slot 0 and shove file into cache + spi::GetResult res(doGet(_bucket, _slotIds[0], document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); + CPPUNIT_ASSERT(res.hasDocument()); + CPPUNIT_ASSERT(env()._cache.contains(_bucket)); + + spi::GetResult res2(doGet(_bucket, _slotIds[1], document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res2.getErrorCode()); + CPPUNIT_ASSERT(!res2.hasDocument()); + + // Out of the cache! Begone! Shoo! + CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); + +} + +void +MemFileAutoRepairTest::testRepairFailureInMaintainEvictsBucketFromCache() +{ + prepareBucket(*this, *_file); + corruptBodyBlock(); + spi::Result result(getPersistenceProvider().maintain( + spi::Bucket(_bucket, spi::PartitionId(0)), spi::HIGH)); + // File being successfully repaired does not constitute a failure of + // the maintain() call. + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + // It should, however, shove it out of the cache. 
+ CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); +} + +void +MemFileAutoRepairTest::testZeroLengthFileIsDeleted() +{ + // Completely truncate auto-created file + vespalib::LazyFile file(_file->getPath(), 0); + file.resize(0); + + // No way to deal with zero-length files aside from deleting them. + spi::Result result(getPersistenceProvider().maintain( + spi::Bucket(_bucket, spi::PartitionId(0)), spi::HIGH)); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); + CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); + CPPUNIT_ASSERT(!vespalib::fileExists(_file->getPath())); +} + +namespace { + +uint32_t +alignDown(uint32_t value) +{ + uint32_t blocks = value / 512; + return blocks * 512; +}; + +FileInfo +fileInfoFromMemFile(const MemFilePtr& mf) +{ + auto& ioBuf(dynamic_cast<const SimpleMemFileIOBuffer&>( + mf->getMemFileIO())); + return ioBuf.getFileInfo(); +} + +} + +void +MemFileAutoRepairTest::assertDocumentIsSilentlyRemoved( + const document::BucketId& bucket, + const document::DocumentId& docId) +{ + // Corrupted (truncated) slot should be transparently removed during + // loadFile and it should be as if it was never there! 
+ spi::Bucket spiBucket(bucket, spi::PartitionId(0)); + spi::GetResult res(doGet(spiBucket, docId, document::AllFields())); + CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); + CPPUNIT_ASSERT(!res.hasDocument()); +} + +void +MemFileAutoRepairTest::testTruncatedBodyLocationIsAutoRepaired() +{ + document::BucketId bucket(16, 4); + document::Document::SP doc( + createRandomDocumentAtLocation(4, 1234, 1024, 1024)); + + doPut(doc, bucket, framework::MicroSecTime(1000)); + flush(bucket); + FileInfo fileInfo; + { + MemFilePtr mf(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); + fileInfo = fileInfoFromMemFile(mf); + + const uint32_t bodyBlockStart( + sizeof(Header) + + fileInfo._metaDataListSize * sizeof(MetaSlot) + + fileInfo._headerBlockSize); + + vespalib::LazyFile file(mf->getFile().getPath(), 0); + uint32_t slotBodySize = (*mf)[0].getLocation(BODY)._size; + CPPUNIT_ASSERT(slotBodySize > 0); + // Align down to nearest sector alignment to avoid unrelated DirectIO + // checks to kick in. Since the body block is always aligned on a + // sector boundary, we know this cannot truncate into the header block. + file.resize(alignDown(bodyBlockStart + slotBodySize - 1)); + } + env()._cache.clear(); + assertDocumentIsSilentlyRemoved(bucket, doc->getId()); +} + +void +MemFileAutoRepairTest::testTruncatedHeaderLocationIsAutoRepaired() +{ + document::BucketId bucket(16, 4); + document::Document::SP doc( + createRandomDocumentAtLocation(4, 1234, 1024, 1024)); + // Ensure header has a bunch of data (see alignment comments below). 
+ doc->setValue(doc->getField("hstringval"), + document::StringFieldValue(std::string(1024, 'A'))); + + doPut(doc, bucket, framework::MicroSecTime(1000)); + flush(bucket); + FileInfo fileInfo; + { + MemFilePtr mf(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); + fileInfo = fileInfoFromMemFile(mf); + + const uint32_t headerBlockStart( + sizeof(Header) + + fileInfo._metaDataListSize * sizeof(MetaSlot)); + + vespalib::LazyFile file(mf->getFile().getPath(), 0); + uint32_t slotHeaderSize = (*mf)[0].getLocation(HEADER)._size; + CPPUNIT_ASSERT(slotHeaderSize > 0); + // Align down to nearest sector alignment to avoid unrelated DirectIO + // checks to kick in. The header block is not guaranteed to start on + // sector boundary, but we assume there is enough slack in the header + // section for the metadata slots themselves to be untouched since we + // have a minimum header size of 1024 for the doc in question. + file.resize(alignDown(headerBlockStart + slotHeaderSize - 1)); + } + env()._cache.clear(); + assertDocumentIsSilentlyRemoved(bucket, doc->getId()); +} + +void +MemFileAutoRepairTest::reconfigureMinimumHeaderBlockSize(uint32_t newMinSize) +{ + using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig; + using MemFileConfigBuilder + = vespa::config::storage::StorMemfilepersistenceConfigBuilder; + MemFileConfigBuilder builder( + *env().acquireConfigReadLock().memFilePersistenceConfig()); + builder.minimumFileMetaSlots = 2; + builder.minimumFileHeaderBlockSize = newMinSize; + auto newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder)); + env().acquireConfigWriteLock().setMemFilePersistenceConfig( + std::move(newConfig)); +} + +void +MemFileAutoRepairTest::testTruncatedHeaderBlockIsAutoRepaired() +{ + document::BucketId bucket(16, 4); + document::Document::SP doc( + createRandomDocumentAtLocation(4, 1234, 1, 1)); + // Ensure header block is large enough that free space is added to the end. 
+ reconfigureMinimumHeaderBlockSize(8192); + // Add header field and remove randomly generated body field, ensuring + // we have no data to add to body field. This will prevent slot body + // location checking from detecting a header truncation. + doc->setValue(doc->getField("hstringval"), + document::StringFieldValue("foo")); + doc->remove(doc->getField("content")); + + doPut(doc, bucket, framework::MicroSecTime(1000)); + flush(bucket); + FileInfo fileInfo; + { + MemFilePtr mf(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); + fileInfo = fileInfoFromMemFile(mf); + + const uint32_t headerBlockEnd( + sizeof(Header) + + fileInfo._metaDataListSize * sizeof(MetaSlot) + + fileInfo._headerBlockSize); + + vespalib::LazyFile file(mf->getFile().getPath(), 0); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), + (*mf)[0].getLocation(BODY)._size); // No body. + const auto headerLoc((*mf)[0].getLocation(HEADER)); + const uint32_t extent(headerLoc._pos + headerLoc._size); + // Make sure we don't intersect an existing slot range. + CPPUNIT_ASSERT(extent < alignDown(headerBlockEnd - 1)); + file.resize(alignDown(headerBlockEnd - 1)); + } + env()._cache.clear(); + assertDocumentIsSilentlyRemoved(bucket, doc->getId()); +} + +} +} diff --git a/memfilepersistence/src/tests/spi/memfiletest.cpp b/memfilepersistence/src/tests/spi/memfiletest.cpp new file mode 100644 index 00000000000..70b03271da9 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfiletest.cpp @@ -0,0 +1,987 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/memfile/memfile.h> +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/logginglazyfile.h> +#include <tests/spi/options_builder.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/memfilepersistence/memfile/memfilecompactor.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <limits> + +namespace storage { +namespace memfile { + +struct MemFileTest : public SingleDiskMemFileTestUtils +{ + typedef MemFileCompactor::SlotList SlotList; + + /** + * Feed a document whose ID is deterministically generated from `seed` to + * bucket (16, 4) at time `timestamp`. + */ + document::DocumentId feedDocument( + uint64_t seed, + uint64_t timestamp, + uint32_t headerSize = 0, + uint32_t minBodySize = 10, + uint32_t maxBodySize = 100); + + /** + * Feed n instances of documents with the same ID to bucket (16, 4) using + * a timestamp range of [1000, 1000+n). + */ + void feedSameDocNTimes(uint32_t n); + + void setMaxDocumentVersionsOption(uint32_t n); + + std::vector<Types::Timestamp> compactWithVersionLimit(uint32_t maxVersions); + + void testCompactRemoveDoublePut(); + void testCompactPutRemove(); + void testCompactGidCollision(); + void testCompactGidCollisionAndNot(); + void testCompactWithMemFile(); + void testCompactCombined(); + void testCompactDifferentPuts(); + void testNoCompactionWhenDocumentVersionsWithinLimit(); + void testCompactWhenDocumentVersionsExceedLimit(); + void testCompactLimit1KeepsNewestVersionOnly(); + void testCompactionOptionsArePropagatedFromConfig(); + void testZeroDocumentVersionConfigIsCorrected(); + void testResizeToFreeSpace(); + void testNoFileWriteOnNoOpCompaction(); + void testCacheSize(); + void testClearCache(); + void testGetSlotsByTimestamp(); + void testCacheInconsistentSlot(); + void testEnsureCached(); + void testAddSlotWhenDiskFull(); + void testGetSerializedSize(); + void testGetBucketInfo(); + void 
testCopySlotsPreservesLocationSharing(); + void testFlushingToNonExistingFileAlwaysRunsCompaction(); + void testOrderDocSchemeDocumentsCanBeAddedToFile(); + + CPPUNIT_TEST_SUITE(MemFileTest); + CPPUNIT_TEST(testCompactRemoveDoublePut); + CPPUNIT_TEST(testCompactPutRemove); + CPPUNIT_TEST(testCompactGidCollision); + CPPUNIT_TEST(testCompactGidCollisionAndNot); + CPPUNIT_TEST(testCompactWithMemFile); + CPPUNIT_TEST(testCompactCombined); + CPPUNIT_TEST(testCompactDifferentPuts); + CPPUNIT_TEST(testNoCompactionWhenDocumentVersionsWithinLimit); + CPPUNIT_TEST(testCompactWhenDocumentVersionsExceedLimit); + CPPUNIT_TEST(testCompactLimit1KeepsNewestVersionOnly); + CPPUNIT_TEST(testCompactionOptionsArePropagatedFromConfig); + CPPUNIT_TEST(testZeroDocumentVersionConfigIsCorrected); + CPPUNIT_TEST(testNoFileWriteOnNoOpCompaction); + CPPUNIT_TEST(testCacheSize); + CPPUNIT_TEST(testClearCache); + CPPUNIT_TEST(testGetSlotsByTimestamp); + CPPUNIT_TEST(testEnsureCached); + CPPUNIT_TEST(testResizeToFreeSpace); + CPPUNIT_TEST(testAddSlotWhenDiskFull); + CPPUNIT_TEST(testGetSerializedSize); + CPPUNIT_TEST(testGetBucketInfo); + CPPUNIT_TEST(testCopySlotsPreservesLocationSharing); + CPPUNIT_TEST(testFlushingToNonExistingFileAlwaysRunsCompaction); + CPPUNIT_TEST(testOrderDocSchemeDocumentsCanBeAddedToFile); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemFileTest); + +/** + * Slots should actually be the same pointer. Use this assert to do correct + * check, and still print content of slots on failure. 
+ */ +#define ASSERT_SLOT_EQUAL(slotptra, slotptrb) \ +{ \ + CPPUNIT_ASSERT(slotptra != 0); \ + CPPUNIT_ASSERT(slotptrb != 0); \ + std::ostringstream slotdiff; \ + slotdiff << "Expected: " << *slotptra << ", but got " << *slotptrb; \ + CPPUNIT_ASSERT_EQUAL_MSG(slotdiff.str(), slotptra, slotptrb); \ +} + +namespace { + +framework::MicroSecTime sec(uint64_t n) { + return framework::MicroSecTime(n * 1000000ULL); +} + +/** + * Utility functions for tests to call to do compacting, such that the + * tests themselves are not bound to the current interface. + * + * Also, this function translates second time to microsecond time. + */ +MemFileTest::SlotList getSlotsToRemove( + const MemFile& file, uint64_t currentTime, + uint64_t revertTime, uint64_t keepRemoveTime) +{ + MemFileCompactor compactor( + sec(currentTime), + CompactionOptions() + .maxDocumentVersions( + std::numeric_limits<uint32_t>::max()) + .revertTimePeriod(sec(revertTime)) + .keepRemoveTimePeriod(sec(keepRemoveTime))); + return compactor.getSlotsToRemove(file); +} + +class AutoFlush +{ +public: + AutoFlush(MemFilePtr& ptr) : _ptr(ptr) {} + ~AutoFlush() { _ptr->flushToDisk(); } +private: + MemFilePtr& _ptr; +}; + +} + +document::DocumentId +MemFileTest::feedDocument( + uint64_t seed, + uint64_t timestamp, + uint32_t headerSize, + uint32_t minDocSize, + uint32_t maxDocSize) { + document::Document::SP doc(createRandomDocumentAtLocation( + 4, seed, minDocSize, maxDocSize)); + + if (headerSize > 0) { + std::string val(headerSize, 'A'); + doc->setValue(doc->getField("hstringval"), + document::StringFieldValue(val)); + } + + doPut(doc, + document::BucketId(16, 4), + Timestamp(timestamp * 1000000)); + + return doc->getId(); +} + +void +MemFileTest::feedSameDocNTimes(uint32_t n) +{ + for (uint32_t i = 0; i < n; ++i) { + feedDocument(1234, 1000 + i); + } +} + +void +MemFileTest::setMaxDocumentVersionsOption(uint32_t n) +{ + auto options = env().acquireConfigReadLock().options(); + 
env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options) + .maxDocumentVersions(n) + .build()); +} + +void +MemFileTest::testCacheSize() +{ + // Feed some puts + for (uint32_t i = 0; i < 4; i++) { + feedDocument(1234 * (i % 2), 1000 + 200 * i); + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + + CPPUNIT_ASSERT(file->getCacheSize().sum() > 0); +} + +void +MemFileTest::testClearCache() +{ + // Feed some puts + for (uint32_t i = 0; i < 4; i++) { + feedDocument(1234 * (i % 2), 1000 + 200 * i); + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + file->flushToDisk(); + + CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0); + CPPUNIT_ASSERT(file->getCacheSize().headerSize > 0); + + file->clearCache(HEADER); + + CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0); + CPPUNIT_ASSERT(file->getMemFileIO().getCachedSize(BODY) > 0); + CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().headerSize); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(HEADER)); + + file->clearCache(BODY); + + CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().bodySize); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(BODY)); +} + + +void +MemFileTest::testCompactGidCollision() +{ + // Feed two puts + for (uint32_t i = 0; i < 2; i++) { + feedDocument(1234 * i, 1000 + 200 * i); + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + const_cast<MemSlot&>((*file)[1]).setGlobalId((*file)[0].getGlobalId()); + + CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); + + { + SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400)); + CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); + file->removeSlots(toRemove); + } +} + +void +MemFileTest::testCompactGidCollisionAndNot() +{ + // Feed some puts + for (uint32_t i = 0; i < 4; i++) { + feedDocument(1234 * (i % 2), 1000 + 200 * i); + } + 
flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + const_cast<MemSlot&>((*file)[2]).setGlobalId((*file)[0].getGlobalId()); + const_cast<MemSlot&>((*file)[3]).setGlobalId((*file)[1].getGlobalId()); + + CPPUNIT_ASSERT_EQUAL(4, (int)file->getSlotCount()); + + { + SlotList toRemove(getSlotsToRemove(*file, 2000, 300, 86400)); + + CPPUNIT_ASSERT_EQUAL(2, (int)toRemove.size()); + ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); + ASSERT_SLOT_EQUAL(&(*file)[1], toRemove[1]); + file->removeSlots(toRemove); + } +} + + +void +MemFileTest::testCompactRemoveDoublePut() +{ + // Feed two puts at time 1000 and 1200 + for (uint32_t i = 0; i < 2; i++) { + feedDocument(1234, 1000 + 200 * i); + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); + + { + // Not time to collect yet, newest is still revertable + SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 86400)); + CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); + } + + { + SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400)); + + CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); + ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); + file->removeSlots(toRemove); + } +} + +void +MemFileTest::testCompactPutRemove() +{ + document::DocumentId docId = feedDocument(1234, 1000); + + doRemove(docId, Timestamp(1200*1000000), 0); + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + + { + // Since remove can still be reverted, we can't revert anything. 
+ SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 600)); + + CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); + } + + { + SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 600)); + + CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); + ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); + file->removeSlots(toRemove); + } + + { + SlotList toRemove(getSlotsToRemove(*file, 1900, 300, 600)); + + CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); + ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); + file->removeSlots(toRemove); + } +} + +void +MemFileTest::testCompactCombined() +{ + document::DocumentId docId; + + // Feed some puts at time 1000, 1200, 1400, 1600 and 1800 for same doc. + for (uint32_t i = 0; i < 5; i++) { + docId = feedDocument(1234, 1000 + i * 200); + } + flush(document::BucketId(16, 4)); + + // Now add remove at time 2000. + doRemove(docId, Timestamp(2000 * 1000000), 0); + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount()); + + { + // Compact all redundant slots that are older than revert period of 300. + // This includes 1000, 1200, 1400 and 1600. 
+ SlotList toRemove(getSlotsToRemove(*file, 2001, 300, 86400)); + CPPUNIT_ASSERT_EQUAL(4, (int)toRemove.size()); + for (int i = 0; i < 4; ++i) { + ASSERT_SLOT_EQUAL(&(*file)[i], toRemove[i]); + } + file->removeSlots(toRemove); + } +} + +void +MemFileTest::testCompactDifferentPuts() +{ + document::DocumentId docId; + + // Feed some puts + for (uint32_t i = 0; i < 2; i++) { + for (uint32_t j = 0; j < 3; j++) { + feedDocument(1234 * j, 1000 + (i * 3 + j) * 200); + } + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount()); + + { + SlotList toRemove(getSlotsToRemove(*file, 3000, 300, 86400)); + CPPUNIT_ASSERT_EQUAL(3, (int)toRemove.size()); + + for (uint32_t i = 0; i < 3; i++) { + bool found = false; + for (uint32_t j = 0; j < 3; j++) { + if ((*file)[j] == *toRemove[i]) { + found = true; + } + } + + CPPUNIT_ASSERT(found); + } + file->removeSlots(toRemove); + } +} + +void +MemFileTest::testCompactWithMemFile() +{ + // Feed two puts + for (uint32_t i = 0; i < 2; i++) { + document::Document::SP doc(createRandomDocumentAtLocation( + 4, 1234, 10, 100)); + + doPut(doc, document::BucketId(16, 4), Timestamp((1000 + i * 200)*1000000), 0); + } + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options) + .revertTimePeriod(framework::MicroSecTime(1000)) + .build()); + + getFakeClock()._absoluteTime = framework::MicroSecTime(2000ULL * 1000000); + + CPPUNIT_ASSERT(file->compact()); + CPPUNIT_ASSERT(!file->compact()); + + CPPUNIT_ASSERT_EQUAL(1, (int)file->getSlotCount()); + CPPUNIT_ASSERT_EQUAL(Timestamp(1200 * 1000000), (*file)[0].getTimestamp()); +} + +/** + * Feed 5 versions of a single document at absolute times 0 through 4 
seconds + * and run compaction using the provided max document version option. + * Revert time/keep remove time options are effectively disabled for this test. + * Returns timestamps of all slots that are marked as compactable. + */ +std::vector<Types::Timestamp> +MemFileTest::compactWithVersionLimit(uint32_t maxVersions) +{ + document::BucketId bucket(16, 4); + std::shared_ptr<Document> doc( + createRandomDocumentAtLocation(4, 1234, 10, 100)); + uint32_t versionLimit = 5; + for (uint32_t i = 0; i < versionLimit; ++i) { + Timestamp ts(sec(i).getTime()); + doPut(doc, bucket, ts, 0); + } + flush(bucket); + + MemFilePtr file(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(versionLimit, file->getSlotCount()); + + framework::MicroSecTime currentTime(sec(versionLimit)); + MemFileCompactor compactor( + currentTime, + CompactionOptions() + .revertTimePeriod(sec(versionLimit)) + .keepRemoveTimePeriod(sec(versionLimit)) + .maxDocumentVersions(maxVersions)); + auto slots = compactor.getSlotsToRemove(*file); + // Convert to timestamps since caller won't have access to actual MemFile. 
+ std::vector<Timestamp> timestamps; + for (const MemSlot* slot : slots) { + timestamps.push_back(slot->getTimestamp()); + } + return timestamps; +} + +void +MemFileTest::testNoCompactionWhenDocumentVersionsWithinLimit() +{ + auto timestamps = compactWithVersionLimit(5); + CPPUNIT_ASSERT(timestamps.empty()); +} + +void +MemFileTest::testCompactWhenDocumentVersionsExceedLimit() +{ + auto timestamps = compactWithVersionLimit(2); + CPPUNIT_ASSERT_EQUAL(size_t(3), timestamps.size()); + std::vector<Timestamp> expected = { + sec(0), sec(1), sec(2) + }; + CPPUNIT_ASSERT_EQUAL(expected, timestamps); +} + +void +MemFileTest::testCompactLimit1KeepsNewestVersionOnly() +{ + auto timestamps = compactWithVersionLimit(1); + CPPUNIT_ASSERT_EQUAL(size_t(4), timestamps.size()); + std::vector<Timestamp> expected = { + sec(0), sec(1), sec(2), sec(3) + }; + CPPUNIT_ASSERT_EQUAL(expected, timestamps); +} + +void +MemFileTest::testCompactionOptionsArePropagatedFromConfig() +{ + vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder; + vespa::config::content::PersistenceConfigBuilder pcBuilder; + + pcBuilder.maximumVersionsOfSingleDocumentStored = 12345; + pcBuilder.revertTimePeriod = 555; + pcBuilder.keepRemoveTimePeriod = 777; + + vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder); + vespa::config::content::PersistenceConfig pc(pcBuilder); + Options opts(mfc, pc); + + CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(555 * 1000000), + opts._revertTimePeriod); + CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(777 * 1000000), + opts._keepRemoveTimePeriod); + CPPUNIT_ASSERT_EQUAL(uint32_t(12345), opts._maxDocumentVersions); +} + +void +MemFileTest::testZeroDocumentVersionConfigIsCorrected() +{ + vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder; + vespa::config::content::PersistenceConfigBuilder pcBuilder; + + pcBuilder.maximumVersionsOfSingleDocumentStored = 0; + + vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder); + 
vespa::config::content::PersistenceConfig pc(pcBuilder); + Options opts(mfc, pc); + + CPPUNIT_ASSERT_EQUAL(uint32_t(1), opts._maxDocumentVersions); +} + +void +MemFileTest::testGetSlotsByTimestamp() +{ + for (uint32_t i = 0; i < 10; i++) { + feedDocument(i, 1000 + i); + } + flush(document::BucketId(16, 4)); + + std::vector<Timestamp> timestamps; + timestamps.push_back(Timestamp(999 * 1000000)); + timestamps.push_back(Timestamp(1001 * 1000000)); + timestamps.push_back(Timestamp(1002 * 1000000)); + timestamps.push_back(Timestamp(1007 * 1000000)); + timestamps.push_back(Timestamp(1100 * 1000000)); + std::vector<const MemSlot*> slots; + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + file->getSlotsByTimestamp(timestamps, slots); + CPPUNIT_ASSERT_EQUAL(std::size_t(3), slots.size()); + CPPUNIT_ASSERT_EQUAL(Timestamp(1001 * 1000000), slots[0]->getTimestamp()); + CPPUNIT_ASSERT_EQUAL(Timestamp(1002 * 1000000), slots[1]->getTimestamp()); + CPPUNIT_ASSERT_EQUAL(Timestamp(1007 * 1000000), slots[2]->getTimestamp()); +} + +void +MemFileTest::testEnsureCached() +{ + // Feed some puts + for (uint32_t i = 0; i < 5; i++) { + feedDocument(i, 1000 + i * 200, 600, 600, 600); + } + flush(document::BucketId(16, 4)); + + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).maximumReadThroughGap(512).build()); + env()._cache.clear(); + + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + CPPUNIT_ASSERT(file.get()); + CPPUNIT_ASSERT_EQUAL(5, (int)file->getSlotCount()); + + file->ensureDocumentIdCached((*file)[1]); + + for (std::size_t i = 0; i < file->getSlotCount(); ++i) { + if (i == 1) { + CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); + } else { + CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); + } + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); + } + } + + env()._cache.clear(); + + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + 
file->ensureDocumentCached((*file)[2], true); + + for (std::size_t i = 0; i < file->getSlotCount(); ++i) { + if (i == 2) { + CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); + } else { + CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); + } + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); + } + } + + env()._cache.clear(); + + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + + file->ensureDocumentCached((*file)[3], false); + + for (std::size_t i = 0; i < file->getSlotCount(); ++i) { + if (i == 3) { + CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); + CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY)); + } else { + CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); + } + } + } + + env()._cache.clear(); + + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + + std::vector<Timestamp> ts; + for (int i = 2; i < 5; ++i) { + ts.push_back((*file)[i].getTimestamp()); + } + + file->ensureDocumentCached(ts, false); + + for (std::size_t i = 0; i < file->getSlotCount(); ++i) { + if (i > 1 && i < 5) { + CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); + CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY)); + } else { + CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); + } + } + } + + env()._cache.clear(); + + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + + file->ensureHeaderBlockCached(); + + for (std::size_t i = 0; i < file->getSlotCount(); ++i) { + CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); + 
CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); + CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); + } + } +} + +void +MemFileTest::testResizeToFreeSpace() +{ + /** + * This test verifies that files are resized to a smaller size when they + * need to be. This should happen during a call to flushToDisk() in + * MemFile, either when the file is dirty or when passed a flag to check + * even clean files (which the integrity checker cycle uses). A clean file + * is used for testing to ensure that no part of the code only works for + * dirty files. This test only tests the case where the body block is too + * large. The real implementation here will be in the flushUpdatesToFile() + * function for the given file formats (the VersionSerializers). If more + * cases want to be tested, add those as unit tests for the version + * serializers themselves. + */ + + // Create a test bucket to test with. + BucketId bucket(16, 0xa); + createTestBucket(bucket, 0); + + off_t file_size = + ((SimpleMemFileIOBuffer&)getMemFile(bucket)->getMemFileIO()). + getFileHandle().getFileSize(); + + // Clear cache so we can manually modify the backing file to increase + // its size. 
+ FileSpecification file(getMemFile(bucket)->getFile()); + env()._cache.clear(); + { + // Extend file to 2 MB, which should create an excessively large + // body block such that file should be resized to be smaller + vespalib::LazyFile fileHandle(file.getPath(), 0); + fileHandle.write("foobar", 6, 2 * 1024 * 1024 - 6); + } + MemFilePtr memFile(getMemFile(bucket)); + memFile->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); + CPPUNIT_ASSERT_EQUAL(file_size, + ((SimpleMemFileIOBuffer&)memFile->getMemFileIO()). + getFileHandle().getFileSize()); +} + +namespace { + +const vespalib::LazyFile& +getFileHandle(const MemFile& mf1) +{ + return dynamic_cast<const SimpleMemFileIOBuffer&>( + mf1.getMemFileIO()).getFileHandle(); +} + +const LoggingLazyFile& +getLoggerFile(const MemFile& file) +{ + return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file)); +} + +} + +void +MemFileTest::testNoFileWriteOnNoOpCompaction() +{ + BucketId bucket(16, 4); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory()); + + // Feed some unique puts, none of which can be compacted away. + for (uint32_t i = 0; i < 2; i++) { + document::Document::SP doc(createRandomDocumentAtLocation( + 4, i, 10, 100)); + + doPut(doc, bucket, Timestamp((1000 + i * 200)*1000000), 0); + } + flush(bucket); + + MemFilePtr file(getMemFile(bucket)); + + size_t opsBeforeFlush = getLoggerFile(*file).getOperationCount(); + file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); + size_t opsAfterFlush = getLoggerFile(*file).getOperationCount(); + + // Disk should not have been touched, since no slots have been + // compacted away. 
+ if (opsBeforeFlush != opsAfterFlush) { + std::cerr << "\n" << getLoggerFile(*file).toString() << "\n"; + } + CPPUNIT_ASSERT_EQUAL(opsBeforeFlush, opsAfterFlush); +} + +void +MemFileTest::testAddSlotWhenDiskFull() +{ + { + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + { + // Add a dummy-slot that can later be removed + Document::SP doc(createRandomDocumentAtLocation(4)); + file->addPutSlot(*doc, Timestamp(1001)); + } + } + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + AutoFlush af(file); + PartitionMonitor* mon = env().getDirectory().getPartition().getMonitor(); + // Set disk to 99% full + mon->setStatOncePolicy(); + mon->setMaxFillness(.98f); + mon->overrideRealStat(512, 100000, 99000); + CPPUNIT_ASSERT(mon->isFull()); + + // Test that addSlot with a non-persisted Put fails + { + Document::SP doc(createRandomDocumentAtLocation(4)); + try { + file->addPutSlot(*doc, Timestamp(10003)); + CPPUNIT_ASSERT(false); + } catch (vespalib::IoException& e) { + CPPUNIT_ASSERT_EQUAL(vespalib::IoException::NO_SPACE, e.getType()); + } + } + + // Slots with valid header and body locations should also + // not fail, as these are added when the file is loaded + { + // Just steal parts from existing slot to ensure they're persisted + const MemSlot* existing = file->getSlotAtTime(Timestamp(1001)); + + MemSlot slot(existing->getGlobalId(), + Timestamp(1005), + existing->getLocation(HEADER), + existing->getLocation(BODY), + IN_USE, + 0x1234); + file->addSlot(slot); + } + + // Removes should not fail when disk is full + { + file->addRemoveSlot(*file->getSlotAtTime(Timestamp(1001)), Timestamp(1003)); + } +} + +void +MemFileTest::testGetSerializedSize() { + document::Document::SP doc(createRandomDocumentAtLocation( + 4, 1234, 1024, 1024)); + + std::string val("Header"); + doc->setValue(doc->getField("hstringval"), + document::StringFieldValue(val)); + + doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000)); + 
flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + file->ensureBodyBlockCached(); + const MemSlot* slot = file->getSlotAtTime(framework::MicroSecTime(1000)); + CPPUNIT_ASSERT(slot != 0); + + vespalib::nbostream serializedHeader; + doc->serializeHeader(serializedHeader); + + vespalib::nbostream serializedBody; + doc->serializeBody(serializedBody); + + CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()), + file->getSerializedSize(*slot, HEADER)); + CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()), + file->getSerializedSize(*slot, BODY)); +} + +void +MemFileTest::testGetBucketInfo() +{ + document::Document::SP doc(createRandomDocumentAtLocation( + 4, 1234, 100, 100)); + doc->setValue(doc->getField("content"), + document::StringFieldValue("foo")); + document::Document::SP doc2(createRandomDocumentAtLocation( + 4, 1235, 100, 100)); + doc2->setValue(doc->getField("content"), + document::StringFieldValue("bar")); + + doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000)); + flush(document::BucketId(16, 4)); + + doPut(doc2, document::BucketId(16, 4), framework::MicroSecTime(1001)); + flush(document::BucketId(16, 4)); + + // Do remove which should only add a single meta entry + doRemove(doc->getId(), Timestamp(1002), 0); + flush(document::BucketId(16, 4)); + + MemFilePtr file(getMemFile(document::BucketId(16, 4))); + + CPPUNIT_ASSERT_EQUAL(3u, file->getSlotCount()); + uint32_t maxHeaderExtent = (*file)[1].getLocation(HEADER)._pos + + (*file)[1].getLocation(HEADER)._size; + uint32_t maxBodyExtent = (*file)[1].getLocation(BODY)._pos + + (*file)[1].getLocation(BODY)._size; + + uint32_t wantedUsedSize = 64 + 40*3 + maxHeaderExtent + maxBodyExtent; + BucketInfo info = file->getBucketInfo(); + CPPUNIT_ASSERT_EQUAL(1u, info.getDocumentCount()); + CPPUNIT_ASSERT_EQUAL(3u, info.getEntryCount()); + CPPUNIT_ASSERT_EQUAL(wantedUsedSize, info.getUsedSize()); + uint32_t wantedUniqueSize = 
(*file)[1].getLocation(HEADER)._size + + (*file)[1].getLocation(BODY)._size; + CPPUNIT_ASSERT_EQUAL(wantedUniqueSize, info.getDocumentSize()); +} + +void +MemFileTest::testCopySlotsPreservesLocationSharing() +{ + document::BucketId bucket(16, 4); + // Feed two puts to same document (identical seed). These should not + // share any blocks. Note: implicit sec -> microsec conversion. + feedDocument(1234, 1000); // slot 0 + auto docId = feedDocument(1234, 1001); // slot 1 + // Update only the header of the last version of the document. The + // resulting slot 2 should share its body block with slot 1. + auto update = createHeaderUpdate(docId, document::IntFieldValue(5678)); + doUpdate(bucket, update, Timestamp(1002 * 1000000), 0); + // Feed a remove for doc in slot 2. This should share the header block of + // slot 3 with the newest document in slot 2. + doRemove(docId, Timestamp(1003 * 1000000), 0); + flush(bucket); + + { + MemFilePtr src(getMemFile(document::BucketId(16, 4))); + MemFilePtr dest(getMemFile(document::BucketId(17, 4))); + std::vector<Timestamp> timestamps { + Timestamp(1000 * 1000000), + Timestamp(1001 * 1000000), + Timestamp(1002 * 1000000), + Timestamp(1003 * 1000000) + }; + std::vector<const MemSlot*> slots { + src->getSlotAtTime(Timestamp(1000 * 1000000)), + src->getSlotAtTime(Timestamp(1001 * 1000000)), + src->getSlotAtTime(Timestamp(1002 * 1000000)), + src->getSlotAtTime(Timestamp(1003 * 1000000)) + }; + dest->copySlotsFrom(*src, slots); + dest->flushToDisk(); + CPPUNIT_ASSERT_EQUAL(uint32_t(4), dest->getSlotCount()); + + DataLocation header[4]; + DataLocation body[4]; + for (int i = 0; i < 4; ++i) { + const MemSlot* slot = dest->getSlotAtTime(timestamps[i]); + header[i] = slot->getLocation(HEADER); + body[i] = slot->getLocation(BODY); + } + CPPUNIT_ASSERT(!(header[0] == header[1])); + + CPPUNIT_ASSERT_EQUAL(body[2], body[1]); + CPPUNIT_ASSERT_EQUAL(header[3], header[2]); + } +} + +void +MemFileTest::testFlushingToNonExistingFileAlwaysRunsCompaction() +{ + 
document::BucketId bucket(16, 4); + + setMaxDocumentVersionsOption(1); + feedSameDocNTimes(10); + flush(bucket); + + // Max version limit is 1, flushing should have compacted it down. + MemFilePtr file(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); +} + +void +MemFileTest::testOrderDocSchemeDocumentsCanBeAddedToFile() +{ + // Quick explanation of the esoteric and particular values chosen below: + // orderdoc mangles the MSB of the bucket ID based on the document ID's + // ordering parameters and thus its bucket cannot be directly deduced from + // the generated GID. The values given here specify a document whose GID + // bits differ from those generated by the document and where a GID-only + // bucket ownership check would fail (nuking the node with an assertion). + // We have to make sure cases do not trigger false positives. + document::BucketId bucket(0x84000000ee723751); + auto doc = createDocument("the quick red fox trips over a hedge", + "orderdoc(3,1):storage_test:group1:9:9"); + doPut(std::shared_ptr<Document>(std::move(doc)), + bucket, + Timestamp(1000000 * 1234)); + flush(bucket); + + MemFilePtr file(getMemFile(bucket)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); + // Ideally we'd test the failure case as well, but that'd require framework + // support for death tests. +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.cpp b/memfilepersistence/src/tests/spi/memfiletestutils.cpp new file mode 100644 index 00000000000..1e882ccbe6b --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfiletestutils.cpp @@ -0,0 +1,455 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> + +#include <vespa/document/datatype/documenttype.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h> +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/simulatedfailurefile.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h> +#include <sys/time.h> + +using document::DocumentType; + +namespace storage { +namespace memfile { + +namespace { + spi::LoadType defaultLoadType(0, "default"); +} + +namespace { + vdstestlib::DirConfig initialize(uint32_t numDisks) { + system(vespalib::make_string("rm -rf vdsroot").c_str()); + for (uint32_t i = 0; i < numDisks; i++) { + system(vespalib::make_string("mkdir -p vdsroot/disks/d%d", i).c_str()); + } + vdstestlib::DirConfig config(getStandardConfig(true)); + return config; + } + + template<typename T> + struct ConfigReader : public T::Subscriber + { + T config; + + ConfigReader(const std::string& configId) { + T::subscribe(configId, *this); + } + void configure(const T& c) { config = c; } + }; +} + +MemFileTestEnvironment::MemFileTestEnvironment( + uint32_t numDisks, + framework::ComponentRegister& reg, + const document::DocumentTypeRepo& repo) + : _config(initialize(numDisks)), + _provider(reg, _config.getConfigId()) +{ + _provider.setDocumentRepo(repo); + _provider.getPartitionStates(); +} + +MemFileTestUtils::MemFileTestUtils() +{ +} + +MemFileTestUtils::~MemFileTestUtils() +{ +} + +void +MemFileTestUtils::setupDisks(uint32_t numDisks) { + tearDown(); + _componentRegister.reset( + new framework::defaultimplementation::ComponentRegisterImpl); + _clock.reset(new FakeClock); + _componentRegister->setClock(*_clock); + _memoryManager.reset( + new framework::defaultimplementation::MemoryManager( + framework::defaultimplementation::AllocationLogic::UP( + new framework::defaultimplementation::SimpleMemoryLogic( + *_clock, 1024 * 1024 * 1024)))); + 
_componentRegister->setMemoryManager(*_memoryManager); + _env.reset(new MemFileTestEnvironment(numDisks, + *_componentRegister, + *getTypeRepo())); +} + +Environment& +MemFileTestUtils::env() +{ + return static_cast<MemFilePersistenceProvider&>( + getPersistenceProvider()).getEnvironment(); +} + +MemFilePersistenceProvider& +MemFileTestUtils::getPersistenceProvider() +{ + return _env->_provider; +} + +MemFilePersistenceThreadMetrics& +MemFileTestUtils::getMetrics() +{ + return getPersistenceProvider().getMetrics(); +} + +std::string +MemFileTestUtils::getMemFileStatus(const document::BucketId& id, + uint32_t disk) +{ + MemFilePtr file(getMemFile(id, disk)); + std::ostringstream ost; + ost << id << ": " << file->getSlotCount() << "," << file->getDisk(); + return ost.str(); +} + +std::string +MemFileTestUtils::getModifiedBuckets() +{ + spi::BucketIdListResult result( + getPersistenceProvider().getModifiedBuckets()); + const spi::BucketIdListResult::List& list(result.getList()); + std::ostringstream ss; + for (size_t i = 0; i < list.size(); ++i) { + if (i != 0) { + ss << ","; + } + ss << std::hex << list[i].getId(); + } + return ss.str(); +} + +MemFilePtr +MemFileTestUtils::getMemFile(const document::BucketId& id, uint16_t disk) +{ + return env()._cache.get(id, env(), env().getDirectory(disk)); +} + +spi::Result +MemFileTestUtils::flush(const document::BucketId& id, uint16_t disk) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + return getPersistenceProvider().flush( + spi::Bucket(id, spi::PartitionId(disk)), context); +} + +document::Document::SP +MemFileTestUtils::doPutOnDisk( + uint16_t disk, + uint32_t location, + Timestamp timestamp, + uint32_t minSize, + uint32_t maxSize) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + document::Document::SP doc(createRandomDocumentAtLocation( + location, timestamp.getTime(), minSize, maxSize)); + getPersistenceProvider().put( + 
spi::Bucket(document::BucketId(16, location), spi::PartitionId(disk)), + spi::Timestamp(timestamp.getTime()), + doc, + context); + return doc; +} + +bool +MemFileTestUtils::doRemoveOnDisk( + uint16_t disk, + const document::BucketId& bucketId, + const document::DocumentId& docId, + Timestamp timestamp, + OperationHandler::RemoveType persistRemove) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + if (persistRemove == OperationHandler::PERSIST_REMOVE_IF_FOUND) { + spi::RemoveResult result = getPersistenceProvider().removeIfFound( + spi::Bucket(bucketId, spi::PartitionId(disk)), + spi::Timestamp(timestamp.getTime()), + docId, + context); + return result.wasFound(); + } + spi::RemoveResult result = getPersistenceProvider().remove( + spi::Bucket(bucketId, spi::PartitionId(disk)), + spi::Timestamp(timestamp.getTime()), + docId, + context); + + return result.wasFound(); +} + +bool +MemFileTestUtils::doUnrevertableRemoveOnDisk( + uint16_t disk, + const document::BucketId& bucketId, + const DocumentId& docId, + Timestamp timestamp) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + spi::RemoveResult result = + getPersistenceProvider().remove( + spi::Bucket(bucketId, spi::PartitionId(disk)), + spi::Timestamp(timestamp.getTime()), + docId, context); + + return result.wasFound(); +} + +spi::GetResult +MemFileTestUtils::doGetOnDisk( + uint16_t disk, + const document::BucketId& bucketId, + const document::DocumentId& docId, + const document::FieldSet& fields) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + return getPersistenceProvider().get( + spi::Bucket(bucketId, spi::PartitionId(disk)), + fields, docId, context); +} + +document::DocumentUpdate::SP +MemFileTestUtils::createBodyUpdate( + const document::DocumentId& docId, + const document::FieldValue& updateValue) +{ + const DocumentType* + docType(getTypeRepo()->getDocumentType("testdoctype1")); + 
document::DocumentUpdate::SP update( + new document::DocumentUpdate(*docType, docId)); + std::shared_ptr<document::AssignValueUpdate> assignUpdate( + new document::AssignValueUpdate(updateValue)); + document::FieldUpdate fieldUpdate(docType->getField("content")); + fieldUpdate.addUpdate(*assignUpdate); + update->addUpdate(fieldUpdate); + return update; +} + +document::DocumentUpdate::SP +MemFileTestUtils::createHeaderUpdate( + const document::DocumentId& docId, + const document::FieldValue& updateValue) +{ + const DocumentType* + docType(getTypeRepo()->getDocumentType("testdoctype1")); + document::DocumentUpdate::SP update( + new document::DocumentUpdate(*docType, docId)); + std::shared_ptr<document::AssignValueUpdate> assignUpdate( + new document::AssignValueUpdate(updateValue)); + document::FieldUpdate fieldUpdate(docType->getField("headerval")); + fieldUpdate.addUpdate(*assignUpdate); + update->addUpdate(fieldUpdate); + return update; +} + +void +MemFileTestUtils::doPut(const document::Document::SP& doc, + Timestamp time, + uint16_t disk, + uint16_t usedBits) +{ + document::BucketId bucket( + getBucketIdFactory().getBucketId(doc->getId())); + bucket.setUsedBits(usedBits); + doPut(doc, bucket, time, disk); +} + +void +MemFileTestUtils::doPut(const document::Document::SP& doc, + document::BucketId bid, + Timestamp time, + uint16_t disk) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + getPersistenceProvider().put(spi::Bucket(bid, spi::PartitionId(disk)), + spi::Timestamp(time.getTime()), doc, context); +} + +spi::UpdateResult +MemFileTestUtils::doUpdate(document::BucketId bid, + const document::DocumentUpdate::SP& update, + Timestamp time, + uint16_t disk) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + return getPersistenceProvider().update( + spi::Bucket(bid, spi::PartitionId(disk)), + spi::Timestamp(time.getTime()), update, context); +} + +void 
+MemFileTestUtils::doRemove(const document::DocumentId& id, Timestamp time, + uint16_t disk, bool unrevertableRemove, + uint16_t usedBits) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + document::BucketId bucket(getBucketIdFactory().getBucketId(id)); + bucket.setUsedBits(usedBits); + + if (unrevertableRemove) { + getPersistenceProvider().remove( + spi::Bucket(bucket, spi::PartitionId(disk)), + spi::Timestamp(time.getTime()), + id, context); + } else { + spi::RemoveResult result = getPersistenceProvider().removeIfFound( + spi::Bucket(bucket, spi::PartitionId(disk)), + spi::Timestamp(time.getTime()), + id, context); + + if (!result.wasFound()) { + throw vespalib::IllegalStateException( + "Attempted to remove non-existing doc " + id.toString(), + VESPA_STRLOC); + } + } +} + +void +MemFileTestUtils::copyHeader(document::Document& dest, + const document::Document& src) +{ + // FIXME(vekterli): temporary solution while we don't have + // fieldset pruning functionality in Document. + //dest.setHeaderPtr(src.getHeaderPtr()); + vespalib::nbostream originalBodyStream; + dest.serializeBody(originalBodyStream); + + vespalib::nbostream headerStream; + src.serializeHeader(headerStream); + document::ByteBuffer hbuf(headerStream.peek(), headerStream.size()); + dest.deserializeHeader(*getTypeRepo(), hbuf); + // deserializeHeader clears fields struct, so have to re-set body + document::ByteBuffer bbuf(originalBodyStream.peek(), + originalBodyStream.size()); + dest.deserializeBody(*getTypeRepo(), bbuf); +} + +void +MemFileTestUtils::copyBody(document::Document& dest, + const document::Document& src) +{ + // FIXME(vekterli): temporary solution while we don't have + // fieldset pruning functionality in Document. 
+ //dest.setBodyPtr(src.getBodyPtr()); + vespalib::nbostream stream; + src.serializeBody(stream); + document::ByteBuffer buf(stream.peek(), stream.size()); + dest.deserializeBody(*getTypeRepo(), buf); +} + +void +MemFileTestUtils::clearBody(document::Document& doc) +{ + // FIXME(vekterli): temporary solution while we don't have + // fieldset pruning functionality in Document. + //doc->getBody().clear(); + vespalib::nbostream stream; + doc.serializeHeader(stream); + doc.deserialize(*getTypeRepo(), stream); +} + +void +MemFileTestUtils::createTestBucket(const document::BucketId& bucket, + uint16_t disk) +{ + + uint32_t opsPerType = 2; + uint32_t numberOfLocations = 2; + uint32_t minDocSize = 0; + uint32_t maxDocSize = 128; + + for (uint32_t useHeaderOnly = 0; useHeaderOnly < 2; ++useHeaderOnly) { + bool headerOnly = (useHeaderOnly == 1); + for (uint32_t optype=0; optype < 4; ++optype) { + for (uint32_t i=0; i<opsPerType; ++i) { + uint32_t seed = useHeaderOnly * 10000 + optype * 1000 + i + 1; + uint64_t location = (seed % numberOfLocations); + location <<= 32; + location += (bucket.getRawId() & 0xffffffff); + document::Document::SP doc( + createRandomDocumentAtLocation( + location, seed, minDocSize, maxDocSize)); + if (headerOnly) { + clearBody(*doc); + } + doPut(doc, Timestamp(seed), disk, bucket.getUsedBits()); + if (optype == 0) { // Regular put + } else if (optype == 1) { // Overwritten later in time + Document::SP doc2(new Document(*doc)); + doc2->setValue(doc2->getField("content"), + document::StringFieldValue("overwritten")); + doPut(doc2, Timestamp(seed + 500), + disk, bucket.getUsedBits()); + } else if (optype == 2) { // Removed + doRemove(doc->getId(), Timestamp(seed + 500), disk, false, + bucket.getUsedBits()); + } else if (optype == 3) { // Unrevertable removed + doRemove(doc->getId(), Timestamp(seed), disk, true, + bucket.getUsedBits()); + } + } + } + } + flush(bucket, disk); +} + +void +MemFileTestUtils::simulateIoErrorsForSubsequentlyOpenedFiles( + 
const IoErrors& errs) +{ + std::unique_ptr<SimulatedFailureLazyFile::Factory> factory( + new SimulatedFailureLazyFile::Factory); + factory->setWriteOpsBeforeFailure(errs._afterWrites); + factory->setReadOpsBeforeFailure(errs._afterReads); + env()._lazyFileFactory = std::move(factory); +} + +void +MemFileTestUtils::unSimulateIoErrorsForSubsequentlyOpenedFiles() +{ + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new DefaultLazyFileFactory(0)); +} + +std::string +MemFileTestUtils::stringifyFields(const document::Document& doc) const +{ + using namespace document; + std::vector<std::string> output; + const StructFieldValue& fields(doc.getFields()); + for (StructFieldValue::const_iterator + it(fields.begin()), e(fields.end()); + it != e; ++it) + { + std::ostringstream ss; + const Field& f(it.field()); + ss << f.getName() << ": "; + FieldValue::UP val(fields.getValue(f)); + if (val.get()) { + ss << val->toString(); + } else { + ss << "(null)"; + } + output.push_back(ss.str()); + } + std::ostringstream ret; + std::sort(output.begin(), output.end()); + std::copy(output.begin(), output.end(), + std::ostream_iterator<std::string>(ret, "\n")); + return ret.str(); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.h b/memfilepersistence/src/tests/spi/memfiletestutils.h new file mode 100644 index 00000000000..a13b902a214 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfiletestutils.h @@ -0,0 +1,294 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::MemFileTestUtils + * \ingroup memfile + * + * \brief Utilities for unit tests of the MemFile layer. + * + * The memfile layer typically needs a MemFileEnvironment object that must be + * set up. This class creates such an object to be used by unit tests. Other + * utilities useful for only MemFile testing can be added here too. 
+ */ + +#pragma once + +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <tests/testhelper.h> +#include <vespa/persistence/spi/persistenceprovider.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h> +#include <vespa/document/base/testdocman.h> +#include <vespa/storageframework/defaultimplementation/clock/realclock.h> +#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> +#include <vespa/storageframework/defaultimplementation/memory/memorymanager.h> + +namespace storage { +namespace memfile { + +struct FakeClock : public framework::Clock { +public: + typedef std::unique_ptr<FakeClock> UP; + + framework::MicroSecTime _absoluteTime; + + FakeClock() {}; + + virtual void addSecondsToTime(uint32_t nr) { + _absoluteTime += framework::MicroSecTime(nr * uint64_t(1000000)); + } + + virtual framework::MicroSecTime getTimeInMicros() const { + return _absoluteTime; + } + virtual framework::MilliSecTime getTimeInMillis() const { + return getTimeInMicros().getMillis(); + } + virtual framework::SecondTime getTimeInSeconds() const { + return getTimeInMicros().getSeconds(); + } +}; + +struct MemFileTestEnvironment { + MemFileTestEnvironment(uint32_t numDisks, + framework::ComponentRegister& reg, + const document::DocumentTypeRepo& repo); + + vdstestlib::DirConfig _config; + MemFilePersistenceProvider _provider; +}; + +class MemFileTestUtils : public Types, public document::TestDocMan, public CppUnit::TestFixture { +private: + // This variables are kept in test class. 
Instances that needs to be + // unique per test needs to be setup in setupDisks and cleared in + // tearDown + document::BucketIdFactory _bucketIdFactory; + framework::defaultimplementation::ComponentRegisterImpl::UP _componentRegister; + FakeClock::UP _clock; + framework::defaultimplementation::MemoryManager::UP _memoryManager; + std::unique_ptr<MemFileTestEnvironment> _env; + +public: + MemFileTestUtils(); + virtual ~MemFileTestUtils(); + + void setupDisks(uint32_t disks); + + void tearDown() { + _env.reset(); + _componentRegister.reset(); + _memoryManager.reset(); + _clock.reset(); + } + + std::string getMemFileStatus(const document::BucketId& id, uint32_t disk = 0); + + std::string getModifiedBuckets(); + + /** + Flushes all cached data to disk and updates the bucket database accordingly. + */ + void flush(); + + FakeClock& getFakeClock() { return *_clock; } + + spi::Result flush(const document::BucketId& id, uint16_t disk = 0); + + MemFilePersistenceProvider& getPersistenceProvider(); + + MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk = 0); + + Environment& env(); + + MemFilePersistenceThreadMetrics& getMetrics(); + + MemFileTestEnvironment& getEnv() { return *_env; } + + /** + Performs a put to the given disk. + Returns the document that was inserted. + */ + document::Document::SP doPutOnDisk( + uint16_t disk, + uint32_t location, + Timestamp timestamp, + uint32_t minSize = 0, + uint32_t maxSize = 128); + + document::Document::SP doPut( + uint32_t location, + Timestamp timestamp, + uint32_t minSize = 0, + uint32_t maxSize = 128) + { return doPutOnDisk(0, location, timestamp, minSize, maxSize); } + + /** + Performs a remove to the given disk. + Returns the new doccount if document was removed, or -1 if not found. 
+ */ + bool doRemoveOnDisk( + uint16_t disk, + const document::BucketId& bid, + const document::DocumentId& id, + Timestamp timestamp, + OperationHandler::RemoveType persistRemove); + + bool doRemove( + const document::BucketId& bid, + const document::DocumentId& id, + Timestamp timestamp, + OperationHandler::RemoveType persistRemove) { + return doRemoveOnDisk(0, bid, id, timestamp, persistRemove); + } + + bool doUnrevertableRemoveOnDisk(uint16_t disk, + const document::BucketId& bid, + const DocumentId& id, + Timestamp timestamp); + + bool doUnrevertableRemove(const document::BucketId& bid, + const DocumentId& id, + Timestamp timestamp) + { + return doUnrevertableRemoveOnDisk(0, bid, id, timestamp); + } + + virtual const document::BucketIdFactory& getBucketIdFactory() const + { return _bucketIdFactory; } + + document::BucketIdFactory& getBucketIdFactory() + { return _bucketIdFactory; } + + /** + * Do a remove toward storage set up in test environment. + * + * @id Document to remove. + * @disk If set, use this disk, otherwise lookup in bucket db. + * @unrevertableRemove If set, instead of adding put, turn put to remove. + * @usedBits Generate bucket to use from docid using this amount of bits. 
+ */ + void doRemove(const DocumentId& id, Timestamp, uint16_t disk, + bool unrevertableRemove = false, uint16_t usedBits = 16); + + spi::GetResult doGetOnDisk( + uint16_t disk, + const document::BucketId& bucketId, + const document::DocumentId& docId, + const document::FieldSet& fields); + + spi::GetResult doGet( + const document::BucketId& bucketId, + const document::DocumentId& docId, + const document::FieldSet& fields) + { return doGetOnDisk(0, bucketId, docId, fields); } + + document::DocumentUpdate::SP createBodyUpdate( + const document::DocumentId& id, + const document::FieldValue& updateValue); + + document::DocumentUpdate::SP createHeaderUpdate( + const document::DocumentId& id, + const document::FieldValue& updateValue); + + virtual const document::DocumentTypeRepo::SP getTypeRepo() const + { return document::TestDocMan::getTypeRepoSP(); } + + /** + * Do a put toward storage set up in test environment. + * + * @doc Document to put. Use TestDocMan to generate easily. + * @disk If set, use this disk, otherwise lookup in bucket db. + * @usedBits Generate bucket to use from docid using this amount of bits. + */ + void doPut(const Document::SP& doc, Timestamp, + uint16_t disk, uint16_t usedBits = 16); + + void doPut(const document::Document::SP& doc, + document::BucketId bid, + Timestamp time, + uint16_t disk = 0); + + spi::UpdateResult doUpdate(document::BucketId bid, + const document::DocumentUpdate::SP& update, + Timestamp time, + uint16_t disk = 0); + + /** + * Create a test bucket with various content representing most states a + * bucket can represent. (Such that tests have a nice test bucket to use + * that require operations to handle all the various bucket contents. + * + * @disk If set, use this disk, otherwise lookup in bucket db. + */ + void createTestBucket(const BucketId&, uint16_t disk = 0xffff); + + /** + * In-place modify doc so that it has no more body fields. 
+ */ + void clearBody(document::Document& doc); + + /** + * Copy all header data from src into dest, replacing any + * header fields it may already have there. NOTE: this will + * also overwrite document ID, type etc! + */ + void copyHeader(document::Document& dest, + const document::Document& src); + + /** + * Copy all body data from src into dest, replacing any + * body fields it may already have there. + */ + void copyBody(document::Document& dest, + const document::Document& src); + + std::string stringifyFields(const Document& doc) const; + + struct IoErrors { + int _afterReads; + int _afterWrites; + + IoErrors() + : _afterReads(0), + _afterWrites(0) + { + } + + IoErrors& afterReads(int n) { + _afterReads = n; + return *this; + } + + IoErrors& afterWrites(int n) { + _afterWrites = n; + return *this; + } + }; + + /** + * Replaces internal LazyFile factory so that it produces LazyFile + * implementations that trigger I/O exceptions on read/write. Optionally, + * can supply a parameter setting explicit bounds on how many operations + * are allowed on a file before trigging exceptions from there on out. A + * bound of -1 in practice means "don't fail ever" while 0 means "fail the + * next op of that type". + */ + void simulateIoErrorsForSubsequentlyOpenedFiles( + const IoErrors& errs = IoErrors()); + + /** + * Replace internal LazyFile factory with the default, non-failing impl. + */ + void unSimulateIoErrorsForSubsequentlyOpenedFiles(); +}; + +class SingleDiskMemFileTestUtils : public MemFileTestUtils +{ +public: + void setUp() { + setupDisks(1); + } +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp b/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp new file mode 100644 index 00000000000..a5d1c50d043 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp @@ -0,0 +1,1110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <tests/spi/memfiletestutils.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/memfilepersistence/mapper/locationreadplanner.h> +#include <tests/spi/simulatedfailurefile.h> +#include <tests/spi/options_builder.h> + +namespace storage { +namespace memfile { + +struct MemFileV1SerializerTest : public SingleDiskMemFileTestUtils +{ + void tearDown(); + void setUpPartialWriteEnvironment(); + void resetConfig(uint32_t minimumFileSize, uint32_t minimumFileHeaderBlockSize); + void doTestPartialWriteRemove(bool readAll); + void doTestPartialWriteUpdate(bool readAll); + + void testWriteReadSingleDoc(); + void testWriteReadPartial(); + void testWriteReadPartialRemoved(); + void testPartialWritePutHeaderOnly(); + void testPartialWritePut(); + void testPartialWriteRemoveCached(); + void testPartialWriteRemoveNotCached(); + void testPartialWriteUpdateCached(); + void testPartialWriteUpdateNotCached(); + void testPartialWriteTooMuchFreeSpace(); + void testPartialWriteNotEnoughFreeSpace(); + void testWriteReadSingleRemovedDoc(); + void testLocationDiskIoPlannerSimple(); + void testLocationDiskIoPlannerMergeReads(); + void testLocationDiskIoPlannerAlignReads(); + void testLocationDiskIoPlannerOneDocument(); + void testSeparateReadsForHeaderAndBody(); + void testLocationsRemappedConsistently(); + void testHeaderBufferTooSmall(); + + /*std::unique_ptr<MemFile> createMemFile(FileSpecification& file, + bool callLoadFile) + { + return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile)); + }*/ + + CPPUNIT_TEST_SUITE(MemFileV1SerializerTest); + CPPUNIT_TEST(testWriteReadSingleDoc); + CPPUNIT_TEST(testWriteReadPartial); + CPPUNIT_TEST(testWriteReadPartialRemoved); + 
CPPUNIT_TEST(testWriteReadSingleRemovedDoc); + CPPUNIT_TEST(testPartialWritePutHeaderOnly); + CPPUNIT_TEST(testPartialWritePut); + CPPUNIT_TEST(testPartialWriteRemoveCached); + CPPUNIT_TEST(testPartialWriteRemoveNotCached); + CPPUNIT_TEST(testPartialWriteUpdateCached); + CPPUNIT_TEST(testPartialWriteUpdateNotCached); + CPPUNIT_TEST(testLocationDiskIoPlannerSimple); + CPPUNIT_TEST(testLocationDiskIoPlannerMergeReads); + CPPUNIT_TEST(testLocationDiskIoPlannerAlignReads); + CPPUNIT_TEST(testLocationDiskIoPlannerOneDocument); + CPPUNIT_TEST(testSeparateReadsForHeaderAndBody); + CPPUNIT_TEST(testPartialWriteTooMuchFreeSpace); + CPPUNIT_TEST(testPartialWriteNotEnoughFreeSpace); + CPPUNIT_TEST(testLocationsRemappedConsistently); + CPPUNIT_TEST(testHeaderBufferTooSmall); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1SerializerTest); + +namespace { + +const vespalib::LazyFile& +getFileHandle(const MemFile& mf1) +{ + return static_cast<const SimpleMemFileIOBuffer&>( + mf1.getMemFileIO()).getFileHandle(); +} + +const LoggingLazyFile& +getLoggerFile(const MemFile& file) +{ + return static_cast<const LoggingLazyFile&>(getFileHandle(file)); +} + +bool isContentEqual(MemFile& mf1, MemFile& mf2, + bool requireEqualContentCached, std::ostream& error) +{ + MemFile::const_iterator it1( + mf1.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED)); + MemFile::const_iterator it2( + mf2.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED)); + while (true) { + if (it1 == mf1.end() && it2 == mf2.end()) { + return true; + } + if (it1 == mf1.end() || it2 == mf2.end()) { + error << "Different amount of GID unique slots"; + return false; + } + if (it1->getTimestamp() != it2->getTimestamp()) { + error << "Different timestamps"; + return false; + } + if (it1->getGlobalId() != it2->getGlobalId()) { + error << "Different gids"; + return false; + } + if (it1->getPersistedFlags() != it2->getPersistedFlags()) { + error << "Different persisted flags"; + return 
false; + } + if (requireEqualContentCached) { + if (mf1.partAvailable(*it1, Types::BODY) + ^ mf2.partAvailable(*it2, Types::BODY) + || mf1.partAvailable(*it1, Types::HEADER) + ^ mf2.partAvailable(*it2, Types::HEADER)) + { + error << "Difference in cached content: "; + return false; + } + } + + if (mf1.partAvailable(*it1, Types::HEADER) && + mf2.partAvailable(*it2, Types::HEADER)) + { + document::Document::UP doc1 = mf1.getDocument(*it1, Types::ALL); + document::Document::UP doc2 = mf2.getDocument(*it2, Types::ALL); + + CPPUNIT_ASSERT(doc1.get()); + CPPUNIT_ASSERT(doc2.get()); + + if (*doc1 != *doc2) { + error << "Documents different: Expected:\n" + << doc1->toString(true) << "\nActual:\n" + << doc2->toString(true) << "\n"; + return false; + } + } + ++it1; + ++it2; + } +} + +bool +validateMemFileStructure(const MemFile& mf, std::ostream& error) +{ + const SimpleMemFileIOBuffer& ioBuf( + dynamic_cast<const SimpleMemFileIOBuffer&>(mf.getMemFileIO())); + const FileInfo& fileInfo(ioBuf.getFileInfo()); + if (fileInfo.getFileSize() % 512) { + error << "File size is not a multiple of 512 bytes"; + return false; + } + if (fileInfo.getBlockIndex(Types::BODY) % 512) { + error << "Body start index is not a multiple of 512 bytes"; + return false; + } + if (fileInfo.getBlockSize(Types::BODY) % 512) { + error << "Body size is not a multiple of 512 bytes"; + return false; + } + return true; +} + +} + +void +MemFileV1SerializerTest::tearDown() { + //_memFile.reset(); +} + +/** + * Adjust minimum slotfile size values to avoid rewriting file + * when we want to get a partial write + */ +void +MemFileV1SerializerTest::setUpPartialWriteEnvironment() +{ + resetConfig(4096, 2048); +} + +void +MemFileV1SerializerTest::resetConfig(uint32_t minimumFileSize, + uint32_t minimumFileHeaderBlockSize) +{ + using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig; + using MemFileConfigBuilder + = vespa::config::storage::StorMemfilepersistenceConfigBuilder; + + 
MemFileConfigBuilder persistenceConfig( + *env().acquireConfigReadLock().memFilePersistenceConfig()); + persistenceConfig.minimumFileHeaderBlockSize = minimumFileHeaderBlockSize; + persistenceConfig.minimumFileSize = minimumFileSize; + auto newCfg = std::unique_ptr<MemFileConfig>( + new MemFileConfig(persistenceConfig)); + env().acquireConfigWriteLock().setMemFilePersistenceConfig( + std::move(newCfg)); +} + +struct DummyMemFileIOInterface : MemFileIOInterface { + Document::UP getDocumentHeader(const document::DocumentTypeRepo&, + DataLocation) const + { + return Document::UP(); + } + + document::DocumentId getDocumentId(DataLocation) const { + return document::DocumentId(""); + } + + void readBody(const document::DocumentTypeRepo&, + DataLocation, + Document&) const + { + } + DataLocation addDocumentIdOnlyHeader( + const DocumentId&, + const document::DocumentTypeRepo&) + { + return DataLocation(); + } + DataLocation addHeader(const Document&) { return DataLocation(); } + DataLocation addBody(const Document&) { return DataLocation(); } + void clear(DocumentPart) {} + bool verifyConsistent() const { return true; } + void move(const FileSpecification&) {} + DataLocation copyCache(const MemFileIOInterface&, + DocumentPart, + DataLocation) + { + return DataLocation(); + } + + void close() {}; + bool isCached(DataLocation, DocumentPart) const { return false; } + bool isPersisted(DataLocation, DocumentPart) const { return false; } + uint32_t getSerializedSize(DocumentPart, + DataLocation) const { return 0; } + + void ensureCached(Environment&, + DocumentPart, + const std::vector<DataLocation>&) + {} + + size_t getCachedSize(DocumentPart) const { return 0; } +}; + +#define VESPA_MEMFILEV1_SETUP_SOURCE \ + system("rm -f testfile.0"); \ + document::Document::SP doc(createRandomDocumentAtLocation(4)); \ + FileSpecification file(document::BucketId(16, 4), env().getDirectory(0), "testfile.0"); \ + MemFile source(file, env()); + +#define VESPA_MEMFILEV1_DIFF(source, target) \ 
+ "\nSource:\n" + source.toString(true) \ + + "\nTarget:\n" + target.toString(true) + +#define VESPA_MEMFILEV1_VALIDATE_STRUCTURE(mfile) \ +{ \ + std::ostringstream validateErr; \ + if (!validateMemFileStructure(mfile, validateErr)) { \ + CPPUNIT_FAIL(validateErr.str()); \ + } \ +} + +#define VESPA_MEMFILEV1_ASSERT_SERIALIZATION(sourceMemFile) \ +env()._memFileMapper.flush(sourceMemFile, env()); \ +VESPA_MEMFILEV1_VALIDATE_STRUCTURE(sourceMemFile) \ +MemFile target(file, env()); \ +VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target) \ +{ \ + target.ensureBodyBlockCached(); \ + target.getBucketInfo(); \ + std::ostringstream diff; \ + if (!isContentEqual(sourceMemFile, target, true, diff)) { \ + std::string msg = "MemFiles not content equal: " + diff.str() \ + + VESPA_MEMFILEV1_DIFF(sourceMemFile, target); \ + CPPUNIT_FAIL(msg); \ + } \ +} + +void +MemFileV1SerializerTest::testWriteReadSingleDoc() +{ + VESPA_MEMFILEV1_SETUP_SOURCE; + source.addPutSlot(*doc, Timestamp(1001)); + std::string foo(VESPA_MEMFILEV1_DIFF(source, source)); + VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source); +} + +void +MemFileV1SerializerTest::testWriteReadPartial() +{ + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + std::map<Timestamp, Document::SP> docs; + { + MemFile source(file, env()); + + for (int i = 0; i < 50; ++i) { + Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000)); + source.addPutSlot(*doc, Timestamp(1001 + i)); + docs[Timestamp(1001 + i)] = doc; + } + + env()._memFileMapper.flush(source, env()); + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); + } + + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).maximumReadThroughGap(1024).build()); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory()); + + MemFile target(file, env()); + + std::vector<Timestamp> timestamps; + + for (int i = 0; i < 
50; i+=4) { + timestamps.push_back(Timestamp(1001 + i)); + } + CPPUNIT_ASSERT_EQUAL(size_t(13), timestamps.size()); + + getLoggerFile(target).operations.clear(); + target.ensureDocumentCached(timestamps, false); + // Headers are small enough that they get read in 1 op + 13 body reads + CPPUNIT_ASSERT_EQUAL(14, (int)getLoggerFile(target).operations.size()); + + for (std::size_t i = 0; i < timestamps.size(); ++i) { + const MemSlot* slot = target.getSlotAtTime(timestamps[i]); + CPPUNIT_ASSERT(slot); + CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); + CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); + CPPUNIT_ASSERT_EQUAL(*docs[timestamps[i]], *target.getDocument(*slot, ALL)); + } + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); +} + +void +MemFileV1SerializerTest::testWriteReadPartialRemoved() +{ + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + MemFile source(file, env()); + + for (int i = 0; i < 50; ++i) { + Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000)); + source.addPutSlot(*doc, Timestamp(1001 + i)); + source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1001 + i)), + Timestamp(2001 + i)); + } + + env()._memFileMapper.flush(source, env()); + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).maximumReadThroughGap(1024).build()); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory); + + MemFile target(file, env()); + + std::vector<Timestamp> timestamps; + + for (int i = 0; i < 50; i+=4) { + timestamps.push_back(Timestamp(2001 + i)); + } + + getLoggerFile(target).operations.clear(); + target.ensureDocumentCached(timestamps, false); + // All removed; should only read header locations + CPPUNIT_ASSERT_EQUAL(1, (int)getLoggerFile(target).operations.size()); + + for (std::size_t i = 0; i < timestamps.size(); ++i) 
{ + const MemSlot* slot = target.getSlotAtTime(timestamps[i]); + const MemSlot* removedPut( + target.getSlotAtTime(timestamps[i] - Timestamp(1000))); + CPPUNIT_ASSERT(slot); + CPPUNIT_ASSERT(removedPut); + CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); + CPPUNIT_ASSERT_EQUAL(removedPut->getLocation(HEADER), + slot->getLocation(HEADER)); + CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), slot->getLocation(BODY)); + } + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); +} + +void MemFileV1SerializerTest::testWriteReadSingleRemovedDoc() +{ + VESPA_MEMFILEV1_SETUP_SOURCE; + source.addPutSlot(*doc, Timestamp(1001)); + source.addRemoveSlot( + *source.getSlotAtTime(Timestamp(1001)), Timestamp(2001)); + VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source); +} + +/** + * Write a single put with no body to the memfile and ensure it is + * persisted properly without a body block + */ +void +MemFileV1SerializerTest::testPartialWritePutHeaderOnly() +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + document::Document::SP doc(createRandomDocumentAtLocation(4)); + { + MemFile source(file, env()); + source.addPutSlot(*doc, Timestamp(1001)); + env()._memFileMapper.flush(source, env()); + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); + } + { + // Have to put a second time since the first one will always + // rewrite the entire file + MemFile target(file, env()); + Document::SP doc2(createRandomDocumentAtLocation(4)); + clearBody(*doc2); + target.addPutSlot(*doc2, Timestamp(1003)); + env()._memFileMapper.flush(target, env()); + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); + } + { + MemFile target(file, env()); + target.ensureBodyBlockCached(); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); + + const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003)); + CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0); + CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0); + CPPUNIT_ASSERT_EQUAL( + 
DataLocation(0, 0), slot.getLocation(BODY)); + VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); + } +} + + + + +void +MemFileV1SerializerTest::testLocationDiskIoPlannerSimple() +{ + std::vector<MemSlot> slots; + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1001), + DataLocation(0, 1024), + DataLocation(4096, 512), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1003), + DataLocation(1024, 1024), + DataLocation(8192, 512), 0, 0)); + } + + std::vector<DataLocation> headers; + std::vector<DataLocation> bodies; + headers.push_back(slots[0].getLocation(HEADER)); + bodies.push_back(slots[0].getLocation(BODY)); + + DummyMemFileIOInterface dummyIo; + { + LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 100, 0); + + CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(0, 1024), + planner.getIoOperations()[0]); + } + { + LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 100, 4096); + + CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(8192, 512), // + block index + planner.getIoOperations()[0]); + } +} + +void +MemFileV1SerializerTest::testLocationDiskIoPlannerMergeReads() +{ + std::vector<MemSlot> slots; + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1001), + DataLocation(0, 1024), + DataLocation(5120, 512), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1002), + DataLocation(2048, 1024), + DataLocation(7168, 512), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1003), + DataLocation(1024, 1024), + 
DataLocation(9216, 512), 0, 0)); + } + + std::vector<DataLocation> headers; + std::vector<DataLocation> bodies; + for (int i = 0; i < 2; ++i) { + headers.push_back(slots[i].getLocation(HEADER)); + bodies.push_back(slots[i].getLocation(BODY)); + } + + DummyMemFileIOInterface dummyIo; + { + LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1025, 0); + + CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(0, 3072), + planner.getIoOperations()[0]); + } + + { + LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1025, 0); + + CPPUNIT_ASSERT_EQUAL(2, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(5120, 512), + planner.getIoOperations()[0]); + CPPUNIT_ASSERT_EQUAL( + DataLocation(7168, 512), + planner.getIoOperations()[1]); + } +} + +void +MemFileV1SerializerTest::testLocationDiskIoPlannerOneDocument() +{ + std::vector<MemSlot> slots; + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1001), + DataLocation(0, 1024), + DataLocation(5120, 512), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1002), + DataLocation(2048, 1024), + DataLocation(7168, 512), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1003), + DataLocation(1024, 1024), + DataLocation(9216, 512), 0, 0)); + } + + std::vector<DataLocation> headers; + std::vector<DataLocation> bodies; + headers.push_back(slots[1].getLocation(HEADER)); + bodies.push_back(slots[1].getLocation(BODY)); + + DummyMemFileIOInterface dummyIo; + { + LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1000, 0); + CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(2048, 1024), + planner.getIoOperations()[0]); + } + + { + 
LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1000, 0); + CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); + CPPUNIT_ASSERT_EQUAL( + DataLocation(7168, 512), + planner.getIoOperations()[0]); + } +} + +void +MemFileV1SerializerTest::testLocationDiskIoPlannerAlignReads() +{ + std::vector<MemSlot> slots; + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1001), + DataLocation(7, 100), + DataLocation(5000, 500), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1002), + DataLocation(2000, 100), + DataLocation(7000, 500), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1003), + DataLocation(110, 200), + DataLocation(9000, 500), 0, 0)); + } + + { + Document::SP doc(createRandomDocumentAtLocation(4)); + slots.push_back( + MemSlot( + doc->getId().getGlobalId(), + Timestamp(1004), + DataLocation(3000, 100), + DataLocation(11000, 500), 0, 0)); + } + + std::vector<DataLocation> headers; + std::vector<DataLocation> bodies; + for (int i = 0; i < 2; ++i) { + headers.push_back(slots[i].getLocation(HEADER)); + bodies.push_back(slots[i].getLocation(BODY)); + } + + DummyMemFileIOInterface dummyIo; + { + LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 512, 0); + std::vector<DataLocation> expected; + expected.push_back(DataLocation(0, 512)); + expected.push_back(DataLocation(1536, 1024)); + + CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations()); + } + { + LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 512, 0); + std::vector<DataLocation> expected; + expected.push_back(DataLocation(4608, 1024)); + expected.push_back(DataLocation(6656, 1024)); + + CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations()); + } +} + +// TODO(vekterli): add read planner test with a location cached 
+ +void +MemFileV1SerializerTest::testSeparateReadsForHeaderAndBody() +{ + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + Document::SP doc(createRandomDocumentAtLocation(4, 0, 1000, 2000)); + { + MemFile source(file, env()); + source.addPutSlot(*doc, Timestamp(1001)); + + env()._memFileMapper.flush(source, env()); + } + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options) + .maximumReadThroughGap(1024*1024*100) + .build()); + env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( + new LoggingLazyFile::Factory()); + + MemFile target(file, env()); + + std::vector<Timestamp> timestamps; + timestamps.push_back(Timestamp(1001)); + + getLoggerFile(target).operations.clear(); + target.ensureDocumentCached(timestamps, false); + + CPPUNIT_ASSERT_EQUAL(2, (int)getLoggerFile(target).operations.size()); + const MemSlot* slot = target.getSlotAtTime(Timestamp(1001)); + CPPUNIT_ASSERT(slot); + CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); + CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); + CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(*slot, ALL)); + + CPPUNIT_ASSERT(getMetrics().serialization.headerReadSize.getLast() > 0); + CPPUNIT_ASSERT(getMetrics().serialization.bodyReadSize.getLast() > 0); +} + +/** + * Write a single put with body to the memfile and ensure it is + * persisted properly with both header and body blocks + */ +void +MemFileV1SerializerTest::testPartialWritePut() +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + Document::SP doc(createRandomDocumentAtLocation(4)); + { + MemFile source(file, env()); + source.addPutSlot(*doc, Timestamp(1001)); + + env()._memFileMapper.flush(source, env()); + } + + { + // Have to put a second time since the first one will always + // rewrite the entire file + MemFile 
target(file, env()); + Document::SP doc2(createRandomDocumentAtLocation(4)); + target.addPutSlot(*doc2, Timestamp(1003)); + env()._memFileMapper.flush(target, env()); + } + { + MemFile target(file, env()); + target.ensureBodyBlockCached(); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); + + const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003)); + CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0); + CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0); + + CPPUNIT_ASSERT(slot.getLocation(BODY)._size > 0); + CPPUNIT_ASSERT(slot.getLocation(BODY)._pos > 0); + } +} + +void +MemFileV1SerializerTest::doTestPartialWriteRemove(bool readAll) +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + Document::SP doc(createRandomDocumentAtLocation(4)); + { + MemFile source(file, env()); + source.addPutSlot(*doc, Timestamp(1001)); + env()._memFileMapper.flush(source, env()); + } + { + MemFile target(file, env()); + // Only populate cache before removing if explicitly told so + if (readAll) { + target.ensureBodyBlockCached(); + } + CPPUNIT_ASSERT_EQUAL(uint32_t(1), target.getSlotCount()); + target.addRemoveSlot(target[0], Timestamp(1003)); + + env()._memFileMapper.flush(target, env()); + } + { + MemFile target(file, env()); + target.ensureBodyBlockCached(); + + CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); + + const MemSlot& originalSlot = target[0]; + const MemSlot& removeSlot = target[1]; + CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0); + CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0); + CPPUNIT_ASSERT_EQUAL( + originalSlot.getLocation(HEADER), + removeSlot.getLocation(HEADER)); + CPPUNIT_ASSERT_EQUAL( + DataLocation(0, 0), removeSlot.getLocation(BODY)); + } +} + +/** + * Ensure that removes get the same header location as the Put + * they're removing, and that they get a zero body location + */ +void 
+MemFileV1SerializerTest::testPartialWriteRemoveCached() +{ + doTestPartialWriteRemove(true); +} + +void +MemFileV1SerializerTest::testPartialWriteRemoveNotCached() +{ + doTestPartialWriteRemove(false); +} + +void +MemFileV1SerializerTest::doTestPartialWriteUpdate(bool readAll) +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + Document::SP doc(createRandomDocumentAtLocation(4)); + { + MemFile source(file, env()); + source.addPutSlot(*doc, Timestamp(1001)); + env()._memFileMapper.flush(source, env()); + } + + Document::SP doc2; + { + MemFile target(file, env()); + if (readAll) { + target.ensureBodyBlockCached(); + } + + doc2.reset(new Document(*doc->getDataType(), doc->getId())); + clearBody(*doc2); + doc2->setValue(doc->getField("hstringval"), + document::StringFieldValue("Some updated content")); + + target.addUpdateSlot(*doc2, *target.getSlotAtTime(Timestamp(1001)), + Timestamp(1003)); + env()._memFileMapper.flush(target, env()); + } + + { + MemFile target(file, env()); + CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); + const MemSlot& originalSlot = target[0]; + const MemSlot& updateSlot = target[1]; + CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0); + CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0); + CPPUNIT_ASSERT_EQUAL( + originalSlot.getLocation(BODY), + updateSlot.getLocation(BODY)); + CPPUNIT_ASSERT( + updateSlot.getLocation(HEADER) + != originalSlot.getLocation(HEADER)); + + CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[0], ALL)); + copyHeader(*doc, *doc2); + CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[1], ALL)); + } +} + +/** + * Ensure that header updates keep the same body block + */ +void +MemFileV1SerializerTest::testPartialWriteUpdateCached() +{ + doTestPartialWriteUpdate(true); +} + +void +MemFileV1SerializerTest::testPartialWriteUpdateNotCached() +{ + doTestPartialWriteUpdate(false); +} + +void 
+MemFileV1SerializerTest::testPartialWriteTooMuchFreeSpace() +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + { + MemFile source(file, env()); + Document::SP doc(createRandomDocumentAtLocation(4)); + source.addPutSlot(*doc, Timestamp(1001)); + env()._memFileMapper.flush(source, env()); + } + int64_t sizeBefore; + // Append filler to slotfile to make it too big for comfort, + // forcing a rewrite to shrink it down + { + vespalib::File slotfile(file.getPath()); + slotfile.open(0); + CPPUNIT_ASSERT(slotfile.isOpen()); + sizeBefore = slotfile.getFileSize(); + slotfile.resize(sizeBefore * 20); // Well over min fill rate of 10% + } + // Write new slot to file; it should now be rewritten with the + // same file size as originally + { + MemFile source(file, env()); + Document::SP doc(createRandomDocumentAtLocation(4)); + source.addPutSlot(*doc, Timestamp(1003)); + env()._memFileMapper.flush(source, env()); + } + { + vespalib::File slotfile(file.getPath()); + slotfile.open(0); + CPPUNIT_ASSERT(slotfile.isOpen()); + CPPUNIT_ASSERT_EQUAL( + sizeBefore, + slotfile.getFileSize()); + } + CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization + .fullRewritesDueToDownsizingFile.getValue()); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization + .fullRewritesDueToTooSmallFile.getValue()); +} + +void +MemFileV1SerializerTest::testPartialWriteNotEnoughFreeSpace() +{ + setUpPartialWriteEnvironment(); + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + // Write file initially + MemFile source(file, env()); + { + Document::SP doc(createRandomDocumentAtLocation(4)); + source.addPutSlot(*doc, Timestamp(1001)); + env()._memFileMapper.flush(source, env()); + } + + uint32_t minFile = 1024 * 512; + auto memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig(); + resetConfig(minFile, 
memFileCfg->minimumFileHeaderBlockSize); + + // Create doc bigger than initial minimum filesize, + // prompting a full rewrite + Document::SP doc( + createRandomDocumentAtLocation(4, 0, 4096, 4096)); + source.addPutSlot(*doc, Timestamp(1003)); + + env()._memFileMapper.flush(source, env()); + + CPPUNIT_ASSERT_EQUAL( + minFile, + uint32_t(getFileHandle(source).getFileSize())); + + CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization + .fullRewritesDueToDownsizingFile.getValue()); + CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization + .fullRewritesDueToTooSmallFile.getValue()); + + // Now, ensure we respect minimum file size and don't try to + // "helpfully" rewrite the file again (try to detect full + // file rewrite with help from the fact we don't currently + // check whether or not the file is < the minimum filesize. + // If that changes, so must this) + memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig(); + resetConfig(2 * minFile, memFileCfg->minimumFileHeaderBlockSize); + + source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1003)), + Timestamp(1005)); + env()._memFileMapper.flush(source, env()); + + CPPUNIT_ASSERT_EQUAL( + minFile, + uint32_t(getFileHandle(source).getFileSize())); + + CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization + .fullRewritesDueToTooSmallFile.getValue()); +} + +// Test that we don't mess up when remapping locations that +// have already been written during the same operation. That is: +// part A is remapped (P1, S1) -> (P2, S2) +// part B is remapped (P2, S2) -> (P3, S3) +// Obviously, part B should not overwrite the location of part A, +// but this will happen if we don't do the updating in one batch. 
+void +MemFileV1SerializerTest::testLocationsRemappedConsistently() +{ + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + + std::map<Timestamp, Document::SP> docs; + { + MemFile mf(file, env()); + Document::SP tmpDoc( + createRandomDocumentAtLocation(4, 0, 100, 100)); + + // Create docs identical in size but differing only in doc ids + // By keeping same size but inserting with _lower_ timestamps + // for docs that get higher location positions, we ensure that + // when the file is rewritten, the lower timestamp slots will + // get remapped to locations that match existing locations for + // higher timestamp slots. + for (int i = 0; i < 2; ++i) { + std::ostringstream ss; + ss << "doc" << i; + DocumentId id(document::UserDocIdString("userdoc:foo:4:" + ss.str())); + Document::SP doc(new Document(*tmpDoc->getDataType(), id)); + doc->getFields() = tmpDoc->getFields(); + mf.addPutSlot(*doc, Timestamp(1000 - i)); + docs[Timestamp(1000 - i)] = doc; + } + + env()._memFileMapper.flush(mf, env()); + // Dirty the cache for rewrite + { + DocumentId id2(document::UserDocIdString("userdoc:foo:4:doc9")); + Document::UP doc2(new Document(*tmpDoc->getDataType(), id2)); + doc2->getFields() = tmpDoc->getFields(); + mf.addPutSlot(*doc2, Timestamp(2000)); + docs[Timestamp(2000)] = std::move(doc2); + } + + // Force rewrite + auto memFileCfg = env().acquireConfigReadLock() + .memFilePersistenceConfig(); + resetConfig(1024*512, memFileCfg ->minimumFileHeaderBlockSize); + env()._memFileMapper.flush(mf, env()); + } + + MemFile target(file, env()); + target.ensureBodyBlockCached(); + + std::ostringstream err; + if (!env()._memFileMapper.verify(target, env(), err)) { + std::cerr << err.str() << "\n"; + CPPUNIT_FAIL("MemFile verification failed"); + } + + typedef std::map<Timestamp, Document::SP>::iterator Iter; + for (Iter it(docs.begin()); it != docs.end(); ++it) { + const MemSlot* slot = target.getSlotAtTime(it->first); + 
CPPUNIT_ASSERT(slot); + CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); + CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); + CPPUNIT_ASSERT_EQUAL(*it->second, *target.getDocument(*slot, ALL)); + } +} + +/** + * Test that we read in the correct header information when we have to read + * in two passes to get it in its entirety. + */ +void +MemFileV1SerializerTest::testHeaderBufferTooSmall() +{ + system("rm -f testfile.0"); + FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); + FileInfo wantedInfo; + { + MemFile f(file, env()); + // 50*40 bytes of meta list data should be more than sufficient + for (size_t i = 0; i < 50; ++i) { + Document::SP doc(createRandomDocumentAtLocation(4, i)); + f.addPutSlot(*doc, Timestamp(1001 + i)); + env()._memFileMapper.flush(f, env()); + } + SimpleMemFileIOBuffer& io( + dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO())); + wantedInfo = io.getFileInfo(); + } + + // Force initial index read to be too small to contain all metadata, + // triggering buffer resize and secondary read. 
+ auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options).initialIndexRead(512).build()); + { + MemFile f(file, env()); + CPPUNIT_ASSERT_EQUAL(uint32_t(50), f.getSlotCount()); + // Ensure we've read correct file info + SimpleMemFileIOBuffer& io( + dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO())); + const FileInfo& info(io.getFileInfo()); + CPPUNIT_ASSERT_EQUAL(wantedInfo.getFileSize(), info.getFileSize()); + CPPUNIT_ASSERT_EQUAL(wantedInfo.getHeaderBlockStartIndex(), + info.getHeaderBlockStartIndex()); + CPPUNIT_ASSERT_EQUAL(wantedInfo.getBodyBlockStartIndex(), + info.getBodyBlockStartIndex()); + CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(HEADER), + info.getBlockSize(HEADER)); + CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(BODY), + info.getBlockSize(BODY)); + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp b/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp new file mode 100644 index 00000000000..0cf04eadaa2 --- /dev/null +++ b/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp @@ -0,0 +1,501 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> +#include <vespa/memfilepersistence/mapper/fileinfo.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <tests/spi/memfiletestutils.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <tests/spi/simulatedfailurefile.h> + +namespace storage { +namespace memfile { + +struct MemFileV1VerifierTest : public SingleDiskMemFileTestUtils +{ + void testVerify(); + + void tearDown(); + + std::unique_ptr<MemFile> createMemFile(FileSpecification& file, + bool callLoadFile) + { + return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile)); + } + + CPPUNIT_TEST_SUITE(MemFileV1VerifierTest); + CPPUNIT_TEST_IGNORED(testVerify); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1VerifierTest); + +namespace { + // A totall uncached memfile with content to use for verify testing + std::unique_ptr<MemFile> _memFile; + + // Clear old content. Create new file. Make sure nothing is cached. 
+ void prepareBucket(SingleDiskMemFileTestUtils& util, + const FileSpecification& file) { + _memFile.reset(); + util.env()._cache.clear(); + vespalib::unlink(file.getPath()); + util.createTestBucket(file.getBucketId(), 0); + util.env()._cache.clear(); + _memFile.reset(new MemFile(file, util.env())); + _memFile->getMemFileIO().close(); + + } + + // Get copy of header of memfile created + Header getHeader() { + assert(_memFile.get()); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + Header result; + file.read(&result, sizeof(Header), 0); + return result; + } + + MetaSlot getSlot(uint32_t index) { + assert(_memFile.get()); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + MetaSlot result; + file.read(&result, sizeof(MetaSlot), + sizeof(Header) + sizeof(MetaSlot) * index); + return result; + } + + void setSlot(uint32_t index, MetaSlot slot, + bool updateFileChecksum = true) + { + (void)updateFileChecksum; + assert(_memFile.get()); + //if (updateFileChecksum) slot.updateFileChecksum(); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + file.write(&slot, sizeof(MetaSlot), + sizeof(Header) + sizeof(MetaSlot) * index); + } + + void setHeader(const Header& header) { + assert(_memFile.get()); + vespalib::LazyFile file(_memFile->getFile().getPath(), 0); + file.write(&header, sizeof(Header), 0); + } + + void verifySlotFile(MemFileV1VerifierTest& util, + const std::string& expectedError, + const std::string& message, + int32_t remainingEntries, + bool includeContent = true, + bool includeHeader = true) + { + assert(_memFile.get()); + FileSpecification file(_memFile->getFile()); + _memFile.reset(); + _memFile = util.createMemFile(file, false); + std::ostringstream before; + try{ + util.env()._memFileMapper.loadFile(*_memFile, util.env(), false); + _memFile->print(before, true, ""); + } catch (vespalib::Exception& e) { + before << "Unknown. Exception during loadFile\n"; + } + std::ostringstream errors; + uint32_t flags = (includeContent ? 
0 : Types::DONT_VERIFY_BODY) + | (includeHeader ? 0 : Types::DONT_VERIFY_HEADER); + if (util.env()._memFileMapper.verify( + *_memFile, util.env(), errors, flags)) + { + _memFile->print(std::cerr, true, ""); + std::cerr << errors.str() << "\n"; + CPPUNIT_FAIL("verify() failed to detect: " + message); + } + CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(), + expectedError, errors.str()); + errors.str(""); + if (util.env()._memFileMapper.repair( + *_memFile, util.env(), errors, flags)) + { + CPPUNIT_FAIL("repair() failed to detect: " + message + + ": " + errors.str()); + } + CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(), + expectedError, errors.str()); + std::ostringstream remainingErrors; + if (!util.env()._memFileMapper.verify( + *_memFile, util.env(), remainingErrors, flags)) + { + CPPUNIT_FAIL("verify() returns issue after repair of: " + + message + ": " + remainingErrors.str()); + } + CPPUNIT_ASSERT_MESSAGE(remainingErrors.str(), + remainingErrors.str().size() == 0); + if (remainingEntries < 0) { + if (_memFile->fileExists()) { + CPPUNIT_FAIL(message + ": Expected file to not exist anymore"); + } + } else if (dynamic_cast<SimpleMemFileIOBuffer&>(_memFile->getMemFileIO()) + .getFileHandle().getFileSize() == 0) + { + std::ostringstream ost; + ost << "Expected " << remainingEntries << " to remain in file, " + << "but file does not exist\n"; + CPPUNIT_FAIL(message + ": " + ost.str()); + } else { + if (int64_t(_memFile->getSlotCount()) != remainingEntries) { + std::ostringstream ost; + ost << "Expected " << remainingEntries << " to remain in file, " + << "but found " << _memFile->getSlotCount() << "\n"; + ost << errors.str() << "\n"; + ost << "Before: " << before.str() << "\nAfter: "; + _memFile->print(ost, true, ""); + CPPUNIT_FAIL(message + ": " + ost.str()); + } + } + } +} + +void +MemFileV1VerifierTest::tearDown() +{ + _memFile.reset(0); + SingleDiskMemFileTestUtils::tearDown(); +}; + +void 
+MemFileV1VerifierTest::testVerify() +{ + BucketId bucket(16, 0xa); + std::unique_ptr<FileSpecification> file; + createTestBucket(bucket, 0); + + { + MemFilePtr memFilePtr(env()._cache.get(bucket, env(), env().getDirectory())); + file.reset(new FileSpecification(memFilePtr->getFile())); + env()._cache.clear(); + } + { // Ensure buildTestFile builds a valid file + // Initial file should be fine. + MemFile memFile(*file, env()); + std::ostringstream errors; + if (!env()._memFileMapper.verify(memFile, env(), errors)) { + memFile.print(std::cerr, false, ""); + CPPUNIT_FAIL("Slotfile failed verification: " + errors.str()); + } + } + // Header tests + prepareBucket(*this, *file); + Header orgheader(getHeader()); + { // Test wrong version + Header header(orgheader); + header.setVersion(0xc0edbabe); + header.updateChecksum(); + setHeader(header); + verifySlotFile(*this, + "400000000000000a.0 is of wrong version", + "Faulty version", + -1); + } + { // Test meta data list size bigger than file + prepareBucket(*this, *file); + Header header(orgheader); + header.setMetaDataListSize(0xFFFF); + header.updateChecksum(); + setHeader(header); + verifySlotFile(*this, + "indicates file is bigger than it physically is", + "Too big meta data list size", + -1); + } + { // Test header block size bigger than file + prepareBucket(*this, *file); + Header header(orgheader); + header.setHeaderBlockSize(0xFFFF); + header.updateChecksum(); + setHeader(header); + verifySlotFile(*this, + "Header indicates file is bigger than it physically is", + "Too big header block size", + -1); + } + { // Test wrong header crc + prepareBucket(*this, *file); + Header header(orgheader); + header.setMetaDataListSize(4); + setHeader(header); + verifySlotFile(*this, + "Header checksum mismatch", + "Wrong header checksum", + -1); + } + // Meta data tests + prepareBucket(*this, *file); + MetaSlot slot6(getSlot(6)); + { // Test extra removes - currently allowed + MetaSlot slot7(getSlot(7)); + MetaSlot s(slot7); + 
s.setTimestamp(Timestamp(s._timestamp.getTime() - 1)); + s.updateChecksum(); + setSlot(6, s); + s.setTimestamp(Timestamp(s._timestamp.getTime() + 1)); + s.updateChecksum(); + setSlot(7, s); + std::ostringstream errors; + if (!env()._memFileMapper.verify(*_memFile, env(), errors)) { + _memFile->print(std::cerr, false, ""); + std::cerr << errors.str() << "\n"; + CPPUNIT_FAIL("Supposed to be legal with multiple remove values"); + } + setSlot(7, slot7); + } + { + // Test metadata crc mismatch with "used" flag being accidentally + // flipped. Should not inhibit adding of subsequent slots. + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setUseFlag(false); + setSlot(6, s); + verifySlotFile(*this, + "Slot 6 at timestamp 2001 failed checksum verification", + "Crc failure with use flag", 23, false); + } + { // Test overlapping documents + MetaSlot s(slot6); + // Direct overlapping header + prepareBucket(*this, *file); + s.setHeaderPos(0); + s.setHeaderSize(51); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "overlaps with slot", + "Direct overlapping header", 6, false, false); + // Contained header + // (contained bit not valid header so fails on other error now) + prepareBucket(*this, *file); + s.setHeaderPos(176); + s.setHeaderSize(80); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "not big enough to contain a document id", + "Contained header", 7, false); + // Partly overlapping header + // (contained bit not valid header so fails on other error now) + prepareBucket(*this, *file); + s.setHeaderPos(191); + s.setHeaderSize(35); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "not big enough to contain a document id", + "Partly overlapping header", 7, false); + prepareBucket(*this, *file); + s.setHeaderPos(185); + s.setHeaderSize(33); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "not big enough to contain a document id", + "Partly overlapping header (2)", 7, false); + // Direct overlapping body 
+ prepareBucket(*this, *file); + s = slot6; + s.setBodyPos(0); + s.setBodySize(136); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "Multiple slots with different gids use same body position", + "Directly overlapping body", 6, false); + // Contained body + prepareBucket(*this, *file); + s.setBodyPos(10); + s.setBodySize(50); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "overlaps with slot", + "Contained body", 6, false); + CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1)) == 0); + // Overlapping body + prepareBucket(*this, *file); + s.setBodyPos(160); + s.setBodySize(40); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "overlaps with slot", + "Overlapping body", 5, false); + CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(2)) == 0); + CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1501)) == 0); + // Overlapping body, verifying bodies + // (Bad body bit should be removed first, so only one slot needs + // removing) + prepareBucket(*this, *file); + setSlot(6, s); + verifySlotFile(*this, + "Body checksum mismatch", + "Overlapping body(2)", 7, true); + } + { // Test out of bounds + MetaSlot s(slot6); + + // Header out of bounds + prepareBucket(*this, *file); + s.setHeaderPos(500); + s.setHeaderSize(5000); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "goes out of bounds", + "Header out of bounds", 7, false, false); + // Body out of bounds + prepareBucket(*this, *file); + s = slot6; + s.setBodyPos(2400); + s.setBodySize(6000); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "goes out of bounds", + "Body out of bounds", 7, false); + } + { // Test timestamp collision + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setTimestamp(Timestamp(10002)); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "has same timestamp as slot 5", + "Timestamp collision", 6, false); + } + { // Test timestamp out of order + prepareBucket(*this, *file); + MetaSlot s(slot6); + 
s.setTimestamp(Timestamp(38)); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "Slot 6 is out of timestamp order", + "Timestamp out of order", 8, false); + } + { // Test metadata crc mismatch + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setTimestamp(Timestamp(40)); + setSlot(6, s); + verifySlotFile(*this, + "Slot 6 at timestamp 40 failed checksum verification", + "Crc failure", 7, false); + } + { // Test used after unused + // This might actually lose documents after the unused entries. + // The memfile will not know about the documents after unused entry. + // If the memfile contains changes and writes metadata back due to this, + // the following entries will be missing. + // (To prevent this repair would have to add metadata entries, but that + // may be problems if repair happens at a time where all header or body + // data in the file needs to be cached.) + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setUseFlag(false); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "Slot 7 found after unused entries", + "Used after unused", 6, false); + } + { // Test header blob corrupt + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setHeaderPos(519); + s.setHeaderSize(86); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "Header checksum mismatch", + "Corrupt header blob.", 7); + } + { // Test body blob corrupt + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setBodyPos(52); + s.setBodySize(18); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "Body checksum mismatch", + "Corrupt body blob.", 7); + } + { // Test too long name for header chunk + prepareBucket(*this, *file); + MetaSlot s(slot6); + s.setHeaderPos(160); + s.setHeaderSize(33); + s.updateChecksum(); + setSlot(6, s); + verifySlotFile(*this, + "header is not big enough to contain a document", + "Too long name in header.", 7); + } + { // Test wrong file checksum +// Currently disabled. 
Currently only possible to calculate file checksum from +// memfile now, and memfile object wont be valid. +/* + // First test if we actually have less entries at all.. + prepareBucket(*this, *file); + MetaSlot s(getSlot(7)); + s.setUseFlag(false); + s.updateChecksum(); + setSlot(7, s, false); + s = getSlot(8); + s.setUseFlag(false); + s.updateChecksum(); + setSlot(8, s, false); + verifySlotFile(*this, + "File checksum should have been", + "Wrong file checksum in file.", 7, false); +std::cerr << "U\n"; + // Then test with different timestamp in remaining document + prepareBucket(*this, *file); + s = getSlot(6); + s.setTimestamp(s._timestamp + 1); + s.updateChecksum(); + setSlot(6, s, false); + verifySlotFile(*this, + "File checksum should have been", + "Wrong file checksum in file.", 9, false); +std::cerr << "V\n"; + // Then check with different gid + prepareBucket(*this, *file); + s = getSlot(6); + s._gid = GlobalId("sdfsdfsedsdfsdfsd"); + s.updateChecksum(); + setSlot(6, s, false); + verifySlotFile(*this, + "File checksum should have been", + "Wrong file checksum in file.", 9, false, false); +*/ + } + { // Test that documents not belonging in a bucket is removed +// Currently disabled. Hard to test. Needs total rewrite +/* + prepareBucket(*this, *file); + Blob b(createBlob(43u, "userdoc::0:315", "header", "body")); + _memFile->write(b, 80); + CPPUNIT_ASSERT_EQUAL(4u, _memFile->getBlobCount()); + CPPUNIT_ASSERT(_memFile->read(b)); + verifySlotFile(*this, + "belongs in bucket", + "Document not belonging there", 9); + CPPUNIT_ASSERT_EQUAL(3u, _memFile->getBlobCount()); +*/ + } +} + +} +} diff --git a/memfilepersistence/src/tests/spi/options_builder.h b/memfilepersistence/src/tests/spi/options_builder.h new file mode 100644 index 00000000000..044e7f1d351 --- /dev/null +++ b/memfilepersistence/src/tests/spi/options_builder.h @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/vespalib/stllike/string.h> +#include <memory> + +namespace storage { +namespace memfile { + +class OptionsBuilder +{ + Options _newOptions; +public: + OptionsBuilder(const Options& opts) + : _newOptions(opts) + { + } + + OptionsBuilder& maximumReadThroughGap(uint32_t readThroughGap) { + _newOptions._maximumGapToReadThrough = readThroughGap; + return *this; + } + + OptionsBuilder& initialIndexRead(uint32_t bytesToRead) { + _newOptions._initialIndexRead = bytesToRead; + return *this; + } + + OptionsBuilder& revertTimePeriod(framework::MicroSecTime revertTime) { + _newOptions._revertTimePeriod = revertTime; + return *this; + } + + OptionsBuilder& defaultRemoveDocType(vespalib::stringref typeName) { + _newOptions._defaultRemoveDocType = typeName; + return *this; + } + + OptionsBuilder& maxDocumentVersions(uint32_t maxVersions) { + _newOptions._maxDocumentVersions = maxVersions; + return *this; + } + + std::unique_ptr<Options> build() const { + return std::unique_ptr<Options>(new Options(_newOptions)); + } +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/tests/spi/providerconformancetest.cpp b/memfilepersistence/src/tests/spi/providerconformancetest.cpp new file mode 100644 index 00000000000..526f61a812c --- /dev/null +++ b/memfilepersistence/src/tests/spi/providerconformancetest.cpp @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/persistence/conformancetest/conformancetest.h> +#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> +#include <vespa/storageframework/defaultimplementation/clock/realclock.h> +#include <vespa/storageframework/defaultimplementation/memory/memorymanager.h> +#include <vespa/storageframework/defaultimplementation/memory/simplememorylogic.h> +#include <vespa/storageframework/generic/memory/memorymanagerinterface.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h> +#include <tests/spi/memfiletestutils.h> + +LOG_SETUP(".test.dummyimpl"); + +namespace storage { +namespace memfile { + +struct ProviderConformanceTest : public spi::ConformanceTest { + struct Factory : public PersistenceFactory { + framework::defaultimplementation::ComponentRegisterImpl _compRegister; + framework::defaultimplementation::RealClock _clock; + framework::defaultimplementation::MemoryManager _memoryManager; + std::unique_ptr<MemFileCache> cache; + + Factory() + : _compRegister(), + _clock(), + _memoryManager( + framework::defaultimplementation::AllocationLogic::UP( + new framework::defaultimplementation::SimpleMemoryLogic( + _clock, 1024 * 1024 * 1024))) + { + _compRegister.setClock(_clock); + _compRegister.setMemoryManager(_memoryManager); + } + + spi::PersistenceProvider::UP + getPersistenceImplementation(const document::DocumentTypeRepo::SP& repo, + const document::DocumenttypesConfig&) + { + system("rm -rf vdsroot"); + system("mkdir -p vdsroot/disks/d0"); + vdstestlib::DirConfig config(getStandardConfig(true)); + + MemFilePersistenceProvider::UP result( + new MemFilePersistenceProvider( + _compRegister, + config.getConfigId())); + result->setDocumentRepo(*repo); + return spi::PersistenceProvider::UP(result.release()); + } + + bool + supportsRevert() 
const + { + return true; + } + }; + + ProviderConformanceTest() + : spi::ConformanceTest(PersistenceFactory::UP(new Factory)) {} + + CPPUNIT_TEST_SUITE(ProviderConformanceTest); + DEFINE_CONFORMANCE_TESTS(); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(ProviderConformanceTest); + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp b/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp new file mode 100644 index 00000000000..fbf7badf5e4 --- /dev/null +++ b/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h> + +namespace storage { +namespace memfile { + +class SharedDataLocationTrackerTest : public CppUnit::TestFixture +{ +public: + void headerIsPassedDownToCacheAccessor(); + void bodyIsPassedDownToCacheAccessor(); + void firstInvocationReturnsNewLocation(); + void multipleInvocationsForSharedSlotReturnSameLocation(); + + CPPUNIT_TEST_SUITE(SharedDataLocationTrackerTest); + CPPUNIT_TEST(headerIsPassedDownToCacheAccessor); + CPPUNIT_TEST(bodyIsPassedDownToCacheAccessor); + CPPUNIT_TEST(firstInvocationReturnsNewLocation); + CPPUNIT_TEST(multipleInvocationsForSharedSlotReturnSameLocation); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(SharedDataLocationTrackerTest); + +namespace { + +using Params = std::pair<Types::DocumentPart, DataLocation>; +constexpr auto HEADER = Types::HEADER; +constexpr auto BODY = Types::BODY; + +/** + * A simple mock of a buffer cache which records all invocations + * and returns a location increasing by 100 for each invocation. 
+ */ +struct MockBufferCacheCopier : BufferCacheCopier +{ + // This is practically _screaming_ for GoogleMock. + std::vector<Params> invocations; + + DataLocation doCopyFromSourceToLocal( + Types::DocumentPart part, + DataLocation sourceLocation) override + { + Params params(part, sourceLocation); + const size_t invocationsBefore = invocations.size(); + invocations.push_back(params); + return DataLocation(invocationsBefore * 100, + invocationsBefore * 100 + 100); + } +}; + +} + +void +SharedDataLocationTrackerTest::headerIsPassedDownToCacheAccessor() +{ + MockBufferCacheCopier cache; + SharedDataLocationTracker tracker(cache, HEADER); + tracker.getOrCreateSharedLocation({0, 100}); + CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); + CPPUNIT_ASSERT_EQUAL(Params(HEADER, {0, 100}), cache.invocations[0]); +} + +void +SharedDataLocationTrackerTest::bodyIsPassedDownToCacheAccessor() +{ + MockBufferCacheCopier cache; + SharedDataLocationTracker tracker(cache, BODY); + tracker.getOrCreateSharedLocation({0, 100}); + CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); + CPPUNIT_ASSERT_EQUAL(Params(BODY, {0, 100}), cache.invocations[0]); +} + +void +SharedDataLocationTrackerTest::firstInvocationReturnsNewLocation() +{ + MockBufferCacheCopier cache; + SharedDataLocationTracker tracker(cache, HEADER); + // Auto-incrementing per cache copy invocation. 
+ CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), + tracker.getOrCreateSharedLocation({500, 600})); + CPPUNIT_ASSERT_EQUAL(DataLocation(100, 200), + tracker.getOrCreateSharedLocation({700, 800})); + + CPPUNIT_ASSERT_EQUAL(size_t(2), cache.invocations.size()); + CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]); + CPPUNIT_ASSERT_EQUAL(Params(HEADER, {700, 800}), cache.invocations[1]); +} + +void +SharedDataLocationTrackerTest + ::multipleInvocationsForSharedSlotReturnSameLocation() +{ + MockBufferCacheCopier cache; + SharedDataLocationTracker tracker(cache, HEADER); + CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), + tracker.getOrCreateSharedLocation({500, 600})); + // Same source location, thus we can reuse the same destination location + // as well. + CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), + tracker.getOrCreateSharedLocation({500, 600})); + + CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); + CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]); +} + +} // memfile +} // storage + diff --git a/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp b/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp new file mode 100644 index 00000000000..af0466fafe7 --- /dev/null +++ b/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp @@ -0,0 +1,663 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/options_builder.h> + +namespace storage { +namespace memfile { + +class SimpleMemFileIOBufferTest : public SingleDiskMemFileTestUtils +{ + CPPUNIT_TEST_SUITE(SimpleMemFileIOBufferTest); + CPPUNIT_TEST(testAddAndReadDocument); + CPPUNIT_TEST(testNonExistingLocation); + CPPUNIT_TEST(testCopy); + CPPUNIT_TEST(testCacheLocation); + CPPUNIT_TEST(testPersist); + CPPUNIT_TEST(testGetSerializedSize); + CPPUNIT_TEST(testRemapLocations); + CPPUNIT_TEST(testAlignmentUtilFunctions); + CPPUNIT_TEST(testCalculatedCacheSize); + CPPUNIT_TEST(testSharedBuffer); + CPPUNIT_TEST(testSharedBufferUsage); + CPPUNIT_TEST(testHeaderChunkEncoderComputesSizesCorrectly); + CPPUNIT_TEST(testHeaderChunkEncoderSerializesIdCorrectly); + CPPUNIT_TEST(testHeaderChunkEncoderSerializesHeaderCorrectly); + CPPUNIT_TEST(testRemovesCanBeWrittenWithBlankDefaultDocument); + CPPUNIT_TEST(testRemovesCanBeWrittenWithIdInferredDoctype); + CPPUNIT_TEST(testRemovesWithInvalidDocTypeThrowsException); + CPPUNIT_TEST_SUITE_END(); + + using BufferType = SimpleMemFileIOBuffer::BufferType; + using BufferLP = BufferType::LP; + using BufferAllocation = SimpleMemFileIOBuffer::BufferAllocation; + using HeaderChunkEncoder = SimpleMemFileIOBuffer::HeaderChunkEncoder; + using SimpleMemFileIOBufferUP = std::unique_ptr<SimpleMemFileIOBuffer>; + + BufferAllocation allocateBuffer(size_t sz) { + return BufferAllocation(BufferLP(new BufferType(sz)), 0, sz); + } + + /** + * Create an I/O buffer instance with for a dummy bucket. If removeDocType + * is non-empty, remove entries will be written in backwards compatible + * mode. 
+ */ + SimpleMemFileIOBufferUP createIoBufferWithDummySpec( + vespalib::stringref removeDocType = ""); + +public: + class DummyFileReader : public VersionSerializer { + public: + virtual FileVersion getFileVersion() { return FileVersion(); } + virtual void loadFile(MemFile&, Environment&, + Buffer&, uint64_t ) {} + virtual FlushResult flushUpdatesToFile(MemFile&, Environment&) { + return FlushResult::TooSmall; + } + virtual void rewriteFile(MemFile&, Environment&) {} + virtual bool verify(MemFile&, Environment&, + std::ostream&, bool, + uint16_t) { return false; }; + virtual void cacheLocations(MemFileIOInterface&, + Environment&, + const Options&, + DocumentPart, + const std::vector<DataLocation>&) {} + }; + + DummyFileReader dfr; + + void testAddAndReadDocument(); + void testNonExistingLocation(); + void testCopy(); + void testCacheLocation(); + void testPersist(); + void testGetSerializedSize(); + void testRemapLocations(); + void testAlignmentUtilFunctions(); + void testCalculatedCacheSize(); + void testSharedBuffer(); + void testSharedBufferUsage(); + void testHeaderChunkEncoderComputesSizesCorrectly(); + void testHeaderChunkEncoderSerializesIdCorrectly(); + void testHeaderChunkEncoderSerializesHeaderCorrectly(); + void testRemovesCanBeWrittenWithBlankDefaultDocument(); + void testRemovesCanBeWrittenWithIdInferredDoctype(); + void testRemovesWithInvalidDocTypeThrowsException(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(SimpleMemFileIOBufferTest); + + +void +SimpleMemFileIOBufferTest::testAddAndReadDocument() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + DataLocation h = buffer.addHeader(*doc); + DataLocation b = buffer.addBody(*doc); + + Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), 
h); + buffer.readBody(*getTypeRepo(), b, *newDoc); + + CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); + CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(h, HEADER)); + CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(b, BODY)); + CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(h, BODY)); + CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(b, HEADER)); + CPPUNIT_ASSERT_EQUAL(doc->getId(), buffer.getDocumentId(h)); +} + +void +SimpleMemFileIOBufferTest::testPersist() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + DataLocation h = buffer.addHeader(*doc); + DataLocation b = buffer.addBody(*doc); + + CPPUNIT_ASSERT(!buffer.isPersisted(h, HEADER)); + CPPUNIT_ASSERT(!buffer.isPersisted(b, BODY)); + + buffer.persist(HEADER, h, DataLocation(1000, h.size())); + buffer.persist(BODY, b, DataLocation(5000, b.size())); + + Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), DataLocation(1000, h.size())); + buffer.readBody(*getTypeRepo(), DataLocation(5000, b.size()), *newDoc); + + CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(1000, h.size()), HEADER)); + CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(5000, b.size()), BODY)); + + CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); +} + +void +SimpleMemFileIOBufferTest::testCopy() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + for (uint32_t i = 0; i < 10; ++i) { + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + DataLocation h = buffer.addHeader(*doc); + DataLocation b = buffer.addBody(*doc); + + SimpleMemFileIOBuffer buffer2(dfr, + vespalib::LazyFile::UP(), + 
std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + DataLocation h2 = buffer2.copyCache(buffer, HEADER, h); + DataLocation b2 = buffer2.copyCache(buffer, BODY, b); + + Document::UP newDoc = buffer2.getDocumentHeader(*getTypeRepo(), h2); + buffer2.readBody(*getTypeRepo(), b2, *newDoc); + + CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); + } +} + +void +SimpleMemFileIOBufferTest::testNonExistingLocation() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + DataLocation h = buffer.addHeader(*doc); + DataLocation b = buffer.addBody(*doc); + + buffer.clear(HEADER); + + try { + Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), h); + CPPUNIT_ASSERT(false); + } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) { + } + + buffer.clear(BODY); + + try { + document::Document newDoc; + buffer.readBody(*getTypeRepo(), b, newDoc); + CPPUNIT_ASSERT(false); + } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) { + } +} + +void +SimpleMemFileIOBufferTest::testCacheLocation() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + FileInfo::UP(new FileInfo(100, 10000, 50000)), + fileSpec, + env()); + + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + BufferAllocation headerBuf = buffer.serializeHeader(*doc); + BufferAllocation bodyBuf = buffer.serializeBody(*doc); + + DataLocation hloc(1234, headerBuf.getSize()); + DataLocation bloc(5678, bodyBuf.getSize()); + + buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); + buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0); + + Document::UP newDoc = 
buffer.getDocumentHeader(*getTypeRepo(), hloc); + buffer.readBody(*getTypeRepo(), bloc, *newDoc); + + CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); +} + +void +SimpleMemFileIOBufferTest::testGetSerializedSize() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + FileInfo::UP(new FileInfo(100, 10000, 50000)), + fileSpec, + env()); + + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 456, + 789, + 1234)); + + BufferAllocation headerBuf = buffer.serializeHeader(*doc); + BufferAllocation bodyBuf = buffer.serializeBody(*doc); + + DataLocation hloc(1234, headerBuf.getSize()); + DataLocation bloc(5678, bodyBuf.getSize()); + + buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); + buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0); + + vespalib::nbostream serializedHeader; + doc->serializeHeader(serializedHeader); + + vespalib::nbostream serializedBody; + doc->serializeBody(serializedBody); + + CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()), + buffer.getSerializedSize(HEADER, hloc)); + CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()), + buffer.getSerializedSize(BODY, bloc)); +} + +// Test that remapping does not overwrite datalocations that it has +// already updated +void +SimpleMemFileIOBufferTest::testRemapLocations() +{ + FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); + + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + FileInfo::UP(new FileInfo(100, 10000, 50000)), + fileSpec, + env()); + + document::Document::SP doc(createRandomDocumentAtLocation( + 123, + 100, + 100)); + BufferAllocation headerBuf = buffer.serializeHeader(*doc); + BufferAllocation bodyBuf = buffer.serializeBody(*doc); + + document::Document::SP doc2(createRandomDocumentAtLocation( + 123, + 100, + 100)); + + BufferAllocation headerBuf2 = buffer.serializeHeader(*doc2); + BufferAllocation bodyBuf2 = 
buffer.serializeBody(*doc2); + + DataLocation hloc(30000, headerBuf.getSize()); + DataLocation hloc2(0, headerBuf2.getSize()); + DataLocation hloc3(10000, hloc2._size); + + buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); + buffer.cacheLocation(HEADER, hloc2, headerBuf2.getSharedBuffer(), 0); + + std::map<DataLocation, DataLocation> remapping; + remapping[hloc2] = hloc; + remapping[hloc] = hloc3; + + buffer.remapAndPersistAllLocations(HEADER, remapping); + + Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), hloc3); + document::ByteBuffer bbuf(bodyBuf.getBuffer(), bodyBuf.getSize()); + newDoc->deserializeBody(*getTypeRepo(), bbuf); + + CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); + + Document::UP newDoc2 = buffer.getDocumentHeader(*getTypeRepo(), hloc); + document::ByteBuffer bbuf2(bodyBuf.getBuffer(), bodyBuf.getSize()); + newDoc2->deserializeBody(*getTypeRepo(), bbuf2); + CPPUNIT_ASSERT_EQUAL(*doc2, *newDoc2); +} + +/** + * Not technically a part of SimpleMemFileIOBuffer, but used by it and + * currently contained within its header file. Move test somewhere else + * if the code itself is moved. 
+ */ +void +SimpleMemFileIOBufferTest::testAlignmentUtilFunctions() +{ + using namespace util; + CPPUNIT_ASSERT_EQUAL(size_t(0), alignUpPow2<4096>(0)); + CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(1)); + CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(512)); + CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(4096)); + CPPUNIT_ASSERT_EQUAL(size_t(8192), alignUpPow2<4096>(4097)); + CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<16>(20)); + CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<32>(20)); + CPPUNIT_ASSERT_EQUAL(size_t(64), alignUpPow2<64>(20)); + CPPUNIT_ASSERT_EQUAL(size_t(128), alignUpPow2<128>(20)); + + CPPUNIT_ASSERT_EQUAL(uint32_t(0), nextPow2(0)); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), nextPow2(1)); + CPPUNIT_ASSERT_EQUAL(uint32_t(4), nextPow2(3)); + CPPUNIT_ASSERT_EQUAL(uint32_t(16), nextPow2(15)); + CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(40)); + CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(64)); +} + +/** + * Test that allocated buffers are correctly reported with their sizes + * rounded up to account for mmap overhead. + */ +void +SimpleMemFileIOBufferTest::testCalculatedCacheSize() +{ + FileSpecification fileSpec(BucketId(16, 123), + env().getDirectory(), "testfile.0"); + SimpleMemFileIOBuffer buffer(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER)); + CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY)); + + // All buffers are on a 4k page granularity. 
+ BufferAllocation sharedHeaderBuffer(allocateBuffer(1500)); // -> 4096 + buffer.cacheLocation(HEADER, DataLocation(0, 85), + sharedHeaderBuffer.getSharedBuffer(), 0); + CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER)); + + buffer.cacheLocation(HEADER, DataLocation(200, 100), + sharedHeaderBuffer.getSharedBuffer(), 85); + CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER)); + + BufferAllocation singleHeaderBuffer(allocateBuffer(200)); // -> 4096 + buffer.cacheLocation(HEADER, DataLocation(0, 100), + singleHeaderBuffer.getSharedBuffer(), 0); + CPPUNIT_ASSERT_EQUAL(size_t(8192), buffer.getCachedSize(HEADER)); + + BufferAllocation singleBodyBuffer(allocateBuffer(300)); // -> 4096 + buffer.cacheLocation(BODY, DataLocation(0, 100), + singleBodyBuffer.getSharedBuffer(), 0); + CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(BODY)); + + buffer.clear(HEADER); + CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER)); + + buffer.clear(BODY); + CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY)); +} + +void +SimpleMemFileIOBufferTest::testSharedBuffer() +{ + typedef SimpleMemFileIOBuffer::SharedBuffer SharedBuffer; + + { + SharedBuffer buf(1024); + CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getSize()); + CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getFreeSize()); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getUsedSize()); + CPPUNIT_ASSERT(buf.hasRoomFor(1024)); + CPPUNIT_ASSERT(!buf.hasRoomFor(1025)); + + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(13)); + // Allocation should be rounded up to nearest alignment. + // TODO: is this even necessary? 
+ CPPUNIT_ASSERT_EQUAL(size_t(16), buf.getUsedSize()); + CPPUNIT_ASSERT_EQUAL(size_t(1008), buf.getFreeSize()); + CPPUNIT_ASSERT(buf.hasRoomFor(1008)); + CPPUNIT_ASSERT(!buf.hasRoomFor(1009)); + CPPUNIT_ASSERT_EQUAL(size_t(16), buf.allocate(1)); + CPPUNIT_ASSERT_EQUAL(size_t(24), buf.getUsedSize()); + + CPPUNIT_ASSERT_EQUAL(size_t(24), buf.allocate(999)); + CPPUNIT_ASSERT(!buf.hasRoomFor(1)); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); + CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); + } + // Test exact fit. + { + SharedBuffer buf(1024); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(1024)); + CPPUNIT_ASSERT(!buf.hasRoomFor(1)); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); + CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); + } + // Test 512-byte alignment. + { + SharedBuffer buf(1024); + CPPUNIT_ASSERT(buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES)); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(10)); + CPPUNIT_ASSERT(!buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES)); + CPPUNIT_ASSERT(!buf.hasRoomFor(513, SharedBuffer::ALIGN_512_BYTES)); + CPPUNIT_ASSERT(buf.hasRoomFor(512, SharedBuffer::ALIGN_512_BYTES)); + CPPUNIT_ASSERT_EQUAL(size_t(512), buf.allocate(512, SharedBuffer::ALIGN_512_BYTES)); + CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); + CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); + } +} + +void +SimpleMemFileIOBufferTest::testSharedBufferUsage() +{ + FileSpecification fileSpec(BucketId(16, 123), + env().getDirectory(), "testfile.0"); + SimpleMemFileIOBuffer ioBuf(dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env()); + + const size_t threshold = SimpleMemFileIOBuffer::WORKING_BUFFER_SIZE; + + // Brand new allocation + BufferAllocation ba(ioBuf.allocateBuffer(HEADER, 1)); + CPPUNIT_ASSERT(ba.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(1), ba.size); + // Should reuse buffer, but get other offset + BufferAllocation 
ba2(ioBuf.allocateBuffer(HEADER, 500)); + CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba2.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(8), ba2.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(500), ba2.size); + CPPUNIT_ASSERT_EQUAL(size_t(512), ba2.buf->getUsedSize()); + + // Allocate a buffer so big that it should get its own buffer instance + BufferAllocation ba3(ioBuf.allocateBuffer(HEADER, threshold)); + CPPUNIT_ASSERT(ba3.buf.get() != ba2.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba3.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(threshold), ba3.size); + + // But smaller allocs should still be done from working buffer + BufferAllocation ba4(ioBuf.allocateBuffer(HEADER, 512)); + CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba4.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.size); + CPPUNIT_ASSERT_EQUAL(size_t(1024), ba4.buf->getUsedSize()); + + // Allocate lots of smaller buffers from the same buffer until we run out. + while (true) { + BufferAllocation tmp(ioBuf.allocateBuffer(HEADER, 1024)); + CPPUNIT_ASSERT_EQUAL(ba.buf.get(), tmp.buf.get()); + if (!tmp.buf->hasRoomFor(2048)) { + break; + } + } + BufferAllocation ba5(ioBuf.allocateBuffer(HEADER, 2048)); + CPPUNIT_ASSERT(ba5.buf.get() != ba.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba5.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(2048), ba5.size); + + // Allocating for different part should get different buffer. + BufferAllocation ba6(ioBuf.allocateBuffer(BODY, 128)); + CPPUNIT_ASSERT(ba6.buf.get() != ba5.buf.get()); + CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba6.pos); + CPPUNIT_ASSERT_EQUAL(uint32_t(128), ba6.size); +} + +void +SimpleMemFileIOBufferTest::testHeaderChunkEncoderComputesSizesCorrectly() +{ + document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); + + std::string idString = doc->getId().toString(); + HeaderChunkEncoder encoder(doc->getId()); + // Without document, payload is: 3x u32 + doc id string (no zero term). 
+ CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size(), + static_cast<size_t>(encoder.encodedSize())); + + encoder.bufferDocument(*doc); + vespalib::nbostream stream; + doc->serializeHeader(stream); + // With document, add size of serialized document to the mix. + CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size() + stream.size(), + static_cast<size_t>(encoder.encodedSize())); +} + +SimpleMemFileIOBufferTest::SimpleMemFileIOBufferUP +SimpleMemFileIOBufferTest::createIoBufferWithDummySpec( + vespalib::stringref removeDocType) +{ + FileSpecification fileSpec(BucketId(16, 123), + env().getDirectory(), "testfile.0"); + // Override config. + auto options = env().acquireConfigReadLock().options(); + env().acquireConfigWriteLock().setOptions( + OptionsBuilder(*options) + .defaultRemoveDocType(removeDocType) + .build()); + + SimpleMemFileIOBufferUP ioBuf( + new SimpleMemFileIOBuffer( + dfr, + vespalib::LazyFile::UP(), + std::unique_ptr<FileInfo>(new FileInfo), + fileSpec, + env())); + return ioBuf; +} + +void +SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesIdCorrectly() +{ + document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); + HeaderChunkEncoder encoder(doc->getId()); + + SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec()); + + BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize())); + encoder.writeTo(buf); + DataLocation newLoc = ioBuf->addLocation(HEADER, buf); + document::DocumentId checkId = ioBuf->getDocumentId(newLoc); + + CPPUNIT_ASSERT_EQUAL(doc->getId(), checkId); +} + +void +SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesHeaderCorrectly() +{ + document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); + HeaderChunkEncoder encoder(doc->getId()); + encoder.bufferDocument(*doc); + + SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec()); + BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize())); + encoder.writeTo(buf); + DataLocation 
newLoc = ioBuf->addLocation(HEADER, buf); + Document::UP checkDoc = ioBuf->getDocumentHeader(*getTypeRepo(), newLoc); + + CPPUNIT_ASSERT_EQUAL(doc->getId(), checkDoc->getId()); + CPPUNIT_ASSERT_EQUAL(doc->getType(), checkDoc->getType()); +} + +void +SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithBlankDefaultDocument() +{ + SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); + + document::DocumentId id("userdoc:yarn:12345:fluff"); + DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); + // Despite adding with document id only, we should now actually have a + // valid document header. Will fail with a DeserializeException if no + // header has been written. + Document::UP removeWithHeader( + ioBuf->getDocumentHeader(*getTypeRepo(), loc)); + CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id); + CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(), + *getTypeRepo()->getDocumentType("testdoctype1")); +} + +void +SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithIdInferredDoctype() +{ + SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); + + document::DocumentId id("id:yarn:testdoctype2:n=12345:fluff"); + DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); + // Since document id contains an explicit document type, the blank remove + // document header should be written with that type instead of the one + // provided as default via config. 
+ Document::UP removeWithHeader( + ioBuf->getDocumentHeader(*getTypeRepo(), loc)); + CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id); + CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(), + *getTypeRepo()->getDocumentType("testdoctype2")); +} + +void +SimpleMemFileIOBufferTest::testRemovesWithInvalidDocTypeThrowsException() +{ + SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); + + document::DocumentId id("id:yarn:nosuchtype:n=12345:fluff"); + try { + DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); + CPPUNIT_FAIL("No exception thrown on bad doctype"); + } catch (const vespalib::Exception& e) { + CPPUNIT_ASSERT(e.getMessage().find("Could not serialize document " + "for remove with unknown doctype " + "'nosuchtype'") + != std::string::npos); + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/spi/simulatedfailurefile.h b/memfilepersistence/src/tests/spi/simulatedfailurefile.h new file mode 100644 index 00000000000..1ded927a3d1 --- /dev/null +++ b/memfilepersistence/src/tests/spi/simulatedfailurefile.h @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/logginglazyfile.h> + +namespace storage { +namespace memfile { + +class SimulatedFailureLazyFile : public vespalib::LazyFile +{ + mutable int _readOpsBeforeFailure; + mutable int _writeOpsBeforeFailure; +public: + class Factory : public Environment::LazyFileFactory { + public: + Factory() + : _readOpsBeforeFailure(-1), + _writeOpsBeforeFailure(0) + { + } + vespalib::LazyFile::UP createFile(const std::string& fileName) const { + return vespalib::LazyFile::UP( + new SimulatedFailureLazyFile(fileName, + vespalib::File::DIRECTIO, + _readOpsBeforeFailure, + _writeOpsBeforeFailure)); + } + + void setReadOpsBeforeFailure(int ops) { + _readOpsBeforeFailure = ops; + } + + void setWriteOpsBeforeFailure(int ops) { + _writeOpsBeforeFailure = ops; + } + private: + int _readOpsBeforeFailure; + int _writeOpsBeforeFailure; + }; + + SimulatedFailureLazyFile( + const std::string& filename, + int flags, + int readOpsBeforeFailure, + int writeOpsBeforeFailure) + : LazyFile(filename, flags), + _readOpsBeforeFailure(readOpsBeforeFailure), + _writeOpsBeforeFailure(writeOpsBeforeFailure) + { + } + + off_t write(const void *buf, size_t bufsize, off_t offset) + { + if (_writeOpsBeforeFailure == 0) { + throw vespalib::IoException( + "A simulated I/O write exception was triggered", + vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC); + } + --_writeOpsBeforeFailure; + return vespalib::LazyFile::write(buf, bufsize, offset); + } + + size_t read(void *buf, size_t bufsize, off_t offset) const + { + if (_readOpsBeforeFailure == 0) { + throw vespalib::IoException( + "A simulated I/O read exception was triggered", + vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC); + } + --_readOpsBeforeFailure; + return vespalib::LazyFile::read(buf, bufsize, offset); + } +}; + +} // ns memfile +} // ns storage + diff --git a/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp 
b/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp new file mode 100644 index 00000000000..75eab5c2972 --- /dev/null +++ b/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp @@ -0,0 +1,213 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> + +#include <vespa/document/datatype/documenttype.h> +#include <tests/spi/memfiletestutils.h> +#include <tests/spi/simulatedfailurefile.h> +#include <vespa/vdstestlib/cppunit/macros.h> + +using document::DocumentType; + +namespace storage { +namespace memfile { +namespace { + spi::LoadType defaultLoadType(0, "default"); +} + +class SplitOperationHandlerTest : public SingleDiskMemFileTestUtils +{ + + void doTestMultiDisk(uint16_t sourceDisk, + uint16_t targetDisk0, + uint16_t targetDisk1); + + + CPPUNIT_TEST_SUITE(SplitOperationHandlerTest); + CPPUNIT_TEST(testSimple); + CPPUNIT_TEST(testMultiDisk); + CPPUNIT_TEST(testMultiDiskNonZeroSourceIndex); + CPPUNIT_TEST(testExceptionDuringSplittingEvictsAllBuckets); + CPPUNIT_TEST_SUITE_END(); + +public: + void testSimple(); + void testMultiDisk(); + void testMultiDiskNonZeroSourceIndex(); + void testExceptionDuringSplittingEvictsAllBuckets(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(SplitOperationHandlerTest); + +void +SplitOperationHandlerTest::testSimple() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(1); + + for (uint32_t i = 0; i < 100; i++) { + uint32_t location = 4; + if (i % 2 == 0) { + location |= (1 << 16); + } + + doPut(location, Timestamp(1000 + i)); + } + flush(document::BucketId(16, 4)); + + env()._cache.clear(); + + document::BucketId sourceBucket = document::BucketId(16, 4); + document::BucketId target1 = document::BucketId(17, 4); + document::BucketId target2 = document::BucketId(17, 4 | (1 << 16)); + + SplitOperationHandler handler(env()); + spi::Result result = 
getPersistenceProvider().split( + spi::Bucket(sourceBucket, spi::PartitionId(0)), + spi::Bucket(target1, spi::PartitionId(0)), + spi::Bucket(target2, spi::PartitionId(0)), + context); + + env()._cache.clear(); + + { + MemFilePtr file(handler.getMemFile(sourceBucket, 0)); + CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); + } + + { + MemFilePtr file(handler.getMemFile(target1, 0)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + for (uint32_t i = 0; i < file->getSlotCount(); ++i) { + file->getDocument((*file)[i], ALL); + } + } + + { + MemFilePtr file(handler.getMemFile(target2, 0)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + for (uint32_t i = 0; i < file->getSlotCount(); ++i) { + file->getDocument((*file)[i], ALL); + } + } +} + +void +SplitOperationHandlerTest::doTestMultiDisk(uint16_t sourceDisk, + uint16_t targetDisk0, + uint16_t targetDisk1) +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(3); + + for (uint32_t i = 0; i < 100; i++) { + uint32_t location = 4; + if (i % 2 == 0) { + location |= (1 << 16); + } + + doPutOnDisk(sourceDisk, location, Timestamp(1000 + i)); + } + flush(document::BucketId(16, 4)); + + env()._cache.clear(); + + document::BucketId sourceBucket = document::BucketId(16, 4); + document::BucketId target1 = document::BucketId(17, 4); + document::BucketId target2 = document::BucketId(17, 4 | (1 << 16)); + + SplitOperationHandler handler(env()); + spi::Result result = getPersistenceProvider().split( + spi::Bucket(sourceBucket, spi::PartitionId(sourceDisk)), + spi::Bucket(target1, spi::PartitionId(targetDisk0)), + spi::Bucket(target2, spi::PartitionId(targetDisk1)), + context); + + env()._cache.clear(); + + { + MemFilePtr file(handler.getMemFile(sourceBucket, sourceDisk)); + CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); + } + + { + MemFilePtr file(handler.getMemFile(target1, targetDisk0)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + for (uint32_t i = 
0; i < file->getSlotCount(); ++i) { + file->getDocument((*file)[i], ALL); + } + } + + { + MemFilePtr file(handler.getMemFile(target2, targetDisk1)); + CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); + for (uint32_t i = 0; i < file->getSlotCount(); ++i) { + file->getDocument((*file)[i], ALL); + } + } +} + +void +SplitOperationHandlerTest::testMultiDisk() +{ + doTestMultiDisk(0, 1, 2); +} + +void +SplitOperationHandlerTest::testMultiDiskNonZeroSourceIndex() +{ + doTestMultiDisk(1, 2, 0); +} + +void +SplitOperationHandlerTest::testExceptionDuringSplittingEvictsAllBuckets() +{ + spi::Context context(defaultLoadType, spi::Priority(0), + spi::Trace::TraceLevel(0)); + setupDisks(1); + + for (uint32_t i = 0; i < 100; i++) { + uint32_t location = 4; + if (i % 2 == 0) { + location |= (1 << 16); + } + + doPut(location, Timestamp(1000 + i)); + } + flush(document::BucketId(16, 4)); + + simulateIoErrorsForSubsequentlyOpenedFiles(); + + document::BucketId sourceBucket(16, 4); + document::BucketId target1(17, 4); + document::BucketId target2(17, 4 | (1 << 16)); + + try { + SplitOperationHandler handler(env()); + spi::Result result = getPersistenceProvider().split( + spi::Bucket(sourceBucket, spi::PartitionId(0)), + spi::Bucket(target1, spi::PartitionId(0)), + spi::Bucket(target2, spi::PartitionId(0)), + context); + CPPUNIT_FAIL("Exception not thrown on flush failure"); + } catch (std::exception&) { + } + + CPPUNIT_ASSERT(!env()._cache.contains(sourceBucket)); + CPPUNIT_ASSERT(!env()._cache.contains(target1)); + CPPUNIT_ASSERT(!env()._cache.contains(target2)); + + unSimulateIoErrorsForSubsequentlyOpenedFiles(); + + // Source must not have been deleted + { + SplitOperationHandler handler(env()); + MemFilePtr file(handler.getMemFile(sourceBucket, 0)); + CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount()); + } +} + +} + +} diff --git a/memfilepersistence/src/tests/testhelper.cpp b/memfilepersistence/src/tests/testhelper.cpp new file mode 100644 index 00000000000..40a3512e400 --- 
/dev/null +++ b/memfilepersistence/src/tests/testhelper.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <tests/testhelper.h> + +#include <vespa/log/log.h> +#include <vespa/vespalib/io/fileutil.h> + +LOG_SETUP(".testhelper"); + +namespace storage { + +void addStorageDistributionConfig(vdstestlib::DirConfig& dc) +{ + vdstestlib::DirConfig::Config* config; + config = &dc.getConfig("stor-distribution", true); + config->clear(); + config->set("group[1]"); + config->set("group[0].name", "foo"); + config->set("group[0].index", "0"); + config->set("group[0].nodes[50]"); + + for (uint32_t i = 0; i < 50; i++) { + std::ostringstream key; key << "group[0].nodes[" << i << "].index"; + std::ostringstream val; val << i; + config->set(key.str(), val.str()); + } +} + +vdstestlib::DirConfig getStandardConfig(bool storagenode) { + vdstestlib::DirConfig dc; + vdstestlib::DirConfig::Config* config; + config = &dc.addConfig("stor-cluster"); + config = &dc.addConfig("load-type"); + config = &dc.addConfig("bucket"); + config = &dc.addConfig("messagebus"); + config = &dc.addConfig("stor-prioritymapping"); + config = &dc.addConfig("stor-bucketdbupdater"); + config = &dc.addConfig("metricsmanager"); + config->set("consumer[1]"); + config->set("consumer[0].name", "\"status\""); + config->set("consumer[0].addedmetrics[1]"); + config->set("consumer[0].addedmetrics[0]", "\"*\""); + config = &dc.addConfig("stor-communicationmanager"); + config->set("rpcport", "0"); + config->set("mbusport", "0"); + config = &dc.addConfig("stor-bucketdb"); + config->set("chunklevel", "0"); + config = &dc.addConfig("stor-distributormanager"); + config = &dc.addConfig("stor-opslogger"); + config = &dc.addConfig("stor-memfilepersistence"); + // Easier to see what goes wrong with only 1 thread per disk. 
+ config->set("minimum_file_meta_slots", "2"); + config->set("minimum_file_header_block_size", "368"); + config->set("minimum_file_size", "4096"); + config->set("threads[1]"); + config->set("threads[0].lowestpri 255"); + config->set("dir_spread", "4"); + config->set("dir_levels", "0"); + // Unit tests typically use fake low time values, so don't complain + // about them or compact/delete them by default. Override in tests testing that + // behavior + config = &dc.addConfig("persistence"); + config->set("keep_remove_time_period", "2000000000"); + config->set("revert_time_period", "2000000000"); + config = &dc.addConfig("stor-bouncer"); + config = &dc.addConfig("stor-integritychecker"); + config = &dc.addConfig("stor-bucketmover"); + config = &dc.addConfig("stor-messageforwarder"); + config = &dc.addConfig("stor-server"); + config->set("enable_dead_lock_detector", "false"); + config->set("enable_dead_lock_detector_warnings", "false"); + config->set("max_merges_per_node", "25"); + config->set("max_merge_queue_size", "20"); + config->set("root_folder", + (storagenode ? "vdsroot" : "vdsroot.distributor")); + config->set("is_distributor", + (storagenode ? "false" : "true")); + config = &dc.addConfig("stor-devices"); + config->set("root_folder", + (storagenode ? 
"vdsroot" : "vdsroot.distributor")); + config = &dc.addConfig("stor-status"); + config->set("httpport", "0"); + config = &dc.addConfig("stor-visitor"); + config->set("defaultdocblocksize", "8192"); + // By default, need "old" behaviour of maxconcurrent + config->set("maxconcurrentvisitors_fixed", "4"); + config->set("maxconcurrentvisitors_variable", "0"); + config = &dc.addConfig("stor-visitordispatcher"); + addFileConfig(dc, "documenttypes", "config-doctypes.cfg"); + addStorageDistributionConfig(dc); + return dc; +} + +void addFileConfig(vdstestlib::DirConfig& dc, + const std::string& configDefName, + const std::string& fileName) +{ + vdstestlib::DirConfig::Config* config; + config = &dc.getConfig(configDefName, true); + config->clear(); + std::ifstream in(fileName.c_str()); + std::string line; + while (std::getline(in, line, '\n')) { + std::string::size_type pos = line.find(' '); + if (pos == std::string::npos) { + config->set(line); + } else { + config->set(line.substr(0, pos), line.substr(pos + 1)); + } + } + in.close(); +} + +TestName::TestName(const std::string& n) + : name(n) +{ + LOG(debug, "Starting test %s", name.c_str()); +} + +TestName::~TestName() { + LOG(debug, "Done with test %s", name.c_str()); +} + +} // storage diff --git a/memfilepersistence/src/tests/testhelper.h b/memfilepersistence/src/tests/testhelper.h new file mode 100644 index 00000000000..4445086d300 --- /dev/null +++ b/memfilepersistence/src/tests/testhelper.h @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once +#include <vespa/vdstestlib/cppunit/dirconfig.h> +#include <vespa/vdstestlib/cppunit/macros.h> + + +#include <fstream> +#include <vespa/fastos/fastos.h> +#include <sstream> + +#define ASSERT_REPLY_COUNT(count, dummylink) \ + { \ + std::ostringstream msgost; \ + if ((dummylink).getNumReplies() != count) { \ + for (uint32_t ijx=0; ijx<(dummylink).getNumReplies(); ++ijx) { \ + msgost << (dummylink).getReply(ijx)->toString(true) << "\n"; \ + } \ + } \ + CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \ + (dummylink).getNumReplies()); \ + } +#define ASSERT_COMMAND_COUNT(count, dummylink) \ + { \ + std::ostringstream msgost; \ + if ((dummylink).getNumCommands() != count) { \ + for (uint32_t ijx=0; ijx<(dummylink).getNumCommands(); ++ijx) { \ + msgost << (dummylink).getCommand(ijx)->toString(true) << "\n"; \ + } \ + } \ + CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \ + (dummylink).getNumCommands()); \ + } + +namespace storage { + +void addFileConfig(vdstestlib::DirConfig& dc, + const std::string& configDefName, + const std::string& fileName); + + +void addStorageDistributionConfig(vdstestlib::DirConfig& dc); + +vdstestlib::DirConfig getStandardConfig(bool storagenode); + +// Class used to print start and end of test. Enable debug when you want to see +// which test creates what output or where we get stuck +struct TestName { + std::string name; + TestName(const std::string& n); + ~TestName(); +}; + +} // storage + diff --git a/memfilepersistence/src/tests/testrunner.cpp b/memfilepersistence/src/tests/testrunner.cpp new file mode 100644 index 00000000000..16027870c47 --- /dev/null +++ b/memfilepersistence/src/tests/testrunner.cpp @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <iostream> +#include <vespa/log/log.h> +#include <vespa/vdstestlib/cppunit/cppunittestrunner.h> + +LOG_SETUP("persistencecppunittests"); + +int +main(int argc, char **argv) +{ + vdstestlib::CppUnitTestRunner testRunner; + return testRunner.run(argc, argv); +} diff --git a/memfilepersistence/src/tests/tools/.gitignore b/memfilepersistence/src/tests/tools/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/tests/tools/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/tests/tools/CMakeLists.txt b/memfilepersistence/src/tests/tools/CMakeLists.txt new file mode 100644 index 00000000000..aef718c7633 --- /dev/null +++ b/memfilepersistence/src/tests/tools/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_testtools + SOURCES + dumpslotfiletest.cpp + vdsdisktooltest.cpp + DEPENDS +) diff --git a/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp b/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp new file mode 100644 index 00000000000..112f8840e72 --- /dev/null +++ b/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/config/subscription/configuri.h> +#include <vespa/document/base/testdocrepo.h> +#include <vespa/memfilepersistence/tools/dumpslotfile.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/util/programoptions_testutils.h> +#include <tests/spi/memfiletestutils.h> + +#include <vespa/document/config/config-documenttypes.h> + +namespace storage { +namespace memfile { + +class DumpSlotFileTest : public SingleDiskMemFileTestUtils +{ + CPPUNIT_TEST_SUITE(DumpSlotFileTest); + CPPUNIT_TEST(testSimple); + CPPUNIT_TEST_SUITE_END(); + +public: + void testSimple(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(DumpSlotFileTest); + +#define ASSERT_MATCH(optstring, pattern) \ +{ \ + vespalib::AppOptions opts("dumpslotfile " optstring); \ + std::ostringstream out; \ + config::ConfigUri configUri(config::ConfigUri::createFromInstance( \ + document::TestDocRepo::getDefaultConfig())); \ + std::unique_ptr<document::DocumenttypesConfig> config = config::ConfigGetter<document::DocumenttypesConfig>::getConfig(configUri.getConfigId(), configUri.getContext()); \ + SlotFileDumper::dump(opts.getArgCount(), opts.getArguments(), \ + configUri, out, out); \ + CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \ + output = out.str(); \ +} + +void +DumpSlotFileTest::testSimple() +{ + std::string output; + // Test syntax page + ASSERT_MATCH("--help", ".*Usage: dumpslotfile.*"); + // Test non-existing file. (Handle as empty file) + ASSERT_MATCH("00a.0", + ".*BucketId\\(0x000000000000000a\\)" + ".*document count: 0.*non-existing.*"); + // Parse bucketid without extension. + ASSERT_MATCH("000000000000000a", + ".*BucketId\\(0x000000000000000a\\) " + "\\(extracted from filename\\).*"); + // Parse invalid bucket id. + ASSERT_MATCH("000010000000000g", + ".*Failed to extract bucket id from filename.*"); + // Test toXml with no data. 
Thus doesn't require doc config + ASSERT_MATCH("--toxml --documentconfig whatevah 000a.0", + ".*<vespafeed>.*"); + // Test invalid arguments + ASSERT_MATCH("--foobar", ".*Invalid option 'foobar'\\..*"); + // What to show in XML doesn't make sense in non-xml mode + ASSERT_MATCH("--includeremoveddocs 0.0", + ".*Options for what to include in XML makes no sense when not " + "printing XML content.*"); + ASSERT_MATCH("--includeremoveentries 0.0", + ".*Options for what to include in XML makes no sense when not " + "printing XML content.*"); + // To binary only works for single doc + ASSERT_MATCH("--tobinary 0.0", + ".*To binary option only works for a single document.*"); + + BucketId bid(1, 0); + createTestBucket(bid, 0); + ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0", + ".*" + "Unique document count: 8.*" + "Total document size: [0-9]+.*" + "Used size: [0-9]+.*" + "Filename: .*/d0/.*" + "Filesize: 12288.*" + "SlotFileHeader.*" + "[0-9]+ empty entries.*" + "Header block.*" + "Content block.*" + "Slotfile verified.*" + ); + ASSERT_MATCH("vdsroot/disks/d0/400000000000000.0", ".*ff ff ff ff.*"); + + // User friendly output + ASSERT_MATCH("--friendly -nN vdsroot/disks/d0/400000000000000.0", + ".*id:mail:testdoctype1:n=0:9380.html.*"); + + ASSERT_MATCH("--tobinary " + "--docid id:mail:testdoctype1:n=0:doesnotexisthere.html " + "vdsroot/disks/d0/400000000000000.0", + ".*No document with id id:mail:testdoctype1:n=0:doesnotexi.* " + "found.*"); + + // Should test XML with content.. But needs document config for it to work. + // Should be able to create programmatically from testdocman. 
+ ASSERT_MATCH("--toxml --documentconfig '' " + "vdsroot/disks/d0/400000000000000.0", + ".*<vespafeed>\n" + "<document documenttype=\"testdoctype1\" " + "documentid=\"id:mail:testdoctype1:n=0:9639.html\">\n" + "<content>overwritten</content>\n" + "</document>.*"); + + // To binary + ASSERT_MATCH("--tobinary --docid id:mail:testdoctype1:n=0:9380.html " + "vdsroot/disks/d0/400000000000000.0", + ".*"); + { + TestDocMan docMan; + document::ByteBuffer buf(output.c_str(), output.size()); + document::Document doc(docMan.getTypeRepo(), buf); + CPPUNIT_ASSERT_EQUAL(std::string( + "<document documenttype=\"testdoctype1\" " + "documentid=\"id:mail:testdoctype1:n=0:9380.html\">\n" + "<content>To be, or not to be: that is the question:\n" + "Whether 'tis nobler in the mind to suffer\n" + "The slings and arrows of outrage</content>\n" + "</document>"), doc.toXml()); + } + + // Fail verification + { + vespalib::LazyFile file("vdsroot/disks/d0/400000000000000.0", 0); + file.write("corrupt", 7, 64); + } + ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0", + ".*lot 0 at timestamp [0-9]+ failed checksum verification.*"); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp b/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp new file mode 100644 index 00000000000..29e780bc900 --- /dev/null +++ b/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/config/subscription/configuri.h> +#include <vespa/memfilepersistence/tools/vdsdisktool.h> +#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> +#include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/vespalib/util/programoptions_testutils.h> +#include <tests/spi/memfiletestutils.h> + +namespace storage { +namespace memfile { + +struct VdsDiskToolTest : public SingleDiskMemFileTestUtils +{ + framework::defaultimplementation::FakeClock _clock; + DeviceManager::LP _deviceManager; + + void setUp(); + void setupRoot(); + + void testSimple(); + + CPPUNIT_TEST_SUITE(VdsDiskToolTest); + CPPUNIT_TEST(testSimple); + CPPUNIT_TEST_SUITE_END(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(VdsDiskToolTest); + +#define ASSERT_MATCH(optstring, pattern, exitcode) \ +{ \ + std::ostringstream out; \ + int result = 1; \ + try{ \ + vespalib::AppOptions opts("vdsdisktool " optstring); \ + result = VdsDiskTool::run(opts.getArgCount(), opts.getArguments(), \ + "vdsroot", out, out); \ + } catch (std::exception& e) { \ + out << "Application aborted with exception:\n" << e.what() << "\n"; \ + } \ + CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \ + CPPUNIT_ASSERT_EQUAL(exitcode, result); \ +} + +namespace { + void createDisk(int i) { + std::ostringstream path; + path << "vdsroot/mycluster/storage/3/disks/d" << i; + CPPUNIT_ASSERT_EQUAL(0, system(("mkdir -p " + path.str()).c_str())); + } +} + +void +VdsDiskToolTest::setUp() +{ + system("rm -rf vdsroot"); + _deviceManager.reset(new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), _clock)); +} + +void +VdsDiskToolTest::setupRoot() +{ + system("rm -rf vdsroot"); + createDisk(0); +} + +void +VdsDiskToolTest::testSimple() +{ + // Test syntax page + ASSERT_MATCH("--help", ".*Usage: vdsdisktool .*", 0); + // No VDS installation + ASSERT_MATCH("status", ".*No VDS installations found at all.*", 1); + // Common setup + setupRoot(); + ASSERT_MATCH("status", ".*Disks on 
storage node 3 in cluster mycluster:\\s*" + "Disk 0: OK\\s*", 0); + // Two disks + system("mkdir -p vdsroot/mycluster/storage/3/disks/d1/"); + ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" + "Disk 0: OK\\s*" + "Disk 1: OK\\s*", 0); + // Two disks, non-continuous indexes + system("rm -rf vdsroot/mycluster/storage/3/disks/d1/"); + system("mkdir -p vdsroot/mycluster/storage/3/disks/d2/"); + ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" + "Disk 0: OK\\s*" + "Disk 1: NOT_FOUND - Disk not found during scan.*" + "Disk 2: OK\\s*", 0); + // Status file existing + setupRoot(); + createDisk(1); + MountPointList mountPoints("vdsroot/mycluster/storage/3", + std::vector<vespalib::string>(), + _deviceManager); + mountPoints.scanForDisks(); + CPPUNIT_ASSERT_EQUAL(2u, mountPoints.getSize()); + mountPoints[1].addEvent(Device::IO_FAILURE, "Bad", "Found in test"); + mountPoints.writeToFile(); + ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" + "Disk 0: OK\\s*" + "Disk 1: IO_FAILURE - 0 Bad\\s*", 0); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/.gitignore new file mode 100644 index 00000000000..c43cd4d8c3b --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/.gitignore @@ -0,0 +1,3 @@ +/.depend +/Makefile +/libmemfilepersistence.so.5.1 diff --git a/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt new file mode 100644 index 00000000000..465e217210c --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(memfilepersistence + SOURCES + $<TARGET_OBJECTS:memfilepersistence_device> + $<TARGET_OBJECTS:memfilepersistence_init> + $<TARGET_OBJECTS:memfilepersistence_mapper> + $<TARGET_OBJECTS:memfilepersistence_spi> + $<TARGET_OBJECTS:memfilepersistence_common> + $<TARGET_OBJECTS:memfilepersistence_memfile> + $<TARGET_OBJECTS:memfilepersistence_tools> + INSTALL lib64 + DEPENDS +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt new file mode 100644 index 00000000000..82a78fa1d0a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_common OBJECT + SOURCES + environment.cpp + options.cpp + types.cpp + filespecification.cpp + exceptions.cpp + slotmatcher.cpp + config_lock_guard.cpp + DEPENDS +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h new file mode 100644 index 00000000000..506699f7e31 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/config-stor-memfilepersistence.h> +#include <vespa/config-stor-devices.h> +#include <vespa/config-persistence.h> + +namespace storage { +namespace memfile { + +// Friendly aliases to painfully long config names. +using MemFilePersistenceConfig + = vespa::config::storage::StorMemfilepersistenceConfig; +using PersistenceConfig = vespa::config::content::PersistenceConfig; +using DevicesConfig = vespa::config::storage::StorDevicesConfig; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp new file mode 100644 index 00000000000..1e68024d8dd --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/config_lock_guard.h> +#include <vespa/memfilepersistence/common/environment.h> + +namespace storage { +namespace memfile { + +bool +ConfigLockGuardBase::hasPersistenceConfig() const noexcept +{ + return (_env->_persistenceConfig.get() != nullptr); +} + +std::shared_ptr<const PersistenceConfig> +ConfigLockGuardBase::persistenceConfig() const noexcept +{ + return _env->_persistenceConfig; +} + +bool +ConfigLockGuardBase::hasMemFilePersistenceConfig() const noexcept +{ + return (_env->_config.get() != nullptr); +} + +std::shared_ptr<const MemFilePersistenceConfig> +ConfigLockGuardBase::memFilePersistenceConfig() const noexcept +{ + return _env->_config; +} + +bool +ConfigLockGuardBase::hasDevicesConfig() const noexcept +{ + return (_env->_devicesConfig.get() != nullptr); +} + +std::shared_ptr<const DevicesConfig> +ConfigLockGuardBase::devicesConfig() const noexcept +{ + return _env->_devicesConfig; +} + +bool +ConfigLockGuardBase::hasOptions() const 
noexcept +{ + return (_env->_options.get() != nullptr); +} + +std::shared_ptr<const Options> +ConfigLockGuardBase::options() const noexcept +{ + return _env->_options; +} + +ConfigWriteLockGuard::ConfigWriteLockGuard(Environment& e) + : ConfigLockGuardBase(e), + _lock(e._configRWLock), + _mutableEnv(&e) +{ +} + +ConfigWriteLockGuard::ConfigWriteLockGuard(ConfigWriteLockGuard&& other) + : ConfigLockGuardBase(std::move(other)), + _lock(other._lock), // Implicit lock stealing, no explicit moving + _mutableEnv(other._mutableEnv) +{ + other._mutableEnv = nullptr; +} + +void +ConfigWriteLockGuard::setPersistenceConfig( + std::unique_ptr<PersistenceConfig> cfg) noexcept +{ + mutableEnv()._persistenceConfig = std::move(cfg); +} + +void +ConfigWriteLockGuard::setMemFilePersistenceConfig( + std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept +{ + mutableEnv()._config = std::move(cfg); +} + +void +ConfigWriteLockGuard::setDevicesConfig( + std::unique_ptr<DevicesConfig> cfg) noexcept +{ + mutableEnv()._devicesConfig = std::move(cfg); +} + +void +ConfigWriteLockGuard::setOptions(std::unique_ptr<Options> opts) +{ + mutableEnv()._options = std::move(opts); +} + +ConfigReadLockGuard::ConfigReadLockGuard(const Environment& e) + : ConfigLockGuardBase(e), + _lock(e._configRWLock) +{ +} + +ConfigReadLockGuard::ConfigReadLockGuard(ConfigReadLockGuard&& other) + : ConfigLockGuardBase(std::move(other)), + _lock(other._lock) // Implicit lock stealing, no explicit moving +{ +} + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h new file mode 100644 index 00000000000..b97b61010af --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/memfilepersistence/common/config_aliases.h> +#include <vespa/memfilepersistence/common/options.h> +#include <vespa/vespalib/util/rwlock.h> +#include <memory> + +namespace storage { +namespace memfile { + +class Environment; + +/** + * Shared guard base allowing read access to existing configs via both + * read and write guard subclasses. + */ +class ConfigLockGuardBase { +public: + explicit ConfigLockGuardBase(const Environment& e) + : _env(&e) + { + } + + ConfigLockGuardBase(ConfigLockGuardBase&& other) + : _env(other._env) + { + // If the source is attempted used after the move, ensure it nukes + // itself with a SIGSEGV. + other._env = nullptr; + } + + // To avoid circular dependencies, all access of Environment internals + // must be in separate .cpp file. + + bool hasPersistenceConfig() const noexcept; + std::shared_ptr<const PersistenceConfig> persistenceConfig() const noexcept; + + bool hasMemFilePersistenceConfig() const noexcept; + std::shared_ptr<const MemFilePersistenceConfig> + memFilePersistenceConfig() const noexcept; + + bool hasDevicesConfig() const noexcept; + std::shared_ptr<const DevicesConfig> devicesConfig() const noexcept; + + bool hasOptions() const noexcept; + std::shared_ptr<const Options> options() const noexcept; + + ConfigLockGuardBase(const ConfigLockGuardBase&) = delete; + ConfigLockGuardBase& operator=(const ConfigLockGuardBase&) = delete; + +private: + const Environment* _env; +}; + +class ConfigWriteLockGuard : public ConfigLockGuardBase { +public: + explicit ConfigWriteLockGuard(Environment& e); + /** + * Moving a guard transfers ownership of the lock to the move target. It + * is illegal and undefined behavior to attempt to access the environment + * configuration through a guard whose lock has been transferred away. + */ + ConfigWriteLockGuard(ConfigWriteLockGuard&& other); + + // By definition, configs can only be mutated when the writer lock + // is held. 
+ void setPersistenceConfig(std::unique_ptr<PersistenceConfig> cfg) noexcept; + void setMemFilePersistenceConfig( + std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept; + void setDevicesConfig(std::unique_ptr<DevicesConfig> cfg) noexcept; + void setOptions(std::unique_ptr<Options> opts); + +private: + vespalib::RWLockWriter _lock; + // This points to the exact same object as the const ref in the base + // and basically serves as an alternative to const_cast. + Environment* _mutableEnv; + + // Hide the fact that we're storing duplicate information to other + // methods. + Environment& mutableEnv() { return *_mutableEnv; } +}; + +class ConfigReadLockGuard : public ConfigLockGuardBase { +public: + explicit ConfigReadLockGuard(const Environment& e); + ConfigReadLockGuard(ConfigReadLockGuard&& other); + + // Config reader methods already implemented in base. + +private: + vespalib::RWLockReader _lock; +}; + + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h b/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h new file mode 100644 index 00000000000..9ca9c9e6294 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class ConfigKeeper + * \class memfile + * + * \brief Utility function for live reconfiguration + * + * When many threads want the same config, we don't want each of these threads + * to subscribe on the same config because of the following reasons: + * - No need to put lots of extra load on the config system. + * - Application doesn't know whether all users have the same config version + * at any given time. + * + * This class implements a utility class for handling this. 
+ */ +#pragma once + +#include <vespa/vespalib/util/sync.h> + +namespace storage { + +template<typename ConfigClass> +class ConfigKeeper { + vespalib::Monitor _configLock; + bool _configUpdated; // Set to true if updating config. + std::unique_ptr<ConfigClass> _nextConfig; + ConfigClass _config; + +public: + ConfigKeeper() : _configUpdated(false) {} + + void updateConfig(const ConfigClass& config) { + vespalib::MonitorGuard lock(_configLock); + _nextConfig.reset(new ConfigClass(config)); + _configUpdated = true; + } + + void activateNewConfig() { + if (!_configUpdated) return; + vespalib::MonitorGuard lock(_configLock); + _config = *_nextConfig; + _nextConfig.reset(0); + _configUpdated = false; + lock.signal(); + } + + void waitForAnyActivation() { + vespalib::MonitorGuard lock(_configLock); + while (_configUpdated) lock.wait(); + } + + ConfigClass* operator->() { return &_config; } + ConfigClass& operator*() { return _config; } +}; + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp new file mode 100644 index 00000000000..6cfe2269886 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/vespalib/util/random.h> +#include <vespa/vespalib/util/vstringfmt.h> +#include <vespa/config/config.h> + +using config::ConfigGetter; + +namespace storage { +namespace memfile { + +namespace { + +template <typename ConfigT> +std::shared_ptr<ConfigT> +resolveConfig(const config::ConfigUri& configUri) +{ + return {ConfigGetter<ConfigT>::getConfig( + configUri.getConfigId(), configUri.getContext())}; +} + +} + +vespalib::LazyFile::UP +DefaultLazyFileFactory::createFile(const std::string& fileName) const +{ + return vespalib::LazyFile::UP( + new vespalib::LazyFile( + fileName, vespalib::File::DIRECTIO | _flags)); +} + +Environment::Environment(const config::ConfigUri & configUri, + MemFileCache& cache, + MemFileMapper& mapper, + const document::DocumentTypeRepo& typeRepo, + const framework::Clock& clock, + bool ignoreDisks) + : _clock(clock), + _cache(cache), + _memFileMapper(mapper), + _bucketFactory(), + _lazyFileFactory(new DefaultLazyFileFactory( + ignoreDisks ? 
vespalib::File::READONLY : 0)), + _repo(&typeRepo), + _config(resolveConfig<MemFilePersistenceConfig>(configUri)), + _persistenceConfig(resolveConfig<PersistenceConfig>(configUri)), + _devicesConfig(resolveConfig<DevicesConfig>(configUri)), + _options(std::make_shared<Options>(*_config, *_persistenceConfig)) +{ + DeviceManager::LP manager( + new DeviceManager(DeviceMapper::UP(new SimpleDeviceMapper()), + _clock)); + + manager->setPartitionMonitorPolicy( + _devicesConfig->statfsPolicy, _devicesConfig->statfsPeriod); + _mountPoints.reset(new MountPointList(_devicesConfig->rootFolder, + _devicesConfig->diskPath, + manager)); + + if (!ignoreDisks) { + _mountPoints->init(0); + + // Update full disk setting for partition monitors + for (uint32_t i=0; i<_mountPoints->getSize(); ++i) { + Directory& dir(getDirectory(i)); + if (dir.getPartition().getMonitor() != 0) { + dir.getPartition().getMonitor()->setMaxFillness( + _options->_diskFullFactor); + } + } + } +} + +Types::String +Environment::calculatePathInDir(const Types::BucketId& id, Directory& dir) +{ + vespalib::asciistream os; + os << dir.getPath() << '/'; + // Directories created should only depend on bucket identifier. 
+ document::BucketId::Type seed = id.getId(); + seed = seed ^ (seed >> 32); + vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11); + + for (uint32_t i = 1; i <= (uint32_t)_config->dirLevels; ++i) { + os << vespalib::make_vespa_string( + "%.4x/", + randomizer.nextUint32() % _config->dirSpread); + } + + os << vespalib::make_vespa_string("%.8" PRIx64 ".0", id.getId()); + return os.str(); +} + +Environment::~Environment() +{ +} + +Directory& Environment::getDirectory(uint16_t disk) +{ + return (*_mountPoints)[disk]; +} + +void +Environment::addModifiedBucket(const document::BucketId& bid) +{ + vespalib::LockGuard guard(_modifiedBucketsLock); + _modifiedBuckets.push_back(bid); +} + +void +Environment::swapModifiedBuckets(document::BucketId::List & ids) +{ + vespalib::LockGuard guard(_modifiedBucketsLock); + _modifiedBuckets.swap(ids); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.h b/memfilepersistence/src/vespa/memfilepersistence/common/environment.h new file mode 100644 index 00000000000..8a944f6921e --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/environment.h @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::slotfile::MemFileEnvironment + * \ingroup memfile + * + * \brief Keeps environment for MemFile operations + * + * The memfile layer needs quite a lot of stuff set up in order to work. Rather + * than passing all these bits around when creating new slotfiles, we rather + * have an environment where all the static pieces not related to single files + * will be kept. 
+ */ + +#pragma once + +#include <vespa/config/helper/configfetcher.h> +#include <vespa/memfilepersistence/common/options.h> +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/common/config_lock_guard.h> +#include <vespa/memfilepersistence/common/config_aliases.h> +#include <vespa/storageframework/storageframework.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/memfilepersistence/device/mountpointlist.h> + +namespace storage { +namespace memfile { + +class MemFileMapper; +class MemFileCache; + +struct Environment : public Types { + class LazyFileFactory { + public: + virtual ~LazyFileFactory() {}; + + virtual vespalib::LazyFile::UP + createFile(const std::string& fileName) const = 0; + }; + using UP = std::unique_ptr<Environment>; + + const framework::Clock& _clock; + MemFileCache& _cache; + MemFileMapper& _memFileMapper; + MountPointList::UP _mountPoints; + document::BucketIdFactory _bucketFactory; + std::unique_ptr<LazyFileFactory> _lazyFileFactory; + vespalib::Lock _modifiedBucketsLock; + document::BucketId::List _modifiedBuckets; + + Environment(const config::ConfigUri & configUri, + MemFileCache&, + MemFileMapper&, + const document::DocumentTypeRepo&, + const framework::Clock&, + bool ignoreDisks = false); + ~Environment(); + + String calculatePathInDir(const Types::BucketId& id, Directory& dir); + + vespalib::LazyFile::UP createFile(const std::string& fileName) const { + return _lazyFileFactory->createFile(fileName); + } + + Directory& getDirectory(uint16_t disk = 0); + + void addModifiedBucket(const document::BucketId&); + void swapModifiedBuckets(document::BucketId::List &); + + ConfigReadLockGuard acquireConfigReadLock() const { + return ConfigReadLockGuard(*this); + } + + ConfigWriteLockGuard acquireConfigWriteLock() { + return ConfigWriteLockGuard(*this); + } + + /** + * Get the currently assigned document repo in a data race free manner. 
+ * Forms a release/acquire pair with setRepo() + */ + const document::DocumentTypeRepo& repo() const noexcept { + return *_repo.load(std::memory_order_acquire); + } + /** + * Sets the currently assigned document repo in a data race free manner. + * Forms a release/acquire pair with repo() + */ + void setRepo(const document::DocumentTypeRepo* typeRepo) noexcept { + _repo.store(typeRepo, std::memory_order_release); + } +private: + mutable vespalib::RWLock _configRWLock; + /** + * For simplicity, repos are currently kept alive for the duration of the + * process. This means we don't have to care about lifetime management, but + * we still have to ensure writes that set the repo are release/acquired + * paired with their reads. Repos are provided through the SPI and _not_ + * through regular provider-level config subscription, so we therefore do + * not require the config lock to be held when reading/writing. + */ + std::atomic<const document::DocumentTypeRepo*> _repo; + /** + * Configs are kept as shared_ptrs to allow lock window to remain as small + * as possible while still retaining thread safety during pointer + * reassignments. + */ + std::shared_ptr<const MemFilePersistenceConfig> _config; + std::shared_ptr<const PersistenceConfig> _persistenceConfig; + std::shared_ptr<const DevicesConfig> _devicesConfig; + /** + * Options is not a true config as per se, but is an aggregate of multiple + * other configs and must thus be protected as if it were. + */ + std::shared_ptr<const Options> _options; + // We entrust the config guards with access to our internals. 
+ friend class ConfigLockGuardBase; + friend class ConfigWriteLockGuard; + friend class ConfigReadLockGuard; +}; + +struct DefaultLazyFileFactory + : public Environment::LazyFileFactory +{ + int _flags; + + DefaultLazyFileFactory(int flags) : _flags(flags) {} + + vespalib::LazyFile::UP createFile(const std::string& fileName) const; +}; + +} // storage +} // memfile + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp new file mode 100644 index 00000000000..16235f21707 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/exceptions.h> + +namespace storage { +namespace memfile { + +VESPA_IMPLEMENT_EXCEPTION_SPINE(TimestampExistException); +VESPA_IMPLEMENT_EXCEPTION_SPINE(InconsistentSlotException); +VESPA_IMPLEMENT_EXCEPTION_SPINE(MemFileIoException); +VESPA_IMPLEMENT_EXCEPTION(NoDisksException, vespalib::Exception); + +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(SlotNotFoundException); +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidArgumentException); +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidStateException); +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(CorruptMemFileException); +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(MemFileWrapperException); +VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InconsistentException); + +MemFileException::MemFileException(const FileSpecification& file) + : _file(file) +{ +} + +MemFileException::~MemFileException() +{ +} + +TimestampExistException::TimestampExistException( + const vespalib::string& message, const FileSpecification& file, + Types::Timestamp ts, const vespalib::string& location, int skipStack) + : Exception(message, location, skipStack + 1), + MemFileException(file), + _timestamp(ts) +{ +} + 
+InconsistentSlotException::InconsistentSlotException( + const vespalib::string& message, const FileSpecification& file, + const MemSlot& slot, const vespalib::string& location, int skipstack) + : InconsistentException(message, file, location, skipstack + 1), + _slot(slot) +{ +} + +MemFileIoException::MemFileIoException( + const vespalib::string& msg, const FileSpecification& file, + Type type, const vespalib::string& location, int skipStack) + : IoException(msg, type, location, skipStack + 1), + MemFileException(file) +{ +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h new file mode 100644 index 00000000000..03edf7e7a83 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::IoException + * \ingroup memfile + * + * \brief Exception thrown by memfile layer for IO problems. + * + * Storage needs to know what disk was having issues for disk related problems, + * in case it needs to disable a non-working disk. Some information on what + * file was being operated on while one is having trouble is nice anyhow. Thus + * specific exceptions have been created to keep the file specification of the + * file in question. The MemFile layer may throw some exceptions that aren't + * MemFileExceptions though. These exceptions should not be disk/file related. 
+ */ + +#pragma once + +#include <vespa/memfilepersistence/common/filespecification.h> +#include <vespa/memfilepersistence/memfile/memslot.h> +#include <vespa/vespalib/util/exceptions.h> + +#define VESPA_DEFINE_MEMFILE_EXCEPTION(name) \ + struct name : public vespalib::Exception, public MemFileException { \ + name(const vespalib::string& message, const FileSpecification& file, \ + const vespalib::string& location, int skipStack = 0); \ + ~name() throw(); \ + VESPA_DEFINE_EXCEPTION_SPINE(name); \ +}; + +#define VESPA_IMPLEMENT_MEMFILE_EXCEPTION(name) \ + name::name(const vespalib::string& message, const FileSpecification& file, \ + const vespalib::string& location, int skipStack) \ + : vespalib::Exception(message, location, skipStack + 1), \ + MemFileException(file) {} \ + name::~name() throw() {} \ + VESPA_IMPLEMENT_EXCEPTION_SPINE(name); + +namespace storage { +namespace memfile { + +VESPA_DEFINE_EXCEPTION(NoDisksException, vespalib::Exception); + +/** + * \class storage::memfile::MemFileException + * \ingroup memfile + * + * \brief Interface to implement for exceptions that contain a file specification specifying what memfile was problematic. 
+ */ +class MemFileException : protected Types { + FileSpecification _file; + +public: + MemFileException(const FileSpecification&); + virtual ~MemFileException() = 0; + + const FileSpecification& getFile() const { return _file; } +}; + +VESPA_DEFINE_MEMFILE_EXCEPTION(SlotNotFoundException); +VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidArgumentException); +VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidStateException); +VESPA_DEFINE_MEMFILE_EXCEPTION(CorruptMemFileException); +VESPA_DEFINE_MEMFILE_EXCEPTION(MemFileWrapperException); + +/** + * \class storage::InconsistentException + * \ingroup memfile + * + * \brief Thrown by MemFile::verifyConsistent() if inconsistent + */ +VESPA_DEFINE_MEMFILE_EXCEPTION(InconsistentException); + +/** + * @class storage::TimestampExistException + * @ingroup filestorage + * + * @brief Thrown by SlotFile::write() when timestamp given is already in use. + */ +class TimestampExistException : public vespalib::Exception, + public MemFileException +{ + Timestamp _timestamp; +public: + TimestampExistException(const vespalib::string& message, + const FileSpecification&, Timestamp ts, + const vespalib::string& location, int skipstack = 0); + virtual ~TimestampExistException() throw() {} + + VESPA_DEFINE_EXCEPTION_SPINE(TimestampExistException); + + Timestamp getTimestamp() const { return _timestamp; } +}; + +/** + * @class storage::InconsistentSlotException + * @ingroup filestorage + * + * @brief Thrown by MemFile::verifyConsistent() if a slot is inconsistent + */ +class InconsistentSlotException : public InconsistentException { + MemSlot _slot; + +public: + InconsistentSlotException(const vespalib::string& message, + const FileSpecification&, const MemSlot& slot, + const vespalib::string& location, int skipstack = 0); + virtual ~InconsistentSlotException() throw() {} + + VESPA_DEFINE_EXCEPTION_SPINE(InconsistentSlotException); +}; + +class MemFileIoException : public vespalib::IoException, + public MemFileException +{ +public: + MemFileIoException(const 
vespalib::string& msg, const FileSpecification&, + Type type, const vespalib::string& location, + int skipStack = 0); + virtual ~MemFileIoException() throw() {} + + VESPA_DEFINE_EXCEPTION_SPINE(MemFileIoException); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp new file mode 100644 index 00000000000..b3e90d53bb1 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/filespecification.h> + +namespace storage { +namespace memfile { + +FileSpecification::FileSpecification(const BucketId& bucket, Directory& dir, + const String& path) + : _bucketId(bucket), + _dir(&dir), + _path(path), + _wantedVersion(TRADITIONAL_SLOTFILE) +{ + if (dir.getState() != Device::OK) { + throw vespalib::IllegalStateException( + "Attempt to create file specification for file on disk that " + "is not available: " + dir.toString(), VESPA_STRLOC); + } +} + +void +FileSpecification::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + (void) verbose; (void) indent; + out << "FileSpecification(" << _bucketId << ", " << *_dir << ", " << _path + << ", wanted version 0x" << std::hex << _wantedVersion << std::dec + << ")"; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h new file mode 100644 index 00000000000..4d9cda2c47c --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +/** + * \class storage::slotfile::FileSpecification + * \ingroup memfile + * + * \brief Information about the file currently worked on. + * + * The file specification specifies what file a given MemFile should work on. + */ + +#pragma once + +#include <vespa/vespalib/util/printable.h> +#include <vespa/memfilepersistence/device/directory.h> +#include <vespa/memfilepersistence/common/types.h> + +namespace storage { +namespace memfile { + +class MemFileEnvironment; + +class FileSpecification : private Types, + public vespalib::Printable, + public boost::operators<FileSpecification> +{ + BucketId _bucketId; + Directory* _dir; + String _path; + FileVersion _wantedVersion; + +public: + FileSpecification(const BucketId&, Directory&, const String& path); + + void setWantedVersion(FileVersion v) { _wantedVersion = v; } + + const document::BucketId& getBucketId() const { return _bucketId; } + Directory& getDirectory() const { return *_dir; } + const String& getPath() const { return _path; } + FileVersion getWantedFileVersion() const { return _wantedVersion; } + + virtual void print(std::ostream& out, bool verbose, + const std::string& indent) const; + + bool operator==(const FileSpecification& o) const { + return (_bucketId == o._bucketId && _dir == o._dir + && _path == o._path && _wantedVersion == o._wantedVersion); + } +}; + +} // storage +} // memfile + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h b/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h new file mode 100644 index 00000000000..de807efed2a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/freeptr.h @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @class storage::FreePtr + * @ingroup slotfile + * + * @brief Simple pointer wrapper that free() its content when deleted. 
+ * + * Utility used to hold memory allocated with malloc directly. + */ + +#pragma once + +#include <iostream> +#include <sstream> + +namespace storage { + +template<typename T> +class FreePtr { + T* _ptr; + +public: + FreePtr(T* ptr = 0) : _ptr(ptr) {} + ~FreePtr() { free(); } + + FreePtr(FreePtr& ptr) : _ptr(ptr._ptr) { ptr._ptr = 0; } + FreePtr& operator=(FreePtr& ptr) { swap(ptr); ptr.free(); return *this; } + + void reset(T* ptr = 0) { free(); _ptr = ptr; } + void swap(FreePtr<T>& other) + { T* tmp = _ptr; _ptr = other._ptr; other._ptr = tmp; } + T* get() { return _ptr; } + const T* get() const { return _ptr; } + T* operator->() { return _ptr; } + const T* operator->() const { return _ptr; } + T& operator*() { assert(_ptr != 0); return *_ptr; } + const T& operator*() const { assert(_ptr != 0); return *_ptr; } + void free() { if (_ptr != 0) { ::free(_ptr); _ptr = 0; } } +}; + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp new file mode 100644 index 00000000000..7cf75c4f977 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/options.h> + +#include <vespa/log/log.h> +#include <iomanip> +#include <vespa/config-stor-memfilepersistence.h> + +LOG_SETUP(".persistence.slotfile.options"); + +namespace storage { + +namespace memfile { + +Options::Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig, + const vespa::config::content::PersistenceConfig& newPersistenceConfig) + : _minimumFileMetaSlots(newConfig.minimumFileMetaSlots), + _maximumFileMetaSlots(newConfig.maximumFileMetaSlots), + _minimumFileHeaderBlockSize(newConfig.minimumFileHeaderBlockSize), + _maximumFileHeaderBlockSize(newConfig.maximumFileHeaderBlockSize), + _minimumFileSize(newConfig.minimumFileSize), + _maximumFileSize(newConfig.maximumFileSize), + _fileBlockSize(newConfig.fileBlockSize), + _revertTimePeriod(newPersistenceConfig.revertTimePeriod * 1000000ll), + _keepRemoveTimePeriod( + newPersistenceConfig.keepRemoveTimePeriod * 1000000ll), + _maxDocumentVersions( + newPersistenceConfig.maximumVersionsOfSingleDocumentStored), + _cacheSize(newConfig.cacheSize), + _initialIndexRead(newConfig.initialIndexRead), + _maximumGapToReadThrough(newConfig.maximumGapToReadThrough), + _diskFullFactor(newConfig.diskFullFactor), + _growFactor(newConfig.growFactor), + _overrepresentMetaDataFactor(newConfig.overrepresentMetaDataFactor), + _overrepresentHeaderBlockFactor(newConfig.overrepresentHeaderBlockFactor), + _defaultRemoveDocType( + newConfig.store50BackwardsCompatibleRemoveEntriesWithDoctype) +{ + validate(); +} + +namespace { + template<typename Number> + void verifyAligned(Number n, uint32_t alignSize, const char* name) { + if (n % alignSize != 0) { + std::ostringstream ost; + ost << name << " " << n + << " must be dividable by block alignment size " << alignSize; + throw vespalib::IllegalStateException( + ost.str(), VESPA_STRLOC); + } + } +} + +void Options::validate() +{ + uint32_t tmp32 = 0; + + // REVERT / KEEP REMOVE TIME PERIODS 
+ if (_revertTimePeriod > _keepRemoveTimePeriod) { + LOG(warning, "Keep all time period (%" PRIu64 ") is set larger than keep " + "removes time period (%" PRIu64 ". Adjusting keep removes " + "period to match", + _revertTimePeriod.getTime(), _keepRemoveTimePeriod.getTime()); + _keepRemoveTimePeriod = _revertTimePeriod; + } + if (_maxDocumentVersions < 1) { + LOG(warning, "Max number of document versions attempted set to 0. " + "This is a bad idea for all the obvious reasons. Forcing " + "used value to be 1."); + _maxDocumentVersions = 1; + } + // MINIMUM FILE SIZES + if (_minimumFileMetaSlots < 1) { + LOG(warning, "Minimum file meta slots is not allowed to be less than " + "1. Setting it to 1."); + _minimumFileMetaSlots = 1; + } + if (_minimumFileMetaSlots > 1024*1024) { + LOG(warning, "Minimum file meta slots is not allowed to be more than " + "%u. Setting it to %u.", 1024*1024, 1024*1024); + _minimumFileMetaSlots = 1024*1024; + } + if (_minimumFileHeaderBlockSize > 2*1024*1024*1024u) { + LOG(warning, "Minimum file header block size is not allowed to be above" + " 2 GB. Altering it from %u B to 2 GB.", + _minimumFileHeaderBlockSize); + _minimumFileHeaderBlockSize = 2*1024*1024*1024u; + } + if (_minimumFileSize % _fileBlockSize != 0) { + tmp32 = _fileBlockSize + * ((_minimumFileSize + _fileBlockSize - 1) / _fileBlockSize); + LOG(warning, "Min file size %u not a multiplum of file block size %u. " + "Increasing minimum filesize to %u to match.", + _minimumFileSize, _fileBlockSize, tmp32); + _minimumFileSize = tmp32; + } + // MAXIMUM FILE SIZES + if (_maximumFileMetaSlots != 0 + && _maximumFileMetaSlots < _minimumFileMetaSlots) + { + LOG(warning, "Maximum file meta slots cannot be less than the minimum. 
" + "Adjusting it from %u to %u.", + _maximumFileMetaSlots, _minimumFileMetaSlots); + _maximumFileMetaSlots = _minimumFileMetaSlots; + } + if (_maximumFileHeaderBlockSize != 0 + && _maximumFileHeaderBlockSize < _minimumFileHeaderBlockSize) + { + LOG(warning, "Maximum file header block size cannot be less than the " + "minimum. Adjusting it from %u to %u.", + _maximumFileHeaderBlockSize, _minimumFileHeaderBlockSize); + _maximumFileHeaderBlockSize = _minimumFileHeaderBlockSize; + } + if (_maximumFileSize != 0 && _maximumFileSize < _minimumFileSize) { + LOG(warning, "Maximum file size cannot be less than the " + "minimum. Adjusting it from %u to %u.", + _maximumFileSize, _minimumFileSize); + _maximumFileSize = _minimumFileSize; + } + if (_maximumFileSize % _fileBlockSize != 0) { + tmp32 = _fileBlockSize + * ((_maximumFileSize + _fileBlockSize - 1) / _fileBlockSize); + LOG(warning, "Max file size %u not a multiplum of file block size %u. " + "Increasing maximum to %u bytes to match.", + _maximumFileSize, _fileBlockSize, tmp32); + _maximumFileSize = tmp32; + } + + if (_growFactor < 1.0 || _growFactor >= 100.0) { + throw vespalib::IllegalStateException( + "The grow factor needs to be in the range [1, 100].", + VESPA_STRLOC); + } + + if (!_defaultRemoveDocType.empty()) { + // Log the usage of this option to make it visible, as it is not + // something most people should use. + LOG(info, + "Will write remove entries in 5.0 backwards compatible mode. 
By " + "default this will be done using the '%s' document type unless " + "the document identifier specifies otherwise.", + _defaultRemoveDocType.c_str()); + } +} + +void Options::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + (void) verbose; + std::string s("\n" + indent + " "); + + out << "SlotFile options:" + << s << "Minimum file meta slots: " << _minimumFileMetaSlots + << s << "Maximum file meta slots: " << _maximumFileMetaSlots + << s << "Minimum file header block size: " + << _minimumFileHeaderBlockSize << " b" + << s << "Maximum file header block size: " + << _maximumFileHeaderBlockSize << " b" + << s << "Minimum file size: " << _minimumFileSize << " b" + << s << "Maximum file size: " << _maximumFileSize << " b" + << s << "Filesystem block size: " << _fileBlockSize << " b" + << s << "Revert time period: " << _revertTimePeriod << " microsecs" + << s << "Keep remove time period: " + << _keepRemoveTimePeriod << "microsecs" + << s << "Max document versions: " << _maxDocumentVersions + << s << "Cache size: " << _cacheSize + << s << "Initial index read: " << _initialIndexRead << " b" + << s << "Maximum gap to read through: " + << _maximumGapToReadThrough << " b" + << s << "Disk full factor: " << _diskFullFactor + << s << "Grow factor: " << _growFactor + << s << "Overrepresent meta data factor: " + << _overrepresentMetaDataFactor + << s << "Overrepresent header block factor: " + << _overrepresentHeaderBlockFactor + << s << "Write removes with blank documents of default type: " + << _defaultRemoveDocType + << ""; +} + +} + +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.h b/memfilepersistence/src/vespa/memfilepersistence/common/options.h new file mode 100644 index 00000000000..831f43ab603 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/options.h @@ -0,0 +1,136 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/**
+ * @class storage::Options
+ * @ingroup filestorage
+ *
+ * @brief Options used by slotfiles
+ *
+ * To avoid the need for static variables which cannot be altered while the
+ * system is running, and which forces all slotfile instances to work with the
+ * same options, this options class has been created to contain all the options
+ * a slotfile will use.
+ *
+ * @author Håkon Humberset
+ * @date 2005-10-26
+ */
+
+#pragma once
+
+#include <boost/operators.hpp>
+#include <vespa/vespalib/util/printable.h>
+#include <vespa/fastos/types.h> // For uint32_t on linux
+#include <string>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/storageframework/storageframework.h>
+#include <vespa/config-stor-memfilepersistence.h>
+#include <vespa/config-persistence.h>
+
+namespace storage {
+
+namespace memfile {
+
+struct Options : public vespalib::Printable,
+                 public boost::operators<Options>
+{
+    // Parameters from def file. See config file for comments.
+
+    // FILE SIZE PARAMETERS
+
+    uint32_t _minimumFileMetaSlots;
+    uint32_t _maximumFileMetaSlots;
+    uint32_t _minimumFileHeaderBlockSize;
+    uint32_t _maximumFileHeaderBlockSize;
+    uint32_t _minimumFileSize;
+    uint32_t _maximumFileSize;
+    uint32_t _fileBlockSize;
+
+    // CONSISTENCY PARAMETERS
+    framework::MicroSecTime _revertTimePeriod;
+    framework::MicroSecTime _keepRemoveTimePeriod;
+    uint32_t _maxDocumentVersions;
+
+    // PERFORMANCE PARAMETERS
+    uint64_t _cacheSize;
+    uint32_t _initialIndexRead;
+    uint32_t _maximumGapToReadThrough;
+
+    double _diskFullFactor;
+    double _growFactor;
+    double _overrepresentMetaDataFactor;
+    double _overrepresentHeaderBlockFactor;
+
+    // COMPATIBILITY PARAMETERS
+    // If non-empty, will cause remove entries to be written with a blank
+    // document containing only the document type and identifier rather than
+    // just writing a document id with no document at all. Note that if a
+    // document identifier contains a type string it will override this default
+    // value.
+ // This is a feature for backwards compatibility with 5.0, as it chokes + // when trying to read remove entries without a document. + vespalib::string _defaultRemoveDocType; + + /** + * Creates a new slotfile options instance. Implemented in header file, + * such that the current defaults can be easily viewed. + */ + Options() + : _minimumFileMetaSlots(512), + _maximumFileMetaSlots(0), + _minimumFileHeaderBlockSize(102848), + _maximumFileHeaderBlockSize(0), + _minimumFileSize(1048576), + _maximumFileSize(0), + _fileBlockSize(4096), + _revertTimePeriod(300 * 1000000ull), + _keepRemoveTimePeriod(604800 * 1000000ull), + _maxDocumentVersions(5), + _cacheSize(0), + _initialIndexRead(65536), + _maximumGapToReadThrough(65536), + _diskFullFactor(0.98), + _growFactor(2.0), + _overrepresentMetaDataFactor(1.2), + _overrepresentHeaderBlockFactor(1.1), + _defaultRemoveDocType() + { + } + + Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig, + const vespa::config::content::PersistenceConfig& newPersistenceConfig); + + void validate() const { const_cast<Options&>(*this).validate(); } + void validate(); + + /** Printable implementation */ + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + + bool operator==(const Options& options) const { + if (_minimumFileMetaSlots == options._minimumFileMetaSlots + && _maximumFileMetaSlots == options._maximumFileMetaSlots + && _minimumFileHeaderBlockSize + == options._minimumFileHeaderBlockSize + && _maximumFileHeaderBlockSize + == options._maximumFileHeaderBlockSize + && _minimumFileSize == options._minimumFileSize + && _maximumFileSize == options._maximumFileSize + && _fileBlockSize == options._fileBlockSize + && _revertTimePeriod == options._revertTimePeriod + && _maxDocumentVersions == options._maxDocumentVersions + && _keepRemoveTimePeriod == options._keepRemoveTimePeriod + && _cacheSize == options._cacheSize + && _initialIndexRead == options._initialIndexRead + && 
_maximumGapToReadThrough == options._maximumGapToReadThrough + && _diskFullFactor == options._diskFullFactor + && _defaultRemoveDocType == options._defaultRemoveDocType) + { + return true; + } + return false; + } +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp new file mode 100644 index 00000000000..dd045239877 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/slotmatcher.h> +#include <vespa/memfilepersistence/memfile/memfile.h> + +namespace storage { +namespace memfile { + +Types::Timestamp +SlotMatcher::Slot::getTimestamp() const +{ + return _slot.getTimestamp(); +} + +bool +SlotMatcher::Slot::isRemove() const +{ + return _slot.deleted(); +} + +const document::GlobalId& +SlotMatcher::Slot::getGlobalId() const +{ + return _slot.getGlobalId(); +} + +document::Document::UP +SlotMatcher::Slot::getDocument(bool headerOnly) const +{ + return _file.getDocument(_slot, headerOnly ? HEADER_ONLY : ALL); +} + +document::DocumentId +SlotMatcher::Slot::getDocumentId() const +{ + return _file.getDocumentId(_slot); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h new file mode 100644 index 00000000000..ca5988e9a19 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::slotfile::SlotMatcher + * \ingroup memfile + * + * \brief Implement this to create a filter for MemSlot instances. 
+ * + * Many operations want to do something to a subset of the slots in a file. + * Such operations can retrieve the slots that matches using an implementation + * of this filter. + * + * Creating a slot matcher, you should give information of what type of data + * you want to preload from disk. Typically you want to preload entries you + * need such as to prevent many disk accesses, but if there is some data you + * only need for a few entries, you can use the functions supplied in the + * matcher to get these instances even though they are not cached for all + * entries. + */ + +#pragma once + +#include <vespa/memfilepersistence/memfile/memslot.h> + +namespace storage { +namespace memfile { + +class MemFile; + +class SlotMatcher : private Types { +public: + enum PreloadFlag { + PRELOAD_META_DATA_ONLY = 0x0, + PRELOAD_BODY = 0x1, + PRELOAD_HEADER = 0x3, + PRELOAD_DOC_ID = 0x7 + }; + +protected: + SlotMatcher(PreloadFlag preld) : _preload(preld) {} + + PreloadFlag _preload; + +public: + class Slot { + private: + const MemSlot& _slot; + const MemFile& _file; + + public: + Slot(const MemSlot& slot, const MemFile& file) + : _slot(slot), + _file(file) {}; + + /** + Returns the timestamp of the slot. + */ + Timestamp getTimestamp() const; + + /** + * Returns whether a slot is a remove, either regular + * or unrevertable. + */ + bool isRemove() const; + + /** + Returns the global id of the slot. + */ + const GlobalId& getGlobalId() const; + + /** + * Get the document, optionally just the header. If not preloaded, will load + * this document from disk. + */ + Document::UP getDocument(bool headerOnly) const; + + document::DocumentId getDocumentId() const; + }; + + virtual ~SlotMatcher() {} + + virtual bool match(const Slot&) = 0; + + /** Do what is needed to preload wanted content. 
*/ + void preload(MemFile&) const {}; +}; + +} // storage +} // memfile + diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp new file mode 100644 index 00000000000..337638cadc9 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <iostream> +#include <vespa/memfilepersistence/common/types.h> + +namespace storage { +namespace memfile { + +const framework::MicroSecTime Types::MAX_TIMESTAMP(framework::MicroSecTime::max()); +const framework::MicroSecTime Types::UNSET_TIMESTAMP(0); + +void +Types::verifyLegalFlags(uint32_t flags, uint32_t legal, const char* operation) +{ + if ((flags & legal) != flags) { + std::ostringstream ost; + ost << "Invalid flags given to operation " << operation << ". " + << std::hex << flags << " given, but only " << legal + << " are legal."; + throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC); + } +} + +std::ostream& +operator<<(std::ostream& os, const DataLocation& loc) +{ + os << "DataLocation(" + << std::dec + << loc._pos + << ", " + << loc._size + << ")"; + return os; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.h b/memfilepersistence/src/vespa/memfilepersistence/common/types.h new file mode 100644 index 00000000000..bf4bdc98222 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/common/types.h @@ -0,0 +1,198 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::slotfile::Types + * \ingroup memfile + * + * \brief This class defines and includes some types used in the slotfile layer. 
+ * + * As many of the types are used many places in the layer, we define them here + * rather than in one random class using them. This also makes it easy to switch + * implementation by switching out which class to use here. + * + * This class should not have any members, virtual classes or anything. We don't + * want it to add to the memory footprint of classes, as it will be used also + * by classes kept many times in memory cache. + */ +#pragma once + +#include <iosfwd> +#include <vespa/document/bucket/bucketid.h> +#include <vespa/document/base/documentid.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/storageframework/storageframework.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/persistence/spi/bucketinfo.h> + +namespace storage { +namespace memfile { + +/** + * \class storage::slotfile::DataLocation + * \ingroup memfile + * + * \brief Points to data in a file storing documents. + * + * This file stores info on where header and body parts of document are stored. + * It is really format specific data, but for now it is implemented globally. + * + * All unused locations should be size zero pointing to address zero. A size + * of zero with a non-zero position is invalid, and used to indicate that this + * value is not set yet. (Typically when data isn't persisted to disk yet) + */ +struct DataLocation : public boost::operators<DataLocation> { + uint32_t _pos; + uint32_t _size; + + DataLocation() : _pos(1), _size(0) {} // pos 1 size 0 is invalid value. 
+ DataLocation(uint32_t pos, uint32_t sz) : _pos(pos), _size(sz) {} + + uint32_t size() const { return _size; } + + uint32_t endPos() const { return _pos + _size; } + + bool valid() const { return (_size > 0 || _pos == 0); } + + bool operator==(const DataLocation& other) const + { return (_pos == other._pos && _size == other._size); } + + bool operator<(const DataLocation& other) const { + if (_pos == other._pos) { + return _size < other._size; + } + + return _pos < other._pos; + } + + bool contains(const DataLocation& other) const { + return (_pos <= other._pos && _pos + _size >= other._pos + other._size); + } +}; + +std::ostream& operator<<(std::ostream&, const DataLocation&); + +struct Types { + typedef document::BucketId BucketId; + typedef document::Document Document; + typedef vespalib::LinkedPtr<Document> DocLP; + typedef document::DocumentId DocumentId; + typedef document::GlobalId GlobalId; + typedef framework::MicroSecTime Timestamp; + typedef Timestamp RevertToken; + typedef vespalib::string String; + typedef spi::BucketInfo BucketInfo; + + static const framework::MicroSecTime MAX_TIMESTAMP; + static const framework::MicroSecTime UNSET_TIMESTAMP; + + enum FileVersion { + UNKNOWN = 0, + TRADITIONAL_SLOTFILE = 0xABCD0001 + }; + + enum SlotFlag { + IN_USE = 0x01, + DELETED = 0x02, + DELETED_IN_PLACE = 0x04, + LEGAL_PERSISTED_SLOT_FLAGS = 0x07, + + // States not stored in file. As we have set aside 16 bits for the + // flags in the fileformat, but use so few, we use some of the + // unused bits in the memory representation to store memory state. 
+ ALTERED_IN_MEMORY = 0x02 << 8, + CHECKSUM_OUTDATED = 0x04 << 8, + + // Masks to check for multiple bits + UNUSED = 0xf8f8 + }; + + enum GetFlag { + ALL = 0, + HEADER_ONLY = 0x1, + LEGAL_GET_FLAGS = 0x1 + }; + + enum IteratorFlag { + ITERATE_GID_UNIQUE = 0x1, + ITERATE_REMOVED = 0x2, + LEGAL_ITERATOR_FLAGS = 0x3 + }; + + enum DocContentFlag { + HAS_HEADER_ONLY, + HAS_BODY + }; + + enum DocumentPart { + HEADER, + BODY + }; + + enum MemFileFlag { + FILE_EXIST = 0x0001, + HEADER_BLOCK_READ = 0x0002, + BODY_BLOCK_READ = 0x0004, + BUCKET_INFO_OUTDATED = 0x0008, + SLOTS_ALTERED = 0x0010, + LEGAL_MEMFILE_FLAGS = 0x001f + }; + + enum FileVerifyFlags { + DONT_VERIFY_HEADER = 0x0001, + DONT_VERIFY_BODY = 0x0002, + LEGAL_VERIFY_FLAGS = 0x0003 + }; + + enum FlushFlag { + NONE = 0, + CHECK_NON_DIRTY_FILE_FOR_SPACE = 1 + }; + + enum GetLocationsFlag { + NON_PERSISTED_LOCATIONS = 0x0001, + PERSISTED_LOCATIONS = 0x0002, + NO_SLOT_LIST = 0x0004 + }; + + enum DocumentCopyType { + DEEP_COPY, + SHALLOW_COPY + }; + + static const char* getDocumentPartName(DocumentPart part) { + switch (part) { + case HEADER: return "Header"; + case BODY: return "Body"; + default: return "Invalid"; + } + } + + static const char* getFileVersionName(FileVersion version) { + switch (version) { + case UNKNOWN: return "UNKNOWN"; + case TRADITIONAL_SLOTFILE: return "TRADITIONAL_SLOTFILE"; + default: return "INVALID"; + } + } + + static const char* getMemFileFlagName(MemFileFlag flag) { + switch (flag) { + case FILE_EXIST: return "FILE_EXIST"; + case HEADER_BLOCK_READ: return "HEADER_BLOCK_READ"; + case BODY_BLOCK_READ: return "BODY_BLOCK_READ"; + case BUCKET_INFO_OUTDATED: return "BUCKET_INFO_OUTDATED"; + case SLOTS_ALTERED: return "SLOTS_ALTERED"; + case LEGAL_MEMFILE_FLAGS: assert(false); // Not a single flag + default: return "INVALID"; + } + } + + static void verifyLegalFlags(uint32_t flags, uint32_t legal, + const char* operation); + +protected: + ~Types() {} // Noone should refer to objects as Types 
objects +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt new file mode 100644 index 00000000000..2b2916b1ead --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_device OBJECT + SOURCES + device.cpp + disk.cpp + partition.cpp + directory.cpp + devicemapper.cpp + devicemanager.cpp + ioevent.cpp + partitionmonitor.cpp + mountpointlist.cpp + DEPENDS +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp new file mode 100644 index 00000000000..88283065790 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/device.h> + +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/device/ioevent.h> + +LOG_SETUP(".persistence.device"); + +#include <algorithm> + +namespace storage { + +namespace memfile { + +Device::Device(DeviceManager& manager) + : _manager(manager) +{ +} + +Device::~Device() +{ +} + +std::string Device::getStateString(State s) +{ + switch (s) { + case OK: return "OK"; + case TOO_MANY_OPEN_FILES: return "TOO_MANY_OPEN_FILES"; + case NOT_FOUND: return "NOT_FOUND"; + case PATH_FAILURE: return "PATH_FAILURE"; + case NO_PERMISSION: return "NO_PERMISSION"; + case IO_FAILURE: return "IO_FAILURE"; + case INTERNAL_FAILURE: return "INTERNAL_FAILURE"; + case DISABLED_BY_ADMIN: return "DISABLED_BY_ADMIN"; + default: + { + std::ostringstream ost; + ost << "UNKNOWN(" << s << ")"; + return ost.str(); + } + } +} + +void +Device::print(std::ostream& out, bool, const std::string&) const +{ + const IOEvent* event = getLastEvent(); + if (event == 0) { + out << Device::OK; + } else { + out << event->getState() << " "; + out << event->getTimestamp() << " "; + std::string desc = event->getDescription(); + std::replace(desc.begin(), desc.end(), '\n', ' '); + out << desc; + } +} + +void +Device::clearEvents() +{ + _events.clear(); +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.h b/memfilepersistence/src/vespa/memfilepersistence/device/device.h new file mode 100644 index 00000000000..dd582ff327a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/device.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::Device + * \ingroup persistence + * + * @brief Class holding information about a device. + * + * Base class for devices, such as directories, partitions and disks. 
+ */ + +#pragma once + +#include <vespa/vespalib/util/printable.h> +#include <list> +#include <string> + +namespace storage { + +namespace memfile { + +class IOEvent; +class DeviceManager; + +class Device : public vespalib::Printable { +private: + // These objects are not possible to copy. They represents physical + // resources on a computer + Device(const Device&); + Device& operator=(Device&); + +protected: + DeviceManager& _manager; + std::list<IOEvent> _events; + + Device(DeviceManager& manager); + +public: + /** + * Storage device states. Most serious states are at the bottom of the + * list. If a single state is requested from the device, the one with + * the highest value wins through. + */ + enum State { + OK, + NOT_FOUND, // Not found + PATH_FAILURE, // Illegal path + NO_PERMISSION, // Permission problems + INTERNAL_FAILURE, // Probably problem with process. + IO_FAILURE, // Disk problems + TOO_MANY_OPEN_FILES, // Too many open files so we can't use disk. + // This is a global problem that will not be stored + // as disk state, but must exist in order to be + // able to report event. + DISABLED_BY_ADMIN // If disabled through admin tool + }; + + static std::string getStateString(State s); + + virtual ~Device(); + + virtual void addEvent(const IOEvent& e) = 0; + virtual void clearEvents(); + virtual const IOEvent* getLastEvent() const = 0; + + const std::list<IOEvent>& getEvents() const { return _events; } + + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + +}; + +} // memfile + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp new file mode 100644 index 00000000000..d088f1dab46 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp @@ -0,0 +1,213 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/devicemanager.h> + +#include <vespa/memfilepersistence/device/devicemapper.h> +#include <vespa/vespalib/util/exceptions.h> + +namespace storage { + +namespace memfile { + +DeviceManager::DeviceManager(DeviceMapper::UP mapper, + const framework::Clock& clock) + : _deviceMapper(std::move(mapper)), + _disks(), + _partitions(), + _directories(), + _eventListeners(), + _statPolicy(vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC), + _statPeriod(0), + _clock(clock) +{ +} + +void +DeviceManager::setPartitionMonitorPolicy( + vespa::config::storage::StorDevicesConfig::StatfsPolicy policy, uint32_t period) +{ + _statPolicy = policy; + _statPeriod = period; + for (std::map<std::string, Partition::LP>::iterator it + = _partitions.begin(); it != _partitions.end(); ++it) + { + Partition& p(*it->second); + if (p.getMonitor() != 0) p.getMonitor()->setPolicy(policy, period); + } +} + +void DeviceManager::notifyDiskEvent(Disk& d, const IOEvent& e) +{ + for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); + it != _eventListeners.end(); ++it) + { + assert(*it != 0); + (*it)->handleDiskEvent(d, e); + } +} + +void +DeviceManager::notifyDirectoryEvent(Directory& dir, const IOEvent& e) +{ + for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); + it != _eventListeners.end(); ++it) + { + assert(*it != 0); + (*it)->handleDirectoryEvent(dir, e); + } +} + +void +DeviceManager::notifyPartitionEvent(Partition& part, const IOEvent& e) +{ + for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); + it != _eventListeners.end(); ++it) + { + assert(*it != 0); + (*it)->handlePartitionEvent(part, e); + } +} + +void +DeviceManager::addIOEventListener(IOEventListener& listener) +{ + _eventListeners.insert(&listener); +} + +void +DeviceManager::removeIOEventListener(IOEventListener& listener) +{ + _eventListeners.erase(&listener); +} + 
+Directory::LP +DeviceManager::getDirectory(const std::string& dir, uint16_t index) +{ + std::map<std::string, Directory::LP>::iterator it = + _directories.find(dir); + if (it != _directories.end()) { + return it->second; + } + Directory::LP d(new Directory(*this, index, dir)); + _directories[dir] = d; + return d; +} + +Directory::LP +DeviceManager::deserializeDirectory(const std::string& serialized) +{ + // Deserialize object + Directory::LP d(new Directory(serialized, *this)); + // If not existing, just add it. + std::map<std::string, Directory::LP>::iterator it = + _directories.find(d->getPath()); + if (it == _directories.end()) { + _directories[d->getPath()] = d; + return d; + } + // If already existing, merge info with existing entry. + it->second->addEvents(*d); + return it->second; +} + +Partition::LP +DeviceManager::getPartition(const std::string& path) +{ + try{ + std::string mountPoint(_deviceMapper->getMountPoint(path)); + uint64_t id = _deviceMapper->getPartitionId(mountPoint); + std::map<std::string, Partition::LP>::iterator it( + _partitions.find(mountPoint)); + if (it != _partitions.end()) { + return it->second; + } + Partition::LP part(new Partition(*this, id, mountPoint)); + if (part->getMonitor() != 0) { + part->getMonitor()->setPolicy(_statPolicy, _statPeriod); + } + _partitions[mountPoint] = part; + return part; + } catch (vespalib::IoException& e) { + // If we fail to create partition, due to having IO troubles getting + // partition id or mount point, create a partition that doesn't + // correspond to a physical device containing the error found. 
+ Partition::LP part(new Partition(*this, -1, path)); + part->addEvent(IOEvent::createEventFromIoException( + e, + _clock.getTimeInSeconds().getTime())); + _partitions[path] = part; + return part; + } +} + +Disk::LP +DeviceManager::getDisk(const std::string& path) +{ + try{ + int devnr = _deviceMapper->getDeviceId(path); + std::map<int, Disk::LP>::iterator it = _disks.find(devnr); + if (it != _disks.end()) { + return it->second; + } + Disk::LP disk(new Disk(*this, devnr)); + _disks[devnr] = disk; + return disk; + } catch (vespalib::IoException& e) { + // Use negative ints for illegal ids. Make sure they don't already + // exist + int devnr = -1; + while (_disks.find(devnr) != _disks.end()) --devnr; + // If we fail to create partition, due to having IO troubles getting + // partition id or mount point, create a partition that doesn't + // correspond to a physical device containing the error found. + Disk::LP disk(new Disk(*this, devnr)); + disk->addEvent(IOEvent::createEventFromIoException( + e, + _clock.getTimeInSeconds().getTime())); + _disks[devnr] = disk; + return disk; + } +} + +void +DeviceManager::printXml(vespalib::XmlOutputStream& xos) const +{ + using namespace vespalib::xml; + xos << XmlTag("devicemanager"); + xos << XmlTag("mapper") << XmlAttribute("type", _deviceMapper->getName()) + << XmlEndTag(); + xos << XmlTag("devices"); + for (std::map<int, Disk::LP>::const_iterator diskIt = _disks.begin(); + diskIt != _disks.end(); ++diskIt) + { + xos << XmlTag("disk") << XmlAttribute("deviceId", diskIt->first); + for (std::map<std::string, Partition::LP>::const_iterator partIt + = _partitions.begin(); partIt != _partitions.end(); ++partIt) + { + if (partIt->second->getDisk() != *diskIt->second) continue; + xos << XmlTag("partition") + << XmlAttribute("id", partIt->second->getId()) + << XmlAttribute("mountpoint", partIt->second->getMountPoint()); + if (partIt->second->getMonitor() != 0) { + xos << *partIt->second->getMonitor(); + } + for (std::map<std::string, 
Directory::LP>::const_iterator dirIt + = _directories.begin(); dirIt != _directories.end(); + ++dirIt) + { + if (dirIt->second->getPartition() != *partIt->second) continue; + xos << XmlTag("directory") + << XmlAttribute("index", dirIt->second->getIndex()) + << XmlAttribute("path", dirIt->second->getPath()) + << XmlEndTag(); + } + xos << XmlEndTag(); + } + xos << XmlEndTag(); + } + xos << XmlEndTag() << XmlEndTag(); +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h new file mode 100644 index 00000000000..dc1c6fdd68d --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::DeviceManager + * \ingroup persistence + * + * \brief Class keeping information about all devices. + * + * This class keeps track of all the devices so they can be looked up. 
+ */ +#pragma once + +#include <vespa/memfilepersistence/device/devicemapper.h> +#include <vespa/memfilepersistence/device/directory.h> +#include <vespa/memfilepersistence/device/disk.h> +#include <vespa/memfilepersistence/device/ioevent.h> +#include <vespa/memfilepersistence/device/partition.h> +#include <set> +#include <vector> +#include <vespa/vespalib/util/xmlserializable.h> +#include <vespa/storageframework/generic/clock/clock.h> + +namespace storage { + +namespace memfile { + +class DeviceManager : public vespalib::XmlSerializable { + DeviceMapper::UP _deviceMapper; + std::map<int, Disk::LP> _disks; + std::map<std::string, Partition::LP> _partitions; + std::map<std::string, Directory::LP> _directories; + std::set<IOEventListener*> _eventListeners; + vespa::config::storage::StorDevicesConfig::StatfsPolicy _statPolicy; + uint32_t _statPeriod; + const framework::Clock& _clock; + + DeviceManager(const DeviceManager&); + DeviceManager& operator=(const DeviceManager&); + + void setFindDeviceFunction(); + +public: + typedef vespalib::LinkedPtr<DeviceManager> LP; + + DeviceManager(DeviceMapper::UP mapper, + const framework::Clock& clock); + + void setPartitionMonitorPolicy( + vespa::config::storage::StorDevicesConfig::StatfsPolicy, uint32_t period = 0); + + void notifyDiskEvent(Disk& disk, const IOEvent& e); + void notifyDirectoryEvent(Directory& dir, const IOEvent& e); + void notifyPartitionEvent(Partition& part, const IOEvent& e); + + void addIOEventListener(IOEventListener& listener); + void removeIOEventListener(IOEventListener& listener); + + Directory::LP getDirectory(const std::string& dir, uint16_t index); + Directory::LP deserializeDirectory(const std::string& serialized); + Partition::LP getPartition(const std::string& path); + Disk::LP getDisk(const std::string& path); + + std::vector<Directory::LP> getDirectories(const Disk& disk) const; + std::vector<Directory::LP> getDirectories(const Partition& part) const; + + 
vespa::config::storage::StorDevicesConfig::StatfsPolicy getStatPolicy() const + { return _statPolicy; } + uint32_t getStatPeriod() const { return _statPeriod; } + + virtual void printXml(vespalib::XmlOutputStream&) const; + + const framework::Clock& getClock() const { return _clock; } +}; + +} // memfile + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp new file mode 100644 index 00000000000..e6f45fe9e4b --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/devicemapper.h> + +#include <vespa/vespalib/text/stringtokenizer.h> +#include <fstream> +#include <vespa/log/log.h> +#include <sstream> +#include <sys/stat.h> +#include <vespa/vespalib/util/exceptions.h> + +LOG_SETUP(".persistence.devicemapper"); + +namespace storage { + +namespace memfile { + +namespace { + uint64_t getDevice(const std::string& path) { + struct stat info; + if (stat(path.c_str(), &info) != 0) { + std::ostringstream ost; + ost << "Failed to run stat to find data on file " << path + << ": errno(" << errno << ") - " << vespalib::getLastErrorString() << "."; + throw vespalib::IoException( + ost.str(), vespalib::IoException::getErrorType(errno), + VESPA_STRLOC); + } + return info.st_dev; + } +} + +AdvancedDeviceMapper::AdvancedDeviceMapper() + : _mountPoints() +{ + // Initialize the mount point map + std::ifstream is; + is.exceptions(std::ifstream::badbit); // Throw exception on failure + is.open("/proc/mounts"); + init(is); +} + +void +AdvancedDeviceMapper::init(std::istream& is) +{ + std::string line; + while (std::getline(is, line)) { + vespalib::StringTokenizer st(line, " \t\f\r\n", ""); + if (st[0] == "none") { + LOG(debug, 
"Ignoring special mount point '%s'.", line.c_str()); + continue; + } + if (st.size() < 3 || st[1][0] != '/') { + LOG(warning, "Found unexpected line in /proc/mounts: '%s'.", + line.c_str()); + continue; + } + std::string mountPoint(st[1]); + try{ + uint64_t deviceId = getDevice(mountPoint); + LOG(debug, "Added mountpoint '%s' with device id %" PRIu64 ".", + mountPoint.c_str(), deviceId); + _mountPoints[deviceId] = mountPoint; + } catch (vespalib::Exception& e) { + LOG(info, "Failed to get device of mountpoint %s. This is normal " + "for some special mountpoints, and doesn't matter unless " + "the device is used by VDS: %s", + mountPoint.c_str(), e.getMessage().c_str()); + } + } +} + +std::string +AdvancedDeviceMapper::getMountPoint(const std::string& fileOnFS) const +{ + uint64_t dev = getDevice(fileOnFS); + std::map<uint64_t, std::string>::const_iterator it(_mountPoints.find(dev)); + if (it == _mountPoints.end()) { + std::ostringstream ost; + ost << "Failed to find a device for file '" << fileOnFS << "'. Stat " + << "returned device " << dev << " but only the following devices " + << "are known:"; + for (it = _mountPoints.begin(); it != _mountPoints.end(); ++it) { + ost << " (" << it->first << " - " << it->second << ")"; + } + throw vespalib::IoException( + ost.str(), vespalib::IoException::INTERNAL_FAILURE, + VESPA_STRLOC); + } + return it->second; +} + +uint64_t +AdvancedDeviceMapper::getPartitionId(const std::string& fileOnFS) const +{ + return getDevice(fileOnFS); +} + +} + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h new file mode 100644 index 00000000000..dd25283d029 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * The device mapper is used to get some interesting information for + * storage devies. + */ +#pragma once + +#include <iostream> +#include <map> +#include <string> +#include <memory> + +namespace storage { + +namespace memfile { + +/** + * @class DeviceMapper + * @ingroup persistence + * + * @brief Maps directories to partition and disk information. + */ +struct DeviceMapper { + typedef std::unique_ptr<DeviceMapper> UP; + + virtual ~DeviceMapper() {} + + virtual const char* getName() const = 0; + + virtual std::string getMountPoint(const std::string& fileOnFS) const = 0; + virtual uint64_t getPartitionId(const std::string& fileOnFS) const = 0; + virtual uint64_t getDeviceId(const std::string& fileOnFS) const = 0; +}; + +/** + * @class SimpleDeviceMapper + * @ingroup persistence + * + * @brief Simple device mapper, not trying to detect any information. + * + * This simple device mapper, assumes all directories used are actually + * mountpoints, and that all mountpoints are on separate disks. This returns + * dummy device numbers. + * + * Using this, each directory used will be handled separately, and there is no + * dependency on information to retrieve from OS. 
+ */ +class SimpleDeviceMapper : public DeviceMapper { + mutable std::map<std::string, int> _devices; + mutable int _lastDevice; + + SimpleDeviceMapper(const SimpleDeviceMapper&); + SimpleDeviceMapper& operator=(const SimpleDeviceMapper&); + +public: + SimpleDeviceMapper() : _devices(), _lastDevice(0) {} + + uint64_t getPartitionId(const std::string& fileOnFS) const { + std::map<std::string, int>::const_iterator it = _devices.find(fileOnFS); + if (it != _devices.end()) { + return it->second; + } + int dev = ++_lastDevice; + _devices[fileOnFS] = dev; + return dev; + } + std::string getMountPoint(const std::string& path) const { return path; } + virtual uint64_t getDeviceId(const std::string& fileOnFS) const { + return getPartitionId(fileOnFS); + } + virtual const char* getName() const + { return "Simple (All directories on individual fake devices)"; } +}; + +/** + * @class AdvancedDeviceMapper + * @ingroup persistence + * + * @brief Device mapper trying to find a real physical model using stat/statfs. + * + * Using this device mapper, stat/statfs will be used to try to find a real + * model. Directories mapping to common components wil cause all directories to + * fail if the common component fails. + */ +struct AdvancedDeviceMapper : public DeviceMapper { + std::map<uint64_t, std::string> _mountPoints; + + AdvancedDeviceMapper(); + void init(std::istream&); + + virtual std::string getMountPoint(const std::string& fileOnFS) const; + virtual uint64_t getPartitionId(const std::string& fileOnFS) const; + virtual uint64_t getDeviceId(const std::string& fileOnFS) const { + // Not found a way to detect partitions on common device. + // Returning partition ids for now. 
+ return getPartitionId(fileOnFS); + } + virtual const char* getName() const + { return "Advanced (Read devices attempted found)"; } +}; + +} + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp new file mode 100644 index 00000000000..9c03e1eb449 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp @@ -0,0 +1,141 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/directory.h> + +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/vespalib/util/exceptions.h> + +LOG_SETUP(".persistence.device.directory"); + +namespace storage { + +namespace memfile { + +const IOEvent* +Directory::getLastEvent() const +{ + if (!_events.empty()) return &_events.back(); + return _partition->getLastEvent(); +} + +Device::State +Directory::getState() const +{ + const IOEvent* event = getLastEvent(); + return (event ? 
event->getState() : Device::OK); +} + +void +Directory::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + out << _path << " "; + Device::print(out, verbose, indent); +} + +Directory::Directory(DeviceManager& manager, uint16_t index, + const std::string& path) + : Device(manager), + _index(index), + _path(path), + _partition(manager.getPartition(path)) +{ + assert(_partition.get()); +} + +namespace { + struct Entry { + std::string path; + Device::State status; + std::string description; + }; + + Entry parseDirectoryString(const std::string& serialized) { + while (1) { + Entry e; + std::string::size_type pos1 = serialized.find(' '); + if (pos1 == std::string::npos) break; + e.path = serialized.substr(0, pos1); + std::string::size_type pos2 = serialized.find(' ', pos1 + 1); + std::string num = serialized.substr(pos1 + 1, pos2 - pos1 - 1); + char* c; + e.status = static_cast<Device::State>( + strtoul(num.c_str(), &c, 10)); + if (*c != '\0') break; + if (pos2 != std::string::npos) { + e.description = serialized.substr(pos2 + 1); + } + return e; + } + std::string msg = "Illegal line in disk status file: '" + serialized + + "'. 
Ignoring it."; + LOG(warning, "%s", msg.c_str()); + throw vespalib::IllegalArgumentException(msg, VESPA_STRLOC); + } +} + +Directory::Directory(const std::string& serialized, + DeviceManager& manager) + : Device(manager), + _index(0), + _path(parseDirectoryString(serialized).path), + _partition(manager.getPartition(_path)) +{ + assert(_partition.get()); + Entry e = parseDirectoryString(serialized); + if (e.status != Device::OK) { + addEvent(IOEvent(manager.getClock().getTimeInSeconds().getTime(), + e.status, e.description, VESPA_STRLOC)); + } +} + +void Directory::addEvent(const IOEvent& e) +{ + switch (e.getState()) { + case Device::IO_FAILURE: + _partition->addEvent(e); + break; + case Device::PATH_FAILURE: + case Device::NO_PERMISSION: + case Device::INTERNAL_FAILURE: + case Device::DISABLED_BY_ADMIN: + default: + if (!e.isGlobal()) { + _events.push_back(e); + } + _manager.notifyDirectoryEvent(*this, e); + } +} + +void +Directory::addEvent(Device::State s, + const std::string& description, + const std::string& location) +{ + addEvent(IOEvent( + _manager.getClock().getTimeInSeconds().getTime(), + s, + description, + location)); + +} + +void Directory::addEvents(const Directory& d) +{ + std::list<IOEvent> events; + events.insert(events.end(), d.getEvents().begin(), d.getEvents().end()); + events.insert(events.end(), d.getPartition().getEvents().begin(), + d.getPartition().getEvents().end()); + events.insert(events.end(), d.getPartition().getDisk().getEvents().begin(), + d.getPartition().getDisk().getEvents().end()); + for (std::list<IOEvent>::const_iterator it = events.begin(); + it != events.end(); ++it) + { + addEvent(*it); + } +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.h b/memfilepersistence/src/vespa/memfilepersistence/device/directory.h new file mode 100644 index 00000000000..7bd2f7dcd53 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/directory.h @@ -0,0 +1,69 @@ 
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::Directory + * \ingroup persistence + * + * \brief Class representing a directory used by Vespa storage. + * + * IMPORTANT: Directory objects may be generated for faulty directories too, + * thus creating the object must not result in a disk operation. + */ +#pragma once + +#include <vespa/memfilepersistence/device/partition.h> + +namespace storage { + +namespace memfile { + +class Directory : public Device { + uint16_t _index; + std::string _path; + Partition::LP _partition; + + // Only DeviceManager can create these objects, so we only need + // to cope with these constructors being so similar there. + Directory(DeviceManager&, uint16_t index, const std::string& path); + Directory(const std::string& serialized, DeviceManager& manager); + + void addEvents(const Directory& d); + + friend class DeviceManager; + +public: + typedef vespalib::LinkedPtr<Directory> LP; + void setIndex(uint16_t index) { _index = index; } // Used when deserializing + + uint16_t getIndex() const { return _index; } + const std::string& getPath() const { return _path; } + Partition& getPartition() { return *_partition; } + const Partition& getPartition() const { return *_partition; } + + const IOEvent* getLastEvent() const; + virtual void addEvent(const IOEvent& e); + virtual void addEvent(Device::State s, + const std::string& description, + const std::string& location); + + State getState() const; + bool isOk() const { return (getLastEvent() == 0); } + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + bool operator==(const Directory& d) const { return (_path == d._path); } + bool operator!=(const Directory& d) const { return (_path != d._path); } + + // Easy access functions, using the partition monitor to query state of + // partition + + /** Query whether partition is full after adding given amount of data. 
*/ + bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const { + return _partition->getMonitor() == 0 + || _partition->getMonitor()->isFull(afterAdding, maxFillRate); + } + +}; + +} // memfile + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp new file mode 100644 index 00000000000..4e207d326ed --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/disk.h> + +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/device/devicemanager.h> + +LOG_SETUP(".persistence.device.disk"); + +namespace storage { + +namespace memfile { + +Disk::Disk(DeviceManager& manager, uint64_t id) + : Device(manager), + _id(id) +{ +} + +void Disk::addEvent(const IOEvent& e) +{ + if (!e.isGlobal()) { + _events.push_back(e); + } + _manager.notifyDiskEvent(*this, e); +} + +const IOEvent* +Disk::getLastEvent() const +{ + if (getEvents().size() > 0) + return &getEvents().back(); + return 0; +} + +void +Disk::print(std::ostream& out, bool verbose, const std::string& indent) const +{ + out << "Disk id: " << _id << " "; + Device::print(out, verbose, indent); +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.h b/memfilepersistence/src/vespa/memfilepersistence/device/disk.h new file mode 100644 index 00000000000..77549a12470 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/disk.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::Disk + * \ingroup persistence + * + * \brief Class representing a storage unit on a node. 
+ * + * Class representing a storage unit on a node, which can be a physical disk, or + * a device set up by a RAID controller or similar. + * + * IMPORTANT: Disk objects may be generated for faulty disks too, thus creating + * the object must not result in a disk operation. + */ + +#pragma once + +#include <vespa/memfilepersistence/device/device.h> +#include <vespa/vespalib/util/linkedptr.h> + +namespace storage { + +namespace memfile { + +class Disk : public Device { + uint64_t _id; + + Disk(DeviceManager&, uint64_t id); + + friend class DeviceManager; + +public: + typedef vespalib::LinkedPtr<Disk> LP; + + uint64_t getId() const { return _id; } + + virtual void addEvent(const IOEvent& e); + const IOEvent* getLastEvent() const; + + bool operator==(const Disk& disk) const { return (_id == disk._id); } + bool operator!=(const Disk& disk) const { return (_id != disk._id); } + void print(std::ostream& out, bool verbose, + const std::string& indent) const; +}; + +} // memfile + +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp new file mode 100644 index 00000000000..a85d66d1cbb --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/memfilepersistence/device/ioevent.h> +#include <vespa/memfilepersistence/device/device.h> +#include <cerrno> + +namespace storage { + +namespace memfile { + +IOEvent::IOEvent() + : _state(Device::OK), + _description(), + _location(), + _global(false), + _timestamp(0) +{ +} + +namespace { + vespalib::string stripBacktrace(const vespalib::string& s) { + vespalib::string::size_type pos = s.find("Backtrace:"); + if (pos == vespalib::string::npos) return s; + while (pos > 0 && (s[pos - 1] == ' ' || s[pos - 1] == '\n')) { + --pos; + } + return s.substr(0, pos); + } +} + +IOEvent::IOEvent(uint32_t timestamp, Device::State s, const vespalib::string& description, + const vespalib::string& location, bool global) + : _state(s), + _description(stripBacktrace(description)), + _location(location), + _global(global), + _timestamp(timestamp) +{ +} + +IOEvent +IOEvent::createEventFromErrno(uint32_t timestamp, + int error, const vespalib::string& extraInfo, + const vespalib::string& location) +{ + vespalib::string err(vespalib::getErrorString(error)); + err += ": " + extraInfo; + switch (error) { + case ENOENT: + return IOEvent(timestamp, Device::NOT_FOUND, err, location); + case ENOTDIR: + case ENAMETOOLONG: + case ELOOP: + case EISDIR: // Using directory as file + case EOPNOTSUPP: // Operation not supported by filesystem + case EROFS: + case EMLINK: + case ENXIO: + case ESPIPE: // Descriptor is a pip/socket/fifo + return IOEvent(timestamp, Device::PATH_FAILURE, err, location); + case EACCES: + return IOEvent(timestamp, Device::NO_PERMISSION, err, location); + case EIO: // IO error occured. + case EINTR: // Read from slow device interrupted before any data. 
+ return IOEvent(timestamp, Device::IO_FAILURE, err, location); + case EMFILE: + return IOEvent(timestamp, Device::TOO_MANY_OPEN_FILES, err, + location, true); + case EAGAIN: // Non-blocking read but no data available + case EBADF: // Invalid file descriptor + case EFAULT: // Buffer pointer invalid + case EINVAL: // Faulty input parameter + case ENFILE: + default: + return IOEvent(timestamp, Device::INTERNAL_FAILURE, err, location); + } +} + +IOEvent +IOEvent::createEventFromIoException(vespalib::IoException& e, uint32_t timestamp) +{ + Device::State type = Device::INTERNAL_FAILURE; + switch (e.getType()) { + case vespalib::IoException::NOT_FOUND: + type = Device::NOT_FOUND; break; + case vespalib::IoException::ILLEGAL_PATH: + type = Device::PATH_FAILURE; break; + case vespalib::IoException::NO_PERMISSION: + type = Device::NO_PERMISSION; break; + case vespalib::IoException::DISK_PROBLEM: + type = Device::IO_FAILURE; break; + case vespalib::IoException::TOO_MANY_OPEN_FILES: + type = Device::TOO_MANY_OPEN_FILES; break; + case vespalib::IoException::INTERNAL_FAILURE: + case vespalib::IoException::NO_SPACE: + case vespalib::IoException::CORRUPT_DATA: + case vespalib::IoException::DIRECTORY_HAVE_CONTENT: + case vespalib::IoException::FILE_FULL: + case vespalib::IoException::ALREADY_EXISTS: + case vespalib::IoException::UNSPECIFIED: + type = Device::INTERNAL_FAILURE; break; + } + return IOEvent(timestamp, type, e.getMessage(), e.getLocation()); +} + +void +IOEvent::print(std::ostream & os, bool verbose, const std::string& indent) const +{ + (void) indent; + os << "IOEvent("; + os << Device::getStateString(_state); + if (verbose) { + if (_description.size() > 0) { + os << ", " << _description; + } + if (_location.size() > 0) { + os << ", " << _location; + } + os << ", time " << _timestamp; + } + os << ")"; +} + +} // memfile + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h 
b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h new file mode 100644 index 00000000000..d30026c9f8c --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::IOEvent + * \ingroup persistence + * + * \brief Class representing an IO event. An event tied to a device. + */ +#pragma once + +#include <vespa/memfilepersistence/device/device.h> +#include <vespa/vespalib/util/exceptions.h> + +namespace storage { + +namespace memfile { + +class IOEvent : public vespalib::Printable { +public: + IOEvent(); + + IOEvent(uint32_t timestamp, + Device::State s, + const vespalib::string & description, + const vespalib::string & location, + bool global = false); + + static IOEvent createEventFromErrno(uint32_t timestamp, + int error, + const vespalib::string& extraInfo = "", + const vespalib::string& location = ""); + static IOEvent createEventFromIoException(vespalib::IoException& e, + uint32_t timestamp); + + Device::State getState() const { return _state; } + const vespalib::string& getDescription() const { return _description; } + + void print(std::ostream& out, bool verbose, + const std::string& indent) const override; + + /** + * Global events aren't tied to device they was found in. They should not + * be saved on each device or be a reason to disable one. + */ + bool isGlobal() const { return _global; } + + uint32_t getTimestamp() const { return _timestamp; } + +private: + Device::State _state; + vespalib::string _description; + vespalib::string _location; + bool _global; + uint32_t _timestamp; +}; + +class Directory; +class Partition; +class Disk; + +/** + * \class storage::IOEventListener + * \ingroup persistence + * + * \brief Interface to implement if you want IO events. Register at manager. 
+ */ +struct IOEventListener { + virtual void handleDirectoryEvent(Directory& dir, const IOEvent& e) = 0; + virtual void handlePartitionEvent(Partition& part, const IOEvent& e) = 0; + virtual void handleDiskEvent(Disk& disk, const IOEvent& e) = 0; + + virtual ~IOEventListener() {} +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp new file mode 100644 index 00000000000..0f5dbb288f1 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp @@ -0,0 +1,651 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/mountpointlist.h> + +#include <vespa/config/helper/configfetcher.h> +#include <vespa/vespalib/util/guard.h> +#include <vespa/vespalib/text/stringtokenizer.h> +#include <errno.h> +#include <fstream> +#include <vespa/log/log.h> +#include <vespa/persistence/spi/exceptions.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <vespa/memfilepersistence/common/exceptions.h> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vdslib/state/nodestate.h> +#include <vespa/vespalib/util/xmlserializable.h> + +LOG_SETUP(".persistence.mountpointlist"); + +namespace storage { + +namespace memfile { + +using vespalib::getLastErrorString; +using vespalib::DirPointer; + +MountPointList::MountPointList(const std::string& vdsRoot, + const std::vector<vespalib::string>& diskPath, + DeviceManager::LP manager) + : framework::XmlStatusReporter("mountpointlist", "Disk directories"), + _deviceManager(manager), + _vdsRoot(vdsRoot), + _diskPath(diskPath), + _mountPoints(0) +{ +} + +spi::PartitionStateList +MountPointList::getPartitionStates() const +{ + spi::PartitionStateList list(_mountPoints.size()); + for (uint32_t 
i=0; i<_mountPoints.size(); ++i) { + if (!(_mountPoints[i]->isOk())) { + const IOEvent* event = _mountPoints[i]->getLastEvent(); + + list[i] = spi::PartitionState(spi::PartitionState::DOWN, + event->getDescription()); + } + } + + return list; +} + +void +MountPointList::init(uint16_t diskCount) +{ + initDisks(); + scanForDisks(); + readFromFile(); + if (verifyHealthyDisks(diskCount == 0 ? -1 : diskCount)) { + // Initialize monitors after having initialized disks, such as to + // not create them for invalid disks. + initializePartitionMonitors(); + } + if (diskCount != 0 && _mountPoints.size() != diskCount) { + std::ostringstream ost; + ost << _mountPoints.size() + << " mount points found. Expected " << diskCount + << " mount points to exist."; + LOG(error, "%s", ost.str().c_str()); + throw config::InvalidConfigException(ost.str(), VESPA_STRLOC); + } +} + +void +MountPointList::initDisks() +{ + if (_diskPath.empty()) return; + + using vespalib::make_string; + + vespalib::string vdsDisksPath = make_string("%s/disks", _vdsRoot.c_str()); + vespalib::mkdir(vdsDisksPath); + + for (size_t diskIndex = 0; diskIndex < _diskPath.size(); ++diskIndex) { + auto disk_path = make_string( + "%s/d%zu", vdsDisksPath.c_str(), diskIndex); + if (pathExists(disk_path)) continue; + + vespalib::mkdir(_diskPath[diskIndex]); + + try { + vespalib::symlink(_diskPath[diskIndex], disk_path); + } catch (vespalib::IoException& dummy) { + // The above mkdir() created disk_path as a directory, or a + // subdirectory of disk_path, which is OK. 
+ (void) dummy; + } + } +} + +void +MountPointList::initializePartitionMonitors() +{ + std::set<Partition*> seen; + for (uint32_t i=0; i<_mountPoints.size(); ++i) { + if (!(_mountPoints[i]->isOk())) continue; + Partition* part = &_mountPoints[i]->getPartition(); + std::set<Partition*>::const_iterator it(seen.find(part)); + if (it == seen.end()) { + part->initializeMonitor(); + seen.insert(part); + } + } +} + +void +MountPointList::scanForDisks() +{ + _mountPoints.clear(); + std::vector<Directory::LP> entries; + DirPointer dir(opendir((_vdsRoot + "/disks").c_str())); + struct dirent* entry; + if (dir) while ((entry = readdir(dir))) { + if (entry == 0) { + std::ostringstream ost; + ost << "Failed to read directory \"" << _vdsRoot << "/disks\", " + << "errno " << errno << ": " << getLastErrorString(); + throw vespalib::IoException(ost.str(), + vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); + } + std::string name(reinterpret_cast<char*>(&entry->d_name)); + assert(name.size() > 0); + if (name[0] == '.') continue; + // To be a valid d<digit> name, size must be at least 2 + if (name.size() < 2 || name[0] != 'd') { + LOG(warning, "File %s in disks directory is faulty named for a " + "disk directory, ignoring it.", name.c_str()); + continue; + } + char* endp; + uint32_t diskNr = strtoul(name.c_str()+1, &endp, 10); + // If rest of name is not a number, ignore + if (*endp != '\0') { + LOG(warning, "File %s in disks directory is faulty named for a " + "disk directory, ignoring it.", name.c_str()); + continue; + } + // If number is out of range, ignore.. + if (diskNr >= 254) { + LOG(warning, "Ignoring disk directory %s, as max directories have " + "been set to 254.", name.c_str()); + continue; + } + + // Valid disk directory.. Add entry.. 
+ if (entries.size() <= diskNr) { + entries.resize(diskNr + 1); + } + LOG(debug, "Found disk directory %u: %s", diskNr, name.c_str()); + entries[diskNr] = _deviceManager->getDirectory( + _vdsRoot + "/disks/" + name, diskNr); + + // We only care about directories (or symlinks). DT_UNKNOWN must be handled explicitly. + if (entry->d_type != DT_DIR && entry->d_type != DT_LNK && entry->d_type != DT_UNKNOWN) { + std::ostringstream ost; + ost << "File " << name << " in disks directory is not a directory."; + LOG(warning, "%s", ost.str().c_str()); + entries[diskNr]->addEvent(Device::PATH_FAILURE, + ost.str(), VESPA_STRLOC); + } + + // Not all filesystems support d_type. Have to stat if this equals DT_UNKNOWN. + if (entry->d_type == DT_UNKNOWN) { + struct stat st; + lstat(entries[diskNr]->getPath().c_str(), &st); + if (!S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) { + std::ostringstream ost; + ost << "File " << name << " in disks directory is not a directory."; + LOG(warning, "%s", ost.str().c_str()); + entries[diskNr]->addEvent(Device::PATH_FAILURE, + ost.str(), VESPA_STRLOC); + } + } + } else if (errno == ENOENT) { + std::ostringstream ost; + ost << "Disk directory \"" << _vdsRoot << "/disks\" not created. VDS " + << "needs this to know which disks to use. 
See vespa doc."; + throw NoDisksException(ost.str(), VESPA_STRLOC); + } else { + std::ostringstream ost; + ost << "Failed to open directory \"" << _vdsRoot << "/disks\", errno " + << errno << ": " << getLastErrorString(); + throw vespalib::IoException(ost.str(), + vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); + } + // Assign found disks to the instance + _mountPoints.resize(entries.size()); + for (uint32_t i=0; i<_mountPoints.size(); ++i) { + if (!entries[i].get()) { + if (!_mountPoints[i].get() || + _mountPoints[i]->getState() == Device::OK) + { + std::ostringstream ost; + ost << _vdsRoot + "/disks/d" << i; + _mountPoints[i] = _deviceManager->getDirectory(ost.str(), i); + _mountPoints[i]->addEvent( + Device::NOT_FOUND, + "Disk not found during scanning of disks directory", + VESPA_STRLOC); + } + LOG(warning, "Disk %u was not found.", i); + } else if (!_mountPoints[i].get() || + _mountPoints[i]->getState() == Device::NOT_FOUND) + { + _mountPoints[i] = entries[i]; + } + } +} + +namespace { + /** + * Get the disk nr of the given mountpoint, + * or -1 if the mountpoint is illegal. 
+ */ + int getDiskNr(const std::string& mountPoint) { + std::string::size_type pos1 = mountPoint.rfind('/'); + if (pos1 == std::string::npos || + pos1 + 2 >= mountPoint.size() || + mountPoint[pos1+1] != 'd') + { + return -1; + } + char* endp; + std::string digit(mountPoint.substr(pos1+2)); + const char* digitptr = digit.c_str(); + int diskNr = strtoul(digitptr, &endp, 10); + if (digitptr[0] == '\0' || *endp != '\0') return -1; + return diskNr; + } +} + +void +MountPointList::readFromFile() +{ + std::vector<Directory::LP> entries; + // Read entries from disk + std::ifstream is; + // Throw exception if failing to read file + is.exceptions(std::ifstream::badbit); + is.open(getDiskStatusFileName().c_str()); + std::string line("EOF"); + while (std::getline(is, line)) { + if (line == "EOF") { break; } + Directory::LP dir = _deviceManager->deserializeDirectory(line); + int diskNr = getDiskNr(dir->getPath()); + if (diskNr == -1) { + LOG(warning, "Found illegal disk entry '%s' in vds disk file %s.", + line.c_str(), getDiskStatusFileName().c_str()); + } else { + dir->setIndex(diskNr); + if (entries.size() <= static_cast<uint32_t>(diskNr)) { + entries.resize(diskNr + 1); + } + entries[diskNr] = dir; + } + } + if (line != "EOF" || std::getline(is, line)) { + LOG(warning, "Disk status file %s did not end in EOF.", + getDiskStatusFileName().c_str()); + } + // Assign entries to this instance + if (_mountPoints.size() < entries.size()) { + _mountPoints.resize(entries.size()); + } + for (uint32_t i=0; i<entries.size(); ++i) { + if (entries[i].get() && + entries[i]->getState() != Device::OK && + entries[i]->getState() != Device::NOT_FOUND) + { + _mountPoints[i] = entries[i]; + } + } +} + +void +MountPointList::writeToFile() const +{ + try{ + std::string filename(getDiskStatusFileName()); + std::string tmpFilename(filename + ".tmp"); + std::ofstream os(tmpFilename.c_str()); + if (os.fail()) { + LOG(warning, "Failed to open %s.tmp for writing. 
Not writing " + "disks.status file.", filename.c_str()); + return; + } + for (std::vector<Directory::LP>::const_iterator it + = _mountPoints.begin(); it != _mountPoints.end(); ++it) + { + if (it->get() && + (*it)->getState() != Device::OK) + { + os << **it << "\n"; + } + } + os << "EOF"; + os.close(); + if (os.fail()) { + LOG(warning, "Failed to write %s.tmp. disks.status file might now " + "be corrupt as we failed while writing it.", + filename.c_str()); + return; + } + vespalib::rename(tmpFilename, filename, false, false); + LOG(debug, "Mount point list saved to file %s.", filename.c_str()); + } catch (std::exception& e) { + LOG(warning, "Failed to write disk status file: %s", e.what()); + } +} + +namespace { + void testMountPoint(Directory& mountPoint) { + struct stat filestats; + if (stat(mountPoint.getPath().c_str(), &filestats) != 0) { + switch (errno) { + case ENOTDIR: + case ENAMETOOLONG: + case ENOENT: + case EACCES: + case ELOOP: + { + mountPoint.addEvent(Device::PATH_FAILURE, + getLastErrorString(), + VESPA_STRLOC); + return; + } + case EIO: + { + mountPoint.addEvent(Device::IO_FAILURE, + getLastErrorString(), VESPA_STRLOC); + return; + } + case EFAULT: + default: + assert(0); // Should never happen + } + } + // At this point we know the mount point exists.. 
+ if (!(S_ISDIR(filestats.st_mode))) { + mountPoint.addEvent( + Device::PATH_FAILURE, + "The path exist, but is not a directory.", + VESPA_STRLOC); + } + } + + struct Chunk { + uint32_t nr; + uint32_t total; + + Chunk() : nr(0), total(0) {} // Invalid + bool valid() const { return (nr < total); } + }; + + Chunk getChunkDef(const std::string& mountPoint) { + vespalib::File file(mountPoint + "/chunkinfo"); + file.open(vespalib::File::READONLY); + std::string buffer; + buffer.resize(200, '\0'); + size_t read(file.read(&buffer[0], buffer.size(), 0)); + buffer.resize(read); + vespalib::StringTokenizer tokenizer(buffer, "\n", ""); + + Chunk chunk; + if (tokenizer.size() < 3) { + return chunk; + } + + char *c; + chunk.nr = strtoul(tokenizer[1].c_str(), &c, 10); + if (tokenizer[1].c_str() + tokenizer[1].size() != c) return Chunk(); + chunk.total = strtoul(tokenizer[2].c_str(), &c, 10); + if (tokenizer[2].c_str() + tokenizer[2].size() != c) return Chunk(); + return chunk; + } + + void writeChunkDef(Chunk c, const std::string& mountPoint) { + vespalib::File file(mountPoint + "/chunkinfo"); + file.open(vespalib::File::CREATE | vespalib::File::TRUNC, true); + std::ostringstream ost; + ost << "# This file tells VDS what data this mountpoint may contain.\n" + << c.nr << "\n" + << c.total << "\n"; + std::string content(ost.str()); + file.write(&content[0], content.size(), 0); + } + + Device::State getDeviceState(vespalib::IoException::Type type) { + using vespalib::IoException; + switch (type) { + case IoException::ILLEGAL_PATH: return Device::PATH_FAILURE; + case IoException::NO_PERMISSION: return Device::NO_PERMISSION; + case IoException::DISK_PROBLEM: return Device::IO_FAILURE; + case IoException::INTERNAL_FAILURE: return Device::INTERNAL_FAILURE; + default: ; + } + return Device::OK; + } + + bool emptyDir(Directory& dir) { + const std::string& path(dir.getPath()); + errno = 0; + DirPointer dirdesc(opendir(path.c_str())); + struct dirent* entry; + if (dirdesc) while ((entry = 
readdir(dirdesc))) { + if (errno) break; + std::string name(reinterpret_cast<char*>(&entry->d_name)); + if (name == "." || name == "..") continue; + return false; + } + if (dirdesc == 0 || errno) { + std::ostringstream ost; + ost << "Failed to read directory \"" << path << "\", " + << "errno " << errno << ": " << getLastErrorString(); + dir.addEvent(getDeviceState(vespalib::IoException::getErrorType(errno)), + ost.str(), + VESPA_STRLOC); + throw vespalib::IoException(ost.str(), + vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); + } + return true; + } + + struct WriteStatusFileIfFailing { + MountPointList& _list; + bool _failed; + + WriteStatusFileIfFailing(MountPointList& list) + : _list(list), _failed(false) {} + ~WriteStatusFileIfFailing() { + if (_failed) _list.writeToFile(); + } + + void reportFailure() { _failed = true; } + }; +} + +bool +MountPointList::verifyHealthyDisks(int mountPointCount) +{ + WriteStatusFileIfFailing statusWriter(*this); + int usable = 0, empty = 0; + std::map<uint32_t, Directory::LP> lackingChunkDef; + // Test disks and get chunkinfo + for (uint32_t i=0, n=_mountPoints.size(); i<n; ++i) { + Directory::LP dir(_mountPoints[i]); + // Insert NOT_FOUND disk if not found, such that operator[] + // can return only valid pointers + if (!dir.get()) { + std::ostringstream ost; + ost << _vdsRoot + "/disks/d" << i; + dir = _deviceManager->getDirectory(ost.str(), i); + dir->addEvent(Device::NOT_FOUND, + "Disk not found during scanning of disks directory", + VESPA_STRLOC); + _mountPoints[i] = dir; + statusWriter.reportFailure(); + } + if (dir->isOk()) { + testMountPoint(*dir); + if (!dir->isOk()) statusWriter.reportFailure(); + } + // Don't touch unhealthy or non-existing disks. 
+ if (!dir->isOk()) { + std::ostringstream ost; + ost << "Not using disk " << i << " marked bad: "; + dir->getLastEvent()->print(ost, true, " "); + LOG(warning, "%s", ost.str().c_str()); + continue; + } + + // Read chunkinfo + using vespalib::IoException; + Chunk chunk; + try{ + chunk = getChunkDef(dir->getPath()); + } catch (IoException& e) { + chunk = Chunk(); + if (e.getType() == IoException::NOT_FOUND) { + if (!emptyDir(*dir)) { + dir->addEvent(Device::INTERNAL_FAILURE, + "Foreign data in mountpoint. New " + "mountpoints added should be empty.", ""); + } + } else { + LOG(warning, "Failed to read chunkinfo file from mountpoint %s", + dir->getPath().c_str()); + Device::State newState(getDeviceState(e.getType())); + if (newState != Device::OK) { + dir->addEvent(newState, e.what(), VESPA_STRLOC); + } + } + } catch (std::exception& e) { + LOG(warning, "Failed to read chunkinfo file from mountpoint %s", + dir->getPath().c_str()); + dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC); + } + + // If disk was found unusable, don't use it. + if (!dir->isOk()) { + LOG(warning, "Unusable disk %d: %s", + i, dir->getLastEvent()->toString(true).c_str()); + statusWriter.reportFailure(); + continue; + } + ++usable; + // Ensure disk fits in with the already detected ones. 
+ if (!chunk.valid()) { + ++empty; + lackingChunkDef[i] = dir; + } else if (chunk.nr != i) { + std::ostringstream ost; + ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr + << " (instead of " << i << ")."; + LOG(error, "%s", ost.str().c_str()); + throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); + } else if (mountPointCount == -1) { + mountPointCount = chunk.total; + } else if (static_cast<uint32_t>(mountPointCount) != chunk.total) { + std::ostringstream ost; + ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr + << " of " << chunk.total << " (instead of " << i << " of " + << mountPointCount << ")."; + LOG(error, "%s", ost.str().c_str()); + throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); + } + } + if (empty == usable && usable != mountPointCount && mountPointCount != -1) { + std::ostringstream ost; + ost << "Found " << usable << " disks and config says we're " + << "supposed to have " << mountPointCount << ". Not initializing " + << "disks."; + throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); + } + bool retval = true; + // Handle case where no chunkinfo file present (none/unusable/new disks) + if (mountPointCount == -1) { + if (_mountPoints.size() == 0) { + LOG(error, "No disks configured for storage node. Disk " + "directories/symlinks for this node should be created " + "in %s/disks/. Please refer to VDS documentation to " + "learn how to add disks", _vdsRoot.c_str()); + throw spi::HandledException("No disks configured", VESPA_STRLOC); + } else if (usable == 0) { + LOG(error, "All of the configured disks are unusable. " + "Please refer to previous warnings and the VDS " + "documentation for troubleshooting"); + throw spi::HandledException("All disks unusable", VESPA_STRLOC); + } else { + mountPointCount = _mountPoints.size(); + LOG(info, "All disks empty. 
Setting up node to run with the %u " + "found disks.", mountPointCount); + retval = false; + } + } + // Write chunkdef files where these are missing + for (std::map<uint32_t, Directory::LP>::const_iterator it + = lackingChunkDef.begin(); it != lackingChunkDef.end(); ++it) + { + const Directory::LP& dir = it->second; + Chunk c; + c.nr = it->first; + c.total = mountPointCount; + if (c.nr >= c.total) { + LOG(warning, "Can't use disk %u of %u as the index is too high. " + "(Disks are indexed from zero)", c.nr, c.total); + continue; + } + if (!emptyDir(*dir)) { + LOG(warning, "Not creating chunkinfo file on disk %u as it already " + "contains data. If you want to include the disk, " + "create chunkinfo file manually.", c.nr); + assert(!dir->isOk()); + continue; + } + using vespalib::IoException; + try{ + writeChunkDef(c, dir->getPath()); + retval = true; + } catch (IoException& e) { + statusWriter.reportFailure(); + LOG(warning, "Failed to write chunkinfo file to mountpoint %s.", + dir->getPath().c_str()); + Device::State newState(getDeviceState(e.getType())); + if (newState != Device::OK) { + dir->addEvent(newState, e.what(), VESPA_STRLOC); + } + } catch (std::exception& e) { + statusWriter.reportFailure(); + LOG(warning, "Failed to write chunkinfo file to mountpoint %s", + dir->getPath().c_str()); + dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC); + } + } + // If we need more entries in mountpointlist, due to chunkinfo + // showing more indexes, add them. 
+ for (int i = _mountPoints.size(); i < mountPointCount; ++i) { + std::ostringstream ost; + ost << _vdsRoot + "/disks/d" << i; + Directory::LP dir(_deviceManager->getDirectory(ost.str(), i)); + dir->addEvent(Device::NOT_FOUND, + "Disk not found during scanning of disks directory", + VESPA_STRLOC); + _mountPoints.push_back(dir); + } + if (static_cast<int>(_mountPoints.size()) > mountPointCount) { + _mountPoints.resize(mountPointCount); + } + return retval; +} + +uint16_t +MountPointList::findIndex(const Directory& dir) const +{ + for (uint16_t i = 0; i < _mountPoints.size(); ++i) { + if (_mountPoints[i].get() != 0 && dir == *_mountPoints[i]) return i; + } + throw vespalib::IllegalArgumentException( + "Could not find directory " + dir.toString(), VESPA_STRLOC); +} + +std::string +MountPointList::getDiskStatusFileName() const +{ + return _vdsRoot + "/disks.status"; +} + +vespalib::string +MountPointList::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, + const framework::HttpUrlPath&) const +{ + xos << *_deviceManager; + return ""; +} + +} + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h new file mode 100644 index 00000000000..33a9574682a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::MountPointList + * \ingroup persistence + * + * \brief Class holding information about the mount points used by storage + * + * We need to keep a list of mount points, to read and write the mount point + * file, and to access what mount points should be used and not. + * + * NOTE: A mountpoint is often referred to as a disk, even though you technicly + * can have multiple mountpoints per partition and multiple partitions per disk. 
 *
 * IMPORTANT: Remember to call verifyHealthyDisks() before starting to use them.
 */

#pragma once

#include <vespa/vespalib/util/printable.h>
#include <vespa/memfilepersistence/device/devicemanager.h>
#include <vespa/memfilepersistence/device/directory.h>
#include <vespa/storageframework/storageframework.h>
#include <vector>
#include <vespa/vespalib/util/linkedptr.h>
#include <vespa/persistence/spi/persistenceprovider.h>

namespace storage {
namespace lib {
    class NodeState;
}

namespace memfile {

struct MountPointList : public framework::XmlStatusReporter {
    typedef std::unique_ptr<MountPointList> UP;

    /** Create a mount point list. */
    MountPointList(const std::string& vdsRoot,
                   const std::vector<vespalib::string>& diskPath,
                   vespalib::LinkedPtr<DeviceManager>);

    DeviceManager& getDeviceManager() { return *_deviceManager; }

    /**
     * Call init to initialize the mount point list in the regular fashion.
     * @param diskCount Number of disks to find, or 0 to auto-detect.
     *
     * NOTE(review): the original comment documented a return value ("the
     * number of usable disks found"), but the method returns void — confirm
     * against callers.
     */
    void init(uint16_t diskCount);

    /**
     * Initialize the disks, see description of diskPath config in
     * stor-devices. Will be called as part of init().
     */
    void initDisks();

    /**
     * Scan disks directory for disks. Add entries found, which does not exist,
     * or are marked NOT_FOUND to this instance.
     *
     * To prevent reading from possible bad disks, we cannot access the disks
     * themselves. Thus, in case of symlinks, it assumes the symlink is to a
     * directory.
     */
    void scanForDisks();

    /**
     * Read the disk status file and adjust the list.
     * Important that any entry marking a disk bad (except for NOT_FOUND if it
     * should be in the file) overrides any disks marked ok in this instance.
     *
     * Similarily to scanForDisks(), this does not access the disks itself.
     */
    void readFromFile();

    /**
     * Initialize the partition monitors within the partitions. Done after
     * partition creation, as partition objects are generated for bad disks.
     */
    void initializePartitionMonitors();

    /**
     * Write the current state of disks to the disk status file.
     * Disks that are OK or NOT_FOUND does not need to be written to file.
     */
    void writeToFile() const;

    /**
     * Go through all the mountpoints marked ok, and check that they work.
     * <ul>
     *   <li> Verify that symlinks point to a directory, not a file.
     *   <li> Read disk chunk files, stating mountpoint is number A/N.
     *   <li> Write disk chunk files on mountpoints missing these.
     *
     * IMPORTANT: This must be called before starting to use the disks.
     * getSize() may not return correct size before this has been called.
     *
     * @return True if there are at least one mountpoint appearing healthy.
     * @throws document::IllegalStateException If the mountpoint chunk files
     *         disagree on how many mountpoints there are.
     */
    bool verifyHealthyDisks(int mountPointCount);

    /** Get how many mountpoints exist. */
    uint32_t getSize() const { return _mountPoints.size(); }

    /** Get the given mountpoint. */
    Directory& operator[](uint16_t i)
        { assert(_mountPoints.size() > i); return *_mountPoints[i]; }
    const Directory& operator[](uint16_t i) const
        { assert(_mountPoints.size() > i); return *_mountPoints[i]; }

    /** Index of the given directory; throws IllegalArgumentException if absent. */
    uint16_t findIndex(const Directory& dir) const;

    // XmlStatusReporter implementation
    vespalib::string reportXmlStatus(vespalib::xml::XmlOutputStream&,
                                     const framework::HttpUrlPath&) const;

    /**
     * Returns the current state of the mountpoints.
     */
    spi::PartitionStateList getPartitionStates() const;

private:
    vespalib::LinkedPtr<DeviceManager> _deviceManager;
    std::string _vdsRoot;
    std::vector<vespalib::string> _diskPath;
    std::vector<Directory::LP> _mountPoints;

    /** Get the name used for the disk status file.
     */
    std::string getDiskStatusFileName() const;
};

} // memfile

} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp new file mode 100644 index 00000000000..2829ae8a212 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp @@ -0,0 +1,66 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/device/partition.h>

#include <vespa/log/log.h>
#include <vespa/memfilepersistence/device/devicemanager.h>
#include <vespa/vespalib/util/exceptions.h>

LOG_SETUP(".persistence.device.partition");

namespace storage {

namespace memfile {

// Construction must not touch the disk itself (see class doc): it only wires
// up the disk object from the device manager.
Partition::Partition(DeviceManager& manager,
                     uint64_t id,
                     const std::string& mountPoint)
    : Device(manager),
      _id(id),
      _mountPoint(mountPoint),
      _disk(manager.getDisk(mountPoint)),
      _monitor()
{
    assert(_disk.get());
}

// Deferred from the constructor because creating a PartitionMonitor performs
// a statvfs call, which may fail on a bad disk.
void Partition::initializeMonitor()
{
    try{
        _monitor.reset(new PartitionMonitor(_mountPoint));
        _monitor->setPolicy(_manager.getStatPolicy(), _manager.getStatPeriod());
    } catch (vespalib::IoException& e) {
        std::ostringstream error;
        error << "Failed to create partition monitor for partition "
              << _mountPoint << ": " << e.getMessage();
        LOG(warning, "%s", error.str().c_str());
        addEvent(IOEvent(_manager.getClock().getTimeInSeconds().getTime(),
                         Device::IO_FAILURE, error.str(), VESPA_STRLOC));
    }
}

void Partition::addEvent(const IOEvent& e)
{
    // No events yet defined that is partition specific
    _disk->addEvent(e);
}

// Falls back to the owning disk's last event when this partition has none.
const IOEvent*
Partition::getLastEvent() const
{
    if (!_events.empty()) return &_events.back();
    return _disk->getLastEvent();
}

void
Partition::print(std::ostream& out, bool verbose,
                 const std::string& indent) const
{
    out << "Partition: " << _id << " " << _mountPoint << " ";
    Device::print(out, verbose, indent);
}

} // memfile

} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partition.h b/memfilepersistence/src/vespa/memfilepersistence/device/partition.h new file mode 100644 index 00000000000..eeedafb7a49 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/partition.h @@ -0,0 +1,59 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::Partition
 * \ingroup persistence
 *
 * \brief Class representing a disk partition.
 *
 * IMPORTANT: Partition objects may be generated for faulty partitions too,
 * thus creating the object must not result in a disk operation.
 */

#pragma once

#include <vespa/memfilepersistence/device/disk.h>
#include <vespa/memfilepersistence/device/partitionmonitor.h>

namespace storage {

namespace memfile {

class Partition : public Device {
    uint64_t _id;
    std::string _mountPoint;
    Disk::LP _disk;
    PartitionMonitor::LP _monitor;

    // Constructor is private; only DeviceManager creates partitions.
    Partition(DeviceManager& manager, uint64_t id,
              const std::string& mountPoint);

    friend class DeviceManager;

public:
    typedef vespalib::LinkedPtr<Partition> LP;

    void initializeMonitor();

    uint64_t getId() const { return _id; }
    const std::string& getMountPoint() const { return _mountPoint; }

    Disk& getDisk() { return *_disk; }
    const Disk& getDisk() const { return *_disk; }

    // May be null if initializeMonitor() failed or was never called.
    PartitionMonitor* getMonitor() { return _monitor.get(); }
    const PartitionMonitor* getMonitor() const { return _monitor.get(); }

    virtual void addEvent(const IOEvent& e);
    const IOEvent* getLastEvent() const;

    void print(std::ostream& out, bool verbose,
               const std::string& indent) const;
    // Equality is identity: two partitions are the same iff ids match.
    bool operator==(const Partition& p) const { return (_id == p._id); }
    bool operator!=(const Partition& p) const { return (_id != p._id); }

};

} // memfile

} // storage

diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp new file mode 100644 index 00000000000..db1e61bc24e --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp @@ -0,0 +1,392 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/device/partitionmonitor.h> + +#include <vespa/log/log.h> +#include <vespa/vespalib/util/exceptions.h> + +LOG_SETUP(".persistence.device.partition.monitor"); + +namespace storage { + +namespace memfile { + +namespace { + + uint32_t getBlockSize(struct statvfs& info) { + // f_bsize have a strange name in man page, but as far as we've seen + // on actual file systems, it seems to correspond to block size. + return info.f_bsize; + } + + float calcRootOnlyRatio(struct statvfs& info) { + return (static_cast<uint64_t>(info.f_bfree) + - static_cast<uint64_t>(info.f_bavail)) + / info.f_blocks; + } + + struct RealStatter : public PartitionMonitor::Statter { + virtual void statFileSystem(const std::string& file, + struct statvfs& info) + { + if (statvfs(file.c_str(), &info) != 0) { + vespalib::asciistream ost; + ost << "Failed to run statvfs to find data on disk containing " + << "file " << file << ": errno(" << errno << ") - " + << vespalib::getLastErrorString() << "."; + throw vespalib::IoException( + ost.str(), vespalib::IoException::getErrorType(errno), + VESPA_STRLOC); + } + } + }; + +} + +uint64_t +PartitionMonitor::calcTotalSpace(struct statvfs& info) const { + // Ignore the part of the filesystem only root can write to. 
+ uint64_t nonRootBlocksExisting( + static_cast<uint64_t>(info.f_blocks) + - static_cast<uint64_t>(info.f_bfree) + + static_cast<uint64_t>(info.f_bavail)); + return nonRootBlocksExisting * _blockSize; +} + +uint64_t +PartitionMonitor::calcUsedSpace(struct statvfs& info) const { + return (_partitionSize - info.f_bavail * _blockSize); +} + +float +PartitionMonitor::calcInodeFillRatio(struct statvfs& info) const { + uint64_t freeForRootOnly = info.f_ffree - info.f_favail; + uint64_t nonRootInodes = info.f_files - freeForRootOnly; + float freeInodesRatio = static_cast<float>(info.f_favail) / nonRootInodes; + return float(1.0) - freeInodesRatio; +} + +uint64_t +PartitionMonitor::calcDynamicPeriod() const +{ + uint32_t lastFillRate = (100 * _usedSpace / _partitionSize); + uint32_t maxFillRate = static_cast<uint32_t>(100 * _maxFillRate); + if (lastFillRate >= maxFillRate) { + return 1; + } else { + uint32_t fillDiff = (maxFillRate - lastFillRate); + return _period * fillDiff * fillDiff; + } +} + +PartitionMonitor::PartitionMonitor(const std::string& file) + : _fileOnPartition(file), + _fileSystemId(0), + _policy(STAT_PERIOD), + _blockSize(0), + _partitionSize(0), + _usedSpace(0), + _period(100), + _queriesSinceStat(0), + _maxFillRate(0.98), + _rootOnlyRatio(0), + _inodeFillRate(0), + _statter() +{ + setStatter(std::unique_ptr<Statter>(new RealStatter)); + LOG(debug, "%s: Monitor created with default setting of period at 100.", + _fileOnPartition.c_str()); +} + +void +PartitionMonitor::setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy policy, + uint32_t period) +{ + switch (policy) { + case vespa::config::storage::StorDevicesConfig::STAT_ALWAYS: + setAlwaysStatPolicy(); break; + case vespa::config::storage::StorDevicesConfig::STAT_ONCE: + setStatOncePolicy(); break; + case vespa::config::storage::StorDevicesConfig::STAT_PERIOD: + if (period == 0) { + setStatPeriodPolicy(); + } else { + setStatPeriodPolicy(period); + } + break; + case 
vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC: + if (period == 0) { + setStatDynamicPolicy(); + } else { + setStatDynamicPolicy(period); + } + break; + } +} + +void +PartitionMonitor::setAlwaysStatPolicy() +{ + _policy = ALWAYS_STAT; + LOG(debug, "%s: Set stat policy to always stat.", _fileOnPartition.c_str()); +} + +void +PartitionMonitor::setStatOncePolicy() +{ + _policy = STAT_ONCE; + LOG(debug, "%s: Set stat policy to stat once.", _fileOnPartition.c_str()); +} + +void +PartitionMonitor::setStatPeriodPolicy(uint32_t period) +{ + _policy = STAT_PERIOD; + _period = period; + LOG(debug, "%s: Set stat policy to stat every %u attempt.", + _fileOnPartition.c_str(), _period); +} + +void +PartitionMonitor::setStatDynamicPolicy(uint32_t basePeriod) +{ + _policy = STAT_DYNAMIC; + _period = basePeriod; + LOG(debug, "%s: Set stat policy to stat dynamicly with base %u.", + _fileOnPartition.c_str(), _period); +} + +void +PartitionMonitor::setStatter(std::unique_ptr<Statter> statter) +{ + vespalib::LockGuard lock(_updateLock); + _statter = std::move(statter); + struct statvfs info; + _statter->statFileSystem(_fileOnPartition, info); + _blockSize = getBlockSize(info); + _partitionSize = calcTotalSpace(info); + // Calculations further down assumes total size can be held within + // a signed 64 bit. 
+ assert(_partitionSize + < static_cast<uint64_t>(std::numeric_limits<int64_t>::max())); + _usedSpace = calcUsedSpace(info); + _rootOnlyRatio = calcRootOnlyRatio(info); + _inodeFillRate = calcInodeFillRatio(info); + _fileSystemId = info.f_fsid; + LOG(debug, "FileSystem(%s): Total size: %" PRIu64 ", used: %" PRIu64 + ", root only %f, max fill rate %f, fill rate %f.", + _fileOnPartition.c_str(), + _partitionSize, + _usedSpace, + _rootOnlyRatio, + _maxFillRate, + static_cast<double>(_usedSpace) / _partitionSize); +} + +void +PartitionMonitor::updateIfNeeded() const +{ + uint32_t period = 0; + switch (_policy) { + case STAT_ONCE: period = std::numeric_limits<uint32_t>::max(); break; + case ALWAYS_STAT: period = 1; break; + case STAT_PERIOD: period = _period; break; + case STAT_DYNAMIC: period = calcDynamicPeriod(); break; + } + if (++_queriesSinceStat >= period) { + struct statvfs info; + try{ + _statter->statFileSystem(_fileOnPartition, info); + _usedSpace = calcUsedSpace(info); + _inodeFillRate = calcInodeFillRatio(info); + _queriesSinceStat = 0; + } catch (vespalib::Exception& e) { + LOG(warning, "Failed to stat filesystem with file %s. Using " + "last stored used space of %" PRIu64 ".", + _fileOnPartition.c_str(), _usedSpace); + } + } +} +uint64_t +PartitionMonitor::getUsedSpace() const +{ + vespalib::LockGuard lock(_updateLock); + updateIfNeeded(); + return _usedSpace; +} + +float +PartitionMonitor::getFillRate(int64_t afterAdding) const +{ + vespalib::LockGuard lock(_updateLock); + updateIfNeeded(); + float fillRate; + if (static_cast<int64_t>(_usedSpace) + afterAdding + >= static_cast<int64_t>(_partitionSize)) + { + fillRate = 1; + } else if (static_cast<int64_t>(_usedSpace) + afterAdding < 0) { + fillRate = 0; + } else { + fillRate = (static_cast<double>(_usedSpace) + afterAdding) + / _partitionSize; + } + if (fillRate < _inodeFillRate) { + fillRate = _inodeFillRate; + LOG(spam, "Inode fill rate is now %f. 
%u requests since last stat.", + fillRate, _queriesSinceStat); + } else { + LOG(spam, "Fill rate is now %f. %u requests since last stat.", + fillRate, _queriesSinceStat); + } + return fillRate; +} + +void +PartitionMonitor::setMaxFillness(float maxFill) +{ + if (maxFill <= 0 || maxFill > 1.0) { + vespalib::asciistream ost; + ost << "Max fill rate must be in the range <0,1]. Value of " + << maxFill << " is not legal."; + throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC); + } + _maxFillRate = maxFill; +} + +void +PartitionMonitor::addingData(uint64_t dataSize) +{ + vespalib::LockGuard lock(_updateLock); + _usedSpace = std::max(_usedSpace, _usedSpace + dataSize); +} + +void +PartitionMonitor::removingData(uint64_t dataSize) +{ + vespalib::LockGuard lock(_updateLock); + _usedSpace = (_usedSpace > dataSize ? _usedSpace - dataSize : 0); +} + +uint64_t +PartitionMonitor::getPartitionId(const std::string& fileOnPartition) +{ + RealStatter realStatter; + struct statvfs info; + realStatter.statFileSystem(fileOnPartition, info); + return info.f_fsid; +} + +namespace { + void printSize(std::ostream& out, uint64_t size) { + std::string s; + if (size < 10 * 1024) { + s = "B"; + } else { + size = size / 1024; + if (size < 10 * 1024) { + s = "kB"; + } else { + size = size / 1024; + if (size < 10 * 1024) { + s = "MB"; + } else { + size = size / 1024; + if (size < 10 * 1024) { + s = "GB"; + } else { + size = size / 1024; + s = "TB"; + } + } + } + } + out << " (" << size << " " << s << ")"; + } +} + +void +PartitionMonitor::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + vespalib::LockGuard lock(_updateLock); + out << "PartitionMonitor(" << _fileOnPartition; + if (verbose) { + out << ") {" + << "\n" << indent << " Fill rate: " + << (100.0 * _usedSpace / _partitionSize) + << " %" + << "\n" << indent << " Inode fill rate: " << (100 * _inodeFillRate) + << " %" + << "\n" << indent << " Detected block size: " << _blockSize + << "\n" << indent 
<< " File system id: " << _fileSystemId + << "\n" << indent << " Total size: " << _partitionSize; + printSize(out, _partitionSize); + out << "\n" << indent << " Used size: " << _usedSpace; + printSize(out, _usedSpace); + out << "\n" << indent << " Queries since last stat: " + << _queriesSinceStat + << "\n" << indent << " Monitor policy: "; + } else { + out << ", "; + } + switch (_policy) { + case STAT_ONCE: out << "STAT_ONCE"; break; + case ALWAYS_STAT: out << "ALWAYS_STAT"; break; + case STAT_PERIOD: out << "STAT_PERIOD(" << _period << ")"; break; + case STAT_DYNAMIC: out << "STAT_DYNAMIC(" << calcDynamicPeriod() << ")"; + break; + } + if (verbose) { + if (_policy == STAT_DYNAMIC) { + out << "\n" << indent << " Period at current fillrate " + << calcDynamicPeriod(); + } + out << "\n" << indent << " Root only ratio " << _rootOnlyRatio + << "\n" << indent << " Max fill rate " << (100 * _maxFillRate) + << " %" + << "\n" << indent << "}"; + } else { + bool inodesFill = false; + double fillRate = static_cast<double>(_usedSpace) / _partitionSize; + if (_inodeFillRate > fillRate) { + inodesFill = true; + fillRate = _inodeFillRate; + } + + out << ", " << _usedSpace << "/" << _partitionSize << " used - " + << (100 * fillRate) << " % full" << (inodesFill ? " (inodes)" : "") + << ")"; + } +} + +void +PartitionMonitor::printXml(vespalib::XmlOutputStream& xos) const +{ + using namespace vespalib::xml; + xos << XmlTag("partitionmonitor") + << XmlContent(toString(true)) + << XmlEndTag(); +} + +void +PartitionMonitor::overrideRealStat(uint32_t blockSize, uint32_t totalBlocks, + uint32_t blocksUsed, float inodeFillRate) +{ + vespalib::LockGuard lock(_updateLock); + if (_policy != STAT_ONCE) { + throw vespalib::IllegalStateException( + "Makes no sense to override real stat if policy isnt set to " + "STAT_ONCE. 
Values will just be set back to real values again.", + VESPA_STRLOC); + } + _blockSize = blockSize; + _partitionSize = totalBlocks * blockSize; + _usedSpace = blocksUsed * blockSize; + _inodeFillRate = inodeFillRate; +} + +} + +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h new file mode 100644 index 00000000000..401a070389e --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::PartitionMonitor + * \ingroup persistence + * + * \brief Monitors how full a file system is. + * + * This class is used by the persistence layer to monitor how full a disk is. + * It remembers how full the disk is, and can also take hints, such that it + * can give reasonable correct answers cheaply. + */ +#pragma once + +#include <vespa/vespalib/util/printable.h> +#include <sys/statvfs.h> +#include <vespa/config-stor-devices.h> +#include <vespa/vespalib/util/linkedptr.h> +#include <vespa/vespalib/util/sync.h> +#include <vespa/vespalib/util/xmlserializable.h> + +namespace storage { + +namespace memfile { + +class PartitionMonitorTest; + +class PartitionMonitor : public vespalib::Printable, + public vespalib::XmlSerializable +{ +public: + typedef vespalib::LinkedPtr<PartitionMonitor> LP; + + /** + * Use an object to stat through, such that unit tests can fake stat + * responses. 
     */
    struct Statter {
        virtual ~Statter() {}
        virtual void statFileSystem(const std::string& file,
                                    struct statvfs& info) = 0;
    };

private:
    enum MonitorPolicy { ALWAYS_STAT, STAT_ONCE, STAT_PERIOD, STAT_DYNAMIC };

    // _updateLock guards all mutable state below; the mutable members are
    // refreshed lazily from const accessors via updateIfNeeded().
    vespalib::Lock _updateLock;
    std::string _fileOnPartition;
    uint64_t _fileSystemId;
    MonitorPolicy _policy;
    uint32_t _blockSize;
    uint64_t _partitionSize;
    mutable uint64_t _usedSpace;
    uint32_t _period;
    mutable uint32_t _queriesSinceStat;
    float _maxFillRate;
    float _rootOnlyRatio;
    mutable float _inodeFillRate;
    std::unique_ptr<Statter> _statter;

    void setStatter(std::unique_ptr<Statter> statter);
    uint64_t calcTotalSpace(struct statvfs& info) const;
    uint64_t calcUsedSpace(struct statvfs& info) const;
    uint64_t calcDynamicPeriod() const;
    float calcInodeFillRatio(struct statvfs& info) const;

    friend class PartitionMonitorTest;

public:
    /** Default policy is STAT_PERIOD(100). Default max fill rate 0.98. */
    PartitionMonitor(const std::string& fileOnFileSystem);

    /** Set monitor policy from config. */
    void setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy, uint32_t period);

    /** Always stat on getFillRate() requests. */
    void setAlwaysStatPolicy();
    /**
     * Stat only once, then depend on addingData/removingData hints to provide
     * correct answers.
     */
    void setStatOncePolicy();
    /**
     * Run stat each period getFillRate() request. Depend on hints to keep value
     * sane within a period.
     */
    void setStatPeriodPolicy(uint32_t period = 100);
    /**
     * Run stat often when close to full, but seldom when there is lots of free
     * space. In current algorithm, we will check each percentage diff from full
     * multiplied itself times the baseperiod request.
     */
    void setStatDynamicPolicy(uint32_t basePeriod = 10);

    /** Get the file system id of this instance.
     */
    uint64_t getFileSystemId() const { return _fileSystemId; }

    float getRootOnlyRatio() const { return _rootOnlyRatio; }

    uint64_t getPartitionSize() const { return _partitionSize; }

    uint64_t getUsedSpace() const;

    /**
     * Get the fill rate of the file system. Where 0 is empty and 1 is 100%
     * full.
     */
    float getFillRate(int64_t afterAdding = 0) const;

    /** Set the limit where the file system is considered full. (0-1) */
    void setMaxFillness(float maxFill);

    /** Query whether disk fill rate is high enough to be considered full. */
    bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const
    {
        // -1 acts as a sentinel meaning "use the configured max fill rate".
        if (maxFillRate == -1) {
            maxFillRate = _maxFillRate;
        }
        return (getFillRate(afterAdding) >= maxFillRate);
    }

    /**
     * To keep the monitor more up to date without having to do additional stat
     * commands, give clues when you add or remove data from the file system.
     */
    void addingData(uint64_t dataSize);

    /**
     * To keep the monitor more up to date without having to do additional stat
     * commands, give clues when you add or remove data from the file system.
     */
    void removingData(uint64_t dataSize);

    virtual void print(std::ostream& out, bool verbose,
                       const std::string& indent) const;

    /**
     * Calculate the file system id for a given file. Used when wanting an
     * instance for a new file, but you're unsure whether you already have a
     * tracker for that file system.
     */
    static uint64_t getPartitionId(const std::string& fileOnPartition);

    /** Used in unit testing only.
     */
    void overrideRealStat(uint32_t blockSize, uint32_t totalBlocks,
                          uint32_t blocksUsed, float inodeFillRate = 0.1);

    virtual void printXml(vespalib::XmlOutputStream&) const;

private:
    void updateIfNeeded() const;

};

} // memfile

} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore @@ -0,0 +1,2 @@
/.depend
/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt new file mode 100644 index 00000000000..8bf8eb37b04 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt @@ -0,0 +1,6 @@
# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(memfilepersistence_init OBJECT
    SOURCES
    filescanner.cpp
    DEPENDS
)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp new file mode 100644 index 00000000000..74708bb36d8 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp @@ -0,0 +1,240 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/init/filescanner.h>

#include <vespa/document/bucket/bucketid.h>
#include <iomanip>
#include <vespa/log/log.h>
#include <vespa/vespalib/io/fileutil.h>

LOG_SETUP(".persistence.memfile.filescanner");

namespace storage {
namespace memfile {

FileScanner::Metrics::Metrics(framework::Clock& clock)
    : metrics::MetricSet("dbinit.filescan", "",
                         "Metrics for the memfile filescanner"),
      _alienFiles(),
      _alienFileCounter("alienfiles", "",
              "Unknown files found during disk scanning.", this),
      _temporaryFilesDeleted("tempfilesdeleted", "",
              "Temporary files found and deleted during initialization.", this),
      _multipleBucketsSameDisk("multiplebucketssamedisk", "",
              "Multiple buckets found on same disk.", this),
      _wrongDir("wrongdir", "",
              "Number of buckets moved from wrong to right directory.", this),
      _wrongDisk("wrongdisk", "",
              "Number of buckets found on non-ideal disk.", this),
      _dirsListed("dirslisted", "",
              "Directories listed in list step of initialization.", this),
      _startTime(clock),
      _listLatency("listlatency", "",
              "Time used until list phase is done. (in ms)", this)
{
}

FileScanner::FileScanner(framework::ComponentRegister& reg,
                         const MountPointList& mountPoints,
                         uint32_t directoryLevels,
                         uint32_t directorySpread)
    : framework::Component(reg, "filescanner"),
      _directoryMapper(directoryLevels, directorySpread),
      _mountPoints(mountPoints),
      _dirLevels(directoryLevels),
      _dirSpread(directorySpread),
      _globalLock(),
      _globalMetrics(getClock())
{
    registerMetric(_globalMetrics);
}

// Entry point for one partition: splits the top directory level between
// `totalParts` workers, then merges the per-context metrics under the lock.
void
FileScanner::buildBucketList(document::BucketId::List & list,
                             uint16_t partition,
                             uint16_t part, uint16_t totalParts)
{
    Context context(_mountPoints[partition], getClock());
    std::vector<uint32_t> path(_dirLevels);
    if (_dirLevels > 0) {
        // If we have dirlevels, split into parts on top level only
        for (uint32_t i=0, n=_dirSpread; i<n; ++i) {
            if (i % totalParts == part) {
                path[0] = i;
                buildBucketList(list, context, path, 1);
            }
        }
    } else if (part == 0) {
        // If we don't have dirlevels, send all data in part 0
        buildBucketList(list, context, path);
    }
    // Grab lock and update metrics
    vespalib::LockGuard lock(_globalLock);
    std::vector<metrics::Metric::LP> newMetrics;
    context._metrics.addToSnapshot(_globalMetrics, newMetrics);
    // NOTE(review): presumably addToSnapshot only fills newMetrics for
    // metrics missing in the target; the assert documents that the two
    // metric sets are expected to have identical structure — confirm.
    assert(newMetrics.empty());
}

// Recursive step: fix one more directory-level component, recursing until
// the full path depth is reached, then list that leaf directory.
void
FileScanner::buildBucketList(document::BucketId::List & list,
                             Context& context,
                             std::vector<uint32_t>& path,
                             uint32_t dirLevel)
{
    if (dirLevel >= _dirLevels) {
        buildBucketList(list, context, path);
        return;
    }
    for (uint32_t i=0, n=_dirSpread; i<n; ++i) {
        path[dirLevel] = i;
        buildBucketList(list, context, path, dirLevel + 1);
    }
}

// Builds "<mountpoint>/xxxx/xxxx[/<rawid>.0]" with 4-hex-digit path
// components and a 16-hex-digit file stem.
std::string
FileScanner::getPathName(Context& context, std::vector<uint32_t>& path,
                         const document::BucketId* bucket) const
{
    std::ostringstream ost;
    ost << context._dir.getPath() << std::hex << std::setfill('0');
    for (uint32_t i=0, n=path.size(); i<n; ++i) {
        ost << '/' << std::setw(4) << path[i];
    }
    if (bucket != 0) {
        ost << '/' << std::setw(16)
            << bucket->stripUnused().getRawId() << ".0";
    }
    return ost.str();
}

// Lists one leaf directory and feeds every entry through processFile();
// entries processFile() rejects are recorded as "alien" files.
void
FileScanner::buildBucketList(document::BucketId::List & list,
                             Context& context,
                             std::vector<uint32_t>& path)
{
    std::string pathName(getPathName(context, path));
    if (!vespalib::fileExists(pathName)) {
        LOG(spam, "Directory %s does not exist.", pathName.c_str());
        return;
    }
    LOG(spam, "Listing directory %s", pathName.c_str());
    vespalib::DirectoryList dir(vespalib::listDirectory(pathName));
    for (uint32_t i=0; i<dir.size(); ++i) {
        if (!processFile(list, context, path, pathName, dir[i])) {
            // To only process alien files once, we lock rather than use
            // context object. Should be few (none) alien files so shouldn't
            // matter from a performance point of view
            vespalib::LockGuard lock(_globalLock);
            _globalMetrics._alienFileCounter.inc();
            // NOTE(review): `<=` lets _alienFiles grow to
            // _maxAlienFilesLogged + 1 entries — confirm whether `<` was
            // intended.
            if (_globalMetrics._alienFiles.size()
                <= _config._maxAlienFilesLogged)
            {
                LOG(spam, "Detected alien file %s/%s",
                    pathName.c_str(), dir[i].c_str());
                _globalMetrics._alienFiles.push_back(pathName + "/" + dir[i]);
            }
        }
    }
    context._metrics._dirsListed.inc();
}


// Always called from lister thread (which might be worker thread)
bool
FileScanner::processFile(document::BucketId::List & list,
                         Context& context,
                         std::vector<uint32_t>& path,
                         const std::string& pathName,
                         const std::string& name)
{
    if (name == "." || name == ".."
        || name == "chunkinfo" || name == "creationinfo")
    {
        LOG(spam, "Ignoring expected file that is not a slotfile '%s'.",
            name.c_str());
        return true;
    }
    document::BucketId bucket(extractBucketId(name));
    if (bucket.getRawId() == 0) {
        // Delete temporary files generated by storage
        if (name.size() > 4 && name.substr(name.size() - 4) == ".tmp") {
            context._metrics._temporaryFilesDeleted.inc();
            LOG(debug, "Deleting temporary file found '%s'. Assumed it was "
                       "generated by storage temporarily while processing a "
                       "request and process or disk died before operation "
                       "completed.",
                (pathName + "/" + name).c_str());
            vespalib::unlink(pathName + "/" + name);
            return true;
        }
        // Not a bucket file and not a temp file: alien (caller logs it).
        return false;
    }
    if (handleBadLocation(bucket, context, path)) {
        LOG(spam, "Adding bucket %s.", bucket.toString().c_str());
        list.push_back(bucket);
    }
    return true;
}

// Parses "<rawid-hex>.<filenr-hex>" file names; returns a null (raw id 0)
// BucketId for anything that does not match the current naming scheme.
document::BucketId
FileScanner::extractBucketId(const std::string& name) const
{
    if (name.size() < 9) return document::BucketId();
    std::string::size_type pos = name.find('.');
    if (pos == std::string::npos || pos > 16) return document::BucketId();
    char *endPtr;
    document::BucketId::Type idnum = strtoull(&name[0], &endPtr, 16);
    if (endPtr != &name[pos]) return document::BucketId();
    uint32_t fileNr = strtol(&name[pos + 1], &endPtr, 16);
    if (*endPtr != '\0') return document::BucketId();
    // Check for deprecated name types
    if (fileNr != 0) {
        LOG(warning, "Found buckets split with old file splitting system. Have "
                     "you upgraded from VDS version < 3.1 to >= 3.1 ? This "
                     "requires a refeed as files stored are not backward "
                     "compatible.");
        return document::BucketId();
    }
    return document::BucketId(idnum);
}

// Moves a bucket file found in the wrong directory on this disk to its
// correct directory. Returns false if the bucket must be ignored (duplicate).
bool
FileScanner::handleBadLocation(const document::BucketId& bucket,
                               Context& context,
                               std::vector<uint32_t>& path)
{
    std::vector<uint32_t> expectedPath(_directoryMapper.getPath(bucket));

    // If in wrong directory on disk, do a rename to move it where VDS will
    // access it.
    if (expectedPath != path) {
        std::string source(getPathName(context, path, &bucket));
        std::string target(getPathName(context, expectedPath, &bucket));

        if (vespalib::fileExists(target)) {
            std::ostringstream err;
            err << "Cannot move file from wrong directory " << source
                << " to " << target << " as file already exist. Multiple "
                << "instances of bucket on same disk. Should not happen. "
                << "Ignoring file at in bad location.";
            LOG(warning, "%s", err.str().c_str());
            context._metrics._multipleBucketsSameDisk.inc();
            return false;
        }
        if (!vespalib::rename(source, target, false, true)) {
            std::ostringstream err;
            err << "Cannot move file from " << source << " to " << target
                << " as source file does not exist. Should not happen.";
            LOG(error, "%s", err.str().c_str());
            throw vespalib::IllegalStateException(err.str(), VESPA_STRLOC);
        }
        LOGBP(warning, "Found bucket in wrong directory. Moved %s to %s.",
              source.c_str(), target.c_str());
        context._metrics._wrongDir.inc();
    }
    return true;
}

} // memfile
} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h new file mode 100644 index 00000000000..e83a01e932e --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h @@ -0,0 +1,105 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::FileScanner
 * \ingroup memfile
 *
 * \brief Scans a directory for memfiles.
 *
 * When storage starts up, we need to know what data already exist. This process
 * will identify what buckets we have data for.
 */

#pragma once

#include <vespa/metrics/metrics.h>
#include <vespa/memfilepersistence/device/mountpointlist.h>
#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h>
#include <vespa/storageframework/storageframework.h>

namespace document {
    class BucketId;
}

namespace storage {
namespace memfile {

class FileScanner : private framework::Component {
public:
    typedef std::unique_ptr<FileScanner> UP;

    struct Config {
        // Cap on how many alien file paths are remembered in the metrics;
        // the alien file counter itself is unbounded.
        uint32_t _maxAlienFilesLogged;
        Config()
            : _maxAlienFilesLogged(10) {}
    };
    struct Metrics : public metrics::MetricSet {
        // Paths of unrecognized (alien) files encountered during scanning.
        std::vector<std::string> _alienFiles;
        metrics::LongCountMetric _alienFileCounter;
        metrics::LongCountMetric _temporaryFilesDeleted;
        metrics::LongCountMetric _multipleBucketsSameDisk;
        metrics::LongCountMetric _wrongDir;
        metrics::LongCountMetric _wrongDisk;
        metrics::LongCountMetric _dirsListed;
        framework::MilliSecTimer _startTime;
        metrics::LongAverageMetric _listLatency;

        Metrics(framework::Clock&);
    };

private:
    // Per-operation state: the directory being scanned plus thread-local
    // metrics, so worker threads can scan without holding _globalLock.
    struct Context {
        const Directory& _dir;
        Metrics _metrics;

        Context(const Directory& d, framework::Clock& c)
            : _dir(d), _metrics(c) {}
    };

    BucketDirectoryMapper _directoryMapper;
    const MountPointList& _mountPoints;
    Config _config;
    uint32_t _dirLevels;
    uint32_t _dirSpread;
    // As there is only one FileScanner instance in storage, we need a
    // lock to let multiple threads update global data in the scanner.
    // Each operation will typically keep a Context object it can use
    // without locking and then grab lock to update global data after
    // completion.
    vespalib::Lock _globalLock;
    Metrics _globalMetrics;

public:
    FileScanner(framework::ComponentRegister&, const MountPointList&,
                uint32_t dirLevels, uint32_t dirSpread);

    /**
     * Scan one partition's directory tree and append all bucket ids found
     * to `list`. `part`/`totalParts` split the work between callers.
     */
    void buildBucketList(document::BucketId::List & list,
                         uint16_t partition,
                         uint16_t part, uint16_t totalParts);

    const Metrics& getMetrics() const { return _globalMetrics; }


private:
    // Recursive descent through the spread directories down to leaf level.
    void buildBucketList(document::BucketId::List & list,
                         Context&,
                         std::vector<uint32_t>& path,
                         uint32_t dirLevel);
    std::string getPathName(Context&, std::vector<uint32_t>& path,
                            const document::BucketId* bucket = 0) const;
    // Process one leaf directory.
    void buildBucketList(document::BucketId::List & list,
                         Context&,
                         std::vector<uint32_t>& path);
    // Returns false for alien files (unrecognized file names).
    bool processFile(document::BucketId::List & list,
                     Context&,
                     std::vector<uint32_t>& path,
                     const std::string& pathName,
                     const std::string& name);
    document::BucketId extractBucketId(const std::string& name) const;
    bool handleBadLocation(const document::BucketId& bucket,
                           Context&,
                           std::vector<uint32_t>& path);
};

} // memfile
} // storage
+vespa_add_library(memfilepersistence_mapper OBJECT + SOURCES + buffer.cpp + memfilemapper.cpp + memfile_v1_serializer.cpp + memfile_v1_verifier.cpp + locationreadplanner.cpp + simplememfileiobuffer.cpp + fileinfo.cpp + locationreadplanner.cpp + bufferedfilewriter.cpp + bucketdirectorymapper.cpp + DEPENDS +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp new file mode 100644 index 00000000000..956e806968d --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h> + +#include <vespa/document/bucket/bucketid.h> +#include <vespa/vespalib/util/random.h> + +namespace storage { +namespace memfile { + +BucketDirectoryMapper::BucketDirectoryMapper(uint32_t dirLevels, + uint32_t dirSpread) + : _dirLevels(dirLevels), + _dirSpread(dirSpread) +{ +} + +std::vector<uint32_t> +BucketDirectoryMapper::getPath(const document::BucketId& bucket) +{ + document::BucketId::Type seed = bucket.getId(); + seed = seed ^ (seed >> 32); + vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11); + std::vector<uint32_t> position(_dirLevels); + for (uint32_t i=0; i<_dirLevels; ++i) { + position[i] = randomizer.nextUint32() % _dirSpread; + } + return position; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h new file mode 100644 index 00000000000..a12c0f9c7cb --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::memfile::BucketDirectoryMapper
 * \ingroup memfile
 *
 * \brief Maps buckets to directories on disk.
 *
 * To avoid having too many files in one directory, we want to map buckets to
 * different directories. As these are all in the same partition anyways, we
 * don't really need the distribution to be different based on node indexes or
 * disk indexes.
 *
 * This class hides a simple function for distributing buckets between
 * directories.
 */

#pragma once

#include <vector>

namespace document {
    class BucketId;
}

namespace storage {
namespace memfile {

class BucketDirectoryMapper {
    uint32_t _dirLevels;  // Depth of the directory tree.
    uint32_t _dirSpread;  // Number of subdirectories per level.

public:
    BucketDirectoryMapper(uint32_t dirLevels, uint32_t dirSpread);

    /** Deterministic path (one index per level) for the given bucket. */
    std::vector<uint32_t> getPath(const document::BucketId&);
};

}
}
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/buffer.h> +#include <algorithm> +#include <stdlib.h> + +namespace storage { +namespace memfile { + +Buffer::Buffer(size_t size) + : _buffer(size), + _size(size) +{ +} + +void +Buffer::resize(size_t size) +{ + BackingType buffer(size); + size_t commonSize(std::min(size, _size)); + memcpy(buffer.get(), _buffer.get(), commonSize); + _buffer.swap(buffer); + _size = size; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h new file mode 100644 index 00000000000..2484209d23e --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::Buffer + * \ingroup memfile + * + * \brief Simple wrapper class to contain an aligned buffer. + * + * For direct IO operations, we need to use 512 byte aligned buffers. This is + * a simple wrapper class to get such a buffer. + */ + +#pragma once + +#include <boost/utility.hpp> +#include <vespa/vespalib/util/alloc.h> +#include <vespa/vespalib/util/linkedptr.h> + +namespace storage { +namespace memfile { + +class Buffer : boost::noncopyable // Ensure no accidental copying of a buffer +{ + // Use AutoAlloc to transparently use mmap for large buffers. + // It is crucial that any backing buffer type returns an address that is + // 512-byte aligned, or direct IO will scream at us and fail everything. + static constexpr size_t MMapLimit = vespalib::MMapAlloc::HUGEPAGE_SIZE; + using BackingType = vespalib::AutoAlloc<MMapLimit, 512>; + + BackingType _buffer; + // Actual, non-aligned size (as opposed to _buffer.size()). 
class Buffer : boost::noncopyable // Ensure no accidental copying of a buffer
{
    // Use AutoAlloc to transparently use mmap for large buffers.
    // It is crucial that any backing buffer type returns an address that is
    // 512-byte aligned, or direct IO will scream at us and fail everything.
    static constexpr size_t MMapLimit = vespalib::MMapAlloc::HUGEPAGE_SIZE;
    using BackingType = vespalib::AutoAlloc<MMapLimit, 512>;

    BackingType _buffer;
    // Actual, non-aligned size (as opposed to _buffer.size()).
    size_t _size;

public:
    typedef vespalib::LinkedPtr<Buffer> LP;

    Buffer(size_t size);

    /**
     * Resize buffer while keeping data that exists in the intersection of
     * the old and new buffers' sizes.
     */
    void resize(size_t size);

    char* getBuffer() noexcept {
        return static_cast<char*>(_buffer.get());
    }
    const char* getBuffer() const noexcept {
        return static_cast<const char*>(_buffer.get());
    }
    // Returns the requested size, which may be smaller than the (aligned)
    // allocation actually backing the buffer.
    size_t getSize() const noexcept {
        return _size;
    }

    // Implicit conversion so the buffer can be passed straight to IO calls.
    operator char*() noexcept { return getBuffer(); }

};

} // storage
} // memfile
BufferedFileWriter::
BufferedFileWriter(vespalib::File& file, char* buffer, uint32_t bufferSize)
    : _file(file),
      _buffer(buffer),
      _bufferSize(bufferSize),
      _bufferedData(0),
      _filePosition(0),
      _writeCount(0),
      _cache(0),
      _cacheDirtyUpTo(0),
      _writing(false)
{
    // Since we normally use direct IO for writing, we want to have
    // 512b aligned buffers
    if (bufferSize < 512) {
        // Only warn for this. Used in testing.
        LOGBP(warning, "Using buffer smaller than 512b");
    } else if (bufferSize % 512 != 0) {
        std::ostringstream ost;
        ost << "Buffered file writer got buffer of length " << bufferSize
            << " (Not dividable by 512)";
        throw vespalib::IllegalArgumentException(ost.str());
    }
    LOG(spam, "Using buffer in writer of %u bytes", bufferSize);
}

BufferedFileWriter::~BufferedFileWriter()
{
    // Deliberately does not flush(): flushing can fail, and destructors
    // must not throw. Callers are responsible for flushing.
    if (LOG_WOULD_LOG(debug) && _bufferedData != 0) {
        LOG(debug, "Discarding %u bytes of buffered, unflushed data",
            _bufferedData);
    }
}

void
BufferedFileWriter::setMemoryCache(Cache* cache)
{
    // Switching cache resets the dirty watermark; previously dirty cache
    // content is the old cache's concern.
    _cache = cache;
    _cacheDirtyUpTo = 0;
    if (cache == 0) {
        LOG(spam, "No longer using a memory cache");
    } else {
        LOG(spam, "Using memory cache of %u bytes", _cache->getCachedAmount());
    }
}

// Low-level positioned write: routes data to the memory cache when the
// range is cached, and to the file otherwise (possibly split between both).
void BufferedFileWriter::write(const char* data, uint32_t size, uint32_t pos)
{
    _writing = true;
    // If at least parts of data written is cached in slotfileimage, update
    // cache rather than write to file.
    if (_cache != 0 && _cache->getCachedAmount() > pos) {
        uint32_t len = std::min(size, _cache->getCachedAmount() - pos);
        _cache->setData(data, len, pos);
        if (_cache->duplicateCacheWrite()) {
            // Cache wants the data on disk too; write the full range below.
            len = 0;
        }
        if (len != size) { // Write remaining directly to disk
            LOG(spam, "Writing remainder after cache, bypassing buffer. "
                "%u bytes at pos %u.", size - len, pos + len);
            _file.write(data + len, size - len, pos + len);
            ++_writeCount;
        } else {
            LOG(spam, "Writing %u bytes to memory cache at position %u.",
                size, pos);
        }
        _cacheDirtyUpTo = std::max(_cacheDirtyUpTo, pos + len);
    } else {
        LOG(spam, "Writing directly to file, bypassing buffer. %u"
            " bytes at pos %u", size, pos);
        _file.write(data, size, pos);
        ++_writeCount;
    }
    _writing = false;
}

void BufferedFileWriter::flush()
{
    if (_bufferedData == 0) return;
    LOG(spam, "Flushing buffer. Writing %u at pos %u.",
        _bufferedData, _filePosition);
    write(_buffer, _bufferedData, _filePosition);
    _filePosition += _bufferedData;
    _bufferedData = 0;
}

// Sequential write at the current logical position (file pos + buffered).
void BufferedFileWriter::write(const void *buffer, size_t size)
{
    LOG(spam, "Writing %" PRIu64 " bytes to buffer at position %u.",
        size, _filePosition + _bufferedData);
    if (!_buffer) { // If we don't use a buffer, just write to file.
        write(static_cast<const char*>(buffer), size, _filePosition);
        _filePosition += size;
        return;
    }
    // In case of exception later, reset state to original state
    ValueGuard<uint32_t> bufIndexGuard(_bufferedData);
    ValueGuard<uint32_t> filePositionGuard(_filePosition);
    // Buffer may contain data prior to this write call. If this is
    // successfully written to disk, we need to update state to revert
    // to such that we don't lose that write.

    if (_bufferedData + size >= _bufferSize) {
        // Top up the buffer to exactly _bufferSize and flush it.
        size_t part = _bufferSize - _bufferedData;
        memcpy(_buffer + _bufferedData, buffer, part);
        _bufferedData = _bufferSize;
        buffer = static_cast<const char*>(buffer) + part;
        flush();
        bufIndexGuard = 0;
        filePositionGuard = _filePosition + _bufferSize - part;
        size -= part;
    }

    if (_bufferedData + size >= _bufferSize) {
        // NOTE(review): alignment test on the caller's pointer — presumably
        // because direct IO requires a 512-byte aligned source; confirm.
        if (reinterpret_cast<unsigned long>(buffer)%0x200 == 0) {
            // Write the big part that is a multiple of _bufferSize to the file.
            size_t part((size/_bufferSize)*_bufferSize);
            write(static_cast<const char*>(buffer), part, _filePosition);
            _filePosition += part;
            buffer = static_cast<const char*>(buffer) + part;
            size -= part;
        } else {
            // Unaligned source: stage full buffer-sized chunks through _buffer.
            for (; _bufferedData + size >= _bufferSize; size -= _bufferSize, buffer = static_cast<const char*>(buffer) + _bufferSize) {
                memcpy(_buffer, buffer, _bufferSize);
                _bufferedData = _bufferSize;
                flush();
            }
        }
    }

    // We now have room for the rest of the data in buffer
    assert(_bufferedData + size < _bufferSize);
    memcpy(_buffer + _bufferedData, buffer, size);
    _bufferedData += size;
    // Finished successfully, deactivate guards
    bufIndexGuard.deactivate();
    filePositionGuard.deactivate();
}

// Advances the logical position by `size` bytes whose content is
// unspecified (filled with 0xFF or whatever the buffer already holds).
void BufferedFileWriter::writeGarbage(uint32_t size) {
    LOG(spam, "Writing %u bytes of garbage at position %u.",
        size, _filePosition + _bufferedData);
    if (!_buffer) {
        // Unbuffered mode: write uninitialized scratch data in chunks.
        ValueGuard<uint32_t> filePositionGuard(_filePosition);
        uint32_t maxBufferSize = 0xFFFF;
        uint32_t bufSize = (size > maxBufferSize ? maxBufferSize : size);
        boost::scoped_array<char> buf(new char[bufSize]);
        while (size > 0) {
            uint32_t part = (size > bufSize ? bufSize : size);
            write(&buf[0], part, _filePosition);
            _filePosition += part;
            size -= part;
        }
        filePositionGuard.deactivate();
        return;
    }
    // In case of exception later, reset state to original state
    ValueGuard<uint32_t> bufIndexGuard(_bufferedData);
    ValueGuard<uint32_t> filePositionGuard(_filePosition);

    if (_bufferedData + size >= _bufferSize) {
        size_t part = _bufferSize - _bufferedData;
        memset(_buffer + _bufferedData, 0xFF, part);
        _bufferedData += part; // Use any garbage data already there.
        flush();
        bufIndexGuard = 0;
        filePositionGuard = _filePosition + _bufferSize - part;
        size -= part;
    }

    memset(_buffer + _bufferedData, 0xFF, std::min(_bufferSize-_bufferedData, size));

    // Later iterations reuse whatever the buffer already contains, which is
    // fine since the content is garbage by contract.
    for (;_bufferedData + size >= _bufferSize; size -= _bufferSize) {
        _bufferedData = _bufferSize;
        flush();
    }

    // We now have room for the rest of the data in buffer
    assert(_bufferedData + size < _bufferSize);
    _bufferedData += size; // Use any garbage data already there.
    // Finished successfully, deactivate guards
    bufIndexGuard.deactivate();
    filePositionGuard.deactivate();
}

void BufferedFileWriter::setFilePosition(uint32_t pos)
{
    // Flush first so buffered data lands at its original position.
    if (pos != _filePosition + _bufferedData) {
        flush();
        _filePosition = pos;
    }
}

uint32_t BufferedFileWriter::getFilePosition() const
{
    // Logical position: physical file position plus unflushed buffer bytes.
    return _filePosition + _bufferedData;
}

}

}
 *
 * @author Håkon Humberset
 * @date 2005-11-03
 */

#pragma once

#include <boost/utility.hpp>
#include <vespa/fastos/types.h>
#include <vector>

namespace vespalib {
    class File;
}

namespace storage {

namespace memfile {

class BufferedFileWriter : public boost::noncopyable {
public:
    /** Optional memory-side target; writes within its range go to the cache
     * instead of (or in addition to) the file. */
    struct Cache {
        virtual ~Cache() {}
        virtual uint32_t getCachedAmount() const = 0;
        /** Index given must be within [0 - getCachedAmount()> */
        virtual char* getCache(uint32_t atIndex) = 0;
        /** If true, write to both cache and file, else, write to cache only. */
        virtual bool duplicateCacheWrite() const = 0;
        /** Function for updating content in cache. Implemented in cache as new
         * core overrides it to ignore data ahead of a given index. */
        virtual void setData(const char* data, size_t len, uint64_t pos)
            { memcpy(getCache(pos), data, len); }
    };

private:
    vespalib::File& _file;     // Must remain open for the writer's lifetime.
    char* _buffer;             // 0 means unbuffered ("fake") mode.
    uint32_t _bufferSize;
    uint32_t _bufferedData;    // Bytes currently held in _buffer.
    uint32_t _filePosition;    // File offset where the buffer starts.
    uint32_t _writeCount;      // Physical writes issued so far.
    Cache* _cache;
    uint32_t _cacheDirtyUpTo;  // Highest cache offset written since tagCacheClean().
    bool _writing;

public:
    /**
     * Create a new buffered file writer.
     *
     * @param filedescriptor Write to this file which should already be open for
     *                       writing.
     * @param buffer Pointer to the buffer to use in this writer. Note that
     *               if buffer is 0, fakemode will be used, where all writes
     *               are sent on to OS. This mode can be used to test difference
     *               in performance of using this class or not.
     * @param bufferSize The size of the buffer to keep.
     */
    BufferedFileWriter(vespalib::File&, char* buffer, uint32_t bufferSize);
    /**
     * Destructor does not flush(). Make sure to call flush() manually.
     * (flush() can fail, and destructors should not throw exceptions)
     */
    ~BufferedFileWriter();

    uint32_t getBufferSize() const { return _bufferSize; }

    /**
     * If set, write portion written inside of memory cache here instead of
     * to file.
     */
    void setMemoryCache(Cache* cache);

    bool isMemoryCacheDirty() const { return (_cacheDirtyUpTo != 0); }

    uint32_t getLastDirtyIndex() const { return _cacheDirtyUpTo; }

    void tagCacheClean() { _cacheDirtyUpTo = 0; }

    /** Write all buffered data to disk. */
    void flush();

    // Functions using the held file position.

    /** Writes the given data to file and increases the file position. */
    void write(const void *buffer, size_t size);

    /** Writes undefined data of given size to file and increases position. */
    void writeGarbage(uint32_t size);

    /** Set the file position to the given value. (Flushes before changing) */
    void setFilePosition(uint32_t pos);

    /** Get the current file position. */
    uint32_t getFilePosition() const;

    uint32_t getBufferedSize() const { return _bufferedData; }

    /** Get how many times this writer has flushed data to disk. */
    uint32_t getWriteCount() const { return _writeCount; }

private:
    /** Positioned write routed via the memory cache when applicable. */
    void write(const char* data, uint32_t size, uint32_t pos);
};

}

}
#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/mapper/fileinfo.h>

namespace storage {

namespace memfile {

// Empty layout: all block sizes zero.
FileInfo::FileInfo()
    : _metaDataListSize(0),
      _headerBlockSize(0),
      _bodyBlockSize(0)
{
}

FileInfo::FileInfo(uint32_t metaDataListSize,
                   uint32_t headerBlockSize,
                   uint32_t bodyBlockSize)
    : _metaDataListSize(metaDataListSize),
      _headerBlockSize(headerBlockSize),
      _bodyBlockSize(bodyBlockSize)
{
}

// Derive the body block size from the total file size: everything that is
// not the fixed Header, the MetaSlot array, or the header block.
FileInfo::FileInfo(const Header& header, size_t fileSize)
    : _metaDataListSize(header._metaDataListSize),
      _headerBlockSize(header._headerBlockSize),
      _bodyBlockSize(
            fileSize - header._headerBlockSize
            - sizeof(MetaSlot) * header._metaDataListSize - sizeof(Header))
{
}

uint32_t
FileInfo::getHeaderBlockStartIndex() const
{
    // Header block starts right after the fixed file header and the
    // metadata slot array.
    return sizeof(Header) + _metaDataListSize * sizeof(MetaSlot);
}

uint32_t
FileInfo::getBodyBlockStartIndex() const
{
    return getHeaderBlockStartIndex() + _headerBlockSize;
}

uint32_t
FileInfo::getFileSize() const
{
    return getBodyBlockStartIndex() + _bodyBlockSize;
}

std::string
FileInfo::toString() const
{
    std::ostringstream ost;
    ost << "FileInfo("
        << "meta_size " << _metaDataListSize
        << " header_start " << getHeaderBlockStartIndex()
        << " body_start " << getBodyBlockStartIndex()
        << ")";
    return ost.str();
}

}

}
#pragma once

#include <vespa/memfilepersistence/common/types.h>
#include <vespa/vespalib/util/crc.h>

namespace storage {

namespace memfile {

/**
 * On-disk metadata entry for one document slot: timestamp, global id,
 * header/body locations within the file, flags and a 16-bit checksum.
 */
struct MetaSlot : private Types {
    Timestamp _timestamp;
    GlobalId _gid;
    uint32_t _headerPos;
    uint32_t _headerSize;
    uint32_t _bodyPos;
    uint32_t _bodySize;
    uint16_t _flags;
    uint16_t _checksum;

    // 39859 is the checksum an all-zero slot computes to; an empty slot is
    // thus valid without calling calcSlotChecksum().
    MetaSlot() : _timestamp(0), _headerPos(0), _headerSize(0),
                 _bodyPos(0), _bodySize(0), _flags(0), _checksum(39859)
    {
        //_checksum = calcSlotChecksum();
        //std::cerr << "Empty checksum " << _checksum << "\n";
    }

    // CRC32 over the raw struct bytes, excluding the checksum field itself,
    // truncated to 16 bits.
    uint16_t calcSlotChecksum() const {
        static uint32_t size(sizeof(MetaSlot) - sizeof(_checksum));
        vespalib::crc_32_type calculator;
        calculator.process_bytes(this, size);
        return calculator.checksum() & 0xffff;

    }

    bool inUse() const {
        return (_flags & IN_USE);
    }

    void print(std::ostream & out) const {
        vespalib::asciistream tmp;
        print(tmp);
        out << tmp.str();
    }
    void print(vespalib::asciistream & out) const {
        out << "Slot(" << std::dec << _timestamp << ", " << _gid << ", "
            << _headerPos << " - " << _headerSize << ", " << _bodyPos
            << " - " << _bodySize << ", 0x" << std::hex << _flags << ", 0x"
            << _checksum << ")" << std::dec;
    }

    // Functions used by unit tests (avoid renaming all old func usage)
    void updateChecksum() { _checksum = calcSlotChecksum(); }
    void setTimestamp(Timestamp ts) { _timestamp = ts; }
    void setHeaderPos(uint32_t p) { _headerPos = p; }
    void setHeaderSize(uint32_t sz) { _headerSize = sz; }
    void setBodyPos(uint32_t p) { _bodyPos = p; }
    void setBodySize(uint32_t sz) { _bodySize = sz; }
    void setUseFlag(bool isInUse)
        { _flags = (isInUse ? _flags | IN_USE : _flags & ~IN_USE); }
};

inline std::ostream& operator<<(std::ostream& out, const MetaSlot& slot) {
    vespalib::asciistream tmp;
    slot.print(tmp);
    return out << tmp.str();
}
inline vespalib::asciistream& operator<<(vespalib::asciistream & out, const MetaSlot& slot) {
    slot.print(out); return out;
}

/**
 * Represents a slotfile header.
 */
struct Header {
    uint32_t _version;
    uint32_t _metaDataListSize;
    uint32_t _headerBlockSize;
    uint32_t _checksum;
    uint32_t _fileChecksum;
    uint32_t _notInUse0; // Some reserved bits, which we can use later if
    uint64_t _notInUse1; // needed without altering the file format.
    uint64_t _notInUse2;
    uint64_t _notInUse3;
    uint64_t _notInUse4;
    uint64_t _notInUse5;

    Header()
        : _version(Types::TRADITIONAL_SLOTFILE),
          _metaDataListSize(0),
          _headerBlockSize(0),
          _checksum(0),
          _fileChecksum(0),
          _notInUse0(0), _notInUse1(0), _notInUse2(0),
          _notInUse3(0), _notInUse4(0), _notInUse5(0)
    {
    }

    // CRC32 over the first 12 bytes: _version, _metaDataListSize and
    // _headerBlockSize only.
    uint32_t calcHeaderChecksum() const {
        vespalib::crc_32_type calculator;
        calculator.process_bytes(this, 12);
        return calculator.checksum();
    }
    bool verify() const {
        return (_version == Types::TRADITIONAL_SLOTFILE
                && _checksum == calcHeaderChecksum());
    }
    // Functions used by unit tests (avoid renaming all old func usage)
    void updateChecksum() { _checksum = calcHeaderChecksum(); }
    void setVersion(uint32_t version) { _version = version; }
    void setMetaDataListSize(uint32_t sz) { _metaDataListSize = sz; }
    void setHeaderBlockSize(uint32_t sz) { _headerBlockSize = sz; }

    void print(std::ostream& out, const std::string& indent = "") const {
        out << indent << "SlotFileHeader(\n"
            << indent << " version: " << std::hex << _version << std::dec << "\n"
            << indent << " meta data list size: " << _metaDataListSize << "\n"
            << indent << " header block size: " << _headerBlockSize << "b\n"
            << indent << " checksum: " << std::hex << _checksum
            << indent << (verify() ? " (OK)\n" : " (MISMATCH)\n")
            << indent << " file checksum: " << _fileChecksum << "\n"
            << indent << ")";
    }
};
" (OK)\n" : " (MISMATCH)\n") + << indent << " file checksum: " << _fileChecksum << "\n" + << indent << ")"; + } + }; + +struct FileInfo { + typedef std::unique_ptr<FileInfo> UP; + + uint32_t _metaDataListSize; + uint32_t _headerBlockSize; + uint32_t _bodyBlockSize; + + // Cached header bytes to write in addition to metadata when + // needing to write back metadata 512 byte aligned + std::vector<char> _firstHeaderBytes; + + FileInfo(); + FileInfo(uint32_t metaDataListSize, uint32_t headerBlockSize, uint32_t bodyBlockSize); + FileInfo(const Header& header, size_t fileSize); + + uint32_t getBlockSize(Types::DocumentPart part) const { + return (part == Types::BODY ? _bodyBlockSize : _headerBlockSize); + } + uint32_t getBlockIndex(Types::DocumentPart part) const { + return (part == Types::BODY ? getBodyBlockStartIndex() + : getHeaderBlockStartIndex()); + } + uint32_t getHeaderBlockStartIndex() const; + uint32_t getBodyBlockStartIndex() const; + uint32_t getFileSize() const; + std::string toString() const; +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp new file mode 100644 index 00000000000..e853e374943 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
namespace {
    // Round down to the previous 512 byte (direct IO sector) boundary.
    uint32_t alignDown(uint32_t value) {
        return value - (value % 512);
    };

    // Round up to the next 512 byte (direct IO sector) boundary.
    uint32_t alignUp(uint32_t value) {
        return ((value + 511) / 512) * 512;
    };
}
+ ops.push_back(DataLocation(loc._pos + _blockStartIndex, + loc._size)); + } +} + +void +LocationDiskIoPlanner::processLocations( + const std::vector<DataLocation>& desiredLocations, + uint32_t maxGap) +{ + // Build list of disk read operations to do + std::vector<DataLocation> allOps; + + // Create list of all locations we need to read + for (std::size_t i = 0; i < desiredLocations.size(); ++i) { + scheduleLocation(desiredLocations[i], allOps); + } + + // Sort list, and join elements close together into single IO ops + std::sort(allOps.begin(), allOps.end()); + for (size_t i = 0; i < allOps.size(); ++i) { + uint32_t start = alignDown(allOps[i]._pos); + uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size); + if (i != 0) { + uint32_t lastStop = _operations.back()._pos + + _operations.back()._size; + if (lastStop >= start || start - lastStop < maxGap) { + _operations.back()._size += (stop - lastStop); + continue; + } + } + + _operations.push_back(DataLocation(start, stop - start)); + } +} + +uint32_t +LocationDiskIoPlanner::getTotalBufferSize() const +{ + uint32_t totalSize = 0; + for (size_t i = 0; i < _operations.size(); ++i) { + totalSize += _operations[i]._size; + } + return totalSize; +} + +void +LocationDiskIoPlanner::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + (void) verbose; (void) indent; + for (std::size_t i = 0; i < _operations.size(); ++i) { + if (i > 0) out << ","; + out << "[" << _operations[i]._pos << "," + << (_operations[i]._size + _operations[i]._pos) << "]"; + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h new file mode 100644 index 00000000000..915f158eb85 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::memfile::LocationDiskIoPlanner
 * \ingroup memfile
 *
 * \brief Creates list of minimal IO operations to do versus disk.
 *
 * When accessing many locations on disk, it is not necessarily ideal to do a
 * disk access per location. This class creates a minimal set of locations to
 * access to avoid accessing more than a maximum gap of uninteresting data.
 */
#pragma once

#include <vespa/memfilepersistence/common/types.h>

namespace storage {
namespace memfile {

class MemSlot;

class MemFileIOInterface;

class LocationDiskIoPlanner : public Types, public vespalib::Printable
{
public:
    /**
     * Plan the IO operations needed to read `desiredLocations`.
     *
     * @param io Used to skip locations that are already cached.
     * @param part Which document part (header/body) the locations refer to.
     * @param maxGap Merge reads separated by less than this many bytes.
     * @param blockStartIndex File offset of the block the locations are
     *        relative to; added to each planned operation's position.
     */
    LocationDiskIoPlanner(const MemFileIOInterface& io,
                          DocumentPart part,
                          const std::vector<DataLocation>& desiredLocations,
                          uint32_t maxGap,
                          uint32_t blockStartIndex);

    const std::vector<DataLocation>& getIoOperations() const {
        return _operations;
    }

    /**
     * Get the total amount of space needed to hold all the data from all
     * locations identified to be accessed. Useful to create a buffer of correct
     * size.
     */
    uint32_t getTotalBufferSize() const;

    void print(std::ostream& out, bool verbose,
               const std::string& indent) const;

private:
    const MemFileIOInterface& _io;
    std::vector<DataLocation> _operations;  // Merged, sorted IO operations.
    DocumentPart _part;
    uint32_t _blockStartIndex;

    // Builds _operations from the desired locations (sort + merge).
    void processLocations(
            const std::vector<DataLocation>& desiredLocations,
            uint32_t maxGap);

    // Appends a single uncached, non-empty location as an absolute-position
    // operation.
    void scheduleLocation(DataLocation loc,
                          std::vector<DataLocation>&);
};

} // memfile
} // storage
See LICENSE in the project root. +/** + * \class storage::memfile::MapperSlotOperation + * \ingroup memfile + * + * \brief Utility class to wrap const casting. + * + * The MemFile objects wants to track all changes done to them by clients, such + * that they can track internally whether they are altered in memory from the + * physical file. Thus, only const MemSlot objects are exposed. If one wants to + * alter the MemFile slots one has to call functions in MemFile to do it. + * + * But the mapper code need to alter some information in the MemFile and MemSlot + * objects. For instance, it has to clear altered tag after flushing content to + * disk. The mappers thus need to alter the objects in a way regular clients + * should not be allowed to. + * + * To implement this, we use this class, which contains only the functionality + * needed by the mappers, and which const cast to let the mapper change the + * state it needs to. + */ + +#pragma once + +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/memfile/memfile.h> + +namespace storage { +namespace memfile { + +struct MapperSlotOperation : protected Types { + static void setFlag(const MemFile& file, uint32_t flags) + { + const_cast<MemFile&>(file).setFlag(flags); + } + static void clearFlag(const MemFile& file, uint32_t flags) + { + const_cast<MemFile&>(file).clearFlag(flags); + } + static void setFlag(const MemSlot& slot, uint32_t flags) + { + const_cast<MemSlot&>(slot).setFlag(flags); + } + static void clearFlag(const MemSlot& slot, uint32_t flags) + { + const_cast<MemSlot&>(slot).clearFlag(flags); + } + static void setLocation(const MemSlot& slot, DocumentPart part, + const DataLocation& dl) + { + const_cast<MemSlot&>(slot).setLocation(part, dl); + } + static void setChecksum(const MemSlot& slot, uint16_t checksum) + { + const_cast<MemSlot&>(slot).setChecksum(checksum); + } +}; + +} // memfile +} // storage + diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp new file mode 100644 index 00000000000..12f7219e2ca --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp @@ -0,0 +1,1029 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> +#include <vespa/memfilepersistence/common/exceptions.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/locationreadplanner.h> +#include <vespa/memfilepersistence/mapper/uniqueslotgenerator.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> +#include <vespa/vespalib/util/crc.h> + +LOG_SETUP(".persistence.memfilev1"); + +namespace storage { +namespace memfile { + +namespace { + +void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) { + uint32_t blocks = (value + offset + block - 1) / block; + value = blocks * block - offset; +} + +int32_t getBufferPos( + const DataLocation& location, + const std::vector<DataLocation>& locations) +{ + uint32_t posNow = 0; + for (uint32_t i = 0; i < locations.size(); ++i) { + if (locations[i].contains(location)) { + return posNow + location._pos - locations[i]._pos; + } + + posNow += locations[i]._size; + } + + return -1; +} + +} + +MemFileV1Serializer::MemFileV1Serializer(ThreadMetricProvider& metricProvider) + : _metricProvider(metricProvider) +{ +} + +namespace { + +class SlotValidator +{ +public: + SlotValidator(uint32_t headerBlockOffset, + uint32_t bodyBlockOffset, + uint32_t fileSize) + : 
_headerBlockOffset(headerBlockOffset), + _bodyBlockOffset(bodyBlockOffset), + _fileSize(fileSize) + { + } + + bool slotHasValidInformation(const MetaSlot& ms) const { + const uint16_t slotCrc(ms.calcSlotChecksum()); + const bool checksumOk(slotCrc == ms._checksum); + return (checksumOk && slotLocationsWithinFileBounds(ms)); + } + +private: + bool slotLocationsWithinFileBounds(const MetaSlot& ms) const { + // The reason for checking header location bounds against file size + // instead of body block offset is that the latter is computed from the + // file meta header information and will thus be entirely unaware of + // any file truncations. + return (_headerBlockOffset + ms._headerPos + ms._headerSize <= _fileSize + && _bodyBlockOffset + ms._bodyPos + ms._bodySize <= _fileSize); + } + + const uint32_t _headerBlockOffset; + const uint32_t _bodyBlockOffset; + const uint32_t _fileSize; +}; + +} + +void +MemFileV1Serializer::loadFile(MemFile& file, Environment& env, + Buffer& buffer, uint64_t bytesRead) +{ + SerializationMetrics& metrics(getMetrics().serialization); + SimpleMemFileIOBuffer& ioBuf( + static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); + + vespalib::LazyFile* lf = &ioBuf.getFileHandle(); + + assert(file.getSlotCount() == 0); + assert(bytesRead >= 64); + + const Header* header(reinterpret_cast<const Header*>(buffer.getBuffer())); + if (header->_checksum != header->calcHeaderChecksum()) { + std::ostringstream error; + error << "Header checksum mismatch. 
Stored checksum " << std::hex + << header->_checksum << " does not match calculated checksum " + << header->calcHeaderChecksum(); + throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC); + } + uint32_t headerBlockIndex = sizeof(Header) + + header->_metaDataListSize * sizeof(MetaSlot); + + // Read all we need including first header bytes until alignment + uint32_t firstAlignedHeaderByte = headerBlockIndex; + alignUp(firstAlignedHeaderByte); + if (firstAlignedHeaderByte > bytesRead) { + framework::MilliSecTimer timer(env._clock); + LOG(spam, + "Only read %zu of required %u header bytes. " + "Resizing buffer and reading remaining data", + bytesRead, + firstAlignedHeaderByte); + buffer.resize(firstAlignedHeaderByte); + header = reinterpret_cast<const Header*>(buffer.getBuffer()); + off_t moreBytesRead = lf->read( + buffer + bytesRead, + firstAlignedHeaderByte - bytesRead, + bytesRead); + bytesRead += moreBytesRead; + if (bytesRead != firstAlignedHeaderByte) { + size_t fileSize = lf->getFileSize(); + if (firstAlignedHeaderByte > fileSize) { + std::ostringstream error; + error << "Header indicates file is bigger than it " + << "physically is. First aligned byte in header block " + << "starts at byte " << firstAlignedHeaderByte + << " while file is " << fileSize << " bytes long."; + throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC); + + } + assert(bytesRead == firstAlignedHeaderByte); + } + metrics.tooLargeMetaReadLatency.addValue(timer); + } + + FileInfo::UP data(new FileInfo); + data->_metaDataListSize = header->_metaDataListSize; + data->_headerBlockSize = header->_headerBlockSize; + const uint32_t headerBlockOffset( + sizeof(Header) + data->_metaDataListSize * sizeof(MetaSlot)); + const uint32_t bodyBlockOffset = headerBlockOffset + data->_headerBlockSize; + const uint32_t fileSize = lf->getFileSize(); + + // Avoid underflow in case of truncation. + const uint32_t bodyBlockSize( + fileSize > bodyBlockOffset ? 
fileSize - bodyBlockOffset : 0); + + data->_bodyBlockSize = bodyBlockSize; + data->_firstHeaderBytes.resize(firstAlignedHeaderByte - headerBlockIndex); + memcpy(&data->_firstHeaderBytes[0], buffer.getBuffer() + headerBlockIndex, + data->_firstHeaderBytes.size()); + + LOG(debug, + "File %s header info: metaDataListSize=%u, " + "headerBlockSize=%u, bodyBlockSize=%u", + file.getFile().getPath().c_str(), + data->_metaDataListSize, + data->_headerBlockSize, + data->_bodyBlockSize); + + ioBuf.setFileInfo(std::move(data)); + + uint32_t metaEntriesRead(header->_metaDataListSize); + bool foundBadSlot = false; + uint32_t lastBadSlot = 0; + SlotValidator validator(headerBlockOffset, bodyBlockOffset, fileSize); + + for (uint32_t i = 0; i < metaEntriesRead; ++i) { + const MetaSlot* ms(reinterpret_cast<const MetaSlot*>( + buffer + sizeof(Header) + i * sizeof(MetaSlot))); + + if (!validator.slotHasValidInformation(*ms)) { + foundBadSlot = true; + lastBadSlot = i; + continue; // Don't add bad slots. + } + + if (!ms->inUse()) { + break; + } + + MemSlot slot(ms->_gid, + ms->_timestamp, + DataLocation(ms->_headerPos, ms->_headerSize), + DataLocation(ms->_bodyPos, ms->_bodySize), + ms->_flags, + ms->_checksum); + + file.addSlot(slot); + } + + // We bail here instead of doing so inside the loop because this allows us + // to add all healthy slots to the file prior to throwing the exception. + // Any caller code that wants/need to inspect the good slots is then able + // to do so. It is not a given that this is a strong requirement; the check + // may be moved inside the loop if it can be established that no caller code + // expects the good slots to be present after a loadFile exception. + if (foundBadSlot) { + std::ostringstream error; + error << "Found bad slot in file '" + << file.getFile().getPath() + << "' at slot index " << lastBadSlot + << ", forcing repair of file. 
Details of file " + "corruption to follow."; + throw CorruptMemFileException(error.str(), file.getFile(), + VESPA_STRLOC); + } + + file.clearFlag(SLOTS_ALTERED); + + LOG(spam, "After loading file, its state is %s", file.toString(true).c_str()); +} + +void +MemFileV1Serializer::cacheLocationsForPart(SimpleMemFileIOBuffer& cache, + DocumentPart part, + uint32_t blockIndex, + const std::vector<DataLocation>& locationsToCache, + const std::vector<DataLocation>& locationsRead, + SimpleMemFileIOBuffer::BufferAllocation& buf) +{ + vespalib::asciistream error; + for (uint32_t i = 0; i < locationsToCache.size(); ++i) { + DataLocation loc(locationsToCache[i]); + assert(loc.valid()); + + if (loc._size == 0) { + LOG(spam, "Bailing since location size is 0"); + continue; + } + + loc._pos += blockIndex; + int32_t bufferPos = getBufferPos(loc, locationsRead); + + assert(bufferPos != -1); + + MemFileV1Verifier verifier; + if (!verifier.verifyBlock(part, locationsToCache[i]._pos, + error, + buf.getBuffer() + bufferPos, + loc._size)) + { + throw CorruptMemFileException( + error.str(), cache.getFileSpec(), VESPA_STRLOC); + } + + cache.cacheLocation(part, + locationsToCache[i], + buf.getSharedBuffer(), + buf.getBufferPosition() + bufferPos); + } +} + +void +MemFileV1Serializer::cacheLocations(MemFileIOInterface& io, + Environment& env, + const Options& options, + DocumentPart part, + const std::vector<DataLocation>& locations) +{ + SimpleMemFileIOBuffer& cache(static_cast<SimpleMemFileIOBuffer&>(io)); + + const FileInfo& data(cache.getFileInfo()); + uint32_t blockStartIndex(part == HEADER + ? data.getHeaderBlockStartIndex() + : data.getBodyBlockStartIndex()); + + LOG(spam, "%s: cacheLocations for %s with %zu locations. 
" + "max read-through gap is %u", + cache.getFileHandle().getFilename().c_str(), + getDocumentPartName(part), + locations.size(), + options._maximumGapToReadThrough); + + LocationDiskIoPlanner planner( + cache, + part, + locations, + options._maximumGapToReadThrough, + blockStartIndex); + + if (planner.getIoOperations().empty()) { + LOG(spam, "%s: no disk read operations required for %zu %s locations", + cache.getFileHandle().getFilename().c_str(), + locations.size(), + getDocumentPartName(part)); + return; + } + + const std::vector<DataLocation>& readLocations(planner.getIoOperations()); + + const size_t bufferSize = planner.getTotalBufferSize(); + assert(bufferSize % 512 == 0); + const SimpleMemFileIOBuffer::SharedBuffer::Alignment align512( + SimpleMemFileIOBuffer::SharedBuffer::ALIGN_512_BYTES); + + SimpleMemFileIOBuffer::BufferAllocation buf( + cache.allocateBuffer(part, bufferSize, align512)); + assert(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0); + LOG(spam, + "Allocated %u bytes with offset %u from shared buffer %p " + "(of total %zu bytes, %zu bytes used, %zu bytes free)", + buf.getSize(), + buf.getBufferPosition(), + buf.getSharedBuffer().get(), + buf.getSharedBuffer()->getSize(), + buf.getSharedBuffer()->getUsedSize(), + buf.getSharedBuffer()->getFreeSize()); + + framework::MilliSecTime readStart(env._clock.getTimeInMillis()); + SerializationMetrics& metrics(getMetrics().serialization); + + uint64_t total(read(cache.getFileHandle(), buf.getBuffer(), readLocations)); + + metrics::LongAverageMetric& latency(part == HEADER ? metrics.headerReadLatency + : metrics.bodyReadLatency); + metrics::LongAverageMetric& sz(part == HEADER ? 
metrics.headerReadSize + : metrics.bodyReadSize); + framework::MilliSecTime readDone(env._clock.getTimeInMillis()); + latency.addValue((readDone - readStart).getTime()); + sz.addValue(total); + + cacheLocationsForPart(cache, part, blockStartIndex, locations, + readLocations, buf); + + framework::MilliSecTime timeNow(env._clock.getTimeInMillis()); + metrics.cacheUpdateAndImplicitVerifyLatency.addValue( + (timeNow - readDone).getTime()); +} + +uint64_t +MemFileV1Serializer::read(vespalib::LazyFile& file, + char* buf, + const std::vector<DataLocation>& readOps) +{ + uint32_t currPos = 0; + uint64_t totalRead = 0; + + for (uint32_t i = 0; i < readOps.size(); i++) { + file.read(buf + currPos, readOps[i]._size, readOps[i]._pos); + currPos += readOps[i]._size; + totalRead += readOps[i]._size; + } + return totalRead; +} + +void +MemFileV1Serializer::ensureFormatSpecificDataSet(const MemFile& ) +{ +/* + if (file.getFormatSpecificData() == 0) { + assert(!file.fileExists()); + file.setFormatSpecificData(MemFile::FormatSpecificData::UP(new Data)); + } +*/ +} + +uint32_t +MemFileV1Serializer::writeMetaData(BufferedFileWriter& writer, + const MemFile& file) +{ + const SimpleMemFileIOBuffer& ioBuf( + static_cast<const SimpleMemFileIOBuffer&>(file.getMemFileIO())); + uint32_t lastPos = writer.getFilePosition(); + const FileInfo& data(ioBuf.getFileInfo()); + + // Create the header + Header header; + header._version = file.getCurrentVersion(); + header._metaDataListSize = data._metaDataListSize; + header._headerBlockSize = data._headerBlockSize; + header.updateChecksum(); + header._fileChecksum = file.getBucketInfo().getChecksum(); + writer.write(&header, sizeof(Header)); + for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) { + MetaSlot meta; + if (i < file.getSlotCount()) { + const MemSlot& slot(file[i]); + assert(i == 0 || (file[i].getTimestamp() + > file[i-1].getTimestamp())); + meta._timestamp = slot.getTimestamp(); + meta._gid = slot.getGlobalId(); + meta._flags = 
slot.getPersistedFlags(); + meta._headerPos = slot.getLocation(HEADER)._pos; + meta._headerSize = slot.getLocation(HEADER)._size; + meta._bodyPos = slot.getLocation(BODY)._pos; + meta._bodySize = slot.getLocation(BODY)._size; + meta.updateChecksum(); + } + writer.write(&meta, sizeof(MetaSlot)); + } + return (writer.getFilePosition() - lastPos); +} + +// TODO: make exception safe +MemFileV1Serializer::FlushResult +MemFileV1Serializer::flushUpdatesToFile(MemFile& file, Environment& env) +{ + framework::MilliSecTime startTime(env._clock.getTimeInMillis()); + MemFilePersistenceThreadMetrics& metrics(getMetrics()); + SerializationWriteMetrics& writeMetrics(metrics.serialization.partialWrite); + SimpleMemFileIOBuffer& ioBuf( + static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); + const FileInfo& data(ioBuf.getFileInfo()); + BucketId bid(file.getFile().getBucketId()); + + LOG(spam, + "Attempting partial write of file %s", + file.getFile().getPath().c_str()); + + if (file.getSlotCount() > data._metaDataListSize) { + LOG(debug, + "Cannot do partial write of file %s as its " + "in-memory slot count (%u) is greater than its " + "persisted metadata list size (%u)", + file.getFile().getPath().c_str(), + file.getSlotCount(), data._metaDataListSize); + return FlushResult::TooFewMetaEntries; + } + + // TODO: replace this with multimap to avoid vector allocations + // for every single unique location? Could potentially also use + // a Boost.Intrusive rbtree with a pool-based allocation scheme + // to avoid multiple allocations even for the nodes themselves. 
+ typedef MemFile::LocationMap LocationMap; + LocationMap headersToWrite, bodiesToWrite; + LocationMap existingHeaders, existingBodies; + + file.getLocations(headersToWrite, bodiesToWrite, + NON_PERSISTED_LOCATIONS); + + // We don't need the slot list for this, just using it to find a + // gap in the file + file.getLocations(existingHeaders, existingBodies, + PERSISTED_LOCATIONS | NO_SLOT_LIST); + + // Figure out total size of unwritten data for each part and + // whether or not there exists a single continuous gap in the + // part's block in which we can fit the data. Also keep track + // of the total amount of data we actually use so we can check + // if file should be downsized afterwards. + uint32_t totalSpaceUsed[2] = { 0 }; + uint32_t maxUsedExtent[2] = { 0 }; + uint32_t bytesToWrite[2] = { 0 }; + + for (uint32_t partId = 0; partId < 2; ++partId) { + DocumentPart part(static_cast<DocumentPart>(partId)); + LocationMap& unwritten(part == HEADER ? headersToWrite : bodiesToWrite); + LocationMap& existing(part == HEADER ? existingHeaders : existingBodies); + + for (LocationMap::iterator it(unwritten.begin()), e(unwritten.end()); + it != e; ++it) + { + bytesToWrite[partId] += it->first._size; + } + alignUp(bytesToWrite[partId]); + for (LocationMap::iterator it(existing.begin()), e(existing.end()); + it != e; ++it) + { + totalSpaceUsed[partId] += it->first._size; + maxUsedExtent[partId] = std::max(maxUsedExtent[partId], + it->first._pos + it->first._size); + } + LOG(spam, "Max used %s extent before align: %u", + getDocumentPartName(part), + maxUsedExtent[partId]); + + assert(maxUsedExtent[partId] <= data.getBlockSize(part)); + alignUp(maxUsedExtent[partId]); + + if (maxUsedExtent[partId] > data.getBlockSize(part) + || (bytesToWrite[partId] + > (data.getBlockSize(part) - maxUsedExtent[partId]))) + { + LOG(debug, "Could not find sufficient free space in %s to " + "perform a partial write for %s. 
Only %u bytes available, " + "but need at least %u bytes; rewriting entire file.", + getDocumentPartName(part), + file.getFile().getPath().c_str(), + (data.getBlockSize(part) >= maxUsedExtent[partId] + ? data.getBlockSize(part) - maxUsedExtent[partId] + : 0), + bytesToWrite[partId]); + return FlushResult::TooSmall; + } + } + if (LOG_WOULD_LOG(debug)) { + for (int partId = 0; partId < 2; ++partId) { + DocumentPart part(static_cast<DocumentPart>(partId)); + LOG(debug, + "%s: block %s has totalSpaceUsed=%u, maxUsedExtent=%u " + "bytesToWrite=%u blockIndex=%u blockSize=%u", + bid.toString().c_str(), + getDocumentPartName(part), + totalSpaceUsed[part], + maxUsedExtent[part], + bytesToWrite[part], + data.getBlockIndex(part), + data.getBlockSize(part)); + } + } + // Verify not too much free space. Remember to include bytes to write + // currently, and count free space forced added for alignment and to + // overrepresent blocks as used. + // TODO: are the overrepresent factors correct wrt. new data added? + std::shared_ptr<const MemFilePersistenceConfig> memFileCfg; + { + auto guard = env.acquireConfigReadLock(); + memFileCfg = guard.memFilePersistenceConfig(); + } + { + uint32_t usedSpace = static_cast<uint32_t>( + sizeof(Header) + + sizeof(MetaSlot) * file.getSlotCount() + * memFileCfg->overrepresentMetaDataFactor + + totalSpaceUsed[HEADER] + * memFileCfg->overrepresentHeaderBlockFactor + + totalSpaceUsed[BODY] + + bytesToWrite[HEADER] + + bytesToWrite[BODY]); + alignUp(usedSpace, 0, memFileCfg->fileBlockSize); + alignUp(usedSpace, 0, memFileCfg->minimumFileSize); + if (double(usedSpace) / data.getFileSize() < memFileCfg->minFillRate) { + LOG(debug, "File %s only uses %u of %u bytes (%f %%), which is " + "less than min fill rate of %f %%. 
" + "Resizing file to become smaller.", + file.getFile().getPath().c_str(), + usedSpace, data.getFileSize(), + 100.0 * usedSpace / data.getFileSize(), + 100.0 * memFileCfg->minFillRate); + return FlushResult::TooLarge; + } + } + // At this point, we've checked if we can downsize the file with + // a no-go outcome. If there are no altered slots, we can safely + // do an early exit here to avoid rewriting metadata needlessly. + if (!file.slotsAltered()) { + LOG(spam, + "No slots in %s altered, returning without writing anything.", + bid.toString().c_str()); + assert(bytesToWrite[HEADER] == 0); + assert(bytesToWrite[BODY] == 0); + return FlushResult::UnAltered; + } + + // Persist dirty locations to disk, updating all slots as we go. + // NOTE: it is assumed that the buffered data blocks contain pre- + // serialized checksums, document ids etc as appropriate since + // we only write the raw data to disk. + Buffer buffer(1024 * 1024); + BufferedFileWriter writer(ioBuf.getFileHandle(), buffer, buffer.getSize()); + framework::MilliSecTime locationWriteTime(env._clock.getTimeInMillis()); + + for (uint32_t partId = 0; partId < 2; ++partId) { + DocumentPart part(static_cast<DocumentPart>(partId)); + LocationMap& locations(part == HEADER ? 
headersToWrite : bodiesToWrite); + + uint32_t realPos = data.getBlockIndex(part) + maxUsedExtent[partId]; + alignUp(realPos); + uint32_t pos = realPos - data.getBlockIndex(part); + + LOG(spam, + "%s: writing data for part %d, index %d, max " + "used extent %d, block size %d", + bid.toString().c_str(), + part, + data.getBlockIndex(part), + maxUsedExtent[partId], + data.getBlockSize(part)); + + writer.setFilePosition(realPos); + for (LocationMap::iterator it(locations.begin()), e(locations.end()); + it != e; ++it) + { + uint32_t size = it->first._size; + writer.write(ioBuf.getBuffer(it->first, part), size); + DataLocation newSlotLocation(pos, size); + ioBuf.persist(part, it->first, newSlotLocation); + + LOG(spam, + "%s: wrote location %d,%d to disk, resulting location was %d,%d", + bid.toString().c_str(), + it->first._pos, + it->first._size, + newSlotLocation._pos, + newSlotLocation._size); + + std::vector<const MemSlot*>& slots(it->second.slots); + for (uint32_t j = 0; j < slots.size(); ++j) { + LOG(spam, "%s: setting %s location for slot %s to %u,%u", + bid.toString().c_str(), + getDocumentPartName(part), + slots[j]->toString().c_str(), + newSlotLocation._pos, + newSlotLocation._size); + MapperSlotOperation::setLocation(*slots[j], part, newSlotLocation); + } + pos += size; + } + pos = writer.getFilePosition(); + alignUp(pos); + assert(part == BODY || pos <= data.getBlockIndex(BODY)); + writer.writeGarbage(pos - writer.getFilePosition()); + + framework::MilliSecTime timeNow(env._clock.getTimeInMillis()); + metrics::LongAverageMetric& latency(part == HEADER ? writeMetrics.headerLatency + : writeMetrics.bodyLatency); + metrics::LongAverageMetric& sz(part == HEADER ? 
writeMetrics.headerSize + : writeMetrics.bodySize); + latency.addValue((timeNow - locationWriteTime).getTime()); + sz.addValue(bytesToWrite[part]); + locationWriteTime = timeNow; + } + + // Write metadata back to file + writer.setFilePosition(0); + writeMetaData(writer, file); + writer.write(&data._firstHeaderBytes[0], data._firstHeaderBytes.size()); + writer.flush(); + MapperSlotOperation::clearFlag(file, SLOTS_ALTERED); + + framework::MilliSecTime finishTime(env._clock.getTimeInMillis()); + writeMetrics.metaLatency.addValue((finishTime - locationWriteTime).getTime()); + writeMetrics.metaSize.addValue(writer.getFilePosition()); + writeMetrics.totalLatency.addValue((finishTime - startTime).getTime()); + return FlushResult::ChangesWritten; +} + +namespace { + uint32_t + getMetaSlotCount(uint32_t usedSlotCount, + const FileSpecification& file, + const MemFilePersistenceConfig& cfg, + const Options& options) + { + uint32_t wanted = static_cast<uint32_t>( + usedSlotCount * options._growFactor + * options._overrepresentMetaDataFactor); + if (wanted < uint32_t(cfg.minimumFileMetaSlots)) { + wanted = cfg.minimumFileMetaSlots; + } + if (wanted > uint32_t(cfg.maximumFileMetaSlots)) { + if (uint32_t(cfg.maximumFileMetaSlots) >= usedSlotCount) { + wanted = cfg.maximumFileMetaSlots; + } else { + std::ostringstream ost; + ost << "Need " << usedSlotCount << " slots and want " + << wanted << " slots in file, but max slots is " + << cfg.maximumFileMetaSlots; + throw MemFileIoException( + ost.str(), file, MemFileIoException::FILE_FULL, + VESPA_STRLOC); + } + } + return wanted; + } + + uint32_t + getHeaderBlockSize(uint32_t minBytesNeeded, + uint32_t startBlockIndex, + const FileSpecification& file, + const MemFilePersistenceConfig& cfg, + const Options& options) + { + uint32_t wanted = static_cast<uint32_t>( + minBytesNeeded * options._growFactor + * options._overrepresentHeaderBlockFactor); + if (wanted < uint32_t(cfg.minimumFileHeaderBlockSize)) { + wanted = 
cfg.minimumFileHeaderBlockSize; + } + if (wanted > uint32_t(cfg.maximumFileHeaderBlockSize)) { + if (uint32_t(cfg.maximumFileHeaderBlockSize) + >= minBytesNeeded) + { + wanted = cfg.maximumFileHeaderBlockSize; + } else { + std::ostringstream ost; + ost << "Need " << minBytesNeeded << " header bytes and want " + << wanted << " header bytes in file, but max is " + << cfg.maximumFileHeaderBlockSize; + throw MemFileIoException( + ost.str(), file, MemFileIoException::FILE_FULL, + VESPA_STRLOC); + } + } + alignUp(wanted, startBlockIndex); + return wanted; + } + + uint32_t + getBodyBlockSize(uint32_t minBytesNeeded, + uint32_t startBlockIndex, + const FileSpecification& file, + const MemFilePersistenceConfig& cfg, + const Options& options) + { + assert(startBlockIndex % 512 == 0); + uint32_t wanted = static_cast<uint32_t>( + minBytesNeeded * options._growFactor); + if (wanted + startBlockIndex < uint32_t(cfg.minimumFileSize)) { + wanted = cfg.minimumFileSize - startBlockIndex; + } + if (wanted + startBlockIndex > uint32_t(cfg.maximumFileSize)) { + if (uint32_t(cfg.maximumFileSize) + >= minBytesNeeded + startBlockIndex) + { + wanted = cfg.maximumFileSize - startBlockIndex; + } else { + std::ostringstream ost; + ost << "Need " << minBytesNeeded << " body bytes and want " + << wanted << " body bytes in file, but max is " + << (cfg.maximumFileSize - startBlockIndex) + << " as the body block starts at index " << startBlockIndex; + throw MemFileIoException( + ost.str(), file, MemFileIoException::FILE_FULL, + VESPA_STRLOC); + } + } + alignUp(wanted, startBlockIndex, cfg.fileBlockSize); + return wanted; + } + + struct TempCache : public BufferedFileWriter::Cache { + uint32_t _headerBlockIndex; + std::vector<char> _buffer; + + TempCache(uint32_t headerBlockIndex) + : _headerBlockIndex(headerBlockIndex), + _buffer() + { + uint32_t firstAligned = _headerBlockIndex; + alignUp(firstAligned); + _buffer.resize(firstAligned - _headerBlockIndex); + } + + virtual uint32_t getCachedAmount() 
const + { return _buffer.size() + _headerBlockIndex; } + + virtual char* getCache(uint32_t pos) { + // We should never get requests to write prior to header block + // index. + assert(pos >= _headerBlockIndex); + return (&_buffer[0] + (pos - _headerBlockIndex)); + } + + virtual bool duplicateCacheWrite() const { return true; } + + virtual void setData(const char* data, size_t len, uint64_t pos) { + if (pos < _headerBlockIndex) { + if (len <= _headerBlockIndex - pos) return; + uint32_t diff = (_headerBlockIndex - pos); + len -= diff; + pos += diff; + data += diff; + } + Cache::setData(data, len, pos); + } + }; + +} + +// Iterate and write locations in timestamp order. Keep track of what +// locations have already been written and what their new location +// is in the rewritten file. Returns total number of bytes written +// for all unique locations. Modifies slot locations in-place in MemFile. +uint32_t +MemFileV1Serializer::writeAndUpdateLocations( + MemFile& file, + SimpleMemFileIOBuffer& ioBuf, + BufferedFileWriter& writer, + DocumentPart part, + const MemFile::LocationMap& locationsToWrite, + const Environment& env) +{ + framework::MilliSecTimer timer(env._clock); + BucketId bid(file.getFile().getBucketId()); + std::map<DataLocation, DataLocation> writtenLocations; + uint32_t index = 0; + for (uint32_t i = 0; i < file.getSlotCount(); ++i) { + const MemSlot& slot(file[i]); + + DataLocation originalLoc(slot.getLocation(part)); + if (originalLoc._size == 0) { + LOG(spam, "Slot %s has empty %s, not writing anything", + slot.toString().c_str(), + getDocumentPartName(part)); + assert(originalLoc._pos == 0); + continue; + } + + MemFile::LocationMap::const_iterator it( + locationsToWrite.find(originalLoc)); + assert(it != locationsToWrite.end()); + std::map<DataLocation, DataLocation>::iterator written( + writtenLocations.find(originalLoc)); + + DataLocation loc; + if (written == writtenLocations.end()) { + uint32_t size = it->first._size; + loc = DataLocation(index, 
size); + + LOG(spam, "%s: writing %s for slot %s to location (%u, %u)", + file.getFile().getBucketId().toString().c_str(), + getDocumentPartName(part), + slot.toString().c_str(), + index, size); + + writer.write(ioBuf.getBuffer(originalLoc, part), size); + index += size; + writtenLocations[originalLoc] = loc; + } else { + LOG(spam, "%s: %s already written for slot %s; " + "updating to location (%u, %u)", + file.getFile().getBucketId().toString().c_str(), + getDocumentPartName(part), + slot.toString().c_str(), + written->second._pos, written->second._size); + loc = written->second; + } + assert(loc.valid()); + MapperSlotOperation::setLocation(slot, part, loc); + } + // Move in cache. Cannot be done inside loop. + ioBuf.remapAndPersistAllLocations(part, writtenLocations); + + SerializationWriteMetrics& writeMetrics( + getMetrics().serialization.fullWrite); + metrics::LongAverageMetric& latency(part == HEADER ? writeMetrics.headerLatency + : writeMetrics.bodyLatency); + metrics::LongAverageMetric& sz(part == HEADER ? writeMetrics.headerSize + : writeMetrics.bodySize); + latency.addValue(timer); + sz.addValue(index); // Equal to written size. 
+ + return index; +} + +void +MemFileV1Serializer::rewriteFile(MemFile& file, Environment& env) +{ + framework::MilliSecTime startTime(env._clock.getTimeInMillis()); + SerializationWriteMetrics& writeMetrics( + getMetrics().serialization.fullWrite); + file.ensureHeaderAndBodyBlocksCached(); + + SimpleMemFileIOBuffer& ioBuf( + static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); + + const FileSpecification& oldSpec(file.getFile()); + std::string newPath = oldSpec.getPath() + ".new"; + + LOG(debug, "Rewriting entire file %s", oldSpec.getPath().c_str()); + ioBuf.getFileHandle().close(); + vespalib::LazyFile::UP newFile = env.createFile(newPath); + newFile->open(ioBuf.getFileHandle().getFlags() + | vespalib::File::CREATE | vespalib::File::TRUNC, true); + MapperSlotOperation::setFlag(file, FILE_EXIST); + + FileInfo::UP data(new FileInfo); + Buffer buffer(32 * 1024 * 1024); + BufferedFileWriter writer(*newFile, buffer, buffer.getSize()); + + std::shared_ptr<const MemFilePersistenceConfig> memFileCfg; + std::shared_ptr<const Options> options; + { + auto guard = env.acquireConfigReadLock(); + memFileCfg = guard.memFilePersistenceConfig(); + options = guard.options(); + } + + // Create the header + Header header; + header._version = getFileVersion(); + header._metaDataListSize = getMetaSlotCount( + file.getSlotCount(), file.getFile(), *memFileCfg, *options); + data->_metaDataListSize = header._metaDataListSize; + header._fileChecksum = file.getBucketInfo().getChecksum(); + + // Dump header and metadata to writer, so we can start writing header + // and bodies. If buffer is too small causing this to be written, we + // need to write it again after updating it. 
+ writer.write(&header, sizeof(Header)); + LOG(spam, "Writing garbage for %u meta entries", + header._metaDataListSize); + writer.writeGarbage(sizeof(MetaSlot) * header._metaDataListSize); + + TempCache tempCache(writer.getFilePosition()); + writer.setMemoryCache(&tempCache); + + typedef MemFile::LocationMap LocationMap; + LocationMap headersToWrite, bodiesToWrite; + // Don't need the slot list, we update that implicitly + file.getLocations(headersToWrite, bodiesToWrite, + PERSISTED_LOCATIONS + | NON_PERSISTED_LOCATIONS + | NO_SLOT_LIST); + + uint32_t headerIndex = writeAndUpdateLocations( + file, ioBuf, writer, HEADER, headersToWrite, env); + + header._headerBlockSize = getHeaderBlockSize( + headerIndex, + data->getHeaderBlockStartIndex(), + file.getFile(), + *memFileCfg, + *options); + header._checksum = header.calcHeaderChecksum(); + data->_headerBlockSize = header._headerBlockSize; + + if (headerIndex < header._headerBlockSize) { + LOG(spam, "Writing %u bytes of header garbage filler", + header._headerBlockSize - headerIndex); + writer.writeGarbage(header._headerBlockSize - headerIndex); + } + + uint32_t bodyIndex = writeAndUpdateLocations( + file, ioBuf, writer, BODY, bodiesToWrite, env); + + data->_bodyBlockSize = getBodyBlockSize( + bodyIndex, + data->getBodyBlockStartIndex(), + file.getFile(), + *memFileCfg, + *options); + if (bodyIndex < data->_bodyBlockSize) { + writer.writeGarbage(data->_bodyBlockSize - bodyIndex); + } + + framework::MilliSecTime timeBeforeMetaWrite(env._clock.getTimeInMillis()); + // Update meta entries + std::vector<MetaSlot> writeSlots(header._metaDataListSize); + + for (uint32_t i = 0; i < file.getSlotCount(); ++i) { + const MemSlot& slot(file[i]); + MetaSlot& meta(writeSlots[i]); + + DataLocation headerLoc = slot.getLocation(HEADER); + assert(headerLoc.valid()); + DataLocation bodyLoc = slot.getLocation(BODY); + assert(bodyLoc.valid()); + assert(i == 0 || (file[i].getTimestamp() > file[i - 1].getTimestamp())); + + meta._timestamp = 
slot.getTimestamp(); + meta._gid = slot.getGlobalId(); + meta._flags = slot.getPersistedFlags(); + meta._headerPos = headerLoc._pos; + meta._headerSize = headerLoc._size; + meta._bodyPos = bodyLoc._pos; + meta._bodySize = bodyLoc._size; + assert(meta.inUse()); + + meta.updateChecksum(); + MapperSlotOperation::setChecksum(slot, meta._checksum); + } + + if (writer.getWriteCount() != 0) { + // If we didn't have large enough buffer to hold entire file, reposition + // to start to write meta data after updates. + writer.setFilePosition(0); + writer.write(&header, sizeof(Header)); + writer.write(&writeSlots[0], writeSlots.size() * sizeof(MetaSlot)); + writer.write(&tempCache._buffer[0], tempCache._buffer.size()); + } else { + // Otherwise, just update the content in the write buffer. + memcpy(buffer, &header, sizeof(Header)); + memcpy(buffer + sizeof(Header), + &writeSlots[0], writeSlots.size() * sizeof(MetaSlot)); + } + + writer.flush(); + data->_firstHeaderBytes.swap(tempCache._buffer); + int64_t sizeDiff = 0; + if (file.getFormatSpecificData() != 0) { + sizeDiff = ioBuf.getFileInfo().getFileSize(); + } + sizeDiff = static_cast<int64_t>(data->getFileSize()) - sizeDiff; + + //file.setFormatSpecificData(MemFile::FormatSpecificData::UP(data.release())); + ioBuf.setFileInfo(std::move(data)); + file.setCurrentVersion(TRADITIONAL_SLOTFILE); + newFile->close(); + vespalib::rename(newPath, oldSpec.getPath()); + + ioBuf.getFileHandle().open( + ioBuf.getFileHandle().getFlags(), + true); + + // Update partitionmonitor with size usage. + PartitionMonitor* partitionMonitor( + file.getFile().getDirectory().getPartition().getMonitor()); + if (partitionMonitor == 0) { + // Only report if monitor exist. 
+ } else if (sizeDiff > 0) { + partitionMonitor->addingData(static_cast<uint32_t>(sizeDiff)); + } else if (sizeDiff < 0) { + partitionMonitor->removingData(static_cast<uint32_t>(-1 * sizeDiff)); + } + MapperSlotOperation::clearFlag(file, SLOTS_ALTERED); + + framework::MilliSecTime timeAfterMetaWrite(env._clock.getTimeInMillis()); + writeMetrics.metaLatency.addValue((timeAfterMetaWrite - timeBeforeMetaWrite).getTime()); + writeMetrics.metaSize.addValue(sizeof(MetaSlot) * header._metaDataListSize); + writeMetrics.totalLatency.addValue((timeAfterMetaWrite - startTime).getTime()); +} + +bool +MemFileV1Serializer::verify(MemFile& file, Environment& env, + std::ostream& reportStream, + bool repairErrors, uint16_t fileVerifyFlags) +{ + MemFileV1Verifier verifier; + SerializationMetrics& metrics(getMetrics().serialization); + framework::MilliSecTimer timer(env._clock); + + bool ok(verifier.verify(file, env, reportStream, repairErrors, fileVerifyFlags)); + + metrics.verifyLatency.addValue(timer); + return ok; +} + +} +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h new file mode 100644 index 00000000000..bc1bdd902b9 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h> +#include <vespa/memfilepersistence/mapper/versionserializer.h> +#include <vespa/memfilepersistence/mapper/fileinfo.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/memfilepersistence/spi/threadmetricprovider.h> + +namespace storage { +namespace memfile { + +class MemFileV1Serializer : public VersionSerializer +{ + ThreadMetricProvider& _metricProvider; + MemFilePersistenceThreadMetrics& getMetrics() { + return _metricProvider.getMetrics(); + } +public: + typedef vespalib::LinkedPtr<MemFileV1Serializer> LP; + + MemFileV1Serializer(ThreadMetricProvider&); + + virtual FileVersion getFileVersion() { return TRADITIONAL_SLOTFILE; } + + virtual void loadFile(MemFile& file, Environment&, + Buffer& buffer, uint64_t bytesRead); + + void cacheLocationsForPart(SimpleMemFileIOBuffer& cache, + DocumentPart part, + uint32_t blockIndex, + const std::vector<DataLocation>& locationsToCache, + const std::vector<DataLocation>& locationsRead, + SimpleMemFileIOBuffer::BufferAllocation& buf); + + virtual void cacheLocations(MemFileIOInterface& cache, + Environment& env, + const Options& options, + DocumentPart part, + const std::vector<DataLocation>& locations); + + virtual FlushResult flushUpdatesToFile(MemFile&, Environment&); + + virtual void rewriteFile(MemFile&, Environment&); + + virtual bool verify(MemFile&, Environment&, + std::ostream& errorReport, bool repairErrors, + uint16_t fileVerifyFlags); + + uint64_t read(vespalib::LazyFile& file, + char* buf, + const std::vector<DataLocation>& readOps); + + void ensureFormatSpecificDataSet(const MemFile& file); + + uint32_t writeMetaData(BufferedFileWriter& writer, + const MemFile& file); + + uint32_t writeAndUpdateLocations( + MemFile& file, + SimpleMemFileIOBuffer& ioBuf, + BufferedFileWriter& writer, + DocumentPart part, + const MemFile::LocationMap& 
locationsToWrite, + const Environment& env); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp new file mode 100644 index 00000000000..223292ada2c --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp @@ -0,0 +1,698 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> +#include <vespa/memfilepersistence/memfile/memfile.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/memfilepersistence/mapper/buffer.h> +#include <vespa/log/log.h> + +LOG_SETUP(".persistence.memfilev1.verifier"); + +namespace storage { + +namespace memfile { + +namespace { + +void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) { + uint32_t blocks = (value + offset + block - 1) / block; + value = blocks * block - offset; +} + +struct TimestampSlotOrder + : public std::binary_function<MetaSlot*, + MetaSlot*, bool> +{ + bool operator()(const MetaSlot* slot1, + const MetaSlot* slot2) const + { + return (slot1->_timestamp < slot2->_timestamp); + } +}; + +struct HeaderSlotOrder + : public std::binary_function<MetaSlot*, + MetaSlot*, bool> +{ + bool operator()(const MetaSlot* slot1, + const MetaSlot* slot2) const + { + if (slot1->_headerPos == slot2->_headerPos) { + return (slot1->_headerSize < slot2->_headerSize); + } + return (slot1->_headerPos < slot2->_headerPos); + } +}; + +struct BodySlotOrder + : public std::binary_function<MetaSlot*, + MetaSlot*, bool> +{ + bool operator()(const MetaSlot* slot1, + const MetaSlot* slot2) const + { + if (slot1->_bodyPos == slot2->_bodyPos) { + return (slot1->_bodySize < slot2->_bodySize); 
+ } + return (slot1->_bodyPos < slot2->_bodyPos); + } +}; + +uint32_t calculateChecksum(const void* pos, uint32_t size) { + vespalib::crc_32_type calculator; + calculator.process_bytes(pos, size); + return calculator.checksum(); +} + +template<typename T> +bool verifyBodyBlock(const T& id, vespalib::asciistream & error, + const char* data, uint32_t size) +{ + uint32_t bodyLen = size - sizeof(uint32_t); + const char* bodyCrcPos = data + bodyLen; + const uint32_t bodyCrc = *reinterpret_cast<const uint32_t*>(bodyCrcPos); + uint32_t calculatedChecksum = calculateChecksum(data, bodyLen); + if (calculatedChecksum != bodyCrc) { + error << "Body checksum mismatch for " << id + << ": Stored checksum is 0x" << std::hex << bodyCrc + << " while calculated one is 0x" << calculatedChecksum << "."; + return false; + } + return true; +} + +template<typename T> +bool verifyHeaderBlock(const T& id, vespalib::asciistream & error, + const char* data, uint32_t size, + Types::DocumentId* documentId = 0) +{ + if (size <= 3 * sizeof(uint32_t)) { + error << "Error in header for " << id << ": " << size << " byte " + << "header is too small to contain required data."; + return false; + } + const char* nameCrcPos = data + size - sizeof(uint32_t); + const uint32_t nameCrc = *reinterpret_cast<const uint32_t*>(nameCrcPos); + const char* nameLenPos = nameCrcPos - sizeof(uint32_t); + const uint32_t nameLen = *reinterpret_cast<const uint32_t*>(nameLenPos); + if (size < 3 * sizeof(uint32_t) + nameLen) { + error << "Error in header for " << id << ": " << size << " byte " + << "header is not big enough to contain a document " + << "identifier " << nameLen << " bytes long."; + return false; + } + const char *namePos = nameLenPos - nameLen; + uint32_t calculatedNameCrc( + calculateChecksum(namePos, nameLen + sizeof(uint32_t))); + if (calculatedNameCrc != nameCrc) { + error << "Document identifier checksum mismatch for " << id + << ": Stored checksum is 0x" << std::hex << nameCrc + << " while calculated 
one is 0x" << calculatedNameCrc << "."; + return false; + } + const char* blobCrcPos = namePos - sizeof(uint32_t); + const uint32_t blobCrc = *reinterpret_cast<const uint32_t*>(blobCrcPos); + uint32_t blobLen = size - nameLen - 3 * sizeof(uint32_t); + uint32_t calculatedChecksum = calculateChecksum(data, blobLen); + if (calculatedChecksum != blobCrc) { + error << "Header checksum mismatch for " << id + << ": Stored checksum is 0x" << std::hex << blobCrc + << " while calculated one is 0x" << calculatedChecksum << "."; + return false; + } + if (documentId != 0) { + *documentId = Types::DocumentId(Types::String(namePos, nameLen)); + } + return true; +} + +} + +// Utility classes for simplifying creating report from verify +struct MemFileV1Verifier::ReportCreator { + bool _ok; + const MemFile& _file; + std::ostream& _report; + + ReportCreator(const MemFile& file, std::ostream& out) + : _ok(true), _file(file), _report(out) {} + + void addMessage(const std::string& msg) { + LOG(warning, "verify(%s): %s", + _file.getFile().getPath().c_str(), msg.c_str()); + _report << msg << "\n"; + _ok = false; + } +}; + +namespace { + struct ReportMessage { + MemFileV1Verifier::ReportCreator& _report; + mutable std::ostringstream _ost; + + ReportMessage(MemFileV1Verifier::ReportCreator& rc) + : _report(rc), _ost() {} + ~ReportMessage() { + _report.addMessage(_ost.str()); + } + // Copy constructor must exist for compiler not to complain + ReportMessage(const ReportMessage& o) : _report(o._report), _ost() {} + }; + + std::ostream& getReportStream(const ReportMessage& m) { return m._ost; } +} + +#define REPORT(report) getReportStream(ReportMessage(report)) + +bool +MemFileV1Verifier::verifyBlock(Types::DocumentPart part, + uint32_t id, + vespalib::asciistream & error, + const char* data, uint32_t size) +{ + return (part == Types::HEADER + ? 
verifyHeaderBlock(id, error, data, size) + : verifyBodyBlock(id, error, data, size)); +} + +const Header* +MemFileV1Verifier::verifyHeader(ReportCreator& report, + const Buffer& buffer, size_t fileSize) const +{ + const Header& header = *reinterpret_cast<const Header*>(buffer.getBuffer()); + if (header._checksum != header.calcHeaderChecksum()) { + REPORT(report) << "Header checksum mismatch. Was " << std::hex + << header.calcHeaderChecksum() << ", stored " + << header._checksum; + return 0; + } + FileInfo data(header, fileSize); + if (data.getBodyBlockStartIndex() > fileSize) { + REPORT(report) << "Header indicates file is bigger than it physically " + << "is. File size is " << fileSize << " bytes, but " + << "header reports that it contains " + << header._metaDataListSize + << " meta data entries and a headerblock of " + << header._headerBlockSize << " bytes, thus the minimum " + << "file size is " + << (header._metaDataListSize * sizeof(MetaSlot) + + sizeof(Header) + header._headerBlockSize); + return 0; + } + return &header; +} + +bool +MemFileV1Verifier::verifyDocumentBody( + ReportCreator& report, const MetaSlot& slot, const Buffer& buffer, + uint32_t blockIndex, uint32_t blockSize) const +{ + if (slot._bodySize == 0) return true; + if (slot._bodyPos > blockSize || + slot._bodyPos + slot._bodySize > blockSize || + slot._bodyPos + slot._bodySize < slot._bodyPos) + { + REPORT(report) << slot << " has body size/pos not contained within " + << "body block of size " << blockSize << "."; + return false; + } + if (slot._bodySize <= sizeof(uint32_t)) { + REPORT(report) << slot << " body is not big enough to possibly " + << "contain a body."; + return false; + } + vespalib::asciistream error; + if (!verifyBodyBlock(slot, error, + buffer.getBuffer() + blockIndex + slot._bodyPos, + slot._bodySize)) + { + REPORT(report) << error.str(); + return false; + } + return true; +} + +void +MemFileV1Verifier::verifyMetaDataBlock( + ReportCreator& report, const Buffer& buffer, + 
const Header& header, const BucketInfo& info, + std::vector<const MetaSlot*>& slots) const +{ + assert(slots.size() == 0); + slots.reserve(header._metaDataListSize); + Timestamp lastTimestamp(0); + bool foundNotInUse = false; + bool foundUsedAfterUnused = false; + bool wrongOrder = false; + for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) { + const MetaSlot& slot(*reinterpret_cast<const MetaSlot*>( + buffer.getBuffer() + sizeof(Header) + i * sizeof(MetaSlot))); + if (slot._checksum != slot.calcSlotChecksum()) { + REPORT(report) << "Slot " << i << " at timestamp " + << slot._timestamp << " failed checksum " + << "verification. Was " << std::hex + << slot.calcSlotChecksum() + << ", stored " << slot._checksum; + continue; + } + if (!slot.inUse()) { + foundNotInUse = true; + continue; + } + if (foundNotInUse) { + if (!foundUsedAfterUnused) { + REPORT(report) << "Slot " << i << " found after unused entries"; + } + foundUsedAfterUnused = true; + } + // Handle timestamp collisions later + if (slot._timestamp < lastTimestamp) { + wrongOrder = true; + REPORT(report) << "Slot " << i << " is out of timestamp order. (" + << slot._timestamp << " <= " << lastTimestamp + << ")"; + } + slots.push_back(&slot); + lastTimestamp = slot._timestamp; + } + if (info.getChecksum() != header._fileChecksum) { + REPORT(report) << "File checksum should have been 0x" << std::hex + << info.getChecksum() << " according to metadata found, but is set " + << "to 0x" << header._fileChecksum << "."; + } + if (wrongOrder) { + std::sort(slots.begin(), slots.end(), TimestampSlotOrder()); + } +} + +void +MemFileV1Verifier::verifyInBounds( + ReportCreator& report, const Header& header, bool doHeader, + const FileInfo& data, std::vector<const MetaSlot*>& slots) const +{ + // Gather all information different for header and body parts, + // to avoid differences further down. + uint32_t blockSize = (doHeader ? header._headerBlockSize + : data._bodyBlockSize); + uint32_t minSize = (doHeader ? 
3*sizeof(uint32_t) : 0); + std::string part(doHeader ? "Header" : "Body"); + std::vector<const MetaSlot*> okSlots; + okSlots.reserve(slots.size()); + // Go through all slots ordered, and remove illegal ones. + for (uint32_t i=0, n=slots.size(); i<n; ++i) { + uint32_t pos(doHeader ? slots[i]->_headerPos : slots[i]->_bodyPos); + uint32_t size(doHeader ? slots[i]->_headerSize : slots[i]->_bodySize); + if (size < minSize) { + REPORT(report) << part << " of slot (" << *slots[i] << ") " + << "is too small to be valid"; + } else if (size != 0 && + (pos >= blockSize || pos + size > blockSize || + pos + size < pos)) // 3 checks as + can overflow + { + REPORT(report) << part << " of slot (" << *slots[i] << ") goes out " + << "of bounds. (Blocksize " << blockSize << ")"; + } else if (size == 0 && pos != 0) { + REPORT(report) << part << " of slot (" << *slots[i] << ") " + << "has size 0 but is not positioned at pos 0 " + << "as zero sized blocks should be"; + } else { + okSlots.push_back(slots[i]); + } + } + okSlots.swap(slots); +} + +void +MemFileV1Verifier::verifyDataBlock( + ReportCreator& report, Environment& env, const Buffer& buffer, + const FileInfo& data, const BucketId& bucket, + std::vector<const MetaSlot*>& slots, bool doHeader) const +{ + std::vector<const MetaSlot*> okSlots; + okSlots.reserve(slots.size()); + for (uint32_t i=0, n=slots.size(); i<n; ++i) { + if (!doHeader && slots[i]->_bodySize == 0) { + okSlots.push_back(slots[i]); + continue; + } + if (doHeader) { + DocumentId id; + if (!verifyDocumentHeader(report, *slots[i], buffer, id, + data.getHeaderBlockStartIndex(), + data._headerBlockSize)) + { + continue; + } + BucketId foundBucket(env._bucketFactory.getBucketId(id)); + foundBucket.setUsedBits(bucket.getUsedBits()); + foundBucket = foundBucket.stripUnused(); + if (id.getGlobalId() != slots[i]->_gid) { + REPORT(report) << *slots[i] + << " has gid " << slots[i]->_gid + << " but its header block contains document id " + << id << " with " << 
id.getGlobalId(); + } + else if (bucket == foundBucket) { + okSlots.push_back(slots[i]); + } else { + REPORT(report) << "Slot " << *slots[i] + << " belongs to bucket " << foundBucket + << " not in bucket " << bucket; + } + } else { + if (!verifyDocumentBody(report, *slots[i], buffer, + data.getBodyBlockStartIndex(), + data._bodyBlockSize)) + { + continue; + } + okSlots.push_back(slots[i]); + } + } + slots.swap(okSlots); +} + +bool +MemFileV1Verifier::verifyDocumentHeader( + ReportCreator& report, const MetaSlot& slot, const Buffer& buffer, + DocumentId& did, uint32_t blockIndex, uint32_t blockSize) const +{ + if (slot._headerPos > blockSize || + slot._headerPos + slot._headerSize > blockSize || + slot._headerPos + slot._headerSize < slot._headerPos) + { + REPORT(report) << slot << " has header size/pos not contained within " + << "header block of size " << blockSize << "."; + return false; + } + vespalib::asciistream error; + if (!verifyHeaderBlock(slot, error, + buffer.getBuffer() + blockIndex + slot._headerPos, + slot._headerSize, &did)) + { + REPORT(report) << error.str(); + return false; + } + return true; +} + +namespace { +// Helper function for verifyNonOverlap + void verifySlotsAtSamePosition( + MemFileV1Verifier::ReportCreator& report, + bool header, + std::vector<const MetaSlot*>& slots, + vespalib::hash_set<const MetaSlot*, + vespalib::hash<void *> >& faultySlots) + { + const Types::GlobalId& gid(slots[0]->_gid); + for (uint32_t i=1; i<slots.size(); ++i) { + if (slots[i]->_gid != gid) { + REPORT(report) << "Multiple slots with different gids use same " + << (header ? "header" : "body") + << " position. For instance slot " + << *slots[0] << " and " << *slots[i] + << ". 
Repairing will delete all " << slots.size() + << " slots using this position, as we don't " + << "know who is correct."; + for (uint32_t j=0; j<slots.size(); ++j) { + faultySlots.insert(slots[j]); + } + break; + } + } + } +} + +void +MemFileV1Verifier::verifyNonOverlap( + ReportCreator& report, bool doHeader, + std::vector<const MetaSlot*>& slots) const +{ + // Gather all information different for header and body parts, + // to avoid differences further down. + std::string part(doHeader ? "Header" : "Body"); + std::vector<const MetaSlot*> order(slots); + // Using stable sort to sort slots, makes slots in same position + // keep timestamp order. (Thus we can use that if we want to remove + // oldest or newest illegally at same timestamp) + if (doHeader) { + std::stable_sort(order.begin(), order.end(), HeaderSlotOrder()); + } else { + std::stable_sort(order.begin(), order.end(), BodySlotOrder()); + } + // Temporary store slots that need to be removed + vespalib::hash_set<const MetaSlot*, vespalib::hash<void *> > failedSlots; + // Slots that points to the same area within a block. + std::vector<const MetaSlot*> local; + uint32_t lastPos = 0, lastSize = 0; + // Go through all slots ordered, and remove illegal ones. + for (uint32_t i=0, n=order.size(); i<n; ++i) { + uint32_t pos(doHeader ? order[i]->_headerPos : order[i]->_bodyPos); + uint32_t size(doHeader ? 
order[i]->_headerSize : order[i]->_bodySize); + if (size == 0) { + // Ignore zero sized entries + } else if (pos == lastPos && size == lastSize) { + local.push_back(order[i]); + } else if (pos < lastPos + lastSize) { + std::ostringstream ost; + if (!local.empty()) { + for (uint32_t j=0; j<local.size(); ++j) { + failedSlots.insert(local[j]); + if (j != 0) ost << ", "; + ost << *local[j]; + } + } + failedSlots.insert(order[i]); + if (local.empty()) { + REPORT(report) << part << " of slot(" << *order[i] << ") " + << "overlaps with previously removed slots."; + } else { + REPORT(report) << part << " of slot (" << *order[i] << ") " + << "overlaps with " + << (local.size() == 1 ? "slot" + : "the following slots") + << " " << ost.str() << "."; + } + local.clear(); + lastPos = pos; + lastSize = size; + } else { + if (local.size() > 1) { + verifySlotsAtSamePosition(report, doHeader, local, failedSlots); + } + local.clear(); + local.push_back(order[i]); + lastPos = pos; + lastSize = size; + } + } + if (local.size() > 1) { + verifySlotsAtSamePosition(report, doHeader, local, failedSlots); + } + if (failedSlots.size() == 0) return; + std::vector<const MetaSlot*> okSlots; + okSlots.reserve(slots.size() - failedSlots.size()); + for (uint32_t i=0, n=slots.size(); i<n; ++i) { + if (failedSlots.find(slots[i]) == failedSlots.end()) { + okSlots.push_back(slots[i]); + } + } + okSlots.swap(slots); +} + + + +bool +MemFileV1Verifier::verify(MemFile& file, Environment& env, + std::ostream& reportStream, + bool repairErrors, uint16_t fileVerifyFlags) +{ + bool verifyHeaderData = ((fileVerifyFlags & DONT_VERIFY_HEADER) == 0); + bool verifyBodyData = ((fileVerifyFlags & DONT_VERIFY_BODY) == 0); + + LOG(debug, "verify(%s%s%s%s)", + file.getFile().toString().c_str(), + repairErrors ? ", repairing errors" : "", + verifyHeaderData ? ", verifying header block" : "", + verifyBodyData ? 
", verifying body block" : ""); + + SimpleMemFileIOBuffer& ioBuf( + static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); + + framework::MilliSecTimer startTimer(env._clock); + ReportCreator report(file, reportStream); + file.verifyConsistent(); + if (!file.fileExists()) return report._ok; + + // First read at least the header from disk + size_t fileSize = ioBuf.getFileHandle().getFileSize(); + if (fileSize < sizeof(Header)) { + REPORT(report) << "File was only " << fileSize + << " B long and cannot be valid. Delete file to repair."; + if (repairErrors) { + env._memFileMapper.deleteFile(file, env); + } + return report._ok; + } + const size_t initialIndexRead( + env.acquireConfigReadLock().options()->_initialIndexRead); + Buffer buffer(std::min(fileSize, initialIndexRead)); + size_t readBytes = ioBuf.getFileHandle().read(buffer, buffer.getSize(), 0); + + // Exception should have been thrown by read if mismatch here. + assert(readBytes == buffer.getSize()); + + // Ensure slotfile header is ok. If not just delete whole file. + const Header* header = verifyHeader(report, buffer, fileSize); + if (header == 0) { + if (repairErrors) { + env._memFileMapper.deleteFile(file, env); + } + return report._ok; + } + + FileInfo data(*header, fileSize); + + // Read remaining data needed in check, if any + size_t lastNeededByte = sizeof(Header) + + sizeof(MetaSlot) * header->_metaDataListSize; + if (verifyBodyData) { + lastNeededByte = fileSize; + } else if (verifyHeaderData) { + lastNeededByte += header->_headerBlockSize; + } + if (buffer.getSize() < lastNeededByte) { + buffer.resize(lastNeededByte); + header = reinterpret_cast<const Header*>(buffer.getBuffer()); + } + if (lastNeededByte > readBytes) { + readBytes += ioBuf.getFileHandle().read( + buffer + readBytes, buffer.getSize() - readBytes, readBytes); + } + + // Exception should have been thrown by read if mismatch here. + assert(readBytes == buffer.getSize()); + + // Build list of slots. Do simple checking. 
+ std::vector<const MetaSlot*> slots; + verifyMetaDataBlock(report, buffer, *header, file.getBucketInfo(), slots); + verifyInBounds(report, *header, true, data, slots); + verifyInBounds(report, *header, false, data, slots); + + // Check header and body blocks if wanted + if (verifyHeaderData) { + verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(), + slots, true); + } + if (verifyBodyData) { + verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(), + slots, false); + } + // Check for overlapping slots last, in case only one of the slots + // actually overlapped pointed to a legal document, we may have + // already removed the problem. + verifyNonOverlap(report, true, slots); + verifyNonOverlap(report, false, slots); + verifyUniqueTimestamps(report, slots); + // If the slotlist is altered from what we read from disk, we need + // to write it back if we're gonna repair the errors. + if (!report._ok && repairErrors) { + // Remove bad entries from the memfile instance + // Entries that are cached in full may be removed from file and just + // tagged not in file anymore in cache. 
+ std::vector<Timestamp> keep; + for (uint32_t i=0; i<slots.size(); ++i) { + keep.push_back(slots[i]->_timestamp); + } + env._memFileMapper.removeAllSlotsExcept( + const_cast<MemFile&>(file), keep); + + // Edit header and metadata part of buffer to only keep wanted data + // Since both source and target is the same buffer, create new meta + // data in new buffer and memcpy back afterwards + Buffer metaData(header->_metaDataListSize * sizeof(MetaSlot)); + BucketInfo info(file.getBucketInfo()); + const_cast<Header*>(header)->_fileChecksum = info.getChecksum(); + for (uint32_t i=0; i<header->_metaDataListSize; ++i) { + MetaSlot* slot(reinterpret_cast<MetaSlot*>( + metaData.getBuffer() + i * sizeof(MetaSlot))); + if (i >= slots.size()) { + *slot = MetaSlot(); + } else if (slot != slots[i]) { + *slot = *slots[i]; + } + } + memcpy(buffer.getBuffer() + sizeof(Header), metaData.getBuffer(), + metaData.getSize()); + // Then rewrite metadata section to disk leaving out bad entries + uint32_t dataToWrite(sizeof(Header) + + sizeof(MetaSlot) * header->_metaDataListSize); + alignUp(dataToWrite); + ioBuf.getFileHandle().write(buffer, dataToWrite, 0); + + // Tag memfile up to date + uint32_t memFileFlags = FILE_EXIST + | HEADER_BLOCK_READ + | BODY_BLOCK_READ; + for (MemFile::const_iterator it = file.begin(ITERATE_REMOVED); + it != file.end(); ++it) + { + if (!ioBuf.isCached(it->getLocation(BODY), BODY)) { + memFileFlags &= ~BODY_BLOCK_READ; + } + if (!ioBuf.isCached(it->getLocation(HEADER), HEADER)) { + memFileFlags &= ~HEADER_BLOCK_READ; + } + + if (!ioBuf.isPersisted(it->getLocation(BODY), BODY) + || !ioBuf.isPersisted(it->getLocation(HEADER), HEADER)) + { + memFileFlags |= SLOTS_ALTERED; + } + + if (it->alteredInMemory()) { + memFileFlags |= SLOTS_ALTERED; + } + } + assert(file.fileExists()); + const_cast<MemFile&>(file).clearFlag(LEGAL_MEMFILE_FLAGS); + const_cast<MemFile&>(file).setFlag(memFileFlags); + LOG(warning, "verify(%s): Errors repaired", file.toString().c_str()); + } 
else if (report._ok) { + LOG(debug, "verify(%s): Ok", file.toString().c_str()); + } else { + LOG(debug, "verify(%s): Not repairing errors", file.toString().c_str()); + } + +// env._metrics.slotfileMetrics._verifyLatencyTotal.addValue(startTimer); + return report._ok; +} + +void +MemFileV1Verifier::verifyUniqueTimestamps( + ReportCreator& report, std::vector<const MetaSlot*>& slots) const +{ + std::vector<const MetaSlot*> okSlots; + okSlots.reserve(slots.size()); + // Slots should already be in order as verifyMetaDataBlock has run + Timestamp last(0); + for (uint32_t i=0, n=slots.size(); i<n; ++i) { + if (slots[i]->_timestamp == last && i != 0) { + REPORT(report) << "Slot " << i << " (" << *slots[i] + << ") has same timestamp as slot " << (i-1) + << " (" << *slots[i-1] << ")."; + } else { + okSlots.push_back(slots[i]); + last = slots[i]->_timestamp; + } + } + okSlots.swap(slots); +} + + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h new file mode 100644 index 00000000000..27d663a82b9 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/memfilepersistence/mapper/fileinfo.h> +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/common/environment.h> + +namespace storage { + + +namespace memfile { + +class MemFile; +class Environment; +class Buffer; + +class MemFileV1Verifier : public Types +{ +public: + bool verify(MemFile&, + Environment&, + std::ostream& errorReport, + bool repairErrors, + uint16_t fileVerifyFlags); + + bool verifyBlock(Types::DocumentPart part, + uint32_t id, + vespalib::asciistream & error, + const char* data, + uint32_t size); + + + class ReportCreator; + +private: + const Header* verifyHeader(ReportCreator& report, + const Buffer& buffer, + size_t fileSize) const; + + void verifyMetaDataBlock(ReportCreator& report, + const Buffer& buffer, + const Header& header, + const BucketInfo& info, + std::vector<const MetaSlot*>& slots) const; + + void verifyInBounds(ReportCreator& report, + const Header& header, + bool doHeader, + const FileInfo& data, + std::vector<const MetaSlot*>& slots) const; + + void verifyDataBlock(ReportCreator& report, + Environment& env, + const Buffer& buffer, + const FileInfo& data, + const BucketId& bucket, + std::vector<const MetaSlot*>& slots, + bool doHeader) const; + + void verifyNonOverlap(ReportCreator& report, + bool doHeader, + std::vector<const MetaSlot*>& slots) const; + + bool verifyDocumentHeader(ReportCreator& report, + const MetaSlot& slot, + const Buffer& buffer, + DocumentId& did, + uint32_t blockIndex, + uint32_t blockSize) const; + + bool verifyDocumentBody(ReportCreator& report, + const MetaSlot& slot, + const Buffer& buffer, + uint32_t blockIndex, + uint32_t blockSize) const; + + void verifyUniqueTimestamps(ReportCreator& report, + std::vector<const MetaSlot*>& slots) const; +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp new file mode 100644 
index 00000000000..9d9360a3d3a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp @@ -0,0 +1,340 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> + +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/common/exceptions.h> +#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> +#include <vespa/vdslib/distribution/distribution.h> + +LOG_SETUP(".persistence.memfile.mapper"); + +namespace storage { +namespace memfile { + +// Repair defined in macro, such that log entries will be unique for the various +// instances calling it (different file line numbers) +#define VESPA_REPAIR_MEMFILE(file) \ +{ \ + std::ostringstream memFileErrors; \ + bool memFileRepairResult = repair(file, env, memFileErrors); \ + if (!memFileRepairResult) { \ + LOG(warning, "Repaired %s: %s", \ + file.toString().c_str(), memFileErrors.str().c_str()); \ + sendNotifyBucketCommand(file, env); \ + } else { \ + LOGBP(warning, "Repair for %s triggered but found nothing to repair.", \ + file.toString().c_str()); \ + } \ +} + +// To avoid duplicating code, this macro is used when autoRepair is on, and +// call itself with autorepair off, handling the autorepair. +#define VESPA_HANDLE_AUTOREPAIR(file, func) { \ + try{ \ + func; \ + } catch (CorruptMemFileException& e) { \ + LOGBP(warning, "Corrupt file %s: %s", \ + file.toString().c_str(), e.what()); \ + VESPA_REPAIR_MEMFILE(file); \ + func; \ + } \ + return; \ +} + +void +MemFileMapper::sendNotifyBucketCommand(const MemFile&, + Environment&) +{ +/* TODO: Move to service layer. 
+ BucketInfo info(file.getBucketInfo()); + // Send notify bucket change command to update distributor + api::NotifyBucketChangeCommand::SP msg( + new api::NotifyBucketChangeCommand(file.getFile().getBucketId(), + info)); + uint16_t distributor( + env._storageServer.getDistribution()->getIdealDistributorNode( + *env._storageServer.getStateUpdater().getSystemState(), + file.getFile().getBucketId())); + msg->setAddress(api::StorageMessageAddress( + env._storageServer.getClusterName(), + lib::NodeType::DISTRIBUTOR, + distributor)); + msg->setSourceIndex(env._nodeIndex); + env._fileStorHandler.sendCommand(msg); +*/ +} + +void +MemFileMapper::addVersionSerializer(VersionSerializer::LP serializer) +{ + FileVersion version = serializer->getFileVersion(); + if (_serializers.find(version) != _serializers.end()) { + std::ostringstream error; + error << "A serializer for version " << version + << " is already registered."; + throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC); + } + _serializers[version] = serializer; +} + +VersionSerializer& +MemFileMapper::getVersionSerializer(const MemFile& file) +{ + std::map<FileVersion, VersionSerializer::LP>::iterator it( + _serializers.find(file.getCurrentVersion())); + if (it == _serializers.end()) { + std::ostringstream ost; + ost << "Unknown serialization version " + << getFileVersionName(file.getCurrentVersion()) + << " (" << file.getCurrentVersion() << ")\n"; + throw CorruptMemFileException(ost.str(), file.getFile(), VESPA_STRLOC); + } + return *it->second; +} + +MemFileMapper::MemFileMapper(ThreadMetricProvider& metricProvider) + : _metricProvider(metricProvider) +{ + addVersionSerializer(VersionSerializer::LP(new MemFileV1Serializer(metricProvider))); +} + +void +MemFileMapper::setDefaultMemFileIO(MemFile& file, + vespalib::LazyFile::UP lf, + const Environment& env) +{ + std::map<FileVersion, VersionSerializer::LP>::iterator serializer( + _serializers.find(file.getFile().getWantedFileVersion())); + assert(serializer 
!= _serializers.end()); + + file.setMemFileIO( + std::unique_ptr<MemFileIOInterface>( + new SimpleMemFileIOBuffer( + *serializer->second, + std::move(lf), + FileInfo::UP(new FileInfo()), + file.getFile(), + env))); +} + +void +MemFileMapper::loadFileImpl(MemFile& file, Environment& env) +{ + framework::MilliSecTimer timer(env._clock); + + if (file.getSlotCount() != 0 || file.getCurrentVersion() != UNKNOWN) { + throw InvalidStateException("File is already loaded", file.getFile(), + VESPA_STRLOC); + } + + vespalib::LazyFile::UP f = env.createFile(file.getFile().getPath()); + vespalib::LazyFile* lf = f.get(); + + setDefaultMemFileIO(file, std::move(f), env); + + // Early exit for file not found to avoid having to use + // exception for common control path + if (!vespalib::fileExists(file.getFile().getPath())) { + LOG(debug, "Cannot load file '%s' as it does not exist", + file.getFile().getPath().c_str()); + file.setFlag(HEADER_BLOCK_READ | BODY_BLOCK_READ); + return; + } + file.setFlag(FILE_EXIST); + + Buffer buffer(env.acquireConfigReadLock().options()->_initialIndexRead); + off_t readBytes = lf->read(buffer, buffer.getSize(), 0); + + if (readBytes < 4) { + std::ostringstream err; + err << "Only " << readBytes << " bytes read from file. 
Not enough to " + << "get a file version."; + throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC); + } + SerializationMetrics& metrics(getMetrics().serialization); + metrics.initialMetaReadLatency.addValue(timer); + + file.setFlag(BUCKET_INFO_OUTDATED); + + FileVersion version = static_cast<FileVersion>( + *reinterpret_cast<uint32_t*>(buffer.getBuffer())); + std::map<FileVersion, VersionSerializer::LP>::iterator serializer( + _serializers.find(version)); + file.setCurrentVersion(version); + if (serializer == _serializers.end()) { + std::ostringstream err; + err << "Unknown file version " << std::hex << version; + throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC); + } + serializer->second->loadFile(file, env, buffer, readBytes); + + metrics.totalLoadFileLatency.addValue(timer); +} + +void +MemFileMapper::loadFile(MemFile& file, Environment& env, bool autoRepair) +{ + try { + loadFileImpl(file, env); + } catch (CorruptMemFileException& e) { + LOGBP(warning, "Corrupt file %s: %s", + file.toString().c_str(), e.what()); + if (autoRepair) { + VESPA_REPAIR_MEMFILE(file); + // Must reset version info, slots etc to avoid getting errors + // that file is already loaded. + file.resetMetaState(); + loadFileImpl(file, env); + } + // Add bucket to set of modified buckets so service layer can request + // new bucket info. 
+ env.addModifiedBucket(file.getFile().getBucketId()); + } +} + +void +MemFileMapper::flush(MemFile& f, Environment& env, bool autoRepair) +{ + (void) autoRepair; + if (f.fileExists()) { + VersionSerializer& serializer(getVersionSerializer(f)); + typedef VersionSerializer::FlushResult FlushResult; + FlushResult result = serializer.flushUpdatesToFile(f, env); + if (result == FlushResult::TooSmall) { + f.compact(); + result = serializer.flushUpdatesToFile(f, env); + } + if (result == FlushResult::ChangesWritten + || result == FlushResult::UnAltered) + { + return; + } + MemFilePersistenceThreadMetrics& metrics(_metricProvider.getMetrics()); + switch (result) { + case FlushResult::TooFewMetaEntries: + metrics.serialization.fullRewritesDueToTooSmallFile.inc(); + break; + case FlushResult::TooSmall: + metrics.serialization.fullRewritesDueToTooSmallFile.inc(); + break; + case FlushResult::TooLarge: + metrics.serialization.fullRewritesDueToDownsizingFile.inc(); + break; + default: + break; + } + } else { + // If a file does not yet exist, its content by definition exists + // entirely in memory. Consequently it costs next to nothing to run + // compaction since there is no need to read any meta/header blocks + // from disk. However, the gains from compacting may be significant if + // the bucket e.g. contains many versions of the same document. 
+ f.compact(); + } + + // If we get here we failed to write updates only and will rewrite + std::map<FileVersion, VersionSerializer::LP>::iterator serializer( + _serializers.find(f.getFile().getWantedFileVersion())); + assert(serializer != _serializers.end()); + + serializer->second->rewriteFile(f, env); +} + +bool +MemFileMapper::verify(MemFile& file, Environment& env, + std::ostream& errorReport, bool repairErrors, + uint16_t fileVerifyFlags) +{ + if (file.fileExists()) { + std::map<FileVersion, VersionSerializer::LP>::iterator serializer( + _serializers.find(file.getCurrentVersion())); + if (serializer != _serializers.end()) { + bool wasOk = serializer->second->verify( + file, env, errorReport, repairErrors, fileVerifyFlags); + if (!wasOk) sendNotifyBucketCommand(file, env); + return wasOk; + } + // If we get here, version is corrupted. Delete file if repairing. + errorReport << "Header read from " << file.getFile().getPath() + << " is of wrong version " + << getFileVersionName(file.getCurrentVersion()) + << "(0x" << std::hex << file.getCurrentVersion() << std::dec + << "). 
Corrupt file or unsupported format."; + if (repairErrors) { + deleteFile(file, env); + } + sendNotifyBucketCommand(file, env); + return false; + } + return true; +} + +void +MemFileMapper::deleteFile(const MemFile& constFile, Environment& env) +{ + MemFile& file(const_cast<MemFile&>(constFile)); + framework::MilliSecTimer timer(env._clock); + std::vector<Timestamp> keep; + file.clearFlag(FILE_EXIST); + file.setCurrentVersion(UNKNOWN); + + SimpleMemFileIOBuffer& ioBuf( + static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); + + uint32_t fileSize = ioBuf.getFileHandle().getFileSize(); + ioBuf.getFileHandle().unlink(); + + // Indicate we get free space to partition monitor + PartitionMonitor& partitionMonitor( + *constFile.getFile().getDirectory().getPartition().getMonitor()); + partitionMonitor.removingData(fileSize); + getMetrics().serialization.deleteFileLatency.addValue(timer); +} + +void +MemFileMapper::removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep) +{ + std::vector<const MemSlot*> slotsToRemove; + MemFile::const_iterator orgIt(file.begin(ITERATE_REMOVED)); + std::vector<Timestamp>::reverse_iterator keepIt(keep.rbegin()); + + // Linear merge of vectors to extract inverse set of `keep`; these will + // be the slots we should remove. The output of this is pretty much what + // std::set_symmetric_difference would've given us, but can't use that + // algorithm directly due to our non-implicitly convertible mixing of + // iterator value types. + // Note that iterator ranges are sorted in _descending_ order. + while (orgIt != file.end()) { + if (keepIt == keep.rend() || orgIt->getTimestamp() > *keepIt) { + slotsToRemove.push_back(&*orgIt); + ++orgIt; + } else if (orgIt->getTimestamp() == *keepIt) { + ++orgIt; + ++keepIt; + } else { + // The case where the verifier knows of a slot that the MemFile + // does not _may_ happen in the case of corruptions causing apparent + // timestamp collisions. 
In this case, sending in timestamps to + // keep could lead to ambiguities, but in general we can assume that + // one of the slots will be removed before this due to a mismatching + // checksum. + LOG(warning, + "Verifier code requested to keep slot at time %zu in " + "file %s, but that slot does not exist in the internal state. " + "Assuming this is due to corruption which will be fixed " + "automatically.", + keepIt->getTime(), + file.getFile().getPath().c_str()); + ++keepIt; + } + } + std::reverse(slotsToRemove.begin(), slotsToRemove.end()); + file.removeSlots(slotsToRemove); +} + +} // storage +} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h new file mode 100644 index 00000000000..30f483fc582 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::MemFileMapper + * \ingroup memfile + * + * \brief Maps memory representation of files to and from physical files. + * + * The mapper can map to and from all file formats supported. It keeps track + * of all possible formats and call the implementation of these as needed. This + * global class is needed such that files can seemlessly change file format when + * one wants to start using another than one used before. + * + * Note that there will be one MemFileMapper instance per disk thread, such that + * the mapper doesn't have to worry about being threadsafe with multiple + * threads calling it at the same time. 
+ */ + +#pragma once + +#include <vespa/memfilepersistence/mapper/versionserializer.h> +#include <vespa/memfilepersistence/spi/threadmetricprovider.h> + +namespace storage { +namespace memfile { + +class MemFileMapper : private Types { +private: + std::map<FileVersion, VersionSerializer::LP> _serializers; + ThreadMetricProvider& _metricProvider; + void setDefaultMemFileIO(MemFile& file, + vespalib::LazyFile::UP lf, + const Environment& env); + +public: + MemFileMapper(ThreadMetricProvider&); + + /** + * Initialize a MemFile entry with the data found in corresponding file. + * This sets: + * - Flag whether file exist or not. + * - If file exist, sets header data in file, such as: + * - File version + * - Meta entry count + * - Header block size + * - Body block size + * - File checksum + */ + void loadFile(MemFile&, Environment&, bool autoRepair = true); + + /** + * Flushes all content in MemFile that is not already persisted to disk. + * This might require a rewrite of the file, if the size of the file need + * to change. Flush updates the following in the MemFile: + * - Updates state saying all is persisted. + * - If file was rewritten and was in unwanted version, file version may + * have changed to wanted version. + * - Sizes of blocks in the file may have changed. + * - Rewrite file if changes would leave the file too empty. (Thus, + * memfile given might not be dirty but still a write may be needed) + */ + void flush(MemFile&, Environment&, bool autoRepair = true); + + /** + * Verify that file is not corrupt. + * @return True if file is fine. + */ + bool verify(MemFile& file, Environment& env, + std::ostream& errorReport, uint16_t fileVerifyFlags = 0) + { return verify(file, env, errorReport, false, fileVerifyFlags); } + + /** + * Verify that file is not corrupt and repair it if it is. + * @return True if file was fine. False if any errors were fixed. 
+ */ + bool repair(MemFile& file, Environment& env, + std::ostream& errorReport, uint16_t fileVerifyFlags = 0) + { return verify(file, env, errorReport, true, fileVerifyFlags); } + + /** + * Utility functions used by verify to remove data from memfile that is no + * longer pointing to valid data. + */ + void deleteFile(const MemFile& file, Environment& env); + void removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep); + +private: + void addVersionSerializer(VersionSerializer::LP); + VersionSerializer& getVersionSerializer(const MemFile& file); + + void loadFileImpl(MemFile&, Environment&); + + /** + * Check file for errors, generate report of errors. Fix if repairErrors + * is set. Returns true if no failures were found. + */ + bool verify(MemFile& file, Environment&, + std::ostream& errorReport, bool repairErrors, + uint16_t fileVerifyFlags); + + MemFilePersistenceThreadMetrics& getMetrics() const { + return _metricProvider.getMetrics(); + } + + void sendNotifyBucketCommand(const MemFile&, Environment&); +}; + +} // storage +} // memfile + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h new file mode 100644 index 00000000000..0eac46a1065 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/metrics/metrics.h> + +namespace storage { +namespace memfile { + +class SerializationWriteMetrics : public metrics::MetricSet +{ +public: + metrics::LongAverageMetric headerLatency; + metrics::LongAverageMetric headerSize; + metrics::LongAverageMetric bodyLatency; + metrics::LongAverageMetric bodySize; + metrics::LongAverageMetric metaLatency; + metrics::LongAverageMetric metaSize; + metrics::LongAverageMetric totalLatency; + + SerializationWriteMetrics(const std::string& name, metrics::MetricSet& owner) + : metrics::MetricSet(name, "", + "Write metrics for memfile persistence engine", + &owner), + headerLatency("header_latency", "", + "Time spent writing a single contiguous header location " + "on the disk.", this), + headerSize("header_size", "", + "Average size of contiguous header disk writes", this), + bodyLatency("body_latency", "", + "Time spent writing a single contiguous body location " + "on the disk.", this), + bodySize("body_size", "", + "Average size of contiguous body disk writes", this), + metaLatency("meta_latency", "", + "Time spent writing file header and slot metadata", this), + metaSize("meta_size", "", + "Size of file header and metadata writes", this), + totalLatency("total_latency", "", + "Total time spent performing slot file writing", this) + { + } +}; + +class SerializationMetrics : public metrics::MetricSet +{ +public: + metrics::LongAverageMetric initialMetaReadLatency; + metrics::LongAverageMetric tooLargeMetaReadLatency; + metrics::LongAverageMetric totalLoadFileLatency; + metrics::LongAverageMetric verifyLatency; + metrics::LongAverageMetric deleteFileLatency; + metrics::LongAverageMetric headerReadLatency; + metrics::LongAverageMetric headerReadSize; + metrics::LongAverageMetric bodyReadLatency; + metrics::LongAverageMetric bodyReadSize; + metrics::LongAverageMetric cacheUpdateAndImplicitVerifyLatency; + metrics::LongCountMetric fullRewritesDueToDownsizingFile; + metrics::LongCountMetric 
fullRewritesDueToTooSmallFile; + SerializationWriteMetrics partialWrite; + SerializationWriteMetrics fullWrite; + + SerializationMetrics(const std::string& name, + metrics::MetricSet* owner = 0) + : metrics::MetricSet(name, "", + "(De-)serialization I/O metrics for memfile " + "persistence engine", owner), + initialMetaReadLatency( + "initial_meta_read_latency", "", + "Time spent doing the initial read of " + "the file header and most (or all) of metadata", + this), + tooLargeMetaReadLatency( + "too_large_meta_read_latency", "", + "Time spent doing additional read for " + "metadata too large to be covered by initial " + "read", this), + totalLoadFileLatency( + "total_load_file_latency", "", + "Total time spent initially loading a " + "file from disk", this), + verifyLatency( + "verify_latency", "", + "Time spent performing file verification", this), + deleteFileLatency( + "delete_file_latency", "", + "Time spent deleting a file from disk", this), + headerReadLatency( + "header_read_latency", "", + "Time spent reading a single contiguous header location " + "on the disk (may span many document blobs)", this), + headerReadSize( + "header_read_size", "", + "Size of contiguous header disk location reads", this), + bodyReadLatency( + "body_read_latency", "", + "Time spent reading a single contiguous body location " + "on the disk (may span many document blobs)", this), + bodyReadSize( + "body_read_size", "", + "Size of contiguous body disk location reads", this), + cacheUpdateAndImplicitVerifyLatency( + "cache_update_and_implicit_verify_latency", "", + "Time spent updating memory cache structures and verifying " + "read data blocks for corruptions", this), + fullRewritesDueToDownsizingFile( + "full_rewrites_due_to_downsizing_file", "", + "Number of times a file was rewritten fully because the " + "original file had too low fill rate", this), + fullRewritesDueToTooSmallFile( + "full_rewrites_due_to_too_small_file", "", + "Number of times a file was rewritten fully 
because the " + "original file did not have sufficient free space for a " + "partial write", this), + partialWrite("partialwrite", *this), + fullWrite("fullwrite", *this) + { + } +}; + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp new file mode 100644 index 00000000000..505e9c32f3b --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp @@ -0,0 +1,538 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/vespalib/util/crc.h> +#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/log/log.h> + +LOG_SETUP(".memfile.simpleiobuffer"); + +namespace storage { + +namespace memfile { + +namespace { + +uint32_t calculateChecksum(const void* pos, uint32_t size) { + vespalib::crc_32_type calculator; + calculator.process_bytes(pos, size); + return calculator.checksum(); +} + +} + +SimpleMemFileIOBuffer::SimpleMemFileIOBuffer( + VersionSerializer& reader, + vespalib::LazyFile::UP file, + FileInfo::UP info, + const FileSpecification& fileSpec, + const Environment& env) + : _reader(reader), + _data(2), + _workingBuffers(2), + _file(std::move(file)), + _fileInfo(std::move(info)), + _fileSpec(fileSpec), + _env(env), + _options(env.acquireConfigReadLock().options()) +{ +} + +void +SimpleMemFileIOBuffer::close() +{ + if (_file->isOpen()) { + _file->close(); + } +} + +const SimpleMemFileIOBuffer::Data& +SimpleMemFileIOBuffer::getData(DocumentPart part, DataLocation loc) const +{ + DataMap::const_iterator iter = _data[part].find(loc); + + if (iter == _data[part].end()) { + std::ostringstream 
ost; + ost << "Location " << loc + << " was not found for " << (part == HEADER ? "Header" : "Body"); + throw PartNotCachedException(ost.str(), VESPA_STRLOC); + } + + return iter->second; +} + +document::Document::UP +SimpleMemFileIOBuffer::getDocumentHeader( + const document::DocumentTypeRepo& repo, + DataLocation loc) const +{ + const Data& data = getData(HEADER, loc); + + Document::UP doc(new Document()); + document::ByteBuffer buf(data.buf->getBuffer() + data.pos, + data.buf->getSize() - data.pos); + + doc->deserializeHeader(repo, buf, false); + return doc; +} + +document::DocumentId +SimpleMemFileIOBuffer::getDocumentId(DataLocation loc) const +{ + const Data& data = getData(HEADER, loc); + + const char* buf = data.buf->getBuffer() + data.pos + loc._size; + buf -= 2 * sizeof(uint32_t); + + uint32_t nameLen = *(const uint32_t*)(buf); + buf -= nameLen; + + return document::DocumentId(vespalib::stringref(buf, nameLen)); +} + +void +SimpleMemFileIOBuffer::readBody( + const document::DocumentTypeRepo& repo, + DataLocation loc, + Document& doc) const +{ + const Data& data = getData(BODY, loc); + + document::ByteBuffer buf(data.buf->getBuffer() + data.pos, + data.buf->getSize() - data.pos); + + doc.deserializeBody(repo, buf, false); +} + +DataLocation +SimpleMemFileIOBuffer::addLocation(DocumentPart part, + BufferAllocation newData) +{ + if (!newData.getSharedBuffer().get()) { + LOG(spam, "Not adding location since data is null"); + return DataLocation(0, 0); + } + + DataMap& target = _data[part]; + DataLocation loc = DataLocation(_fileInfo->getBlockSize(part), newData.getSize()); + + DataMap::reverse_iterator iter = target.rbegin(); + if (iter != target.rend() && iter->first.endPos() > loc._pos) { + loc = DataLocation(iter->first.endPos(), newData.getSize()); + } + + std::pair<DataMap::iterator, bool> existing( + target.insert(std::make_pair(loc, Data(newData.getSharedBuffer(), + newData.getBufferPosition(), + false)))); + if (!existing.second) { + LOG(error, "%s: 
addLocation attempted %s insert with location %u,%u, " + "but that location already exists", + _fileSpec.toString().c_str(), + getDocumentPartName(part), + loc._pos, + loc._size); + assert(false); + } + + LOG(spam, "%s: added %s at location %u,%u (buffer %p, position %u)", + _fileSpec.getBucketId().toString().c_str(), + getDocumentPartName(part), + loc._pos, + loc._size, + newData.getSharedBuffer().get(), + newData.getBufferPosition()); + return loc; +} + +void +SimpleMemFileIOBuffer::HeaderChunkEncoder::bufferDocument(const Document& doc) +{ + assert(_serializedDoc.empty()); + doc.serializeHeader(_serializedDoc); +} + +/** + * Buffer is comprised of the following: + * - Document header blob (n bytes) + * - CRC32 of header blob (4 bytes) + * - Document Id (n bytes) + * - Length of document id (4 bytes) + * - CRC32 of document id and length (4 bytes) + * + * To a reader, the length of the header blob is inferred from length of + * total buffer chunk minus the overhead by the doc id string and metadata in + * the chunk trailer. + */ +void +SimpleMemFileIOBuffer::HeaderChunkEncoder::writeTo(BufferAllocation& buf) const +{ + assert(buf.getSize() >= encodedSize()); + // Note that docSize may be zero throughout this function. 
+ const uint32_t docSize = _serializedDoc.size(); + const uint32_t docChecksum = calculateChecksum( + _serializedDoc.peek(), docSize); + const uint32_t idLen = _docId.size(); + + vespalib::crc_32_type nameChecksum; + nameChecksum.process_bytes(_docId.c_str(), idLen); + nameChecksum.process_bytes(reinterpret_cast<const char*>(&idLen), + sizeof(uint32_t)); + const uint32_t trailerChecksum = nameChecksum.checksum(); + + memcpy(buf.getBuffer(), _serializedDoc.peek(), docSize); + char* trailer = buf.getBuffer() + docSize; + memcpy(trailer, &docChecksum, sizeof(uint32_t)); + trailer += sizeof(uint32_t); + memcpy(trailer, _docId.c_str(), idLen); + trailer += idLen; + memcpy(trailer, &idLen, sizeof(uint32_t)); + trailer += sizeof(uint32_t); + memcpy(trailer, &trailerChecksum, sizeof(uint32_t)); +} + +bool +SimpleMemFileIOBuffer::writeBackwardsCompatibleRemoves() const +{ + return !_options->_defaultRemoveDocType.empty(); +} + +document::Document::UP +SimpleMemFileIOBuffer::generateBlankDocument( + const DocumentId& id, + const document::DocumentTypeRepo& repo) const +{ + vespalib::string typeName( + id.hasDocType() ? 
id.getDocType() + : _options->_defaultRemoveDocType); + const document::DocumentType* docType(repo.getDocumentType(typeName)); + if (!docType) { + throw vespalib::IllegalArgumentException( + "Could not serialize document for remove with unknown " + "doctype '" + typeName + "'"); + } + return std::unique_ptr<Document>(new Document(*docType, id)); +} + +SimpleMemFileIOBuffer::BufferAllocation +SimpleMemFileIOBuffer::serializeHeader(const Document& doc) +{ + HeaderChunkEncoder encoder(doc.getId()); + encoder.bufferDocument(doc); + BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize())); + encoder.writeTo(buf); + + return buf; +} + +SimpleMemFileIOBuffer::BufferAllocation +SimpleMemFileIOBuffer::serializeDocumentIdOnlyHeader( + const DocumentId& id, + const document::DocumentTypeRepo& repo) +{ + HeaderChunkEncoder encoder(id); + if (writeBackwardsCompatibleRemoves()) { + Document::UP blankDoc(generateBlankDocument(id, repo)); + encoder.bufferDocument(*blankDoc); + } + BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize())); + encoder.writeTo(buf); + + return buf; +} + +DataLocation +SimpleMemFileIOBuffer::addDocumentIdOnlyHeader( + const DocumentId& docId, + const document::DocumentTypeRepo& repo) +{ + return addLocation(HEADER, serializeDocumentIdOnlyHeader(docId, repo)); +} + +DataLocation +SimpleMemFileIOBuffer::addHeader(const Document& doc) +{ + return addLocation(HEADER, serializeHeader(doc)); +} + +SimpleMemFileIOBuffer::BufferAllocation +SimpleMemFileIOBuffer::serializeBody(const Document& doc) +{ + vespalib::nbostream output(5 * 1024); + doc.serializeBody(output); + + if (output.empty()) { + return BufferAllocation(); + } + + BufferAllocation val(allocateBuffer(BODY, output.size() + sizeof(uint32_t))); + memcpy(val.getBuffer(), output.peek(), output.size()); + + // Also append CRC32 of body block to buffer + uint32_t checksum = calculateChecksum(output.peek(), output.size()); + char* trailer = val.getBuffer() + output.size(); + 
memcpy(trailer, &checksum, sizeof(uint32_t)); + + return val; +} + +SimpleMemFileIOBuffer::BufferAllocation +SimpleMemFileIOBuffer::allocateBuffer(DocumentPart part, + uint32_t sz, + SharedBuffer::Alignment align) +{ + // If the requested size is greater than or equal to our working buffer + // size, simply allocate a separate buffer for it. + if (sz >= WORKING_BUFFER_SIZE) { + return BufferAllocation(SharedBuffer::LP(new SharedBuffer(sz)), 0, sz); + } + + SharedBuffer::LP& bufLP(_workingBuffers[part]); + bool requireNewBlock = false; + if (!bufLP.get()) { + requireNewBlock = true; + } else if (!bufLP->hasRoomFor(sz, align)) { + requireNewBlock = true; + } + + if (!requireNewBlock) { + return BufferAllocation(bufLP, + static_cast<uint32_t>(bufLP->allocate(sz, align)), + sz); + } else { + SharedBuffer::LP newBuf(new SharedBuffer(WORKING_BUFFER_SIZE)); + bufLP = newBuf; + return BufferAllocation(newBuf, + static_cast<uint32_t>(newBuf->allocate(sz, align)), + sz); + } +} + +DataLocation +SimpleMemFileIOBuffer::addBody(const Document& doc) +{ + return addLocation(BODY, serializeBody(doc)); +} + +void +SimpleMemFileIOBuffer::clear(DocumentPart part) +{ + LOG(debug, "%s: cleared all data for part %s", + _fileSpec.getBucketId().toString().c_str(), + getDocumentPartName(part)); + _data[part].clear(); +} + +bool +SimpleMemFileIOBuffer::verifyConsistent() const +{ + return true; +} + +void +SimpleMemFileIOBuffer::move(const FileSpecification& target) +{ + LOG(debug, "Moving %s -> %s", + _file->getFilename().c_str(), + target.getPath().c_str()); + _file->close(); + + if (vespalib::fileExists(_file->getFilename())) { + vespalib::rename(_file->getFilename(), target.getPath(), true, true); + } + + _file.reset( + new vespalib::LazyFile(target.getPath(), vespalib::File::DIRECTIO, true)); +} + +DataLocation +SimpleMemFileIOBuffer::copyCache(const MemFileIOInterface& source, + DocumentPart part, + DataLocation loc) +{ + if (loc._size == 0) { + return loc; + } + + const 
SimpleMemFileIOBuffer& srcBuf( + static_cast<const SimpleMemFileIOBuffer&>(source)); + Data data = srcBuf.getData(part, loc); + + BufferAllocation val(allocateBuffer(part, loc._size)); + memcpy(val.getBuffer(), data.buf->getBuffer() + data.pos, loc._size); + + LOG(spam, + "Copied cached data from %s to %s for location %u,%u buffer pos=%u", + srcBuf._fileSpec.getBucketId().toString().c_str(), + _fileSpec.getBucketId().toString().c_str(), + loc._pos, + loc._size, + data.pos); + + return addLocation(part, val); +} + + +void +SimpleMemFileIOBuffer::cacheLocation(DocumentPart part, + DataLocation loc, + BufferType::LP& buf, + uint32_t bufferPos) +{ + LOG(spam, + "%s: added existing %s buffer at location %u,%u " + "buffer=%p buffer pos=%u", + _fileSpec.toString().c_str(), + getDocumentPartName(part), + loc._pos, + loc._size, + buf.get(), + bufferPos); + _data[part][loc] = Data(buf, bufferPos, true); +} + +bool +SimpleMemFileIOBuffer::isCached(DataLocation loc, + DocumentPart type) const +{ + if (loc._size == 0) { + // Count zero-sized locations as cached + return true; + } + + return _data[type].find(loc) != _data[type].end(); +} + +bool +SimpleMemFileIOBuffer::isPersisted(DataLocation loc, + DocumentPart type) const +{ + DataMap::const_iterator iter = _data[type].find(loc); + + // If the buffer doesn't know about the data at all, + // we must assume it is already persisted. How else would the file + // know about the location? 
+ if (iter == _data[type].end()) { + return true; + } + + return iter->second.persisted; +} + +void +SimpleMemFileIOBuffer::ensureCached(Environment& env, + DocumentPart part, + const std::vector<DataLocation>& locations) +{ + std::vector<DataLocation> nonCached; + nonCached.reserve(locations.size()); + + for (uint32_t i = 0; i < locations.size(); ++i) { + if (_data[part].find(locations[i]) == _data[part].end()) { + nonCached.push_back(locations[i]); + } + } + + _reader.cacheLocations(*this, env, *_options, part, nonCached); +} + +void +SimpleMemFileIOBuffer::persist(DocumentPart part, + DataLocation oldLoc, + DataLocation newLoc) +{ + Data newData = getData(part, oldLoc); + newData.persisted = true; + size_t erased = _data[part].erase(oldLoc); + assert(erased > 0); + (void) erased; + _data[part][newLoc] = newData; + + LOG(spam, "%s: persisted %s for %u,%u -> %u,%u", + _fileSpec.getBucketId().toString().c_str(), + getDocumentPartName(part), + oldLoc._pos, oldLoc._size, + newLoc._pos, newLoc._size); +} + +void +SimpleMemFileIOBuffer::remapAndPersistAllLocations( + DocumentPart part, + const std::map<DataLocation, DataLocation>& locs) +{ + DataMap remappedData; + + typedef std::map<DataLocation, DataLocation>::const_iterator Iter; + for (Iter it(locs.begin()), e(locs.end()); it != e; ++it) { + DataLocation oldLoc = it->first; + DataLocation newLoc = it->second; + + LOG(spam, "%s: remapping %u,%u -> %u,%u", + _fileSpec.getBucketId().toString().c_str(), + oldLoc._pos, oldLoc._size, + newLoc._pos, newLoc._size); + + Data newData = getData(part, oldLoc); + newData.persisted = true; + std::pair<DataMap::iterator, bool> inserted( + remappedData.insert(std::make_pair(newLoc, newData))); + assert(inserted.second); + } + _data[part].swap(remappedData); + + LOG(debug, + "%s: remapped %zu locations. 
Discarded %zu locations that " + "had no new mapping", + _fileSpec.getBucketId().toString().c_str(), + locs.size(), + _data[part].size() - locs.size()); +} + +const char* +SimpleMemFileIOBuffer::getBuffer(DataLocation loc, DocumentPart part) const +{ + const Data& data = getData(part, loc); + return data.buf->getBuffer() + data.pos; +} + +uint32_t +SimpleMemFileIOBuffer::getSerializedSize(DocumentPart part, + DataLocation loc) const +{ + if (part == HEADER) { + const Data& data = getData(part, loc); + assert(loc._size > sizeof(uint32_t)*3); + const char* bufEnd = data.buf->getBuffer() + data.pos + loc._size; + uint32_t docIdLen = *reinterpret_cast<const uint32_t*>( + bufEnd - sizeof(uint32_t)*2); + return loc._size - sizeof(uint32_t)*3 - docIdLen; + } else { + return loc._size - sizeof(uint32_t); + } +} + +size_t +SimpleMemFileIOBuffer::getCachedSize(DocumentPart part) const +{ + const DataMap& dm(_data[part]); + vespalib::hash_set<const void*> seenBufs(dm.size()); + size_t ret = 0; + for (DataMap::const_iterator it(dm.begin()), e(dm.end()); it != e; ++it) { + if (seenBufs.find(it->second.buf->getBuffer()) != seenBufs.end()) { + continue; + } + + size_t bufSize = it->second.buf->getSize(); + // Account for (approximate) mmap overhead. + bufSize = util::alignUpPow2<4096>(bufSize); + ret += bufSize; + seenBufs.insert(it->second.buf->getBuffer()); + } + return ret; +} + +} + +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h new file mode 100644 index 00000000000..8dbffcaf795 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h @@ -0,0 +1,365 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/memfilepersistence/memfile/memfileiointerface.h> +#include <vespa/memfilepersistence/mapper/buffer.h> +#include <vespa/document/fieldvalue/document.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/memfilepersistence/mapper/fileinfo.h> +#include <vespa/vespalib/util/exception.h> +#include <vespa/vespalib/util/alloc.h> +#include <vespa/memfilepersistence/mapper/versionserializer.h> + +namespace storage +{ +namespace memfile +{ + +namespace util { + +/** + * @param Alignment (template) must be a power of two. + * @return val aligned up so that retval >= val && retval % Alignment == 0 + */ +template <size_t Alignment> +size_t +alignUpPow2(const size_t val) +{ + const size_t mask = Alignment - 1; + return (val + mask) & ~mask; +} + +/** + * Round any non-power of two value up to the nearest power of two. E.g: + * nextPow2(3) -> 4 + * nextPow2(15) -> 16 + * nextPow2(40) -> 64 + * nextPow2(64) -> 64 + * + * From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + */ +inline uint32_t +nextPow2(uint32_t v) +{ + --v; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + ++v; + return v; +} + +} + +/** + * Implements a simple buffered cache for a memfile. + */ +class SimpleMemFileIOBuffer : public MemFileIOInterface +{ +public: + /** + * Any buffer requests >= than this size will get their own, separately + * allocated buffer. For all other cases, we'll attempt to squeeze as many + * documents as possible into the same (shared) buffer. 
+ */ + static const size_t WORKING_BUFFER_SIZE = 16*1024; // TODO(vekterli): make configurable + + class SharedBuffer + { + public: + static const size_t ALLOC_ALIGNMENT = 8; + enum Alignment { + NO_ALIGN, + ALIGN_512_BYTES + }; + + typedef vespalib::LinkedPtr<SharedBuffer> LP; + explicit SharedBuffer(size_t totalSize) + : _buf(totalSize), + _usedSize(0) + { + } + + size_t getSize() const { return _buf.size(); } + size_t getUsedSize() const { return _usedSize; } + size_t getFreeSize() const { return getSize() - getUsedSize(); } + bool hasRoomFor(size_t sz, Alignment align = NO_ALIGN) const { + return (align == ALIGN_512_BYTES + ? util::alignUpPow2<512>(_usedSize) + : _usedSize) + sz <= getSize(); + } + + /** + * Returns an offset into the shared buffer which is valid to use for + * sz bytes. If align is ALIGN_512_BYTES, the returned offset will be + * aligned on a 512-byte boundary. It is the responsibility of the + * caller to ensure buffers used for Direct I/O are allocated with a + * size that is also evenly 512-byte divisible. 
+ */ + size_t allocate(size_t sz, Alignment align = NO_ALIGN) { + if (align == ALIGN_512_BYTES) { + _usedSize = util::alignUpPow2<512>(_usedSize); + } + assert(hasRoomFor(sz)); + size_t ret = _usedSize; + _usedSize += util::alignUpPow2<ALLOC_ALIGNMENT>(sz); + return ret; + } + + char* getBuffer() { + return static_cast<char*>(_buf.get()); + } + const char* getBuffer() const { + return static_cast<const char*>(_buf.get()); + } + private: + vespalib::MMapAlloc _buf; + size_t _usedSize; + }; + + struct BufferAllocation + { + BufferAllocation() : pos(0), size(0) {} + + BufferAllocation(const SharedBuffer::LP& b, uint32_t p, uint32_t sz) + : buf(b), pos(p), size(sz) {} + + /** + * Get buffer area available to this specific allocation + */ + char* getBuffer() { return buf->getBuffer() + pos; } + const char* getBuffer() const { return buf->getBuffer() + pos; } + + /** + * Get buffer that is (potentially) shared between many individual + * allocations. + */ + SharedBuffer::LP& getSharedBuffer() { return buf; } + uint32_t getBufferPosition() const { return pos; } + uint32_t getSize() const { return size; } + + SharedBuffer::LP buf; + uint32_t pos; + uint32_t size; + }; + + /** + * Utility class for fully encoding a chunk of file data for a document + * header in a slotfile. Supports writing header chunks with and without + * a document payload. + */ + class HeaderChunkEncoder + { + vespalib::nbostream _serializedDoc; + vespalib::string _docId; + public: + static const size_t DEFAULT_STREAM_ALLOC_SIZE = 5 * 2014; + + HeaderChunkEncoder(const document::DocumentId& docId) + : _serializedDoc(DEFAULT_STREAM_ALLOC_SIZE), + _docId(docId.toString()) + { + } + + /** + * Serializes header chunk to buf, which must have at least a size + * of encodedSize() bytes available. + */ + void writeTo(BufferAllocation& buf) const; + + /** + * Assign (and buffer) document that should be written to the chunk. 
+ * If this method is not called on an encoder prior to writeTo(), the + * chunk will contain only a document ID but no payload. This is + * perfectly fine for 5.1+, but is not supported by 5.0 readers. + * It is safe for the provided document to go out of scope after having + * called this method. + * Since this method buffers it may only be called once per encoder. + */ + void bufferDocument(const document::Document&); + + /** + * Compute total size of chunk as it will reside on disk, including + * document blob/id payload and metadata overhead. + * Max doc size is <=64M so we cannot possibly exceed 32 bits. + */ + uint32_t encodedSize() const { + return (_serializedDoc.size() + trailerLength()); + } + private: + static constexpr uint32_t fixedTrailerLength() { + // CRC32 of doc blob + u32 doc id length + CRC32 of doc id. + return (sizeof(uint32_t) * 3); + } + uint32_t trailerLength() const { + return (fixedTrailerLength() + _docId.size()); + } + }; + + typedef SharedBuffer BufferType; + + class PartNotCachedException : public vespalib::Exception { + public: + PartNotCachedException(const std::string& msg, + const std::string& location) + : vespalib::Exception(msg, location) {}; + }; + + SimpleMemFileIOBuffer( + VersionSerializer& reader, + vespalib::LazyFile::UP file, + FileInfo::UP fileInfo, + const FileSpecification& fileSpec, + const Environment& env); + + virtual Document::UP getDocumentHeader( + const document::DocumentTypeRepo& repo, + DataLocation loc) const; + + virtual document::DocumentId getDocumentId(DataLocation loc) const; + + virtual void readBody( + const document::DocumentTypeRepo& repo, + DataLocation loc, + Document& doc) const; + + virtual DataLocation addDocumentIdOnlyHeader( + const DocumentId& id, + const document::DocumentTypeRepo& repo); + + virtual DataLocation addHeader(const Document& doc); + + virtual DataLocation addBody(const Document& doc); + + virtual void clear(DocumentPart type); + + virtual bool verifyConsistent() const; + + 
/** + * Moves the underlying file to another location. + */ + virtual void move(const FileSpecification& target); + + virtual void close(); + + virtual DataLocation copyCache(const MemFileIOInterface& source, + DocumentPart part, + DataLocation loc); + + /** + * Add a location -> buffer mapping + */ + void cacheLocation(DocumentPart part, + DataLocation loc, + BufferType::LP& buf, + uint32_t bufferPos); + + /** + * @return Returns true if the given location is cached. + */ + virtual bool isCached(DataLocation loc, DocumentPart type) const; + + /** + * @return Returns true if the given location has been persisted to disk. + */ + virtual bool isPersisted(DataLocation loc, DocumentPart type) const; + + virtual uint32_t getSerializedSize(DocumentPart part, + DataLocation loc) const; + + virtual void ensureCached(Environment& env, + DocumentPart part, + const std::vector<DataLocation>& locations); + + /** + * Moves the given location into the persisted data area. + * oldLoc must be outside the persisted data area, and newLoc must be within. + */ + void persist(DocumentPart part, DataLocation oldLoc, DataLocation newLoc); + + /** + * Remaps every single location for the given part. + * WARNING: All existing locations that are not remapped will be discarded! 
+ */ + void remapAndPersistAllLocations(DocumentPart part, + const std::map<DataLocation, DataLocation>& locs); + + vespalib::LazyFile& getFileHandle() { return *_file; }; + const vespalib::LazyFile& getFileHandle() const { return *_file; }; + + const FileInfo& getFileInfo() const { return *_fileInfo; } + void setFileInfo(FileInfo::UP fileInfo) { _fileInfo = std::move(fileInfo); } + + const FileSpecification& getFileSpec() const { return _fileSpec; } + + const char* getBuffer(DataLocation loc, DocumentPart part) const; + + size_t getCachedSize(DocumentPart part) const; + + BufferAllocation allocateBuffer(DocumentPart part, + uint32_t sz, + SharedBuffer::Alignment align + = SharedBuffer::NO_ALIGN); + + /** + * Whether removes should be written with a document header payload in + * order to be backwards-compatible with VDS 5.0. This is in order to + * support a scenario where a cluster is downgraded from 5.1+ -> 5.0. + */ + bool writeBackwardsCompatibleRemoves() const; + + /** + * Generate a document with no content which stores the given document ID + * and is of the type inferred by the ID. If the ID is of legacy format + * (and thus without a type), the default configured type will be used. + */ + Document::UP generateBlankDocument(const DocumentId&, + const document::DocumentTypeRepo&) const; + +private: + struct Data { + Data() : pos(0), persisted(false) {} + + Data(const BufferType::LP& b, uint32_t p, bool isPersisted) + : buf(b), pos(p), persisted(isPersisted) {} + + BufferType::LP buf; + uint32_t pos; + bool persisted; + }; + + typedef std::map<DataLocation, Data> DataMap; + + VersionSerializer& _reader; + std::vector<DataMap> _data; + std::vector<SharedBuffer::LP> _workingBuffers; + vespalib::LazyFile::UP _file; + FileInfo::UP _fileInfo; + FileSpecification _fileSpec; + const Environment& _env; + // Same memfile config is used during entire lifetime of buffer object. 
+ // This makes live reconfigs kick in for all files only when all buckets + // have been evicted from the cache post-reconfig, but greatly simplifies + // the reasoning about a given bucket in the face of such actions. + std::shared_ptr<const Options> _options; + + DataLocation addLocation(DocumentPart part, + BufferAllocation newData); + + const Data& getData(DocumentPart part, DataLocation loc) const; + + BufferAllocation serializeDocumentIdOnlyHeader( + const DocumentId& id, + const document::DocumentTypeRepo&); + BufferAllocation serializeHeader(const Document& doc); + BufferAllocation serializeBody(const Document& doc); + + friend class SimpleMemFileIOBufferTest; +}; + +} +} + + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp new file mode 100644 index 00000000000..fecdfb1b1c8 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <storage/persistence/memfile/mapper/slotreadplanner.h> + +#include <storage/persistence/memfile/memfile/memslot.h> + +namespace storage { +namespace memfile { + +SlotDiskIoPlanner::SlotDiskIoPlanner( + const std::vector<const MemSlot*> desiredSlots, + DocumentPart highestPartNeeded, + uint32_t maxGap, + uint32_t headerBlockStartIndex, + uint32_t bodyBlockStartIndex) + : _operations(), + _startIndexes(2, 0) +{ + _startIndexes[HEADER] = headerBlockStartIndex; + _startIndexes[BODY] = bodyBlockStartIndex; + processSlots(desiredSlots, highestPartNeeded, maxGap); +} + +namespace { + uint32_t alignDown(uint32_t value) { + uint32_t blocks = value / 512; + return blocks * 512; + }; + + uint32_t alignUp(uint32_t value) { + uint32_t blocks = (value + 512 - 1) / 512; + return blocks * 512; + }; +} + +void +SlotDiskIoPlanner::scheduleLocation(const MemSlot& slot, + DocumentPart type, + std::vector<DataLocation>& ops) +{ + if (!slot.partAvailable(type) && slot.getLocation(type)._size) { + ops.push_back(DataLocation( + slot.getLocation(type)._pos + _startIndexes[type], + slot.getLocation(type)._size)); + } +} + +void +SlotDiskIoPlanner::processSlots( + const std::vector<const MemSlot*> desiredSlots, + DocumentPart highestPartNeeded, + uint32_t maxGap) +{ + // Build list of disk read operations to do + std::vector<DataLocation> allOps; + // Create list of all locations we need to read + for (std::size_t i = 0; i < desiredSlots.size(); ++i) { + for (uint32_t p = 0; p <= uint32_t(highestPartNeeded); ++p) { + scheduleLocation(*desiredSlots[i], (DocumentPart) p, allOps); + } + } + // Sort list, and join elements close together into single IO ops + std::sort(allOps.begin(), allOps.end()); + for (size_t i = 0; i < allOps.size(); ++i) { + uint32_t start = alignDown(allOps[i]._pos); + uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size); + if (i != 0) { + uint32_t lastStop = _operations.back()._pos + + _operations.back()._size; + if 
(lastStop >= start || start - lastStop < maxGap) { + _operations.back()._size += (stop - lastStop); + continue; + } + } + _operations.push_back(DataLocation(start, stop - start)); + } +} + +uint32_t +SlotDiskIoPlanner::getTotalBufferSize() const +{ + uint32_t totalSize = 0; + for (size_t i = 0; i < _operations.size(); ++i) { + totalSize += _operations[i]._size; + } + return totalSize; +} + +void +SlotDiskIoPlanner::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + (void) verbose; (void) indent; + for (std::size_t i = 0; i < _operations.size(); ++i) { + if (i > 0) out << ","; + out << "[" << _operations[i]._pos << "," + << (_operations[i]._size + _operations[i]._pos) << "]"; + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h new file mode 100644 index 00000000000..a2e17debcdf --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::SlotDiskIoPlanner + * \ingroup memfile + * + * \brief Creates list of minimal IO operations to do versus disk. + * + * When accessing many locations on disk, it is not necessarily ideal to do a + * disk access per location. This class creates a minimal set of locations to + * access to avoid accessing more than a maximum gap of uninteresting data. 
+ */ +#pragma once + +#include <storage/persistence/memfile/common/types.h> + +namespace storage { +namespace memfile { + +class MemSlot; + +class SlotDiskIoPlanner : public Types, public vespalib::Printable +{ +public: + SlotDiskIoPlanner(const std::vector<const MemSlot*> desiredSlots, + DocumentPart highestPartNeeded, + uint32_t maxGap, + uint32_t headerBlockStartIndex, + uint32_t bodyBlockStartIndex); + + const std::vector<DataLocation>& getIoOperations() const { + return _operations; + } + + /** + * Get the total amount of space needed to hold all the data from all + * locations identified to be accessed. Useful to create a buffer of correct + * size. + */ + uint32_t getTotalBufferSize() const; + + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + +private: + std::vector<DataLocation> _operations; + std::vector<uint32_t> _startIndexes; + + void processSlots( + const std::vector<const MemSlot*> desiredSlots, + DocumentPart highestPartNeeded, + uint32_t maxGap); + + void scheduleLocation(const MemSlot&, DocumentPart, + std::vector<DataLocation>&); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp new file mode 100644 index 00000000000..52408067977 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/mapper/uniqueslotgenerator.h> +#include <vespa/memfilepersistence/memfile/memfile.h> +#include <memfilepersistence/memfile/doccache.h> + +namespace storage { + +namespace memfile { + +bool +UniqueSlotGenerator::ContentLocation::operator==( + const ContentLocation& other) const +{ + if (_loc.valid() && other._loc.valid()) return _loc == other._loc; + return _content == other._content; +} + +bool +UniqueSlotGenerator::ContentLocation::operator<( + const ContentLocation& other) const +{ + if (_loc.valid() && other._loc.valid()) return _loc < other._loc; + if (other._loc.valid()) return false; + if (_loc.valid()) return true; + return _content < other._content; +} + +void +UniqueSlotGenerator::ContentLocation::print(std::ostream& out, bool, + const std::string&) const +{ + out << "ContentLocation(" << _loc << ", " + << std::hex << _content << std::dec << ")"; +} + +UniqueSlotGenerator::UniqueSlotGenerator(const MemFile& memFile) + : _slots(2), + _slotsInOrder(2) +{ + for (uint32_t i = 0; i < memFile.getSlotCount(); i++) { + const MemSlot& slot = memFile[i]; + addSlot(HEADER, slot); + if (slot.hasBodyContent()) addSlot(BODY, slot); + } +} + +void +UniqueSlotGenerator::addSlot(DocumentPart part, const MemSlot& slot) +{ + ContentLocation contentLoc(slot.getLocation(part)); + if (slot.getDocCache() != NULL) { + contentLoc._content = slot.getDocCache()->getPart(part).get(); + } + SlotList& loc = _slots[part][contentLoc]; + loc.push_back(&slot); + if (loc.size() == 1) { + _slotsInOrder[part].push_back(&loc); + } +} + +void +UniqueSlotGenerator::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + (void) verbose; + for (uint32_t i=0; i<2; ++i) { + DocumentPart part(static_cast<DocumentPart>(i)); + out << getDocumentPartName(part) << ":"; + const OrderedSlotList& list = _slotsInOrder[part]; + for (uint32_t j = 0; j < list.size(); ++j) { + const SlotList& slotList = *list[j]; + out 
<< "\n" << indent << slotList[0]->getLocation(part) << ": "; + for (uint32_t k = 0; k < slotList.size(); ++k) { + if (k > 0) out << ", "; + out << slotList[k]->getTimestamp(); + } + } + if (i == 0) out << "\n"; + } +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h new file mode 100644 index 00000000000..c2ce3adede8 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::UniqueSlotGenerator + * \ingroup memfile + * + * Generates a mapping from unique content locations on disk + * (or unique documents if not persisted) to a list of slots. + */ +#pragma once + +#include <vespa/memfilepersistence/common/types.h> + +namespace storage { +namespace memfile { + +class MemSlot; +class MemFile; + +class UniqueSlotGenerator : private Types, public vespalib::Printable +{ +public: + typedef std::vector<const MemSlot*> SlotList; + +private: + struct ContentLocation : public vespalib::Printable { + DataLocation _loc; + const document::StructFieldValue* _content; + + ContentLocation(const DataLocation& loc) : _loc(loc), _content(0) {} + + ContentLocation(const DataLocation& loc, + const document::StructFieldValue* content) + : _loc(loc), _content(content) {} + + bool operator<(const ContentLocation& other) const; + bool operator==(const ContentLocation& other) const; + + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + }; + + void addSlot(DocumentPart, const MemSlot&); + + typedef std::map<ContentLocation, SlotList> LocationToSlotMap; + typedef std::vector<SlotList*> OrderedSlotList; + + std::vector<LocationToSlotMap> _slots; + std::vector<OrderedSlotList> _slotsInOrder; + +public: + 
UniqueSlotGenerator(const MemFile& memFile); + + uint32_t getNumUnique(DocumentPart part) const { + return _slotsInOrder[part].size(); + } + + const SlotList& getSlots(DocumentPart part, uint32_t uniqueIndex) const { + return *_slotsInOrder[part][uniqueIndex]; + } + + void print(std::ostream&, bool verbose, const std::string& indent) const; + +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h new file mode 100644 index 00000000000..b57734c2b24 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::VersionSerializer + * \ingroup memfile + * + * \brief Super class for file mappers implementing a file format. + * + * An implementation of this handles all specifics of reading and writing + * a file format. + */ + +#pragma once + +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/mapper/buffer.h> +#include <vespa/memfilepersistence/mapper/mapperslotoperation.h> +#include <vespa/memfilepersistence/memfile/memfile.h> +#include <vespa/vespalib/util/linkedptr.h> +#include <vespa/memfilepersistence/memfile/memfileiointerface.h> + +namespace storage { +namespace memfile { + +// Avoid circular dependencies +class MemFileEnvironment; +class Options; + +struct VersionSerializer : protected Types { + typedef vespalib::LinkedPtr<VersionSerializer> LP; + + virtual ~VersionSerializer() {} + + /** Returns the file version this implementation handles. */ + virtual FileVersion getFileVersion() = 0; + + /** + * The MemFileMapper main class reads file header to figure out what version + * it is in. Then loadFile is called on correct implementation to interpret + * the file. 
The part of the file already read is given to loadFile to avoid + * a re-read of the initial data. + */ + virtual void loadFile(MemFile& file, Environment&, + Buffer& buffer, uint64_t bytesRead) = 0; + + /** + * Flushes all content in MemFile that is altered or not persisted to disk + * to the physical file. This function should not handle file rewriting. If + * updates cannot be done to the existing file it needs to return in case + * we then want to rewrite the file in another format. + * + * Flush must update the following in the MemFile: + * - Update state saying all is persisted and nothing is altered + * - All block position and sizes need to be correct after flush. + * + * @return True if written successfully, false if file rewrite is required. + */ + enum class FlushResult { + ChangesWritten, + TooFewMetaEntries, + TooSmall, + TooLarge, + UnAltered + }; + virtual FlushResult flushUpdatesToFile(MemFile&, Environment&) = 0; + + /** + * This function is typically called when file doesn't already exist or + * flushUpdatesToFile return false, indicating that file need a total + * rewrite. Before calling this function, all data must be cached in the + * MemFile instance. + */ + virtual void rewriteFile(MemFile&, Environment&) = 0; + + /** + * Check file for errors, generate report of errors. Fix if repairErrors + * is set. Returns true if no failures were found or no errors were fixed. + */ + virtual bool verify(MemFile&, Environment&, + std::ostream& errorReport, bool repairErrors, + uint16_t fileVerifyFlags) = 0; + + + /** + * Cache locations into the given buffer. 
+ */ + virtual void cacheLocations(MemFileIOInterface& buffer, + Environment& env, + const Options& options, + DocumentPart part, + const std::vector<DataLocation>& locations) = 0; + +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore @@ -0,0 +1,2 @@ +/.depend +/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt new file mode 100644 index 00000000000..798b0b873e5 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(memfilepersistence_memfile OBJECT + SOURCES + memslot.cpp + memfile.cpp + slotiterator.cpp + memfilecompactor.cpp + memfilecache.cpp + shared_data_location_tracker.cpp + DEPENDS +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp new file mode 100644 index 00000000000..281ada62a89 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp @@ -0,0 +1,1116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/memfile/memfile.h> + +#include <ext/algorithm> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/common/exceptions.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> +#include <vespa/memfilepersistence/memfile/memfilecompactor.h> +#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h> +#include <vespa/vespalib/util/crc.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <iomanip> + +namespace { + +template<class A> +std::vector<A> toVector(A entry) { + std::vector<A> entries; + entries.push_back(entry); + return entries; +}; + +} + +LOG_SETUP(".persistence.memfile.memfile"); + +#define FAIL_INCONSISTENT(msg, slot) \ +{ \ + std::ostringstream error; \ + error << msg; \ + throw InconsistentSlotException(slot.toString() + ": " + error.str(), \ + _file, slot, VESPA_STRLOC); \ +} +#define FAIL_INCONSISTENT_FILE(msg) \ +{ \ + std::ostringstream error; \ + error << msg; \ + throw InconsistentException(error.str(), _file, VESPA_STRLOC); \ +} + +#define RETHROW_NON_MEMFILE_EXCEPTIONS \ + catch (MemFileException& exceptionToRethrow) { \ + throw; \ + } catch (vespalib::IoException& exceptionToRethrow) { \ + std::ostringstream wrappedMessage; \ + wrappedMessage << "Got IO exception while processing within " \ + << "memfile. Wrapping in memfile exception: "; \ + const std::string& sourceExceptionMessage( \ + exceptionToRethrow.getMessage()); \ + size_t pos = sourceExceptionMessage.find(':'); \ + wrappedMessage << sourceExceptionMessage.substr(pos + 2); \ + throw MemFileIoException(wrappedMessage.str(), _file, \ + exceptionToRethrow.getType(), VESPA_STRLOC) \ + .setCause(exceptionToRethrow); \ + } catch (vespalib::Exception& exceptionToRethrow) { \ + throw MemFileWrapperException( \ + "Got generic exception while processing within " \ + "memfile. 
Wrapping in memfile exception: " \ + + std::string(exceptionToRethrow.getMessage()), \ + _file, VESPA_STRLOC).setCause(exceptionToRethrow); \ + } + +namespace storage { +namespace memfile { + +MemFile::MemFile(const FileSpecification& file, + Environment& env, + const LoadOptions& opts) + : _flags(BUCKET_INFO_OUTDATED), + _info(), + _entries(), + _file(file), + _currentVersion(UNKNOWN), + _env(env) +{ + try{ + env._memFileMapper.loadFile(*this, env, opts.autoRepair); + } RETHROW_NON_MEMFILE_EXCEPTIONS; +} + +MemFile::MemFile(const FileSpecification& file, Environment& env, + bool callLoadFile) + : _flags(BUCKET_INFO_OUTDATED), + _info(), + _entries(), + _file(file), + _currentVersion(UNKNOWN), + _env(env) +{ + if (callLoadFile) { + env._memFileMapper.loadFile(*this, env, false); + } +} + +void +MemFile::verifyConsistent() const +{ + _buffer->verifyConsistent(); +} + +uint16_t +MemFile::getDisk() const +{ + return _file.getDirectory().getIndex(); +} + +void +MemFile::move(const FileSpecification& file) +{ + // Any given bucket can either be moved to a more specific or less + // specific bucket in the same subtree. 
+ assert(file.getBucketId().contains(_file.getBucketId()) + || _file.getBucketId().contains(file.getBucketId())); + _buffer->move(file); + _file = file; +} + +uint32_t +MemFile::getSlotCount() const +{ + return _entries.size(); +} + +const MemSlot* +MemFile::getSlotWithId(const document::DocumentId& id, + framework::MicroSecTime maxTimestamp) const +{ + for (uint32_t n=_entries.size(), i=n-1; i<n; --i) { + if (_entries[i].getTimestamp() > maxTimestamp) continue; + if (id.getGlobalId() != _entries[i].getGlobalId()) continue; + if (getDocumentId(_entries[i]) == id) return &_entries[i]; + } + return 0; +} + +namespace { + +struct MemSlotTimestampPredicate +{ + bool operator()(const MemSlot& a, Types::Timestamp time) const + { + return a.getTimestamp() < time; + } +}; + +} + +const MemSlot* +MemFile::getSlotAtTime(Timestamp time) const +{ + std::vector<MemSlot>::const_iterator it( + std::lower_bound(_entries.begin(), _entries.end(), + time, MemSlotTimestampPredicate())); + if (it != _entries.end() && it->getTimestamp() == time) { + return &*it; + } + return 0; +} + +void +MemFile::getSlotsByTimestamp( + const std::vector<Timestamp>& timestamps, + std::vector<const MemSlot*>& returned) const +{ + assert(__gnu_cxx::is_sorted(timestamps.begin(), timestamps.end())); + + std::size_t source = 0; + std::size_t target = 0; + + while (source < _entries.size() && target < timestamps.size()) { + if (_entries[source].getTimestamp() == timestamps[target]) { + returned.push_back(&_entries[source]); + ++source; + ++target; + } else if (_entries[source].getTimestamp() < timestamps[target]) { + ++source; + } else { + ++target; + } + } +} + +document::Document::UP +MemFile::getDocument(const MemSlot& slot, GetFlag getFlag) const +{ + LOG(spam, + "%s: getDocument(%s, %s)", + _file.getBucketId().toString().c_str(), + slot.toString().c_str(), + getFlag == HEADER_ONLY ? 
"header only" : "full document"); + ensureDocumentCached(slot, getFlag == HEADER_ONLY); + + auto& repo = _env.repo(); + Document::UP doc = _buffer->getDocumentHeader( + repo, slot.getLocation(HEADER)); + + if (doc.get() && getFlag == ALL && slot.getLocation(BODY)._size > 0) { + _buffer->readBody(repo, slot.getLocation(BODY), *doc); + } + + return doc; +} + +document::DocumentId +MemFile::getDocumentId(const MemSlot& slot) const +{ + LOG(spam, + "%s: getDocumentId(%s)", + _file.getBucketId().toString().c_str(), + slot.toString().c_str()); + ensureDocumentCached(slot, true); + + return _buffer->getDocumentId(slot.getLocation(HEADER)); +} + +void +MemFile::assertSlotContainedInThisBucket(const MemSlot& slot) const +{ + document::BucketId fileBucket(getBucketId()); + // Non-orderdoc documents should pass this first (very cheap) test. + if (slot.getGlobalId().containedInBucket(fileBucket)) { + return; + } + // Expensive path: get doc id and check against it instead. + DocumentId id(getDocumentId(slot)); + document::BucketIdFactory factory; + document::BucketId slotBucket(factory.getBucketId(id)); + + LOG(spam, + "%s: slot %s has GID not contained in bucket, checking against id %s", + fileBucket.toString().c_str(), + slot.toString().c_str(), + id.toString().c_str()); + + if (!fileBucket.contains(slotBucket)) { + LOG(error, + "Slot %s with document ID %s is not contained in %s. 
Terminating " + "in order to avoid bucket corruption.", + slot.toString().c_str(), + id.toString().c_str(), + fileBucket.toString().c_str()); + assert(false); + } +} + +void +MemFile::addPutSlot(const Document& doc, Timestamp time) +{ + DataLocation headerLoc = _buffer->addHeader(doc); + DataLocation bodyLoc = _buffer->addBody(doc); + + addSlot(MemSlot(doc.getId().getGlobalId(), + time, + headerLoc, + bodyLoc, + IN_USE | CHECKSUM_OUTDATED, + 0)); +} + +void +MemFile::addUpdateSlot(const Document& header, const MemSlot& body, Timestamp time) +{ + if (!body.getLocation(BODY).valid()) { + LOG(error, + "Slot %s has invalid body location while not " + "having body cached. This is an invalid state.", + body.toString().c_str()); + assert(false); + } + + DataLocation headerLoc = _buffer->addHeader(header); + DataLocation bodyLoc = body.getLocation(BODY); + + addSlot(MemSlot(header.getId().getGlobalId(), + time, + headerLoc, + bodyLoc, + IN_USE | CHECKSUM_OUTDATED, + 0)); +} + +void +MemFile::addRemoveSlot(const MemSlot& header, Timestamp time) +{ + addSlot(MemSlot(header.getGlobalId(), + time, + header.getLocation(HEADER), + DataLocation(0,0), + DELETED | IN_USE | CHECKSUM_OUTDATED, + 0)); +} + +void +MemFile::addRemoveSlotForNonExistingEntry(const DocumentId& docId, + Timestamp time, + RemoveType removeType) +{ + addSlot(MemSlot(docId.getGlobalId(), + time, + _buffer->addDocumentIdOnlyHeader(docId, _env.repo()), + DataLocation(0,0), + DELETED + | IN_USE + | CHECKSUM_OUTDATED + | (removeType == UNREVERTABLE_REMOVE ? DELETED_IN_PLACE : 0), + 0)); +} + +void +MemFile::addSlot(const MemSlot& slot) +{ + LOG(spam, + "%s: adding %s to memfile", + _file.getBucketId().toString().c_str(), + slot.toString().c_str()); + // TODO: Add exception here? 
+ //assert(slot.partAvailable(BODY)); + assert(slot.getLocation(HEADER).valid()); + assert(slot.getLocation(BODY).valid()); + // Don't let full disk block remove entries or entries that + // are already fully persisted + + if (!slot.deleted() + && !slot.deletedInPlace() + && !(partPersisted(slot, HEADER) + && partPersisted(slot, BODY))) + { + verifyDiskNotFull(); + } + + // Optimize common case where slot we're adding has a higher + // timestamp than the last slot already stored. + if (!_entries.empty() + && slot.getTimestamp() > _entries.back().getTimestamp()) + { + _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; + _entries.push_back(slot); + return; + } + + std::vector<MemSlot> entries; + entries.reserve(_entries.size() + 1); + bool inserted = false; + for (uint32_t i=0; i<_entries.size(); ++i) { + if (_entries[i].getTimestamp() == slot.getTimestamp()) { + std::ostringstream err; + err << "Attempt of adding slot at timestamp " + << slot.getTimestamp() << " which already exist in file. 
" + << "Call modifySlot instead."; + LOG(error, "%s", err.str().c_str()); + assert(false); + } + if (!inserted && _entries[i].getTimestamp() > slot.getTimestamp()) { + inserted = true; + entries.push_back(slot); + } + entries.push_back(_entries[i]); + } + if (!inserted) { + entries.push_back(slot); + } + _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; + _entries.swap(entries); +} + +void +MemFile::copySlot(const MemFile& source, const MemSlot& slot) +{ + addSlot(MemSlot(slot.getGlobalId(), + slot.getTimestamp(), + _buffer->copyCache(*source._buffer, HEADER, slot.getLocation(HEADER)), + _buffer->copyCache(*source._buffer, BODY, slot.getLocation(BODY)), + slot.getFlags(), + slot.getChecksum())); +} + +class MemFile::MemFileBufferCacheCopier : public BufferCacheCopier +{ +public: + MemFileBufferCacheCopier(MemFile& target, const MemFile& source) + : _target(target), + _source(source) + { + } + +private: + DataLocation doCopyFromSourceToLocal( + Types::DocumentPart part, + DataLocation sourceLocation) override + { + return _target._buffer->copyCache( + *_source._buffer, part, sourceLocation); + } + + MemFile& _target; + const MemFile& _source; +}; + +void +MemFile::copySlotsFrom( + const MemFile& source, + const std::vector<const MemSlot*>& sourceSlots) +{ + // TODO we probably want a pre-allocation hint here to avoid many mmaps + MemFileBufferCacheCopier cacheCopier(*this, source); + SharedDataLocationTracker headerTracker(cacheCopier, HEADER); + SharedDataLocationTracker bodyTracker(cacheCopier, BODY); + + for (auto slot : sourceSlots) { + auto headerLoc = headerTracker.getOrCreateSharedLocation( + slot->getLocation(HEADER)); + auto bodyLoc = bodyTracker.getOrCreateSharedLocation( + slot->getLocation(BODY)); + addSlot(MemSlot(slot->getGlobalId(), + slot->getTimestamp(), + headerLoc, + bodyLoc, + slot->getFlags(), + slot->getChecksum())); + } +} + +void +MemFile::removeSlot(const MemSlot& slot) +{ + _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; + 
std::vector<MemSlot>::iterator it( + std::lower_bound(_entries.begin(), _entries.end(), + slot.getTimestamp(), + MemSlotTimestampPredicate())); + if (it != _entries.end() + && it->getTimestamp() == slot.getTimestamp()) + { + _entries.erase(it); + } else { + LOG(error, + "Attempted to remove a slot that does not exist: %s", + slot.toString().c_str()); + assert(false); + } +} + +void +MemFile::removeSlots(const std::vector<const MemSlot*>& slotsToRemove) +{ + if (slotsToRemove.empty()) return; + // Optimized way of removing slots. Should not throw exceptions, + // (and is not exception safe) + std::vector<MemSlot> slots( + _entries.size() - slotsToRemove.size(), + MemSlot(GlobalId(), Timestamp(0), DataLocation(), DataLocation(), + 0, 0)); + uint32_t r=0; + for (uint32_t i=0,j=0; i<_entries.size(); ++i) { + if (r >= slotsToRemove.size() || slotsToRemove[r] != &_entries[i]) { + _entries[i].swap(slots[j]); + ++j; + } else { + ++r; + } + } + _entries.swap(slots); + if (_entries.size() != slots.size()) { + _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; + } + // Verify that we found all slots to remove + if (r < slotsToRemove.size()) { + Timestamp ts(0); + for (uint32_t i=0; i<slotsToRemove.size(); ++i) { + assert(slotsToRemove[i]->getTimestamp() > ts); + ts = slotsToRemove[i]->getTimestamp(); + } + LOG(error, + "Slot %s wasn't in the file. 
Only existing slots may be " + "given to removeSlots as non-existing slot stops other " + "slots from being removed.", + slotsToRemove[r]->toString().c_str()); + assert(false); + } +} + +void +MemFile::modifySlot(const MemSlot& slot) +{ + _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; + // MemSlot actually pointed to by const MemSlot* is non-const + // in entries-vector, so this should be well defined according + // to the C++ ISO standard + MemSlot* slotToModify = const_cast<MemSlot*>( + getSlotAtTime(slot.getTimestamp())); + + assert(slotToModify != NULL); + + LOG(spam, "Modifying %s -> %s", + slotToModify->toString().c_str(), + slot.toString().c_str()); + *slotToModify = slot; +} + +void +MemFile::matchLocationWithFlags(LocationMap& result, + DocumentPart part, + const MemSlot* slot, + uint32_t flags) const +{ + DataLocation loc = slot->getLocation(part); + bool isPersisted = _buffer->isPersisted(loc, part); + + if ((flags & NON_PERSISTED_LOCATIONS) && !isPersisted) { + result[loc].slots.push_back(slot); + } else if ((flags & PERSISTED_LOCATIONS) && isPersisted) { + result[loc].slots.push_back(slot); + } +} + +void +MemFile::getLocations(LocationMap& headers, + LocationMap& bodies, + uint32_t flags) const +{ + for (uint32_t i = 0; i < _entries.size(); ++i) { + matchLocationWithFlags(headers, HEADER, &_entries[i], flags); + matchLocationWithFlags(bodies, BODY, &_entries[i], flags); + } +} + +bool +MemFile::compact() +{ + auto options = _env.acquireConfigReadLock().options(); + MemFileCompactor compactor( + _env._clock.getTimeInMicros(), + CompactionOptions() + .revertTimePeriod(options->_revertTimePeriod) + .keepRemoveTimePeriod(options->_keepRemoveTimePeriod) + .maxDocumentVersions(options->_maxDocumentVersions)); + std::vector<const MemSlot*> slotsToRemove( + compactor.getSlotsToRemove(*this)); + removeSlots(slotsToRemove); + return !slotsToRemove.empty(); +} + +MemFile::const_iterator +MemFile::begin(uint32_t iteratorFlags, + Timestamp fromTimestamp, + 
Timestamp toTimestamp) const +{ + if (iteratorFlags & ITERATE_GID_UNIQUE) { + return const_iterator(SlotIterator::CUP(new GidUniqueSlotIterator( + *this, iteratorFlags & ITERATE_REMOVED, + fromTimestamp, toTimestamp))); + } else { + return const_iterator(SlotIterator::CUP(new AllSlotsIterator( + *this, iteratorFlags & ITERATE_REMOVED, + fromTimestamp, toTimestamp))); + } +} + +void +MemFile::ensureDocumentIdCached(const MemSlot& slot) const +{ + _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER))); +} + +void +MemFile::ensureDocumentCached(const MemSlot& slot, bool headerOnly) const +{ + _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER))); + if (!headerOnly) { + _buffer->ensureCached(_env, BODY, toVector(slot.getLocation(BODY))); + } +} + +void +MemFile::ensureDocumentCached(const std::vector<Timestamp>& timestamps, + bool headerOnly) const +{ + LOG(spam, "ensureDocumentCached with %zu timestamps", + timestamps.size()); + if (!fileExists()) { + return; + } + try{ + std::vector<const MemSlot*> slots; + getSlotsByTimestamp(timestamps, slots); + + std::vector<DataLocation> headerLocations; + headerLocations.reserve(timestamps.size()); + std::vector<DataLocation> bodyLocations; + if (!headerOnly) { + bodyLocations.reserve(timestamps.size()); + } + for (uint32_t i = 0; i < slots.size(); ++i) { + headerLocations.push_back(slots[i]->getLocation(HEADER)); + + if (!headerOnly) { + bodyLocations.push_back(slots[i]->getLocation(BODY)); + } + } + + _buffer->ensureCached(_env, HEADER, headerLocations); + if (!headerOnly) { + _buffer->ensureCached(_env, BODY, bodyLocations); + } + } RETHROW_NON_MEMFILE_EXCEPTIONS; +} + +void +MemFile::ensureEntriesCached(bool includeBody) const +{ + if (!fileExists()) { + return; + } + + try{ + std::vector<DataLocation> headerLocations; + std::vector<DataLocation> bodyLocations; + + for (uint32_t i = 0; i < _entries.size(); ++i) { + headerLocations.push_back(_entries[i].getLocation(HEADER)); + + if 
(includeBody) { + bodyLocations.push_back(_entries[i].getLocation(BODY)); + } + } + + _buffer->ensureCached(_env, HEADER, headerLocations); + if (includeBody) { + _buffer->ensureCached(_env, BODY, bodyLocations); + } + } RETHROW_NON_MEMFILE_EXCEPTIONS; +} + +void +MemFile::ensureHeaderBlockCached() const +{ + ensureEntriesCached(false); +} + +void +MemFile::ensureBodyBlockCached() const +{ + ensureEntriesCached(true); +} + +/** + * Functionally this is the same as ensureBodyBlockCached, but with + * clearer semantics. + */ +void +MemFile::ensureHeaderAndBodyBlocksCached() const +{ + ensureEntriesCached(true); +} + +bool +MemFile::documentIdAvailable(const MemSlot& slot) const +{ + return partAvailable(slot, HEADER); +} + +bool +MemFile::partAvailable(const MemSlot& slot, DocumentPart part) const +{ + return _buffer->isCached(slot.getLocation(part), part); +} + +bool +MemFile::partPersisted(const MemSlot& slot, DocumentPart part) const +{ + assert(_buffer.get()); + + return _buffer->isPersisted(slot.getLocation(part), part); +} + +uint32_t +MemFile::getSerializedSize(const MemSlot& slot, DocumentPart part) const { + DataLocation loc = slot.getLocation(part); + return _buffer->getSerializedSize(part, loc); +} + +const Types::BucketInfo& +MemFile::getBucketInfo() const +{ + if (_flags & BUCKET_INFO_OUTDATED) { + uint32_t uniqueCount = 0, uniqueSize = 0, usedSize = 0; + uint32_t checksum = 0; + + typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap; + SeenMap seen(_entries.size() * 2); + uint32_t maxHeaderExtent = 0, maxBodyExtent = 0; + + MemSlotVector::const_reverse_iterator e(_entries.rend()); + for (MemSlotVector::const_reverse_iterator it(_entries.rbegin()); + it != e; ++it) + { + const MemSlot& slot(*it); + // We now always write sequentially within the blocks, so used size + // for one block is effectively the max location extent seen within + // it. 
+ maxHeaderExtent = std::max(maxHeaderExtent, + slot.getLocation(HEADER)._pos + + slot.getLocation(HEADER)._size); + maxBodyExtent = std::max(maxBodyExtent, + slot.getLocation(BODY)._pos + + slot.getLocation(BODY)._size); + + SeenMap::insert_result inserted(seen.insert(slot.getGlobalId())); + if (!inserted.second) { + continue; + } + if (slot.deleted()) continue; + + const uint32_t slotSize = slot.getLocation(HEADER)._size + + slot.getLocation(BODY)._size; + uniqueSize += slotSize; + ++uniqueCount; + + vespalib::crc_32_type calculator; + calculator.process_bytes(slot.getGlobalId().get(), + GlobalId::LENGTH); + Timestamp time = slot.getTimestamp(); + calculator.process_bytes(&time, sizeof(Timestamp)); + checksum ^= calculator.checksum(); + } + + if (uniqueCount > 0 && checksum < 2) { + checksum += 2; + } + + // Only set used size if we have any entries at all. + if (!_entries.empty()) { + usedSize = 64 + 40 * _entries.size() + + maxHeaderExtent + maxBodyExtent; + } + + spi::BucketInfo info(spi::BucketChecksum(checksum), + uniqueCount, + uniqueSize, + _entries.size(), + usedSize, + BucketInfo::READY, + BucketInfo::NOT_ACTIVE); + + _info = info; + _flags &= ~BUCKET_INFO_OUTDATED; + } + return _info; +} + +void +MemFile::flushToDisk(FlushFlag flag) +{ + if ((flag == CHECK_NON_DIRTY_FILE_FOR_SPACE) || (_flags & SLOTS_ALTERED)) { + LOG(spam, "Flushing %s to disk since flags is %x", toString().c_str(), _flags); + try{ + _env._memFileMapper.flush(*this, _env); + } RETHROW_NON_MEMFILE_EXCEPTIONS; + } else { + LOG(spam, "Not flushing %s as it is not altered", toString().c_str()); + } + + // For now, close all files after done flushing, to avoid getting + // too many open at the same time. 
Later cache may cache limited + // amount of file handles + getMemFileIO().close(); +} + +void +MemFile::clearCache(DocumentPart part) +{ + _buffer->clear(part); + if (part == HEADER) { + _cacheSizeOverride.headerSize = 0; + } else { + _cacheSizeOverride.bodySize = 0; + } +} + +bool +MemFile::repair(std::ostream& errorReport, uint32_t verifyFlags) +{ + try{ + return _env._memFileMapper.repair( + *this, _env, errorReport, verifyFlags); + } RETHROW_NON_MEMFILE_EXCEPTIONS; +} + +void +MemFile::resetMetaState() +{ + LOG(debug, "Resetting meta state for MemFile"); + _flags = BUCKET_INFO_OUTDATED; + _currentVersion = UNKNOWN; + _info = BucketInfo(); + _entries.clear(); +} + +MemSlot::MemoryUsage +MemFile::getCacheSize() const +{ + assert(_buffer.get()); + + if (_cacheSizeOverride.sum() > 0) { + return _cacheSizeOverride; + } + + MemSlot::MemoryUsage retVal; + retVal.metaSize = sizeof(MemSlot) * _entries.size(); + retVal.headerSize += _buffer->getCachedSize(HEADER); + retVal.bodySize += _buffer->getCachedSize(BODY); + return retVal; +} + +void +MemFile::verifyDiskNotFull() +{ + const double maxFillRate( + _env.acquireConfigReadLock().options()->_diskFullFactor); + + Directory& dir = _file.getDirectory(); + + if (dir.getPartition().getMonitor() == 0) { + LOG(warning, "No partition monitor found for directory %s. Skipping " + "disk full test.", dir.toString(true).c_str()); + } else if (dir.isFull(0, maxFillRate)) { + std::ostringstream token; + token << dir << " is full"; + std::ostringstream ost; + ost << "Disallowing operation on file " << getFile().getPath() + << " because disk is or would be " + << (100 * dir.getPartition().getMonitor() + ->getFillRate()) << " % full, which is " + << "more than the max setting of " + << 100 * maxFillRate << " % full." + << " (Note that this may be both due to space or inodes. 
" + << "Check \"df -i\" too if manually checking)" + << " (" << dir.toString(true) << ")"; + LOGBT(warning, token.str(), "%s", ost.str().c_str()); + throw vespalib::IoException( + ost.str(), vespalib::IoException::NO_SPACE, VESPA_STRLOC); + } else { + LOG(spam, "Disk will only be %f %% full after operation, which " + "is below limit of %f %%; allowing it to go through.", + 100.0 * dir.getPartition().getMonitor() + ->getFillRate(), + 100.0 * maxFillRate); + } +} + +bool +MemFile::operator==(const MemFile& other) const +{ + if (_info == other._info && + _entries.size() == other._entries.size() && + _file == other._file && + _currentVersion == other._currentVersion) + { + for (uint32_t i=0, n=_entries.size(); i<n; ++i) { + if (_entries[i] != other._entries[i]) return false; + } + return true; + } + return false; +} + +namespace { + void printMemFlags(std::ostream& out, uint32_t flags) { + bool anyPrinted = false; + for (uint32_t val=1,i=1; i<=32; ++i, val *= 2) { + if (flags & val) { + if (anyPrinted) { out << "|"; } + anyPrinted = true; + const char* name = Types::getMemFileFlagName( + static_cast<Types::MemFileFlag>(val)); + if (strcmp(name, "INVALID") == 0) { + out << "INVALID(" << std::hex << val << std::dec << ")"; + } else { + out << name; + } + } + } + if (!anyPrinted) out << "none"; + } +} + +void +MemFile::printHeader(std::ostream& out, bool verbose, + const std::string& indent) const +{ + if (!verbose) { + out << "MemFile(" << _file.getBucketId() << ", dir " + << _file.getDirectory().getIndex(); + } else { + out << "MemFile(" << _file.getBucketId() + << "\n" << indent << " Path(\"" + << _file.getPath() << "\")" + << "\n" << indent << " Wanted version(" + << Types::getFileVersionName(_file.getWantedFileVersion()) + << "(" << std::hex << _file.getWantedFileVersion() << "))" + << "\n" << indent << " Current version(" + << Types::getFileVersionName(_currentVersion) + << "(" << std::hex << _currentVersion << "))" + << "\n" << indent << " " << getBucketInfo() + << 
"\n" << indent << " Flags "; + printMemFlags(out, _flags); + + if (_formatData.get()) { + out << "\n" << indent << " " << _formatData->toString(); + } + } +} + +void +MemFile::printEntries(std::ostream& out, bool verbose, + const std::string& indent) const +{ + if (verbose && !_entries.empty()) { + out << ") {"; + for (uint32_t i=0; i<_entries.size(); ++i) { + out << "\n" << indent << " "; + print(_entries[i], out, false, indent + " "); + } + out << "\n" << indent << "}"; + } else { + out << ", " << _entries.size() << " entries)"; + } +} + +void +MemFile::printEntriesState(std::ostream& out, bool verbose, + const std::string& indent) const +{ + for (uint32_t i=0; i<_entries.size(); ++i) { + if (verbose) { + printUserFriendly(_entries[i], out, indent); + } else { + print(_entries[i], out, false, indent); + } + out << "\n" << indent; + } + const SimpleMemFileIOBuffer& ioBuf( + static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO())); + const FileInfo& fileInfo(ioBuf.getFileInfo()); + + unsigned int emptyCount = fileInfo._metaDataListSize - _entries.size(); + if (emptyCount > 0) { + out << std::dec << emptyCount << " empty entries.\n" << indent; + } +} + +void +MemFile::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + printHeader(out, verbose, indent); + printEntries(out, verbose, indent); +} + +void +MemFile::printUserFriendly(const MemSlot& slot, + std::ostream& out, + const std::string& indent) const +{ + out << "MemSlot(" << slot.getGlobalId() + << std::setfill(' ') + << std::dec << "\n" + << indent << " Header pos: " + << std::setw(10) << slot.getLocation(HEADER)._pos + << " - " << std::setw(10) << slot.getLocation(HEADER)._size + << ", Body pos: " << std::setw(10) << slot.getLocation(BODY)._pos + << " - " << std::setw(10) << slot.getLocation(BODY)._size << "\n" << indent + << " Timestamp: " << slot.getTimestamp().toString() + << " (" << slot.getTimestamp().getTime() << ")\n" + << indent << " Checksum: 0x" + << std::hex << 
std::setw(4) << slot.getChecksum() + << std::setfill(' ') << "\n" << indent << " Flags: 0x" + << std::setw(4) << slot.getFlags(); + std::list<std::string> flags; + + if ((slot.getFlags() & IN_USE) == 0) flags.push_back("NOT IN USE"); + if ((slot.getFlags() & DELETED) != 0) flags.push_back("DELETED"); + if ((slot.getFlags() & DELETED_IN_PLACE) != 0) flags.push_back("DELETED_IN_PLACE"); + if ((slot.getFlags() & CHECKSUM_OUTDATED) != 0) flags.push_back("CHECKSUM_OUTDATED"); + + for (std::list<std::string>::iterator it = flags.begin(); + it != flags.end(); ++it) + { + out << ", " << *it; + } + + const document::DocumentId id = getDocumentId(slot); + + out << "\n" << indent << " Name: " << id; + document::BucketIdFactory factory; + document::BucketId bucket( + factory.getBucketId( + document::DocumentId(id))); + out << "\n" << indent << " Bucket: " << bucket; + out << ")"; +} + +void +MemFile::print(const MemSlot& slot, + std::ostream& out, + bool verbose, + const std::string& indent) const +{ + if (verbose) { + out << "MemSlot("; + } + out << std::dec << slot.getTimestamp() << ", " << slot.getGlobalId() << ", h " + << slot.getLocation(HEADER)._pos << " - " << slot.getLocation(HEADER)._size << ", b " + << slot.getLocation(BODY)._pos << " - " << slot.getLocation(BODY)._size << ", f " + + << std::hex << slot.getFlags() << ", c " << slot.getChecksum() + << ", C(" << (documentIdAvailable(slot) ? "D" : "") + << (partAvailable(slot, HEADER) ? "H" : "") + << (partAvailable(slot, BODY) ? 
"B" : "") + << ")"; + if (verbose) { + out << ") {"; + if (documentIdAvailable(slot)) { + out << "\n" << indent << " "; + + getDocument(slot, ALL) + ->print(out, true, indent + " "); + } else { + out << "\n" << indent << " Nothing cached beyond metadata."; + } + out << "\n" << indent << "}"; + } +} + +void +MemFile::printState(std::ostream& out, bool userFriendlyOutput, + bool printBody, bool printHeader2, + //SlotFile::MetaDataOrder order, + const std::string& indent) const +{ + const SimpleMemFileIOBuffer& ioBuf( + static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO())); + const FileInfo& fileInfo(ioBuf.getFileInfo()); + + out << "\n" << indent << "Filename: '" << getFile().getPath() << "'"; + if (!fileExists()) { + out << " (non-existing)"; + return; + } else if (ioBuf.getFileHandle().isOpen()) { + out << " (fd " << ioBuf.getFileHandle().getFileDescriptor() << ")"; + } + out << "\n"; + + uint32_t filesize = ioBuf.getFileHandle().getFileSize(); + out << "Filesize: " << filesize << "\n"; + Buffer buffer(filesize); + char* buf = buffer.getBuffer(); + uint32_t readBytes = ioBuf.getFileHandle().read(buf, filesize, 0); + if (readBytes != filesize) { + out << "Failed to read whole file of size " << filesize + << ". 
Adjusting file size to " << readBytes + << " we managed to read."; + filesize = readBytes; + } + + const Header* header(reinterpret_cast<const Header*>(buf)); + header->print(out); + out << "\n" << indent; + + if (filesize < fileInfo.getHeaderBlockStartIndex()) + { + out << "File not big enough to contain all " + << fileInfo._metaDataListSize << " meta data entries.\n" + << indent; + } else { + printEntriesState(out, userFriendlyOutput, indent); + } + + if (filesize < fileInfo.getBodyBlockStartIndex()) + { + out << "File not big enough to contain the whole " + << fileInfo._headerBlockSize << " byte header block.\n" << indent; + } else { + out << "Header block: (" << std::dec << fileInfo._headerBlockSize + << "b)"; + if (printHeader2) { + const char* start = &buf[0] + fileInfo.getHeaderBlockStartIndex(); + out << "\n" << indent; + document::StringUtil::printAsHex( + out, start, fileInfo._headerBlockSize, 16, false); + } + out << "\n" << indent; + } + + if (filesize < fileInfo.getFileSize()) + { + out << "File not big enough to contain the whole " + << fileInfo._bodyBlockSize << " byte content block.\n" << indent; + } else { + out << "Content block: (" << std::dec << fileInfo._bodyBlockSize << "b)"; + if (printBody) { + const char* start = &buf[0] + fileInfo.getBodyBlockStartIndex(); + out << "\n" << indent; + document::StringUtil::printAsHex( + out, start, fileInfo._bodyBlockSize, 16, false); + } + out << "\n" << indent; + } +} + + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h new file mode 100644 index 00000000000..ccd8a306332 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h @@ -0,0 +1,316 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * \class storage::MemFile + * \ingroup memfile + * + * \brief Class representing a file storing documents in slots. + * + * This is a memory representation of the file, that isn't constricted by the + * actual file format. + * + * A memfile must contains: + * - A header with generic information about the file, including version of + * file format. + * + * A memfile may also contain: + * - Cached meta data describing contents. + * - Cached document header content. + * - Cached document body content. + * + * The representation knows what parts of it that are persisted in a file, and + * what parts exist only in memory. + * + * For ease of use, information is loaded into the cache automatically by the + * MemFile implementation. Thus, the memfile needs a pointer to the file mapper + * implementation. + */ + +#pragma once + +#include <vespa/memfilepersistence/common/filespecification.h> +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/memfile/memslot.h> +#include <vespa/memfilepersistence/memfile/slotiterator.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/memfilepersistence/memfile/memfileiointerface.h> + +namespace storage { +namespace memfile { + +class Environment; +//class SlotFileV1SerializerTest; + +class MemFile : private Types, + public vespalib::Printable +{ +public: + struct FormatSpecificData { + typedef std::unique_ptr<FormatSpecificData> UP; + + virtual ~FormatSpecificData() {} + + virtual std::string toString() const = 0; + }; + + typedef IteratorWrapper const_iterator; + + struct LocationContent { + std::vector<const MemSlot*> slots; + }; + typedef std::map<DataLocation, LocationContent> LocationMap; + typedef std::vector<MemSlot> MemSlotVector; + +private: + void verifyDiskNotFull(); + + mutable uint32_t _flags; + mutable BucketInfo _info; + MemFileIOInterface::UP _buffer; + MemSlotVector _entries; + FileSpecification _file; + mutable FileVersion _currentVersion; + Environment& _env; + mutable 
FormatSpecificData::UP _formatData; + MemSlot::MemoryUsage _cacheSizeOverride; + + friend class MemFilePtr; + friend class MemCacheTest; + class MemFileBufferCacheCopier; + + /** + * Verify tests need to be able to create memfiles that hasn't called + * loadfile, and possibly call loadFile without autorepair set. Such + * memfiles are invalid as many functions require header+metadata to be + * cached. Should only be used for unit tests. + */ + friend class MemFileV1VerifierTest; + MemFile(const FileSpecification&, Environment&, bool callLoadFile); + + // Ensures that all entries are cached. + // If includeBody is true, caches the body as well. + void ensureEntriesCached(bool includeBody) const; + + // Put the given location in the result map if the + // location is persisted according to the given flags. + void matchLocationWithFlags(LocationMap& result, + DocumentPart part, + const MemSlot* slot, + uint32_t flags) const; + +public: + struct LoadOptions { + bool autoRepair; + LoadOptions() : autoRepair(true) + {} + }; + + MemFile(const FileSpecification& spec, + Environment& env, + const LoadOptions& opts = LoadOptions()); + + const FileSpecification& getFile() const { return _file; } + const document::BucketId& getBucketId() const noexcept { + return _file.getBucketId(); + } + FileVersion getCurrentVersion() const { return _currentVersion; } + + bool empty() const { return _entries.empty(); } + bool fileExists() const { return (_flags & FILE_EXIST); } + bool headerBlockCached() const { return (_flags & HEADER_BLOCK_READ); } + bool bodyBlockCached() const { return (_flags & BODY_BLOCK_READ); } + bool slotsAltered() const { return _flags & SLOTS_ALTERED; } + + /** + * Called by the mapper when it has to call loadFile a second + * time due to corruption repairs. Must NOT be called by anyone + * else! + */ + void resetMetaState(); + + void verifyConsistent() const; + + /** Moves the physical file on disk (if any) to the new file name. 
*/ + void move(const FileSpecification& newFileName); + + uint16_t getDisk() const; + + FormatSpecificData* getFormatSpecificData() const + { return _formatData.get(); } + void setFormatSpecificData(FormatSpecificData::UP d) const + { _formatData = std::move(d); } + void setCurrentVersion(FileVersion ver) const { _currentVersion = ver; } + + uint32_t getSlotCount() const; + const MemSlot& operator[](uint32_t index) const { return _entries[index]; } + const MemSlot* getSlotWithId(const document::DocumentId&, + Timestamp maxTimestamp = MAX_TIMESTAMP) const; + const MemSlot* getSlotAtTime(Timestamp) const; + + void getSlotsByTimestamp(const std::vector<Timestamp>&, + std::vector<const MemSlot*>& returned) const; + + // Get flags are defined in types.h (GetFlag) + Document::UP getDocument(const MemSlot& slot, GetFlag getFlag) const; + + document::DocumentId getDocumentId(const MemSlot& slot) const; + + /** + * Returns the number of bytes required by this memfile while + * in cache. + * + * @return Returns the cache size. + */ + MemSlot::MemoryUsage getCacheSize() const; + + void addPutSlot(const Document& doc, Timestamp time); + + void addUpdateSlot(const Document& header, + const MemSlot& body, + Timestamp time); + + void addRemoveSlot(const MemSlot& header, Timestamp time); + + enum RemoveType + { + REGULAR_REMOVE, + UNREVERTABLE_REMOVE + }; + + void addRemoveSlotForNonExistingEntry(const DocumentId& docId, + Timestamp time, + RemoveType removeType); + + void addSlot(const MemSlot&); + void removeSlot(const MemSlot&); + + void setMemFileIO(MemFileIOInterface::UP buffer) { + _buffer = std::move(buffer); + } + MemFileIOInterface& getMemFileIO() { return *_buffer; } + const MemFileIOInterface& getMemFileIO() const { return *_buffer; } + + void getLocations(LocationMap& headers, + LocationMap& bodies, + uint32_t flags) const; + + /** + * Copies a slot from another memfile. 
+ */ + void copySlot(const MemFile& source, const MemSlot&); + + void copySlotsFrom(const MemFile& source, + const std::vector<const MemSlot*>& sourceSlots); + + /** Remove given slots. Slots must exist and be in rising timestamp order */ + void removeSlots(const std::vector<const MemSlot*>&); + void modifySlot(const MemSlot&); + + void setFlag(uint32_t flags) { + verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::setFlag"); + _flags |= flags; + } + + void clearFlag(uint32_t flags) { + verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::clearFlags"); + _flags &= ~flags; + } + + /** + * Removes entries overwritten after revert time period and remove + * entries older than keep remove period. + * + * @return True if anything was compacted + */ + bool compact(); + + const_iterator begin(uint32_t iteratorFlags = 0, + Timestamp fromTimestamp = UNSET_TIMESTAMP, + Timestamp toTimestamp = UNSET_TIMESTAMP) const; + + const_iterator end() const { return const_iterator(); } + + void ensureDocumentIdCached(const MemSlot&) const; + void ensureDocumentCached(const MemSlot&, bool headerOnly) const; + void ensureHeaderBlockCached() const; + void ensureBodyBlockCached() const; + void ensureHeaderAndBodyBlocksCached() const; + void ensureDocumentCached(const std::vector<Timestamp>&, + bool headerOnly) const; + + /** + * Assert that a given slot is contained in the bucket this MemFile has + * been created for (i.e. output of getBucketId()). In the common case, + * only the slot GID will be consulted, but in the case of orderdoc docs + * the document ID may have to be fetched. + * + * Precondition: `slot` must have its data blocks already added to the + * file's buffer cache. This means any fetches of the document ID should + * not require disk access, but will incur cache lookup and heap + * allocation overhead. + * Postcondition: no side effects if `slot` is contained in bucket. Logs + * error and dumps core otherwise. 
+ */ + void assertSlotContainedInThisBucket(const MemSlot& slot) const; + + bool documentIdAvailable(const MemSlot&) const; + bool partAvailable(const MemSlot&, DocumentPart part) const; + bool partPersisted(const MemSlot&, DocumentPart) const; + + uint32_t getSerializedSize(const MemSlot&, DocumentPart part) const; + + /** + * Fetches the bucket info. If metadata is altered, info will be + * recalculated, and bucket database updated. + */ + const BucketInfo& getBucketInfo() const; + + void flushToDisk(FlushFlag flags = NONE); + + void clearCache(DocumentPart part); + + /** + * Repair any errors found in this slotfile. + * If given, stuff error report into given ostream. + * + * @return True if file was fine. False if any errors were repaired. + */ + bool repair(std::ostream& errorReport, uint32_t fileVerifyFlags = 0); + + /** + * Tests for equality of memfiles. Equality requires MemFile to look equal + * for clients. It will not read data from file, so the same parts of the + * file must be cached for objects to be equal. Non-persistent flags need + * not be equal (The same parts need not be persisted to backend files) + * + * Used in unit testing only. + */ + bool operator==(const MemFile& other) const; + + /** Stat wants control of printing of slots. */ + void printHeader(std::ostream& out, bool verbose, + const std::string& indent) const; + void printEntries(std::ostream& out, bool verbose, + const std::string& indent) const; + void printEntriesState(std::ostream& out, bool verbose, + const std::string& indent) const; + void print(std::ostream& out, bool verbose, + const std::string& indent) const; + + /** Stat wants control of printing of slots. */ + void printUserFriendly(const MemSlot& slot, + std::ostream& out, + const std::string& indent) const; + void print(const MemSlot& slot, + std::ostream& out, bool verbose, + const std::string& indent) const; + + /** Debug function to print state. 
*/ + void printState(std::ostream& out, bool userFriendlyOutput = false, + bool printBody = true, bool printHeader = true, + //MetaDataOrder order = DEFAULT, + const std::string& indent = "") const; +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp new file mode 100644 index 00000000000..722fef80103 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp @@ -0,0 +1,529 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/util/vstringfmt.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> + +LOG_SETUP(".persistence.memfile.cache"); + +namespace storage { +namespace memfile { + + +void +MemFileCache::Entry::setInUse(bool inUse) { + LOG(debug, "Setting in use to %d for file %s", inUse, _file.toString().c_str()); + _inUse = inUse; +} + +void +MemFileCache::returnToCache(MemFileCache::Entry& entry) +{ + // Ensure file descriptor is closed before returning to cache + entry._file.getMemFileIO().close(); + vespalib::LockGuard lock(_cacheLock); + + BucketInfo info(entry._file.getBucketInfo()); + BucketId id(entry._file.getFile().getBucketId()); + + LOG(debug, "%s being returned to cache", id.toString().c_str()); + + MemoryUsage newUsage = entry._file.getCacheSize(); + + if (_memoryToken->getSize() == 0 || newUsage.sum() == 0) { + entry._file.flushToDisk(); + eraseNoLock(id); + return; + } + + // File must be flushed before being returned to the cache. 
+ assert(!entry._file.slotsAltered()); + entry.setInUse(false); + + Entry* ptr = 0; + { + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + BucketIdx::iterator it(bucketIdx.find(id)); + assert(it != bucketIdx.end()); + ptr = it->_ptr.get(); + + if (entry._returnToCacheWhenFinished) { + EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id); + _entries.replace(it, wrp); + _memoryUsage.add(newUsage); + entry._cacheSize = newUsage; + } else { + _entries.erase(it); + } + } + + LOG(spam, + "Bucket %s, ptr %p returned to cache: %s with %s. " + "Total cache size after return: %s", + id.toString().c_str(), + ptr, + info.toString().c_str(), + newUsage.toString().c_str(), + _memoryUsage.toString().c_str()); + + evictWhileFull(); +} + +void +MemFileCache::done(MemFileCache::Entry& entry) +{ + LOG(spam, "Finished with file %s", + entry._file.getFile().toString().c_str()); + + try { + entry._file.verifyConsistent(); + } catch (vespalib::Exception e) { + LOG(debug, + "Verification of cache entry %s failed: %s", + entry._file.getFile().toString().c_str(), + e.getMessage().c_str()); + + entry.setInUse(false); + throw; + } + + assert(entry.isInUse()); + + returnToCache(entry); +} + +struct MemFileCache::CacheEntryGuard : public MemFilePtr::EntryGuard { + MemFileCache& _cache; + Environment& _env; + MemFileCache::Entry* _entry; + + CacheEntryGuard( + MemFileCache& cache, + Environment& env, + MemFileCache::Entry& entry) + : MemFilePtr::EntryGuard(entry._file), + _cache(cache), + _env(env), + _entry(&entry) + { + } + virtual ~CacheEntryGuard() { + if (_entry) { + _cache.done(*_entry); + } + } + + MemFile& getFile() { + return _entry->_file; + } + + virtual void deleteFile() { + LOG(debug, "Cache entry guard deleting %s", _file->toString().c_str()); + _env._memFileMapper.deleteFile(*_file, _env); + erase(); + } + + virtual void erase() { + LOG(debug, "Cache entry guard erasing %s from cache", + _file->toString().c_str()); + 
_cache.erase(document::BucketId(_entry->_file.getFile().getBucketId())); + _entry = 0; + } + + virtual void move(EntryGuard& target) { + LOG(debug, "Cache entry guard moving %s", _file->toString().c_str()); + _cache.move(*this, static_cast<CacheEntryGuard&>(target)); + } + + void moveState(CacheEntryGuard& target) { + // Move state over to target. + target._entry = _entry; + target._file = _file; + + // Invalidate this. + _entry = NULL; + _file = NULL; + } + + MemFile* operator->() { + return &_entry->_file; + } +}; + +MemFileCache::MemFileCache(framework::ComponentRegister& componentRegister, + MemFilePersistenceCacheMetrics& metrics) + : Component(componentRegister, "memfilecache"), + _lastUsedCounter(0), + _allocationType(getMemoryManager().registerAllocationType( + framework::MemoryAllocationType( + "memfilecache", framework::MemoryAllocationType::CACHE))), + _memoryToken(getMemoryManager().allocate(_allocationType, 0, 0, 200)), + _metrics(metrics), + _bodyEvicter(_metrics.body_evictions), + _headerEvicter(_metrics.header_evictions), + _metaDataEvicter(_metrics.meta_evictions) +{ +}; + +void +MemFileCache::setCacheSize(MemoryUsage cacheSize) +{ + vespalib::LockGuard lock(_cacheLock); + + _cacheLimit = cacheSize; + + _memoryToken->resize(std::min(_memoryToken->getSize(), _cacheLimit.sum()), + _cacheLimit.sum()); + + evictWhileFull(); +} + +MemFilePtr +MemFileCache::get(const BucketId& id, Environment& env, Directory& dir, + bool createIfNotExisting) +{ + vespalib::LockGuard lock(_cacheLock); + + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + + BucketIdx::iterator it(bucketIdx.find(id)); + if (it == bucketIdx.end()) { + LOG(debug, + "Bucket %s was not in cache. 
Creating cache entry.", + id.toString().c_str()); + + FileSpecification file(id, dir, env.calculatePathInDir(id, dir)); + const uint64_t counter(++_lastUsedCounter); + lock.unlock(); + // Create memfile outside lock, since this will involve disk reads + // in the common case that there's a bucket file on the disk. The + // content layer shall guarantee that no concurrent operations happen + // for a single bucket, so this should be fully thread safe. + Entry::LP entry(new Entry(file, env, createIfNotExisting)); + + vespalib::LockGuard reLock(_cacheLock); + std::pair<LRUCache::iterator, bool> inserted( + _entries.insert(EntryWrapper(entry, counter, id))); + assert(inserted.second); + _metrics.misses.inc(); + + return MemFilePtr(MemFilePtr::EntryGuard::LP( + new CacheEntryGuard(*this, env, *entry))); + } else { + if (it->_ptr->isInUse()) { + LOG(error, + "Bug! File %s, ptr %p was in use while in the file cache", + it->_ptr->_file.toString(true).c_str(), it->_ptr.get()); + assert(false); + } + + it->_ptr->setInUse(true); + _memoryUsage.sub(it->_ptr->_cacheSize); + EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id); + _entries.replace(it, wrp); + _metrics.hits.inc(); + } + LOG(debug, + "Bucket %s was already in cache. Returning cache entry with " + "memory usage %s, new total memory usage: %s", + id.toString().c_str(), + it->_ptr->_cacheSize.toString().c_str(), + _memoryUsage.toString().c_str()); + + return MemFilePtr(MemFilePtr::EntryGuard::LP( + new CacheEntryGuard(*this, env, *it->_ptr))); +} + +// TODO: can this be removed?? 
+MemFileCache::BucketInfoMap +MemFileCache::flushDirtyEntries() +{ + vespalib::LockGuard lock(_cacheLock); + BucketInfoMap retVal; + + uint32_t total = 0, count = 0; + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + for (BucketIdx::iterator it = bucketIdx.begin(); it != bucketIdx.end(); ++it) { + ++total; + if (!it->_ptr->isInUse()) { + retVal[it->_ptr->_file.getFile().getBucketId()] = + it->_ptr->_file.getBucketInfo(); + + it->_ptr->_file.flushToDisk(); + // For now, close all files after done flushing, to avoid getting + // too many open at the same time. Later cache may cache limited + // amount of file handles + it->_ptr->_file.getMemFileIO().close(); + + ++count; + } + } + LOG(debug, "Flushed %u of %u entries in cache. Rest are in use", count, total); + + return retVal; +} + +void +MemFileCache::clear() +{ + vespalib::LockGuard lock(_cacheLock); + + uint32_t total = 0, count = 0; + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + for (BucketIdx::iterator it = bucketIdx.begin(); + it != bucketIdx.end();) + { + ++total; + if (!it->_ptr->isInUse()) { + // Any file not in use should have been flushed to disk already. + assert(!it->_ptr->_file.slotsAltered()); + _memoryUsage.sub(it->_ptr->_cacheSize); + it = bucketIdx.erase(it); + ++count; + } else { + ++it; + } + } + LOG(debug, "Flushed and cleared %u of %u entries in cache. 
Rest are in use", + count, total); +} + +void +MemFileCache::eraseNoLock(const document::BucketId& id) +{ + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + BucketIdx::iterator iter = bucketIdx.find(id); + + assert(iter != bucketIdx.end()); + assert(iter->_ptr->isInUse()); + //assert(!iter->_ptr->_file.slotsAltered()); + LOG(debug, "Removing %s from cache", id.toString().c_str()); + bucketIdx.erase(iter); +} + +void +MemFileCache::erase(const document::BucketId& id) { + vespalib::LockGuard lock(_cacheLock); + eraseNoLock(id); +} + +void +MemFileCache::move(CacheEntryGuard& source, CacheEntryGuard& target) +{ + vespalib::LockGuard lock(_cacheLock); + assert(target->empty()); + + document::BucketId sourceId = source->getFile().getBucketId(); + document::BucketId targetId = target->getFile().getBucketId(); + + LOG(debug, "Renaming file %s to %s", + source->toString().c_str(), + target->toString().c_str()); + source->move(target->getFile()); + source.moveState(target); + + BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + BucketIdx::iterator sourceIt(bucketIdx.find(sourceId)); + BucketIdx::iterator targetIt(bucketIdx.find(targetId)); + assert(sourceIt != bucketIdx.end()); + assert(targetIt != bucketIdx.end()); + + EntryWrapper wrp(sourceIt->_ptr, sourceIt->_lastUsed, targetId); + bucketIdx.erase(sourceIt); + _entries.replace(targetIt, wrp); +} + +MemFileCache::TimeIdx::iterator +MemFileCache::getLeastRecentlyUsedBucket() +{ + return boost::multi_index::get<1>(_entries).begin(); + +} + +uint64_t +MemFileCache::size() const +{ + LOG(spam, "memory usage is now %s (total is %zu)", + _memoryUsage.toString().c_str(), _memoryUsage.sum()); + return _memoryUsage.sum(); +} + +bool +MemFileCache::contains(const document::BucketId& bucketId) const +{ + vespalib::LockGuard lock(_cacheLock); + const BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); + return bucketIdx.find(bucketId) != bucketIdx.end(); +} + +MemFileCache::TimeIdx::iterator 
MemFileCache::MetaDataEvictionPolicy::evict(
    MemFileCache::TimeIdx& lruIndex,
    MemFileCache::TimeIdx::iterator& it,
    MemFileCache::MemoryUsage& curUsage)
{
    // Last-resort policy: drops the whole cache entry (metadata included).
    LOG(debug, "Evicting entire memfile for %s from cache. %s held",
        it->_bid.toString().c_str(),
        it->_ptr->_cacheSize.toString().c_str());
    curUsage.sub(it->_ptr->_cacheSize);
    _evictionMetric.inc();
    return lruIndex.erase(it);
}

MemFileCache::TimeIdx::iterator
MemFileCache::BodyEvictionPolicy::evict(
    MemFileCache::TimeIdx& /*lruIndex*/,
    MemFileCache::TimeIdx::iterator& it,
    MemFileCache::MemoryUsage& curUsage)
{
    // Cheapest policy: drop only cached document bodies, keep entry.
    LOG(debug, "Removing body of %s from cache. %s held",
        it->_bid.toString().c_str(),
        it->_ptr->_cacheSize.toString().c_str());

    if (it->_ptr->_cacheSize.bodySize) {
        it->_ptr->_file.clearCache(BODY);
        curUsage.bodySize -= it->_ptr->_cacheSize.bodySize;
        it->_ptr->_cacheSize.bodySize = 0;
        _evictionMetric.inc();
    }
    return ++it;
}

MemFileCache::TimeIdx::iterator
MemFileCache::HeaderEvictionPolicy::evict(
    MemFileCache::TimeIdx& /*lruIndex*/,
    MemFileCache::TimeIdx::iterator& it,
    MemFileCache::MemoryUsage& curUsage)
{
    // Middle policy: drop header and body caches, keep metadata and entry.
    LOG(debug, "Removing header and body of %s from cache. %s held",
        it->_bid.toString().c_str(),
        it->_ptr->_cacheSize.toString().c_str());

    if (it->_ptr->_cacheSize.headerSize) {
        it->_ptr->_file.clearCache(HEADER);
        it->_ptr->_file.clearCache(BODY);
        curUsage.headerSize -= it->_ptr->_cacheSize.headerSize;
        curUsage.bodySize -= it->_ptr->_cacheSize.bodySize;
        it->_ptr->_cacheSize.headerSize = 0;
        it->_ptr->_cacheSize.bodySize = 0;
        _evictionMetric.inc();
    }
    return ++it;
}

/**
 * Walk the LRU (oldest first, resuming from the policy's cursor) and apply
 * the policy's evict() until total usage is within the limit, or the portion
 * this policy governs (getValue) is within its share of the limit.
 * Must be called with _cacheLock held.
 */
template <typename EvictionPolicy>
void
MemFileCache::executeCacheEvictionPolicy(EvictionPolicy& policy)
{
    MemFileCache::TimeIdx& timeIdx = boost::multi_index::get<1>(_entries);
    for (MemFileCache::TimeIdx::iterator
             i(timeIdx.upper_bound(policy.getEvictionCursor())),
             e(timeIdx.end());
         i != e;)
    {
        if (_memoryUsage.sum() <= _cacheLimit.sum()
            || (policy.getValue(_memoryUsage)
                <= policy.getValue(_cacheLimit)))
        {
            LOG(spam, "Aborting current policy because "
                "memory usage %s is less than soft limit %s",
                _memoryUsage.toString().c_str(),
                _cacheLimit.toString().c_str());

            return;
        }

        LOG(spam, "Need to evict more data as memory usage is %zu, hard limit is %zu",
            _memoryUsage.sum(), _cacheLimit.sum());

        // If memfile is in use, skip. It will be readded with new
        // timestamp once it's done being used, which means the
        // invariant of there not being any files < the cursor holding
        // cached data of the policy's type will be maintained.
        if (i->_ptr->isInUse()) {
            LOG(spam, "Not evicting %s as it is currently active",
                i->_bid.toString().c_str());
            ++i;
            continue;
        }
        policy.setEvictionCursor(i->_lastUsed);
        i = policy.evict(timeIdx, i, _memoryUsage);
    }
}

/** Run the policies from cheapest to most destructive, stopping early
 *  as soon as total usage fits within the limit. */
void
MemFileCache::executeEvictionPolicies()
{
    executeCacheEvictionPolicy(_bodyEvicter);
    if (_memoryUsage.sum() <= _cacheLimit.sum()) {
        return;
    }
    executeCacheEvictionPolicy(_headerEvicter);
    if (_memoryUsage.sum() <= _cacheLimit.sum()) {
        return;
    }
    executeCacheEvictionPolicy(_metaDataEvicter);
}

/** Evict if over the configured limit, then refresh the usage metrics. */
void
MemFileCache::evictWhileFull()
{
    if (size() > _cacheLimit.sum()) {
        LOG(debug, "Before cache eviction, cache usage was %s"
            ", new max size is %" PRIu64,
            _memoryUsage.toString().c_str(), _cacheLimit.sum());

        executeEvictionPolicies();

        LOG(spam, "After cache eviction, memory usage is %s",
            _memoryUsage.toString().c_str());
    } else {
        LOG(spam, "Max cache size is %" PRIu64 " bytes, but cache "
            "only using %" PRIu64 " bytes, so not evicting anything",
            _cacheLimit.sum(), _memoryUsage.sum());
    }

    _metrics.files.set(_entries.size());
    _metrics.meta.set(_memoryUsage.metaSize);
    _metrics.header.set(_memoryUsage.headerSize);
    _metrics.body.set(_memoryUsage.bodySize);
}

MemFileCache::Statistics
MemFileCache::getCacheStats() const
{
    vespalib::LockGuard lock(_cacheLock);
    return Statistics(_memoryUsage, _memoryToken->getSize(), _entries.size());
}

/** Status-page helper: dump entries in most-recently-used order as HTML. */
void
MemFileCache::printCacheEntriesHtml(std::ostream& out) const
{
    vespalib::LockGuard lock(_cacheLock);
    out << "<p>Cache entries (most recently used first):</p>\n"
        << "<ol>\n";
    const MemFileCache::TimeIdx& timeIdx(boost::multi_index::get<1>(_entries));
    for (MemFileCache::TimeIdx::const_reverse_iterator
             it(timeIdx.rbegin()), e(timeIdx.rend());
         it != e; ++it)
    {
        out << "<li>";
        out << it->_bid << ": ";
        if (!it->_ptr->isInUse()) {
            out << it->_ptr->_cacheSize.toString();
        } else {
            out << "<em>(in use)</em>";
        }
        out
<< "</li>\n";
    }
    out << "</ol>\n";
}

} // memfile

} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h
new file mode 100644
index 00000000000..cc25bd5f7a8
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h
@@ -0,0 +1,301 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::slotfile::MemFileCache
 * \ingroup memfile
 *
 * \brief Cache holding onto all mem file objects in memory.
 *
 * This is the global memory file cache keeping track of all the memory files
 * in memory.
 */

#pragma once

#include <vespa/metrics/metrics.h>
#include <vespa/memfilepersistence/common/types.h>
#include <vespa/memfilepersistence/memfile/memfile.h>
#include <vespa/memfilepersistence/memfile/memfileptr.h>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/identity.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/mem_fun.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <vespa/storageframework/generic/memory/memorymanagerinterface.h>

namespace storage {

namespace memfile {

class MemFilePersistenceCacheMetrics;
class Environment; // Avoid cyclic dependency with environment

class MemFileCache : private framework::Component,
                     private Types
{
public:
    typedef MemSlot::MemoryUsage MemoryUsage;

    /** Snapshot of cache usage; returned by getCacheStats(). */
    struct Statistics
    {
        MemoryUsage _memoryUsage;
        size_t _cacheSize;
        size_t _numEntries;

        Statistics(const MemoryUsage& memoryUsage,
                   size_t cacheSize,
                   size_t numEntries)
            : _memoryUsage(memoryUsage),
              _cacheSize(cacheSize),
              _numEntries(numEntries)
        {}
    };
private:
    /** One cached MemFile plus bookkeeping for handout and accounting. */
    class Entry : boost::noncopyable {
    public:
        typedef vespalib::LinkedPtr<Entry> LP;

        MemFile _file;
        MemoryUsage _cacheSize;  // footprint accounted in _memoryUsage while cached
        Environment& _env;
        bool _inUse;             // true while handed out via MemFilePtr
        bool _returnToCacheWhenFinished;

        Entry(FileSpecification& file, Environment& env,
              bool returnToCacheWhenFinished = true)
            : _file(file, env), _env(env), _inUse(true),
              _returnToCacheWhenFinished(returnToCacheWhenFinished)
        {}

        bool isInUse() const {
            return _inUse;
        }

        void setInUse(bool inUse);
    };

    /** Multi-index element: entry plus the keys (bucket id, LRU counter). */
    struct EntryWrapper {
        EntryWrapper(
                Entry::LP ptr,
                uint64_t lastUsed,
                const document::BucketId& bid)
            : _ptr(ptr), _lastUsed(lastUsed), _bid(bid) {}

        const Entry* operator->() const {
            return _ptr.get();
        };

        Entry* operator->() {
            return _ptr.get();
        };

        Entry::LP _ptr;
        uint64_t _lastUsed;
        document::BucketId _bid;
    };

    struct CacheEntryGuard;

    vespalib::Lock _cacheLock;

    // Index 0: unique lookup by bucket id.
    typedef boost::multi_index::ordered_unique<
        boost::multi_index::member<EntryWrapper, BucketId, &EntryWrapper::_bid>
    > BucketIdOrder;

    // Index 1: LRU order via the monotonically increasing usage counter.
    typedef boost::multi_index::ordered_non_unique<
        boost::multi_index::member<EntryWrapper, uint64_t, &EntryWrapper::_lastUsed>
    > TimeOrder;

    typedef boost::multi_index::multi_index_container<
        EntryWrapper,
        boost::multi_index::indexed_by<
            BucketIdOrder,
            TimeOrder
        >
    > LRUCache;

    typedef boost::multi_index::nth_index<LRUCache, 0>::type BucketIdx;
    typedef boost::multi_index::nth_index<LRUCache, 1>::type TimeIdx;

    /**
     * Base for the eviction policies below. The cursor remembers the LRU
     * counter of the last entry this policy visited so later sweeps resume
     * where the previous one ended.
     */
    class CacheEvictionPolicy
    {
        uint64_t _evictionCursor;
    protected:
        metrics::LongCountMetric& _evictionMetric;
    public:
        CacheEvictionPolicy(metrics::LongCountMetric& evictionMetric)
            : _evictionCursor(0),
              _evictionMetric(evictionMetric)
        {}

        uint64_t getEvictionCursor() const {
            return _evictionCursor;
        }
        void setEvictionCursor(uint64_t cursor) {
            _evictionCursor = cursor;
        }
    };

    /** Evicts the whole cache entry (metadata included). */
    class MetaDataEvictionPolicy : public CacheEvictionPolicy
    {
    public:
        MetaDataEvictionPolicy(metrics::LongCountMetric& evictionMetric)
            : CacheEvictionPolicy(evictionMetric) {}

        TimeIdx::iterator evict(
                TimeIdx& lruIndex,
                TimeIdx::iterator& it,
                MemoryUsage& curUsage);

        uint64_t getValue(const MemoryUsage& usage) const {
            return usage.sum();
        }
    };

    /** Evicts only cached document bodies. */
    class BodyEvictionPolicy : public CacheEvictionPolicy
    {
    public:
        BodyEvictionPolicy(metrics::LongCountMetric& evictionMetric)
            : CacheEvictionPolicy(evictionMetric) {}

        TimeIdx::iterator evict(
                TimeIdx& lruIndex,
                TimeIdx::iterator& it,
                MemoryUsage& curUsage);

        uint64_t getValue(const MemoryUsage& usage) const {
            return usage.bodySize;
        }
    };

    /** Evicts cached headers together with their bodies. */
    class HeaderEvictionPolicy : public CacheEvictionPolicy
    {
    public:
        HeaderEvictionPolicy(metrics::LongCountMetric& evictionMetric)
            : CacheEvictionPolicy(evictionMetric) {}

        TimeIdx::iterator evict(
                TimeIdx& lruIndex,
                TimeIdx::iterator& it,
                MemoryUsage& curUsage);

        uint64_t getValue(const MemoryUsage& usage) const {
            return usage.headerSize + usage.bodySize;
        }
    };


    MemoryUsage _memoryUsage;

    LRUCache _entries;
    uint64_t _lastUsedCounter;
    const framework::MemoryAllocationType& _allocationType;
    framework::MemoryToken::UP _memoryToken;

    MemFilePersistenceCacheMetrics& _metrics;

    BodyEvictionPolicy _bodyEvicter;
    HeaderEvictionPolicy _headerEvicter;
    MetaDataEvictionPolicy _metaDataEvicter;

    void done(Entry&);
    void move(CacheEntryGuard& source, CacheEntryGuard& target);
    void evictWhileFull();
    void executeEvictionPolicies();
    void returnToCache(MemFileCache::Entry& entry);

    TimeIdx::iterator getLeastRecentlyUsedBucket();

    /**
     * @return Returns the current size of the cache.
     */
    uint64_t size() const;

    void eraseNoLock(const document::BucketId& id);

    friend class CacheEntryGuard;
    friend class MemCacheTest;

    template <typename EvictionPolicy>
    void
    executeCacheEvictionPolicy(EvictionPolicy& policy);

    MemoryUsage _cacheLimit;

public:
    typedef std::unique_ptr<MemFileCache> UP;

    MemFileCache(framework::ComponentRegister& componentRegister,
                 MemFilePersistenceCacheMetrics& metrics);

    /**
     * Get a memfile for the given bucket on the given disk.
     * @param env Needed for cache to be able to create non-existing entries.
     * @param dir If not given, use the default directory from the environment.
     * @param createIfNotInCache If false, the bucket won't be inserted into the
     * cache after, unless it was already cached before this operation.
     */
    MemFilePtr get(const BucketId&,
                   Environment& env,
                   Directory& dir,
                   bool createIfNotInCache = true);

    /**
     * Removes the given bucket id from cache. Bucket must be in use,
     * so erase() will as a consequence not subtract the bucket's cache
     * usage from the total cache usage as that has already been done
     * upon retrieving the bucket in the first place.
     */
    void erase(const document::BucketId& id);

    typedef std::map<document::BucketId, BucketInfo> BucketInfoMap;

    /**
     * This function exists just temporarily for memfile layer to flush all
     * dirty entries found after each operation. This will be removed in favor
     * of another mechanism later.
     */
    BucketInfoMap flushDirtyEntries();

    /**
     * Clears the cache of all non-active entries (flushing dirty entries
     * as necessary).
     */
    void clear();

    /**
     * @return Returns true if the given bucket exists in the cache.
     */
    bool contains(const document::BucketId& bucketId) const;

    /**
     * Used for unit testing only.
     */
    framework::MemoryToken& getMemoryToken() { return *_memoryToken; }
    const MemFilePersistenceCacheMetrics& getMetrics() const {
        return _metrics;
    }

    /**
     * Set maximum cache size.
     */
    void setCacheSize(MemoryUsage limits);

    uint64_t getCacheSize() { return _memoryToken->getSize(); }

    /**
     * NOTE: takes lock, never call from within memfilecache code.
     * @return Statistics over cache memory usage and entry counts
     */
    Statistics getCacheStats() const;

    /**
     * Dump all cache entries as a most recently used-ordered list.
     * Used for verbose status page printing.
     */
    void printCacheEntriesHtml(std::ostream& out) const;
};

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp
new file mode 100644
index 00000000000..17bf530d450
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp
@@ -0,0 +1,208 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/log/log.h>
#include <vespa/memfilepersistence/memfile/memfile.h>
#include <vespa/memfilepersistence/memfile/memfilecompactor.h>
#include <algorithm>

LOG_SETUP(".persistence.memfile.compactor");

namespace storage {
namespace memfile {

/** Per-document version tracking used while scanning a file's slots. */
struct DocumentVersionInfo {
    document::DocumentId _id;
    uint32_t _versions;   // number of slots seen so far for this document
    bool _tombstoned;     // true if any seen slot was a remove entry

    DocumentVersionInfo(const document::DocumentId& docId, bool tombstoned)
        : _id(docId),
          _versions(1),
          _tombstoned(tombstoned)
    {
    }

    bool newerVersionExists() const noexcept {
        return (_versions != 1);
    }
};

namespace {

bool
isTombstone(const MemSlot& slot)
{
    return slot.deleted();
}

// Deduct with underflow protection
template<typename T>
T deduct(T a, T b) {
    return (a > b ? a - b : T(0));
}

struct CompactSlotInfo : private Types {
    typedef std::list<DocumentVersionInfo> DocList;
    typedef vespalib::hash_map<GlobalId, DocList, GlobalId::hash> Map;
    Map _info;
    const MemFile& _memFile;

    CompactSlotInfo(const MemFile& memFile)
        : _info(2 * memFile.getSlotCount()),
          _memFile(memFile)
    {
    }

    /**
     * Registers a particular document version as having been seen in the file,
     * keeping track of how many newer versions have been observed thus far and
     * whether at least one of these was a tombstone (remove entry).
     *
     * Potential GID collisions are handled by utilizing the actual document
     * ID to track specific documents.
     *
     * Returns a reference to the currently tracked version state for the
     * document the slot is for. Returned reference is valid until the next
     * invocation of registerSeen() or the owning CompactSlotInfo instance
     * is destructed.
     */
    DocumentVersionInfo& registerSeen(const MemSlot& slot) {
        document::DocumentId id = _memFile.getDocumentId(slot);
        DocList& gidDocs(_info[slot.getGlobalId()]);
        auto matchesId = [&](const DocumentVersionInfo& doc) {
            return (id == doc._id);
        };
        auto existing = std::find_if(
                gidDocs.begin(), gidDocs.end(), matchesId);

        if (existing == gidDocs.end()) { // (Very) common case
            gidDocs.emplace_back(id, isTombstone(slot));
            return gidDocs.back();
        } else {
            ++existing->_versions;
            if (isTombstone(slot)) {
                existing->_tombstoned = true;
            }
            return *existing;
        }
    }
};

/** Range adapter so compaction can range-for over slots newest-first. */
class DecreasingTimestampSlotRange
{
public:
    DecreasingTimestampSlotRange(const MemFile& memFile)
        : _memFile(memFile)
    {
    }
    MemFile::const_iterator begin() const {
        return _memFile.begin(Types::ITERATE_REMOVED);
    }
    MemFile::const_iterator end() const {
        return _memFile.end();
    }
private:
    const MemFile& _memFile;
};

DecreasingTimestampSlotRange
allSlotsInDecreasingTimestampOrder(const MemFile& memFile)
{
    return {memFile};
}

}

+MemFileCompactor::MemFileCompactor( + framework::MicroSecTime currentTime, + const CompactionOptions& options) + : _options(options), + _currentTime(currentTime), + _revertTimePoint(deduct(currentTime, options._revertTimePeriod)), + _keepRemoveTimePoint(deduct(currentTime, options._keepRemoveTimePeriod)) +{ + assert(_options._maxDocumentVersions != 0); +} + +/* + * Cases to handle: + * - Document has too many versions; always remove slot + * - But otherwise, only remove if older than revert time. + * - Remove entry is too old; remove slot if older than revert time AND keep + * remove time. + * - Tombstoned entries are not resurrected as they are either compacted + * away due to being outside the revert time period or their tombstone + * survives by being inside the revert time period. The "keep remove + * time" period is also forced to be at least as high as the revert time + * period at configuration time. + * - Otherwise, keep the slot. + */ +MemFileCompactor::SlotList +MemFileCompactor::getSlotsToRemove(const MemFile& memFile) +{ + memFile.ensureHeaderBlockCached(); + + std::vector<const MemSlot*> removeSlots; + CompactSlotInfo slots(memFile); + + LOG(spam, + "Running compact on %s. Using revertTime=%zu, " + "keepRemoveTime=%zu, maxDocumentVersions=%u", + memFile.toString(true).c_str(), + _revertTimePoint.getTime(), + _keepRemoveTimePoint.getTime(), + _options._maxDocumentVersions); + + for (auto& slot : allSlotsInDecreasingTimestampOrder(memFile)) { + DocumentVersionInfo& info(slots.registerSeen(slot)); + + if (exceededVersionCount(info)) { + alwaysCompact(slot, removeSlots); + } else if (info.newerVersionExists()) { + // A tombstone also counts as a newer version. + compactIfNotRevertible(slot, removeSlots); + } else if (isTombstone(slot) && keepRemoveTimeExpired(slot)) { + compactIfNotRevertible(slot, removeSlots); + } // else: keep slot since it's the newest or within revert period. 
+ } + + std::reverse(removeSlots.begin(), removeSlots.end()); + return removeSlots; +} + +bool +MemFileCompactor::exceededVersionCount( + const DocumentVersionInfo& info) const noexcept +{ + return (info._versions > _options._maxDocumentVersions); +} + +bool +MemFileCompactor::keepRemoveTimeExpired(const MemSlot& slot) const noexcept +{ + return (slot.getTimestamp() < _keepRemoveTimePoint); +} + +void +MemFileCompactor::compactIfNotRevertible( + const MemSlot& slot, + SlotList& slotsToRemove) const +{ + // May compact slot away if its timestamp is older than the point in time + // where we expect reverts may be sent. + if (slot.getTimestamp() < _revertTimePoint) { + alwaysCompact(slot, slotsToRemove); + } +} + +void +MemFileCompactor::alwaysCompact(const MemSlot& slot, + SlotList& slotsToRemove) const +{ + LOG(spam, "Compacting slot %s", slot.toString().c_str()); + slotsToRemove.push_back(&slot); +} + + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h new file mode 100644 index 00000000000..f402489e627 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::MemFileCompactor + * \ingroup memfile + * + * \brief Class containing logic to find what slots in a memfile can be removed. 
 */
#pragma once

#include <vespa/memfilepersistence/common/types.h>
#include <limits>

namespace storage {
namespace memfile {

class MemFile;
class MemSlot;

/** Builder-style knobs controlling what compaction may remove. */
struct CompactionOptions
{
    framework::MicroSecTime _revertTimePeriod;
    framework::MicroSecTime _keepRemoveTimePeriod;
    uint32_t _maxDocumentVersions {std::numeric_limits<uint32_t>::max()};

    CompactionOptions& revertTimePeriod(framework::MicroSecTime t) {
        _revertTimePeriod = t;
        return *this;
    }

    CompactionOptions& keepRemoveTimePeriod(framework::MicroSecTime t) {
        _keepRemoveTimePeriod = t;
        return *this;
    }

    CompactionOptions& maxDocumentVersions(uint32_t maxVersions) {
        _maxDocumentVersions = maxVersions;
        return *this;
    }
};

class DocumentVersionInfo;

class MemFileCompactor : public Types
{
public:
    using SlotList = std::vector<const MemSlot*>;

    MemFileCompactor(framework::MicroSecTime currentTime,
                     const CompactionOptions& options);

    /** Compute which slots may safely be removed from the given file. */
    SlotList getSlotsToRemove(const MemFile& memFile);
private:
    bool exceededVersionCount(const DocumentVersionInfo&) const noexcept;
    bool keepRemoveTimeExpired(const MemSlot& slot) const noexcept;
    void compactIfNotRevertible(const MemSlot& slot,
                                SlotList& slotsToRemove) const;
    void alwaysCompact(const MemSlot& slot, SlotList& slotsToRemove) const;

    CompactionOptions _options;
    framework::MicroSecTime _currentTime;
    // Precomputed cut-off points (currentTime minus the respective periods).
    framework::MicroSecTime _revertTimePoint;
    framework::MicroSecTime _keepRemoveTimePoint;
};

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h
new file mode 100644
index 00000000000..fbe06d2c4df
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h
@@ -0,0 +1,80 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/memfilepersistence/common/types.h>
#include <vespa/memfilepersistence/common/filespecification.h>

namespace storage {

namespace memfile {

class Environment;

/**
 * Abstraction over a MemFile's backing I/O and buffer cache: translates
 * DataLocation references into document headers/bodies and back.
 */
class MemFileIOInterface : public Types {
public:
    virtual ~MemFileIOInterface() {}

    typedef std::unique_ptr<MemFileIOInterface> UP;

    /**
     * Deserializes the data in the given location (must already be read from disk),
     * into a document object. If the data is not already read from disk, returns NULL.
     */
    virtual Document::UP getDocumentHeader(
            const document::DocumentTypeRepo&,
            DataLocation loc) const = 0;

    virtual document::DocumentId getDocumentId(DataLocation loc) const = 0;

    /**
     * Deserializes the given document's body part with the data in the given data
     * location.
     */
    virtual void readBody(
            const document::DocumentTypeRepo&,
            DataLocation loc,
            Document& doc) const = 0;

    virtual DataLocation addDocumentIdOnlyHeader(
            const DocumentId&,
            const document::DocumentTypeRepo&) = 0;

    virtual DataLocation addHeader(const Document& doc) = 0;

    virtual DataLocation addBody(const Document& doc) = 0;

    virtual void clear(DocumentPart part) = 0;

    virtual bool verifyConsistent() const = 0;

    /** Rebind this I/O instance to a new backing file location. */
    virtual void move(const FileSpecification& target) = 0;

    virtual DataLocation copyCache(const MemFileIOInterface& source,
                                   DocumentPart part,
                                   DataLocation loc) = 0;

    /** Ensure the given locations of the given part are in the buffer cache. */
    virtual void ensureCached(Environment& env,
                              DocumentPart part,
                              const std::vector<DataLocation>& locations) = 0;

    virtual bool isCached(DataLocation loc, DocumentPart part) const = 0;

    virtual bool isPersisted(DataLocation loc, DocumentPart part) const = 0;

    virtual uint32_t getSerializedSize(DocumentPart part,
                                       DataLocation loc) const = 0;

    virtual void close() = 0;

    virtual size_t getCachedSize(DocumentPart part) const = 0;

    /** Convenience: drop both header and body caches. */
    void clear() {
        clear(HEADER);
        clear(BODY);
    }
};

} // memfile

} // storage

diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h new file mode 100644 index 00000000000..545686e5f2f --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::slotfile::MemFilePtr + * \ingroup memfile + * + * \brief Utility class for managing an entry taken from cache. + * + * To be able to seamlessly return copy by value objects from the cache that + * can be used, and automatically return to the cache on destruction, this + * wrapper class exist to ensure that then the last user stops using it, it + * will be released. + * + * This object is created by the cache and returned to the disk thread using it. + * A linked pointer should thus be safe as we assume all users of it will be in + * the same thread. It assumes the cache itself has a lifetime longer than this + * object. + */ + +#pragma once + +#include <vespa/vespalib/util/linkedptr.h> + +namespace storage { +namespace memfile { + +class MemFile; + +class MemFilePtr { +public: + /** + * Utility class to ensure we call done() on cache after all cache + * pointers are deleted. The cache implements a subclass of this class + * doing it, to prevent cyclic dependency with cache. + */ + struct EntryGuard { + typedef vespalib::LinkedPtr<EntryGuard> LP; + + MemFile* _file; + + EntryGuard(MemFile& file) : _file(&file) {} + virtual ~EntryGuard() {} + + virtual void erase() = 0; + virtual void deleteFile() = 0; + virtual void move(EntryGuard& target) = 0; + }; + +private: + EntryGuard::LP _entry; + +public: + MemFilePtr() {}; + MemFilePtr(EntryGuard::LP entry) : _entry(entry) {} + + // Behave like pointer to MemFile for ease of use. 
+ MemFile* operator->() { return _entry->_file; } + MemFile& operator*() { return *_entry->_file; } + MemFile* get() { + return (_entry.get() != 0 ? _entry->_file : 0); + } + const MemFile* operator->() const { return _entry->_file; } + const MemFile& operator*() const { return *_entry->_file; } + const MemFile* get() const { + return (_entry.get() != 0 ? _entry->_file : 0); + } + + /** Removes the entry from cache and deletes the underlying file. */ + void deleteFile() { _entry->deleteFile(); } + + /** + * Erases the entry from the cache. Does not touch the underlying file so + * therefore requires the memfile's alteredSlots() to return false. + */ + void eraseFromCache() { _entry->erase(); } + + /** + * Removes the entry from cache and renames the underlying file. + * The end result is that this mem file now points to the renamed file. + * The target MemFilePtr is invalid after this operation. + * + * @return Returns false if the target file already existed. + */ + void move(MemFilePtr& target) { + _entry->move(*target._entry); + } +}; + +} // storage +} // memfile + diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp new file mode 100644 index 00000000000..b7b88682fce --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 

#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/memfile/memslot.h>
#include <iostream>
#include <iomanip>

// Throws vespalib::IllegalStateException unless at least one of the flag
// bits in 'a' is currently set in _flags.
#define ASSERT_FLAG(a) \
{ \
    if (!(_flags & a)) { \
        std::ostringstream error; \
        error << "Expected one of flags " << std::hex << a << " to be set at " \
              << "this point, but only the given flags are set: " << _flags \
              << ", " << toString(true); \
        throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC); \
    } \
}

namespace storage {
namespace memfile {

// Copy constructor: plain member-wise copy of the slot metadata.
MemSlot::MemSlot(const MemSlot& other)
    : _timestamp(other._timestamp),
      _header(other._header),
      _body(other._body),
      _gid(other._gid),
      _flags(other._flags),
      _checksum(other._checksum)
{
}

// Constructor used by mappers when reading slots back from file.
MemSlot::MemSlot(const GlobalId& gid, Timestamp time,
                 DataLocation header, DataLocation body,
                 uint16_t flags, uint16_t checksum)
    : _timestamp(time),
      _header(header),
      _body(body),
      _gid(gid),
      _flags(flags),
      _checksum(checksum)
{
}

MemSlot::~MemSlot()
{
}

// Memory this slot accounts for in the cache: its own metadata plus the
// sizes recorded in the header and body data locations.
MemSlot::MemoryUsage
MemSlot::getCacheSize() const
{
    MemoryUsage retVal;
    retVal.metaSize = sizeof(MemSlot);
    retVal.headerSize = _header._size;
    retVal.bodySize = _body._size;
    return retVal;
}

MemSlot&
MemSlot::operator=(const MemSlot& other)
{
    _timestamp = other._timestamp;
    _header = other._header;
    _body = other._body;
    _gid = other._gid;
    _checksum = other._checksum;

    // Flags must be copied after cache.
    _flags = other._flags;
    return *this;
}

void
MemSlot::swap(MemSlot& other)
{
    std::swap(_timestamp, other._timestamp);
    std::swap(_header, other._header);
    std::swap(_body, other._body);
    std::swap(_gid, other._gid);
    std::swap(_checksum, other._checksum);
    std::swap(_flags, other._flags);
}

bool
MemSlot::hasBodyContent() const
{
    // A zero-sized body location means the slot has no body part.
    return _body._size > 0;
}

// Full member-wise equality; per the header, used in unit testing only.
bool
MemSlot::operator==(const MemSlot& other) const
{
    if (_checksum != other._checksum
        || _timestamp != other._timestamp
        || _header != other._header
        || _body != other._body
        || _flags != other._flags
        || _gid != other._gid)
    {
        return false;
    }
    return true;
}

void
MemSlot::print(std::ostream& out, bool verbose,
               const std::string& /*indent*/) const
{
    if (verbose) {
        out << "MemSlot(";
    }
    // NOTE: std::hex is sticky, so _checksum is printed in hex along with
    // _flags; the stream's base is not restored afterwards.
    out << std::dec << _timestamp << ", " << _gid << ", h "
        << _header._pos << " - " << _header._size << ", b "
        << _body._pos << " - " << _body._size << ", f "
        << std::hex << _flags << ", c " << _checksum;
    if (verbose) {
        out << ")";
    }
}

std::string
MemSlot::MemoryUsage::toString() const
{
    std::ostringstream ss;
    ss << "MemoryUsage(meta=" << metaSize
       << ", header=" << headerSize
       << ", body=" << bodySize
       << ")";
    return ss.str();
}

} // memfile
} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h new file mode 100644 index 00000000000..53a20a86f8a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h @@ -0,0 +1,189 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::MemSlot
 * \ingroup memfile
 *
 * \brief Class representing a slot in a MemFile.
 *
 * The MemSlot class keeps all the data we need for a single entry in the
 * slotfile.
 *
 * Note that a lot of these instances will be kept in the memory cache.
It is
 * important that the memory footprint of this class is really small, such that
 * we can fit many entries in the cache. The layout of the class is thus a bit
 * specialized to keep a low footprint.
 *
 * Currently, 40 bytes are used for metadata.
 *
 * A note about constness. The cached part are considered mutable, such that
 * all read access can be const. Only operations causing the slot to change on
 * disk (given a flush) is non-const.
 */
#pragma once

#include <vespa/memfilepersistence/common/types.h>

namespace storage {
namespace memfile {

class MemFile;

class MemSlot : private Types,
                private boost::operators<MemSlot>
{
    // Metadata for slot we need to keep.
    Timestamp _timestamp;   // 64 bit - 8 bytes timestamp
    DataLocation _header;   // 2x32 bit - 8 bytes header location
    DataLocation _body;     // 2x32 bit - 8 bytes body location
    GlobalId _gid;          // 96 bit - 12 bytes
    uint16_t _flags;        // 16 bit - 2 bytes flag
    uint16_t _checksum;     // 16 bit - 2 bytes checksum

    friend class MemFileTest;

    // used by tests to simulate gid collision.
    void setGlobalId(const GlobalId& gid) {
        _gid = gid;
    }

public:
    /** Breakdown of the memory a slot occupies in the cache. */
    struct MemoryUsage {
        MemoryUsage() :
            headerSize(0),
            bodySize(0),
            metaSize(0) {}

        // NOTE: parameter order (meta, header, body) differs from member
        // declaration order (header, body, meta) — easy to misuse.
        MemoryUsage(uint64_t metaSz, uint64_t headerSz, uint64_t bodySz)
            : headerSize(headerSz),
              bodySize(bodySz),
              metaSize(metaSz)
        {}

        uint64_t headerSize;
        uint64_t bodySize;
        uint64_t metaSize;

        uint64_t sum() const {
            return headerSize + bodySize + metaSize;
        }

        void add(const MemoryUsage& usage) {
            headerSize += usage.headerSize;
            bodySize += usage.bodySize;
            metaSize += usage.metaSize;
        }

        void sub(const MemoryUsage& usage) {
            headerSize -= usage.headerSize;
            bodySize -= usage.bodySize;
            metaSize -= usage.metaSize;
        }

        std::string toString() const;
    };

    typedef vespalib::LinkedPtr<MemSlot> LP;

    MemSlot(const MemSlot&);
    /** Constructor used by mappers reading from file. */
    MemSlot(const GlobalId& gid, Timestamp time,
            DataLocation header, DataLocation body,
            uint16_t flags, uint16_t checksum);
    ~MemSlot();

    MemSlot& operator=(const MemSlot&);
    void swap(MemSlot&);

    Timestamp getTimestamp() const { return _timestamp; }
    const GlobalId& getGlobalId() const { return _gid; }

    DataLocation getLocation(DocumentPart part) const
    { return (part == HEADER ? _header : _body); }

    bool inUse() const { return (_flags & IN_USE); }
    bool deleted() const { return (_flags & DELETED); }
    bool deletedInPlace() const { return (_flags & DELETED_IN_PLACE); }

    bool checksumOutdated() const { return (_flags & CHECKSUM_OUTDATED); }

    bool alteredInMemory() const { return (_flags & SLOTS_ALTERED); }

    bool usingUnusedFlags() const { return (_flags & UNUSED); }

    uint16_t getFlags() const { return _flags; }

    bool hasBodyContent() const;

    uint16_t getPersistedFlags() const
    { return (_flags & LEGAL_PERSISTED_SLOT_FLAGS); }

    /**
     * Returns the number of bytes required to keep this slot
     * in memory.
     */
    MemoryUsage getCacheSize() const;

    // Setting any flag bit in the low byte also marks the checksum as
    // outdated. NOTE(review): the 0xff mask presumably corresponds to the
    // persisted flag bits — confirm against the flag definitions in types.h.
    void setFlag(uint32_t flags)
    { _flags |= flags | (flags & 0xff ? CHECKSUM_OUTDATED : 0); }

    void clearFlag(uint32_t flags) { _flags &= ~flags; }

    // Updates the header or body location and marks the checksum outdated.
    void setLocation(DocumentPart part, DataLocation location) {
        if (part == HEADER) {
            _header = location;
        } else {
            _body = location;
        }
        _flags |= CHECKSUM_OUTDATED;
    }

    void setChecksum(uint16_t checksum)
    { _checksum = checksum; _flags &= ~CHECKSUM_OUTDATED; }

    uint16_t getChecksum() const { return _checksum; }

    // Resets the on-disk locations so the slot's data must be persisted anew.
    void clearPersistence() {
        _header = DataLocation();
        if (_body._size > 0) {
            _body = DataLocation();
        }
        _flags |= CHECKSUM_OUTDATED;
    }

    // Converts this slot into a remove that can no longer be reverted;
    // no-op if it is already deleted in place.
    void turnToUnrevertableRemove() {
        if (_flags & DELETED_IN_PLACE) return;
        _body = DataLocation(0, 0);
        _flags |= DELETED | DELETED_IN_PLACE;
        _flags |= ALTERED_IN_MEMORY | CHECKSUM_OUTDATED;
    }

    /**
     * Tests for equality of memfiles. Equality requires MemFile to look equal
     * for clients. It will not read data from file, so the same parts of the
     * file must be cached for objects to be equal. Non-persistent flags need
     * not be equal (The same parts need not be persisted to backend files)
     *
     * Used in unit testing only.
     */
    bool operator==(const MemSlot& other) const;

    // Implement print functions so we can be used similar to as we were
    // a document::Printable (Don't want inheritance in this class)
    void print(std::ostream& out, bool verbose,
               const std::string& indent) const;

    std::string toString(bool verbose = false) const {
        std::ostringstream ost;
        print(ost, verbose, "");
        return ost.str();
    }
};

inline std::ostream& operator<<(std::ostream& out, const MemSlot& slot) {
    slot.print(out, false, "");
    return out;
}

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp new file mode 100644 index 00000000000..82ac8ac62d2 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp @@ -0,0 +1,22 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h>

namespace storage {
namespace memfile {

// Returns the destination-file location for sourceLocation, invoking the
// BufferCacheCopier only the first time a given source location is seen;
// later requests for the same source location reuse the tracked result.
DataLocation
SharedDataLocationTracker::getOrCreateSharedLocation(
        DataLocation sourceLocation)
{
    // operator[] inserts a default-constructed DataLocation on first access;
    // the code relies on that default reporting !valid().
    DataLocation& bufferedLoc(_trackedLocations[sourceLocation]);
    if (!bufferedLoc.valid()) {
        bufferedLoc = _cacheCopier.copyFromSourceToLocal(_part, sourceLocation);
    }
    return bufferedLoc;
}


} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h new file mode 100644 index 00000000000..e0b1a7b9a2a --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h @@ -0,0 +1,65 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/memfilepersistence/common/types.h>

namespace storage {
namespace memfile {

class BufferCacheCopier;

/**
 * Simple utility to track locations copied between files and to help
 * ensure locations that were shared in the source file will also be shared
 * in the destination file.
 */
class SharedDataLocationTracker
{
public:
    SharedDataLocationTracker(BufferCacheCopier& cacheCopier,
                              Types::DocumentPart part)
        : _cacheCopier(cacheCopier),
          _part(part),
          _trackedLocations()
    {
    }

    /**
     * Get a location to data contained in the destination which points at the
     * exact same data as that given by sourceLocation in the source. Multiple
     * requests to the same source location will return the same destination
     * location.
 */
    DataLocation getOrCreateSharedLocation(DataLocation sourceLocation);
private:
    BufferCacheCopier& _cacheCopier;
    Types::DocumentPart _part;
    // Maps source-file location -> destination-file location already created.
    std::map<DataLocation, DataLocation> _trackedLocations;
};

/**
 * Interface for copying data between individual MemFile buffer caches.
 */
class BufferCacheCopier
{
    // Non-virtual-interface hook implemented by concrete copiers.
    virtual DataLocation doCopyFromSourceToLocal(
            Types::DocumentPart part,
            DataLocation sourceLocation) = 0;
public:
    virtual ~BufferCacheCopier() {}

    /**
     * Copy a given file part location from a source cache into a new location
     * in the destination cache. Returns new location in destination cache.
     * It is assumed that locations returned by this method will be unique.
     */
    DataLocation copyFromSourceToLocal(Types::DocumentPart part,
                                       DataLocation sourceLocation)
    {
        return doCopyFromSourceToLocal(part, sourceLocation);
    }
};

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp new file mode 100644 index 00000000000..1780870c050 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp @@ -0,0 +1,107 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/memfile/slotiterator.h>
#include <vespa/memfilepersistence/memfile/memfile.h>

namespace storage {
namespace memfile {

// The seen-GID hash set is sized to twice the slot count up front, so
// inserting every slot's GID cannot exceed the initial capacity hint.
GidUniqueSlotIterator::GidUniqueSlotIterator(const MemFile& file,
                                             bool iterateRemoves,
                                             Timestamp fromTimestamp,
                                             Timestamp toTimestamp)
    : _file(file),
      _seen(2 * file.getSlotCount()),
      _iterateRemoves(iterateRemoves),
      _fromTimestamp(fromTimestamp),
      _toTimestamp(toTimestamp),
      _currentIndex(file.getSlotCount())
{
    iterate();
}

// Advances to the next slot whose GID has not been seen yet, scanning
// backwards from _currentIndex - 1. The loop condition relies on unsigned
// wrap-around: when i passes 0 it wraps to UINT32_MAX, ending the loop.
void
GidUniqueSlotIterator::iterate() const
{
    for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) {
        // To avoid separate implementations for const and non-const
        // iterators we do a const cast here. For const iterators, only
        // const MemSlot entries will be exposed externally, so no
        // modifications will be allowed for those.
        MemSlot& slot(const_cast<MemSlot&>(_file[i]));
        if (_fromTimestamp != Timestamp(0) &&
            slot.getTimestamp() < _fromTimestamp) continue;
        if (_toTimestamp != Timestamp(0) &&
            slot.getTimestamp() > _toTimestamp) continue;

        SeenMap::insert_result inserted(_seen.insert(slot.getGlobalId()));
        if (!inserted.second) {
            continue;
        }
        // A remove as the newest version still marks the GID as seen above,
        // so older puts for the same document are skipped as well.
        if (slot.deleted() && !_iterateRemoves) continue;
        _current = &slot;
        _currentIndex = i;
        return;
    }
    _current = 0;
    _currentIndex = 0;
}

// Deep-copies the iteration state so the clone continues independently.
SlotIterator*
GidUniqueSlotIterator::clone() const {
    GidUniqueSlotIterator* sit(
            new GidUniqueSlotIterator(_file, _iterateRemoves,
                                      _fromTimestamp, _toTimestamp));
    sit->_seen = _seen;
    sit->_currentIndex = _currentIndex;
    sit->_current = _current;
    return sit;
}

AllSlotsIterator::AllSlotsIterator(const MemFile& file,
                                   bool iterateRemoves,
                                   Timestamp fromTimestamp,
                                   Timestamp toTimestamp)
    : _file(file),
      _iterateRemoves(iterateRemoves),
      _fromTimestamp(fromTimestamp),
      _toTimestamp(toTimestamp),
      _currentIndex(file.getSlotCount())
{
    iterate();
}

SlotIterator*
AllSlotsIterator::clone() const {
    AllSlotsIterator* sit = new AllSlotsIterator(_file, _iterateRemoves,
                                                 _fromTimestamp, _toTimestamp);
    sit->_currentIndex = _currentIndex;
    sit->_current = _current;
    return sit;
}

// Same backwards scan as GidUniqueSlotIterator::iterate(), but without the
// GID uniqueness filtering.
void
AllSlotsIterator::iterate() const
{
    for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) {
        // To avoid separate implementations for const and non-const
        // iterators we do a const cast here. For const iterators, only
        // const MemSlot entries will be exposed externally, so no
        // modifications will be allowed for those.
        MemSlot& slot(const_cast<MemSlot&>(_file[i]));
        if (_fromTimestamp != Timestamp(0) &&
            slot.getTimestamp() < _fromTimestamp) continue;
        if (_toTimestamp != Timestamp(0) &&
            slot.getTimestamp() > _toTimestamp) continue;
        if (slot.deleted() && !_iterateRemoves) continue;
        _current = &slot;
        _currentIndex = i;
        return;
    }
    _current = 0;
    _currentIndex = 0;
}

} // memfile
} // storage
diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h new file mode 100644 index 00000000000..c10075ef143 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h @@ -0,0 +1,128 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::memfile::SlotIterator
 * \ingroup memfile
 *
 * \brief Utility class for iterating slots in a MemFile.
 *
 * When needing to iterate the slots, sometimes one want to iterate only unique
 * slots and sometimes you want to iterate deleted slots. Iterating only unique
 * slots adds a CPU cost, so one would want to avoid adding that cost if
 * iterating all.
 *
 * To simplify code iterating slots, they can use a SlotIterator, such that they
 * don't have to reimplement the iteration.
 *
 * The typical way of creating such an iterator, is by calling MemFile's
 * getSlotIterator function, which will give you an iterator of suitable
 * implementation. Do not use these directly.
 */

#pragma once

#include <boost/operators.hpp>
#include <vespa/memfilepersistence/common/types.h>
#include <vespa/vespalib/stllike/hash_set.h>

namespace storage {
namespace memfile {

class MemFile; // MemFile depends on this file. Don't want circular dependency
class MemSlot;

/**
 * Abstract base for slot iterators. Subclasses implement iterate() to
 * advance _current; a null _current marks the end of iteration.
 */
class SlotIterator : protected Types {
protected:
    mutable MemSlot* _current;

    virtual void iterate() const = 0;
    SlotIterator() : _current(0) {}

public:
    typedef std::unique_ptr<SlotIterator> UP;
    typedef std::unique_ptr<const SlotIterator> CUP;

    virtual ~SlotIterator() {}

    virtual SlotIterator* clone() const = 0;

    MemSlot* getCurrent() { return _current; }
    const MemSlot* getCurrent() const { return _current; }

    const MemSlot& operator++() const { iterate(); return *_current; }
};

/** Iterates slots, exposing only one slot per global ID. */
class GidUniqueSlotIterator : public SlotIterator {
    const MemFile& _file;
    typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap;
    mutable SeenMap _seen;
    bool _iterateRemoves;
    Timestamp _fromTimestamp;
    Timestamp _toTimestamp;
    mutable uint32_t _currentIndex;

public:
    GidUniqueSlotIterator(const MemFile& file,
                          bool iterateRemoves,
                          Timestamp fromTimestamp,
                          Timestamp toTimestamp);

    virtual void iterate() const;
    virtual SlotIterator* clone() const;
};

/** Iterates all slots (optionally including removes) in a timestamp range. */
class AllSlotsIterator : public SlotIterator {
    const MemFile& _file;
    bool _iterateRemoves;
    Timestamp _fromTimestamp;
    Timestamp _toTimestamp;
    mutable uint32_t _currentIndex;

public:
    AllSlotsIterator(const MemFile& file,
                     bool iterateRemoves,
                     Timestamp fromTimestamp,
                     Timestamp toTimestamp);

    virtual void iterate() const;
    virtual SlotIterator* clone() const;
};

/**
 * \class storage::memfile::IteratorWrapper
 * \ingroup memfile
 *
 * \brief Wrapper class for iterators, such that we can return by value.
 *
 * Iterators use inheritance, so we need a wrapper class to wrap the
 * implementation in order to be able to return iterators by value, as one is
 * accustomed to in the standard library.
 */
class IteratorWrapper : public boost::operators<IteratorWrapper> {
    SlotIterator::CUP _it;

public:
    IteratorWrapper() {} // Creates end() iterator.
    IteratorWrapper(SlotIterator::CUP it) : _it(std::move(it)) {}
    // Override to clone implementation
    IteratorWrapper(const IteratorWrapper& o) : _it(o._it->clone()) {}
    IteratorWrapper& operator=(const IteratorWrapper& o) {
        _it.reset(0);
        if (o._it.get() != 0) _it.reset(o._it->clone());
        return *this;
    }

    // Equal iff both point at the same current slot; any exhausted iterator
    // (null current slot) therefore compares equal to the end() wrapper.
    bool operator==(const IteratorWrapper& o) const {
        const MemSlot* slot(_it.get() == 0 ? 0 : _it->getCurrent());
        const MemSlot* slot2(o._it.get() == 0 ? 0 : o._it->getCurrent());
        return (slot == slot2);
    }

    const MemSlot& operator*() const { return *_it->getCurrent(); }
    const MemSlot* operator->() const { return _it->getCurrent(); }
    const MemSlot& operator++() const { return ++*_it; }
};


} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore new file mode 100644 index 00000000000..7e7c0fe7fae --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore @@ -0,0 +1,2 @@
/.depend
/Makefile
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt new file mode 100644 index 00000000000..e30807d99b2 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt @@ -0,0 +1,14 @@
# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(memfilepersistence_spi OBJECT
    SOURCES
    memfilepersistence.cpp
    memfilepersistenceprovider.cpp
    operationhandler.cpp
    iteratorhandler.cpp
    joinoperationhandler.cpp
    splitoperationhandler.cpp
    visitorslotmatcher.cpp
    threadlocals.cpp
    cacheevictionguard.cpp
    DEPENDS
)
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp new file mode 100644 index 00000000000..ba9bc8669c6 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp @@ -0,0 +1,29 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/log/log.h>
#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
#include <vespa/memfilepersistence/memfile/memfile.h>

LOG_SETUP(".persistence.memfile.cacheevictionguard");

namespace storage {
namespace memfile {

// If unguard() was never invoked, discard in-memory changes and evict the
// file from the cache so the next access reloads it from disk.
MemFileCacheEvictionGuard::~MemFileCacheEvictionGuard()
{
    if (!_ok) {
        LOG(debug,
            "Clearing %s from cache to force reload "
            "of file on next access.",
            _ptr->getFile().getBucketId().toString().c_str());
        // Throw away all non-persisted changes to file and clear it from the
        // cache to force a full reload on next access. This is the safest
        // option, as all operations that are not yet persisted should fail
        // back to the client automatically.
        _ptr->clearFlag(Types::SLOTS_ALTERED);
        _ptr.eraseFromCache(); // nothrow
    }
}

}
}
diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h new file mode 100644 index 00000000000..6df524a1c58 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h @@ -0,0 +1,45 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/memfilepersistence/memfile/memfileptr.h>

namespace storage {
namespace memfile {

/**
 * Guard which will forcefully un-mark a file as being modified and evict
 * it from the cache if an exception occurs before it is destructed (more
 * specifically, if unguard() is never invoked on it).
 *
 * Any data not yet persisted when the memfile is evicted will be lost.
 * It's up to the caller to ensure that this does not actually cause
 * any true data loss.
 */
class MemFileCacheEvictionGuard
{
public:
    MemFileCacheEvictionGuard(const MemFilePtr& ptr)
        : _ptr(ptr),
          _ok(false)
    {
        assert(_ptr.get());
    }
    ~MemFileCacheEvictionGuard();

    // Smart-pointer style access to the guarded MemFile.
    MemFile* operator->() { return _ptr.get(); }
    MemFile& operator*() { return *_ptr; }
    const MemFile* operator->() const { return _ptr.get(); }
    const MemFile& operator*() const { return *_ptr; }

    const MemFilePtr& get() const { return _ptr; }
    MemFilePtr& get() { return _ptr; }

    // Marks the operation as successful; the destructor then does nothing.
    void unguard() { _ok = true; }
private:
    MemFilePtr _ptr;
    bool _ok;
};

}
}

diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp new file mode 100644 index 00000000000..c95d59001f4 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp @@ -0,0 +1,431 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <iterator> +#include <vespa/document/fieldset/fieldsets.h> +#include <vespa/document/select/bodyfielddetector.h> +#include <vespa/memfilepersistence/spi/iteratorhandler.h> +#include <vespa/memfilepersistence/spi/visitorslotmatcher.h> +#include <vespa/memfilepersistence/spi/cacheevictionguard.h> + +LOG_SETUP(".persistence.memfile.handler.iterator"); + +namespace storage { +namespace memfile { + +CachePrefetchRequirements +CachePrefetchRequirements::createFromSelection(const document::DocumentTypeRepo& repo, + const document::select::Node& sel) +{ + CachePrefetchRequirements ret; + document::select::BodyFieldDetector bfd(repo); + sel.visit(bfd); + ret.setHeaderPrefetchRequired(bfd.foundHeaderField); + ret.setBodyPrefetchRequired(bfd.foundBodyField); + return ret; +} + +IteratorHandler::IteratorHandler(Environment& env) + : OperationHandler(env) +{ +} + +IteratorHandler::~IteratorHandler() +{ +} + +void +IteratorHandler::sanityCheckActiveIteratorCount() +{ + if (_sharedState._iterators.size() + >= SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT + && !_sharedState._hasWarnedLargeIteratorCount) + { + LOG(warning, "Number of active iterators has reached warn-limit " + "of %zu. Potential iterator leak? destroyIterator() must be " + "invoked for each successful createIterator() invocation.", + SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT); + _sharedState._hasWarnedLargeIteratorCount = true; + } +} + +spi::CreateIteratorResult +IteratorHandler::createIterator(const spi::Bucket& bucket, + const document::FieldSet& fields, + const spi::Selection& sel, + spi::IncludedVersions versions) +{ + uint64_t id; + // By default, no explicit prefetching is required. 
+ CachePrefetchRequirements prefetcher; + + vespalib::LinkedPtr<document::select::Node> docSelection; + if (!sel.getDocumentSelection().getDocumentSelection().empty()) { + docSelection.reset( + parseDocumentSelection( + sel.getDocumentSelection().getDocumentSelection(), + true).release()); + if (!docSelection.get()) { + return spi::CreateIteratorResult( + spi::Result::PERMANENT_ERROR, + "Got invalid/unparseable document selection string"); + } + prefetcher = CachePrefetchRequirements::createFromSelection( + _env.repo(), *docSelection); + // NOTE: Suboptimal behavior; since the field detector doesn't + // understand that ID-related selections require header reads, + // we take the safest route here and simply always require the + // header to be prefetched if we have _any_ kind of non-empty + // document selection. + prefetcher.setHeaderPrefetchRequired(true); + } + prefetcher.setFromTimestamp(Timestamp(sel.getFromTimestamp().getValue())); + prefetcher.setToTimestamp(Timestamp(sel.getToTimestamp().getValue())); + + { + vespalib::LockGuard lock(_sharedState._stateLock); + id = _sharedState._nextId; + + std::pair<IteratorStateMap::iterator, bool> inserted( + _sharedState._iterators.insert( + IteratorStateMap::value_type( + id, + IteratorState( + bucket, + sel, + document::FieldSet::UP(fields.clone()), + versions, + docSelection, + prefetcher)))); + + assert(inserted.second); // Should never have duplicates + ++_sharedState._nextId; + sanityCheckActiveIteratorCount(); + } + LOG(debug, "Created new iterator state for bucket %s " + "with iterator id %zu", + bucket.getBucketId().toString().c_str(), + id); + return spi::CreateIteratorResult(spi::IteratorId(id)); +} + +spi::Result +IteratorHandler::destroyIterator(spi::IteratorId id) +{ + vespalib::LockGuard lock(_sharedState._stateLock); + uint64_t iterId = id; + IteratorStateMap::iterator iter( + _sharedState._iterators.find(iterId)); + if (iter == _sharedState._iterators.end()) { + std::ostringstream ss; + ss << 
"destroyIterator called with unknown iterator id (" + << iterId << ")"; + LOG(error, "%s", ss.str().c_str()); + return spi::Result(); + } + LOG(debug, "Destroying iterator state for iterator id %zu", iterId); + assert(!iter->second.isActive()); + _sharedState._iterators.erase(iter); + return spi::Result(); +} + +spi::DocEntry::SizeType +IteratorHandler::getDocumentSize(const MemFile& file, + const MemSlot& slot, + bool headerOnly) const +{ + spi::DocEntry::SizeType size = file.getSerializedSize(slot, HEADER); + if (!headerOnly) { + size += file.getSerializedSize(slot, BODY); + } + return size; +} + +spi::DocEntry::SizeType +IteratorHandler::getEntrySize(spi::DocEntry::SizeType docSize) const +{ + return docSize + sizeof(spi::DocEntry); +} + +void +IteratorHandler::prefetch(const CachePrefetchRequirements& requirements, + MemFile& file) const +{ + if (requirements.noPrefetchRequired()) { + LOG(spam, "%s: no prefetching required", + file.getFile().getBucketId().toString().c_str()); + return; + } + // Let body prefetching also imply header prefetching, at least for now. + // If this changes, so must the explicit caching of remaining timestamps + // in iterate(). + bool headerOnly = !requirements.isBodyPrefetchRequired(); + if (requirements.prefetchEntireBlocks()) { + LOG(spam, "%s: prefetching entire blocks for header: yes, body: %s", + file.getFile().getBucketId().toString().c_str(), + headerOnly ? 
"no" : "yes"); + if (headerOnly) { + file.ensureHeaderBlockCached(); + } else { + file.ensureHeaderAndBodyBlocksCached(); + } + } else { + std::vector<Timestamp> timestamps; + for (size_t i = 0; i < file.getSlotCount(); ++i) { + const MemSlot& slot(file[i]); + // TODO(vekterli): replace this sub-optimal code with a lower bound search + if (slot.getTimestamp() < requirements.getFromTimestamp()) { + continue; + } + if (slot.getTimestamp() > requirements.getToTimestamp()) { + break; + } + timestamps.push_back(slot.getTimestamp()); + } + LOG(spam, "%s: prefetching %zu slots in timestamp range [%zu, %zu]", + file.getFile().getBucketId().toString().c_str(), + timestamps.size(), + requirements.getFromTimestamp().getTime(), + requirements.getToTimestamp().getTime()); + file.ensureDocumentCached(timestamps, headerOnly); + } +} + +std::vector<Types::Timestamp>& +IteratorHandler::getOrFillRemainingTimestamps(MemFile& file, + IteratorState& state) +{ + std::vector<Types::Timestamp>& remaining(state.getRemaining()); + if (remaining.empty()) { + if (state.getSelection().getTimestampSubset().empty()) { + VisitorSlotMatcher matcher( + _env.repo(), state.getDocumentSelectionPtr()); + + int flags = 0; + switch (state.getIncludedVersions()) { + case spi::NEWEST_DOCUMENT_ONLY: + flags = ITERATE_GID_UNIQUE; + break; + case spi::NEWEST_DOCUMENT_OR_REMOVE: + flags = ITERATE_GID_UNIQUE | ITERATE_REMOVED; + break; + case spi::ALL_VERSIONS: + flags = ITERATE_REMOVED; + break; + } + + remaining = select( + file, + matcher, + flags, + Timestamp(state.getSelection().getFromTimestamp()), + Timestamp(state.getSelection().getToTimestamp())); + } else { + const std::vector<spi::Timestamp>& subset( + state.getSelection().getTimestampSubset()); + remaining.reserve(subset.size()); + for (size_t i = 0; i < subset.size(); ++i) { + // Ensure timestamps are strictly increasing + assert(i == 0 || subset[i] > subset[i - 1]); + remaining.push_back(Types::Timestamp(subset[i])); + } + + 
            state.setIncludedVersions(spi::ALL_VERSIONS);
        }
    }
    return remaining;
}

// Appends a metadata-only entry (timestamp + remove flag, no document) for
// the slot. Returns false without appending when the byte budget is spent.
bool
IteratorHandler::addMetaDataEntry(spi::IterateResult::List& result,
                                  const MemSlot& slot,
                                  uint64_t& totalSize,
                                  uint64_t maxByteSize) const
{
    size_t entrySize = getEntrySize(0);
    // Always admit at least one entry per iterate() call even if it blows
    // the budget; otherwise iteration could never make progress.
    if (totalSize + entrySize >= maxByteSize && !result.empty()) {
        return false;
    }
    totalSize += entrySize;

    int metaFlags = (slot.deleted() || slot.deletedInPlace()) ? spi::REMOVE_ENTRY : 0;
    spi::DocEntry::LP docEntry(
            new spi::DocEntry(
                    spi::Timestamp(slot.getTimestamp().getTime()),
                    metaFlags));
    result.push_back(docEntry);
    return true;
}

// Appends a remove-entry (timestamp + document id) for a deleted slot.
// Returns false without appending when the byte budget is spent.
bool
IteratorHandler::addRemoveEntry(spi::IterateResult::List& results,
                                const MemFile& file,
                                const MemSlot& slot,
                                uint64_t& totalSize,
                                uint64_t maxByteSize) const
{
    DocumentId did = file.getDocumentId(slot);
    size_t idSize = did.getSerializedSize();
    size_t entrySize = getEntrySize(idSize);

    // Same budget rule as addMetaDataEntry: never reject the first entry.
    if (totalSize + entrySize >= maxByteSize && !results.empty()) {
        return false;
    }
    totalSize += entrySize;

    spi::DocEntry::LP docEntry(
            new spi::DocEntry(
                    spi::Timestamp(slot.getTimestamp().getTime()),
                    spi::REMOVE_ENTRY,
                    did));
    results.push_back(docEntry);
    return true;
}

// Appends a put-entry carrying the (possibly field-stripped) document.
// Returns false without appending when the byte budget is spent.
bool
IteratorHandler::addPutEntry(spi::IterateResult::List& results,
                             const MemFile& file,
                             const MemSlot& slot,
                             bool headerOnly,
                             const document::FieldSet& fieldsToKeep,
                             uint64_t& totalSize,
                             uint64_t maxByteSize) const
{
    size_t docSize = getDocumentSize(file, slot, headerOnly);
    size_t entrySize = getEntrySize(docSize);
    if (totalSize + entrySize >= maxByteSize && !results.empty()) {
        return false;
    }
    Document::UP doc(
            file.getDocument(slot, headerOnly ? HEADER_ONLY : ALL));
    totalSize += entrySize;
    // If we want either the full doc or just the header, don't waste time
    // stripping unwanted document fields.
+ if (fieldsToKeep.getType() != document::FieldSet::ALL + && fieldsToKeep.getType() != document::FieldSet::HEADER) + { + document::FieldSet::stripFields(*doc, fieldsToKeep); + } + spi::DocEntry::LP docEntry( + new spi::DocEntry(spi::Timestamp(slot.getTimestamp().getTime()), + 0, + std::move(doc), + docSize)); + results.push_back(docEntry); + return true; +} + +spi::IterateResult +IteratorHandler::iterate(spi::IteratorId id, uint64_t maxByteSize) +{ + spi::IterateResult::List results; + + IteratorState* state; + { + vespalib::LockGuard lock(_sharedState._stateLock); + IteratorStateMap::iterator iter( + _sharedState._iterators.find(id)); + if (iter == _sharedState._iterators.end()) { + LOG(error, "Invoked iterate(id=%zu, maxByteSize=%zu) " + "with unknown id", + uint64_t(id), + maxByteSize); + + return spi::IterateResult(spi::Result::PERMANENT_ERROR, + "Unknown iterator ID"); + } + assert(!iter->second.isActive()); + state = &iter->second; + if (state->isCompleted()) { + return spi::IterateResult(results, true); + } + state->setActive(true); + } + + ActiveGuard activeGuard(*state); + MemFileCacheEvictionGuard file(getMemFile(state->getBucket())); + + const document::FieldSet& fields(state->getFields()); + bool metaDataOnly = (fields.getType() == document::FieldSet::NONE); + bool headerOnly = true; + + // Ensure we have relevant parts of the file prefetched if this is required. + const CachePrefetchRequirements& prefetchRequirements( + state->getCachePrefetchRequirements()); + prefetch(prefetchRequirements, *file); + + std::vector<Timestamp>& remaining( + getOrFillRemainingTimestamps(*file, *state)); + + if (!metaDataOnly) { + document::HeaderFields h; + headerOnly = h.contains(fields); + // Don't bother doing duplicate work if we've already prefetched + // everything we need. 
+ if (!((headerOnly && prefetchRequirements.isHeaderPrefetchRequired()) + || prefetchRequirements.isBodyPrefetchRequired())) + { + LOG(spam, "Caching %zu remaining slots from disk for %s", + remaining.size(), + state->getBucket().getBucketId().toString().c_str()); + file->ensureDocumentCached(remaining, headerOnly); + } + } else { + LOG(spam, "Not caching any of the %zu remaining slots from disk " + "for %s since iteration is metadata only", + remaining.size(), + state->getBucket().getBucketId().toString().c_str()); + } + + size_t totalSize = 0; + while (!remaining.empty()) { + Timestamp ts = remaining.back(); + const MemSlot* slot = file->getSlotAtTime(ts); + + if (slot) { + if (metaDataOnly) { + if (!addMetaDataEntry(results, *slot, totalSize, maxByteSize)) { + break; + } + } else if (slot->deleted() || slot->deletedInPlace()) { + if (state->getIncludedVersions() == spi::NEWEST_DOCUMENT_ONLY) { + // Probably altered by unrevertable remove between time + // of timestamp gathering and actual iteration. + remaining.pop_back(); + continue; + } + if (!addRemoveEntry(results, *file, *slot, + totalSize, maxByteSize)) + { + break; + } + } else { + if (!addPutEntry(results, *file, *slot, + headerOnly, fields, totalSize, maxByteSize)) + { + break; + } + } + } + remaining.pop_back(); + } + + file.unguard(); + + LOG(debug, "Iteration of bucket %s returned result with %zu entries " + "and %zu bytes. 
Remaining docs: %zu", + state->getBucket().getBucketId().toString().c_str(), + results.size(), + totalSize, + remaining.size()); + + if (remaining.empty()) { + state->setCompleted(); + return spi::IterateResult(results, true); + } + + return spi::IterateResult(results, false); +} + +} +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h new file mode 100644 index 00000000000..7b3ee9627e5 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h @@ -0,0 +1,252 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::IteratorHandler + * \ingroup memfile + * + * \brief Class exposing iterators over a bucket + */ +#pragma once + +#include <map> +#include <vespa/memfilepersistence/spi/operationhandler.h> +#include <vespa/persistence/spi/persistenceprovider.h> +#include <vespa/document/fieldset/fieldsetrepo.h> + +namespace document { + +class FieldSet; + +namespace select { +class Node; +} +} + +namespace storage { + +class GetIterCommand; + +namespace memfile { + +class CachePrefetchRequirements : public Types +{ +public: + CachePrefetchRequirements() + : _headerPrefetchRequired(false), + _bodyPrefetchRequired(false), + _fromTimestamp(0), + _toTimestamp(UINT64_MAX) + { + } + + bool noPrefetchRequired() const { + return !_headerPrefetchRequired && !_bodyPrefetchRequired; + } + + bool isHeaderPrefetchRequired() const { return _headerPrefetchRequired; } + void setHeaderPrefetchRequired(bool required) { _headerPrefetchRequired = required; } + + bool isBodyPrefetchRequired() const { return _bodyPrefetchRequired; } + void setBodyPrefetchRequired(bool required) { _bodyPrefetchRequired = required; } + + bool prefetchEntireBlocks() const { + return (_fromTimestamp == Timestamp(0) + && _toTimestamp == Timestamp(UINT64_MAX)); + } + + Timestamp 
getFromTimestamp() const { return _fromTimestamp; } + void setFromTimestamp(Timestamp fromTimestamp) { _fromTimestamp = fromTimestamp; } + Timestamp getToTimestamp() const { return _toTimestamp; } + void setToTimestamp(Timestamp toTimestamp) { _toTimestamp = toTimestamp; } + + static CachePrefetchRequirements createFromSelection( + const document::DocumentTypeRepo& repo, + const document::select::Node& sel); +private: + // Whether or not document selection requires header/body to be read + // beforehand to work efficiently. + bool _headerPrefetchRequired; + bool _bodyPrefetchRequired; + + Timestamp _fromTimestamp; + Timestamp _toTimestamp; +}; + +class IteratorState +{ + spi::Bucket _bucket; + spi::Selection _selection; + vespalib::LinkedPtr<document::FieldSet> _fieldSet; + vespalib::LinkedPtr<document::select::Node> _documentSelection; + std::vector<Types::Timestamp> _remaining; + spi::IncludedVersions _versions; + CachePrefetchRequirements _prefetchRequirements; + bool _isActive; + bool _isCompleted; + std::map<std::string, bool> _headerOnlyForDocumentType; + +public: + IteratorState(const spi::Bucket& bucket, + const spi::Selection& sel, + document::FieldSet::UP fieldSet, + spi::IncludedVersions versions, + vespalib::LinkedPtr<document::select::Node> docSel, + const CachePrefetchRequirements& prefetchRequirements) + : _bucket(bucket), + _selection(sel), + _fieldSet(vespalib::LinkedPtr<document::FieldSet>(fieldSet.release())), + _documentSelection(docSel), + _remaining(), + _versions(versions), + _prefetchRequirements(prefetchRequirements), + _isActive(false), + _isCompleted(false) + {} + + const spi::Bucket& getBucket() const { return _bucket; } + + const CachePrefetchRequirements& getCachePrefetchRequirements() const { + return _prefetchRequirements; + } + + bool isActive() const { return _isActive; } + void setActive(bool active) { _isActive = active; } + + bool isCompleted() const { return _isCompleted; } + void setCompleted(bool completed = true) { 
_isCompleted = completed; } + + const spi::Selection& getSelection() const { return _selection; } + spi::Selection& getSelection() { return _selection; } + const document::FieldSet& getFields() const { return *_fieldSet; } + + spi::IncludedVersions getIncludedVersions() const { return _versions; } + void setIncludedVersions(spi::IncludedVersions versions) { _versions = versions; } + bool hasDocumentSelection() const { return _documentSelection.get() != 0; } + + /** + * Can only be called if hasDocumentSelection() == true + */ + const document::select::Node& getDocumentSelection() const + { + return *_documentSelection; + } + /** + * @return pointer to doc selection if one has been given, NULL otherwise. + */ + const document::select::Node* getDocumentSelectionPtr() const + { + return _documentSelection.get(); + } + const std::vector<Types::Timestamp>& getRemaining() const { return _remaining; } + std::vector<Types::Timestamp>& getRemaining() { return _remaining; } +}; + +class SharedIteratorHandlerState +{ +public: + typedef std::map<uint64_t, IteratorState> IteratorStateMap; +private: + IteratorStateMap _iterators; + uint64_t _nextId; + vespalib::Lock _stateLock; + // Debugging aid: + static const size_t WARN_ACTIVE_ITERATOR_COUNT = 2048; + bool _hasWarnedLargeIteratorCount; + + friend class IteratorHandler; + friend class IteratorHandlerTest; +public: + SharedIteratorHandlerState() : _nextId(1) {} +}; + +class IteratorHandler : public OperationHandler +{ +private: + typedef SharedIteratorHandlerState::IteratorStateMap IteratorStateMap; + + class ActiveGuard + { + IteratorState& _state; + public: + ActiveGuard(IteratorState& state) : _state(state) {} + ~ActiveGuard() { + _state.setActive(false); + } + }; + + /** + * Get the serialized size of a document, only counting the header if + * headerOnly is true. 
+ */ + spi::DocEntry::SizeType getDocumentSize(const MemFile&, + const MemSlot&, + bool headerOnly) const; + /** + * Get the in-memory size of a single DocEntry object to more accurately + * limit per-iteration memory usage. + */ + spi::DocEntry::SizeType getEntrySize(spi::DocEntry::SizeType docSize) const; + /** + * Populate the state's remaining timestamps-vector, either from an + * explicitly specified timestamp subset in the selection, or from its + * document selection if no timestamp subset is given. + * @return mutable reference to the state's remaining-vector. + */ + std::vector<Types::Timestamp>& getOrFillRemainingTimestamps( + MemFile& file, + IteratorState&); + + /** + * If header/body precaching is required, cache _all_ documents in the + * required part(s) for the file. Otherwise, do nothing. + */ + void prefetch(const CachePrefetchRequirements& requirements, + MemFile& file) const; + + bool addMetaDataEntry(spi::IterateResult::List& result, + const MemSlot& slot, + uint64_t& totalSize, + uint64_t maxByteSize) const; + bool addRemoveEntry(spi::IterateResult::List& result, + const MemFile& file, + const MemSlot& slot, + uint64_t& totalSize, + uint64_t maxByteSize) const; + bool addPutEntry(spi::IterateResult::List& result, + const MemFile& file, + const MemSlot& slot, + bool headerOnly, + const document::FieldSet& fieldsToKeep, + uint64_t& totalSize, + uint64_t maxByteSize) const; + + /** + * Sanity checking to ensure we don't leak iterators. Checks if the number + * of active iterators exceeds a predefined Large Number(tm) and warns + * if this is the case. Mutates shared state (sets a "has warned" flag), + * so must only be called when holding shared state mutex. 
+ */ + void sanityCheckActiveIteratorCount(); + +public: + typedef std::unique_ptr<IteratorHandler> UP; + + SharedIteratorHandlerState _sharedState; + + IteratorHandler(Environment&); + ~IteratorHandler(); + + spi::CreateIteratorResult createIterator(const spi::Bucket& bucket, + const document::FieldSet& fieldSet, + const spi::Selection& sel, + spi::IncludedVersions versions); + spi::Result destroyIterator(spi::IteratorId id); + spi::IterateResult iterate(spi::IteratorId id, uint64_t maxByteSize); + + const SharedIteratorHandlerState& getState() const { + return _sharedState; + } +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp new file mode 100644 index 00000000000..449e3dedf85 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/spi/joinoperationhandler.h>
#include <vespa/memfilepersistence/spi/cacheevictionguard.h>
#include <vespa/memfilepersistence/mapper/memfilemapper.h>
#include <vespa/log/log.h>

LOG_SETUP(".persistence.memfile.handler.join");

namespace storage {
namespace memfile {

JoinOperationHandler::JoinOperationHandler(Environment& env)
    : OperationHandler(env),
      _env(env)
{
}

// Copies every slot from sourceFile whose timestamp is not already present
// in targetFile. The source body block is cached first so copied slots
// carry their document data.
void
JoinOperationHandler::copySlots(MemFile& sourceFile, MemFile& targetFile)
{
    sourceFile.ensureBodyBlockCached();
    LOG(spam,
        "Moving data from %s to %s",
        sourceFile.toString().c_str(),
        targetFile.toString().c_str());

    std::vector<const MemSlot*> slotsToCopy;
    slotsToCopy.reserve(sourceFile.getSlotCount());

    for (uint32_t j = 0; j < sourceFile.getSlotCount(); j++) {
        const MemSlot* slot(&sourceFile[j]);

        // A slot already present at the same timestamp in the target wins;
        // only copy timestamps the target does not know about.
        if (!targetFile.getSlotAtTime(slot->getTimestamp())) {
            slotsToCopy.push_back(slot);
        }
    }
    targetFile.copySlotsFrom(sourceFile, slotsToCopy);
    LOG(spam, "Moved data from %s to %s",
        sourceFile.toString().c_str(), targetFile.toString().c_str());
}

// Joins the contents of source1 and source2 into target. The special case
// where all three bucket ids are equal is a cross-partition move and is
// delegated to singleJoin().
spi::Result
JoinOperationHandler::join(
        const spi::Bucket& source1,
        const spi::Bucket& source2,
        const spi::Bucket& target)
{
    if ((source1.getBucketId() == source2.getBucketId())
        && (target.getBucketId() == source1.getBucketId()))
    {
        return singleJoin(source1, target);
    }

    MemFileCacheEvictionGuard targetFile(
            getMemFile(target.getBucketId(), target.getPartition(), false));

    // Avoid processing the same source bucket twice when both ids coincide.
    std::vector<spi::Bucket> sources;
    sources.push_back(source1);
    if (source1.getBucketId() != source2.getBucketId()) {
        sources.push_back(source2);
    }

    for (uint32_t i = 0; i < sources.size(); i++) {
        MemFileCacheEvictionGuard sourceFile(
                getMemFile(sources[i].getBucketId(),
                           sources[i].getPartition(),
                           false));

        if (targetFile->empty()) {
            LOG(spam, "Renaming %s to %s",
                sourceFile->toString().c_str(),
targetFile->toString().c_str()); + // It is assumed that if this fails, the nature of the exception is + // such that it will cause the disk to automatically be marked as + // down and for the process to restart, meaning we should not get + // out of sync between the service and persistence layers. + sourceFile.get().move(targetFile.get()); + } else { + copySlots(*sourceFile, *targetFile); + targetFile->flushToDisk(); + sourceFile.get().deleteFile(); + } + sourceFile.unguard(); + } + targetFile.unguard(); + + return spi::Result(); +} + +void +JoinOperationHandler::clearBucketFromCache(const spi::Bucket& bucket) +{ + getMemFile(bucket.getBucketId(), bucket.getPartition(), false) + .eraseFromCache(); +} + +/* + * Moving same bucket between partitions, potentially joining data + * if target file already exists. + */ +spi::Result +JoinOperationHandler::singleJoin( + const spi::Bucket& source, + const spi::Bucket& target) +{ + assert(source.getBucketId() == target.getBucketId()); + assert(source.getPartition() != target.getPartition()); + // Internal joins sidestep the cache completely, so we have to ensure + // the bucket is cleared from it before commencing. Otherwise, it's + // possible that the cached file offsets will not reflect what's actually + // stored on disk, leading to potential data corruption! The bucket shall + // not have been taken out of the cache before this point. 
+ clearBucketFromCache(target); + + Directory& toJoinDir = _env.getDirectory(source.getPartition()); + FileSpecification toJoinSpec( + source.getBucketId(), toJoinDir, + _env.calculatePathInDir(source.getBucketId(), toJoinDir)); + + MemFile toJoin(toJoinSpec, _env); + + Directory& toKeepDir = _env.getDirectory(target.getPartition()); + FileSpecification toKeepSpec( + source.getBucketId(), toKeepDir, + _env.calculatePathInDir(source.getBucketId(), toKeepDir)); + assert(toJoinDir != toKeepDir); + + const double maxFillRate( + _env.acquireConfigReadLock().memFilePersistenceConfig() + ->diskFullFactorMove); + if (source.getPartition() != target.getPartition() && + toKeepDir.isFull(0, maxFillRate)) + { + std::string failure = + vespalib::make_string("Not moving bucket %s to directory %s because it's " + "fill rate is %G (>%G)", + source.getBucketId().toString().c_str(), + toKeepDir.toString().c_str(), + toKeepDir.getPartition().getMonitor()->getFillRate(), + maxFillRate); + + LOG(debug, "%s", failure.c_str()); + + return spi::Result(spi::Result::TRANSIENT_ERROR, failure); + } + + MemFile toKeep(toKeepSpec, _env); + + copySlots(toJoin, toKeep); + toKeep.flushToDisk(); + + // Delete original file. + _env._memFileMapper.deleteFile(toJoin, _env); + + return spi::Result(); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h new file mode 100644 index 00000000000..c310a9f6f71 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * \class storage::memfile::JoinHandler
 * \ingroup memfile
 */
#pragma once

#include <vespa/memfilepersistence/spi/operationhandler.h>
#include <vespa/persistence/spi/persistenceprovider.h>

namespace storage {

namespace memfile {

/**
 * Implements the bucket join operation for the memfile persistence SPI:
 * merging two source buckets into one target, and (via singleJoin) moving
 * a single bucket between partitions.
 */
class JoinOperationHandler : public OperationHandler {
public:
    typedef std::unique_ptr<JoinOperationHandler> UP;

    JoinOperationHandler(Environment&);

    // Join source1 and source2 into target.
    spi::Result join(const spi::Bucket& source1,
                     const spi::Bucket& source2,
                     const spi::Bucket& target);

    // Move/merge the same bucket id from source partition to target
    // partition.
    spi::Result singleJoin(const spi::Bucket& source,
                           const spi::Bucket& target);

private:
    Environment& _env;

    void copySlots(MemFile& source, MemFile& target);
    void clearBucketFromCache(const spi::Bucket&);
};

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp
new file mode 100644
index 00000000000..c369ee47391
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp
@@ -0,0 +1,12 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/memfilepersistence/spi/memfilepersistence.h>

namespace storage {
namespace memfile {


} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h
new file mode 100644
index 00000000000..d11673667f4
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h
@@ -0,0 +1,20 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/**
 * \class storage::memfile::MemFilePersistence
 * \ingroup memfile
 *
 * \brief Top class in memfile persistence actually implementing the SPI
 */

#pragma once

namespace storage {
namespace memfile {

// Currently an empty placeholder; the actual SPI implementation lives in
// MemFilePersistenceProvider.
struct MemFilePersistence
{
};

} // memfile
} // storage

diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp
new file mode 100644
index 00000000000..cec695423af
--- /dev/null
+++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp
@@ -0,0 +1,889 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include <vespa/fastos/fastos.h>
#include <vespa/document/fieldset/fieldsetrepo.h>
#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h>
#include <vespa/memfilepersistence/common/exceptions.h>
#include <vespa/log/log.h>

LOG_SETUP(".memfilepersistenceprovider");

// Adds a trace message to the operation context iff tracing is enabled at
// the given verbosity level.
#define TRACE(context, level, func, message) \
{ \
    if ((context).getTrace().shouldTrace(level)) { \
        vespalib::string messageToTrace( \
                vespalib::make_string("MemFilePP.%s: %s", func, message)); \
        (context).getTrace().trace(level, messageToTrace); \
    } \
}
// Traces the operation type, load type and priority at trace level 9.
#define TRACEGENERIC(context, type) \
if ((context).getTrace().shouldTrace(9)) { \
    vespalib::string messageToTrace( \
            vespalib::make_string("MemFilePP.%s: Load type %s, priority %u.", \
                    type, (context).getLoadType().toString().c_str(), \
                    (uint32_t) (context).getPriority())); \
    (context).getTrace().trace(9, messageToTrace); \
}

namespace storage {
namespace memfile {

namespace {

// Maps an I/O exception type to the device state it implies, or Device::OK
// when the error does not indicate a device-level problem.
Device::State
mapIoExceptionToDeviceState(MemFileIoException::Type type)
{
    using vespalib::IoException;
    switch (type) {
    case IoException::ILLEGAL_PATH:
        return Device::PATH_FAILURE;
    case IoException::NO_PERMISSION:
        return Device::NO_PERMISSION;
    case
        IoException::DISK_PROBLEM:
        return Device::IO_FAILURE;
    case IoException::TOO_MANY_OPEN_FILES:
        return Device::TOO_MANY_OPEN_FILES;
    default:
        return Device::OK;
    }
}

} // end of anonymous namespace

// Returns this thread's active-MemFile slot (the pointer inside may be null).
MemFilePtr&
MemFilePersistenceProvider::getThreadLocalMemFile() const
{
    return _threadLocals.get()._memFile;
}

// Lazily allocates per-thread metrics the first time this thread asks.
MemFilePersistenceThreadMetrics&
MemFilePersistenceProvider::getMetrics() const
{
    ThreadContext& context = _threadLocals.get();
    if (context._metrics == NULL) {
        context._metrics = _metrics.addThreadMetrics();
    }

    return *context._metrics;
}

bool
MemFilePersistenceProvider::hasCachedMemFile() const
{
    return _threadLocals.get()._memFile.get();
}

// Hands out the MemFile for bucket b, preferring the thread-local active
// file (ownership is released to the caller and the slot cleared) over a
// cache lookup.
MemFilePtr
MemFilePersistenceProvider::getMemFile(const spi::Bucket& b,
                                       bool keepInCache) const
{
    MemFilePtr& ptr = getThreadLocalMemFile();

    if (ptr.get()) {
        // The cached file must belong to the bucket being operated on.
        assert(ptr->getFile().getBucketId() == b);

        MemFilePtr retVal = ptr;
        ptr = MemFilePtr();
        return retVal;
    }

    return _env->_cache.get(b.getBucketId(),
                            *_env,
                            _env->getDirectory(b.getPartition()),
                            keepInCache);
}

// Stashes ptr as this thread's active memfile so a follow-up operation on
// the same bucket can skip the cache lookup. `user` is only used for logging.
void
MemFilePersistenceProvider::setActiveMemFile(MemFilePtr ptr,
                                             const char* user) const
{
    LOG(spam, "Inserting active memfile %s for user %s",
        ptr->getFile().getBucketId().toString().c_str(),
        user);
    getThreadLocalMemFile() = ptr;
}

// Drops the thread-local active memfile; must not be called while it still
// holds unpersisted slot changes (guarded by the assert below).
void
MemFilePersistenceProvider::clearActiveMemFile(spi::Context* context) const
{
    LOG(spam, "Clearing active memfile");
    MemFilePtr& ptr = getThreadLocalMemFile();
    assert(ptr.get() == NULL || !ptr->slotsAltered());
    ptr = MemFilePtr();
    if (context != 0) {
        TRACE(*context, 9, "clearActiveMemFile", "Done clearing");
    }
}

enum MemFileAccessGuardScopeExitAction {
    REINSERT_AS_ACTIVE = 0x1,
};

/**
 * The MemFile access guard provides a simple scope guard for providing
 * exception safety for operations toward MemFiles.
+ * The guard will always evict a file from the cache iff the guard has not + * been dismissed upon destruction. This will throw away all non-persisted + * changes to file and clear it from the cache to force a full reload on next + * access. This is the safest option, as all operations that are not yet + * persisted should fail back to the client automatically. + * + * The current MemFile will be reinserted as the thread's active MemFile + * iff REINSERT_AS_ACTIVE has specified as a guard construction flag and + * the guard was dismissed before destruction. + */ +class MemFileAccessGuard : public Types +{ + MemFileAccessGuard(const MemFileAccessGuard&); + MemFileAccessGuard& operator=(const MemFileAccessGuard&); +public: + MemFileAccessGuard(const MemFilePersistenceProvider& spi, + const MemFilePtr& ptr, + const char* user, + uint32_t flags = 0) + : _spi(spi), + _ptr(ptr), + _user(user), + _flags(flags), + _dismissed(false) + { + assert(_ptr.get()); + } + + ~MemFileAccessGuard() { + if (!_dismissed) { + LOG(debug, + "Access guard in %s not dismissed on scope exit, clearing %s" + " from cache to force reload of file on next access.", + _user, + _ptr->getFile().getBucketId().toString().c_str()); + + _ptr->clearFlag(SLOTS_ALTERED); + _ptr.eraseFromCache(); // nothrow + } + if ((_flags & REINSERT_AS_ACTIVE) && _dismissed) { + _spi.setActiveMemFile(_ptr, _user); + } else { + _spi.clearActiveMemFile(); + } + } + + // Misc accessors + MemFile* operator->() { + return _ptr.get(); + } + MemFile& operator*() { + return *_ptr; + } + const MemFile* operator->() const { + return _ptr.get(); + } + const MemFile& operator*() const { + return *_ptr; + } + MemFilePtr& getMemFilePtr() { + return _ptr; + } + const MemFilePtr& getMemFilePtr() const { + return _ptr; + } + + /** + * If all access towards the MemFile has been successfully performed, + * calling dismiss() will ensure that the specified cleanup actions + * are not taken upon scope exit. 
+ */ + void dismiss() { + _dismissed = true; + } + +private: + const MemFilePersistenceProvider& _spi; + MemFilePtr _ptr; + const char* _user; + const uint32_t _flags; + bool _dismissed; +}; + +void +MemFilePersistenceProvider::handleBucketCorruption(const FileSpecification& file) const +{ + spi::Bucket fixBucket(file.getBucketId(), + spi::PartitionId(file.getDirectory().getIndex())); + + // const_cast is nasty, but maintain() must necessarily be able to + // modify state... + MemFilePersistenceProvider& mutableSelf( + const_cast<MemFilePersistenceProvider&>(*this)); + + spi::Result maintainResult(mutableSelf.maintain(fixBucket, spi::HIGH)); + if (maintainResult.getErrorCode() != spi::Result::NONE) { + LOG(warning, + "Failed to successfully repair %s after corruptions: %s", + fixBucket.toString().c_str(), + maintainResult.toString().c_str()); + } + + // Add bucket to set of modified buckets so service layer can request + // new bucket info. + _env->addModifiedBucket(file.getBucketId()); +} + +template<typename C> +C MemFilePersistenceProvider::handleException(const std::exception& e, + bool canRepairBucket) const +{ + LOG(debug, "Handling exception caught during processing: %s", e.what()); + + const MemFileIoException* io = dynamic_cast<const MemFileIoException*>(&e); + if (io != NULL) { + std::ostringstream error; + error << "Exception caught processing operation for " + << io->getFile().getPath() << ": " << io->getMessage(); + + Device::State deviceState( + mapIoExceptionToDeviceState(io->getType())); + + if (deviceState != Device::OK) { + io->getFile().getDirectory().addEvent( + deviceState, + io->getMessage(), + VESPA_STRLOC); + + _env->_mountPoints->writeToFile(); + + return C(spi::Result::FATAL_ERROR, error.str()); + } + if (io->getType() == vespalib::IoException::CORRUPT_DATA + && canRepairBucket) + { + handleBucketCorruption(io->getFile()); + } + + return C(spi::Result::TRANSIENT_ERROR, error.str()); + } + const CorruptMemFileException* ce( + 
dynamic_cast<const CorruptMemFileException*>(&e)); + if (ce != 0) { + std::ostringstream error; + error << "Exception caught processing operation for " + << ce->getFile().getPath() << ": " << ce->getMessage(); + if (canRepairBucket) { + handleBucketCorruption(ce->getFile()); + } + return C(spi::Result::TRANSIENT_ERROR, error.str()); + } + + const TimestampExistException* ts = + dynamic_cast<const TimestampExistException*>(&e); + if (ts != NULL) { + return C(spi::Result::TIMESTAMP_EXISTS, ts->getMessage()); + } + + return C(spi::Result::PERMANENT_ERROR, e.what()); +} + +MemFilePersistenceProvider::MemFilePersistenceProvider( + framework::ComponentRegister& compReg, + const config::ConfigUri & configUri) + : framework::Component(compReg, "memfilepersistenceprovider"), + framework::StatusReporter("memfilepersistenceprovider", + "VDS Persistence Provider"), + _componentRegister(compReg), + _configUri(configUri), + _config(*config::ConfigGetter<vespa::config::storage::StorMemfilepersistenceConfig>::getConfig(configUri.getConfigId(), + configUri.getContext())), + _memFileMapper(*this), + _repo(0), + _metrics(*this), + _threadLocals(1024) +{ + registerMetric(_metrics); + registerStatusPage(*this); +} + +MemFilePersistenceProvider::~MemFilePersistenceProvider() +{ +} + +void +MemFilePersistenceProvider::setDocumentRepo(const document::DocumentTypeRepo& repo) +{ + _repo = &repo; + if (_env.get()) { + _env->setRepo(_repo); + } +} + +using MemFilePersistenceConfig + = vespa::config::storage::StorMemfilepersistenceConfig; +using PersistenceConfig = vespa::config::content::PersistenceConfig; + +namespace { + +MemFileCache::MemoryUsage +getCacheLimits(const MemFilePersistenceConfig& cfg) +{ + MemFileCache::MemoryUsage cacheLimits; + cacheLimits.metaSize = cfg.cacheSize * cfg.cacheSizeMetaPercentage / 100; + cacheLimits.headerSize = cfg.cacheSize * cfg.cacheSizeHeaderPercentage / 100; + cacheLimits.bodySize = cfg.cacheSize * cfg.cacheSizeBodyPercentage / 100; + return 
cacheLimits; +} + +std::unique_ptr<Options> +makeOptions(const MemFilePersistenceConfig& memFileCfg, + const PersistenceConfig& persistenceCfg) +{ + return std::unique_ptr<Options>(new Options(memFileCfg, persistenceCfg)); +} + +} + +void +MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> cfg) +{ + assert(cfg.get() != nullptr); + auto guard = _env->acquireConfigWriteLock(); + + guard.setMemFilePersistenceConfig(std::move(cfg)); + + if (guard.hasPersistenceConfig()) { + guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(), + *guard.persistenceConfig())); + } + + // Data race free; acquires internal cache lock. + _cache->setCacheSize(getCacheLimits(*guard.memFilePersistenceConfig())); +} + +void +MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> cfg) +{ + assert(cfg.get() != nullptr); + auto guard = _env->acquireConfigWriteLock(); + + guard.setPersistenceConfig(std::move(cfg)); + + if (guard.hasMemFilePersistenceConfig()) { + guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(), + *guard.persistenceConfig())); + } +} + +void +MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> cfg) +{ + assert(cfg.get() != nullptr); + auto guard = _env->acquireConfigWriteLock(); + guard.setDevicesConfig(std::move(cfg)); +} + +spi::PartitionStateListResult +MemFilePersistenceProvider::getPartitionStates() const +{ + // Lazily initialize to ensure service layer has set up enough for us + // to use all we need (memory manager for instance) + if (_env.get() == 0) { + assert(_repo != 0); + _cache.reset(new MemFileCache(_componentRegister, + _metrics._cache)); + _cache->setCacheSize(getCacheLimits(_config)); + try{ + _env.reset(new Environment( + _configUri, *_cache, _memFileMapper, *_repo, getClock())); + } catch (NoDisksException& e) { + return spi::PartitionStateListResult(spi::PartitionStateList( + 
spi::BucketIdListResult
MemFilePersistenceProvider::listBuckets(spi::PartitionId partition) const
{
    // Scan the whole partition in a single chunk (chunk 0 of 1).
    spi::BucketIdListResult::List buckets;
    _fileScanner->buildBucketList(buckets, partition, 0, 1);
    return spi::BucketIdListResult(buckets);
}

spi::BucketIdListResult
MemFilePersistenceProvider::getModifiedBuckets() const
{
    document::BucketId::List modified;
    _env->swapModifiedBuckets(modified); // Atomic op
    return spi::BucketIdListResult(modified);
}

spi::BucketInfoResult
MemFilePersistenceProvider::getBucketInfo(const spi::Bucket& bucket) const
{
    LOG(spam, "getBucketInfo(%s)", bucket.toString().c_str());
    try {
        // Only keep the file registered as active if it was already cached
        // for this thread; a pure info read should not pin new files.
        const bool retainMemFile = hasCachedMemFile();
        MemFileAccessGuard guard(*this,
                                 getMemFile(bucket, false),
                                 "getBucketInfo",
                                 retainMemFile ? REINSERT_AS_ACTIVE : 0);

        const spi::BucketInfo info = guard->getBucketInfo();

        guard.dismiss();
        return spi::BucketInfoResult(info);
    } catch (std::exception& e) {
        return handleException<spi::BucketInfoResult>(e, true);
    }
}
REINSERT_AS_ACTIVE : 0); + + spi::BucketInfo info = file->getBucketInfo(); + + file.dismiss(); + return spi::BucketInfoResult(info); + } catch (std::exception& e) { + return handleException<spi::BucketInfoResult>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::put(const spi::Bucket& bucket, spi::Timestamp ts, + const document::Document::SP& doc, + spi::Context& context) +{ + TRACEGENERIC(context, "put"); + LOG(spam, "put(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), + doc->getId().toString().c_str()); + try { + TRACE(context, 9, "put", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "put", + REINSERT_AS_ACTIVE); + TRACE(context, 9, "put", "Altering file in memory"); + _util->write(*file, *doc, Timestamp(ts)); + + TRACE(context, 9, "put", "Dismissing file"); + file.dismiss(); + return spi::Result(); + } catch (std::exception& e) { + return handleException<spi::Result>(e, true); + } +} + +spi::RemoveResult +MemFilePersistenceProvider::remove(const spi::Bucket& bucket, spi::Timestamp ts, + const DocumentId& id, spi::Context& context) +{ + TRACEGENERIC(context, "remove"); + LOG(spam, "remove(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), + id.toString().c_str()); + try { + TRACE(context, 9, "remove", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "remove", + REINSERT_AS_ACTIVE); + TRACE(context, 9, "remove", "Altering file in memory"); + spi::Timestamp oldTs(_util->remove(*file, + id, Timestamp(ts), + OperationHandler::ALWAYS_PERSIST_REMOVE).getTime()); + TRACE(context, 9, "remove", "Dismissing file"); + file.dismiss(); + return spi::RemoveResult(oldTs > 0); + } catch (std::exception& e) { + return handleException<spi::RemoveResult>(e, true); + } +} + +spi::RemoveResult +MemFilePersistenceProvider::removeIfFound(const spi::Bucket& bucket, + spi::Timestamp ts, + const DocumentId& id, + spi::Context& context) +{ + TRACEGENERIC(context, "removeIfFound"); + LOG(spam, "removeIfFound(%s, 
%zu, %s)", bucket.toString().c_str(), + uint64_t(ts), id.toString().c_str()); + try { + TRACE(context, 9, "removeIfFound", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "removeiffound", + REINSERT_AS_ACTIVE); + TRACE(context, 9, "removeIfFound", "Altering file in memory"); + spi::Timestamp oldTs(_util->remove(*file, + id, Timestamp(ts), + OperationHandler::PERSIST_REMOVE_IF_FOUND).getTime()); + TRACE(context, 9, "removeIfFound", "Dismissing file"); + file.dismiss(); + return spi::RemoveResult(oldTs > 0); + } catch (std::exception& e) { + return handleException<spi::RemoveResult>(e, true); + } +} + +spi::UpdateResult +MemFilePersistenceProvider::MemFilePersistenceProvider::update( + const spi::Bucket& bucket, spi::Timestamp ts, + const document::DocumentUpdate::SP& upd, spi::Context& context) +{ + TRACEGENERIC(context, "update"); + LOG(spam, "update(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), + upd->getId().toString().c_str()); + try { + TRACE(context, 9, "update", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "update", + REINSERT_AS_ACTIVE); + TRACE(context, 9, "update", "Reading old entry"); + bool headerOnly = !upd->affectsDocumentBody(); + OperationHandler::ReadResult ret = _util->read( + *file, + upd->getId(), + Timestamp(ts), + headerOnly ? 
HEADER_ONLY : ALL); + + Document::UP doc = ret.getDoc(); + if (!doc.get()) { + if (upd->getCreateIfNonExistent()) { + TRACE(context, 9, "update", "Doc did not exist, creating one"); + doc.reset(new Document(upd->getType(), upd->getId())); + upd->applyTo(*doc); + _util->write(*file, *doc, Timestamp(ts)); + file.dismiss(); + return spi::UpdateResult(spi::Timestamp(ts)); + } else { + TRACE(context, 9, "update", "Doc did not exist"); + file.dismiss(); + return spi::UpdateResult(); + } + } + + if (Timestamp(ts) == ret._ts) { + file.dismiss(); + if (doc->getId() == upd->getId()) { + TRACE(context, 9, "update", "Timestamp exist same doc"); + return spi::UpdateResult(spi::Result::TRANSIENT_ERROR, + "Update was already performed."); + } else { + // TODO: Assert-fail if we ever get here?? + TRACE(context, 9, "update", "Timestamp exist other doc"); + std::ostringstream error; + error << "Update of " << upd->getId() + << ": There already exists a document" + << " with timestamp " << ts; + + return spi::UpdateResult(spi::Result::TIMESTAMP_EXISTS, error.str()); + } + } + + TRACE(context, 9, "update", "Altering file in memory"); + upd->applyTo(*doc); + if (headerOnly) { + TRACE(context, 9, "update", "Writing new header entry"); + _util->update(*file, *doc, Timestamp(ts), Timestamp(ret._ts)); + } else { + TRACE(context, 9, "update", "Writing new doc entry"); + _util->write(*file, *doc, Timestamp(ts)); + } + if (headerOnly) { + ++getMetrics().headerOnlyUpdates; + } + + TRACE(context, 9, "update", "Dismissing file"); + file.dismiss(); + return spi::UpdateResult(spi::Timestamp(ret._ts.getTime())); + } catch (std::exception& e) { + return handleException<spi::UpdateResult>(e, true); + } +} + +spi::GetResult +MemFilePersistenceProvider::get(const spi::Bucket& bucket, + const document::FieldSet& fieldSet, + const DocumentId& id, + spi::Context& context) const +{ + TRACEGENERIC(context, "get"); + LOG(spam, "get(%s, %s)", bucket.toString().c_str(), id.toString().c_str()); + try { + 
TRACE(context, 9, "get", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "get"); + document::HeaderFields headerFields; + bool headerOnly = headerFields.contains(fieldSet); + + TRACE(context, 9, "get", "Reading from file."); + OperationHandler::ReadResult ret = + _util->read(*file, id, Timestamp(0), + headerOnly ? HEADER_ONLY : ALL); + + file.dismiss(); + if (!ret._doc.get()) { + TRACE(context, 9, "get", "Doc not found"); + return spi::GetResult(); + } + if (headerOnly) { + TRACE(context, 9, "get", "Retrieved doc header only"); + ++getMetrics().headerOnlyGets; + } + // Don't create unnecessary copy if we want the full doc or header + if (fieldSet.getType() == document::FieldSet::ALL + || fieldSet.getType() == document::FieldSet::HEADER) + { + TRACE(context, 9, "get", "Returning doc"); + return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime())); + } else { + TRACE(context, 9, "get", "Returning stripped doc"); + document::FieldSet::stripFields(*ret._doc, fieldSet); + return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime())); + } + } catch (std::exception& e) { + return handleException<spi::GetResult>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::flush(const spi::Bucket& bucket, + spi::Context& context) +{ + TRACEGENERIC(context, "flush"); + LOG(spam, "flush(%s)", bucket.toString().c_str()); + try { + TRACE(context, 9, "flush", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "flush"); + + LOG(spam, "Attempting to auto-flush %s", + file->getFile().toString().c_str()); + TRACE(context, 9, "flush", "Flushing to disk"); + file->flushToDisk(); + + TRACE(context, 9, "flush", "Dismissing file"); + file.dismiss(); + return spi::Result(); + } catch (std::exception& e) { + return handleException<spi::Result>(e, true); + } +} + +spi::CreateIteratorResult +MemFilePersistenceProvider::createIterator(const spi::Bucket& b, + const document::FieldSet& fieldSet, + const spi::Selection& sel, + 
spi::IterateResult
MemFilePersistenceProvider::iterate(spi::IteratorId iterId,
                                    uint64_t maxByteSize,
                                    spi::Context& context) const
{
    TRACEGENERIC(context, "iterate");
    try {
        clearActiveMemFile(&context);
        spi::IterateResult result(
                _iteratorHandler->iterate(iterId, maxByteSize));
        TRACE(context, 9, "iterate", "Done filling iterator");
        return result;
    } catch (std::exception& e) {
        return handleException<spi::IterateResult>(e, true);
    }
}

/**
 * Destroy a previously created iterator.
 *
 * Bug fix: the catch handler used handleException<spi::IterateResult> even
 * though this function returns spi::Result, silently slicing the more
 * derived result type. It now instantiates handleException with the actual
 * return type.
 */
spi::Result
MemFilePersistenceProvider::destroyIterator(spi::IteratorId iterId,
                                            spi::Context& context)
{
    TRACEGENERIC(context, "destroyIterator");
    try {
        return _iteratorHandler->destroyIterator(iterId);
    } catch (std::exception& e) {
        return handleException<spi::Result>(e, true);
    }
}

/**
 * Delete the file backing a bucket.
 *
 * Same fix as destroyIterator: the exception handler now uses spi::Result,
 * matching the function's return type, instead of spi::IterateResult.
 */
spi::Result
MemFilePersistenceProvider::deleteBucket(const spi::Bucket& bucket,
                                         spi::Context& context)
{
    TRACEGENERIC(context, "deleteBucket");
    LOG(spam, "deleteBucket(%s)", bucket.toString().c_str());
    try {
        TRACE(context, 9, "deleteBucket", "Grabbing memfile");
        MemFileAccessGuard file(*this, getMemFile(bucket), "deleteBucket");
        TRACE(context, 9, "deleteBucket", "Deleting it");
        file.getMemFilePtr().deleteFile();
        // It is assumed guard will only kick in if deleteFile has failed
        // _before_ it erases the bucket from the cache (since this should
        // be a nothrow op). Otherwise, this will crash trying to deref a
        // null ptr.
        TRACE(context, 9, "deleteBucket", "Dismissing file");
        file.dismiss();
        return spi::Result();
    } catch (std::exception& e) {
        return handleException<spi::Result>(e, true);
    }
}
+ TRACE(context, 9, "deleteBucket", "Dismissing file"); + file.dismiss(); + return spi::Result(); + } catch (std::exception& e) { + return handleException<spi::IterateResult>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::split(const spi::Bucket& source, + const spi::Bucket& target1, + const spi::Bucket& target2, + spi::Context& context) +{ + TRACEGENERIC(context, "split"); + LOG(spam, "split(%s -> %s, %s)", source.toString().c_str(), + target1.toString().c_str(), target2.toString().c_str()); + try { + clearActiveMemFile(); + return _splitOperationHandler->split(source, target1, target2); + } catch (std::exception& e) { + return handleException<spi::Result>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::join(const spi::Bucket& source1, + const spi::Bucket& source2, + const spi::Bucket& target, + spi::Context& context) +{ + TRACEGENERIC(context, "join"); + LOG(spam, "join(%s, %s -> %s)", source1.toString().c_str(), + source2.toString().c_str(), target.toString().c_str()); + try { + clearActiveMemFile(); + return _joinOperationHandler->join(source1, source2, target); + } catch (std::exception& e) { + return handleException<spi::Result>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::removeEntry(const spi::Bucket& bucket, + spi::Timestamp ts, + spi::Context& context) +{ + TRACEGENERIC(context, "removeEntry"); + LOG(spam, "removeEntry(%s, %zu)", bucket.toString().c_str(), uint64_t(ts)); + try { + TRACE(context, 9, "removeEntry", "Grabbing memfile"); + MemFileAccessGuard file(*this, getMemFile(bucket), "revert", + REINSERT_AS_ACTIVE); + const MemSlot* slot = file->getSlotAtTime(Timestamp(ts)); + if (slot) { + TRACE(context, 9, "removeEntry", "Removing slot"); + file->removeSlot(*slot); + } + + TRACE(context, 9, "removeEntry", "Dismissing file"); + file.dismiss(); + return spi::Result(); + } catch (std::exception& e) { + return handleException<spi::Result>(e, true); + } +} + +spi::Result +MemFilePersistenceProvider::maintain(const 
spi::Bucket& bucket, + spi::MaintenanceLevel level) +{ + LOG(spam, "maintain(%s)", bucket.toString().c_str()); + try { + MemFileAccessGuard file(*this, getMemFile(bucket, false), "maintain"); + assert(!file->slotsAltered()); + if (!file->fileExists()) { + LOG(debug, + "maintain(%s): file '%s' does not exist, nothing to maintain. " + "Assuming file was corrupted and auto-deleted.", + bucket.toString().c_str(), + file->getFile().getPath().c_str()); + return spi::Result(); + } + + std::ostringstream report; + const uint32_t verifyFlags((level == spi::HIGH) ? 0 : DONT_VERIFY_BODY); + if (!file->repair(report, verifyFlags)) { + LOG(debug, + "repair() on %s indicated errors, evicting from cache to " + "force reload of file with altered metadata", + bucket.toString().c_str()); + return spi::Result(); // No dismissal of guard; auto-evict. + } + assert(!file->slotsAltered()); + file->compact(); + file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); + + file.dismiss(); + return spi::Result(); + } catch (std::exception& e) { + // Failing maintain() cannot cause an auto-repair since this will + // in turn call maintain(). 
+ return handleException<spi::Result>(e, false); + } +} + +vespalib::string +MemFilePersistenceProvider::getReportContentType(const framework::HttpUrlPath&) const +{ + return "text/html"; +} + +namespace { + +void +printMemoryUsage(std::ostream& out, + const char* part, + uint64_t usage, + uint64_t total) +{ + out << "<li>" << part << ": " << usage; + if (total > 0) { + out << " (" << ((static_cast<double>(usage) / total) * 100.0) << "%)"; + } + out << "</li>\n"; +} + +} + +bool +MemFilePersistenceProvider::reportStatus(std::ostream& out, + const framework::HttpUrlPath& path) const +{ + framework::PartlyHtmlStatusReporter htmlReporter(*this); + htmlReporter.reportHtmlHeader(out, path); + + out << "<h1>Mem file persistence provider status page</h1>\n"; + bool printVerbose = path.hasAttribute("verbose"); + if (!printVerbose) { + out << "<p><a href=\"memfilepersistenceprovider?verbose\">" + "More verbose</a></p>\n"; + } else { + out << "<p><a href=\"memfilepersistenceprovider\">" + "Less verbose</a></p>\n"; + } + + MemFileCache::Statistics cacheStats(_env->_cache.getCacheStats()); + const MemFileCache::MemoryUsage& memUsage(cacheStats._memoryUsage); + out << "<p>Cache with " << cacheStats._numEntries + << " entries using " << memUsage.sum() + << " of max " << cacheStats._cacheSize + << " bytes</p>\n"; + out << "<ul>\n"; + printMemoryUsage(out, "Meta", memUsage.metaSize, memUsage.sum()); + printMemoryUsage(out, "Header", memUsage.headerSize, memUsage.sum()); + printMemoryUsage(out, "Body", memUsage.bodySize, memUsage.sum()); + out << "</ul>\n"; + out << "</p>\n"; + + if (printVerbose) { + _env->_cache.printCacheEntriesHtml(out); + } + + htmlReporter.reportHtmlFooter(out, path); + + return true; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h new file mode 100644 index 00000000000..69edb680d5c --- /dev/null +++ 
b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h @@ -0,0 +1,164 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/memfilepersistence/init/filescanner.h> +#include <vespa/persistence/spi/abstractpersistenceprovider.h> +#include <vespa/storageframework/storageframework.h> +#include <vespa/memfilepersistence/spi/operationhandler.h> +#include <vespa/memfilepersistence/spi/iteratorhandler.h> +#include <vespa/memfilepersistence/spi/joinoperationhandler.h> +#include <vespa/memfilepersistence/spi/splitoperationhandler.h> +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> +#include <vespa/memfilepersistence/spi/threadmetricprovider.h> +#include <vespa/storageframework/generic/status/httpurlpath.h> +#include <vespa/memfilepersistence/spi/threadlocals.h> +#include <vespa/config/config.h> + +namespace storage { + +namespace memfile { + +class ThreadContext { +public: + MemFilePtr _memFile; + MemFilePersistenceThreadMetrics* _metrics; + + ThreadContext() + : _metrics(NULL) + {} +}; + +class MemFilePersistenceProvider : public spi::AbstractPersistenceProvider, + public framework::Component, + public Types, + public framework::StatusReporter, + public ThreadMetricProvider +{ +public: + typedef std::unique_ptr<MemFilePersistenceProvider> UP; + + MemFilePersistenceProvider( + framework::ComponentRegister& reg, + const config::ConfigUri & configUri); + + ~MemFilePersistenceProvider(); + + spi::PartitionStateListResult getPartitionStates() const; + + spi::BucketIdListResult listBuckets(spi::PartitionId) const; + + spi::BucketIdListResult getModifiedBuckets() const; + + spi::BucketInfoResult getBucketInfo(const spi::Bucket&) const; + + spi::Result put(const spi::Bucket&, spi::Timestamp, + const 
document::Document::SP&, spi::Context&); + + spi::RemoveResult remove(const spi::Bucket&, spi::Timestamp, + const DocumentId&, spi::Context&); + + spi::RemoveResult removeIfFound(const spi::Bucket&, spi::Timestamp, + const DocumentId&, spi::Context&); + + spi::UpdateResult update(const spi::Bucket&, spi::Timestamp, + const document::DocumentUpdate::SP&, spi::Context&); + + spi::GetResult get(const spi::Bucket&, const document::FieldSet&, + const spi::DocumentId&, spi::Context&) const; + + spi::Result flush(const spi::Bucket&, spi::Context&); + + spi::CreateIteratorResult createIterator(const spi::Bucket&, + const document::FieldSet&, + const spi::Selection&, + spi::IncludedVersions versions, + spi::Context&); + + spi::IterateResult iterate(spi::IteratorId, + uint64_t maxByteSize, spi::Context&) const; + + spi::Result destroyIterator(spi::IteratorId, spi::Context&); + + spi::Result deleteBucket(const spi::Bucket&, spi::Context&); + + spi::Result split(const spi::Bucket& source, + const spi::Bucket& target1, + const spi::Bucket& target2, + spi::Context&); + + spi::Result join(const spi::Bucket& source1, + const spi::Bucket& source2, + const spi::Bucket& target, + spi::Context&); + + spi::Result removeEntry(const spi::Bucket&, + spi::Timestamp, spi::Context&); + + spi::Result maintain(const spi::Bucket&, + spi::MaintenanceLevel level); + + Environment& getEnvironment() { + return *_env; + } + + virtual vespalib::string getReportContentType( + const framework::HttpUrlPath&) const; + virtual bool reportStatus(std::ostream&, + const framework::HttpUrlPath&) const; + + /** + Used by unit tests. 
+ */ + void clearActiveMemFile(spi::Context* = 0) const; + const IteratorHandler& getIteratorHandler() const { return *_iteratorHandler; } + + MemFilePersistenceThreadMetrics& getMetrics() const; + + void setDocumentRepo(const document::DocumentTypeRepo& repo); + void setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> config); + void setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> config); + void setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> config); +private: + framework::ComponentRegister& _componentRegister; + + config::ConfigUri _configUri; + vespa::config::storage::StorMemfilepersistenceConfig _config; + mutable MemFileMapper _memFileMapper; + + const document::DocumentTypeRepo* _repo; + mutable MemFileCache::UP _cache; + mutable Environment::UP _env; + mutable FileScanner::UP _fileScanner; + mutable OperationHandler::UP _util; + mutable IteratorHandler::UP _iteratorHandler; + mutable JoinOperationHandler::UP _joinOperationHandler; + mutable SplitOperationHandler::UP _splitOperationHandler; + mutable MemFilePersistenceMetrics _metrics; + + mutable ThreadLocals<ThreadContext> _threadLocals; + + std::pair<spi::Result::ErrorType, vespalib::string> + getErrorFromException(const std::exception& e); + + MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true) const; + void setActiveMemFile(MemFilePtr ptr, const char* user) const; + bool hasCachedMemFile() const; + + template<typename C> C handleException(const std::exception& e, + bool canRepairBucket) const; + + void handleBucketCorruption(const FileSpecification& file) const; + + //void addBucketToNotifySet(const MemFile& file) const; + + MemFilePtr& getThreadLocalMemFile() const; + + friend class MemFileAccessGuard; +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h new file 
mode 100644 index 00000000000..70c711e81fd --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/metrics/metrics.h> +#include <vespa/memfilepersistence/mapper/serializationmetrics.h> + +namespace storage { +namespace memfile { + +class MemFilePersistenceThreadMetrics : public metrics::MetricSet +{ +public: + metrics::LongCountMetric headerOnlyGets; + metrics::LongCountMetric headerOnlyUpdates; + SerializationMetrics serialization; + + MemFilePersistenceThreadMetrics(const std::string& name, metrics::MetricSet& owner) + : metrics::MetricSet(name, "partofsum thread", + "Metrics for a worker thread using memfile persistence " + "provider", &owner), + headerOnlyGets("headeronlygets", "", + "Number of gets that only read header", this), + headerOnlyUpdates("headeronlyupdates", "", + "Number of updates that only wrote header", this), + serialization("serialization", this) + { + } +}; + +class MemFilePersistenceCacheMetrics : public metrics::MetricSet +{ +public: + metrics::LongValueMetric files; + metrics::LongValueMetric meta; + metrics::LongValueMetric header; + metrics::LongValueMetric body; + metrics::LongCountMetric hits; + metrics::LongCountMetric misses; + metrics::LongCountMetric meta_evictions; + metrics::LongCountMetric header_evictions; + metrics::LongCountMetric body_evictions; + + MemFilePersistenceCacheMetrics(metrics::MetricSet& owner) + : metrics::MetricSet("cache", "", + "Metrics for the VDS persistence cache", &owner), + files("files", "", "Number of files cached", this), + meta("meta", "", "Bytes of file metadata cached", this), + header("header", "", "Bytes of file header parts cached", this), + body("body", "", "Bytes of file body parts cached", this), + hits("hits", "", "Number of times a bucket was attempted fetched " + "from the 
cache and it was already present", this), + misses("misses", "", "Number of times a bucket was attempted fetched " + "from the cache and it could not be found, requiring a load", this), + meta_evictions("meta_evictions", "", "Bucket meta data evictions", this), + header_evictions("header_evictions", "", "Bucket header (and " + "implicitly body, if present) data evictions", this), + body_evictions("body_evictions", "", "Bucket body data evictions", this) + {} +}; + +class MemFilePersistenceMetrics : public metrics::MetricSet +{ + framework::Component& _component; + +public: + vespalib::Lock _threadMetricsLock; + std::list<vespalib::LinkedPtr<MemFilePersistenceThreadMetrics> > _threadMetrics; + + std::unique_ptr<metrics::SumMetric<MemFilePersistenceThreadMetrics> > _sumMetric; + MemFilePersistenceCacheMetrics _cache; + + MemFilePersistenceMetrics(framework::Component& component) + : metrics::MetricSet("memfilepersistence", "", + "Metrics for the VDS persistence layer"), + _component(component), + _cache(*this) + { + } + + MemFilePersistenceThreadMetrics* addThreadMetrics() { + metrics::MetricLockGuard metricLock(_component.getMetricManagerLock()); + vespalib::LockGuard guard(_threadMetricsLock); + + if (!_sumMetric.get()) { + _sumMetric.reset(new metrics::SumMetric<MemFilePersistenceThreadMetrics> + ("allthreads", "sum", "", this)); + } + + std::string name = vespalib::make_string("thread_%zu", _threadMetrics.size()); + + MemFilePersistenceThreadMetrics* metrics = + new MemFilePersistenceThreadMetrics(name, *this); + + _threadMetrics.push_back(vespalib::LinkedPtr<MemFilePersistenceThreadMetrics>( + metrics)); + _sumMetric->addMetricToSum(*metrics); + return metrics; + } +}; + +} +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp new file mode 100644 index 00000000000..349663fb7c1 --- /dev/null +++ 
b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp @@ -0,0 +1,287 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/spi/operationhandler.h> + +#include <vespa/log/log.h> + +LOG_SETUP(".persistence.memfile.handler.operation"); + +namespace storage { +namespace memfile { + +OperationHandler::OperationHandler(Environment& env) + : _env(env) +{ +} + +OperationHandler::ReadResult +OperationHandler::read(MemFile& file, const DocumentId& id, + Timestamp maxTimestamp, GetFlag getFlags) const +{ + if (maxTimestamp == Timestamp(0)) { + maxTimestamp = MAX_TIMESTAMP; + } + const MemSlot* slot(file.getSlotWithId(id, maxTimestamp)); + if (slot == 0 || slot->deleted()) { + return ReadResult(Document::UP(), Timestamp(0)); + } + return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp()); +} + +OperationHandler::ReadResult +OperationHandler::read(MemFile& file, Timestamp timestamp, + GetFlag getFlags) const +{ + const MemSlot* slot(file.getSlotAtTime(timestamp)); + if (slot == 0 || slot->deleted()) { + return ReadResult(Document::UP(), Timestamp(0)); + } + + return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp()); +} + +Types::Timestamp +OperationHandler::remove(MemFile& file, + const DocumentId& id, + Timestamp timestamp, + RemoveType persistRemove) +{ + LOG(debug, "remove(%s, %s, %zu, %s)", + file.getFile().getPath().c_str(), + id.toString().c_str(), + timestamp.getTime(), + persistRemove ? 
"always persist" : "persist only if put is found"); + + const MemSlot* slotAtTime(file.getSlotAtTime(timestamp)); + if (slotAtTime) { + if (slotAtTime->deleted()) { + LOG(spam, + "Slot %s already existed at timestamp %zu but was already " + "deleted; not doing anything", + slotAtTime->toString().c_str(), + timestamp.getTime()); + return Timestamp(0); + } + LOG(spam, + "Slot %s already existed at timestamp %zu, delegating to " + "unrevertableRemove", + slotAtTime->toString().c_str(), + timestamp.getTime()); + return unrevertableRemove(file, id, timestamp); + } + + const MemSlot* slot(file.getSlotWithId(id)); + + if (slot == 0 || slot->getTimestamp() > timestamp) { + LOG(spam, "No slot existed, or timestamp was higher"); + + if (persistRemove == ALWAYS_PERSIST_REMOVE) { + file.addRemoveSlotForNonExistingEntry( + id, timestamp, MemFile::REGULAR_REMOVE); + } + return Timestamp(0); + } + + if (slot->deleted()) { + LOG(spam, "Document %s was already deleted.", + id.toString().c_str()); + + if (persistRemove == ALWAYS_PERSIST_REMOVE) { + file.addRemoveSlot(*slot, timestamp); + } + + return Timestamp(0); + } + + Timestamp oldTs(slot->getTimestamp()); + file.addRemoveSlot(*slot, timestamp); + return oldTs; +} + +Types::Timestamp +OperationHandler::unrevertableRemove(MemFile& file, + const DocumentId& id, + Timestamp timestamp) +{ + LOG(debug, "unrevertableRemove(%s, %s, %zu)", + file.getFile().getPath().c_str(), + id.toString().c_str(), + timestamp.getTime()); + + const MemSlot* slot(file.getSlotAtTime(timestamp)); + if (slot == 0) { + file.addRemoveSlotForNonExistingEntry( + id, timestamp, MemFile::UNREVERTABLE_REMOVE); + return Timestamp(0); + } + if (slot->getGlobalId() != id.getGlobalId()) { + // Should Not Happen(tm) case: given timestamp+document id does not + // match the document ID stored on file for the timestamp. In this + // case we throw out the old slot and insert a new unrevertable remove + // slot with the new document ID. 
+ LOG(error, "Unrevertable remove for timestamp %zu with document id %s " + "does not match the document id %s of the slot stored at this " + "timestamp! Existing slot: %s. Removing old slot to get in sync.", + timestamp.getTime(), + id.toString().c_str(), + file.getDocumentId(*slot).toString().c_str(), + slot->toString().c_str()); + file.removeSlot(*slot); + file.addRemoveSlotForNonExistingEntry( + id, timestamp, MemFile::UNREVERTABLE_REMOVE); + return timestamp; + } + + MemSlot newSlot(*slot); + newSlot.turnToUnrevertableRemove(); + file.modifySlot(newSlot); + return timestamp; +} + +void +OperationHandler::write(MemFile& file, const Document& doc, Timestamp time) +{ + const MemSlot* slot(file.getSlotAtTime(time)); + if (slot != 0) { + if (doc.getId().getGlobalId() == slot->getGlobalId() && + !slot->deleted()) + { + LOG(debug, "Tried to put already existing document %s at time " + "%zu into file %s. Probably sent here by merge from other " + "copy. Flagging put ok and doing nothing.", + doc.getId().toString().c_str(), + time.getTime(), + file.getFile().getPath().c_str()); + return; + } else { + std::ostringstream ost; + ost << "Failed adding document " << doc.getId().toString() + << " to slotfile '" << file.getFile().getPath() + << "'. 
Entry " << *slot << " already exists at that timestamp"; + LOG(warning, "%s", ost.str().c_str()); + throw TimestampExistException( + ost.str(), file.getFile(), time, VESPA_STRLOC); + } + } + + file.addPutSlot(doc, time); +} + +bool +OperationHandler::update(MemFile& file, const Document& header, + Timestamp newTime, Timestamp existingTime) +{ + const MemSlot* slot; + if (existingTime == Timestamp(0)) { + slot = file.getSlotWithId(header.getId()); + } else { + slot = file.getSlotAtTime(existingTime); + if (slot == NULL) { + return false; + } + + DocumentId docId = file.getDocumentId(*slot); + if (docId != header.getId()) { + std::ostringstream ost; + ost << "Attempted update of doc " << header.getId() << " with " + << "timestamp " << existingTime << " failed as non-matching " + << "doc " << docId << " existed at timestamp."; + throw MemFileIoException(ost.str(), file.getFile(), + MemFileIoException::INTERNAL_FAILURE, VESPA_STRLOC); + } + } + if (slot == 0 || slot->deleted()) return false; + + file.addUpdateSlot(header, *slot, newTime); + return true; +} + +std::vector<Types::Timestamp> +OperationHandler::select(MemFile& file, + SlotMatcher& checker, + uint32_t iteratorFlags, + Timestamp fromTimestamp, + Timestamp toTimestamp) +{ + verifyLegalFlags(iteratorFlags, LEGAL_ITERATOR_FLAGS, "select"); + checker.preload(file); + std::vector<Timestamp> result; + result.reserve(file.getSlotCount()); + for (MemFile::const_iterator it = file.begin(iteratorFlags, + fromTimestamp, + toTimestamp); + it != file.end(); ++it) + { + if (checker.match(SlotMatcher::Slot(*it, file))) { + result.push_back(it->getTimestamp()); + } + } + reverse(result.begin(), result.end()); + return result; +} + +void +OperationHandler::verifyBucketMapping(const DocumentId& id, + const BucketId& bucket) const +{ + BucketId docBucket(_env._bucketFactory.getBucketId(id)); + docBucket.setUsedBits(bucket.getUsedBits()); + if (bucket != docBucket) { + docBucket = _env._bucketFactory.getBucketId(id); + throw 
vespalib::IllegalStateException("Document " + id.toString() + + " (bucket " + docBucket.toString() + ") does not belong in " + + "bucket " + bucket.toString() + ".", VESPA_STRLOC); + } +} + +MemFilePtr +OperationHandler::getMemFile(const spi::Bucket& b, bool keepInCache) +{ + return getMemFile(b.getBucketId(), b.getPartition(), keepInCache); +} + +MemFilePtr +OperationHandler::getMemFile(const document::BucketId& id, Directory& dir, + bool keepInCache) { + return _env._cache.get(id, _env, dir, keepInCache); +} + +MemFilePtr +OperationHandler::getMemFile(const document::BucketId& id, uint16_t diskIndex, + bool keepInCache) +{ + return getMemFile(id, _env.getDirectory(diskIndex), keepInCache); +} + +document::FieldSet::UP +OperationHandler::parseFieldSet(const std::string& fieldSet) +{ + document::FieldSetRepo fsr; + return fsr.parse(_env.repo(), fieldSet); +} + +std::unique_ptr<document::select::Node> +OperationHandler::parseDocumentSelection( + const std::string& documentSelection, bool allowLeaf) +{ + std::unique_ptr<document::select::Node> ret; + try { + document::select::Parser parser( + _env.repo(), _env._bucketFactory); + ret = parser.parse(documentSelection); + } catch (document::select::ParsingFailedException& e) { + LOG(debug, "Failed to parse document selection '%s': %s", + documentSelection.c_str(), e.getMessage().c_str()); + return std::unique_ptr<document::select::Node>(); + } + if (ret->isLeafNode() && !allowLeaf) { + LOG(debug, "Document selection results in a single leaf node: '%s'", + documentSelection.c_str()); + return std::unique_ptr<document::select::Node>(); + } + return ret; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h new file mode 100644 index 00000000000..9a2700c4209 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo 
Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::OperationHandler + * \ingroup memfile + * + * \brief Super class for operation handlers. + * + * The operation handler superclass provides common functionality needed to + * operation handlers. + */ +#pragma once + +#include <boost/utility.hpp> +#include <vespa/memfilepersistence/common/exceptions.h> +#include <vespa/memfilepersistence/common/filespecification.h> +#include <vespa/memfilepersistence/common/types.h> +#include <vespa/memfilepersistence/memfile/memfile.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/memfilepersistence/memfile/memfileptr.h> +#include <vespa/memfilepersistence/common/slotmatcher.h> +#include <vespa/persistence/spi/bucketinfo.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/document/fieldset/fieldsetrepo.h> + +namespace storage { +namespace memfile { + +class OperationHandler : protected Types, + private boost::noncopyable +{ +protected: + Environment& _env; + +public: + typedef std::unique_ptr<OperationHandler> UP; + + OperationHandler(Environment&); + virtual ~OperationHandler() {} + + struct ReadResult : private Types { + ReadResult(Document::UP doc, + Timestamp ts) + : _doc(std::move(doc)), + _ts(ts) {}; + + ReadResult(ReadResult&& other) + : _doc(std::move(other._doc)), + _ts(other._ts) {}; + + Document::UP _doc; + Timestamp _ts; + + Document::UP getDoc() { return std::move(_doc); } + }; + + ReadResult read(MemFile&, + const DocumentId&, + Timestamp maxTimestamp, + GetFlag getFlags) const; + + ReadResult read(MemFile&, Timestamp timestamp, GetFlag getFlags) const; + + enum RemoveType + { + ALWAYS_PERSIST_REMOVE, + PERSIST_REMOVE_IF_FOUND + }; + + Types::Timestamp remove(MemFile&, + const DocumentId&, + Timestamp, + RemoveType); + + Types::Timestamp unrevertableRemove(MemFile&, + const DocumentId&, + Timestamp); + + void write(MemFile&, const Document& 
doc, Timestamp); + + bool update(MemFile&, + const Document& headerToOverwrite, + Timestamp newTime, + Timestamp existingTime = Timestamp(0)); + + /** + * Get the slots matching a given matcher. + * + * @return The timestamps of the matching slots, ordered in rising + * timestamp order. + */ + std::vector<Timestamp> select(MemFile&, SlotMatcher&, + uint32_t iteratorFlags, + Timestamp fromTimestamp = Timestamp(0), + Timestamp toTimestamp = Timestamp(0)); + + /** Verify that a document id belongs to a given bucket. */ + void verifyBucketMapping(const DocumentId&, const BucketId&) const; + + MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true); + + MemFilePtr getMemFile(const document::BucketId& id, Directory& dir, + bool keepInCache = true); + + MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk, + bool keepInCache = true); + + document::FieldSet::UP parseFieldSet(const std::string& fieldSet); + + std::unique_ptr<document::select::Node> + parseDocumentSelection(const std::string& documentSelection, + bool allowLeaf); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp new file mode 100644 index 00000000000..34d86259832 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/spi/splitoperationhandler.h> +#include <vespa/memfilepersistence/spi/cacheevictionguard.h> +#include <vespa/log/log.h> + +LOG_SETUP(".persistence.memfile.handler.split"); + +namespace storage { +namespace memfile { + +SplitOperationHandler::SplitOperationHandler(Environment& env) + : OperationHandler(env) +{ +} + +namespace { + +struct BucketMatcher : public SlotMatcher { + const document::BucketIdFactory& _factory; + document::BucketId _bid; + + BucketMatcher(const document::BucketIdFactory& factory, const document::BucketId& bid) + : SlotMatcher(PRELOAD_HEADER), + _factory(factory), + _bid(bid) {} + + virtual bool match(const Slot& slot) { + document::DocumentId id(slot.getDocumentId()); + document::BucketId bucket = _factory.getBucketId(id); + bucket.setUsedBits(_bid.getUsedBits()); + + if (bucket.stripUnused() == _bid.stripUnused()) { + return true; + } else { + return false; + } + } +}; + +} + +void +SplitOperationHandler::copyTimestamps( + const MemFile& source, + MemFile& target, + const std::vector<Timestamp>& timestamps) +{ + std::vector<const MemSlot*> slotsToCopy; + slotsToCopy.reserve(timestamps.size()); + for (uint32_t i = 0; i < timestamps.size(); i++) { + const MemSlot* slot = source.getSlotAtTime(timestamps[i]); + + if (!target.getSlotAtTime(timestamps[i])) { + slotsToCopy.push_back(slot); + } + } + target.copySlotsFrom(source, slotsToCopy); +} + +uint32_t +SplitOperationHandler::splitIntoFile(MemFile& source, + const spi::Bucket& target) +{ + BucketMatcher matcher(_env._bucketFactory, target.getBucketId()); + + std::vector<Timestamp> ts = select(source, matcher, ITERATE_REMOVED); + + MemFileCacheEvictionGuard targetFile(getMemFile(target, false)); + + LOG(debug, + "Found %" PRIu64 " slots to move from file %s to file %s", + ts.size(), + source.getFile().toString().c_str(), + targetFile->getFile().toString().c_str()); + + copyTimestamps(source, *targetFile, ts); + + 
targetFile->flushToDisk(); + targetFile.unguard(); + return ts.size(); +} + +spi::Result +SplitOperationHandler::split(const spi::Bucket& source, + const spi::Bucket& target1, + const spi::Bucket& target2) +{ + MemFileCacheEvictionGuard file(getMemFile(source, false)); + file->ensureBodyBlockCached(); + + uint32_t totalDocsMoved = 0; + totalDocsMoved += splitIntoFile(*file, target1); + if (target2.getBucketId().getRawId() != 0) { + totalDocsMoved += splitIntoFile(*file, target2); + } + if (file->getBucketInfo().getEntryCount() != totalDocsMoved) { + LOG(error, "Split(%s) code moved only %u of %u entries out of source " + "file.", + source.getBucketId().toString().c_str(), + totalDocsMoved, file->getBucketInfo().getEntryCount()); + assert(false); + } + file.get().deleteFile(); + file.unguard(); + return spi::Result(); +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h new file mode 100644 index 00000000000..5bc1376d55f --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * \class storage::memfile::SplitHandler + * \ingroup memfile + * + * \brief Class used to do basic operations to memfiles. 
+ */ +#pragma once + +#include <vespa/memfilepersistence/spi/operationhandler.h> +#include <vespa/persistence/spi/persistenceprovider.h> + +namespace storage { + +namespace memfile { + +class SplitOperationHandler : public OperationHandler { +public: + typedef std::unique_ptr<SplitOperationHandler> UP; + + SplitOperationHandler(Environment&); + + spi::Result split(const spi::Bucket& source, + const spi::Bucket& target1, + const spi::Bucket& target2); + +private: + /** + * Copies the slots designated by the given list of timestamps from one mem + * file to another. If the target already has a slot at any of the given + * timestamps, those timestamps aren't copied. + */ + void copyTimestamps(const MemFile& source, MemFile& target, + const std::vector<Timestamp>& timestamps); + + uint32_t splitIntoFile(MemFile& source, const spi::Bucket& target); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp new file mode 100644 index 00000000000..b6c71165f28 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/spi/threadlocals.h> + +namespace storage { + +namespace memfile { + +vespalib::Lock ThreadStatic::_threadLock; +uint16_t ThreadStatic::_nextThreadIdx = 0; +__thread int ThreadStatic::_threadIdx = -1; + +void ThreadStatic::initThreadIndex() +{ + if (_threadIdx == -1) { + vespalib::LockGuard guard(_threadLock); + _threadIdx = _nextThreadIdx; + ++_nextThreadIdx; + } +} + +} + +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h new file mode 100644 index 00000000000..518eec7f909 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/vespalib/util/sync.h> + +namespace storage { + +namespace memfile { + +class ThreadStatic { +public: + static vespalib::Lock _threadLock; + static uint16_t _nextThreadIdx; + static __thread int _threadIdx; + + void initThreadIndex(); +}; + +/** + * This class takes ownership of a set of thread local + * variables. The maximum number of unique threads the + * class can use must be predetermined on construction. + */ +template<typename T> +class ThreadLocals : public ThreadStatic { + static const size_t CACHE_LINE_SIZE = 64; // Architectural assumption. + struct CacheLinePaddedValue + { + T _data; + private: + // Ensure addressing the data of one entry does not touch the cache + // line of any following entries. Could make this an exact fit, but + // not very important since there are very few TLS entries in total. 
+ char _padding[CACHE_LINE_SIZE]; + }; +public: + mutable std::vector<CacheLinePaddedValue> _contexts; + + ThreadLocals(uint32_t maxThreadCount) + : _contexts(maxThreadCount) + { + } + + T& get() { + initThreadIndex(); + assert(_threadIdx < (int)_contexts.size()); + return _contexts[_threadIdx]._data; + } +}; + +} + +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h new file mode 100644 index 00000000000..5ffe259028c --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +namespace storage { +namespace memfile { + +class MemFilePersistenceThreadMetrics; + +class ThreadMetricProvider +{ +public: + virtual ~ThreadMetricProvider() {} + + virtual MemFilePersistenceThreadMetrics& getMetrics() const = 0; +}; + +} +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp new file mode 100644 index 00000000000..37779feb60f --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/memfilepersistence/spi/visitorslotmatcher.h> +#include <vespa/document/select/bodyfielddetector.h> + +namespace storage { +namespace memfile { + +namespace { + +SlotMatcher::PreloadFlag +getCacheRequirements(const document::select::Node* selection, + const document::DocumentTypeRepo& repo) { + if (!selection) { + return SlotMatcher::PRELOAD_META_DATA_ONLY; + } + + document::select::BodyFieldDetector detector(repo); + selection->visit(detector); + + if (detector.foundBodyField) { + return SlotMatcher::PRELOAD_BODY; + } else { + return SlotMatcher::PRELOAD_HEADER; + } +} + +bool needDocument(const document::select::Node* selection) +{ + if (selection) { + document::select::NeedDocumentDetector detector; + selection->visit(detector); + return detector.needDocument(); + } else { + return false; + } +} + +} // namespace + +VisitorSlotMatcher::VisitorSlotMatcher( + const document::DocumentTypeRepo& repo, + const document::select::Node* selection) + : SlotMatcher(getCacheRequirements(selection, repo)), + _selection(selection), + _needDocument(needDocument(selection)) +{ +}; + +bool +VisitorSlotMatcher::match(const Slot& slot) { + if (_selection) { + if (!slot.isRemove() && _needDocument) { + document::Document::UP doc( + slot.getDocument(!(_preload == PRELOAD_BODY))); + return (_selection->contains(*doc) + == document::select::Result::True); + } else { + document::DocumentId docId(slot.getDocumentId()); + return (_selection->contains(docId) + == document::select::Result::True); + } + } + + return true; +} + +} +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h new file mode 100644 index 00000000000..9b1412da4b7 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#pragma once + +#include <vespa/memfilepersistence/common/slotmatcher.h> + +namespace storage { +namespace memfile { + +class VisitorSlotMatcher : public SlotMatcher +{ +private: + const document::select::Node* _selection; + bool _needDocument; + +public: + VisitorSlotMatcher(const document::DocumentTypeRepo& repo, + const document::select::Node* selection); + + virtual bool match(const Slot& slot); + +}; + +} +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore new file mode 100644 index 00000000000..c7687cb62d2 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore @@ -0,0 +1,5 @@ +/.depend +/Makefile +/dumpslotfile +/vdsdisktool +vdsdisktool-bin diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt new file mode 100644 index 00000000000..cc8ad0a2320 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(memfilepersistence_tools OBJECT + SOURCES + dumpslotfile.cpp + vdsdisktool.cpp + DEPENDS +) +vespa_add_executable(memfilepersistence_dumpslotfile_app + SOURCES + dumpslotfileapp.cpp + OUTPUT_NAME dumpslotfile + INSTALL bin + DEPENDS + memfilepersistence +) +vespa_add_executable(memfilepersistence_vdsdisktool_app + SOURCES + vdsdiskapp.cpp + OUTPUT_NAME vdsdisktool-bin + INSTALL bin + DEPENDS + memfilepersistence +) diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp new file mode 100644 index 00000000000..a44aa81d3ab --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp @@ -0,0 +1,361 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/config/helper/configgetter.h> +#include <vespa/document/config/config-documenttypes.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/document/document.h> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/common/environment.h> +#include <vespa/memfilepersistence/device/devicemanager.h> +#include <vespa/memfilepersistence/mapper/memfilemapper.h> +#include <vespa/memfilepersistence/memfile/memfilecache.h> +#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> +#include <vespa/memfilepersistence/tools/dumpslotfile.h> +#include <string> +#include <vespa/persistence/spi/bucketinfo.h> +#include <vespa/storageframework/defaultimplementation/clock/realclock.h> +#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> +#include <vespa/storageframework/defaultimplementation/memory/nomemorymanager.h> +#include <vespa/vespalib/util/programoptions.h> + +LOG_SETUP(".vds.dumpslotfile"); + +using config::ConfigGetter; +using document::DocumenttypesConfig; +using config::FileSpec; +using 
document::DocumentTypeRepo; + +namespace storage { +namespace memfile { + +namespace { + std::ostream* cout; + std::ostream* cerr; + + struct CmdOptions : public vespalib::ProgramOptions { + bool showSyntaxPage; + bool userFriendlyOutput; + bool printHeader; + bool printBody; + bool toXml; + bool toBinary; + bool includeRemovedDocs; + bool includeRemoveEntries; +// std::string metaDataSort; + std::string documentManConfigId; + std::string filename; + uint64_t timestampToShow; + std::string docId; +// bool useConstructor; + + CmdOptions(int argc, const char* const* argv) + : vespalib::ProgramOptions(argc, argv), + showSyntaxPage(false) + { + setSyntaxMessage( + "Utility program for showing the contents of the slotfiles " + "used by Vespa Document Storage in a user readable format. " + "Intended for debugging purposes." + ); + addOption("h help", showSyntaxPage, false, + "Shows this help page"); + addOption("n noheader", printHeader, true, + "If given, the header block content is not shown"); + addOption("N nobody", printBody, true, + "If given, the body block content is not shown"); + addOption("f friendly", userFriendlyOutput, false, + "Gives less compact, but more user friendly output"); + addOption("x toxml", toXml, false, + "Print document XML of contained documents"); + addOption("b tobinary", toBinary, false, + "Print binary representations of contained documents"); + addOption("includeremoveddocs", includeRemovedDocs, false, + "When showing XML, include documents that are still in " + "the file, but have been removed."); + addOption("includeremoveentries", includeRemoveEntries, false, + "When showing XML, include remove entries."); + addOption("c documentconfig", documentManConfigId, + std::string("client"), + "The document config to use, needed if deserializing " + "documents."); +// addOption("s sort", metaDataSort, std::string("none"), +// "How to sort metadatalist. 
Valid arguments: " +// "bodypos, headerpos & none."); + addOption("t time", timestampToShow, uint64_t(0), + "If set, only present data related to this timestamp, " + "when outputting XML or binary data."); + addOption("docid", docId, std::string(""), + "Retrieve single document using get semantics"); +// addOption("useconstructor", useConstructor, false, "Debug option"); + addArgument("slotfile", filename, "The slotfile to dump."); + } + }; + + void printDoc(document::Document& doc, CmdOptions& o) { + if (o.toXml) { + *cout << doc.toXml() << "\n"; + } else { + document::ByteBuffer::UP bbuf(doc.serialize()); + *cout << std::string(bbuf->getBuffer(), bbuf->getLength()); + } + } + + void printFailure(const std::string& failure) { + *cerr << failure << "\n"; + } + + uint64_t extractBucketId(const std::string& path) { + size_t slashPos = path.find_last_of('/'); + bool foundSlash = true; + if (slashPos == std::string::npos) { + foundSlash = false; + } + + size_t dotPos = path.find_last_of('.'); + if (dotPos == std::string::npos + || (foundSlash && (slashPos > dotPos))) + { + dotPos = path.size(); + } + + std::string bucketIdAsHex; + if (foundSlash) { + bucketIdAsHex.assign(path.begin() + slashPos + 1, + path.begin() + dotPos); + } else { + bucketIdAsHex.assign(path.begin(), + path.begin() + dotPos); + } + + char* endp; + uint64_t bucketId = strtoull(bucketIdAsHex.c_str(), &endp, 16); + if (*endp != '\0') { + return 0; + } + return bucketId; + } + + struct EnvironmentImpl : ThreadMetricProvider { + framework::defaultimplementation::ComponentRegisterImpl _compReg; + framework::Component _component; + framework::defaultimplementation::RealClock _clock; + framework::defaultimplementation::NoMemoryManager _memoryMan; + MemFilePersistenceMetrics _metrics; + MemFilePersistenceThreadMetrics* _threadMetrics; + std::unique_ptr<MemFileCache> _cache; + MemFileMapper _mapper; + DeviceManager _deviceManager; + document::DocumentType _docType; + DocumentTypeRepo::SP _repo; + 
vespa::config::storage::StorMemfilepersistenceConfigBuilder _memFileConfig; + vespa::config::content::PersistenceConfigBuilder _persistenceConfig; + vespa::config::storage::StorDevicesConfigBuilder _deviceConfig; + config::ConfigSet _configSet; + config::IConfigContext::SP _configContext; + std::unique_ptr<config::ConfigUri> _internalConfig; + std::unique_ptr<Environment> _env; + + EnvironmentImpl(config::ConfigUri& externalConfig, + const char* documentConfigId) + : _compReg(), + _component(_compReg, "dumpslotfile"), + _clock(), + _metrics(_component), + _threadMetrics(_metrics.addThreadMetrics()), + _cache(), + _mapper(*this), + _deviceManager(DeviceMapper::UP(new SimpleDeviceMapper), _clock), + _docType("foo", 1) + { + _compReg.setClock(_clock); + _compReg.setMemoryManager(_memoryMan); + _cache.reset(new MemFileCache(_compReg, _metrics._cache)); + LOG(debug, "Setting up document repo"); + if (documentConfigId == 0) { + _repo.reset(new DocumentTypeRepo(_docType)); + } else { + config::ConfigUri uri( + externalConfig.createWithNewId(documentConfigId)); + std::unique_ptr<document::DocumenttypesConfig> config( + ConfigGetter<DocumenttypesConfig>::getConfig( + uri.getConfigId(), uri.getContext())); + _repo.reset(new DocumentTypeRepo(*config)); + } + _deviceConfig.rootFolder = "."; + std::string configId("defaultId"); + _configSet.addBuilder(configId, &_memFileConfig); + _configSet.addBuilder(configId, &_persistenceConfig); + _configSet.addBuilder(configId, &_deviceConfig); + _configContext.reset(new config::ConfigContext(_configSet)); + _internalConfig.reset( + new config::ConfigUri(configId, _configContext)); + _env.reset(new Environment( + *_internalConfig, *_cache, _mapper, *_repo, _clock, true)); + } + + MemFilePersistenceThreadMetrics& getMetrics() const { + return *_threadMetrics; + } + + }; + +} + +int SlotFileDumper::dump(int argc, const char * const * argv, + config::ConfigUri& config, + std::ostream& out, std::ostream& err) +{ + cout = &out; + cerr = &err; 
+ CmdOptions o(argc, argv); + try{ + o.parse(); + } catch (vespalib::InvalidCommandLineArgumentsException& e) { + if (!o.showSyntaxPage) { + err << e.getMessage() << "\n\n"; + o.writeSyntaxPage(err); + err << "\n"; + return 1; + } + } + if (o.showSyntaxPage) { + o.writeSyntaxPage(err); + err << "\n"; + return 0; + } + if (!o.toXml && (o.includeRemovedDocs || o.includeRemoveEntries)) { + err << "Options for what to include in XML makes no sense when " + "not printing XML content.\n\n"; + o.writeSyntaxPage(err); + err << "\n"; + return 1; + } + if (o.toBinary && o.timestampToShow == 0 && o.docId == "") { + err << "To binary option only works for a single document. " + "Use --time or --docid options.\n\n"; + o.writeSyntaxPage(err); + err << "\n"; + return 1; + } +// if (o.metaDataSort != "none" && o.metaDataSort != "bodypos") { +// err << "Illegal value for metadata sorting: '" << o.metaDataSort +// << "'. Legal values are:\n" +// << " none - Keep order on disk (currently timestamp)\n" +// << " bodypos - Reorder metadata by position of body\n" +// << " headerpos - Reorder metadata by position of header\n\n"; +// o.writeSyntaxPage(err); +// err << "\n"; +// return 1; +// } + + EnvironmentImpl env(config, o.toXml ? 
o.documentManConfigId.c_str() : ""); + + document::BucketId bucket(extractBucketId(o.filename)); + Directory::LP dir(env._deviceManager.getDirectory(o.filename, 0)); + FileSpecification fileSpec(bucket, *dir, o.filename); + + MemFile::LoadOptions opts; + opts.autoRepair = false; + MemFile memFile(fileSpec, *env._env, opts); + + if (!o.toXml && !o.toBinary) { + spi::BucketInfo info; + info = memFile.getBucketInfo(); + if (bucket.getRawId() == 0) { + out << "Failed to extract bucket id from filename\n"; + } else { + out << bucket << " (extracted from filename)\n"; + } + out << "Unique document count: " << info.getDocumentCount() + << "\nTotal document size: " + << info.getDocumentSize() << "\n"; + out << "Used size: " << info.getUsedSize() << "\n"; + out << "Entry count: " << info.getEntryCount() << "\n"; + +/* + SlotFile::MetaDataOrder order = SlotFile::DEFAULT; + if (o.metaDataSort == "bodypos") { + order = SlotFile::BODYPOS; + } else if (o.metaDataSort == "headerpos") { + order = SlotFile::HEADERPOS; + } +*/ + memFile.printState(out, o.userFriendlyOutput, o.printBody, + o.printHeader/*, order*/); + out << "\n"; + std::ostringstream ost; + uint16_t verifyFlags = 0; // May verify only header/body + if (env._mapper.verify(memFile, *env._env, ost, verifyFlags)) { + out << "Slotfile verified.\n"; + } else { + out << "Slotfile failed verification.\n"; + out << ost.str() << "\n"; + } + } else { + std::ostringstream ost; + uint16_t verifyFlags = 0; // May verify only header/body + if (!env._mapper.verify(memFile, *env._env, ost, verifyFlags)) { + out << "Slotfile failed verification.\n"; + out << ost.str() << "\n"; + return 1; + } + + if (o.toXml) { + out << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; + out << "<vespafeed>\n"; + } + if (o.docId != "") { + const MemSlot* slot( + memFile.getSlotWithId(document::DocumentId(o.docId))); + if (slot != 0 && !slot->deleted()) { + document::Document::UP doc(memFile.getDocument(*slot, + o.printBody ? 
+ Types::ALL : Types::HEADER_ONLY)); + if (doc.get()) { + printDoc(*doc, o); + } else { + printFailure("No document with id " + o.docId + + " found."); + } + } else { + printFailure("No document with id " + o.docId + " found."); + } + } else { + uint32_t iteratorFlags = o.includeRemoveEntries ? + Types::ITERATE_REMOVED : 0; + if (!o.includeRemovedDocs) { + iteratorFlags |= Types::ITERATE_GID_UNIQUE; + } + for (MemFile::const_iterator it = memFile.begin(iteratorFlags); + it != memFile.end(); ++it) + { + if (o.timestampToShow == 0 + || (Types::Timestamp)o.timestampToShow + == it->getTimestamp()) + { + if (it->deleted() || it->deletedInPlace()) { + printFailure("Found remove entry"); + } else { + document::Document::UP doc(memFile.getDocument(*it, + o.printBody ? + Types::ALL : Types::HEADER_ONLY)); + if (doc.get()) { + printDoc(*doc, o); + } else { + printFailure("Unable to get document in " + + it->toString(true)); + } + } + } + } + } + if (o.toXml) { + out << "</vespafeed>\n"; + } + } + return 0; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h new file mode 100644 index 00000000000..698f3a5066b --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/fastos/fastos.h> +#include <iostream> + +namespace config { + class ConfigUri; +} + +namespace storage { +namespace memfile { + +struct SlotFileDumper { + static int dump(int argc, const char * const * argv, + config::ConfigUri& config, + std::ostream& out, std::ostream& err); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp new file mode 100644 index 00000000000..c5300f53571 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/config/subscription/configuri.h> +#include <vespa/memfilepersistence/tools/dumpslotfile.h> + +namespace { + +struct DumpSlotFileApp : public FastOS_Application { + int Main() { + try{ + config::ConfigUri config(""); + return storage::memfile::SlotFileDumper::dump( + _argc, _argv, config, std::cout, std::cerr); + } catch (std::exception& e) { + std::cerr << "Aborting due to exception:\n" << e.what() << "\n"; + return 1; + } + } +}; + +} // anonymous + +int main(int argc, char **argv) { + DumpSlotFileApp app; + return app.Entry(argc, argv); +} diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp new file mode 100644 index 00000000000..6fae206f15b --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/defaults.h> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/tools/vdsdisktool.h> + +LOG_SETUP(".vdsdisktool"); + +namespace { + struct DiskApp : public FastOS_Application { + int Main() { + try { + std::string dir = vespa::Defaults::vespaHome(); + dir.append("var/db/vespa/vds"); + return storage::memfile::VdsDiskTool::run( + _argc, _argv, dir.c_str(), + std::cout, std::cerr); + } catch (std::exception& e) { + std::cerr << "Application aborted with exception:\n" << e.what() + << "\n"; + return 1; + } + } + }; +} // anonymous + +int main(int argc, char **argv) { + DiskApp app; + return app.Entry(argc, argv); +} + diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp new file mode 100644 index 00000000000..2a3f1d58134 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp @@ -0,0 +1,518 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + + +#include <vespa/fastos/fastos.h> +#include <vespa/document/util/stringutil.h> +#include <vespa/fnet/frt/frt.h> +#include <iostream> +#include <vespa/log/log.h> +#include <vespa/memfilepersistence/device/mountpointlist.h> +#include <vespa/memfilepersistence/tools/vdsdisktool.h> +#include <vespa/storageframework/defaultimplementation/clock/realclock.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/util/programoptions.h> + +LOG_SETUP(".vdsdiskapp"); + +using std::vector; + +namespace storage { +namespace memfile { + +using vespalib::getLastErrorString; + +namespace { + + struct Sorter { + bool operator()(const std::pair<std::string, std::string>& first, + const std::pair<std::string, std::string>& second) + { return (first.first < second.first); } + }; + + /** + * Read pid from pid file. 
In case we want to extend pid file to contain + * more information later, accept multiple lines in file as long as pid is + * in first, and allow a pid: prefix to the pid. + */ + uint32_t readPid(const std::string& pidFile) { + vespalib::LazyFile lf(pidFile, vespalib::File::READONLY); + vector<char> data(32); + size_t read = lf.read(&data[0], 32, 0); + // If pid file has been extended to have more data, ignore it. + for (uint32_t i=0; i<32; ++i) { + if (data[i] == '\n') { + data[i] = '\0'; + read = i; + break; + } + } + // Allow a "pid:" prefix if it exists. + int start = 0; + if (strncmp("pid:", &data[0], 4) == 0) { + start = 4; + } + // Fail unless the first line was just a number with the pid + char* endp; + uint32_t pid = strtoull(&data[start], &endp, 10); + if (*endp != '\0' || read >= 32) { + throw vespalib::IllegalStateException( + "Unexpected content in pid file " + pidFile, + VESPA_STRLOC); + } + if (pid == 0) { + throw vespalib::IllegalStateException( + "Read pid 0 from pidfile which is illegal.", + VESPA_STRLOC); + } + return pid; + } +} + +struct CmdLineOptions : public vespalib::ProgramOptions { + std::ostream& _err; + std::string _rootpath; + bool _showSyntax; + std::string _cluster; + uint32_t _nodeIndex; + std::string _mode; + uint32_t _diskIndex; + std::string _message; + /* + std::string _slobrokConfigId; + std::string _slobrokConnectionSpec; + */ + + CmdLineOptions(int argc, const char * const * argv, + const std::string& rootpath, std::ostream& err) + : vespalib::ProgramOptions(argc, argv), + _err(err), + _rootpath(rootpath) + { + setSyntaxMessage( + "This tool is used to stop VDS from using a given partition " + "you no longer want it to use, or to reenable use of a partition " + "that previously have been disabled. Note that currently, this " + "requires a restart of the storage node, which this tool will " + "do automatically. 
Note that the tool must be run on the storage " + "node where you want to enable/disable a partition.\n\n" + "Examples:\n" + " vdsdisktool disable 2 \"Seeing a lot of smart warnings on this one\"\n" + " vdsdisktool -c mycluster -i 3 disable 0 \"Shouldn't have put this on OS drive\"\n" + " vdsdisktool enable 2\n" + ); + addOption("h help", _showSyntax, false, + "Show this help page."); + addOption("c cluster", _cluster, std::string(""), + "Which cluster the storage node whose disks should be " + "adjusted. If only data from one cluster is detected " + "on the node, this does not have to be specified"); + addOption("i index", _nodeIndex, uint32_t(0xffffffff), + "The node index of the storage node whose disks should be " + "adjusted. If only data from one storage node is detected " + "on the node, this does not have to be specified"); + addArgument("Mode", _mode, + "There are three modes. They are status, enable and disable" + ". The status mode is used to just query current disk " + "status without. The enable and disable modes will enable " + "or disable a disk."); + addArgument("Disk Index", _diskIndex, uint32_t(0xffffffff), + "The disk index which you want to enable/disable. Not " + "specified in status mode, but required otherwise."); + addArgument("Reason", _message, std::string(""), + "Give a reason for why we're enabling or disabling a disk. 
" + "Required when disabling a disk, such that other " + "administrators can see why it has happened."); + } + + vector<std::string> listDir(const std::string& dir) { + DIR* dirp = opendir(dir.c_str()); + struct dirent* entry; + vector<std::string> result; + if (dirp) while ((entry = readdir(dirp))) { + if (entry == 0) { + std::ostringstream ost; + ost << "Failed to read directory '" << dir << "', errno " + << errno << ": " << getLastErrorString() << "\n"; + int tmp = closedir(dirp); + assert(tmp == 0); + (void) tmp; + throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); + } + std::string name(reinterpret_cast<char*>(&entry->d_name)); + assert(name.size() > 0); + if (name[0] == '.') continue; + result.push_back(name); + } + int tmp = closedir(dirp); + assert(tmp == 0); + (void) tmp; + return result; + } + + std::set<std::string> detectPossibleClusters() { + if (!vespalib::fileExists(_rootpath)) { + throw vespalib::IllegalStateException( + "No VDS installations found at all in " + _rootpath, + VESPA_STRLOC); + } + vector<std::string> files(listDir(_rootpath)); + std::set<std::string> result(files.begin(), files.end()); + return result; + } + + std::set<uint16_t> + detectPossibleNodeIndexes(const std::string& cluster) + { + std::string dir = _rootpath + "/" + cluster + "/storage"; + if (!vespalib::fileExists(dir)) { + throw vespalib::IllegalStateException( + "No VDS installations found at all in " + dir, + VESPA_STRLOC); + } + vector<std::string> files(listDir(dir)); + std::set<uint16_t> result; + for (uint32_t i=0; i<files.size(); ++i) { + char* endp; + uint64_t index = strtoull(files[i].c_str(), &endp, 10); + if (*endp != '\0' || index > 0xffff) { + _err << "Found strange file in directory supposed to " + << "contain node indexes: '" << files[i] << "'.\n"; + } else { + result.insert(index); + } + } + return result; + } + + bool validate() { + // Validate that cluster was in fact found. Uses storage disk + // directories to scan for legal targets. 
+ LOG(debug, "Detecting clusters"); + std::set<std::string> clusters(detectPossibleClusters()); + if (clusters.size() == 0) { + _err << "No VDS clusters at all detected on this node.\n"; + return false; + } + bool clusterFound = false; + if (_cluster != "") { + if (clusters.find(_cluster) == clusters.end()) { + _err << "No cluster named '" << _cluster + << "' found.\n"; + } else { + clusterFound = true; + } + } else if (clusters.size() != 1u) { + _err << "Cluster must be specified as there are multiple " + "targets.\n"; + } else { + _cluster = *clusters.begin(); + clusterFound = true; + } + if (!clusterFound) { + _err << "Detected cluster names on local node:\n"; + for (std::set<std::string>::const_iterator it = clusters.begin(); + it != clusters.end(); ++it) + { + _err << " " << *it << "\n"; + } + return false; + } + // Validate that node index was in fact found. Uses storage disk + // directories to scan for legal targets. + LOG(debug, "Detecting node indexes"); + std::set<uint16_t> nodeIndexes( + detectPossibleNodeIndexes(_cluster)); + if (nodeIndexes.size() == 0) { + _err << "No node indexes at all detected on this node in " + "cluster '" << _cluster << ".\n"; + return false; + } + bool indexFound = false; + if (_nodeIndex != uint32_t(0xffffffff)) { + if (_nodeIndex > 0xffff) { + _err << "Illegal node index " << _nodeIndex + << " specified. 
Nodes must be in the range of " + << "0-65535.\n"; + return false; + } + if (nodeIndexes.find(_nodeIndex) == nodeIndexes.end()) { + _err << "No node with index " << _nodeIndex + << " found in cluster '" << _cluster + << "'.\n"; + } else { + indexFound = true; + } + } else if (nodeIndexes.size() != 1u) { + _err << "Node index must be specified as there are multiple " + "targets.\n"; + } else { + _nodeIndex = *nodeIndexes.begin(); + indexFound = true; + } + if (!indexFound) { + _err << "Detected node indexes on local node in cluster '" + << _cluster << "':\n"; + for (std::set<uint16_t>::const_iterator it = nodeIndexes.begin(); + it != nodeIndexes.end(); ++it) + { + _err << " " << *it << "\n"; + } + return false; + } + // Validate modes + if (_mode != "enable" && _mode != "disable" && _mode != "status") { + _err << "Illegal mode '" << _mode << "'.\n"; + return false; + } + // Warn if senseless options are given in status mode + if (_mode == "status" && (_diskIndex != 0xffffffff || _message != "")) { + _err << "Warning: Disk index and/or reason makes no sense in " + << "status mode.\n"; + } + if ((_mode == "enable" || _mode == "disable") + && _diskIndex == 0xffffffff) + { + _err << "A disk index must be given to specify which disk to " + << _mode << ".\n"; + return false; + } + if (_mode == "disable" && _message == "") { + _err << "A reason must be given for why you are disabling the " + "disk.\n"; + return false; + } + if (_mode == "enable" || _mode == "disable") { + std::ostringstream dir; + dir << _rootpath << "/" << _cluster << "/storage/" << _nodeIndex + << "/disks/d" << _diskIndex; + if (!vespalib::fileExists(dir.str())) { + _err << "Cannot " << _mode << " missing disk " + << _diskIndex << ". 
No disk detected at " + << dir.str() << "\n"; + return false; + } + } + return true; + } + + vector<uint16_t> getNodeIndexes() { + vector<uint16_t> indexes; + indexes.push_back(_nodeIndex); + return indexes; + } + + std::string getNodePath(uint16_t nodeIndex) { + std::ostringstream ost; + ost << _rootpath << "/" << _cluster << "/storage/" << nodeIndex; + return ost.str(); + } + + std::string getPidFile(uint16_t nodeIndex) { + return getNodePath(nodeIndex) + "/pidfile"; + } + +}; + + +int +VdsDiskTool::run(int argc, const char * const * argv, + const std::string& rootPath, + std::ostream& out, std::ostream& err) +{ + CmdLineOptions options(argc, argv, rootPath, err); + try{ + LOG(debug, "Parsing command line options"); + options.parse(); + } catch (vespalib::InvalidCommandLineArgumentsException& e) { + LOG(debug, "Failed parsing command line options"); + if (!options._showSyntax) { + err << e.getMessage() << "\n"; + options.writeSyntaxPage(err, false); + err << "\n"; + return 1; + } + } + if (options._showSyntax) { + options.writeSyntaxPage(err, false); + err << "\n"; + return 0; + } + LOG(debug, "Validating options"); + if (!options.validate()) { + LOG(debug, "Options failed validation"); + options.writeSyntaxPage(err, false); + return 1; + } + LOG(debug, "Iterate over all nodes to operate on"); + // Iterate over all node indexes to operate on. + for (uint32_t indexIterator = 0; + indexIterator < options.getNodeIndexes().size(); ++indexIterator) + { + uint16_t nodeIndex = options.getNodeIndexes()[indexIterator]; + std::string pidFile = options.getPidFile(nodeIndex); + + // Read pid if process is running + uint32_t pid = 0; + try{ + if (vespalib::fileExists(pidFile)) { + pid = readPid(pidFile); + if (kill(pid, 0) != 0) { + err << "Failed to signal process with pid " + << pid << " (" << errno << "): " + << getLastErrorString() << ". 
If storage node is " + << "running it needs to be manually restarted" + << " before changes take effect.\n"; + } else if (options._mode == "status") { + out << "Storage node " << nodeIndex + << " in cluster " << options._cluster + << " is running with pid " << pid << ".\n"; + } + } + } catch (vespalib::IoException& e) { + err << "Failed to read pid file: " << e.getMessage() + << "\n"; + if (options._mode != "status") { + err << "Not restarting storage node after changes.\n"; + } + } + framework::defaultimplementation::RealClock clock; + // Read the disk status file. + DeviceManager::LP devMan(new DeviceManager( + DeviceMapper::UP(new SimpleDeviceMapper), + clock)); + MountPointList mountPointList(options.getNodePath(nodeIndex), + vector<vespalib::string>(), + devMan); + mountPointList.scanForDisks(); + if (options._mode == "enable" || options._mode == "disable") { + if (mountPointList.getSize() <= options._diskIndex + || mountPointList[options._diskIndex].getState() + == Device::NOT_FOUND) + { + err << "Disk " << options._diskIndex << " on node " + << nodeIndex << " in cluster " + << options._cluster << " does not exist. " + << "Cannot enable or disable a non-existing " + << "disk.\n"; + return 1; + } + if (mountPointList[options._diskIndex].getState() + != Device::OK) + { + err << "Disk " << options._diskIndex << " on node " + << nodeIndex << " in cluster " + << options._cluster << " fails pre-initialize " + << "routine. 
Cannot enable or disable disk with " + << "such a problem: " + << mountPointList[options._diskIndex] << "\n"; + return 1; + } + } + vector<Device::State> preFileStates( + mountPointList.getSize()); + for (uint32_t i=0; i<mountPointList.getSize(); ++i) { + preFileStates[i] = mountPointList[i].getState(); + } + mountPointList.readFromFile(); + if (options._mode == "enable") { + Directory& dir(mountPointList[options._diskIndex]); + if (dir.getState() == Device::OK) { + out << "Disk " << options._diskIndex << " on node " + << nodeIndex << " in cluster " + << options._cluster << " is already enabled. " + << "Nothing to do.\n"; + continue; + } + // Shouldn't be null when state is not OK + assert(dir.getLastEvent() != 0); + IOEvent oldEvent(*dir.getLastEvent()); + dir.clearEvents(); + dir.getPartition().clearEvents(); + dir.getPartition().getDisk().clearEvents(); + if (preFileStates[options._diskIndex] != Device::OK) { + out << "Cannot enable disk " << options._diskIndex + << " on node " << nodeIndex << " in cluster " + << options._cluster << ", as it has a failure " + << "that must be fixed by an admin.\n"; + if (preFileStates[options._diskIndex] + != oldEvent.getState()) + { + out << "Clearing any stored state such that the " + << "disk will work once admin fixes\n" + << "the current error.\n"; + } + } else { + out << "Reactivating disk " << options._diskIndex + << " on node " << nodeIndex << " in cluster " + << options._cluster << ". Removed stored event: " + << oldEvent << "\n"; + } + } else if (options._mode == "disable") { + Directory& dir(mountPointList[options._diskIndex]); + if (dir.getState() != Device::OK) { + // Shouldn't be null when state is not OK + assert(dir.getLastEvent() != 0); + IOEvent oldEvent(*dir.getLastEvent()); + out << "Disk " << options._diskIndex << " on node " + << nodeIndex << " in cluster " + << options._cluster << " is already disabled. 
" + << "Overriding old event: " << oldEvent << "\n"; + } + dir.clearEvents(); + dir.getPartition().clearEvents(); + dir.getPartition().getDisk().clearEvents(); + IOEvent newEvent(clock.getTimeInSeconds().getTime(), + Device::DISABLED_BY_ADMIN, + options._message, "vdsdisktool"); + dir.addEvent(newEvent); + out << "Deactivated disk " << options._diskIndex + << " on node " << nodeIndex << " in cluster " + << options._cluster << ". Added event: " + << newEvent << "\n"; + } else if (options._mode == "status") { + out << "Disks on storage node " << nodeIndex + << " in cluster " << options._cluster << ":\n"; + if (mountPointList.getSize() == 0) { + out << " No disks at all are set up.\n"; + } + for (uint32_t i=0; i<mountPointList.getSize(); ++i) { + out << " Disk " << i << ": "; + Directory& dir(mountPointList[i]); + if (dir.isOk()) { + out << "OK\n"; + } else { + const IOEvent* event(dir.getLastEvent()); + assert(event != 0); // If so disk is ok + out << Device::getStateString( + event->getState()) + << " - " << event->getDescription() << "\n"; + } + } + } + if (options._mode == "enable" || options._mode == "disable") { + out << "Writing disk status file to disk\n"; + mountPointList.writeToFile(); + if (pid != 0) { + out << "Killing node such that it reads new data\n"; + int result = kill(pid, SIGTERM); + if (result != 0) { + if (errno == EINVAL) { + err << "Signal SIGTERM not recognized.\n"; + } else if (errno == EPERM) { + err << "No permission to send kill signal to " + "storage process\n"; + } else if (errno == ESRCH) { + err << "No process or process group found " + "using pid " << pid << "\n"; + } + } + } + out << "Done\n"; + continue; + } + } + return 0; +} + +} // memfile +} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h new file mode 100644 index 00000000000..f764db274ce --- /dev/null +++ 
b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/fastos/fastos.h> +#include <iostream> + +namespace storage { +namespace memfile { + +struct VdsDiskTool { + static int run(int argc, const char * const * argv, + const std::string& rootPath, + std::ostream& out, std::ostream& err); +}; + +} // memfile +} // storage + diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl new file mode 100644 index 00000000000..7d7afcbc9d7 --- /dev/null +++ b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl -w +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +use strict; + +# Simple wrapper for executing vdsdisktool-bin + +my $args = &getArgs(); +&run("vdsdisktool-bin $args"); + +exit(0); + +sub isHelpRequest { + foreach my $arg (@ARGV) { + if ($arg eq '-h' || $arg eq '--help') { + return 1; + } + } + return 0; +} + +sub getArgs { + my @args; + foreach my $arg (@ARGV) { + $arg =~ s/([ \t\f])/\\$1/g; + push @args, $arg; + } + return join(' ', @args); +} + +sub isDebugRun { + foreach my $arg (@ARGV) { + if ($arg eq '--debug-perl-wrapper') { + return 1; + } + } + return 0; +} + +sub run { + my ($cmd) = @_; + if (&isDebugRun()) { + print "Debug: Would have executed '$cmd'.\n"; + } else { + exec($cmd); + } +} |