diff options
Diffstat (limited to 'memfilepersistence')
172 files changed, 0 insertions, 26982 deletions
diff --git a/memfilepersistence/.gitignore b/memfilepersistence/.gitignore deleted file mode 100644 index f3c7a7c5da6..00000000000 --- a/memfilepersistence/.gitignore +++ /dev/null @@ -1 +0,0 @@ -Makefile diff --git a/memfilepersistence/CMakeLists.txt b/memfilepersistence/CMakeLists.txt deleted file mode 100644 index a1fcd0028ac..00000000000 --- a/memfilepersistence/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_define_module( - DEPENDS - vespadefaults - fastos - vespalog - vespalib - document - config_cloudconfig - configdefinitions - vdslib - persistence - storageframework - - LIBS - src/vespa/memfilepersistence - src/vespa/memfilepersistence/common - src/vespa/memfilepersistence/device - src/vespa/memfilepersistence/init - src/vespa/memfilepersistence/mapper - src/vespa/memfilepersistence/memfile - src/vespa/memfilepersistence/spi - src/vespa/memfilepersistence/tools - - TEST_DEPENDS - persistence_persistence_conformancetest - vdstestlib - - TESTS - src/tests - src/tests/conformance - src/tests/device - src/tests/init - src/tests/helper - src/tests/spi - src/tests/tools -) diff --git a/memfilepersistence/OWNERS b/memfilepersistence/OWNERS deleted file mode 100644 index dbcff24b338..00000000000 --- a/memfilepersistence/OWNERS +++ /dev/null @@ -1 +0,0 @@ -vekterli diff --git a/memfilepersistence/README b/memfilepersistence/README deleted file mode 100644 index 38a0e92febb..00000000000 --- a/memfilepersistence/README +++ /dev/null @@ -1,4 +0,0 @@ -This module contains Storage's persistence SPI implementation. - -It uses memory representation of files, which currently are persisted in -slotfiles on disk. 
diff --git a/memfilepersistence/src/.gitignore b/memfilepersistence/src/.gitignore deleted file mode 100644 index d6d89678e22..00000000000 --- a/memfilepersistence/src/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/Makefile.ini -/config_command.sh -/project.dsw -/memfilepersistence.mak diff --git a/memfilepersistence/src/Doxyfile b/memfilepersistence/src/Doxyfile deleted file mode 100644 index 671dc341e73..00000000000 --- a/memfilepersistence/src/Doxyfile +++ /dev/null @@ -1,994 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -# Doxyfile 1.2.18 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# General configuration options -#--------------------------------------------------------------------------- - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = Storage - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. 
- -OUTPUT_DIRECTORY = ../doc - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, -# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en -# (Japanese with english messages), Korean, Norwegian, Polish, Portuguese, -# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish and Ukrainian. - -OUTPUT_LANGUAGE = English - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. 
-# If set to NO (the default) these class will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited -# members of a class in the documentation of that class as if those members were -# ordinary class members. Constructors, destructors and assignment operators of -# the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = NO - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. 
It is allowed to use relative paths in the argument list. - -STRIP_FROM_PATH = - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower case letters. If set to YES upper case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful if your file system -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put list of the files that are included by a file in the documentation -# of that file. 
- -SHOW_INCLUDE_FILES = YES - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like the Qt-style comments (thus requiring an -# explicit @brief command for a brief description). - -JAVADOC_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the DETAILS_AT_TOP tag is set to YES then Doxygen -# will output the detailed description near the top, like JavaDoc. -# If set to NO, the detailed description appears after the member -# documentation. - -DETAILS_AT_TOP = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# reimplements. - -INHERIT_DOCS = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. 
- -DISTRIBUTE_GROUP_DOC = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 4 - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consist of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. 
-# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. -# For instance some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = YES - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources -# only. Doxygen will then generate output that is more tailored for Java. -# For instance namespaces will be presented as packages, qualified scopes -# will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. 
The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = storage - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp -# *.h++ *.idl *.odl - -FILE_PATTERNS = *.h *.cpp - -# The RECURSIVE tag can be used to specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories -# that are symbolic links (a Unix filesystem feature) are excluded from the input. 
- -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. - -EXCLUDE_PATTERNS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command <filter> <input-file>, where <filter> -# is the value of the INPUT_FILTER tag, and <input-file> is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. - -INPUT_FILTER = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). 
- -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# If the REFERENCED_BY_RELATION tag is set to YES (the default) -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = YES - -# If the REFERENCES_RELATION tag is set to YES (the default) -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = NO - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. 
- -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet - -HTML_STYLESHEET = ../cpp/vespa_link.css - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) -# of the generated HTML documentation. 
- -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output dir. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non empty doxygen will try to run -# the html help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the Html help documentation and to the tree view. - -TOC_EXPAND = NO - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. - -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be -# generated containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript and frames is required (for instance Mozilla, Netscape 4.0+, -# or Internet explorer 4.0+). Note that for large projects the tree generation -# can take a very long time. 
In such cases it is better to disable this feature. -# Windows users are probably better off using the HTML help feature. - -GENERATE_TREEVIEW = NO - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = YES - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. 
The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! - -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = NO - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimised for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. 
The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. 
- -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_XML = NO - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. - -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. 
Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_PREDEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_PREDEF_ONLY tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse the -# parser if not removed. 
- -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::addtions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES tag can be used to specify one or more tagfiles. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). - -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in Html, RTF and LaTeX) for classes with base or -# super classes. Setting the tag to NO turns the diagrams off. Note that this -# option is superceded by the HAVE_DOT option below. This is only a fallback. It is -# recommended to install and use dot, since it yield more powerful graphs. - -CLASS_DIAGRAMS = YES - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. 
This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = NO - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. - -TEMPLATE_RELATIONS = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. 
If left blank, it is assumed the dot tool can be found on the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width -# (in pixels) of the graphs generated by dot. If a graph becomes larger than -# this value, doxygen will try to truncate the graph, so that it fits within -# the specified constraint. Beware that most browsers cannot cope with very -# large images. - -MAX_DOT_GRAPH_WIDTH = 1024 - -# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height -# (in pixels) of the graphs generated by dot. If a graph becomes larger than -# this value, doxygen will try to truncate the graph, so that it fits within -# the specified constraint. Beware that most browsers cannot cope with very -# large images. - -MAX_DOT_GRAPH_HEIGHT = 1024 - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermedate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Configuration::addtions related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO - -# The CGI_NAME tag should be the name of the CGI script that -# starts the search engine (doxysearch) with the correct parameters. -# A script with this name will be generated by doxygen. 
- -CGI_NAME = search.cgi - -# The CGI_URL tag should be the absolute URL to the directory where the -# cgi binaries are located. See the documentation of your http daemon for -# details. - -CGI_URL = - -# The DOC_URL tag should be the absolute URL to the directory where the -# documentation is located. If left blank the absolute path to the -# documentation, with file:// prepended to it, will be used. - -DOC_URL = - -# The DOC_ABSPATH tag should be the absolute path to the directory where the -# documentation is located. If left blank the directory on the local machine -# will be used. - -DOC_ABSPATH = - -# The BIN_ABSPATH tag must point to the directory where the doxysearch binary -# is installed. - -BIN_ABSPATH = /usr/local/bin/ - -# The EXT_DOC_PATHS tag can be used to specify one or more paths to -# documentation generated for other projects. This allows doxysearch to search -# the documentation for these projects as well. - -EXT_DOC_PATHS = diff --git a/memfilepersistence/src/tests/.gitignore b/memfilepersistence/src/tests/.gitignore deleted file mode 100644 index b8a959a31c5..00000000000 --- a/memfilepersistence/src/tests/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/.depend -/Makefile -/dirconfig.tmp -/test.vlog -/testfile.0 -/testrunner -/vdsroot -memfilepersistence_testrunner_app diff --git a/memfilepersistence/src/tests/CMakeLists.txt b/memfilepersistence/src/tests/CMakeLists.txt deleted file mode 100644 index 1ffe7f785e9..00000000000 --- a/memfilepersistence/src/tests/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_executable(memfilepersistence_testrunner_app TEST - SOURCES - testrunner.cpp - DEPENDS - memfilepersistence_testconformance - memfilepersistence_testdevices - memfilepersistence_testinit - memfilepersistence_testspi - memfilepersistence_testtools -) - -# TODO: Test with a larger chunk size to parallelize test suite runs -vespa_add_test( - NAME memfilepersistence_testrunner_app - COMMAND memfilepersistence_testrunner_app -) diff --git a/memfilepersistence/src/tests/conformance/.gitignore b/memfilepersistence/src/tests/conformance/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/tests/conformance/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/tests/conformance/CMakeLists.txt b/memfilepersistence/src/tests/conformance/CMakeLists.txt deleted file mode 100644 index 077174191c9..00000000000 --- a/memfilepersistence/src/tests/conformance/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_testconformance - SOURCES - memfileconformancetest.cpp - DEPENDS -) diff --git a/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp b/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp deleted file mode 100644 index 1bd9e2f4e9c..00000000000 --- a/memfilepersistence/src/tests/conformance/memfileconformancetest.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/spi/memfilepersistence.h> -#include <vespa/persistence/conformancetest/conformancetest.h> - -#include <vespa/log/log.h> -LOG_SETUP(".test.conformance"); - -using namespace storage::spi; - -namespace storage { -namespace memfile { - - /* -struct MemFileConformanceTest : public ConformanceTest { - struct Factory : public PersistenceFactory { - - PersistenceSPI::UP getPersistenceImplementation() { - return PersistenceSPI::UP(new MemFilePersistence); - } - }; - - MemFileConformanceTest() - : ConformanceTest(PersistenceFactory::UP(new Factory)) {} - - CPPUNIT_TEST_SUITE(MemFileConformanceTest); - DEFINE_CONFORMANCE_TESTS(); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemFileConformanceTest); -*/ - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/device/.gitignore b/memfilepersistence/src/tests/device/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/tests/device/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/tests/device/CMakeLists.txt b/memfilepersistence/src/tests/device/CMakeLists.txt deleted file mode 100644 index 3b7884772b6..00000000000 --- a/memfilepersistence/src/tests/device/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_testdevices - SOURCES - mountpointlisttest.cpp - devicemanagertest.cpp - devicestest.cpp - devicemappertest.cpp - partitionmonitortest.cpp - DEPENDS - memfilepersistence -) diff --git a/memfilepersistence/src/tests/device/devicemanagertest.cpp b/memfilepersistence/src/tests/device/devicemanagertest.cpp deleted file mode 100644 index 0d0b4ad1de3..00000000000 --- a/memfilepersistence/src/tests/device/devicemanagertest.cpp +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/device/devicemanager.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/vespalib/util/exception.h> -#include <vespa/vespalib/util/stringfmt.h> -#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> - -namespace storage { - -namespace memfile { - -class DeviceManagerTest : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(DeviceManagerTest); - CPPUNIT_TEST(testEventClass); - CPPUNIT_TEST(testEventSending); - CPPUNIT_TEST(testXml); - CPPUNIT_TEST_SUITE_END(); - -public: - void testEventClass(); - void testEventSending(); - void testXml(); - - framework::defaultimplementation::FakeClock _clock; -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(DeviceManagerTest); - -void DeviceManagerTest::testEventClass() -{ - // Test that creation various IO events through common errno errors - // generates understandable errors. - { - IOEvent e(IOEvent::createEventFromErrno(1, ENOTDIR, "/mypath")); - CPPUNIT_ASSERT_EQUAL( - std::string("IOEvent(PATH_FAILURE, Not a directory: /mypath, time 1)"), - e.toString(true)); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, e.getState()); - } - { - IOEvent e(IOEvent::createEventFromErrno(2, EACCES, "/mypath")); - CPPUNIT_ASSERT_EQUAL( - std::string("IOEvent(NO_PERMISSION, Permission denied: /mypath, time 2)"), - e.toString(true)); - CPPUNIT_ASSERT_EQUAL(Device::NO_PERMISSION, e.getState()); - } - { - IOEvent e(IOEvent::createEventFromErrno(3, EIO, "/mypath")); - CPPUNIT_ASSERT_EQUAL( - std::string("IOEvent(IO_FAILURE, Input/output error: /mypath, time 3)"), - e.toString(true)); - CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, e.getState()); - } - { - IOEvent e( - IOEvent::createEventFromErrno(4, EBADF, "/mypath", VESPA_STRLOC)); - CPPUNIT_ASSERT_PREFIX( - std::string("IOEvent(INTERNAL_FAILURE, Bad file descriptor: /mypath" - ", testEventClass in"), - e.toString(true)); - 
CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, e.getState()); - } -} - -namespace { - - struct Listener : public IOEventListener { - std::ostringstream ost; - - Listener() : ost() { ost << "\n"; } - ~Listener() {} - - void handleDirectoryEvent(Directory& dir, const IOEvent& e) override { - ost << "Dir " << dir.getPath() << ": " << e.toString(true) << "\n"; - } - void handlePartitionEvent(Partition& part, const IOEvent& e) override { - ost << "Partition " << part.getMountPoint() << ": " << e.toString(true) << "\n"; - } - void handleDiskEvent(Disk& disk, const IOEvent& e) override { - ost << "Disk " << disk.getId() << ": " << e.toString(true) << "\n"; - } - }; - -} - -void DeviceManagerTest::testEventSending() -{ - // Test that adding events to directories in the manager actually sends - // these events on to listeners. - DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); - Listener l; - manager.addIOEventListener(l); - Directory::SP dir(manager.getDirectory("/home/foo/var", 0)); - // IO failures are disk events. 
Will mark all partitions and - // directories on that disk bad - dir->addEvent(IOEvent::createEventFromErrno(1, EIO, "/home/foo/var/foo")); - dir->addEvent(IOEvent::createEventFromErrno(2, EBADF, "/home/foo/var/bar")); - dir->addEvent(IOEvent::createEventFromErrno(3, EACCES, "/home/foo/var/car")); - dir->addEvent(IOEvent::createEventFromErrno(4, EISDIR, "/home/foo/var/var")); - std::string expected("\n" - "Disk 1: IOEvent(IO_FAILURE, Input/output error: " - "/home/foo/var/foo, time 1)\n" - "Dir /home/foo/var: IOEvent(INTERNAL_FAILURE, Bad file " - "descriptor: /home/foo/var/bar, time 2)\n" - "Dir /home/foo/var: IOEvent(NO_PERMISSION, Permission denied: " - "/home/foo/var/car, time 3)\n" - "Dir /home/foo/var: IOEvent(PATH_FAILURE, Is a directory: " - "/home/foo/var/var, time 4)\n" - ); - CPPUNIT_ASSERT_EQUAL(expected, l.ost.str()); -} - -void DeviceManagerTest::testXml() -{ - DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); - Directory::SP dir(manager.getDirectory("/home/", 0)); - dir->getPartition().initializeMonitor(); - std::string xml = manager.toXml(" "); - CPPUNIT_ASSERT_MSG(xml, - xml.find("<partitionmonitor>") != std::string::npos); -} - -} - -} diff --git a/memfilepersistence/src/tests/device/devicemappertest.cpp b/memfilepersistence/src/tests/device/devicemappertest.cpp deleted file mode 100644 index a0568268b56..00000000000 --- a/memfilepersistence/src/tests/device/devicemappertest.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/device/devicemapper.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/vespalib/util/exceptions.h> - -namespace storage { - -namespace memfile { - -class DeviceMapperTest : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(DeviceMapperTest); - CPPUNIT_TEST(testSimpleDeviceMapper); - CPPUNIT_TEST(testAdvancedDeviceMapper); - CPPUNIT_TEST_SUITE_END(); - -public: - void testSimpleDeviceMapper(); - void testAdvancedDeviceMapper(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(DeviceMapperTest); - -void DeviceMapperTest::testSimpleDeviceMapper() -{ - SimpleDeviceMapper mapper; - CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�")); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), mapper.getDeviceId("whatever&�")); - CPPUNIT_ASSERT_EQUAL(uint64_t(2), mapper.getDeviceId("whatnot")); - std::string expected("Whatever& �=)/%#)="); - CPPUNIT_ASSERT_EQUAL(expected, mapper.getMountPoint(expected)); -} - -void DeviceMapperTest::testAdvancedDeviceMapper() -{ - AdvancedDeviceMapper mapper; - try{ - mapper.getDeviceId("/doesnotexist"); - CPPUNIT_FAIL("Expected exception"); - } catch (vespalib::Exception& e) { - std::string what(e.what()); - CPPUNIT_ASSERT_CONTAIN( - "Failed to run stat to find data on file /doesnotexist", what); - } -} - -} - -} // storage diff --git a/memfilepersistence/src/tests/device/devicestest.cpp b/memfilepersistence/src/tests/device/devicestest.cpp deleted file mode 100644 index bb6ad3ee6a2..00000000000 --- a/memfilepersistence/src/tests/device/devicestest.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/device/devicemanager.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> - -namespace storage { - -namespace memfile { - -class DevicesTest : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(DevicesTest); - CPPUNIT_TEST(testDisk); - CPPUNIT_TEST(testPartition); - CPPUNIT_TEST(testDirectory); - CPPUNIT_TEST_SUITE_END(); - -public: - void testDisk(); - void testPartition(); - void testDirectory(); - - framework::defaultimplementation::FakeClock _clock; -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(DevicesTest); - -void DevicesTest::testDisk() -{ - DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); - Disk::SP disk1(manager.getDisk("/something/on/disk")); - Disk::SP disk2(manager.getDisk("/something/on/disk")); - CPPUNIT_ASSERT_EQUAL(disk1->getId(), disk2->getId()); - CPPUNIT_ASSERT_EQUAL(disk1.get(), disk2.get()); - Disk::SP disk3(manager.getDisk("/something/on/disk2")); - CPPUNIT_ASSERT(disk2->getId() != disk3->getId()); - disk3->toString(); // Add code coverage -} - -void DevicesTest::testPartition() -{ - DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); - Partition::SP part(manager.getPartition("/etc")); - CPPUNIT_ASSERT_EQUAL(std::string("/etc"), part->getMountPoint()); - part->toString(); // Add code coverage -} - -void DevicesTest::testDirectory() -{ - DeviceManager manager(DeviceMapper::UP(new SimpleDeviceMapper), _clock); - Directory::SP dir1(manager.getDirectory("/on/disk", 0)); - CPPUNIT_ASSERT_EQUAL(std::string("/on/disk"), dir1->getPath()); - CPPUNIT_ASSERT(dir1->getLastEvent() == 0); - CPPUNIT_ASSERT_EQUAL(Device::OK, dir1->getState()); - CPPUNIT_ASSERT(dir1->isOk()); - CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 0"), dir1->toString()); - - dir1->addEvent(Device::IO_FAILURE, "Ouch", ""); - CPPUNIT_ASSERT(!dir1->isOk()); - CPPUNIT_ASSERT(dir1->getLastEvent() != 0); 
- CPPUNIT_ASSERT_EQUAL(std::string("/on/disk 5 0 Ouch"), dir1->toString()); - dir1->toString(); // Add code coverage -} - -} - -} // storage diff --git a/memfilepersistence/src/tests/device/mountpointlisttest.cpp b/memfilepersistence/src/tests/device/mountpointlisttest.cpp deleted file mode 100644 index 56b59926dff..00000000000 --- a/memfilepersistence/src/tests/device/mountpointlisttest.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/device/mountpointlist.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> -#include <vespa/vdstestlib/cppunit/macros.h> - -using vespalib::fileExists; -using vespalib::isDirectory; -using vespalib::isSymLink; -using vespalib::readLink; - -namespace storage { - -namespace memfile { - -class MountPointList_Test : public CppUnit::TestFixture { - CPPUNIT_TEST_SUITE(MountPointList_Test); - CPPUNIT_TEST(testScanning); - CPPUNIT_TEST(testStatusFile); - CPPUNIT_TEST(testInitDisks); - CPPUNIT_TEST_SUITE_END(); - - static const std::string _prefix; - -public: - void testScanning(); - void testStatusFile(); - void testInitDisks(); - - void init(); - void tearDown() override; - - framework::defaultimplementation::FakeClock _clock; - -private: - DeviceManager::UP newDeviceManager() { - return DeviceManager::UP( - new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - _clock)); - } -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MountPointList_Test); - -const std::string MountPointList_Test::_prefix("./vdsroot"); - -namespace { - void run(const std::string& cmd) { - CPPUNIT_ASSERT_MESSAGE(cmd, system(cmd.c_str()) == 0); - } -} - -void MountPointList_Test::init() -{ - tearDown(); - run("rm -rf "+_prefix); - run("mkdir -p "+_prefix+"/disks"); - - run("mkdir "+_prefix+"/disks/d0"); // Regular dir - // disks/d1 intentionally missing - 
run("mkdir "+_prefix+"/disks/D2"); // Wrongly named dir - run("mkdir "+_prefix+"/disks/d3"); // Regular non-empty dir - run("touch "+_prefix+"/disks/d3/foo"); - run("touch "+_prefix+"/disks/d4"); // Not a dir - run("ln -s D2 "+_prefix+"/disks/d5"); // Symlink to dir - run("ln -s d4 "+_prefix+"/disks/d6"); // Symlink to file -} - -void MountPointList_Test::tearDown() {} - -void MountPointList_Test::testScanning() -{ - init(); - MountPointList list(_prefix, - std::vector<vespalib::string>(), - DeviceManager::UP( - new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - _clock))); - list.scanForDisks(); - - // Check that we got the expected entries. - CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); - - for (uint32_t i=0; i<7u; ++i) { - std::ostringstream ost; - ost << _prefix << "/disks/d" << i; - CPPUNIT_ASSERT_EQUAL(ost.str(), list[i].getPath()); - } - - // Note.. scanForDisks() should not in any circumstances access the - // disks. Thus it should not know that d1 is inaccessible, or that d6 - // is actually a symlink to a file - CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[1].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); - CPPUNIT_ASSERT_EQUAL(Device::OK, list[3].getState()); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); - CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState()); - CPPUNIT_ASSERT_EQUAL(Device::OK, list[6].getState()); - - list.verifyHealthyDisks(-1); - CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[1].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); - CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState()); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); - CPPUNIT_ASSERT_EQUAL(Device::OK, list[5].getState()); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState()); -} - -void MountPointList_Test::testStatusFile() -{ - init(); - 
std::string statusFileName(_prefix + "/disks.status"); - - // Try reading non-existing file, and writing a file - { - MountPointList list(_prefix, - std::vector<vespalib::string>(), - DeviceManager::UP( - new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - _clock))); - - _clock.setAbsoluteTimeInSeconds(5678); - list.scanForDisks(); - - // File does not currently exist, that should be ok though. - list.readFromFile(); - list.verifyHealthyDisks(-1); - CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); - list[5].addEvent(IOEvent(1234, Device::IO_FAILURE, "Argh", "Hmm")); - CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState()); - - // Write to file. - list.writeToFile(); - } - - // Check contents of file. - { - std::ifstream in(statusFileName.c_str()); - std::string line; - CPPUNIT_ASSERT(std::getline(in, line)); - - CPPUNIT_ASSERT_PREFIX( - std::string(_prefix + "/disks/d1 1 5678 Disk not found " - "during scanning of disks directory"), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_PREFIX( - std::string(_prefix +"/disks/d2 1 5678 Disk not found during scanning of " - "disks directory"), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_PREFIX( - std::string(_prefix + "/disks/d3 4 5678 Foreign data in mountpoint. New " - "mountpoints added should be empty."), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_PREFIX( - std::string(_prefix + "/disks/d4 2 5678 File d4 in disks directory is not " - "a directory."), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_PREFIX(std::string(_prefix + "/disks/d5 5 1234 Argh"), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_PREFIX( - std::string(_prefix + "/disks/d6 2 5678 The path exist, but is not a " - "directory."), - line); - CPPUNIT_ASSERT(std::getline(in, line)); - CPPUNIT_ASSERT_EQUAL(std::string("EOF"), line); - } - - // Starting over to get new device instances. 
- // Scan disk, read file, and check that erronious disks are not used. - { - MountPointList list(_prefix, - std::vector<vespalib::string>(), - DeviceManager::UP( - new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - _clock))); - list.scanForDisks(); - list.readFromFile(); - // Check that we got the expected entries. - CPPUNIT_ASSERT_EQUAL(7u, list.getSize()); - - // Note.. scanForDisks() should not under any circumstance access the - // disks. Thus it should not know that d1 is inaccessible. - CPPUNIT_ASSERT_EQUAL(Device::OK, list[0].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[1].getState()); - CPPUNIT_ASSERT_EQUAL(Device::NOT_FOUND, list[2].getState()); - CPPUNIT_ASSERT_EQUAL(Device::INTERNAL_FAILURE, list[3].getState()); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[4].getState()); - CPPUNIT_ASSERT_EQUAL(Device::IO_FAILURE, list[5].getState()); - CPPUNIT_ASSERT_EQUAL(Device::PATH_FAILURE, list[6].getState()); - } -} - -void MountPointList_Test::testInitDisks() -{ - vespalib::string d3target = "d3target"; - vespalib::string foodev = _prefix + "/foodev"; - vespalib::string bardev = _prefix + "/bardev"; - - tearDown(); - run("rm -rf " + _prefix); - run("mkdir -p " + _prefix + "/disks/d2"); - run("ln -s " + d3target + " " + _prefix + "/disks/d3"); - - std::vector<vespalib::string> diskPaths { - // disks/d0 should become a regular directory - _prefix + "/disks/d0", - // disks/d1 should be a symlink to /foo - foodev, - // disks/d2 should already be a directory - "/ignored", - // disks/d3 should already be a symlink - "/ignored2" - }; - - MountPointList list(_prefix, diskPaths, newDeviceManager()); - list.initDisks(); - - CPPUNIT_ASSERT(isDirectory(_prefix + "/disks")); - CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d0")); - CPPUNIT_ASSERT(isSymLink(_prefix + "/disks/d1")); - CPPUNIT_ASSERT_EQUAL(foodev, readLink(_prefix + "/disks/d1")); - CPPUNIT_ASSERT(isDirectory(_prefix + "/disks/d2")); - CPPUNIT_ASSERT(isSymLink(_prefix + 
"/disks/d3")); - CPPUNIT_ASSERT_EQUAL(d3target, readLink(_prefix + "/disks/d3")); -} - -} // memfile - -} // storage - diff --git a/memfilepersistence/src/tests/device/partitionmonitortest.cpp b/memfilepersistence/src/tests/device/partitionmonitortest.cpp deleted file mode 100644 index a31f1fde28b..00000000000 --- a/memfilepersistence/src/tests/device/partitionmonitortest.cpp +++ /dev/null @@ -1,206 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/device/partitionmonitor.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/vdstestlib/cppunit/macros.h> - -namespace storage { - -namespace memfile { - -struct PartitionMonitorTest : public CppUnit::TestFixture -{ - void testNormalUsage(); - void testHighInodeFillrate(); - void testAlwaysStatPolicy(); - void testPeriodPolicy(); - void testStatOncePolicy(); - void testDynamicPolicy(); - void testIsFull(); - - CPPUNIT_TEST_SUITE(PartitionMonitorTest); - CPPUNIT_TEST(testNormalUsage); - CPPUNIT_TEST(testHighInodeFillrate); - CPPUNIT_TEST(testAlwaysStatPolicy); - CPPUNIT_TEST(testPeriodPolicy); - CPPUNIT_TEST(testStatOncePolicy); - CPPUNIT_TEST(testDynamicPolicy); - CPPUNIT_TEST(testIsFull); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(PartitionMonitorTest); - -struct FakeStatter : public PartitionMonitor::Statter { - struct statvfs _info; - - FakeStatter() { - _info.f_bsize = 4096; - _info.f_frsize = 4096; - _info.f_blocks = 1000; - _info.f_bfree = 500; - _info.f_bavail = 400; - _info.f_files = 64; - _info.f_ffree = 32; - _info.f_favail = 30; - _info.f_fsid = 13; - _info.f_namemax = 256; - } - void removeData(uint32_t size) { - _info.f_bavail += (size / _info.f_bsize); - _info.f_bfree += (size / _info.f_bsize); - } - void addData(uint32_t size) { - _info.f_bavail -= (size / _info.f_bsize); - _info.f_bfree -= (size / _info.f_bsize); - } - - void statFileSystem(const 
std::string&, struct statvfs& info) override { - info = _info; - } -}; - -void PartitionMonitorTest::testNormalUsage() -{ - const std::string file_name = TEST_PATH("testrunner.cpp"); - PartitionMonitor monitor(file_name); - FakeStatter* statter = new FakeStatter(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - std::string expected( - "PartitionMonitor(" + file_name + ", STAT_PERIOD(100), " - "2048000/3686400 used - 55.5556 % full)"); - CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false)); - expected = - "PartitionMonitor(" + file_name + ") {\n" - " Fill rate: 55.5556 %\n" - " Inode fill rate: 51.6129 %\n" - " Detected block size: 4096\n" - " File system id: 13\n" - " Total size: 3686400 (3600 kB)\n" - " Used size: 2048000 (2000 kB)\n" - " Queries since last stat: 0\n" - " Monitor policy: STAT_PERIOD(100)\n" - " Root only ratio 0\n" - " Max fill rate 98 %\n" - "}"; - CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true)); - CPPUNIT_ASSERT(monitor.getFillRate() > 0.55); -} - -void PartitionMonitorTest::testHighInodeFillrate() -{ - const std::string file_name = TEST_PATH("testrunner.cpp"); - PartitionMonitor monitor(file_name); - FakeStatter* statter = new FakeStatter(); - statter->_info.f_favail = 2; - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - std::string expected( - "PartitionMonitor(" + file_name + ", STAT_PERIOD(100), " - "2048000/3686400 used - 94.1176 % full (inodes))"); - CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(false)); - expected = - "PartitionMonitor(" + file_name + ") {\n" - " Fill rate: 55.5556 %\n" - " Inode fill rate: 94.1176 %\n" - " Detected block size: 4096\n" - " File system id: 13\n" - " Total size: 3686400 (3600 kB)\n" - " Used size: 2048000 (2000 kB)\n" - " Queries since last stat: 0\n" - " Monitor policy: STAT_PERIOD(100)\n" - " Root only ratio 0\n" - " Max fill rate 98 %\n" - "}"; - CPPUNIT_ASSERT_EQUAL(expected, monitor.toString(true)); - 
CPPUNIT_ASSERT(monitor.getFillRate() > 0.94); -} - -void PartitionMonitorTest::testAlwaysStatPolicy() -{ - PartitionMonitor monitor(TEST_PATH("testrunner.cpp")); - FakeStatter* statter = new FakeStatter(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - monitor.setAlwaysStatPolicy(); - for (uint32_t i=0; i<10; ++i) { - monitor.getFillRate(); - CPPUNIT_ASSERT_EQUAL(0u, monitor._queriesSinceStat); - } -} - -void PartitionMonitorTest::testPeriodPolicy() -{ - PartitionMonitor monitor(TEST_PATH("testrunner.cpp")); - FakeStatter* statter = new FakeStatter(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - monitor.setStatPeriodPolicy(4); - for (uint32_t i=1; i<16; ++i) { - monitor.getFillRate(); - CPPUNIT_ASSERT_EQUAL(i % 4, monitor._queriesSinceStat); - } -} - -void PartitionMonitorTest::testStatOncePolicy() -{ - PartitionMonitor monitor(TEST_PATH("testrunner.cpp")); - FakeStatter* statter = new FakeStatter(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - monitor.setStatOncePolicy(); - for (uint32_t i=1; i<16; ++i) { - monitor.getFillRate(); - CPPUNIT_ASSERT_EQUAL(i, monitor._queriesSinceStat); - } -} - -void PartitionMonitorTest::testDynamicPolicy() -{ - PartitionMonitor monitor(TEST_PATH("testrunner.cpp")); - FakeStatter* statter = new FakeStatter(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - monitor.setStatDynamicPolicy(2); - // Add some data, such that we see that period goes down - CPPUNIT_ASSERT_EQUAL(uint64_t(3698), monitor.calcDynamicPeriod()); - CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate())); - monitor.addingData(256 * 1024); - CPPUNIT_ASSERT_EQUAL(uint64_t(2592), monitor.calcDynamicPeriod()); - CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate())); - monitor.addingData(512 * 1024); - CPPUNIT_ASSERT_EQUAL(uint64_t(968), monitor.calcDynamicPeriod()); - CPPUNIT_ASSERT_EQUAL(76, (int) (100 * monitor.getFillRate())); - // 
Add such that we hint that we have more data than possible on disk - monitor.addingData(1024 * 1024); - // Let fake stat just have a bit more data than before - statter->addData(256 * 1024); - // With high fill rate, we should check stat each time - CPPUNIT_ASSERT_EQUAL(uint64_t(1), monitor.calcDynamicPeriod()); - // As period is 1, we will now do a new stat, it should find we - // actually have less fill rate - CPPUNIT_ASSERT_EQUAL(62, (int) (100 * monitor.getFillRate())); -} - -void PartitionMonitorTest::testIsFull() -{ - PartitionMonitor monitor(TEST_PATH("testrunner.cpp")); - monitor.setMaxFillness(0.85); - FakeStatter* statter = new FakeStatter(); - monitor.setStatOncePolicy(); - monitor.setStatter(std::unique_ptr<PartitionMonitor::Statter>(statter)); - - CPPUNIT_ASSERT_EQUAL(55, (int) (100 * monitor.getFillRate())); - CPPUNIT_ASSERT(!monitor.isFull()); - monitor.addingData(512 * 1024); - CPPUNIT_ASSERT_EQUAL(69, (int) (100 * monitor.getFillRate())); - CPPUNIT_ASSERT(!monitor.isFull()); - monitor.addingData(600 * 1024); - CPPUNIT_ASSERT_EQUAL(86, (int) (100 * monitor.getFillRate())); - CPPUNIT_ASSERT(monitor.isFull()); - monitor.removingData(32 * 1024); - CPPUNIT_ASSERT_EQUAL(85, (int) (100 * monitor.getFillRate())); - CPPUNIT_ASSERT(monitor.isFull()); - monitor.removingData(32 * 1024); - CPPUNIT_ASSERT_EQUAL(84, (int) (100 * monitor.getFillRate())); - CPPUNIT_ASSERT(!monitor.isFull()); -} - -} - -} // storage diff --git a/memfilepersistence/src/tests/helper/CMakeLists.txt b/memfilepersistence/src/tests/helper/CMakeLists.txt deleted file mode 100644 index 3a12123250c..00000000000 --- a/memfilepersistence/src/tests/helper/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_library(memfilepersistence_testhelper - SOURCES - testhelper.cpp - DEPENDS - vdstestlib -) diff --git a/memfilepersistence/src/tests/helper/testhelper.cpp b/memfilepersistence/src/tests/helper/testhelper.cpp deleted file mode 100644 index e6662c52d1b..00000000000 --- a/memfilepersistence/src/tests/helper/testhelper.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <tests/helper/testhelper.h> -#include <vespa/vespalib/testkit/testapp.h> - -#include <vespa/log/log.h> -LOG_SETUP(".testhelper"); - -namespace storage { - -void addStorageDistributionConfig(vdstestlib::DirConfig& dc) -{ - vdstestlib::DirConfig::Config* config; - config = &dc.getConfig("stor-distribution", true); - config->clear(); - config->set("group[1]"); - config->set("group[0].name", "foo"); - config->set("group[0].index", "0"); - config->set("group[0].nodes[50]"); - - for (uint32_t i = 0; i < 50; i++) { - std::ostringstream key; key << "group[0].nodes[" << i << "].index"; - std::ostringstream val; val << i; - config->set(key.str(), val.str()); - } -} - -vdstestlib::DirConfig getStandardConfig(bool storagenode) { - vdstestlib::DirConfig dc; - vdstestlib::DirConfig::Config* config; - config = &dc.addConfig("stor-cluster"); - config = &dc.addConfig("load-type"); - config = &dc.addConfig("bucket"); - config = &dc.addConfig("messagebus"); - config = &dc.addConfig("stor-prioritymapping"); - config = &dc.addConfig("stor-bucketdbupdater"); - config = &dc.addConfig("metricsmanager"); - config->set("consumer[1]"); - config->set("consumer[0].name", "\"status\""); - config->set("consumer[0].addedmetrics[1]"); - config->set("consumer[0].addedmetrics[0]", "\"*\""); - config = &dc.addConfig("stor-communicationmanager"); - config->set("rpcport", "0"); - config->set("mbusport", "0"); - config = &dc.addConfig("stor-bucketdb"); - config->set("chunklevel", "0"); - config = 
&dc.addConfig("stor-distributormanager"); - config = &dc.addConfig("stor-opslogger"); - config = &dc.addConfig("stor-memfilepersistence"); - // Easier to see what goes wrong with only 1 thread per disk. - config->set("minimum_file_meta_slots", "2"); - config->set("minimum_file_header_block_size", "368"); - config->set("minimum_file_size", "4096"); - config->set("threads[1]"); - config->set("threads[0].lowestpri 255"); - config->set("dir_spread", "4"); - config->set("dir_levels", "0"); - // Unit tests typically use fake low time values, so don't complain - // about them or compact/delete them by default. Override in tests testing that - // behavior - config = &dc.addConfig("persistence"); - config->set("keep_remove_time_period", "2000000000"); - config->set("revert_time_period", "2000000000"); - config = &dc.addConfig("stor-bouncer"); - config = &dc.addConfig("stor-integritychecker"); - config = &dc.addConfig("stor-bucketmover"); - config = &dc.addConfig("stor-messageforwarder"); - config = &dc.addConfig("stor-server"); - config->set("enable_dead_lock_detector", "false"); - config->set("enable_dead_lock_detector_warnings", "false"); - config->set("max_merges_per_node", "25"); - config->set("max_merge_queue_size", "20"); - config->set("root_folder", - (storagenode ? "vdsroot" : "vdsroot.distributor")); - config->set("is_distributor", - (storagenode ? "false" : "true")); - config = &dc.addConfig("stor-devices"); - config->set("root_folder", - (storagenode ? 
"vdsroot" : "vdsroot.distributor")); - config = &dc.addConfig("stor-status"); - config->set("httpport", "0"); - config = &dc.addConfig("stor-visitor"); - config->set("defaultdocblocksize", "8192"); - // By default, need "old" behaviour of maxconcurrent - config->set("maxconcurrentvisitors_fixed", "4"); - config->set("maxconcurrentvisitors_variable", "0"); - config = &dc.addConfig("stor-visitordispatcher"); - addFileConfig(dc, "documenttypes", TEST_PATH("config-doctypes.cfg")); - addStorageDistributionConfig(dc); - return dc; -} - -void addFileConfig(vdstestlib::DirConfig& dc, - const std::string& configDefName, - const std::string& fileName) -{ - vdstestlib::DirConfig::Config* config; - config = &dc.getConfig(configDefName, true); - config->clear(); - std::ifstream in(fileName.c_str()); - std::string line; - while (std::getline(in, line, '\n')) { - std::string::size_type pos = line.find(' '); - if (pos == std::string::npos) { - config->set(line); - } else { - config->set(line.substr(0, pos), line.substr(pos + 1)); - } - } - in.close(); -} - -TestName::TestName(const std::string& n) - : name(n) -{ - LOG(debug, "Starting test %s", name.c_str()); -} - -TestName::~TestName() { - LOG(debug, "Done with test %s", name.c_str()); -} - -} // storage diff --git a/memfilepersistence/src/tests/helper/testhelper.h b/memfilepersistence/src/tests/helper/testhelper.h deleted file mode 100644 index b8dcc82140a..00000000000 --- a/memfilepersistence/src/tests/helper/testhelper.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once -#include <vespa/vdstestlib/cppunit/dirconfig.h> -#include <vespa/vdstestlib/cppunit/macros.h> - - -#include <fstream> -#include <sstream> - -#define ASSERT_REPLY_COUNT(count, dummylink) \ - { \ - std::ostringstream msgost; \ - if ((dummylink).getNumReplies() != count) { \ - for (uint32_t ijx=0; ijx<(dummylink).getNumReplies(); ++ijx) { \ - msgost << (dummylink).getReply(ijx)->toString(true) << "\n"; \ - } \ - } \ - CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \ - (dummylink).getNumReplies()); \ - } -#define ASSERT_COMMAND_COUNT(count, dummylink) \ - { \ - std::ostringstream msgost; \ - if ((dummylink).getNumCommands() != count) { \ - for (uint32_t ijx=0; ijx<(dummylink).getNumCommands(); ++ijx) { \ - msgost << (dummylink).getCommand(ijx)->toString(true) << "\n"; \ - } \ - } \ - CPPUNIT_ASSERT_EQUAL_MSG(msgost.str(), size_t(count), \ - (dummylink).getNumCommands()); \ - } - -namespace storage { - -void addFileConfig(vdstestlib::DirConfig& dc, - const std::string& configDefName, - const std::string& fileName); - - -void addStorageDistributionConfig(vdstestlib::DirConfig& dc); - -vdstestlib::DirConfig getStandardConfig(bool storagenode); - -// Class used to print start and end of test. Enable debug when you want to see -// which test creates what output or where we get stuck -struct TestName { - std::string name; - TestName(const std::string& n); - ~TestName(); -}; - -} // storage - diff --git a/memfilepersistence/src/tests/init/.gitignore b/memfilepersistence/src/tests/init/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/tests/init/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/tests/init/CMakeLists.txt b/memfilepersistence/src/tests/init/CMakeLists.txt deleted file mode 100644 index de3464a0820..00000000000 --- a/memfilepersistence/src/tests/init/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_testinit - SOURCES - filescannertest.cpp - DEPENDS - memfilepersistence -) diff --git a/memfilepersistence/src/tests/init/filescannertest.cpp b/memfilepersistence/src/tests/init/filescannertest.cpp deleted file mode 100644 index 97b7e7ad0c8..00000000000 --- a/memfilepersistence/src/tests/init/filescannertest.cpp +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/device/devicemanager.h> -#include <vespa/memfilepersistence/init/filescanner.h> -#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h> -#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> -#include <vespa/storageframework/defaultimplementation/clock/realclock.h> -#include <vespa/vdslib/state/nodestate.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/document/bucket/bucketid.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/vespalib/util/random.h> -#include <iomanip> -#include <sys/errno.h> - -namespace storage { -namespace memfile { - -struct FileScannerTest : public CppUnit::TestFixture { - struct TestParameters { - uint32_t filesPerDisk; - uint32_t diskCount; - uint32_t bucketSplitBits; - uint32_t dirLevels; - uint32_t dirSpread; - uint32_t parts; - std::set<uint32_t> disksDown; - bool diskDownWithBrokenSymlink; - bool bucketWrongDir; - bool bucketMultipleDirs; - bool bucketMultipleDisks; - bool addTemporaryFiles; - bool addAlienFiles; - bool dirWithNoListPermission; - bool dirWithNoWritePermission; - bool dirWithNoExecutePermission; - bool fileWithNoReadPermission; - bool fileWithNoWritePermission; - - TestParameters() - : filesPerDisk(10), diskCount(5), bucketSplitBits(20), - dirLevels(1), dirSpread(16), parts(1), 
disksDown(), - diskDownWithBrokenSymlink(false), - bucketWrongDir(false), bucketMultipleDirs(false), - bucketMultipleDisks(false), - addTemporaryFiles(false), addAlienFiles(false), - dirWithNoListPermission(false), - dirWithNoWritePermission(false), - dirWithNoExecutePermission(false), - fileWithNoReadPermission(false), - fileWithNoWritePermission(false) {} - void addAllComplexities() { - disksDown.insert(0); - disksDown.insert(2); - disksDown.insert(4); - bucketWrongDir = true; - bucketMultipleDirs = true; - bucketMultipleDisks = true; - parts = 7; - addTemporaryFiles = true; - addAlienFiles = true; - dirWithNoWritePermission = true; - fileWithNoWritePermission = true; - fileWithNoReadPermission = true; - } - }; - - void testNormalUsage() { - TestParameters params; - runTest(params); - } - void testMultipleParts() { - TestParameters params; - params.parts = 3; - runTest(params); - } - void testBucketInWrongDirectory() { - TestParameters params; - params.bucketWrongDir = true; - runTest(params); - } - void testBucketInMultipleDirectories() { - TestParameters params; - params.bucketMultipleDirs = true; - runTest(params); - } - void testZeroDirLevel() { - TestParameters params; - params.dirLevels = 0; - runTest(params); - } - void testSeveralDirLevels() { - TestParameters params; - params.dirLevels = 3; - runTest(params); - } - void testNonStandardDirSpread() { - TestParameters params; - params.dirSpread = 63; - runTest(params); - } - void testDiskDown() { - TestParameters params; - params.disksDown.insert(1); - runTest(params); - } - void testDiskDownBrokenSymlink() { - TestParameters params; - params.disksDown.insert(1); - params.disksDown.insert(3); - params.diskDownWithBrokenSymlink = true; - runTest(params); - } - void testRemoveTemporaryFile() { - TestParameters params; - params.addTemporaryFiles = true; - runTest(params); - } - void testAlienFile() { - TestParameters params; - params.addAlienFiles = true; - runTest(params); - } - void testUnlistableDirectory() 
{ - TestParameters params; - params.dirWithNoListPermission = true; - runTest(params); - } - void testDirWithNoWritePermission() { - TestParameters params; - params.dirWithNoWritePermission = true; - runTest(params); - } - void testDirWithNoExecutePermission() { - TestParameters params; - params.dirWithNoWritePermission = true; - runTest(params); - } - void testFileWithNoReadPermission() { - TestParameters params; - params.bucketWrongDir = true; - params.fileWithNoReadPermission = true; - runTest(params); - } - void testFileWithNoWritePermission() { - TestParameters params; - params.bucketWrongDir = true; - params.fileWithNoWritePermission = true; - runTest(params); - } - void testAllFailuresCombined() { - TestParameters params; - params.addAllComplexities(); - runTest(params); - } - - CPPUNIT_TEST_SUITE(FileScannerTest); - CPPUNIT_TEST(testNormalUsage); - CPPUNIT_TEST(testMultipleParts); - CPPUNIT_TEST(testBucketInWrongDirectory); - CPPUNIT_TEST(testBucketInMultipleDirectories); - CPPUNIT_TEST(testZeroDirLevel); - CPPUNIT_TEST(testSeveralDirLevels); - CPPUNIT_TEST(testNonStandardDirSpread); - CPPUNIT_TEST(testDiskDown); - CPPUNIT_TEST(testDiskDownBrokenSymlink); - CPPUNIT_TEST(testRemoveTemporaryFile); - CPPUNIT_TEST(testAlienFile); - CPPUNIT_TEST(testUnlistableDirectory); - CPPUNIT_TEST(testDirWithNoWritePermission); - CPPUNIT_TEST(testDirWithNoExecutePermission); - CPPUNIT_TEST(testFileWithNoReadPermission); - CPPUNIT_TEST(testFileWithNoWritePermission); - CPPUNIT_TEST(testAllFailuresCombined); - CPPUNIT_TEST_SUITE_END(); - - // Actual implementation of the tests. - - /** Run a console command and fail test if it fails. */ - void run(std::string cmd); - - /** Struct containing metadata for a single bucket. */ - struct BucketData { - document::BucketId bucket; - uint32_t disk; - std::vector<uint32_t> directory; - bool shouldExist; // Set to false for buckets that won't exist due to - // some failure. 
- - BucketData() : shouldExist(true) {} - - bool sameDir(BucketData& other) const { - return (disk == other.disk && directory == other.directory); - } - }; - - /** - * Create an overview of the buckets we're gonna use in the test. - * (Without any failures introduced) - */ - std::vector<BucketData> createBuckets(const TestParameters& params); - - /** - * Create the data in the bucket map and introduce the failures specified - * in the test. Mark buckets in bucket list that won't exist due to the - * failures so we know how to verify result of test. - */ - void createData(const TestParameters&, std::vector<BucketData>& buckets, - std::vector<std::string>& tempFiles, - std::vector<std::string>& alienFiles); - - /** - * Run a test with a given set of parameters, calling createData to set up - * the data, and then using a file scanner to actually list the files. - */ - void runTest(const TestParameters&); - -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(FileScannerTest); - -void -FileScannerTest::run(std::string cmd) -{ - int result = system(cmd.c_str()); - if (result != 0) { - CPPUNIT_FAIL("Failed to run command '" + cmd + "'."); - } -} - -std::vector<FileScannerTest::BucketData> -FileScannerTest::createBuckets(const TestParameters& params) -{ - std::vector<BucketData> buckets; - BucketDirectoryMapper dirMapper(params.dirLevels, params.dirSpread); - for (uint32_t i=0; i<params.diskCount; ++i) { - if (params.disksDown.find(i) != params.disksDown.end()) { - continue; - } - for (uint32_t j=0; j<params.filesPerDisk; ++j) { - BucketData data; - data.bucket = document::BucketId(params.bucketSplitBits, - params.filesPerDisk * i + j); - data.disk = i; - data.directory = dirMapper.getPath(data.bucket); - buckets.push_back(data); - } - } - return buckets; -} - -void -FileScannerTest::createData(const TestParameters& params, - std::vector<BucketData>& buckets, - std::vector<std::string>& tempFiles, - std::vector<std::string>& alienFiles) -{ - if (params.bucketWrongDir) { - 
CPPUNIT_ASSERT(params.dirLevels > 0); - buckets[0].directory[0] = (buckets[0].directory[0] + 1) - % params.dirSpread; - } - if (params.bucketMultipleDirs) { - CPPUNIT_ASSERT(params.dirLevels > 0); - BucketData copy(buckets[1]); - copy.directory[0] = (buckets[1].directory[0] + 1) % params.dirSpread; - buckets.push_back(copy); - } - if (params.bucketMultipleDisks && params.dirLevels > 0) { - BucketData copy(buckets[2]); - uint32_t disk = 0; - for (; disk<params.diskCount; ++disk) { - if (disk == copy.disk) continue; - if (params.disksDown.find(disk) == params.disksDown.end()) break; - } - CPPUNIT_ASSERT(disk < params.diskCount); - copy.disk = disk; - buckets.push_back(copy); - } - - run("mkdir -p vdsroot"); - run("chmod -R a+rwx vdsroot"); - run("rm -rf vdsroot"); - run("mkdir -p vdsroot/disks"); - vespalib::RandomGen randomizer; - uint32_t diskToHaveBrokenSymlink = (params.disksDown.empty() - ? 0 : randomizer.nextUint32(0, params.disksDown.size())); - uint32_t downIndex = 0; - for (uint32_t i=0; i<params.diskCount; ++i) { - if (params.disksDown.find(i) != params.disksDown.end()) { - if (downIndex++ == diskToHaveBrokenSymlink - && params.diskDownWithBrokenSymlink) - { - std::ostringstream path; - path << "vdsroot/disks/d" << i; - run("ln -s /non-existing-dir " + path.str()); - } - } else { - std::ostringstream path; - path << "vdsroot/disks/d" << i; - run("mkdir -p " + path.str()); - std::ofstream of((path.str() + "/chunkinfo").c_str()); - of << "#chunkinfo\n" << i << "\n" << params.diskCount << "\n"; - } - } - for (uint32_t i=0; i<buckets.size(); ++i) { - if (!buckets[i].shouldExist) continue; - std::ostringstream path; - path << "vdsroot/disks/d" << buckets[i].disk << std::hex; - for (uint32_t j=0; j<buckets[i].directory.size(); ++j) { - path << '/' << std::setw(4) << std::setfill('0') - << buckets[i].directory[j]; - } - run("mkdir -p " + path.str()); - if (params.dirWithNoListPermission && i == 8) { - run("chmod a-r " + path.str()); - // Scanner will abort with 
exception, so we don't really know - // how many docs will not be found due to this. - continue; - } - if (params.dirWithNoExecutePermission && i == 9) { - run("chmod a-x " + path.str()); - // Scanner will abort with exception, so we don't really know - // how many docs will not be found due to this. - continue; - } - path << '/' << std::setw(16) << std::setfill('0') - << buckets[i].bucket.getId() << ".0"; - run("touch " + path.str()); - if (params.addTemporaryFiles && i == 4) { - run("touch " + path.str() + ".tmp"); - tempFiles.push_back(path.str() + ".tmp"); - } - if (params.addAlienFiles && i == 6) { - run("touch " + path.str() + ".alien"); - alienFiles.push_back(path.str() + ".alien"); - } - if (params.fileWithNoWritePermission && i == 0) { - // Overlapping with wrong dir so it would want to move file - run("chmod a-w " + path.str()); - } - if (params.fileWithNoReadPermission && i == 0) { - // Overlapping with wrong dir so it would want to move file - run("chmod a-r " + path.str()); - } - if (params.dirWithNoWritePermission && i == 9) { - run("chmod a-w " + path.str()); - } - } -} - -namespace { - struct BucketDataFound { - uint16_t _disk; - bool _checked; - - BucketDataFound() : _disk(65535), _checked(false) {} - BucketDataFound(uint32_t disk) : _disk(disk), _checked(false) {} - }; -} - -void -FileScannerTest::runTest(const TestParameters& params) -{ - std::vector<BucketData> buckets(createBuckets(params)); - std::vector<std::string> tempFiles; - std::vector<std::string> alienFiles; - createData(params, buckets, tempFiles, alienFiles); - - framework::defaultimplementation::RealClock clock; - framework::defaultimplementation::ComponentRegisterImpl compReg; - compReg.setClock(clock); - - MountPointList mountPoints("./vdsroot", - std::vector<vespalib::string>(), - DeviceManager::UP( - new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - clock))); - mountPoints.init(params.diskCount); - - FileScanner scanner(compReg, mountPoints, - params.dirLevels, 
params.dirSpread); - std::map<document::BucketId, BucketDataFound> foundBuckets; - uint32_t extraBucketsSameDisk = 0; - uint32_t extraBucketsOtherDisk = 0; - for (uint32_t j=0; j<params.diskCount; ++j) { - // std::cerr << "Disk " << j << "\n"; - if (params.disksDown.find(j) != params.disksDown.end()) continue; - for (uint32_t i=0; i<params.parts; ++i) { - document::BucketId::List bucketList; - try{ - scanner.buildBucketList(bucketList, j, i, params.parts); - for (uint32_t k=0; k<bucketList.size(); ++k) { - if (foundBuckets.find(bucketList[k]) != foundBuckets.end()) - { - if (j == foundBuckets[bucketList[k]]._disk) { - ++extraBucketsSameDisk; - } else { - ++extraBucketsOtherDisk; - } -// std::cerr << "Bucket " << bucketList[k] -// << " on disk " << j << " is already found on disk " -// << foundBuckets[bucketList[k]]._disk << ".\n"; - } - foundBuckets[bucketList[k]] = BucketDataFound(j); - } - } catch (vespalib::IoException& e) { - if (!(params.dirWithNoListPermission - && e.getType() == vespalib::IoException::NO_PERMISSION)) - { - throw; - } - } - } - } - std::vector<BucketData> notFound; - std::vector<BucketData> wasFound; - std::vector<BucketDataFound> foundNonExisting; - // Verify that found buckets match buckets expected. 
- for (uint32_t i=0; i<buckets.size(); ++i) { - std::map<document::BucketId, BucketDataFound>::iterator found( - foundBuckets.find(buckets[i].bucket)); - if (buckets[i].shouldExist && found == foundBuckets.end()) { - notFound.push_back(buckets[i]); - } else if (!buckets[i].shouldExist && found != foundBuckets.end()) { - wasFound.push_back(buckets[i]); - } - if (found != foundBuckets.end()) { found->second._checked = true; } - } - for (std::map<document::BucketId, BucketDataFound>::iterator it - = foundBuckets.begin(); it != foundBuckets.end(); ++it) - { - if (!it->second._checked) { - foundNonExisting.push_back(it->second); - } - } - if (params.dirWithNoListPermission) { - CPPUNIT_ASSERT(!notFound.empty()); - } else if (!notFound.empty()) { - std::ostringstream ost; - ost << "Failed to find " << notFound.size() << " of " - << buckets.size() << " buckets. Including buckets:"; - for (uint32_t i=0; i<5 && i<notFound.size(); ++i) { - ost << " " << notFound[i].bucket; - } - CPPUNIT_FAIL(ost.str()); - } - CPPUNIT_ASSERT(wasFound.empty()); - CPPUNIT_ASSERT(foundNonExisting.empty()); - if (params.bucketMultipleDirs) { - // TODO: Test something else here? This is not correct test, as when - // there are two buckets on the same disk, one of them will be ignored by - // the bucket lister. 
- // CPPUNIT_ASSERT_EQUAL(1u, extraBucketsSameDisk); - } else { - CPPUNIT_ASSERT_EQUAL(0u, extraBucketsSameDisk); - } - if (params.bucketMultipleDisks) { - CPPUNIT_ASSERT_EQUAL(1u, extraBucketsOtherDisk); - } else { - CPPUNIT_ASSERT_EQUAL(0u, extraBucketsOtherDisk); - } - if (params.addTemporaryFiles) { - CPPUNIT_ASSERT_EQUAL( - 1, int(scanner.getMetrics()._temporaryFilesDeleted.getValue())); - } else { - CPPUNIT_ASSERT_EQUAL( - 0, int(scanner.getMetrics()._temporaryFilesDeleted.getValue())); - } - if (params.addAlienFiles) { - CPPUNIT_ASSERT_EQUAL( - 1, int(scanner.getMetrics()._alienFileCounter.getValue())); - } else { - CPPUNIT_ASSERT_EQUAL( - 0, int(scanner.getMetrics()._alienFileCounter.getValue())); - } - // We automatically delete temporary files (created by VDS, indicating - // an operation that only half finished. - for (uint32_t i=0; i<tempFiles.size(); ++i) { - CPPUNIT_ASSERT_MSG(tempFiles[i], !vespalib::fileExists(tempFiles[i])); - } - // We don't automatically delete alien files - for (uint32_t i=0; i<alienFiles.size(); ++i) { - CPPUNIT_ASSERT_MSG(alienFiles[i], vespalib::fileExists(alienFiles[i])); - } -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/mapper/.gitignore b/memfilepersistence/src/tests/mapper/.gitignore deleted file mode 100644 index e69de29bb2d..00000000000 --- a/memfilepersistence/src/tests/mapper/.gitignore +++ /dev/null diff --git a/memfilepersistence/src/tests/spi/.gitignore b/memfilepersistence/src/tests/spi/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/tests/spi/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/tests/spi/CMakeLists.txt b/memfilepersistence/src/tests/spi/CMakeLists.txt deleted file mode 100644 index 25c4acf2c32..00000000000 --- a/memfilepersistence/src/tests/spi/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. -vespa_add_library(memfilepersistence_testspi - SOURCES - memfiletestutils.cpp - providerconformancetest.cpp - memfilev1serializertest.cpp - memfilev1verifiertest.cpp - basicoperationhandlertest.cpp - splitoperationhandlertest.cpp - joinoperationhandlertest.cpp - iteratorhandlertest.cpp - memfiletest.cpp - memcachetest.cpp - simplememfileiobuffertest.cpp - memfileautorepairtest.cpp - shared_data_location_tracker_test.cpp - buffered_file_writer_test.cpp - buffer_test.cpp - simulatedfailurefile.cpp - DEPENDS - memfilepersistence_testhelper - memfilepersistence -) diff --git a/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp deleted file mode 100644 index 28cc56aa44e..00000000000 --- a/memfilepersistence/src/tests/spi/basicoperationhandlertest.cpp +++ /dev/null @@ -1,743 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memfiletestutils.h" -#include "simulatedfailurefile.h" -#include "options_builder.h" -#include <vespa/document/fieldset/fieldsetrepo.h> -#include <vespa/document/fieldset/fieldsets.h> -#include <vespa/persistence/spi/test.h> -#include <vespa/document/bucket/fixed_bucket_spaces.h> -#include <vespa/vdstestlib/cppunit/macros.h> - -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { -namespace { - spi::LoadType defaultLoadType(0, "default"); -} - -class BasicOperationHandlerTest : public SingleDiskMemFileTestUtils -{ - CPPUNIT_TEST_SUITE(BasicOperationHandlerTest); - CPPUNIT_TEST(testGetHeaderOnly); - CPPUNIT_TEST(testGetFieldFiltering); - CPPUNIT_TEST(testRemove); - CPPUNIT_TEST(testRemoveWithNonMatchingTimestamp); - CPPUNIT_TEST(testRemoveWithNonMatchingTimestampAlwaysPersist); - CPPUNIT_TEST(testRemoveForExistingRemoveSameTimestamp); - CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestamp); - CPPUNIT_TEST(testRemoveForExistingRemoveNewTimestampAlwaysPersist); - CPPUNIT_TEST(testRemoveDocumentNotFound); - CPPUNIT_TEST(testRemoveDocumentNotFoundAlwaysPersist); - CPPUNIT_TEST(testRemoveExistingOlderDocumentVersion); - CPPUNIT_TEST(testPutSameTimestampAsRemove); - CPPUNIT_TEST(testUpdateBody); - CPPUNIT_TEST(testUpdateHeaderOnly); - CPPUNIT_TEST(testUpdateTimestampExists); - CPPUNIT_TEST(testUpdateForNonExistentDocWillFail); - CPPUNIT_TEST(testUpdateMayCreateDoc); - CPPUNIT_TEST(testRemoveEntry); - CPPUNIT_TEST(testEraseFromCacheOnFlushException); - CPPUNIT_TEST(testEraseFromCacheOnMaintainException); - CPPUNIT_TEST(testEraseFromCacheOnDeleteBucketException); - CPPUNIT_TEST(list_buckets_returns_empty_set_for_non_default_bucketspace); - CPPUNIT_TEST(get_modified_buckets_returns_empty_set_for_non_default_bucketspace); - CPPUNIT_TEST_SUITE_END(); - - void doTestRemoveDocumentNotFound( - OperationHandler::RemoveType persistRemove); - void doTestRemoveWithNonMatchingTimestamp( - OperationHandler::RemoveType persistRemove); - void 
doTestRemoveForExistingRemoveNewTimestamp( - OperationHandler::RemoveType persistRemove); -public: - void setupTestConfig(); - void testPutHeadersOnly(); - void testPutHeadersOnlyDocumentNotFound(); - void testPutHeadersOnlyTimestampNotFound(); - void testGetHeaderOnly(); - void testGetFieldFiltering(); - void testRemove(); - void testRemoveWithNonMatchingTimestamp(); - void testRemoveWithNonMatchingTimestampAlwaysPersist(); - void testRemoveForExistingRemoveSameTimestamp(); - void testRemoveForExistingRemoveNewTimestamp(); - void testRemoveForExistingRemoveNewTimestampAlwaysPersist(); - void testRemoveDocumentNotFound(); - void testRemoveDocumentNotFoundAlwaysPersist(); - void testRemoveExistingOlderDocumentVersion(); - void testPutSameTimestampAsRemove(); - void testUpdateBody(); - void testUpdateHeaderOnly(); - void testUpdateTimestampExists(); - void testUpdateForNonExistentDocWillFail(); - void testUpdateMayCreateDoc(); - void testRemoveEntry(); - void testEraseFromCacheOnFlushException(); - void testEraseFromCacheOnMaintainException(); - void testEraseFromCacheOnDeleteBucketException(); - void list_buckets_returns_empty_set_for_non_default_bucketspace(); - void get_modified_buckets_returns_empty_set_for_non_default_bucketspace(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(BasicOperationHandlerTest); - -/** - * Test that doing a header-only get gives back a document containing - * only the document header - */ -void -BasicOperationHandlerTest::testGetHeaderOnly() -{ - document::BucketId bucketId(16, 4); - - Document::SP doc(createRandomDocumentAtLocation(4)); - doc->setValue(doc->getField("hstringval"), document::StringFieldValue("hypnotoad")); - doc->setValue(doc->getField("headerval"), document::IntFieldValue(42)); - - doPut(doc, bucketId, Timestamp(4567), 0); - flush(bucketId); - - spi::GetResult reply = doGet(bucketId, doc->getId(), document::HeaderFields()); - - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode()); - 
CPPUNIT_ASSERT(reply.hasDocument()); - CPPUNIT_ASSERT_EQUAL(std::string("headerval: 42\nhstringval: hypnotoad\n"), - stringifyFields(reply.getDocument())); - CPPUNIT_ASSERT_EQUAL( - size_t(1), - getPersistenceProvider().getMetrics().headerOnlyGets.getValue()); -} - -void -BasicOperationHandlerTest::testGetFieldFiltering() -{ - document::BucketId bucketId(16, 4); - Document::SP doc(createRandomDocumentAtLocation(4)); - doc->setValue(doc->getField("headerval"), document::IntFieldValue(42)); - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue("groovy")); - - document::FieldSetRepo repo; - - doPut(doc, bucketId, Timestamp(4567), 0); - flush(bucketId); - spi::GetResult reply(doGet(bucketId, - doc->getId(), - *repo.parse(*getTypeRepo(), "testdoctype1:hstringval"))); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, reply.getErrorCode()); - CPPUNIT_ASSERT(reply.hasDocument()); - CPPUNIT_ASSERT_EQUAL(std::string("hstringval: groovy\n"), - stringifyFields(reply.getDocument())); - CPPUNIT_ASSERT_EQUAL( - size_t(1), - getPersistenceProvider().getMetrics().headerOnlyGets.getValue()); -} - -void -BasicOperationHandlerTest::testRemove() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - - document::Document::SP doc = doPut(4, Timestamp(1)); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(2), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - env()._cache.clear(); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL)); - - CPPUNIT_ASSERT_EQUAL(Timestamp(2), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT((*file)[1].deleted()); - CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[1].getLocation(BODY)); - 
CPPUNIT_ASSERT_EQUAL((*file)[0].getLocation(HEADER), - (*file)[1].getLocation(HEADER)); -} - -/** - * Test that removing a document with a max timestamp for which there - * is no matching document does not add a remove slot to the memfile - */ -void -BasicOperationHandlerTest::doTestRemoveWithNonMatchingTimestamp( - OperationHandler::RemoveType persistRemove) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId, - doc->getId(), - Timestamp(1233), - persistRemove)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL( - uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE - ? 2 : 1), - file->getSlotCount()); - - int i = 0; - if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) { - CPPUNIT_ASSERT_EQUAL(Timestamp(1233), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT((*file)[0].deleted()); - CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), (*file)[0].getLocation(BODY)); - CPPUNIT_ASSERT((*file)[0].getLocation(HEADER) - != (*file)[1].getLocation(HEADER)); - CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0])); - ++i; - } - - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[i].getTimestamp()); - CPPUNIT_ASSERT(!(*file)[i].deleted()); - CPPUNIT_ASSERT(file->getDocument((*file)[i], ALL)->getValue("content").get()); -} - -/** - * Test that removing a document with a max timestamp for which there - * is no matching document does not add a remove slot to the memfile - */ -void -BasicOperationHandlerTest::testRemoveWithNonMatchingTimestamp() -{ - doTestRemoveWithNonMatchingTimestamp( - OperationHandler::PERSIST_REMOVE_IF_FOUND); -} - -void -BasicOperationHandlerTest::testRemoveWithNonMatchingTimestampAlwaysPersist() -{ - doTestRemoveWithNonMatchingTimestamp( - 
OperationHandler::ALWAYS_PERSIST_REMOVE); -} - -/** - * Test that doing a remove with a timestamp for which there already - * exists a remove does not add another remove slot - */ -void -BasicOperationHandlerTest::testRemoveForExistingRemoveSameTimestamp() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(1235), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId, - doc->getId(), - Timestamp(1235), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - // Should only be one remove entry still - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get()); - - CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT((*file)[1].deleted()); -} - -void -BasicOperationHandlerTest::doTestRemoveForExistingRemoveNewTimestamp( - OperationHandler::RemoveType persistRemove) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(1235), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - CPPUNIT_ASSERT_EQUAL(false, doRemove(bucketId, - doc->getId(), - Timestamp(1236), - persistRemove)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL( - uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE - ? 
3 : 2), - file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get()); - - CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT((*file)[1].deleted()); - - if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) { - CPPUNIT_ASSERT_EQUAL(Timestamp(1236), (*file)[2].getTimestamp()); - CPPUNIT_ASSERT((*file)[2].deleted()); - } -} - -/** - * Test that doing a second remove with a newer timestamp does not add - * another remove slot when PERSIST_REMOVE_IF_FOUND is specified - */ -void -BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestamp() -{ - doTestRemoveForExistingRemoveNewTimestamp( - OperationHandler::PERSIST_REMOVE_IF_FOUND); -} - -void -BasicOperationHandlerTest::testRemoveForExistingRemoveNewTimestampAlwaysPersist() -{ - doTestRemoveForExistingRemoveNewTimestamp( - OperationHandler::ALWAYS_PERSIST_REMOVE); -} - -/** - * Test removing an older version of a document. Older version should be removed - * in-place without attempting to add a new slot (which would fail). - */ -void -BasicOperationHandlerTest::testRemoveExistingOlderDocumentVersion() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(1235), - OperationHandler::ALWAYS_PERSIST_REMOVE)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(1234), - OperationHandler::ALWAYS_PERSIST_REMOVE)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - // Should now be two remove entries. 
- MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[0])); - CPPUNIT_ASSERT((*file)[0].deleted()); - - CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(doc->getId(), file->getDocumentId((*file)[1])); - CPPUNIT_ASSERT((*file)[1].deleted()); -} - -void -BasicOperationHandlerTest::doTestRemoveDocumentNotFound( - OperationHandler::RemoveType persistRemove) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::DocumentId docId("userdoc:test:4:0"); - doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(false, - doRemove(bucketId, - docId, - Timestamp(1235), - persistRemove)); - - getPersistenceProvider().flush(makeSpiBucket(bucketId), context); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL( - uint32_t(persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE - ? 2 : 1), - file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - if (persistRemove == OperationHandler::ALWAYS_PERSIST_REMOVE) { - CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT((*file)[1].deleted()); - CPPUNIT_ASSERT_EQUAL(docId, file->getDocumentId((*file)[1])); - } -/* TODO: Test this in service layer tests. 
- CPPUNIT_ASSERT_EQUAL( - uint64_t(1), - env()._metrics.remove[documentapi::LoadType::DEFAULT].notFound.getValue()); -*/ -} - -/** - * Test that removing a non-existing document when PERSIST_EXISTING_ONLY is - * specified does not add a remove entry - */ -void -BasicOperationHandlerTest::testRemoveDocumentNotFound() -{ - doTestRemoveDocumentNotFound( - OperationHandler::PERSIST_REMOVE_IF_FOUND); -} - -void -BasicOperationHandlerTest::testRemoveDocumentNotFoundAlwaysPersist() -{ - doTestRemoveDocumentNotFound( - OperationHandler::ALWAYS_PERSIST_REMOVE); -} - -void -BasicOperationHandlerTest::testPutSameTimestampAsRemove() -{ - document::BucketId bucketId(16, 4); - - document::Document::SP doc = doPut(4, Timestamp(1234)); - - CPPUNIT_ASSERT_EQUAL(true, doRemove(bucketId, - doc->getId(), - Timestamp(1235), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - - // Flush here to avoid put+remove being thrown away by duplicate timestamp - // exception evicting the cache and unpersisted changes. - flush(bucketId); - - doPut(4, Timestamp(1235)); - flush(bucketId); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get()); - - CPPUNIT_ASSERT_EQUAL(Timestamp(1235), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT((*file)[1].deleted()); -} - -/** - * Test that updating body results in a new memfile slot containing - * an updated document - */ -void -BasicOperationHandlerTest::testUpdateBody() -{ - document::BucketId bucketId(16, 4); - document::StringFieldValue updateValue("foo"); - document::Document::SP doc = doPut(4, Timestamp(1234)); - document::Document originalDoc(*doc); - - document::DocumentUpdate::SP update = createBodyUpdate( - doc->getId(), updateValue); - - spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678)); - flush(bucketId); - CPPUNIT_ASSERT_EQUAL(1234, 
(int)result.getExistingTimestamp()); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("content").get()); - CPPUNIT_ASSERT_EQUAL(*(originalDoc.getValue("content")), - *file->getDocument((*file)[0], ALL)->getValue("content")); - - CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("content").get()); - CPPUNIT_ASSERT_EQUAL(updateValue, - dynamic_cast<document::StringFieldValue&>( - *file->getDocument((*file)[1], ALL)->getValue( - "content"))); - CPPUNIT_ASSERT_EQUAL( - size_t(0), - getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue()); -} - -void -BasicOperationHandlerTest::testUpdateHeaderOnly() -{ - document::BucketId bucketId(16, 4); - document::IntFieldValue updateValue(42); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - document::DocumentUpdate::SP update = createHeaderUpdate( - doc->getId(), updateValue); - - spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(5678)); - flush(bucketId); - CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp()); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1234), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[0], ALL)->getValue("headerval").get() == - NULL); - - CPPUNIT_ASSERT_EQUAL(Timestamp(5678), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT(file->getDocument((*file)[1], ALL)->getValue("headerval").get()); - CPPUNIT_ASSERT_EQUAL(updateValue, - dynamic_cast<document::IntFieldValue&>( - *file->getDocument((*file)[1], ALL)->getValue( - "headerval"))); - CPPUNIT_ASSERT_EQUAL( - size_t(1), - getPersistenceProvider().getMetrics().headerOnlyUpdates.getValue()); -} - -void 
-BasicOperationHandlerTest::testUpdateTimestampExists() -{ - document::BucketId bucketId(16, 4); - document::IntFieldValue updateValue(42); - document::Document::SP doc = doPut(4, Timestamp(1234)); - - document::DocumentUpdate::SP update = createHeaderUpdate( - doc->getId(), updateValue); - - spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(1234)); - flush(bucketId); - CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, result.getErrorCode()); -} - -void -BasicOperationHandlerTest::testUpdateForNonExistentDocWillFail() -{ - document::BucketId bucketId(16, 4); - document::IntFieldValue updateValue(42); - Timestamp timestamp(5678); - - // Is there an easier way to get a DocumentId? - document::Document::UP doc( - createRandomDocumentAtLocation(4, timestamp.getTime())); - const DocumentId& documentId = doc->getId(); - - document::DocumentUpdate::SP update = createHeaderUpdate( - documentId, updateValue); - - spi::UpdateResult result = doUpdate(bucketId, update, timestamp); - flush(bucketId); - CPPUNIT_ASSERT_EQUAL(0, (int)result.getExistingTimestamp()); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), file->getSlotCount()); -} - -void -BasicOperationHandlerTest::testUpdateMayCreateDoc() -{ - document::BucketId bucketId(16, 4); - document::IntFieldValue updateValue(42); - Timestamp timestamp(5678); - - // Is there an easier way to get a DocumentId? 
- document::Document::UP doc( - createRandomDocumentAtLocation(4, timestamp.getTime())); - const DocumentId& documentId = doc->getId(); - - document::DocumentUpdate::SP update = createHeaderUpdate( - documentId, updateValue); - update->setCreateIfNonExistent(true); - - spi::UpdateResult result = doUpdate(bucketId, update, timestamp); - flush(bucketId); - CPPUNIT_ASSERT_EQUAL(timestamp.getTime(), - (uint64_t)result.getExistingTimestamp()); - - MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(timestamp, (*file)[0].getTimestamp()); - - auto headerval = file->getDocument((*file)[0], ALL)->getValue("headerval"); - CPPUNIT_ASSERT(headerval.get() != nullptr); - CPPUNIT_ASSERT_EQUAL(updateValue, - dynamic_cast<document::IntFieldValue&>(*headerval)); -} - -void -BasicOperationHandlerTest::testRemoveEntry() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - - doPut(4, Timestamp(1234)); - Document::SP doc = doPut(4, Timestamp(2345)); - doPut(4, Timestamp(3456)); - - getPersistenceProvider().removeEntry(makeSpiBucket(bucketId), spi::Timestamp(1234), context); - getPersistenceProvider().removeEntry(makeSpiBucket(bucketId), spi::Timestamp(3456), context); - flush(bucketId); - - memfile::MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL)); -} - -void -BasicOperationHandlerTest::setupTestConfig() -{ - using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig; - using MemFileConfigBuilder - = vespa::config::storage::StorMemfilepersistenceConfigBuilder; - MemFileConfigBuilder builder( - *env().acquireConfigReadLock().memFilePersistenceConfig()); - builder.minimumFileMetaSlots = 2; - builder.minimumFileHeaderBlockSize = 3000; - auto 
newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder)); - env().acquireConfigWriteLock().setMemFilePersistenceConfig( - std::move(newConfig)); -} - -void -BasicOperationHandlerTest::testEraseFromCacheOnFlushException() -{ - document::BucketId bucketId(16, 4); - - setupTestConfig(); - - document::Document::SP doc( - createRandomDocumentAtLocation(4, 2345, 1024, 1024)); - doPut(doc, bucketId, Timestamp(2345)); - flush(bucketId); - // Must throw out cache to re-create lazyfile - env()._cache.clear(); - - env()._lazyFileFactory = - std::unique_ptr<Environment::LazyFileFactory>( - new SimulatedFailureLazyFile::Factory); - - // Try partial write, followed by full rewrite - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < i+1; ++j) { - document::Document::SP doc2( - createRandomDocumentAtLocation(4, 4000 + j, 1500, 1500)); - doPut(doc2, bucketId, Timestamp(4000 + j)); - } - spi::Result result = flush(bucketId); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(bucketId)); - - // Check that we still have first persisted put - memfile::MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(2345), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(*doc, *file->getDocument((*file)[0], ALL)); - } -} - -void -BasicOperationHandlerTest::testEraseFromCacheOnMaintainException() -{ - document::BucketId bucketId(16, 4); - - setupTestConfig(); - - getFakeClock()._absoluteTime = framework::MicroSecTime(2000 * 1000000); - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .revertTimePeriod(framework::MicroSecTime(100000ULL * 1000000)) - .build()); - // Put a doc twice to allow for revert time compaction to be done - document::Document::SP doc1( - createRandomDocumentAtLocation(4, 2345, 
1024, 1024)); - document::Document::SP doc2( - createRandomDocumentAtLocation(4, 2345, 1024, 1024)); - doPut(doc1, bucketId, Timestamp(1000 * 1000000)); - doPut(doc2, bucketId, Timestamp(1500 * 1000000)); - flush(bucketId); - env()._cache.clear(); - - options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .revertTimePeriod(framework::MicroSecTime(100ULL * 1000000)) - .build()); - - env()._lazyFileFactory = - std::unique_ptr<Environment::LazyFileFactory>( - new SimulatedFailureLazyFile::Factory); - - spi::Result result = getPersistenceProvider().maintain(makeSpiBucket(bucketId), spi::HIGH); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(bucketId)); - - // Check that we still have both persisted puts - memfile::MemFilePtr file(getMemFile(bucketId)); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1000 * 1000000), (*file)[0].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(*doc1, *file->getDocument((*file)[0], ALL)); - CPPUNIT_ASSERT_EQUAL(Timestamp(1500 * 1000000), (*file)[1].getTimestamp()); - CPPUNIT_ASSERT_EQUAL(*doc2, *file->getDocument((*file)[1], ALL)); -} - -void -BasicOperationHandlerTest::testEraseFromCacheOnDeleteBucketException() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::Document::SP doc( - createRandomDocumentAtLocation(4, 2345, 1024, 1024)); - doPut(doc, bucketId, Timestamp(2345)); - flush(bucketId); - env()._cache.clear(); - - SimulatedFailureLazyFile::Factory* factory( - new SimulatedFailureLazyFile::Factory); - factory->setReadOpsBeforeFailure(0); - env()._lazyFileFactory = - std::unique_ptr<Environment::LazyFileFactory>(factory); - - // loadFile will fail - spi::Result result = 
getPersistenceProvider().deleteBucket(makeSpiBucket(bucketId), context); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(bucketId)); - -} - -void BasicOperationHandlerTest::list_buckets_returns_empty_set_for_non_default_bucketspace() { - document::BucketId bucket(16, 4); - doPut(createRandomDocumentAtLocation(4), bucket, Timestamp(4567), 0); - flush(bucket); - - auto buckets = getPersistenceProvider().listBuckets(document::FixedBucketSpaces::global_space(), spi::PartitionId(0)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buckets.getList().size()); -} - -void BasicOperationHandlerTest::get_modified_buckets_returns_empty_set_for_non_default_bucketspace() { - env().addModifiedBucket(document::BucketId(16, 1234)); - auto buckets = getPersistenceProvider().getModifiedBuckets(document::FixedBucketSpaces::global_space()); - CPPUNIT_ASSERT_EQUAL(size_t(0), buckets.getList().size()); -} - -} - -} diff --git a/memfilepersistence/src/tests/spi/buffer_test.cpp b/memfilepersistence/src/tests/spi/buffer_test.cpp deleted file mode 100644 index bb31577bf17..00000000000 --- a/memfilepersistence/src/tests/spi/buffer_test.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/memfilepersistence/mapper/buffer.h> - -namespace storage { -namespace memfile { - -class BufferTest : public CppUnit::TestFixture -{ -public: - void getSizeReturnsInitiallyAllocatedSize(); - void getSizeReturnsUnAlignedSizeForMMappedAllocs(); - void resizeRetainsExistingDataWhenSizingUp(); - void resizeRetainsExistingDataWhenSizingDown(); - void bufferAddressIs512ByteAligned(); - - CPPUNIT_TEST_SUITE(BufferTest); - CPPUNIT_TEST(getSizeReturnsInitiallyAllocatedSize); - CPPUNIT_TEST(getSizeReturnsUnAlignedSizeForMMappedAllocs); - CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingUp); - CPPUNIT_TEST(resizeRetainsExistingDataWhenSizingDown); - CPPUNIT_TEST(bufferAddressIs512ByteAligned); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(BufferTest); - -void -BufferTest::getSizeReturnsInitiallyAllocatedSize() -{ - Buffer buf(1234); - CPPUNIT_ASSERT_EQUAL(size_t(1234), buf.getSize()); -} - -void -BufferTest::getSizeReturnsUnAlignedSizeForMMappedAllocs() -{ - Buffer buf(vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE + 1); - CPPUNIT_ASSERT_EQUAL(size_t(vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE + 1), buf.getSize()); -} - -void -BufferTest::resizeRetainsExistingDataWhenSizingUp() -{ - std::string src = "hello world"; - Buffer buf(src.size()); - memcpy(buf.getBuffer(), src.data(), src.size()); - buf.resize(src.size() * 2); - CPPUNIT_ASSERT_EQUAL(src.size() * 2, buf.getSize()); - CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size())); -} - -void -BufferTest::resizeRetainsExistingDataWhenSizingDown() -{ - std::string src = "hello world"; - Buffer buf(src.size()); - memcpy(buf.getBuffer(), src.data(), src.size()); - buf.resize(src.size() / 2); - CPPUNIT_ASSERT_EQUAL(src.size() / 2, buf.getSize()); - CPPUNIT_ASSERT_EQUAL(0, memcmp(buf.getBuffer(), src.data(), src.size() / 2)); -} - -void -BufferTest::bufferAddressIs512ByteAligned() -{ - Buffer buf(32); - 
CPPUNIT_ASSERT(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0); -} - -} // memfile -} // storage - diff --git a/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp b/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp deleted file mode 100644 index 36270335fda..00000000000 --- a/memfilepersistence/src/tests/spi/buffered_file_writer_test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/memfilepersistence/mapper/bufferedfilewriter.h> -#include <vespa/memfilepersistence/mapper/buffer.h> -#include <vespa/vespalib/io/fileutil.h> - -namespace storage { -namespace memfile { - -class BufferedFileWriterTest : public CppUnit::TestFixture -{ -public: - void noImplicitFlushingWhenDestructing(); - - CPPUNIT_TEST_SUITE(BufferedFileWriterTest); - CPPUNIT_TEST(noImplicitFlushingWhenDestructing); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(BufferedFileWriterTest); - -namespace { - -// Partial mock of vespalib::File. Unfortunately, there's currently no -// base interface to implement so have to override a class that already has -// implementation code present. -class MockFile : public vespalib::File -{ -public: - bool _didWrite; - - MockFile(const std::string& filename) - : File(filename), - _didWrite(false) - { - } - - void open(int flags, bool autoCreateDirectories) override { - (void) flags; - (void) autoCreateDirectories; - // Don't do anything here to prevent us from actually opening a file - // on disk. 
- } - - off_t write(const void *buf, size_t bufsize, off_t offset) override { - (void) buf; - (void) bufsize; - (void) offset; - _didWrite = true; - return 0; - } -}; - -} - -void -BufferedFileWriterTest::noImplicitFlushingWhenDestructing() -{ - MockFile file("foo"); - { - Buffer buffer(1024); - BufferedFileWriter writer(file, buffer, buffer.getSize()); - // Do a buffered write. This fits well within the buffer and should - // consequently not be immediately written out to the backing file. - writer.write("blarg", 5); - // Escape scope without having flushed anything. - } - // Since BufferedFileWriter is meant to be used with O_DIRECT files, - // flushing just implies writing rather than syncing (this is a half truth - // since you still sync directories etc to ensure metadata is written, but - // this constrained assumption works fine in the context of this test). - CPPUNIT_ASSERT(!file._didWrite); -} - -} // memfile -} // storage - diff --git a/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp b/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp deleted file mode 100644 index 418ead076fd..00000000000 --- a/memfilepersistence/src/tests/spi/iteratorhandlertest.cpp +++ /dev/null @@ -1,929 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> -#include <tests/spi/memfiletestutils.h> -#include <tests/spi/simulatedfailurefile.h> -#include <tests/spi/options_builder.h> -#include <vespa/persistence/spi/test.h> -#include <vespa/document/select/parser.h> - -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { -namespace { - spi::LoadType defaultLoadType(0, "default"); -} - -class IteratorHandlerTest : public SingleDiskMemFileTestUtils -{ - CPPUNIT_TEST_SUITE(IteratorHandlerTest); - CPPUNIT_TEST(testCreateIterator); - CPPUNIT_TEST(testSomeSlotsRemovedBetweenInvocations); - CPPUNIT_TEST(testAllSlotsRemovedBetweenInvocations); - CPPUNIT_TEST(testIterateMetadataOnly); - CPPUNIT_TEST(testIterateHeadersOnly); - CPPUNIT_TEST(testIterateLargeDocument); - CPPUNIT_TEST(testDocumentsRemovedBetweenInvocations); - CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocations); - CPPUNIT_TEST(testUnrevertableRemoveBetweenInvocationsIncludeRemoves); - CPPUNIT_TEST(testMatchTimestampRangeDocAltered); - CPPUNIT_TEST(testIterateAllVersions); - CPPUNIT_TEST(testFieldSetFiltering); - CPPUNIT_TEST(testIteratorInactiveOnException); - CPPUNIT_TEST(testDocsCachedBeforeDocumentSelection); - CPPUNIT_TEST(testTimestampRangeLimitedPrefetch); - CPPUNIT_TEST(testCachePrefetchRequirements); - CPPUNIT_TEST(testBucketEvictedFromCacheOnIterateException); - CPPUNIT_TEST_SUITE_END(); - -public: - void testCreateIterator(); - void testSomeSlotsRemovedBetweenInvocations(); - void testAllSlotsRemovedBetweenInvocations(); - void testIterateMetadataOnly(); - void testIterateHeadersOnly(); - void testIterateLargeDocument(); - void testDocumentsRemovedBetweenInvocations(); - void testUnrevertableRemoveBetweenInvocations(); - void testUnrevertableRemoveBetweenInvocationsIncludeRemoves(); - void testMatchTimestampRangeDocAltered(); - void testIterateAllVersions(); - void testFieldSetFiltering(); - void testIteratorInactiveOnException(); - void 
testDocsCachedBeforeDocumentSelection(); - void testTimestampRangeLimitedPrefetch(); - void testCachePrefetchRequirements(); - void testBucketEvictedFromCacheOnIterateException(); - - void setUp() override; - void tearDown() override; - - struct Chunk - { - std::vector<spi::DocEntry::UP> _entries; - }; - -private: - spi::Selection createSelection(const std::string& docSel) const; - - - spi::CreateIteratorResult create( - const spi::Bucket& b, - const spi::Selection& sel, - spi::IncludedVersions versions = spi::NEWEST_DOCUMENT_ONLY, - const document::FieldSet& fieldSet = document::AllFields()) - { - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - return getPersistenceProvider().createIterator(b, fieldSet, sel, - versions, context); - } - - typedef std::pair<Document::SP, spi::Timestamp> DocAndTimestamp; - - std::vector<DocAndTimestamp> feedDocs(size_t numDocs, - uint32_t minSize = 110, - uint32_t maxSize = 110); - - std::vector<Chunk> doIterate(spi::IteratorId id, - uint64_t maxByteSize, - size_t maxChunks = 0, - bool allowEmptyResult = false); - - void verifyDocs(const std::vector<DocAndTimestamp>& wanted, - const std::vector<IteratorHandlerTest::Chunk>& chunks, - const std::set<vespalib::string>& removes - = std::set<vespalib::string>()) const; - - void doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(IteratorHandlerTest); - -void -IteratorHandlerTest::setUp() -{ - SingleDiskMemFileTestUtils::setUp(); -} - -void -IteratorHandlerTest::tearDown() -{ - SingleDiskMemFileTestUtils::tearDown(); -} - -spi::Selection -IteratorHandlerTest::createSelection(const std::string& docSel) const -{ - return spi::Selection(spi::DocumentSelection(docSel)); -} - -void -IteratorHandlerTest::testCreateIterator() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 1234))); - - spi::CreateIteratorResult iter1(create(b, createSelection("true"))); - CPPUNIT_ASSERT_EQUAL(spi::IteratorId(1), 
iter1.getIteratorId()); - - spi::CreateIteratorResult iter2(create(b, createSelection("true"))); - CPPUNIT_ASSERT_EQUAL(spi::IteratorId(2), iter2.getIteratorId()); -} - -std::vector<IteratorHandlerTest::Chunk> -IteratorHandlerTest::doIterate(spi::IteratorId id, - uint64_t maxByteSize, - size_t maxChunks, - bool allowEmptyResult) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - std::vector<Chunk> chunks; - - while (true) { - spi::IterateResult result(getPersistenceProvider().iterate( - id, maxByteSize, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT(result.getEntries().size() > 0 || allowEmptyResult); - - chunks.push_back(Chunk{result.steal_entries()}); - if (result.isCompleted() - || (maxChunks != 0 && chunks.size() >= maxChunks)) - { - break; - } - } - return chunks; -} - -namespace { - -size_t -getDocCount(const std::vector<IteratorHandlerTest::Chunk>& chunks) -{ - size_t count = 0; - for (size_t i=0; i<chunks.size(); ++i) { - count += chunks[i]._entries.size(); - } - return count; -} - -size_t -getRemoveEntryCount(const std::vector<spi::DocEntry::UP>& entries) -{ - size_t ret = 0; - for (size_t i = 0; i < entries.size(); ++i) { - if (entries[i]->isRemove()) { - ++ret; - } - } - return ret; -} - -struct DocEntryIndirectTimestampComparator -{ - bool operator()(const spi::DocEntry::UP& e1, - const spi::DocEntry::UP& e2) const - { - return e1->getTimestamp() < e2->getTimestamp(); - } -}; - -std::vector<spi::DocEntry::UP> -getEntriesFromChunks(const std::vector<IteratorHandlerTest::Chunk>& chunks) -{ - std::vector<spi::DocEntry::UP> ret; - for (size_t chunk = 0; chunk < chunks.size(); ++chunk) { - for (size_t i = 0; i < chunks[chunk]._entries.size(); ++i) { - ret.push_back(spi::DocEntry::UP(chunks[chunk]._entries[i]->clone())); - } - } - std::sort(ret.begin(), - ret.end(), - DocEntryIndirectTimestampComparator()); - return ret; -} - -const vespalib::LazyFile& 
-getFileHandle(const MemFile& mf1) -{ - return static_cast<const SimpleMemFileIOBuffer&>( - mf1.getMemFileIO()).getFileHandle(); -} - -const LoggingLazyFile& -getLoggerFile(const MemFile& file) -{ - return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file)); -} - -} - -void -IteratorHandlerTest::verifyDocs(const std::vector<DocAndTimestamp>& wanted, - const std::vector<IteratorHandlerTest::Chunk>& chunks, - const std::set<vespalib::string>& removes) const -{ - std::vector<spi::DocEntry::UP> retrieved( - getEntriesFromChunks(chunks)); - size_t removeCount = getRemoveEntryCount(retrieved); - // Ensure that we've got the correct number of puts and removes - CPPUNIT_ASSERT_EQUAL(removes.size(), removeCount); - CPPUNIT_ASSERT_EQUAL(wanted.size(), retrieved.size() - removeCount); - - size_t wantedIdx = 0; - for (size_t i = 0; i < retrieved.size(); ++i) { - spi::DocEntry& entry(*retrieved[i]); - if (entry.getDocument() != 0) { - if (!(*wanted[wantedIdx].first == *entry.getDocument())) { - std::ostringstream ss; - ss << "Documents differ! 
Wanted:\n" - << wanted[wantedIdx].first->toString(true) - << "\n\nGot:\n" - << entry.getDocument()->toString(true); - CPPUNIT_FAIL(ss.str()); - } - CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].second, entry.getTimestamp()); - CPPUNIT_ASSERT_EQUAL(wanted[wantedIdx].first->serialize()->getLength() - + sizeof(spi::DocEntry), - size_t(entry.getSize())); - ++wantedIdx; - } else { - // Remove-entry - CPPUNIT_ASSERT(entry.getDocumentId() != 0); - CPPUNIT_ASSERT_EQUAL(entry.getDocumentId()->getSerializedSize() - + sizeof(spi::DocEntry), - size_t(entry.getSize())); - if (removes.find(entry.getDocumentId()->toString()) == removes.end()) { - std::ostringstream ss; - ss << "Got unexpected remove entry for document id " - << *entry.getDocumentId(); - CPPUNIT_FAIL(ss.str()); - } - } - } -} - -// Feed numDocs documents, starting from timestamp 1000 -std::vector<IteratorHandlerTest::DocAndTimestamp> -IteratorHandlerTest::feedDocs(size_t numDocs, - uint32_t minSize, - uint32_t maxSize) -{ - std::vector<DocAndTimestamp> docs; - for (uint32_t i = 0; i < numDocs; ++i) { - docs.push_back( - DocAndTimestamp( - doPut(4, - framework::MicroSecTime(1000 + i), - minSize, - maxSize), - spi::Timestamp(1000 + i))); - } - flush(document::BucketId(16, 4)); - return docs; -} - -void -IteratorHandlerTest::testSomeSlotsRemovedBetweenInvocations() -{ - std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096); - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - - spi::CreateIteratorResult iter(create(b, sel)); - CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId())); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 10000, 25); - CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size()); - - { - MemFilePtr file(getMemFile(b.getBucketId())); - - for (int i = 0 ; i < 2; ++i) { - const MemSlot* slot = file->getSlotWithId(docs.front().first->getId()); - CPPUNIT_ASSERT(slot != 0); - file->removeSlot(*slot); - docs.erase(docs.begin()); - } - 
file->flushToDisk(); - } - - std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 10000); - CPPUNIT_ASSERT_EQUAL(size_t(24), chunks2.size()); - std::move(chunks2.begin(), chunks2.end(), std::back_inserter(chunks)); - - verifyDocs(docs, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - - // Bucket should not be evicted from cache during normal operation. - CPPUNIT_ASSERT(env()._cache.contains(b.getBucketId())); -} - -void -IteratorHandlerTest::testAllSlotsRemovedBetweenInvocations() -{ - std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096); - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - - spi::CreateIteratorResult iter(create(b, sel)); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25); - CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size()); - - { - MemFilePtr file(getMemFile(b.getBucketId())); - - for (int i = 0 ; i < 75; ++i) { - const MemSlot* slot = file->getSlotWithId(docs[i].first->getId()); - CPPUNIT_ASSERT(slot != 0); - file->removeSlot(*slot); - } - file->flushToDisk(); - docs.erase(docs.begin(), docs.begin() + 75); - } - - std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1, 0, true); - CPPUNIT_ASSERT_EQUAL(size_t(0), getDocCount(chunks2)); - verifyDocs(docs, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testIterateMetadataOnly() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - std::vector<DocAndTimestamp> docs = feedDocs(10); - - CPPUNIT_ASSERT( - doUnrevertableRemove(b.getBucketId(), - docs[docs.size() - 2].first->getId(), - Timestamp(1008))); - - CPPUNIT_ASSERT( - doRemove(b.getBucketId(), - docs[docs.size() - 1].first->getId(), - framework::MicroSecTime(3001), - 
OperationHandler::PERSIST_REMOVE_IF_FOUND)); - - flush(b.getBucketId()); - - spi::Selection sel(createSelection("true")); - spi::CreateIteratorResult iter( - create(b, sel, spi::NEWEST_DOCUMENT_OR_REMOVE, document::NoFields())); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); - std::vector<spi::DocEntry::UP> entries = getEntriesFromChunks(chunks); - CPPUNIT_ASSERT_EQUAL(docs.size(), entries.size()); - std::vector<DocAndTimestamp>::const_iterator docIter( - docs.begin()); - for (size_t i = 0; i < entries.size(); ++i, ++docIter) { - const spi::DocEntry& entry = *entries[i]; - - CPPUNIT_ASSERT(entry.getDocument() == 0); - CPPUNIT_ASSERT(entry.getDocumentId() == 0); - if (i == 9) { - CPPUNIT_ASSERT(entry.isRemove()); - CPPUNIT_ASSERT_EQUAL(spi::Timestamp(3001), entry.getTimestamp()); - } else if (i == 8) { - CPPUNIT_ASSERT(entry.isRemove()); - CPPUNIT_ASSERT_EQUAL(spi::Timestamp(1008), entry.getTimestamp()); - } else { - CPPUNIT_ASSERT(!entry.isRemove()); - CPPUNIT_ASSERT_EQUAL(docIter->second, entry.getTimestamp()); - } - } - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testIterateHeadersOnly() -{ - std::vector<DocAndTimestamp> docs = feedDocs(20); - // Remove all bodies. 
- for (size_t i = 0; i < docs.size(); ++i) { - clearBody(*docs[i].first); - } - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - - spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY, - document::HeaderFields())); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1024); - verifyDocs(docs, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testIterateLargeDocument() -{ - std::vector<DocAndTimestamp> docs = feedDocs(10, 10000, 10000); - std::vector<DocAndTimestamp> largedoc; - largedoc.push_back(docs.back()); - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - - spi::CreateIteratorResult iter(create(b, sel)); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 100, 1); - verifyDocs(largedoc, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testDocumentsRemovedBetweenInvocations() -{ - int docCount = 100; - std::vector<DocAndTimestamp> docs = feedDocs(docCount); - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - - spi::CreateIteratorResult iter(create(b, sel)); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25); - CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size()); - - // Remove a subset of the documents. We should still get all the - // original documents from the iterator, assuming no compactions. 
- std::vector<DocumentId> removedDocs; - std::vector<DocAndTimestamp> nonRemovedDocs; - for (int i = 0; i < docCount; ++i) { - if (i % 3 == 0) { - removedDocs.push_back(docs[i].first->getId()); - CPPUNIT_ASSERT(doRemove(b.getBucketId(), - removedDocs.back(), - framework::MicroSecTime(2000 + i), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - } else { - nonRemovedDocs.push_back(docs[i]); - } - } - flush(b.getBucketId()); - - std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1); - CPPUNIT_ASSERT_EQUAL(size_t(75), chunks2.size()); - std::move(chunks2.begin(), chunks2.end(), std::back_inserter(chunks)); - - verifyDocs(docs, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::doTestUnrevertableRemoveBetweenInvocations(bool includeRemoves) -{ - int docCount = 100; - std::vector<DocAndTimestamp> docs = feedDocs(docCount); - - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - spi::Selection sel(createSelection("true")); - spi::CreateIteratorResult iter( - create(b, sel, - includeRemoves ? - spi::NEWEST_DOCUMENT_OR_REMOVE : spi::NEWEST_DOCUMENT_ONLY)); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 1, 25); - CPPUNIT_ASSERT_EQUAL(size_t(25), chunks.size()); - - // Remove a subset of the documents unrevertably. 
- std::vector<DocumentId> removedDocs; - std::vector<DocAndTimestamp> nonRemovedDocs; - for (int i = 0; i < docCount - 25; ++i) { - if (i < 10) { - removedDocs.push_back(docs[i].first->getId()); - CPPUNIT_ASSERT( - doUnrevertableRemove(b.getBucketId(), - removedDocs.back(), - Timestamp(1000+i))); - } else { - nonRemovedDocs.push_back(docs[i]); - } - } - flush(b.getBucketId()); - - std::vector<Chunk> chunks2 = doIterate(iter.getIteratorId(), 1); - std::vector<spi::DocEntry::UP> entries = getEntriesFromChunks(chunks2); - if (!includeRemoves) { - CPPUNIT_ASSERT_EQUAL(nonRemovedDocs.size(), chunks2.size()); - verifyDocs(nonRemovedDocs, chunks2); - } else { - CPPUNIT_ASSERT_EQUAL(size_t(75), entries.size()); - for (int i = 0; i < docCount - 25; ++i) { - spi::DocEntry& entry(*entries[i]); - if (i < 10) { - CPPUNIT_ASSERT(entry.isRemove()); - } else { - CPPUNIT_ASSERT(!entry.isRemove()); - } - } - } - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testUnrevertableRemoveBetweenInvocations() -{ - doTestUnrevertableRemoveBetweenInvocations(false); -} - -void -IteratorHandlerTest::testUnrevertableRemoveBetweenInvocationsIncludeRemoves() -{ - doTestUnrevertableRemoveBetweenInvocations(true); -} - -void -IteratorHandlerTest::testMatchTimestampRangeDocAltered() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucketId(16, 4); - document::StringFieldValue updateValue1("update1"); - document::StringFieldValue updateValue2("update2"); - - Document::SP originalDoc = doPut(4, Timestamp(1234)); - - { - document::DocumentUpdate::SP update = createBodyUpdate( - originalDoc->getId(), updateValue1); - - spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(2345)); - CPPUNIT_ASSERT_EQUAL(1234, (int)result.getExistingTimestamp()); - } - - { - document::DocumentUpdate::SP 
update = createBodyUpdate( - originalDoc->getId(), updateValue2); - - spi::UpdateResult result = doUpdate(bucketId, update, Timestamp(3456)); - CPPUNIT_ASSERT_EQUAL(2345, (int)result.getExistingTimestamp()); - } - - CPPUNIT_ASSERT( - doRemove(bucketId, - originalDoc->getId(), - Timestamp(4567), - OperationHandler::PERSIST_REMOVE_IF_FOUND)); - flush(bucketId); - - spi::Bucket b(makeSpiBucket(bucketId)); - - { - spi::Selection sel(createSelection("true")); - sel.setFromTimestamp(spi::Timestamp(0)); - sel.setToTimestamp(spi::Timestamp(10)); - spi::CreateIteratorResult iter(create(b, sel)); - - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 4096, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size()); - CPPUNIT_ASSERT(result.isCompleted()); - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - } - - { - spi::Selection sel(createSelection("true")); - sel.setFromTimestamp(spi::Timestamp(10000)); - sel.setToTimestamp(spi::Timestamp(20000)); - spi::CreateIteratorResult iter(create(b, sel)); - - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 4096, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(size_t(0), result.getEntries().size()); - CPPUNIT_ASSERT(result.isCompleted()); - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - } - - { - spi::Selection sel(createSelection("true")); - sel.setFromTimestamp(spi::Timestamp(0)); - sel.setToTimestamp(spi::Timestamp(1234)); - spi::CreateIteratorResult iter(create(b, sel)); - - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 4096, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); - CPPUNIT_ASSERT(result.isCompleted()); - - const Document& 
receivedDoc(*result.getEntries()[0]->getDocument()); - if (!(*originalDoc == receivedDoc)) { - std::ostringstream ss; - ss << "Documents differ! Wanted:\n" - << originalDoc->toString(true) - << "\n\nGot:\n" - << receivedDoc.toString(true); - CPPUNIT_FAIL(ss.str()); - } - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - } - - { - spi::Selection sel(createSelection("true")); - sel.setFromTimestamp(spi::Timestamp(0)); - sel.setToTimestamp(spi::Timestamp(2345)); - spi::CreateIteratorResult iter(create(b, sel)); - - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 4096, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); - CPPUNIT_ASSERT(result.isCompleted()); - - const Document& receivedDoc(*result.getEntries()[0]->getDocument()); - CPPUNIT_ASSERT(receivedDoc.getValue("content").get()); - CPPUNIT_ASSERT_EQUAL(updateValue1, - dynamic_cast<document::StringFieldValue&>( - *receivedDoc.getValue( - "content"))); - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - } - - { - spi::Selection sel(createSelection("true")); - sel.setFromTimestamp(spi::Timestamp(0)); - sel.setToTimestamp(spi::Timestamp(3456)); - spi::CreateIteratorResult iter(create(b, sel)); - - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 4096, context)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(size_t(1), result.getEntries().size()); - CPPUNIT_ASSERT(result.isCompleted()); - - const Document& receivedDoc(*result.getEntries()[0]->getDocument()); - CPPUNIT_ASSERT(receivedDoc.getValue("content").get()); - CPPUNIT_ASSERT_EQUAL(updateValue2, - dynamic_cast<document::StringFieldValue&>( - *receivedDoc.getValue( - "content"))); - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - } -} - -void 
-IteratorHandlerTest::testIterateAllVersions() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - std::vector<DocAndTimestamp> docs; - - Document::SP originalDoc(createRandomDocumentAtLocation( - 4, 1001, 110, 110)); - - doPut(originalDoc, framework::MicroSecTime(1001), 0); - - document::StringFieldValue updateValue1("update1"); - { - document::DocumentUpdate::SP update = createBodyUpdate( - originalDoc->getId(), updateValue1); - - spi::UpdateResult result = doUpdate(b.getBucketId(), update, Timestamp(2345)); - CPPUNIT_ASSERT_EQUAL(1001, (int)result.getExistingTimestamp()); - } - flush(b.getBucketId()); - - Document::SP updatedDoc(new Document(*originalDoc)); - updatedDoc->setValue("content", document::StringFieldValue("update1")); - docs.push_back(DocAndTimestamp(originalDoc, spi::Timestamp(1001))); - docs.push_back(DocAndTimestamp(updatedDoc, spi::Timestamp(2345))); - - spi::Selection sel(createSelection("true")); - spi::CreateIteratorResult iter(create(b, sel, spi::ALL_VERSIONS)); - - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); - verifyDocs(docs, chunks); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testFieldSetFiltering() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - Document::SP doc(createRandomDocumentAtLocation( - 4, 1001, 110, 110)); - doc->setValue(doc->getField("headerval"), document::IntFieldValue(42)); - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue("groovy, baby!")); - doc->setValue(doc->getField("content"), - document::StringFieldValue("fancy content")); - doPut(doc, framework::MicroSecTime(1001), 0); - flush(b.getBucketId()); - - document::FieldSetRepo repo; - spi::Selection sel(createSelection("true")); - spi::CreateIteratorResult iter( - create(b, sel, spi::NEWEST_DOCUMENT_ONLY, - *repo.parse(*getTypeRepo(), 
"testdoctype1:hstringval,content"))); - std::vector<spi::DocEntry::UP> entries( - getEntriesFromChunks(doIterate(iter.getIteratorId(), 4096))); - CPPUNIT_ASSERT_EQUAL(size_t(1), entries.size()); - CPPUNIT_ASSERT_EQUAL(std::string("content: fancy content\n" - "hstringval: groovy, baby!\n"), - stringifyFields(*entries[0]->getDocument())); -} - -void -IteratorHandlerTest::testIteratorInactiveOnException() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - feedDocs(10); - - env()._cache.clear(); - - simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - spi::CreateIteratorResult iter(create(b, createSelection("true"))); - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 100000, context)); - CPPUNIT_ASSERT(result.hasError()); - // Check that iterator is marked as inactive - const SharedIteratorHandlerState& state( - getPersistenceProvider().getIteratorHandler().getState()); - CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue()) - != state._iterators.end()); - CPPUNIT_ASSERT(state._iterators.find(iter.getIteratorId().getValue()) - ->second.isActive() == false); - - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); -} - -void -IteratorHandlerTest::testDocsCachedBeforeDocumentSelection() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - std::vector<DocAndTimestamp> docs = feedDocs(100, 4096, 4096); - - env()._cache.clear(); - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).maximumReadThroughGap(1024*1024).build()); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory()); - - spi::Selection sel(createSelection("id.user=4")); - spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY, - document::BodyFields())); - - std::vector<Chunk> chunks = 
doIterate(iter.getIteratorId(), 4096); - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - { - MemFilePtr file(getMemFile(b.getBucketId())); - // Should have 3 read ops; metadata, (precached) headers and bodies - CPPUNIT_ASSERT_EQUAL(size_t(3), - getLoggerFile(*file).operations.size()); - } -} - -void -IteratorHandlerTest::testTimestampRangeLimitedPrefetch() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - // Feed docs with timestamp range [1000, 1100) - feedDocs(100, 4096, 4096); - - env()._cache.clear(); - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).maximumReadThroughGap(512).build()); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory()); - - spi::Selection sel(createSelection("id.user=4")); - sel.setFromTimestamp(spi::Timestamp(1050)); - sel.setToTimestamp(spi::Timestamp(1059)); - spi::CreateIteratorResult iter(create(b, sel, spi::NEWEST_DOCUMENT_ONLY, - document::BodyFields())); - std::vector<Chunk> chunks = doIterate(iter.getIteratorId(), 4096); - CPPUNIT_ASSERT_EQUAL(size_t(10), getDocCount(chunks)); - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().destroyIterator(iter.getIteratorId(), context); - // Iterate over all slots, ensuring that only those that fall within the - // timestamp range have actually been cached. 
- { - MemFilePtr file(getMemFile(b.getBucketId())); - // Should have 3 read ops; metadata, (precached) headers and bodies - CPPUNIT_ASSERT_EQUAL(size_t(3), - getLoggerFile(*file).operations.size()); - for (size_t i = 0; i < file->getSlotCount(); ++i) { - const MemSlot& slot((*file)[i]); - if (slot.getTimestamp() >= Timestamp(1050) - && slot.getTimestamp() <= Timestamp(1059)) - { - CPPUNIT_ASSERT(file->partAvailable(slot, HEADER)); - CPPUNIT_ASSERT(file->partAvailable(slot, BODY)); - } else { - CPPUNIT_ASSERT(!file->partAvailable(slot, HEADER)); - CPPUNIT_ASSERT(!file->partAvailable(slot, BODY)); - } - } - } -} - -void -IteratorHandlerTest::testCachePrefetchRequirements() -{ - document::select::Parser parser( - env().repo(), env()._bucketFactory); - { - // No prefetch required. - // NOTE: since stuff like id.user=1234 won't work, we have to handle - // that explicitly in createIterator based on the assumption that a - // non-empty document selection at _least_ requires header to be read. - std::unique_ptr<document::select::Node> sel( - parser.parse("true")); - CachePrefetchRequirements req( - CachePrefetchRequirements::createFromSelection(env().repo(), - *sel)); - CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired()); - CPPUNIT_ASSERT(!req.isBodyPrefetchRequired()); - } - - { - // Header prefetch required. - std::unique_ptr<document::select::Node> sel( - parser.parse("testdoctype1.hstringval='blarg'")); - CachePrefetchRequirements req( - CachePrefetchRequirements::createFromSelection(env().repo(), - *sel)); - CPPUNIT_ASSERT(req.isHeaderPrefetchRequired()); - CPPUNIT_ASSERT(!req.isBodyPrefetchRequired()); - } - - { - // Body prefetch required. 
- std::unique_ptr<document::select::Node> sel( - parser.parse("testdoctype1.content='foobar'")); - CachePrefetchRequirements req( - CachePrefetchRequirements::createFromSelection(env().repo(), - *sel)); - CPPUNIT_ASSERT(!req.isHeaderPrefetchRequired()); - CPPUNIT_ASSERT(req.isBodyPrefetchRequired()); - } -} - -void -IteratorHandlerTest::testBucketEvictedFromCacheOnIterateException() -{ - spi::Bucket b(makeSpiBucket(BucketId(16, 4))); - feedDocs(10); - env()._cache.clear(); - - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - spi::CreateIteratorResult iter(create(b, createSelection("true"))); - simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); - spi::IterateResult result(getPersistenceProvider().iterate( - iter.getIteratorId(), 100000, context)); - CPPUNIT_ASSERT(result.hasError()); - - // This test is actually a bit disingenuous since calling iterate will - // implicitly invoke maintain() on an IO exception, which will subsequently - // evict the bucket due to the exception happening again in its context. - CPPUNIT_ASSERT(!env()._cache.contains(b.getBucketId())); -} - -} -} diff --git a/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp deleted file mode 100644 index 2a9395767af..00000000000 --- a/memfilepersistence/src/tests/spi/joinoperationhandlertest.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memfiletestutils.h" -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/persistence/spi/test.h> - -using document::DocumentType; -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { -namespace { - spi::LoadType defaultLoadType(0, "default"); -} - -class JoinOperationHandlerTest : public MemFileTestUtils -{ - CPPUNIT_TEST_SUITE(JoinOperationHandlerTest); - CPPUNIT_TEST(testSimple); - CPPUNIT_TEST(testTargetExists); - CPPUNIT_TEST(testTargetWithOverlap); - CPPUNIT_TEST(testMultiDisk); - CPPUNIT_TEST(testMultiDiskFlushed); - CPPUNIT_TEST(testInternalJoin); - CPPUNIT_TEST(testInternalJoinDiskFull); - CPPUNIT_TEST(testTargetIoWriteExceptionEvictsTargetFromCache); - CPPUNIT_TEST(test1stSourceIoReadExceptionEvictsSourceFromCache); - CPPUNIT_TEST(test2ndSourceExceptionEvictsExistingTargetFromCache); - CPPUNIT_TEST_SUITE_END(); - -public: - void testSimple(); - void testTargetExists(); - void testTargetWithOverlap(); - void testMultiDisk(); - void testMultiDiskFlushed(); - void testInternalJoin(); - void testInternalJoinDiskFull(); - void testTargetIoWriteExceptionEvictsTargetFromCache(); - void test1stSourceIoReadExceptionEvictsSourceFromCache(); - void test2ndSourceExceptionEvictsExistingTargetFromCache(); - - void insertDocumentInBucket(uint64_t location, - Timestamp timestamp, - document::BucketId bucket); - -private: - void feedSingleDisk(); - void feedMultiDisk(); - std::string getStandardMemFileStatus(uint32_t disk = 0); - - spi::Result doJoin(const document::BucketId to, - const document::BucketId from1, - const document::BucketId from2); -}; - -namespace { - -document::BucketId TARGET = document::BucketId(15, 4); -document::BucketId SOURCE1 = document::BucketId(16, 4); -document::BucketId SOURCE2 = document::BucketId(16, (uint64_t)4 | ((uint64_t)1 << 15)); -} - -CPPUNIT_TEST_SUITE_REGISTRATION(JoinOperationHandlerTest); - -void 
-JoinOperationHandlerTest::feedSingleDisk() -{ - for (uint32_t i = 0; i < 100; i++) { - std::ostringstream ost; - ost << "userdoc:storage_test:1234:" << i; - const DocumentType& type( - *getTypeRepo()->getDocumentType("testdoctype1")); - document::Document::SP doc( - new document::Document(type, document::DocumentId(ost.str()))); - - document::BucketId bucket( - getBucketIdFactory().getBucketId(doc->getId())); - bucket.setUsedBits(33); - doPut(doc, Timestamp(1000 + i), 0, 33); - flush(bucket); - } -} - -void -JoinOperationHandlerTest::feedMultiDisk() -{ - for (uint32_t i = 0; i < 100; i += 2) { - doPutOnDisk(7, 4 | (1 << 15), Timestamp(1000 + i)); - } - flush(SOURCE2); - - for (uint32_t i = 1; i < 100; i += 2) { - doPutOnDisk(4, 4, Timestamp(1000 + i)); - } - flush(SOURCE1); - - { - MemFilePtr file(getMemFile(SOURCE1, 4)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(4, (int)file->getDisk()); - } - - { - MemFilePtr file(getMemFile(SOURCE2, 7)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(7, (int)file->getDisk()); - } -} - -std::string -JoinOperationHandlerTest::getStandardMemFileStatus(uint32_t disk) -{ - std::ostringstream ost; - - ost << getMemFileStatus(TARGET, disk) << "\n" - << getMemFileStatus(SOURCE1, disk ) << "\n" - << getMemFileStatus(SOURCE2, disk) << "\n"; - - return ost.str(); -} - -void -JoinOperationHandlerTest::insertDocumentInBucket( - uint64_t location, - Timestamp timestamp, - document::BucketId bucket) -{ - Document::SP doc( - createRandomDocumentAtLocation( - location, timestamp.getTime(), 100, 100)); - doPut(doc, bucket, timestamp); -} - -spi::Result -JoinOperationHandlerTest::doJoin(const document::BucketId to, - const document::BucketId from1, - const document::BucketId from2) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - return getPersistenceProvider().join( - makeSpiBucket(from1), - makeSpiBucket(from2), - makeSpiBucket(to), - 
context); -} - -void -JoinOperationHandlerTest::testSimple() -{ - setupDisks(1); - feedSingleDisk(); - - { - MemFilePtr file(getMemFile(document::BucketId(33, 1234))); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - } - - { - MemFilePtr file(getMemFile(document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32)))); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - } - - spi::Result result = - doJoin(document::BucketId(32, 1234), - document::BucketId(33, 1234), - document::BucketId(33, (uint64_t)1234 | ((uint64_t)1 << 32))); - - { - MemFilePtr file(getMemFile(document::BucketId(32, (uint64_t)1234))); - CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount()); - CPPUNIT_ASSERT(!file->slotsAltered()); - } -} - -void -JoinOperationHandlerTest::testTargetExists() -{ - setupDisks(1); - - for (uint32_t i = 0; i < 100; i += 2) { - doPut(4 | (1 << 15), Timestamp(1000 + i)); - } - flush(SOURCE2); - - for (uint32_t i = 1; i < 100; i += 2) { - doPut(4, Timestamp(1000 + i)); - } - flush(SOURCE1); - - for (uint32_t i = 0; i < 100; i++) { - uint32_t location = 4; - if (i % 2 == 0) { - location |= (1 << 15); - } - - insertDocumentInBucket(location, Timestamp(500 + i), TARGET); - } - flush(TARGET); - - doJoin(TARGET, SOURCE1, SOURCE2); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x3c00000000000004): 200,0\n" - "BucketId(0x4000000000000004): 0,0\n" - "BucketId(0x4000000000008004): 0,0\n"), - getStandardMemFileStatus()); -} - -void -JoinOperationHandlerTest::testTargetWithOverlap() -{ - setupDisks(1); - - for (uint32_t i = 0; i < 100; i += 2) { - doPut(4 | (1 << 15), Timestamp(1000 + i)); - } - flush(SOURCE2); - - for (uint32_t i = 1; i < 100; i += 2) { - doPut(4, Timestamp(1000 + i)); - } - flush(SOURCE1); - - for (uint32_t i = 0; i < 100; i++) { - uint32_t location = 4; - if (i % 2 == 0) { - location |= (1 << 15); - } - - insertDocumentInBucket(location, Timestamp(950 + i), TARGET); - } - flush(TARGET); - - doJoin(TARGET, SOURCE1, SOURCE2); - - 
CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x3c00000000000004): 150,0\n" - "BucketId(0x4000000000000004): 0,0\n" - "BucketId(0x4000000000008004): 0,0\n"), - getStandardMemFileStatus()); -} - -void -JoinOperationHandlerTest::testMultiDisk() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(10); - feedMultiDisk(); - - getPersistenceProvider().join(makeSpiBucket(SOURCE2, spi::PartitionId(7)), - makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(TARGET, spi::PartitionId(3)), - context); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x3c00000000000004): 100,3\n" - "BucketId(0x4000000000000004): 0,0\n" - "BucketId(0x4000000000008004): 0,0\n"), - getStandardMemFileStatus()); -} - -void -JoinOperationHandlerTest::testMultiDiskFlushed() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(10); - feedMultiDisk(); - - // Flush everything to disk, to check that we can join even - // if it's not in cache before. 
- env()._cache.flushDirtyEntries(); - env()._cache.clear(); - - getPersistenceProvider().join(makeSpiBucket(SOURCE2, spi::PartitionId(7)), - makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(TARGET, spi::PartitionId(3)), - context); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x3c00000000000004): 100,3\n" - "BucketId(0x4000000000000004): 0,3\n" - "BucketId(0x4000000000008004): 0,3\n"), - getStandardMemFileStatus(3)); -} - -void -JoinOperationHandlerTest::testInternalJoin() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(10); - - for (uint32_t i = 4; i < 6; i++) { - for (uint32_t j = 0; j < 10; j++) { - uint32_t location = 4; - doPutOnDisk(i, location, Timestamp(i * 1000 + j)); - } - flush(document::BucketId(16, 4), i); - env()._cache.clear(); - } - - std::string fileName1 = - env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]); - std::string fileName2 = - env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]); - - CPPUNIT_ASSERT(vespalib::stat(fileName1).get()); - vespalib::FileInfo::UP file2(vespalib::stat(fileName2)); - - CPPUNIT_ASSERT(file2.get()); - CPPUNIT_ASSERT(file2->_size > 0); - - PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor(); - // Set disk under 80% full. Over 80%, we shouldn't move buckets to the target. 
- mon->setStatOncePolicy(); - mon->overrideRealStat(512, 100000, 50000); - CPPUNIT_ASSERT(!mon->isFull(0, .80f)); - - getPersistenceProvider().join(makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(SOURCE1, spi::PartitionId(5)), - context); - - env()._cache.clear(); - - CPPUNIT_ASSERT(!vespalib::stat(fileName1).get()); - CPPUNIT_ASSERT(vespalib::stat(fileName2).get()); -} - -void -JoinOperationHandlerTest::testInternalJoinDiskFull() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(10); - - for (uint32_t i = 4; i < 6; i++) { - for (uint32_t j = 0; j < 10; j++) { - uint32_t location = 4; - doPutOnDisk(i, location, Timestamp(i * 1000 + j)); - } - flush(document::BucketId(16, 4), i); - env()._cache.clear(); - } - - std::string fileName1 = - env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[4]); - std::string fileName2 = - env().calculatePathInDir(SOURCE1, (*env()._mountPoints)[5]); - - CPPUNIT_ASSERT(vespalib::stat(fileName1).get()); - vespalib::FileInfo::UP file2(vespalib::stat(fileName2)); - - CPPUNIT_ASSERT(file2.get()); - CPPUNIT_ASSERT(file2->_size > 0); - - PartitionMonitor* mon = env().getDirectory(5).getPartition().getMonitor(); - // Set disk to 81% full. Over 80%, we shouldn't move buckets to the target. 
- mon->setStatOncePolicy(); - mon->overrideRealStat(512, 100000, 81000); - CPPUNIT_ASSERT(!mon->isFull()); - CPPUNIT_ASSERT(mon->isFull(0, .08f)); - - spi::Result result = - getPersistenceProvider().join(makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(SOURCE1, spi::PartitionId(4)), - makeSpiBucket(SOURCE1, spi::PartitionId(5)), - context); - - CPPUNIT_ASSERT(result.hasError()); -} - -void -JoinOperationHandlerTest::testTargetIoWriteExceptionEvictsTargetFromCache() -{ - setupDisks(1); - feedSingleDisk(); - - document::BucketId src1(33, 1234); - document::BucketId src2(33, 1234ULL | (1ULL << 32)); - document::BucketId target(32, 1234); - - CPPUNIT_ASSERT(env()._cache.contains(src1)); - CPPUNIT_ASSERT(env()._cache.contains(src2)); - CPPUNIT_ASSERT(!env()._cache.contains(target)); - - // Reading existing (fully cached) files will go fine, but writing - // new file will not. - simulateIoErrorsForSubsequentlyOpenedFiles(); - - spi::Result result = doJoin(target, src1, src2); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(target)); - // NOTE: since we end up renaming src1 -> target during the first - // iteration of join, src1 will actually be empty. This should not - // matter since the service layer will query the bucket info for - // all these afterwards and will thus pick up on this automatically. 
- unSimulateIoErrorsForSubsequentlyOpenedFiles(); - { - MemFilePtr file(getMemFile(src1)); - CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); - CPPUNIT_ASSERT(!file->slotsAltered()); - } - { - MemFilePtr file(getMemFile(src2)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - CPPUNIT_ASSERT(!file->slotsAltered()); - } - { - MemFilePtr file(getMemFile(target)); - // Renamed from src1 - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - CPPUNIT_ASSERT(!file->slotsAltered()); - } -} - -void -JoinOperationHandlerTest::test1stSourceIoReadExceptionEvictsSourceFromCache() -{ - setupDisks(1); - feedSingleDisk(); - - document::BucketId src1(33, 1234); - document::BucketId src2(33, 1234ULL | (1ULL << 32)); - document::BucketId target(32, 1234); - - env()._cache.clear(); - // Allow for reading in initial metadata so that loadFile itself doesn't - // fail. This could otherwise cause a false negative since that happens - // during initial cache lookup on a cache miss, at which point any - // exception will always stop a file from being added to the cache. Here - // we want to test the case where a file has been successfully hoisted - // out of the cache initially. - simulateIoErrorsForSubsequentlyOpenedFiles(IoErrors().afterReads(1)); - - spi::Result result = doJoin(target, src1, src2); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O read") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(src1)); - CPPUNIT_ASSERT(!env()._cache.contains(src2)); - CPPUNIT_ASSERT(!env()._cache.contains(target)); -} - -/** - * It must be exception safe for any source bucket to throw an exception during - * processing. Otherwise the node will core due to cache sanity checks. - * - * See VESPA-674 for context. In this scenario, it was not possible to write - * to the target file when attempting to join in the 2nd source bucket due to - * the disk fill ratio exceeding configured limits. 
- */ -void -JoinOperationHandlerTest::test2ndSourceExceptionEvictsExistingTargetFromCache() -{ - setupDisks(1); - feedSingleDisk(); - - constexpr uint64_t location = 1234; - - document::BucketId src1(33, location); - document::BucketId src2(33, location | (1ULL << 32)); - document::BucketId target(32, location); - - // Ensure target file is _not_ empty so that copySlots is triggered for - // each source bucket (rather than just renaming the file, which does not - // invoke the file read/write paths). - insertDocumentInBucket(location, Timestamp(100000), target); - flush(target); - - env()._cache.clear(); - // File rewrites are buffered before ever reaching the failure simulation - // layer, so only 1 actual write is used to flush the target file after - // the first source file has been processed. Attempting to flush the writes - // for the second source file should fail with an exception. - simulateIoErrorsForSubsequentlyOpenedFiles( - IoErrors().afterReads(INT_MAX).afterWrites(1)); - - spi::Result result = doJoin(target, src1, src2); - CPPUNIT_ASSERT(result.hasError()); - CPPUNIT_ASSERT(result.getErrorMessage().find("A simulated I/O write") - != vespalib::string::npos); - - CPPUNIT_ASSERT(!env()._cache.contains(src1)); - CPPUNIT_ASSERT(!env()._cache.contains(src2)); - CPPUNIT_ASSERT(!env()._cache.contains(target)); -} - -} - -} diff --git a/memfilepersistence/src/tests/spi/logginglazyfile.h b/memfilepersistence/src/tests/spi/logginglazyfile.h deleted file mode 100644 index d525f85b165..00000000000 --- a/memfilepersistence/src/tests/spi/logginglazyfile.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/vespalib/io/fileutil.h> -#include <sstream> - -namespace storage::memfile { - -class LoggingLazyFile : public vespalib::LazyFile { -public: - class Factory : public Environment::LazyFileFactory { - public: - vespalib::LazyFile::UP createFile(const std::string& fileName) const override { - return vespalib::LazyFile::UP( - new LoggingLazyFile(fileName, vespalib::File::DIRECTIO)); - } - }; - - enum OpType { - READ = 0, - WRITE - }; - - struct Entry { - OpType opType; - size_t bufsize; - off_t offset; - - std::string toString() const { - std::ostringstream ost; - ost << (opType == READ ? "Reading " : "Writing ") - << bufsize - << " bytes at " - << offset; - return ost.str(); - } - }; - - mutable std::vector<Entry> operations; - - LoggingLazyFile(const std::string& filename, int flags) - : LazyFile(filename, flags) {}; - - size_t getOperationCount() const { - return operations.size(); - } - - off_t write(const void *buf, size_t bufsize, off_t offset) override { - Entry e; - e.opType = WRITE; - e.bufsize = bufsize; - e.offset = offset; - - operations.push_back(e); - - return vespalib::LazyFile::write(buf, bufsize, offset); - } - - size_t read(void *buf, size_t bufsize, off_t offset) const override { - Entry e; - e.opType = READ; - e.bufsize = bufsize; - e.offset = offset; - - operations.push_back(e); - - return vespalib::LazyFile::read(buf, bufsize, offset); - } - - std::string toString() const { - std::ostringstream ost; - for (uint32_t i = 0; i < operations.size(); i++) { - ost << operations[i].toString() << "\n"; - } - - return ost.str(); - } - -}; - -} diff --git a/memfilepersistence/src/tests/spi/memcachetest.cpp b/memfilepersistence/src/tests/spi/memcachetest.cpp deleted file mode 100644 index 5e9f1a28225..00000000000 --- a/memfilepersistence/src/tests/spi/memcachetest.cpp +++ /dev/null @@ -1,397 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/memfile/memfilecache.h> -#include <tests/spi/memfiletestutils.h> - - -namespace storage { -namespace memfile { - -class MemCacheTest : public SingleDiskMemFileTestUtils -{ - CPPUNIT_TEST_SUITE(MemCacheTest); - CPPUNIT_TEST(testSimpleLRU); - CPPUNIT_TEST(testCacheSize); - CPPUNIT_TEST(testEvictBody); - CPPUNIT_TEST(testEvictHeader); - CPPUNIT_TEST(testKeepBodyWhenLessThanOneFourth); - CPPUNIT_TEST(testComplexEviction); - CPPUNIT_TEST(testEraseEmptyOnReturn); - CPPUNIT_TEST(testDeleteDoesNotReAddMemoryUsage); - CPPUNIT_TEST(testEraseDoesNotReAddMemoryUsage); - CPPUNIT_TEST(testGetWithNoCreation); - CPPUNIT_TEST_SUITE_END(); - -public: - void testSimpleLRU(); - void testCacheSize(); - void testReduceCacheSizeCallback(); - void testReduceCacheSizeCallbackWhileActive(); - void testEvictBody(); - void testEvictHeader(); - void testKeepBodyWhenLessThanOneFourth(); - void testComplexEviction(); - void testEraseEmptyOnReturn(); - void testDeleteDoesNotReAddMemoryUsage(); - void testEraseDoesNotReAddMemoryUsage(); - void testGetWithNoCreation(); - -private: - framework::defaultimplementation::ComponentRegisterImpl::UP _register; - framework::Component::UP _component; - FakeClock::UP _clock; - std::unique_ptr<MemFilePersistenceMetrics> _metrics; - - std::unique_ptr<MemFileCache> _cache; - - void setSize(const document::BucketId& id, - uint64_t metaSize, - uint64_t headerSz = 0, - uint64_t bodySz = 0, - bool createIfNotInCache = true) - { - MemFilePtr file(_cache->get(id, env(), env().getDirectory(), - createIfNotInCache)); - CPPUNIT_ASSERT(file.get()); - - file->_cacheSizeOverride.metaSize = metaSize; - file->_cacheSizeOverride.headerSize = headerSz; - file->_cacheSizeOverride.bodySize = bodySz; - } - - std::string - getBucketStatus(uint32_t buckets) - { - std::ostringstream ost; - for (uint32_t i = 1; i < buckets + 1; i++) { - document::BucketId id(16, i); - ost << id << " "; - if (!_cache->contains(id)) { - ost << "<nil>\n"; - } else { 
- MemFilePtr file(_cache->get(id, env(), env().getDirectory())); - if (file->_cacheSizeOverride.bodySize > 0) { - ost << "body,"; - } - if (file->_cacheSizeOverride.headerSize > 0) { - ost << "header\n"; - } else { - ost << "meta only\n"; - } - } - } - - return ost.str(); - } - - uint64_t cacheSize() { - return _cache->size(); - } - - document::BucketId getLRU() { - return _cache->getLeastRecentlyUsedBucket()->_bid; - } - - void setCacheSize(uint64_t sz) { - MemFileCache::MemoryUsage usage; - usage.metaSize = sz / 3; - usage.headerSize = sz / 3; - usage.bodySize = sz - usage.metaSize - usage.headerSize; - - _cache->setCacheSize(usage); - } - - void stealMemory(uint64_t memToSteal) { - setCacheSize(_cache->getCacheSize() - memToSteal); - } - - void setup(uint64_t maxMemory) { - tearDown(); - _register.reset( - new framework::defaultimplementation::ComponentRegisterImpl); - _clock.reset(new FakeClock); - _register->setClock(*_clock); - _component.reset(new framework::Component(*_register, "testcomponent")); - _metrics.reset(new MemFilePersistenceMetrics(*_component)); - _cache.reset(new MemFileCache(*_register, _metrics->_cache)); - setCacheSize(maxMemory); - } - -public: - void tearDown() override { - _cache.reset(0); - _metrics.reset(0); - _component.reset(0); - _register.reset(0); - _clock.reset(0); - } -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemCacheTest); - -namespace { - FakeClock clock; -} - -void -MemCacheTest::testSimpleLRU() -{ - setup(2000); - - for (uint32_t i = 1; i < 4; i++) { - setSize(document::BucketId(16, i), 100); - } - - CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 1), getLRU()); - - setSize(document::BucketId(16, 1), 100); - - CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue()); - CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 2), getLRU()); -} - -void -MemCacheTest::testCacheSize() -{ - setup(400); - - setSize(document::BucketId(16, 2), 100); - setSize(document::BucketId(16, 1), 150); - - CPPUNIT_ASSERT_EQUAL(0UL, 
_cache->getMetrics().hits.getValue()); - CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); - - CPPUNIT_ASSERT_EQUAL(250ul, cacheSize()); - - setSize(document::BucketId(16, 1), 200); - - CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().hits.getValue()); - CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); - - CPPUNIT_ASSERT_EQUAL(300ul, cacheSize()); - - CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 2))); - CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1))); - - setSize(document::BucketId(16, 1), 301); - - CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().hits.getValue()); - CPPUNIT_ASSERT_EQUAL(2UL, _cache->getMetrics().misses.getValue()); - - CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 2))); - CPPUNIT_ASSERT(_cache->contains(document::BucketId(16, 1))); - - _cache->clear(); - CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); -} - -void -MemCacheTest::testEvictBody() -{ - setup(1400); - - CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().body_evictions.getValue()); - - setSize(BucketId(16, 1), 150, 100, 0); - setSize(BucketId(16, 2), 100, 100, 900); - - CPPUNIT_ASSERT_EQUAL(1350ul, cacheSize()); - - stealMemory(150); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) header\n" - "BucketId(0x4000000000000002) header\n"), - getBucketStatus(2)); - CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().body_evictions.getValue()); -} - -void -MemCacheTest::testKeepBodyWhenLessThanOneFourth() -{ - setup(450); - - setSize(BucketId(16, 1), 150, 0, 0); - setSize(BucketId(16, 2), 100, 50, 50); - - stealMemory(150); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) <nil>\n" - "BucketId(0x4000000000000002) body,header\n"), - getBucketStatus(2)); -} - -void -MemCacheTest::testEvictHeader() -{ - setup(550); - - CPPUNIT_ASSERT_EQUAL(0UL, _cache->getMetrics().header_evictions.getValue()); - - setSize(BucketId(16, 1), 150, 0, 0); - setSize(BucketId(16, 2), 100, 200, 100); - - stealMemory(150); - - 
CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) meta only\n" - "BucketId(0x4000000000000002) meta only\n"), - getBucketStatus(2)); - CPPUNIT_ASSERT_EQUAL(1UL, _cache->getMetrics().header_evictions.getValue()); -} - -#define ASSERT_CACHE_EVICTIONS(meta, header, body) \ - CPPUNIT_ASSERT_EQUAL(size_t(meta), _cache->getMetrics().body_evictions.getValue()); \ - CPPUNIT_ASSERT_EQUAL(size_t(header), _cache->getMetrics().header_evictions.getValue()); \ - CPPUNIT_ASSERT_EQUAL(size_t(body), _cache->getMetrics().meta_evictions.getValue()); - -void -MemCacheTest::testComplexEviction() -{ - setup(4200); - - setSize(BucketId(16, 1), 150, 0, 0); - setSize(BucketId(16, 2), 100, 200, 200); - setSize(BucketId(16, 3), 100, 200, 0); - setSize(BucketId(16, 4), 100, 400, 0); - setSize(BucketId(16, 5), 100, 200, 400); - setSize(BucketId(16, 6), 100, 200, 300); - setSize(BucketId(16, 7), 100, 0, 0); - setSize(BucketId(16, 8), 100, 200, 400); - setSize(BucketId(16, 9), 100, 200, 250); - - CPPUNIT_ASSERT_EQUAL(4100ul, cacheSize()); - - ASSERT_CACHE_EVICTIONS(0, 0, 0); - - stealMemory(600); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) meta only\n" - "BucketId(0x4000000000000002) header\n" - "BucketId(0x4000000000000003) header\n" - "BucketId(0x4000000000000004) header\n" - "BucketId(0x4000000000000005) header\n" - "BucketId(0x4000000000000006) body,header\n" - "BucketId(0x4000000000000007) meta only\n" - "BucketId(0x4000000000000008) body,header\n" - "BucketId(0x4000000000000009) body,header\n"), - getBucketStatus(9)); - - CPPUNIT_ASSERT_EQUAL(3500ul, cacheSize()); - - ASSERT_CACHE_EVICTIONS(2, 0, 0); - - stealMemory(500); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) meta only\n" - "BucketId(0x4000000000000002) meta only\n" - "BucketId(0x4000000000000003) meta only\n" - "BucketId(0x4000000000000004) header\n" - "BucketId(0x4000000000000005) header\n" - "BucketId(0x4000000000000006) body,header\n" - 
"BucketId(0x4000000000000007) meta only\n" - "BucketId(0x4000000000000008) body,header\n" - "BucketId(0x4000000000000009) body,header\n"), - getBucketStatus(9)); - - CPPUNIT_ASSERT_EQUAL(3100ul, cacheSize()); - - ASSERT_CACHE_EVICTIONS(2, 2, 0); - - stealMemory(1000); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) <nil>\n" - "BucketId(0x4000000000000002) meta only\n" - "BucketId(0x4000000000000003) meta only\n" - "BucketId(0x4000000000000004) meta only\n" - "BucketId(0x4000000000000005) meta only\n" - "BucketId(0x4000000000000006) header\n" - "BucketId(0x4000000000000007) meta only\n" - "BucketId(0x4000000000000008) body,header\n" - "BucketId(0x4000000000000009) body,header\n"), - getBucketStatus(9)); - - CPPUNIT_ASSERT_EQUAL(2050ul, cacheSize()); - - ASSERT_CACHE_EVICTIONS(3, 4, 1); - - stealMemory(1100); - - CPPUNIT_ASSERT_EQUAL( - std::string( - "BucketId(0x4000000000000001) <nil>\n" - "BucketId(0x4000000000000002) <nil>\n" - "BucketId(0x4000000000000003) <nil>\n" - "BucketId(0x4000000000000004) <nil>\n" - "BucketId(0x4000000000000005) <nil>\n" - "BucketId(0x4000000000000006) <nil>\n" - "BucketId(0x4000000000000007) meta only\n" - "BucketId(0x4000000000000008) header\n" - "BucketId(0x4000000000000009) body,header\n"), - getBucketStatus(9)); - - CPPUNIT_ASSERT_EQUAL(950ul, cacheSize()); -} - -#undef ASSERT_CACHE_EVICTIONS - -void -MemCacheTest::testEraseEmptyOnReturn() -{ - setup(4200); - setSize(BucketId(16, 1), 0, 0, 0); - CPPUNIT_ASSERT(!_cache->contains(document::BucketId(16, 1))); -} - -void -MemCacheTest::testDeleteDoesNotReAddMemoryUsage() -{ - BucketId id(16, 1); - setup(1000); - setSize(id, 100, 200, 300); - CPPUNIT_ASSERT_EQUAL(600ul, cacheSize()); - { - MemFilePtr file(_cache->get(id, env(), env().getDirectory())); - file.deleteFile(); - } - CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); - -} - -void -MemCacheTest::testGetWithNoCreation() -{ - BucketId id(16, 1); - setup(1000); - setSize(id, 100, 200, 300, false); - 
CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); -} - - -void -MemCacheTest::testEraseDoesNotReAddMemoryUsage() -{ - BucketId id(16, 1); - setup(1000); - setSize(id, 100, 200, 300); - CPPUNIT_ASSERT_EQUAL(600ul, cacheSize()); - { - MemFilePtr file(_cache->get(id, env(), env().getDirectory())); - file.eraseFromCache(); - } - CPPUNIT_ASSERT_EQUAL(0ul, cacheSize()); - -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp b/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp deleted file mode 100644 index b7fbeba1649..00000000000 --- a/memfilepersistence/src/tests/spi/memfileautorepairtest.cpp +++ /dev/null @@ -1,409 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> -#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> -#include <tests/spi/memfiletestutils.h> -#include <vespa/persistence/spi/test.h> - -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { - -class MemFileAutoRepairTest : public SingleDiskMemFileTestUtils -{ -public: - void setUp() override; - void tearDown() override; - - void testFileMetadataCorruptionIsAutoRepaired(); - void testDocumentContentCorruptionIsAutoRepaired(); - void testCorruptionEvictsBucketFromCache(); - void testRepairFailureInMaintainEvictsBucketFromCache(); - void testZeroLengthFileIsDeleted(); - void testTruncatedBodyLocationIsAutoRepaired(); - void testTruncatedHeaderLocationIsAutoRepaired(); - void testTruncatedHeaderBlockIsAutoRepaired(); - - void corruptBodyBlock(); - - CPPUNIT_TEST_SUITE(MemFileAutoRepairTest); - CPPUNIT_TEST(testFileMetadataCorruptionIsAutoRepaired); - CPPUNIT_TEST(testDocumentContentCorruptionIsAutoRepaired); - CPPUNIT_TEST(testCorruptionEvictsBucketFromCache); - 
CPPUNIT_TEST(testRepairFailureInMaintainEvictsBucketFromCache); - CPPUNIT_TEST(testZeroLengthFileIsDeleted); - CPPUNIT_TEST(testTruncatedBodyLocationIsAutoRepaired); - CPPUNIT_TEST(testTruncatedHeaderLocationIsAutoRepaired); - CPPUNIT_TEST(testTruncatedHeaderBlockIsAutoRepaired); - CPPUNIT_TEST_SUITE_END(); - -private: - void assertDocumentIsSilentlyRemoved( - const document::BucketId& bucket, - const document::DocumentId& docId); - - void reconfigureMinimumHeaderBlockSize(uint32_t newMinSize); - - document::BucketId _bucket; - std::unique_ptr<FileSpecification> _file; - std::vector<document::DocumentId> _slotIds; -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemFileAutoRepairTest); - -namespace { - // A totall uncached memfile with content to use for verify testing - std::unique_ptr<MemFile> _memFile; - - // Clear old content. Create new file. Make sure nothing is cached. - void prepareBucket(SingleDiskMemFileTestUtils& util, - const FileSpecification& file) { - _memFile.reset(); - util.env()._cache.clear(); - vespalib::unlink(file.getPath()); - util.createTestBucket(file.getBucketId(), 0); - util.env()._cache.clear(); - _memFile.reset(new MemFile(file, util.env())); - _memFile->getMemFileIO().close(); - - } - - MetaSlot getSlot(uint32_t index) { - assert(_memFile.get()); - vespalib::LazyFile file(_memFile->getFile().getPath(), 0); - MetaSlot result; - file.read(&result, sizeof(MetaSlot), - sizeof(Header) + sizeof(MetaSlot) * index); - return result; - } - - void setSlot(uint32_t index, MetaSlot slot, - bool updateFileChecksum = true) - { - (void)updateFileChecksum; - assert(_memFile.get()); - //if (updateFileChecksum) slot.updateFileChecksum(); - vespalib::LazyFile file(_memFile->getFile().getPath(), 0); - file.write(&slot, sizeof(MetaSlot), - sizeof(Header) + sizeof(MetaSlot) * index); - } -} - -void -MemFileAutoRepairTest::setUp() -{ - SingleDiskMemFileTestUtils::setUp(); - _bucket = BucketId(16, 0xa); - createTestBucket(_bucket, 0); - - { - MemFilePtr 
memFilePtr(env()._cache.get(_bucket, env(), env().getDirectory())); - _file.reset(new FileSpecification(memFilePtr->getFile())); - CPPUNIT_ASSERT(memFilePtr->getSlotCount() >= 2); - for (size_t i = 0; i < memFilePtr->getSlotCount(); ++i) { - _slotIds.push_back(memFilePtr->getDocumentId((*memFilePtr)[i])); - } - } - env()._cache.clear(); -} - -void -MemFileAutoRepairTest::tearDown() -{ - _file.reset(0); - _memFile.reset(0); - SingleDiskMemFileTestUtils::tearDown(); -}; - -void -MemFileAutoRepairTest::testFileMetadataCorruptionIsAutoRepaired() -{ - // Test corruption detected in initial metadata load - prepareBucket(*this, *_file); - document::DocumentId id(_slotIds[1]); - MetaSlot slot(getSlot(1)); - CPPUNIT_ASSERT(slot._gid == id.getGlobalId()); // Sanity checking... - { - MetaSlot s(slot); - s.setTimestamp(Timestamp(40)); - setSlot(1, s); - } - - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); - - // File not in cache; should be detected in initial load - spi::GetResult res(doGet(_bucket, id, document::AllFields())); - // FIXME: currently loadFile is silently fixing corruptions! - //CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode()); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); - CPPUNIT_ASSERT(!res.hasDocument()); - - CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets()); - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); - - // File should now have been repaired, so a subsequent get for - // the same document should just return an empty (but OK) result. 
- spi::GetResult res2(doGet(_bucket, id, document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode()); - CPPUNIT_ASSERT(!res2.hasDocument()); - - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); -} - -void -MemFileAutoRepairTest::corruptBodyBlock() -{ - CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); - // Corrupt body block of slot 1 - MetaSlot slot(getSlot(1)); - { - MetaSlot s(slot); - s.setBodyPos(52); - s.setBodySize(18); - s.updateChecksum(); - setSlot(1, s); - } -} - -void -MemFileAutoRepairTest::testDocumentContentCorruptionIsAutoRepaired() -{ - // Corrupt body block - prepareBucket(*this, *_file); - document::DocumentId id(_slotIds[1]); - corruptBodyBlock(); - - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); - - spi::GetResult res(doGet(_bucket, id, document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res.getErrorCode()); - CPPUNIT_ASSERT(!res.hasDocument()); - - CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); - - CPPUNIT_ASSERT_EQUAL(std::string("400000000000000a"), getModifiedBuckets()); - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); - - // File should now have been repaired, so a subsequent get for - // the same document should just return an empty (but OK) result. - spi::GetResult res2(doGet(_bucket, id, document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res2.getErrorCode()); - CPPUNIT_ASSERT(!res2.hasDocument()); - - // File should now be in cache OK - CPPUNIT_ASSERT(env()._cache.contains(_bucket)); - CPPUNIT_ASSERT_EQUAL(std::string(""), getModifiedBuckets()); -} - -// Ideally we'd test this for each spi operation that accesses MemFiles, but -// they all use the same eviction+auto-repair logic... 
-void -MemFileAutoRepairTest::testCorruptionEvictsBucketFromCache() -{ - prepareBucket(*this, *_file); - corruptBodyBlock(); - - // Read slot 0 and shove file into cache - spi::GetResult res(doGet(_bucket, _slotIds[0], document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); - CPPUNIT_ASSERT(res.hasDocument()); - CPPUNIT_ASSERT(env()._cache.contains(_bucket)); - - spi::GetResult res2(doGet(_bucket, _slotIds[1], document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::TRANSIENT_ERROR, res2.getErrorCode()); - CPPUNIT_ASSERT(!res2.hasDocument()); - - // Out of the cache! Begone! Shoo! - CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); - -} - -void -MemFileAutoRepairTest::testRepairFailureInMaintainEvictsBucketFromCache() -{ - prepareBucket(*this, *_file); - corruptBodyBlock(); - spi::Result result(getPersistenceProvider().maintain( - makeSpiBucket(_bucket), spi::HIGH)); - // File being successfully repaired does not constitute a failure of - // the maintain() call. - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - // It should, however, shove it out of the cache. - CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); -} - -void -MemFileAutoRepairTest::testZeroLengthFileIsDeleted() -{ - // Completely truncate auto-created file - vespalib::LazyFile file(_file->getPath(), 0); - file.resize(0); - - // No way to deal with zero-length files aside from deleting them. 
- spi::Result result(getPersistenceProvider().maintain( - makeSpiBucket(_bucket), spi::HIGH)); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, result.getErrorCode()); - CPPUNIT_ASSERT(!env()._cache.contains(_bucket)); - CPPUNIT_ASSERT(!vespalib::fileExists(_file->getPath())); -} - -namespace { - -uint32_t -alignDown(uint32_t value) -{ - uint32_t blocks = value / 512; - return blocks * 512; -}; - -FileInfo -fileInfoFromMemFile(const MemFilePtr& mf) -{ - auto& ioBuf(dynamic_cast<const SimpleMemFileIOBuffer&>( - mf->getMemFileIO())); - return ioBuf.getFileInfo(); -} - -} - -void -MemFileAutoRepairTest::assertDocumentIsSilentlyRemoved( - const document::BucketId& bucket, - const document::DocumentId& docId) -{ - // Corrupted (truncated) slot should be transparently removed during - // loadFile and it should be as if it was never there! - spi::Bucket spiBucket(makeSpiBucket(bucket)); - spi::GetResult res(doGet(spiBucket, docId, document::AllFields())); - CPPUNIT_ASSERT_EQUAL(spi::Result::NONE, res.getErrorCode()); - CPPUNIT_ASSERT(!res.hasDocument()); -} - -void -MemFileAutoRepairTest::testTruncatedBodyLocationIsAutoRepaired() -{ - document::BucketId bucket(16, 4); - document::Document::SP doc( - createRandomDocumentAtLocation(4, 1234, 1024, 1024)); - - doPut(doc, bucket, framework::MicroSecTime(1000)); - flush(bucket); - FileInfo fileInfo; - { - MemFilePtr mf(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); - fileInfo = fileInfoFromMemFile(mf); - - const uint32_t bodyBlockStart( - sizeof(Header) - + fileInfo._metaDataListSize * sizeof(MetaSlot) - + fileInfo._headerBlockSize); - - vespalib::LazyFile file(mf->getFile().getPath(), 0); - uint32_t slotBodySize = (*mf)[0].getLocation(BODY)._size; - CPPUNIT_ASSERT(slotBodySize > 0); - // Align down to nearest sector alignment to avoid unrelated DirectIO - // checks to kick in. Since the body block is always aligned on a - // sector boundary, we know this cannot truncate into the header block. 
- file.resize(alignDown(bodyBlockStart + slotBodySize - 1)); - } - env()._cache.clear(); - assertDocumentIsSilentlyRemoved(bucket, doc->getId()); -} - -void -MemFileAutoRepairTest::testTruncatedHeaderLocationIsAutoRepaired() -{ - document::BucketId bucket(16, 4); - document::Document::SP doc( - createRandomDocumentAtLocation(4, 1234, 1024, 1024)); - // Ensure header has a bunch of data (see alignment comments below). - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue(std::string(1024, 'A'))); - - doPut(doc, bucket, framework::MicroSecTime(1000)); - flush(bucket); - FileInfo fileInfo; - { - MemFilePtr mf(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); - fileInfo = fileInfoFromMemFile(mf); - - const uint32_t headerBlockStart( - sizeof(Header) - + fileInfo._metaDataListSize * sizeof(MetaSlot)); - - vespalib::LazyFile file(mf->getFile().getPath(), 0); - uint32_t slotHeaderSize = (*mf)[0].getLocation(HEADER)._size; - CPPUNIT_ASSERT(slotHeaderSize > 0); - // Align down to nearest sector alignment to avoid unrelated DirectIO - // checks to kick in. The header block is not guaranteed to start on - // sector boundary, but we assume there is enough slack in the header - // section for the metadata slots themselves to be untouched since we - // have a minimum header size of 1024 for the doc in question. 
- file.resize(alignDown(headerBlockStart + slotHeaderSize - 1)); - } - env()._cache.clear(); - assertDocumentIsSilentlyRemoved(bucket, doc->getId()); -} - -void -MemFileAutoRepairTest::reconfigureMinimumHeaderBlockSize(uint32_t newMinSize) -{ - using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig; - using MemFileConfigBuilder - = vespa::config::storage::StorMemfilepersistenceConfigBuilder; - MemFileConfigBuilder builder( - *env().acquireConfigReadLock().memFilePersistenceConfig()); - builder.minimumFileMetaSlots = 2; - builder.minimumFileHeaderBlockSize = newMinSize; - auto newConfig = std::unique_ptr<MemFileConfig>(new MemFileConfig(builder)); - env().acquireConfigWriteLock().setMemFilePersistenceConfig( - std::move(newConfig)); -} - -void -MemFileAutoRepairTest::testTruncatedHeaderBlockIsAutoRepaired() -{ - document::BucketId bucket(16, 4); - document::Document::SP doc( - createRandomDocumentAtLocation(4, 1234, 1, 1)); - // Ensure header block is large enough that free space is added to the end. - reconfigureMinimumHeaderBlockSize(8192); - // Add header field and remove randomly generated body field, ensuring - // we have no data to add to body field. This will prevent slot body - // location checking from detecting a header truncation. - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue("foo")); - doc->remove(doc->getField("content")); - - doPut(doc, bucket, framework::MicroSecTime(1000)); - flush(bucket); - FileInfo fileInfo; - { - MemFilePtr mf(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), mf->getSlotCount()); - fileInfo = fileInfoFromMemFile(mf); - - const uint32_t headerBlockEnd( - sizeof(Header) - + fileInfo._metaDataListSize * sizeof(MetaSlot) - + fileInfo._headerBlockSize); - - vespalib::LazyFile file(mf->getFile().getPath(), 0); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), - (*mf)[0].getLocation(BODY)._size); // No body. 
- const auto headerLoc((*mf)[0].getLocation(HEADER)); - const uint32_t extent(headerLoc._pos + headerLoc._size); - // Make sure we don't intersect an existing slot range. - CPPUNIT_ASSERT(extent < alignDown(headerBlockEnd - 1)); - file.resize(alignDown(headerBlockEnd - 1)); - } - env()._cache.clear(); - assertDocumentIsSilentlyRemoved(bucket, doc->getId()); -} - -} -} diff --git a/memfilepersistence/src/tests/spi/memfiletest.cpp b/memfilepersistence/src/tests/spi/memfiletest.cpp deleted file mode 100644 index 019b20de2df..00000000000 --- a/memfilepersistence/src/tests/spi/memfiletest.cpp +++ /dev/null @@ -1,987 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/memfilepersistence/memfile/memfile.h> -#include <tests/spi/memfiletestutils.h> -#include <tests/spi/logginglazyfile.h> -#include <tests/spi/options_builder.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/memfilepersistence/memfile/memfilecompactor.h> -#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> -#include <vespa/vespalib/util/exceptions.h> -#include <limits> - -namespace storage { -namespace memfile { - -struct MemFileTest : public SingleDiskMemFileTestUtils -{ - typedef MemFileCompactor::SlotList SlotList; - - /** - * Feed a document whose ID is deterministically generated from `seed` to - * bucket (16, 4) at time `timestamp`. - */ - document::DocumentId feedDocument( - uint64_t seed, - uint64_t timestamp, - uint32_t headerSize = 0, - uint32_t minBodySize = 10, - uint32_t maxBodySize = 100); - - /** - * Feed n instances of documents with the same ID to bucket (16, 4) using - * a timestamp range of [1000, 1000+n). 
- */ - void feedSameDocNTimes(uint32_t n); - - void setMaxDocumentVersionsOption(uint32_t n); - - std::vector<Types::Timestamp> compactWithVersionLimit(uint32_t maxVersions); - - void testCompactRemoveDoublePut(); - void testCompactPutRemove(); - void testCompactGidCollision(); - void testCompactGidCollisionAndNot(); - void testCompactWithMemFile(); - void testCompactCombined(); - void testCompactDifferentPuts(); - void testNoCompactionWhenDocumentVersionsWithinLimit(); - void testCompactWhenDocumentVersionsExceedLimit(); - void testCompactLimit1KeepsNewestVersionOnly(); - void testCompactionOptionsArePropagatedFromConfig(); - void testZeroDocumentVersionConfigIsCorrected(); - void testResizeToFreeSpace(); - void testNoFileWriteOnNoOpCompaction(); - void testCacheSize(); - void testClearCache(); - void testGetSlotsByTimestamp(); - void testCacheInconsistentSlot(); - void testEnsureCached(); - void testAddSlotWhenDiskFull(); - void testGetSerializedSize(); - void testGetBucketInfo(); - void testCopySlotsPreservesLocationSharing(); - void testFlushingToNonExistingFileAlwaysRunsCompaction(); - void testOrderDocSchemeDocumentsCanBeAddedToFile(); - - CPPUNIT_TEST_SUITE(MemFileTest); - CPPUNIT_TEST(testCompactRemoveDoublePut); - CPPUNIT_TEST(testCompactPutRemove); - CPPUNIT_TEST(testCompactGidCollision); - CPPUNIT_TEST(testCompactGidCollisionAndNot); - CPPUNIT_TEST(testCompactWithMemFile); - CPPUNIT_TEST(testCompactCombined); - CPPUNIT_TEST(testCompactDifferentPuts); - CPPUNIT_TEST(testNoCompactionWhenDocumentVersionsWithinLimit); - CPPUNIT_TEST(testCompactWhenDocumentVersionsExceedLimit); - CPPUNIT_TEST(testCompactLimit1KeepsNewestVersionOnly); - CPPUNIT_TEST(testCompactionOptionsArePropagatedFromConfig); - CPPUNIT_TEST(testZeroDocumentVersionConfigIsCorrected); - CPPUNIT_TEST(testNoFileWriteOnNoOpCompaction); - CPPUNIT_TEST(testCacheSize); - CPPUNIT_TEST(testClearCache); - CPPUNIT_TEST(testGetSlotsByTimestamp); - CPPUNIT_TEST(testEnsureCached); - 
CPPUNIT_TEST(testResizeToFreeSpace); - CPPUNIT_TEST(testAddSlotWhenDiskFull); - CPPUNIT_TEST(testGetSerializedSize); - CPPUNIT_TEST(testGetBucketInfo); - CPPUNIT_TEST(testCopySlotsPreservesLocationSharing); - CPPUNIT_TEST(testFlushingToNonExistingFileAlwaysRunsCompaction); - CPPUNIT_TEST(testOrderDocSchemeDocumentsCanBeAddedToFile); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemFileTest); - -/** - * Slots should actually be the same pointer. Use this assert to do correct - * check, and still print content of slots on failure. - */ -#define ASSERT_SLOT_EQUAL(slotptra, slotptrb) \ -{ \ - CPPUNIT_ASSERT(slotptra != 0); \ - CPPUNIT_ASSERT(slotptrb != 0); \ - std::ostringstream slotdiff; \ - slotdiff << "Expected: " << *slotptra << ", but got " << *slotptrb; \ - CPPUNIT_ASSERT_EQUAL_MSG(slotdiff.str(), slotptra, slotptrb); \ -} - -namespace { - -framework::MicroSecTime sec(uint64_t n) { - return framework::MicroSecTime(n * 1000000ULL); -} - -/** - * Utility functions for tests to call to do compacting, such that the - * tests themselves are not bound to the current interface. - * - * Also, this function translates second time to microsecond time. 
- */ -MemFileTest::SlotList getSlotsToRemove( - const MemFile& file, uint64_t currentTime, - uint64_t revertTime, uint64_t keepRemoveTime) -{ - MemFileCompactor compactor( - sec(currentTime), - CompactionOptions() - .maxDocumentVersions( - std::numeric_limits<uint32_t>::max()) - .revertTimePeriod(sec(revertTime)) - .keepRemoveTimePeriod(sec(keepRemoveTime))); - return compactor.getSlotsToRemove(file); -} - -class AutoFlush -{ -public: - AutoFlush(MemFilePtr& ptr) : _ptr(ptr) {} - ~AutoFlush() { _ptr->flushToDisk(); } -private: - MemFilePtr& _ptr; -}; - -} - -document::DocumentId -MemFileTest::feedDocument( - uint64_t seed, - uint64_t timestamp, - uint32_t headerSize, - uint32_t minDocSize, - uint32_t maxDocSize) { - document::Document::SP doc(createRandomDocumentAtLocation( - 4, seed, minDocSize, maxDocSize)); - - if (headerSize > 0) { - std::string val(headerSize, 'A'); - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue(val)); - } - - doPut(doc, - document::BucketId(16, 4), - Timestamp(timestamp * 1000000)); - - return doc->getId(); -} - -void -MemFileTest::feedSameDocNTimes(uint32_t n) -{ - for (uint32_t i = 0; i < n; ++i) { - feedDocument(1234, 1000 + i); - } -} - -void -MemFileTest::setMaxDocumentVersionsOption(uint32_t n) -{ - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .maxDocumentVersions(n) - .build()); -} - -void -MemFileTest::testCacheSize() -{ - // Feed some puts - for (uint32_t i = 0; i < 4; i++) { - feedDocument(1234 * (i % 2), 1000 + 200 * i); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - CPPUNIT_ASSERT(file->getCacheSize().sum() > 0); -} - -void -MemFileTest::testClearCache() -{ - // Feed some puts - for (uint32_t i = 0; i < 4; i++) { - feedDocument(1234 * (i % 2), 1000 + 200 * i); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - 
file->flushToDisk(); - - CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0); - CPPUNIT_ASSERT(file->getCacheSize().headerSize > 0); - - file->clearCache(HEADER); - - CPPUNIT_ASSERT(file->getCacheSize().bodySize > 0); - CPPUNIT_ASSERT(file->getMemFileIO().getCachedSize(BODY) > 0); - CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().headerSize); - CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(HEADER)); - - file->clearCache(BODY); - - CPPUNIT_ASSERT_EQUAL(0, (int)file->getCacheSize().bodySize); - CPPUNIT_ASSERT_EQUAL(uint64_t(0), file->getMemFileIO().getCachedSize(BODY)); -} - - -void -MemFileTest::testCompactGidCollision() -{ - // Feed two puts - for (uint32_t i = 0; i < 2; i++) { - feedDocument(1234 * i, 1000 + 200 * i); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - const_cast<MemSlot&>((*file)[1]).setGlobalId((*file)[0].getGlobalId()); - - CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); - - { - SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400)); - CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); - file->removeSlots(toRemove); - } -} - -void -MemFileTest::testCompactGidCollisionAndNot() -{ - // Feed some puts - for (uint32_t i = 0; i < 4; i++) { - feedDocument(1234 * (i % 2), 1000 + 200 * i); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - const_cast<MemSlot&>((*file)[2]).setGlobalId((*file)[0].getGlobalId()); - const_cast<MemSlot&>((*file)[3]).setGlobalId((*file)[1].getGlobalId()); - - CPPUNIT_ASSERT_EQUAL(4, (int)file->getSlotCount()); - - { - SlotList toRemove(getSlotsToRemove(*file, 2000, 300, 86400)); - - CPPUNIT_ASSERT_EQUAL(2, (int)toRemove.size()); - ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); - ASSERT_SLOT_EQUAL(&(*file)[1], toRemove[1]); - file->removeSlots(toRemove); - } -} - - -void -MemFileTest::testCompactRemoveDoublePut() -{ - // Feed two puts at time 1000 and 1200 
- for (uint32_t i = 0; i < 2; i++) { - feedDocument(1234, 1000 + 200 * i); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); - - { - // Not time to collect yet, newest is still revertable - SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 86400)); - CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); - } - - { - SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 86400)); - - CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); - ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); - file->removeSlots(toRemove); - } -} - -void -MemFileTest::testCompactPutRemove() -{ - document::DocumentId docId = feedDocument(1234, 1000); - - doRemove(docId, Timestamp(1200*1000000), 0); - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - - { - // Since remove can still be reverted, we can't revert anything. - SlotList toRemove(getSlotsToRemove(*file, 1300, 300, 600)); - - CPPUNIT_ASSERT_EQUAL(0, (int)toRemove.size()); - } - - { - SlotList toRemove(getSlotsToRemove(*file, 1600, 300, 600)); - - CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); - ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); - file->removeSlots(toRemove); - } - - { - SlotList toRemove(getSlotsToRemove(*file, 1900, 300, 600)); - - CPPUNIT_ASSERT_EQUAL(1, (int)toRemove.size()); - ASSERT_SLOT_EQUAL(&(*file)[0], toRemove[0]); - file->removeSlots(toRemove); - } -} - -void -MemFileTest::testCompactCombined() -{ - document::DocumentId docId; - - // Feed some puts at time 1000, 1200, 1400, 1600 and 1800 for same doc. - for (uint32_t i = 0; i < 5; i++) { - docId = feedDocument(1234, 1000 + i * 200); - } - flush(document::BucketId(16, 4)); - - // Now add remove at time 2000. 
- doRemove(docId, Timestamp(2000 * 1000000), 0); - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount()); - - { - // Compact all redundant slots that are older than revert period of 300. - // This includes 1000, 1200, 1400 and 1600. - SlotList toRemove(getSlotsToRemove(*file, 2001, 300, 86400)); - CPPUNIT_ASSERT_EQUAL(4, (int)toRemove.size()); - for (int i = 0; i < 4; ++i) { - ASSERT_SLOT_EQUAL(&(*file)[i], toRemove[i]); - } - file->removeSlots(toRemove); - } -} - -void -MemFileTest::testCompactDifferentPuts() -{ - document::DocumentId docId; - - // Feed some puts - for (uint32_t i = 0; i < 2; i++) { - for (uint32_t j = 0; j < 3; j++) { - feedDocument(1234 * j, 1000 + (i * 3 + j) * 200); - } - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - CPPUNIT_ASSERT_EQUAL(6, (int)file->getSlotCount()); - - { - SlotList toRemove(getSlotsToRemove(*file, 3000, 300, 86400)); - CPPUNIT_ASSERT_EQUAL(3, (int)toRemove.size()); - - for (uint32_t i = 0; i < 3; i++) { - bool found = false; - for (uint32_t j = 0; j < 3; j++) { - if ((*file)[j] == *toRemove[i]) { - found = true; - } - } - - CPPUNIT_ASSERT(found); - } - file->removeSlots(toRemove); - } -} - -void -MemFileTest::testCompactWithMemFile() -{ - // Feed two puts - for (uint32_t i = 0; i < 2; i++) { - document::Document::SP doc(createRandomDocumentAtLocation( - 4, 1234, 10, 100)); - - doPut(doc, document::BucketId(16, 4), Timestamp((1000 + i * 200)*1000000), 0); - } - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - CPPUNIT_ASSERT_EQUAL(2, (int)file->getSlotCount()); - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .revertTimePeriod(framework::MicroSecTime(1000)) - .build()); - - 
getFakeClock()._absoluteTime = framework::MicroSecTime(2000ULL * 1000000); - - CPPUNIT_ASSERT(file->compact()); - CPPUNIT_ASSERT(!file->compact()); - - CPPUNIT_ASSERT_EQUAL(1, (int)file->getSlotCount()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1200 * 1000000), (*file)[0].getTimestamp()); -} - -/** - * Feed 5 versions of a single document at absolute times 0 through 4 seconds - * and run compaction using the provided max document version option. - * Revert time/keep remove time options are effectively disabled for this test. - * Returns timestamps of all slots that are marked as compactable. - */ -std::vector<Types::Timestamp> -MemFileTest::compactWithVersionLimit(uint32_t maxVersions) -{ - document::BucketId bucket(16, 4); - std::shared_ptr<Document> doc( - createRandomDocumentAtLocation(4, 1234, 10, 100)); - uint32_t versionLimit = 5; - for (uint32_t i = 0; i < versionLimit; ++i) { - Timestamp ts(sec(i).getTime()); - doPut(doc, bucket, ts, 0); - } - flush(bucket); - - MemFilePtr file(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(versionLimit, file->getSlotCount()); - - framework::MicroSecTime currentTime(sec(versionLimit)); - MemFileCompactor compactor( - currentTime, - CompactionOptions() - .revertTimePeriod(sec(versionLimit)) - .keepRemoveTimePeriod(sec(versionLimit)) - .maxDocumentVersions(maxVersions)); - auto slots = compactor.getSlotsToRemove(*file); - // Convert to timestamps since caller won't have access to actual MemFile. 
- std::vector<Timestamp> timestamps; - for (const MemSlot* slot : slots) { - timestamps.push_back(slot->getTimestamp()); - } - return timestamps; -} - -void -MemFileTest::testNoCompactionWhenDocumentVersionsWithinLimit() -{ - auto timestamps = compactWithVersionLimit(5); - CPPUNIT_ASSERT(timestamps.empty()); -} - -void -MemFileTest::testCompactWhenDocumentVersionsExceedLimit() -{ - auto timestamps = compactWithVersionLimit(2); - CPPUNIT_ASSERT_EQUAL(size_t(3), timestamps.size()); - std::vector<Timestamp> expected = { - sec(0), sec(1), sec(2) - }; - CPPUNIT_ASSERT_EQUAL(expected, timestamps); -} - -void -MemFileTest::testCompactLimit1KeepsNewestVersionOnly() -{ - auto timestamps = compactWithVersionLimit(1); - CPPUNIT_ASSERT_EQUAL(size_t(4), timestamps.size()); - std::vector<Timestamp> expected = { - sec(0), sec(1), sec(2), sec(3) - }; - CPPUNIT_ASSERT_EQUAL(expected, timestamps); -} - -void -MemFileTest::testCompactionOptionsArePropagatedFromConfig() -{ - vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder; - vespa::config::content::PersistenceConfigBuilder pcBuilder; - - pcBuilder.maximumVersionsOfSingleDocumentStored = 12345; - pcBuilder.revertTimePeriod = 555; - pcBuilder.keepRemoveTimePeriod = 777; - - vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder); - vespa::config::content::PersistenceConfig pc(pcBuilder); - Options opts(mfc, pc); - - CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(555 * 1000000), - opts._revertTimePeriod); - CPPUNIT_ASSERT_EQUAL(framework::MicroSecTime(777 * 1000000), - opts._keepRemoveTimePeriod); - CPPUNIT_ASSERT_EQUAL(uint32_t(12345), opts._maxDocumentVersions); -} - -void -MemFileTest::testZeroDocumentVersionConfigIsCorrected() -{ - vespa::config::storage::StorMemfilepersistenceConfigBuilder mfcBuilder; - vespa::config::content::PersistenceConfigBuilder pcBuilder; - - pcBuilder.maximumVersionsOfSingleDocumentStored = 0; - - vespa::config::storage::StorMemfilepersistenceConfig mfc(mfcBuilder); - 
vespa::config::content::PersistenceConfig pc(pcBuilder); - Options opts(mfc, pc); - - CPPUNIT_ASSERT_EQUAL(uint32_t(1), opts._maxDocumentVersions); -} - -void -MemFileTest::testGetSlotsByTimestamp() -{ - for (uint32_t i = 0; i < 10; i++) { - feedDocument(i, 1000 + i); - } - flush(document::BucketId(16, 4)); - - std::vector<Timestamp> timestamps; - timestamps.push_back(Timestamp(999 * 1000000)); - timestamps.push_back(Timestamp(1001 * 1000000)); - timestamps.push_back(Timestamp(1002 * 1000000)); - timestamps.push_back(Timestamp(1007 * 1000000)); - timestamps.push_back(Timestamp(1100 * 1000000)); - std::vector<const MemSlot*> slots; - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - file->getSlotsByTimestamp(timestamps, slots); - CPPUNIT_ASSERT_EQUAL(std::size_t(3), slots.size()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1001 * 1000000), slots[0]->getTimestamp()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1002 * 1000000), slots[1]->getTimestamp()); - CPPUNIT_ASSERT_EQUAL(Timestamp(1007 * 1000000), slots[2]->getTimestamp()); -} - -void -MemFileTest::testEnsureCached() -{ - // Feed some puts - for (uint32_t i = 0; i < 5; i++) { - feedDocument(i, 1000 + i * 200, 600, 600, 600); - } - flush(document::BucketId(16, 4)); - - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).maximumReadThroughGap(512).build()); - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - CPPUNIT_ASSERT(file.get()); - CPPUNIT_ASSERT_EQUAL(5, (int)file->getSlotCount()); - - file->ensureDocumentIdCached((*file)[1]); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - if (i == 1) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - } else { - CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); - } - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); - } - } - - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - 
file->ensureDocumentCached((*file)[2], true); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - if (i == 2) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); - } else { - CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); - } - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); - } - } - - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - file->ensureDocumentCached((*file)[3], false); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - if (i == 3) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY)); - } else { - CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); - } - } - } - - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - std::vector<Timestamp> ts; - for (int i = 2; i < 5; ++i) { - ts.push_back((*file)[i].getTimestamp()); - } - - file->ensureDocumentCached(ts, false); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - if (i > 1 && i < 5) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY)); - } else { - CPPUNIT_ASSERT(!file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); - } - } - } - - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - file->ensureHeaderBlockCached(); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - 
CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(!file->partAvailable((*file)[i], BODY)); - } - } - - env()._cache.clear(); - - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - file->ensureBodyBlockCached(); - - for (std::size_t i = 0; i < file->getSlotCount(); ++i) { - CPPUNIT_ASSERT(file->documentIdAvailable((*file)[i])); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], HEADER)); - CPPUNIT_ASSERT(file->partAvailable((*file)[i], BODY)); - } - } -} - -void -MemFileTest::testResizeToFreeSpace() -{ - /** - * This test tests that files are resized to a smaller size when they need - * to be. This should happen during a call to flushToDisk() in MemFile, - * which is either dirty or if passed flag to check even if clean. (Which - * the integrity checker cycle uses). A clean file is used for testing to - * ensure that no part of the code only works for dirty files. This test - * only test for the case where body block is too large. The real - * implementation here will be in the flushUpdatesToFile() function for the - * given file formats. (VersionSerializer's) If more cases wants to be - * tested add those as unit tests for the versionserializers themselves. - */ - - // Create a test bucket to test with. - BucketId bucket(16, 0xa); - createTestBucket(bucket, 0); - - off_t file_size = - ((SimpleMemFileIOBuffer&)getMemFile(bucket)->getMemFileIO()). - getFileHandle().getFileSize(); - - // Clear cache so we can manually modify backing file to increase the - // size of it. 
- FileSpecification file(getMemFile(bucket)->getFile()); - env()._cache.clear(); - { - // Extend file to 1 MB, which should create an excessively large - // body block such that file should be resized to be smaller - vespalib::LazyFile fileHandle(file.getPath(), 0); - fileHandle.write("foobar", 6, 2 * 1024 * 1024 - 6); - } - MemFilePtr memFile(getMemFile(bucket)); - memFile->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); - CPPUNIT_ASSERT_EQUAL(file_size, - ((SimpleMemFileIOBuffer&)memFile->getMemFileIO()). - getFileHandle().getFileSize()); -} - -namespace { - -const vespalib::LazyFile& -getFileHandle(const MemFile& mf1) -{ - return dynamic_cast<const SimpleMemFileIOBuffer&>( - mf1.getMemFileIO()).getFileHandle(); -} - -const LoggingLazyFile& -getLoggerFile(const MemFile& file) -{ - return dynamic_cast<const LoggingLazyFile&>(getFileHandle(file)); -} - -} - -void -MemFileTest::testNoFileWriteOnNoOpCompaction() -{ - BucketId bucket(16, 4); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory()); - - // Feed some unique puts, none of which can be compacted away. - for (uint32_t i = 0; i < 2; i++) { - document::Document::SP doc(createRandomDocumentAtLocation( - 4, i, 10, 100)); - - doPut(doc, bucket, Timestamp((1000 + i * 200)*1000000), 0); - } - flush(bucket); - - MemFilePtr file(getMemFile(bucket)); - - size_t opsBeforeFlush = getLoggerFile(*file).getOperationCount(); - file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); - size_t opsAfterFlush = getLoggerFile(*file).getOperationCount(); - - // Disk should not have been touched, since no slots have been - // compacted away. 
- if (opsBeforeFlush != opsAfterFlush) { - std::cerr << "\n" << getLoggerFile(*file).toString() << "\n"; - } - CPPUNIT_ASSERT_EQUAL(opsBeforeFlush, opsAfterFlush); -} - -void -MemFileTest::testAddSlotWhenDiskFull() -{ - { - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - { - // Add a dummy-slot that can later be removed - Document::SP doc(createRandomDocumentAtLocation(4)); - file->addPutSlot(*doc, Timestamp(1001)); - } - } - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - AutoFlush af(file); - PartitionMonitor* mon = env().getDirectory().getPartition().getMonitor(); - // Set disk to 99% full - mon->setStatOncePolicy(); - mon->setMaxFillness(.98f); - mon->overrideRealStat(512, 100000, 99000); - CPPUNIT_ASSERT(mon->isFull()); - - // Test that addSlot with a non-persisted Put fails - { - Document::SP doc(createRandomDocumentAtLocation(4)); - try { - file->addPutSlot(*doc, Timestamp(10003)); - CPPUNIT_ASSERT(false); - } catch (vespalib::IoException& e) { - CPPUNIT_ASSERT_EQUAL(vespalib::IoException::NO_SPACE, e.getType()); - } - } - - // Slots with valid header and body locations should also - // not fail, as these are added when the file is loaded - { - // Just steal parts from existing slot to ensure they're persisted - const MemSlot* existing = file->getSlotAtTime(Timestamp(1001)); - - MemSlot slot(existing->getGlobalId(), - Timestamp(1005), - existing->getLocation(HEADER), - existing->getLocation(BODY), - IN_USE, - 0x1234); - file->addSlot(slot); - } - - // Removes should not fail when disk is full - { - file->addRemoveSlot(*file->getSlotAtTime(Timestamp(1001)), Timestamp(1003)); - } -} - -void -MemFileTest::testGetSerializedSize() { - document::Document::SP doc(createRandomDocumentAtLocation( - 4, 1234, 1024, 1024)); - - std::string val("Header"); - doc->setValue(doc->getField("hstringval"), - document::StringFieldValue(val)); - - doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000)); - 
flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - file->ensureBodyBlockCached(); - const MemSlot* slot = file->getSlotAtTime(framework::MicroSecTime(1000)); - CPPUNIT_ASSERT(slot != 0); - - vespalib::nbostream serializedHeader; - doc->serializeHeader(serializedHeader); - - vespalib::nbostream serializedBody; - doc->serializeBody(serializedBody); - - CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()), - file->getSerializedSize(*slot, HEADER)); - CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()), - file->getSerializedSize(*slot, BODY)); -} - -void -MemFileTest::testGetBucketInfo() -{ - document::Document::SP doc(createRandomDocumentAtLocation( - 4, 1234, 100, 100)); - doc->setValue(doc->getField("content"), - document::StringFieldValue("foo")); - document::Document::SP doc2(createRandomDocumentAtLocation( - 4, 1235, 100, 100)); - doc2->setValue(doc->getField("content"), - document::StringFieldValue("bar")); - - doPut(doc, document::BucketId(16, 4), framework::MicroSecTime(1000)); - flush(document::BucketId(16, 4)); - - doPut(doc2, document::BucketId(16, 4), framework::MicroSecTime(1001)); - flush(document::BucketId(16, 4)); - - // Do remove which should only add a single meta entry - doRemove(doc->getId(), Timestamp(1002), 0); - flush(document::BucketId(16, 4)); - - MemFilePtr file(getMemFile(document::BucketId(16, 4))); - - CPPUNIT_ASSERT_EQUAL(3u, file->getSlotCount()); - uint32_t maxHeaderExtent = (*file)[1].getLocation(HEADER)._pos - + (*file)[1].getLocation(HEADER)._size; - uint32_t maxBodyExtent = (*file)[1].getLocation(BODY)._pos - + (*file)[1].getLocation(BODY)._size; - - uint32_t wantedUsedSize = 64 + 40*3 + maxHeaderExtent + maxBodyExtent; - BucketInfo info = file->getBucketInfo(); - CPPUNIT_ASSERT_EQUAL(1u, info.getDocumentCount()); - CPPUNIT_ASSERT_EQUAL(3u, info.getEntryCount()); - CPPUNIT_ASSERT_EQUAL(wantedUsedSize, info.getUsedSize()); - uint32_t wantedUniqueSize = 
(*file)[1].getLocation(HEADER)._size - + (*file)[1].getLocation(BODY)._size; - CPPUNIT_ASSERT_EQUAL(wantedUniqueSize, info.getDocumentSize()); -} - -void -MemFileTest::testCopySlotsPreservesLocationSharing() -{ - document::BucketId bucket(16, 4); - // Feed two puts to same document (identical seed). These should not - // share any blocks. Note: implicit sec -> microsec conversion. - feedDocument(1234, 1000); // slot 0 - auto docId = feedDocument(1234, 1001); // slot 1 - // Update only header of last version of document. This should share - // slot body block 2 with that slot 1. - auto update = createHeaderUpdate(docId, document::IntFieldValue(5678)); - doUpdate(bucket, update, Timestamp(1002 * 1000000), 0); - // Feed a remove for doc in slot 2. This should share the header block of - // slot 3 with the newest document in slot 2. - doRemove(docId, Timestamp(1003 * 1000000), 0); - flush(bucket); - - { - MemFilePtr src(getMemFile(document::BucketId(16, 4))); - MemFilePtr dest(getMemFile(document::BucketId(17, 4))); - std::vector<Timestamp> timestamps { - Timestamp(1000 * 1000000), - Timestamp(1001 * 1000000), - Timestamp(1002 * 1000000), - Timestamp(1003 * 1000000) - }; - std::vector<const MemSlot*> slots { - src->getSlotAtTime(Timestamp(1000 * 1000000)), - src->getSlotAtTime(Timestamp(1001 * 1000000)), - src->getSlotAtTime(Timestamp(1002 * 1000000)), - src->getSlotAtTime(Timestamp(1003 * 1000000)) - }; - dest->copySlotsFrom(*src, slots); - dest->flushToDisk(); - CPPUNIT_ASSERT_EQUAL(uint32_t(4), dest->getSlotCount()); - - DataLocation header[4]; - DataLocation body[4]; - for (int i = 0; i < 4; ++i) { - const MemSlot* slot = dest->getSlotAtTime(timestamps[i]); - header[i] = slot->getLocation(HEADER); - body[i] = slot->getLocation(BODY); - } - CPPUNIT_ASSERT(!(header[0] == header[1])); - - CPPUNIT_ASSERT_EQUAL(body[2], body[1]); - CPPUNIT_ASSERT_EQUAL(header[3], header[2]); - } -} - -void -MemFileTest::testFlushingToNonExistingFileAlwaysRunsCompaction() -{ - 
document::BucketId bucket(16, 4); - - setMaxDocumentVersionsOption(1); - feedSameDocNTimes(10); - flush(bucket); - - // Max version limit is 1, flushing should have compacted it down. - MemFilePtr file(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); -} - -void -MemFileTest::testOrderDocSchemeDocumentsCanBeAddedToFile() -{ - // Quick explanation of the esoteric and particular values chosen below: - // orderdoc mangles the MSB of the bucket ID based on the document ID's - // ordering parameters and thus its bucket cannot be directly deduced from - // the generated GID. The values given here specify a document whose GID - // bits differ from those generated by the document and where a GID-only - // bucket ownership check would fail (nuking the node with an assertion). - // We have to make sure cases do not trigger false positives. - document::BucketId bucket(0x84000000ee723751); - auto doc = createDocument("the quick red fox trips over a hedge", - "orderdoc(3,1):storage_test:group1:9:9"); - doPut(std::shared_ptr<Document>(std::move(doc)), - bucket, - Timestamp(1000000 * 1234)); - flush(bucket); - - MemFilePtr file(getMemFile(bucket)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), file->getSlotCount()); - // Ideally we'd test the failure case as well, but that'd require framework - // support for death tests. -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.cpp b/memfilepersistence/src/tests/spi/memfiletestutils.cpp deleted file mode 100644 index 9571d880e9f..00000000000 --- a/memfilepersistence/src/tests/spi/memfiletestutils.cpp +++ /dev/null @@ -1,455 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/document/datatype/documenttype.h> -#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h> -#include <tests/spi/memfiletestutils.h> -#include <tests/spi/simulatedfailurefile.h> -#include <vespa/memfilepersistence/memfile/memfilecache.h> -#include <vespa/document/update/assignvalueupdate.h> -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/document/test/make_bucket_space.h> -#include <vespa/persistence/spi/test.h> -#include <vespa/vespalib/objects/nbostream.h> -#include <vespa/vespalib/util/exceptions.h> -#include <sys/time.h> - -using document::DocumentType; -using document::test::makeBucketSpace; -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { - -namespace { - spi::LoadType defaultLoadType(0, "default"); -} - -namespace { - vdstestlib::DirConfig initialize(uint32_t numDisks) { - system(vespalib::make_string("rm -rf vdsroot").c_str()); - for (uint32_t i = 0; i < numDisks; i++) { - system(vespalib::make_string("mkdir -p vdsroot/disks/d%d", i).c_str()); - } - vdstestlib::DirConfig config(getStandardConfig(true)); - return config; - } - - template<typename T> - struct ConfigReader : public T::Subscriber - { - T config; - - ConfigReader(const std::string& configId) { - T::subscribe(configId, *this); - } - void configure(const T& c) { config = c; } - }; -} - -MemFileTestEnvironment::MemFileTestEnvironment( - uint32_t numDisks, - framework::ComponentRegister& reg, - const document::DocumentTypeRepo& repo) - : _config(initialize(numDisks)), - _provider(reg, _config.getConfigId()) -{ - _provider.setDocumentRepo(repo); - _provider.getPartitionStates(); -} - -MemFileTestUtils::MemFileTestUtils() -{ -} - -MemFileTestUtils::~MemFileTestUtils() -{ -} - -void -MemFileTestUtils::setupDisks(uint32_t numDisks) { - tearDown(); - _componentRegister.reset( - new framework::defaultimplementation::ComponentRegisterImpl); - _clock.reset(new FakeClock); - _componentRegister->setClock(*_clock); - 
_env.reset(new MemFileTestEnvironment(numDisks, - *_componentRegister, - *getTypeRepo())); -} - -Environment& -MemFileTestUtils::env() -{ - return static_cast<MemFilePersistenceProvider&>( - getPersistenceProvider()).getEnvironment(); -} - -MemFilePersistenceProvider& -MemFileTestUtils::getPersistenceProvider() -{ - return _env->_provider; -} - -MemFilePersistenceThreadMetrics& -MemFileTestUtils::getMetrics() -{ - return getPersistenceProvider().getMetrics(); -} - -std::string -MemFileTestUtils::getMemFileStatus(const document::BucketId& id, - uint32_t disk) -{ - MemFilePtr file(getMemFile(id, disk)); - std::ostringstream ost; - ost << id << ": " << file->getSlotCount() << "," << file->getDisk(); - return ost.str(); -} - -std::string -MemFileTestUtils::getModifiedBuckets() -{ - spi::BucketIdListResult result( - getPersistenceProvider().getModifiedBuckets(makeBucketSpace())); - const spi::BucketIdListResult::List& list(result.getList()); - std::ostringstream ss; - for (size_t i = 0; i < list.size(); ++i) { - if (i != 0) { - ss << ","; - } - ss << std::hex << list[i].getId(); - } - return ss.str(); -} - -MemFilePtr -MemFileTestUtils::getMemFile(const document::BucketId& id, uint16_t disk) -{ - return env()._cache.get(id, env(), env().getDirectory(disk)); -} - -spi::Result -MemFileTestUtils::flush(const document::BucketId& id, uint16_t disk) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - return getPersistenceProvider().flush( - makeSpiBucket(id, spi::PartitionId(disk)), context); -} - -document::Document::SP -MemFileTestUtils::doPutOnDisk( - uint16_t disk, - uint32_t location, - Timestamp timestamp, - uint32_t minSize, - uint32_t maxSize) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::Document::SP doc(createRandomDocumentAtLocation( - location, timestamp.getTime(), minSize, maxSize)); - getPersistenceProvider().put( - makeSpiBucket(document::BucketId(16, location), 
spi::PartitionId(disk)), - spi::Timestamp(timestamp.getTime()), - doc, - context); - return doc; -} - -bool -MemFileTestUtils::doRemoveOnDisk( - uint16_t disk, - const document::BucketId& bucketId, - const document::DocumentId& docId, - Timestamp timestamp, - OperationHandler::RemoveType persistRemove) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - if (persistRemove == OperationHandler::PERSIST_REMOVE_IF_FOUND) { - spi::RemoveResult result = getPersistenceProvider().removeIfFound( - makeSpiBucket(bucketId, spi::PartitionId(disk)), - spi::Timestamp(timestamp.getTime()), - docId, - context); - return result.wasFound(); - } - spi::RemoveResult result = getPersistenceProvider().remove( - makeSpiBucket(bucketId, spi::PartitionId(disk)), - spi::Timestamp(timestamp.getTime()), - docId, - context); - - return result.wasFound(); -} - -bool -MemFileTestUtils::doUnrevertableRemoveOnDisk( - uint16_t disk, - const document::BucketId& bucketId, - const DocumentId& docId, - Timestamp timestamp) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - spi::RemoveResult result = - getPersistenceProvider().remove( - makeSpiBucket(bucketId, spi::PartitionId(disk)), - spi::Timestamp(timestamp.getTime()), - docId, context); - - return result.wasFound(); -} - -spi::GetResult -MemFileTestUtils::doGetOnDisk( - uint16_t disk, - const document::BucketId& bucketId, - const document::DocumentId& docId, - const document::FieldSet& fields) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - return getPersistenceProvider().get( - makeSpiBucket(bucketId, spi::PartitionId(disk)), - fields, docId, context); -} - -document::DocumentUpdate::SP -MemFileTestUtils::createBodyUpdate( - const document::DocumentId& docId, - const document::FieldValue& updateValue) -{ - const DocumentType* - docType(getTypeRepo()->getDocumentType("testdoctype1")); - document::DocumentUpdate::SP update( 
- new document::DocumentUpdate(*docType, docId)); - std::shared_ptr<document::AssignValueUpdate> assignUpdate( - new document::AssignValueUpdate(updateValue)); - document::FieldUpdate fieldUpdate(docType->getField("content")); - fieldUpdate.addUpdate(*assignUpdate); - update->addUpdate(fieldUpdate); - return update; -} - -document::DocumentUpdate::SP -MemFileTestUtils::createHeaderUpdate( - const document::DocumentId& docId, - const document::FieldValue& updateValue) -{ - const DocumentType* - docType(getTypeRepo()->getDocumentType("testdoctype1")); - document::DocumentUpdate::SP update( - new document::DocumentUpdate(*docType, docId)); - std::shared_ptr<document::AssignValueUpdate> assignUpdate( - new document::AssignValueUpdate(updateValue)); - document::FieldUpdate fieldUpdate(docType->getField("headerval")); - fieldUpdate.addUpdate(*assignUpdate); - update->addUpdate(fieldUpdate); - return update; -} - -void -MemFileTestUtils::doPut(const document::Document::SP& doc, - Timestamp time, - uint16_t disk, - uint16_t usedBits) -{ - document::BucketId bucket( - getBucketIdFactory().getBucketId(doc->getId())); - bucket.setUsedBits(usedBits); - doPut(doc, bucket, time, disk); -} - -void -MemFileTestUtils::doPut(const document::Document::SP& doc, - document::BucketId bid, - Timestamp time, - uint16_t disk) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - getPersistenceProvider().put(makeSpiBucket(bid, spi::PartitionId(disk)), - spi::Timestamp(time.getTime()), doc, context); -} - -spi::UpdateResult -MemFileTestUtils::doUpdate(document::BucketId bid, - const document::DocumentUpdate::SP& update, - Timestamp time, - uint16_t disk) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - return getPersistenceProvider().update( - makeSpiBucket(bid, spi::PartitionId(disk)), - spi::Timestamp(time.getTime()), update, context); -} - -void -MemFileTestUtils::doRemove(const document::DocumentId& id, 
Timestamp time, - uint16_t disk, bool unrevertableRemove, - uint16_t usedBits) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - document::BucketId bucket(getBucketIdFactory().getBucketId(id)); - bucket.setUsedBits(usedBits); - - if (unrevertableRemove) { - getPersistenceProvider().remove( - makeSpiBucket(bucket, spi::PartitionId(disk)), - spi::Timestamp(time.getTime()), - id, context); - } else { - spi::RemoveResult result = getPersistenceProvider().removeIfFound( - makeSpiBucket(bucket, spi::PartitionId(disk)), - spi::Timestamp(time.getTime()), - id, context); - - if (!result.wasFound()) { - throw vespalib::IllegalStateException( - "Attempted to remove non-existing doc " + id.toString(), - VESPA_STRLOC); - } - } -} - -void -MemFileTestUtils::copyHeader(document::Document& dest, - const document::Document& src) -{ - // FIXME(vekterli): temporary solution while we don't have - // fieldset pruning functionality in Document. - //dest.setHeaderPtr(src.getHeaderPtr()); - vespalib::nbostream originalBodyStream; - dest.serializeBody(originalBodyStream); - - vespalib::nbostream headerStream; - src.serializeHeader(headerStream); - document::ByteBuffer hbuf(headerStream.peek(), headerStream.size()); - dest.deserializeHeader(*getTypeRepo(), hbuf); - // deserializeHeader clears fields struct, so have to re-set body - document::ByteBuffer bbuf(originalBodyStream.peek(), - originalBodyStream.size()); - dest.deserializeBody(*getTypeRepo(), bbuf); -} - -void -MemFileTestUtils::copyBody(document::Document& dest, - const document::Document& src) -{ - // FIXME(vekterli): temporary solution while we don't have - // fieldset pruning functionality in Document. 
- //dest.setBodyPtr(src.getBodyPtr()); - vespalib::nbostream stream; - src.serializeBody(stream); - document::ByteBuffer buf(stream.peek(), stream.size()); - dest.deserializeBody(*getTypeRepo(), buf); -} - -void -MemFileTestUtils::clearBody(document::Document& doc) -{ - // FIXME(vekterli): temporary solution while we don't have - // fieldset pruning functionality in Document. - //doc->getBody().clear(); - vespalib::nbostream stream; - doc.serializeHeader(stream); - doc.deserialize(*getTypeRepo(), stream); -} - -void -MemFileTestUtils::createTestBucket(const document::BucketId& bucket, - uint16_t disk) -{ - - uint32_t opsPerType = 2; - uint32_t numberOfLocations = 2; - uint32_t minDocSize = 0; - uint32_t maxDocSize = 128; - - for (uint32_t useHeaderOnly = 0; useHeaderOnly < 2; ++useHeaderOnly) { - bool headerOnly = (useHeaderOnly == 1); - for (uint32_t optype=0; optype < 4; ++optype) { - for (uint32_t i=0; i<opsPerType; ++i) { - uint32_t seed = useHeaderOnly * 10000 + optype * 1000 + i + 1; - uint64_t location = (seed % numberOfLocations); - location <<= 32; - location += (bucket.getRawId() & 0xffffffff); - document::Document::SP doc( - createRandomDocumentAtLocation( - location, seed, minDocSize, maxDocSize)); - if (headerOnly) { - clearBody(*doc); - } - doPut(doc, Timestamp(seed), disk, bucket.getUsedBits()); - if (optype == 0) { // Regular put - } else if (optype == 1) { // Overwritten later in time - Document::SP doc2(new Document(*doc)); - doc2->setValue(doc2->getField("content"), - document::StringFieldValue("overwritten")); - doPut(doc2, Timestamp(seed + 500), - disk, bucket.getUsedBits()); - } else if (optype == 2) { // Removed - doRemove(doc->getId(), Timestamp(seed + 500), disk, false, - bucket.getUsedBits()); - } else if (optype == 3) { // Unrevertable removed - doRemove(doc->getId(), Timestamp(seed), disk, true, - bucket.getUsedBits()); - } - } - } - } - flush(bucket, disk); -} - -void -MemFileTestUtils::simulateIoErrorsForSubsequentlyOpenedFiles( - 
const IoErrors& errs) -{ - std::unique_ptr<SimulatedFailureLazyFile::Factory> factory( - new SimulatedFailureLazyFile::Factory); - factory->setWriteOpsBeforeFailure(errs._afterWrites); - factory->setReadOpsBeforeFailure(errs._afterReads); - env()._lazyFileFactory = std::move(factory); -} - -void -MemFileTestUtils::unSimulateIoErrorsForSubsequentlyOpenedFiles() -{ - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new DefaultLazyFileFactory(0)); -} - -std::string -MemFileTestUtils::stringifyFields(const document::Document& doc) const -{ - using namespace document; - std::vector<std::string> output; - const StructFieldValue& fields(doc.getFields()); - for (StructFieldValue::const_iterator - it(fields.begin()), e(fields.end()); - it != e; ++it) - { - std::ostringstream ss; - const Field& f(it.field()); - ss << f.getName() << ": "; - FieldValue::UP val(fields.getValue(f)); - if (val.get()) { - ss << val->toString(); - } else { - ss << "(null)"; - } - output.push_back(ss.str()); - } - std::ostringstream ret; - std::sort(output.begin(), output.end()); - std::copy(output.begin(), output.end(), - std::ostream_iterator<std::string>(ret, "\n")); - return ret.str(); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/memfiletestutils.h b/memfilepersistence/src/tests/spi/memfiletestutils.h deleted file mode 100644 index 657b116b6e5..00000000000 --- a/memfilepersistence/src/tests/spi/memfiletestutils.h +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::MemFileTestUtils - * \ingroup memfile - * - * \brief Utilities for unit tests of the MemFile layer. - * - * The memfile layer typically needs a MemFileEnvironment object that must be - * set up. This class creates such an object to be used by unit tests. Other - * utilities useful for only MemFile testing can be added here too. 
- */ - -#pragma once - -#include <vespa/memfilepersistence/memfile/memfilecache.h> -#include <tests/helper/testhelper.h> -#include <vespa/persistence/spi/persistenceprovider.h> -#include <vespa/memfilepersistence/spi/memfilepersistenceprovider.h> -#include <vespa/document/base/testdocman.h> -#include <vespa/document/update/documentupdate.h> -#include <vespa/storageframework/defaultimplementation/clock/realclock.h> -#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> - -namespace storage { -namespace memfile { - -struct FakeClock : public framework::Clock { -public: - typedef std::unique_ptr<FakeClock> UP; - - framework::MicroSecTime _absoluteTime; - - FakeClock() {} - - virtual void addSecondsToTime(uint32_t nr) { - _absoluteTime += framework::MicroSecTime(nr * uint64_t(1000000)); - } - - framework::MicroSecTime getTimeInMicros() const override { - return _absoluteTime; - } - framework::MilliSecTime getTimeInMillis() const override { - return getTimeInMicros().getMillis(); - } - framework::SecondTime getTimeInSeconds() const override { - return getTimeInMicros().getSeconds(); - } - framework::MonotonicTimePoint getMonotonicTime() const override { - return framework::MonotonicTimePoint(std::chrono::microseconds( - getTimeInMicros().getTime())); - } -}; - -struct MemFileTestEnvironment { - MemFileTestEnvironment(uint32_t numDisks, - framework::ComponentRegister& reg, - const document::DocumentTypeRepo& repo); - - vdstestlib::DirConfig _config; - MemFilePersistenceProvider _provider; -}; - -class MemFileTestUtils : public Types, public document::TestDocMan, public CppUnit::TestFixture { -private: - // This variables are kept in test class. 
Instances that needs to be - // unique per test needs to be setup in setupDisks and cleared in - // tearDown - document::BucketIdFactory _bucketIdFactory; - framework::defaultimplementation::ComponentRegisterImpl::UP _componentRegister; - FakeClock::UP _clock; - std::unique_ptr<MemFileTestEnvironment> _env; - -public: - MemFileTestUtils(); - virtual ~MemFileTestUtils(); - - void setupDisks(uint32_t disks); - - void tearDown() override{ - _env.reset(); - _componentRegister.reset(); - _clock.reset(); - } - - std::string getMemFileStatus(const document::BucketId& id, uint32_t disk = 0); - - std::string getModifiedBuckets(); - - /** - Flushes all cached data to disk and updates the bucket database accordingly. - */ - void flush(); - - FakeClock& getFakeClock() { return *_clock; } - - spi::Result flush(const document::BucketId& id, uint16_t disk = 0); - - MemFilePersistenceProvider& getPersistenceProvider(); - - MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk = 0); - - Environment& env(); - - MemFilePersistenceThreadMetrics& getMetrics(); - - MemFileTestEnvironment& getEnv() { return *_env; } - - /** - Performs a put to the given disk. - Returns the document that was inserted. - */ - document::Document::SP doPutOnDisk( - uint16_t disk, - uint32_t location, - Timestamp timestamp, - uint32_t minSize = 0, - uint32_t maxSize = 128); - - document::Document::SP doPut( - uint32_t location, - Timestamp timestamp, - uint32_t minSize = 0, - uint32_t maxSize = 128) - { return doPutOnDisk(0, location, timestamp, minSize, maxSize); } - - /** - Performs a remove to the given disk. - Returns the new doccount if document was removed, or -1 if not found. 
- */ - bool doRemoveOnDisk( - uint16_t disk, - const document::BucketId& bid, - const document::DocumentId& id, - Timestamp timestamp, - OperationHandler::RemoveType persistRemove); - - bool doRemove( - const document::BucketId& bid, - const document::DocumentId& id, - Timestamp timestamp, - OperationHandler::RemoveType persistRemove) { - return doRemoveOnDisk(0, bid, id, timestamp, persistRemove); - } - - bool doUnrevertableRemoveOnDisk(uint16_t disk, - const document::BucketId& bid, - const DocumentId& id, - Timestamp timestamp); - - bool doUnrevertableRemove(const document::BucketId& bid, - const DocumentId& id, - Timestamp timestamp) - { - return doUnrevertableRemoveOnDisk(0, bid, id, timestamp); - } - - virtual const document::BucketIdFactory& getBucketIdFactory() const - { return _bucketIdFactory; } - - document::BucketIdFactory& getBucketIdFactory() - { return _bucketIdFactory; } - - /** - * Do a remove toward storage set up in test environment. - * - * @id Document to remove. - * @disk If set, use this disk, otherwise lookup in bucket db. - * @unrevertableRemove If set, instead of adding put, turn put to remove. - * @usedBits Generate bucket to use from docid using this amount of bits. 
- */ - void doRemove(const DocumentId& id, Timestamp, uint16_t disk, - bool unrevertableRemove = false, uint16_t usedBits = 16); - - spi::GetResult doGetOnDisk( - uint16_t disk, - const document::BucketId& bucketId, - const document::DocumentId& docId, - const document::FieldSet& fields); - - spi::GetResult doGet( - const document::BucketId& bucketId, - const document::DocumentId& docId, - const document::FieldSet& fields) - { return doGetOnDisk(0, bucketId, docId, fields); } - - document::DocumentUpdate::SP createBodyUpdate( - const document::DocumentId& id, - const document::FieldValue& updateValue); - - document::DocumentUpdate::SP createHeaderUpdate( - const document::DocumentId& id, - const document::FieldValue& updateValue); - - virtual const std::shared_ptr<const document::DocumentTypeRepo> getTypeRepo() const - { return document::TestDocMan::getTypeRepoSP(); } - - /** - * Do a put toward storage set up in test environment. - * - * @doc Document to put. Use TestDocMan to generate easily. - * @disk If set, use this disk, otherwise lookup in bucket db. - * @usedBits Generate bucket to use from docid using this amount of bits. - */ - void doPut(const Document::SP& doc, Timestamp, - uint16_t disk, uint16_t usedBits = 16); - - void doPut(const document::Document::SP& doc, - document::BucketId bid, - Timestamp time, - uint16_t disk = 0); - - spi::UpdateResult doUpdate(document::BucketId bid, - const document::DocumentUpdate::SP& update, - Timestamp time, - uint16_t disk = 0); - - /** - * Create a test bucket with various content representing most states a - * bucket can represent. (Such that tests have a nice test bucket to use - * that require operations to handle all the various bucket contents. - * - * @disk If set, use this disk, otherwise lookup in bucket db. - */ - void createTestBucket(const BucketId&, uint16_t disk = 0xffff); - - /** - * In-place modify doc so that it has no more body fields. 
- */ - void clearBody(document::Document& doc); - - /** - * Copy all header data from src into dest, replacing any - * header fields it may already have there. NOTE: this will - * also overwrite document ID, type etc! - */ - void copyHeader(document::Document& dest, - const document::Document& src); - - /** - * Copy all body data from src into dest, replacing any - * body fields it may already have there. - */ - void copyBody(document::Document& dest, - const document::Document& src); - - std::string stringifyFields(const Document& doc) const; - - struct IoErrors { - int _afterReads; - int _afterWrites; - - IoErrors() - : _afterReads(0), - _afterWrites(0) - { - } - - IoErrors& afterReads(int n) { - _afterReads = n; - return *this; - } - - IoErrors& afterWrites(int n) { - _afterWrites = n; - return *this; - } - }; - - /** - * Replaces internal LazyFile factory so that it produces LazyFile - * implementations that trigger I/O exceptions on read/write. Optionally, - * can supply a parameter setting explicit bounds on how many operations - * are allowed on a file before trigging exceptions from there on out. A - * bound of -1 in practice means "don't fail ever" while 0 means "fail the - * next op of that type". - */ - void simulateIoErrorsForSubsequentlyOpenedFiles( - const IoErrors& errs = IoErrors()); - - /** - * Replace internal LazyFile factory with the default, non-failing impl. - */ - void unSimulateIoErrorsForSubsequentlyOpenedFiles(); -}; - -class SingleDiskMemFileTestUtils : public MemFileTestUtils -{ -public: - void setUp() override { - setupDisks(1); - } -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp b/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp deleted file mode 100644 index 9eb2ca00f60..00000000000 --- a/memfilepersistence/src/tests/spi/memfilev1serializertest.cpp +++ /dev/null @@ -1,1100 +0,0 @@ -// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> -#include <tests/spi/memfiletestutils.h> -#include <vespa/memfilepersistence/mapper/locationreadplanner.h> -#include <tests/spi/simulatedfailurefile.h> -#include <tests/spi/options_builder.h> - -namespace storage { -namespace memfile { - -struct MemFileV1SerializerTest : public SingleDiskMemFileTestUtils -{ - void tearDown() override; - void setUpPartialWriteEnvironment(); - void resetConfig(uint32_t minimumFileSize, uint32_t minimumFileHeaderBlockSize); - void doTestPartialWriteRemove(bool readAll); - void doTestPartialWriteUpdate(bool readAll); - - void testWriteReadSingleDoc(); - void testWriteReadPartial(); - void testWriteReadPartialRemoved(); - void testPartialWritePutHeaderOnly(); - void testPartialWritePut(); - void testPartialWriteRemoveCached(); - void testPartialWriteRemoveNotCached(); - void testPartialWriteUpdateCached(); - void testPartialWriteUpdateNotCached(); - void testPartialWriteTooMuchFreeSpace(); - void testPartialWriteNotEnoughFreeSpace(); - void testWriteReadSingleRemovedDoc(); - void testLocationDiskIoPlannerSimple(); - void testLocationDiskIoPlannerMergeReads(); - void testLocationDiskIoPlannerAlignReads(); - void testLocationDiskIoPlannerOneDocument(); - void testSeparateReadsForHeaderAndBody(); - void testLocationsRemappedConsistently(); - void testHeaderBufferTooSmall(); - - /*std::unique_ptr<MemFile> createMemFile(FileSpecification& file, - bool callLoadFile) - { - return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile)); - }*/ - - CPPUNIT_TEST_SUITE(MemFileV1SerializerTest); - CPPUNIT_TEST(testWriteReadSingleDoc); - CPPUNIT_TEST(testWriteReadPartial); - CPPUNIT_TEST(testWriteReadPartialRemoved); - CPPUNIT_TEST(testWriteReadSingleRemovedDoc); - CPPUNIT_TEST(testPartialWritePutHeaderOnly); - 
CPPUNIT_TEST(testPartialWritePut); - CPPUNIT_TEST(testPartialWriteRemoveCached); - CPPUNIT_TEST(testPartialWriteRemoveNotCached); - CPPUNIT_TEST(testPartialWriteUpdateCached); - CPPUNIT_TEST(testPartialWriteUpdateNotCached); - CPPUNIT_TEST(testLocationDiskIoPlannerSimple); - CPPUNIT_TEST(testLocationDiskIoPlannerMergeReads); - CPPUNIT_TEST(testLocationDiskIoPlannerAlignReads); - CPPUNIT_TEST(testLocationDiskIoPlannerOneDocument); - CPPUNIT_TEST(testSeparateReadsForHeaderAndBody); - CPPUNIT_TEST(testPartialWriteTooMuchFreeSpace); - CPPUNIT_TEST(testPartialWriteNotEnoughFreeSpace); - CPPUNIT_TEST(testLocationsRemappedConsistently); - CPPUNIT_TEST(testHeaderBufferTooSmall); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1SerializerTest); - -namespace { - -const vespalib::LazyFile& -getFileHandle(const MemFile& mf1) -{ - return static_cast<const SimpleMemFileIOBuffer&>( - mf1.getMemFileIO()).getFileHandle(); -} - -const LoggingLazyFile& -getLoggerFile(const MemFile& file) -{ - return static_cast<const LoggingLazyFile&>(getFileHandle(file)); -} - -bool isContentEqual(MemFile& mf1, MemFile& mf2, - bool requireEqualContentCached, std::ostream& error) -{ - MemFile::const_iterator it1( - mf1.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED)); - MemFile::const_iterator it2( - mf2.begin(Types::ITERATE_GID_UNIQUE | Types::ITERATE_REMOVED)); - while (true) { - if (it1 == mf1.end() && it2 == mf2.end()) { - return true; - } - if (it1 == mf1.end() || it2 == mf2.end()) { - error << "Different amount of GID unique slots"; - return false; - } - if (it1->getTimestamp() != it2->getTimestamp()) { - error << "Different timestamps"; - return false; - } - if (it1->getGlobalId() != it2->getGlobalId()) { - error << "Different gids"; - return false; - } - if (it1->getPersistedFlags() != it2->getPersistedFlags()) { - error << "Different persisted flags"; - return false; - } - if (requireEqualContentCached) { - if (mf1.partAvailable(*it1, Types::BODY) - ^ 
mf2.partAvailable(*it2, Types::BODY) - || mf1.partAvailable(*it1, Types::HEADER) - ^ mf2.partAvailable(*it2, Types::HEADER)) - { - error << "Difference in cached content: "; - return false; - } - } - - if (mf1.partAvailable(*it1, Types::HEADER) && - mf2.partAvailable(*it2, Types::HEADER)) - { - document::Document::UP doc1 = mf1.getDocument(*it1, Types::ALL); - document::Document::UP doc2 = mf2.getDocument(*it2, Types::ALL); - - CPPUNIT_ASSERT(doc1.get()); - CPPUNIT_ASSERT(doc2.get()); - - if (*doc1 != *doc2) { - error << "Documents different: Expected:\n" - << doc1->toString(true) << "\nActual:\n" - << doc2->toString(true) << "\n"; - return false; - } - } - ++it1; - ++it2; - } -} - -bool -validateMemFileStructure(const MemFile& mf, std::ostream& error) -{ - const SimpleMemFileIOBuffer& ioBuf( - dynamic_cast<const SimpleMemFileIOBuffer&>(mf.getMemFileIO())); - const FileInfo& fileInfo(ioBuf.getFileInfo()); - if (fileInfo.getFileSize() % 512) { - error << "File size is not a multiple of 512 bytes"; - return false; - } - if (fileInfo.getBlockIndex(Types::BODY) % 512) { - error << "Body start index is not a multiple of 512 bytes"; - return false; - } - if (fileInfo.getBlockSize(Types::BODY) % 512) { - error << "Body size is not a multiple of 512 bytes"; - return false; - } - return true; -} - -} - -void -MemFileV1SerializerTest::tearDown() { - //_memFile.reset(); -} - -/** - * Adjust minimum slotfile size values to avoid rewriting file - * when we want to get a partial write - */ -void -MemFileV1SerializerTest::setUpPartialWriteEnvironment() -{ - resetConfig(4096, 2048); -} - -void -MemFileV1SerializerTest::resetConfig(uint32_t minimumFileSize, - uint32_t minimumFileHeaderBlockSize) -{ - using MemFileConfig = vespa::config::storage::StorMemfilepersistenceConfig; - using MemFileConfigBuilder - = vespa::config::storage::StorMemfilepersistenceConfigBuilder; - - MemFileConfigBuilder persistenceConfig( - *env().acquireConfigReadLock().memFilePersistenceConfig()); - 
persistenceConfig.minimumFileHeaderBlockSize = minimumFileHeaderBlockSize; - persistenceConfig.minimumFileSize = minimumFileSize; - auto newCfg = std::unique_ptr<MemFileConfig>( - new MemFileConfig(persistenceConfig)); - env().acquireConfigWriteLock().setMemFilePersistenceConfig( - std::move(newCfg)); -} - -struct DummyMemFileIOInterface : MemFileIOInterface { - Document::UP getDocumentHeader(const document::DocumentTypeRepo&, - DataLocation) const override - { - return Document::UP(); - } - - document::DocumentId getDocumentId(DataLocation) const override { - return document::DocumentId(""); - } - - void readBody(const document::DocumentTypeRepo&, - DataLocation, - Document&) const override - { - } - DataLocation addDocumentIdOnlyHeader( - const DocumentId&, - const document::DocumentTypeRepo&) override - { - return DataLocation(); - } - DataLocation addHeader(const Document&) override { return DataLocation(); } - DataLocation addBody(const Document&) override { return DataLocation(); } - void clear(DocumentPart) override {} - bool verifyConsistent() const override { return true; } - void move(const FileSpecification&) override {} - DataLocation copyCache(const MemFileIOInterface&, DocumentPart, DataLocation) override { - return DataLocation(); - } - - void close() override {}; - bool isCached(DataLocation, DocumentPart) const override { return false; } - bool isPersisted(DataLocation, DocumentPart) const override { return false; } - uint32_t getSerializedSize(DocumentPart, DataLocation) const override { return 0; } - - void ensureCached(Environment&, DocumentPart, const std::vector<DataLocation>&) override {} - - size_t getCachedSize(DocumentPart) const override { return 0; } -}; - -#define VESPA_MEMFILEV1_SETUP_SOURCE \ - system("rm -f testfile.0"); \ - document::Document::SP doc(createRandomDocumentAtLocation(4)); \ - FileSpecification file(document::BucketId(16, 4), env().getDirectory(0), "testfile.0"); \ - MemFile source(file, env()); - -#define 
VESPA_MEMFILEV1_DIFF(source, target) \ - "\nSource:\n" + source.toString(true) \ - + "\nTarget:\n" + target.toString(true) - -#define VESPA_MEMFILEV1_VALIDATE_STRUCTURE(mfile) \ -{ \ - std::ostringstream validateErr; \ - if (!validateMemFileStructure(mfile, validateErr)) { \ - CPPUNIT_FAIL(validateErr.str()); \ - } \ -} - -#define VESPA_MEMFILEV1_ASSERT_SERIALIZATION(sourceMemFile) \ -env()._memFileMapper.flush(sourceMemFile, env()); \ -VESPA_MEMFILEV1_VALIDATE_STRUCTURE(sourceMemFile) \ -MemFile target(file, env()); \ -VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target) \ -{ \ - target.ensureBodyBlockCached(); \ - target.getBucketInfo(); \ - std::ostringstream diff; \ - if (!isContentEqual(sourceMemFile, target, true, diff)) { \ - std::string msg = "MemFiles not content equal: " + diff.str() \ - + VESPA_MEMFILEV1_DIFF(sourceMemFile, target); \ - CPPUNIT_FAIL(msg); \ - } \ -} - -void -MemFileV1SerializerTest::testWriteReadSingleDoc() -{ - VESPA_MEMFILEV1_SETUP_SOURCE; - source.addPutSlot(*doc, Timestamp(1001)); - std::string foo(VESPA_MEMFILEV1_DIFF(source, source)); - VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source); -} - -void -MemFileV1SerializerTest::testWriteReadPartial() -{ - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - std::map<Timestamp, Document::SP> docs; - { - MemFile source(file, env()); - - for (int i = 0; i < 50; ++i) { - Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000)); - source.addPutSlot(*doc, Timestamp(1001 + i)); - docs[Timestamp(1001 + i)] = doc; - } - - env()._memFileMapper.flush(source, env()); - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); - } - - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).maximumReadThroughGap(1024).build()); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory()); - - MemFile target(file, env()); - - 
std::vector<Timestamp> timestamps; - - for (int i = 0; i < 50; i+=4) { - timestamps.push_back(Timestamp(1001 + i)); - } - CPPUNIT_ASSERT_EQUAL(size_t(13), timestamps.size()); - - getLoggerFile(target).operations.clear(); - target.ensureDocumentCached(timestamps, false); - // Headers are small enough that they get read in 1 op + 13 body reads - CPPUNIT_ASSERT_EQUAL(14, (int)getLoggerFile(target).operations.size()); - - for (std::size_t i = 0; i < timestamps.size(); ++i) { - const MemSlot* slot = target.getSlotAtTime(timestamps[i]); - CPPUNIT_ASSERT(slot); - CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); - CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); - CPPUNIT_ASSERT_EQUAL(*docs[timestamps[i]], *target.getDocument(*slot, ALL)); - } - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); -} - -void -MemFileV1SerializerTest::testWriteReadPartialRemoved() -{ - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - MemFile source(file, env()); - - for (int i = 0; i < 50; ++i) { - Document::SP doc(createRandomDocumentAtLocation(4, i, 1000, 2000)); - source.addPutSlot(*doc, Timestamp(1001 + i)); - source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1001 + i)), - Timestamp(2001 + i)); - } - - env()._memFileMapper.flush(source, env()); - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).maximumReadThroughGap(1024).build()); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory); - - MemFile target(file, env()); - - std::vector<Timestamp> timestamps; - - for (int i = 0; i < 50; i+=4) { - timestamps.push_back(Timestamp(2001 + i)); - } - - getLoggerFile(target).operations.clear(); - target.ensureDocumentCached(timestamps, false); - // All removed; should only read header locations - CPPUNIT_ASSERT_EQUAL(1, 
(int)getLoggerFile(target).operations.size()); - - for (std::size_t i = 0; i < timestamps.size(); ++i) { - const MemSlot* slot = target.getSlotAtTime(timestamps[i]); - const MemSlot* removedPut( - target.getSlotAtTime(timestamps[i] - Timestamp(1000))); - CPPUNIT_ASSERT(slot); - CPPUNIT_ASSERT(removedPut); - CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); - CPPUNIT_ASSERT_EQUAL(removedPut->getLocation(HEADER), - slot->getLocation(HEADER)); - CPPUNIT_ASSERT_EQUAL(DataLocation(0, 0), slot->getLocation(BODY)); - } - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); -} - -void MemFileV1SerializerTest::testWriteReadSingleRemovedDoc() -{ - VESPA_MEMFILEV1_SETUP_SOURCE; - source.addPutSlot(*doc, Timestamp(1001)); - source.addRemoveSlot( - *source.getSlotAtTime(Timestamp(1001)), Timestamp(2001)); - VESPA_MEMFILEV1_ASSERT_SERIALIZATION(source); -} - -/** - * Write a single put with no body to the memfile and ensure it is - * persisted properly without a body block - */ -void -MemFileV1SerializerTest::testPartialWritePutHeaderOnly() -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - document::Document::SP doc(createRandomDocumentAtLocation(4)); - { - MemFile source(file, env()); - source.addPutSlot(*doc, Timestamp(1001)); - env()._memFileMapper.flush(source, env()); - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(source); - } - { - // Have to put a second time since the first one will always - // rewrite the entire file - MemFile target(file, env()); - Document::SP doc2(createRandomDocumentAtLocation(4)); - clearBody(*doc2); - target.addPutSlot(*doc2, Timestamp(1003)); - env()._memFileMapper.flush(target, env()); - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); - } - { - MemFile target(file, env()); - target.ensureBodyBlockCached(); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); - - const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003)); - 
CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0); - CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0); - CPPUNIT_ASSERT_EQUAL( - DataLocation(0, 0), slot.getLocation(BODY)); - VESPA_MEMFILEV1_VALIDATE_STRUCTURE(target); - } -} - - - - -void -MemFileV1SerializerTest::testLocationDiskIoPlannerSimple() -{ - std::vector<MemSlot> slots; - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1001), - DataLocation(0, 1024), - DataLocation(4096, 512), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1003), - DataLocation(1024, 1024), - DataLocation(8192, 512), 0, 0)); - } - - std::vector<DataLocation> headers; - std::vector<DataLocation> bodies; - headers.push_back(slots[0].getLocation(HEADER)); - bodies.push_back(slots[0].getLocation(BODY)); - - DummyMemFileIOInterface dummyIo; - { - LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 100, 0); - - CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(0, 1024), - planner.getIoOperations()[0]); - } - { - LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 100, 4096); - - CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(8192, 512), // + block index - planner.getIoOperations()[0]); - } -} - -void -MemFileV1SerializerTest::testLocationDiskIoPlannerMergeReads() -{ - std::vector<MemSlot> slots; - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1001), - DataLocation(0, 1024), - DataLocation(5120, 512), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1002), - DataLocation(2048, 1024), - DataLocation(7168, 512), 0, 0)); - } - - { - Document::SP 
doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1003), - DataLocation(1024, 1024), - DataLocation(9216, 512), 0, 0)); - } - - std::vector<DataLocation> headers; - std::vector<DataLocation> bodies; - for (int i = 0; i < 2; ++i) { - headers.push_back(slots[i].getLocation(HEADER)); - bodies.push_back(slots[i].getLocation(BODY)); - } - - DummyMemFileIOInterface dummyIo; - { - LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1025, 0); - - CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(0, 3072), - planner.getIoOperations()[0]); - } - - { - LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1025, 0); - - CPPUNIT_ASSERT_EQUAL(2, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(5120, 512), - planner.getIoOperations()[0]); - CPPUNIT_ASSERT_EQUAL( - DataLocation(7168, 512), - planner.getIoOperations()[1]); - } -} - -void -MemFileV1SerializerTest::testLocationDiskIoPlannerOneDocument() -{ - std::vector<MemSlot> slots; - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1001), - DataLocation(0, 1024), - DataLocation(5120, 512), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1002), - DataLocation(2048, 1024), - DataLocation(7168, 512), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1003), - DataLocation(1024, 1024), - DataLocation(9216, 512), 0, 0)); - } - - std::vector<DataLocation> headers; - std::vector<DataLocation> bodies; - headers.push_back(slots[1].getLocation(HEADER)); - bodies.push_back(slots[1].getLocation(BODY)); - - DummyMemFileIOInterface dummyIo; - { - LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 1000, 0); - 
CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(2048, 1024), - planner.getIoOperations()[0]); - } - - { - LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 1000, 0); - CPPUNIT_ASSERT_EQUAL(1, (int)planner.getIoOperations().size()); - CPPUNIT_ASSERT_EQUAL( - DataLocation(7168, 512), - planner.getIoOperations()[0]); - } -} - -void -MemFileV1SerializerTest::testLocationDiskIoPlannerAlignReads() -{ - std::vector<MemSlot> slots; - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1001), - DataLocation(7, 100), - DataLocation(5000, 500), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1002), - DataLocation(2000, 100), - DataLocation(7000, 500), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1003), - DataLocation(110, 200), - DataLocation(9000, 500), 0, 0)); - } - - { - Document::SP doc(createRandomDocumentAtLocation(4)); - slots.push_back( - MemSlot( - doc->getId().getGlobalId(), - Timestamp(1004), - DataLocation(3000, 100), - DataLocation(11000, 500), 0, 0)); - } - - std::vector<DataLocation> headers; - std::vector<DataLocation> bodies; - for (int i = 0; i < 2; ++i) { - headers.push_back(slots[i].getLocation(HEADER)); - bodies.push_back(slots[i].getLocation(BODY)); - } - - DummyMemFileIOInterface dummyIo; - { - LocationDiskIoPlanner planner(dummyIo, HEADER, headers, 512, 0); - std::vector<DataLocation> expected; - expected.push_back(DataLocation(0, 512)); - expected.push_back(DataLocation(1536, 1024)); - - CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations()); - } - { - LocationDiskIoPlanner planner(dummyIo, BODY, bodies, 512, 0); - std::vector<DataLocation> expected; - expected.push_back(DataLocation(4608, 1024)); - 
expected.push_back(DataLocation(6656, 1024)); - - CPPUNIT_ASSERT_EQUAL(expected, planner.getIoOperations()); - } -} - -// TODO(vekterli): add read planner test with a location cached - -void -MemFileV1SerializerTest::testSeparateReadsForHeaderAndBody() -{ - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - Document::SP doc(createRandomDocumentAtLocation(4, 0, 1000, 2000)); - { - MemFile source(file, env()); - source.addPutSlot(*doc, Timestamp(1001)); - - env()._memFileMapper.flush(source, env()); - } - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .maximumReadThroughGap(1024*1024*100) - .build()); - env()._lazyFileFactory = std::unique_ptr<Environment::LazyFileFactory>( - new LoggingLazyFile::Factory()); - - MemFile target(file, env()); - - std::vector<Timestamp> timestamps; - timestamps.push_back(Timestamp(1001)); - - getLoggerFile(target).operations.clear(); - target.ensureDocumentCached(timestamps, false); - - CPPUNIT_ASSERT_EQUAL(2, (int)getLoggerFile(target).operations.size()); - const MemSlot* slot = target.getSlotAtTime(Timestamp(1001)); - CPPUNIT_ASSERT(slot); - CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); - CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); - CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(*slot, ALL)); - - CPPUNIT_ASSERT(getMetrics().serialization.headerReadSize.getLast() > 0); - CPPUNIT_ASSERT(getMetrics().serialization.bodyReadSize.getLast() > 0); -} - -/** - * Write a single put with body to the memfile and ensure it is - * persisted properly with both header and body blocks - */ -void -MemFileV1SerializerTest::testPartialWritePut() -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - Document::SP doc(createRandomDocumentAtLocation(4)); - { - MemFile source(file, env()); - 
source.addPutSlot(*doc, Timestamp(1001)); - - env()._memFileMapper.flush(source, env()); - } - - { - // Have to put a second time since the first one will always - // rewrite the entire file - MemFile target(file, env()); - Document::SP doc2(createRandomDocumentAtLocation(4)); - target.addPutSlot(*doc2, Timestamp(1003)); - env()._memFileMapper.flush(target, env()); - } - { - MemFile target(file, env()); - target.ensureBodyBlockCached(); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); - - const MemSlot& slot = *target.getSlotAtTime(Timestamp(1003)); - CPPUNIT_ASSERT(slot.getLocation(HEADER)._pos > 0); - CPPUNIT_ASSERT(slot.getLocation(HEADER)._size > 0); - - CPPUNIT_ASSERT(slot.getLocation(BODY)._size > 0); - CPPUNIT_ASSERT(slot.getLocation(BODY)._pos > 0); - } -} - -void -MemFileV1SerializerTest::doTestPartialWriteRemove(bool readAll) -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - Document::SP doc(createRandomDocumentAtLocation(4)); - { - MemFile source(file, env()); - source.addPutSlot(*doc, Timestamp(1001)); - env()._memFileMapper.flush(source, env()); - } - { - MemFile target(file, env()); - // Only populate cache before removing if explicitly told so - if (readAll) { - target.ensureBodyBlockCached(); - } - CPPUNIT_ASSERT_EQUAL(uint32_t(1), target.getSlotCount()); - target.addRemoveSlot(target[0], Timestamp(1003)); - - env()._memFileMapper.flush(target, env()); - } - { - MemFile target(file, env()); - target.ensureBodyBlockCached(); - - CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); - - const MemSlot& originalSlot = target[0]; - const MemSlot& removeSlot = target[1]; - CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0); - CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0); - CPPUNIT_ASSERT_EQUAL( - originalSlot.getLocation(HEADER), - removeSlot.getLocation(HEADER)); - CPPUNIT_ASSERT_EQUAL( - DataLocation(0, 0), 
removeSlot.getLocation(BODY)); - } -} - -/** - * Ensure that removes get the same header location as the Put - * they're removing, and that they get a zero body location - */ -void -MemFileV1SerializerTest::testPartialWriteRemoveCached() -{ - doTestPartialWriteRemove(true); -} - -void -MemFileV1SerializerTest::testPartialWriteRemoveNotCached() -{ - doTestPartialWriteRemove(false); -} - -void -MemFileV1SerializerTest::doTestPartialWriteUpdate(bool readAll) -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - Document::SP doc(createRandomDocumentAtLocation(4)); - { - MemFile source(file, env()); - source.addPutSlot(*doc, Timestamp(1001)); - env()._memFileMapper.flush(source, env()); - } - - Document::SP doc2; - { - MemFile target(file, env()); - if (readAll) { - target.ensureBodyBlockCached(); - } - - doc2.reset(new Document(*doc->getDataType(), doc->getId())); - clearBody(*doc2); - doc2->setValue(doc->getField("hstringval"), - document::StringFieldValue("Some updated content")); - - target.addUpdateSlot(*doc2, *target.getSlotAtTime(Timestamp(1001)), - Timestamp(1003)); - env()._memFileMapper.flush(target, env()); - } - - { - MemFile target(file, env()); - CPPUNIT_ASSERT_EQUAL(uint32_t(2), target.getSlotCount()); - const MemSlot& originalSlot = target[0]; - const MemSlot& updateSlot = target[1]; - CPPUNIT_ASSERT(originalSlot.getLocation(HEADER)._size > 0); - CPPUNIT_ASSERT(originalSlot.getLocation(BODY)._size > 0); - CPPUNIT_ASSERT_EQUAL( - originalSlot.getLocation(BODY), - updateSlot.getLocation(BODY)); - CPPUNIT_ASSERT( - updateSlot.getLocation(HEADER) - != originalSlot.getLocation(HEADER)); - - CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[0], ALL)); - copyHeader(*doc, *doc2); - CPPUNIT_ASSERT_EQUAL(*doc, *target.getDocument(target[1], ALL)); - } -} - -/** - * Ensure that header updates keep the same body block - */ -void 
-MemFileV1SerializerTest::testPartialWriteUpdateCached() -{ - doTestPartialWriteUpdate(true); -} - -void -MemFileV1SerializerTest::testPartialWriteUpdateNotCached() -{ - doTestPartialWriteUpdate(false); -} - -void -MemFileV1SerializerTest::testPartialWriteTooMuchFreeSpace() -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - { - MemFile source(file, env()); - Document::SP doc(createRandomDocumentAtLocation(4)); - source.addPutSlot(*doc, Timestamp(1001)); - env()._memFileMapper.flush(source, env()); - } - int64_t sizeBefore; - // Append filler to slotfile to make it too big for comfort, - // forcing a rewrite to shrink it down - { - vespalib::File slotfile(file.getPath()); - slotfile.open(0); - CPPUNIT_ASSERT(slotfile.isOpen()); - sizeBefore = slotfile.getFileSize(); - slotfile.resize(sizeBefore * 20); // Well over min fill rate of 10% - } - // Write new slot to file; it should now be rewritten with the - // same file size as originally - { - MemFile source(file, env()); - Document::SP doc(createRandomDocumentAtLocation(4)); - source.addPutSlot(*doc, Timestamp(1003)); - env()._memFileMapper.flush(source, env()); - } - { - vespalib::File slotfile(file.getPath()); - slotfile.open(0); - CPPUNIT_ASSERT(slotfile.isOpen()); - CPPUNIT_ASSERT_EQUAL( - sizeBefore, - slotfile.getFileSize()); - } - CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization - .fullRewritesDueToDownsizingFile.getValue()); - CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization - .fullRewritesDueToTooSmallFile.getValue()); -} - -void -MemFileV1SerializerTest::testPartialWriteNotEnoughFreeSpace() -{ - setUpPartialWriteEnvironment(); - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - // Write file initially - MemFile source(file, env()); - { - Document::SP doc(createRandomDocumentAtLocation(4)); - source.addPutSlot(*doc, 
Timestamp(1001)); - env()._memFileMapper.flush(source, env()); - } - - uint32_t minFile = 1024 * 512; - auto memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig(); - resetConfig(minFile, memFileCfg->minimumFileHeaderBlockSize); - - // Create doc bigger than initial minimum filesize, - // prompting a full rewrite - Document::SP doc( - createRandomDocumentAtLocation(4, 0, 4096, 4096)); - source.addPutSlot(*doc, Timestamp(1003)); - - env()._memFileMapper.flush(source, env()); - - CPPUNIT_ASSERT_EQUAL( - minFile, - uint32_t(getFileHandle(source).getFileSize())); - - CPPUNIT_ASSERT_EQUAL(uint64_t(0), getMetrics().serialization - .fullRewritesDueToDownsizingFile.getValue()); - CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization - .fullRewritesDueToTooSmallFile.getValue()); - - // Now, ensure we respect minimum file size and don't try to - // "helpfully" rewrite the file again (try to detect full - // file rewrite with help from the fact we don't currently - // check whether or not the file is < the minimum filesize. - // If that changes, so must this) - memFileCfg = env().acquireConfigReadLock().memFilePersistenceConfig(); - resetConfig(2 * minFile, memFileCfg->minimumFileHeaderBlockSize); - - source.addRemoveSlot(*source.getSlotAtTime(Timestamp(1003)), - Timestamp(1005)); - env()._memFileMapper.flush(source, env()); - - CPPUNIT_ASSERT_EQUAL( - minFile, - uint32_t(getFileHandle(source).getFileSize())); - - CPPUNIT_ASSERT_EQUAL(uint64_t(1), getMetrics().serialization - .fullRewritesDueToTooSmallFile.getValue()); -} - -// Test that we don't mess up when remapping locations that -// have already been written during the same operation. That is: -// part A is remapped (P1, S1) -> (P2, S2) -// part B is remapped (P2, S2) -> (P3, S3) -// Obviously, part B should not overwrite the location of part A, -// but this will happen if we don't do the updating in one batch. 
-void -MemFileV1SerializerTest::testLocationsRemappedConsistently() -{ - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - - std::map<Timestamp, Document::SP> docs; - { - MemFile mf(file, env()); - Document::SP tmpDoc( - createRandomDocumentAtLocation(4, 0, 100, 100)); - - // Create docs identical in size but differing only in doc ids - // By keeping same size but inserting with _lower_ timestamps - // for docs that get higher location positions, we ensure that - // when the file is rewritten, the lower timestamp slots will - // get remapped to locations that match existing locations for - // higher timestamp slots. - for (int i = 0; i < 2; ++i) { - std::ostringstream ss; - ss << "doc" << i; - DocumentId id(document::UserDocIdString("userdoc:foo:4:" + ss.str())); - Document::SP doc(new Document(*tmpDoc->getDataType(), id)); - doc->getFields() = tmpDoc->getFields(); - mf.addPutSlot(*doc, Timestamp(1000 - i)); - docs[Timestamp(1000 - i)] = doc; - } - - env()._memFileMapper.flush(mf, env()); - // Dirty the cache for rewrite - { - DocumentId id2(document::UserDocIdString("userdoc:foo:4:doc9")); - Document::UP doc2(new Document(*tmpDoc->getDataType(), id2)); - doc2->getFields() = tmpDoc->getFields(); - mf.addPutSlot(*doc2, Timestamp(2000)); - docs[Timestamp(2000)] = std::move(doc2); - } - - // Force rewrite - auto memFileCfg = env().acquireConfigReadLock() - .memFilePersistenceConfig(); - resetConfig(1024*512, memFileCfg ->minimumFileHeaderBlockSize); - env()._memFileMapper.flush(mf, env()); - } - - MemFile target(file, env()); - target.ensureBodyBlockCached(); - - std::ostringstream err; - if (!env()._memFileMapper.verify(target, env(), err)) { - std::cerr << err.str() << "\n"; - CPPUNIT_FAIL("MemFile verification failed"); - } - - typedef std::map<Timestamp, Document::SP>::iterator Iter; - for (Iter it(docs.begin()); it != docs.end(); ++it) { - const MemSlot* slot = target.getSlotAtTime(it->first); - 
CPPUNIT_ASSERT(slot); - CPPUNIT_ASSERT(target.partAvailable(*slot, HEADER)); - CPPUNIT_ASSERT(target.partAvailable(*slot, BODY)); - CPPUNIT_ASSERT_EQUAL(*it->second, *target.getDocument(*slot, ALL)); - } -} - -/** - * Test that we read in the correct header information when we have to read - * in two passes to get it in its entirety. - */ -void -MemFileV1SerializerTest::testHeaderBufferTooSmall() -{ - system("rm -f testfile.0"); - FileSpecification file(BucketId(16, 4), env().getDirectory(0), "testfile.0"); - FileInfo wantedInfo; - { - MemFile f(file, env()); - // 50*40 bytes of meta list data should be more than sufficient - for (size_t i = 0; i < 50; ++i) { - Document::SP doc(createRandomDocumentAtLocation(4, i)); - f.addPutSlot(*doc, Timestamp(1001 + i)); - env()._memFileMapper.flush(f, env()); - } - SimpleMemFileIOBuffer& io( - dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO())); - wantedInfo = io.getFileInfo(); - } - - // Force initial index read to be too small to contain all metadata, - // triggering buffer resize and secondary read. 
- auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options).initialIndexRead(512).build()); - { - MemFile f(file, env()); - CPPUNIT_ASSERT_EQUAL(uint32_t(50), f.getSlotCount()); - // Ensure we've read correct file info - SimpleMemFileIOBuffer& io( - dynamic_cast<SimpleMemFileIOBuffer&>(f.getMemFileIO())); - const FileInfo& info(io.getFileInfo()); - CPPUNIT_ASSERT_EQUAL(wantedInfo.getFileSize(), info.getFileSize()); - CPPUNIT_ASSERT_EQUAL(wantedInfo.getHeaderBlockStartIndex(), - info.getHeaderBlockStartIndex()); - CPPUNIT_ASSERT_EQUAL(wantedInfo.getBodyBlockStartIndex(), - info.getBodyBlockStartIndex()); - CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(HEADER), - info.getBlockSize(HEADER)); - CPPUNIT_ASSERT_EQUAL(wantedInfo.getBlockSize(BODY), - info.getBlockSize(BODY)); - } -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp b/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp deleted file mode 100644 index c38842bfeb0..00000000000 --- a/memfilepersistence/src/tests/spi/memfilev1verifiertest.cpp +++ /dev/null @@ -1,496 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/mapper/memfile_v1_serializer.h> -#include <vespa/memfilepersistence/mapper/memfile_v1_verifier.h> -#include <tests/spi/memfiletestutils.h> - -namespace storage { -namespace memfile { - -struct MemFileV1VerifierTest : public SingleDiskMemFileTestUtils -{ - void testVerify(); - - void tearDown() override; - - std::unique_ptr<MemFile> createMemFile(FileSpecification& file, - bool callLoadFile) - { - return std::unique_ptr<MemFile>(new MemFile(file, env(), callLoadFile)); - } - - CPPUNIT_TEST_SUITE(MemFileV1VerifierTest); - CPPUNIT_TEST_IGNORED(testVerify); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(MemFileV1VerifierTest); - -namespace { - // A totall uncached memfile with content to use for verify testing - std::unique_ptr<MemFile> _memFile; - - // Clear old content. Create new file. Make sure nothing is cached. - void prepareBucket(SingleDiskMemFileTestUtils& util, - const FileSpecification& file) { - _memFile.reset(); - util.env()._cache.clear(); - vespalib::unlink(file.getPath()); - util.createTestBucket(file.getBucketId(), 0); - util.env()._cache.clear(); - _memFile.reset(new MemFile(file, util.env())); - _memFile->getMemFileIO().close(); - - } - - // Get copy of header of memfile created - Header getHeader() { - assert(_memFile.get()); - vespalib::LazyFile file(_memFile->getFile().getPath(), 0); - Header result; - file.read(&result, sizeof(Header), 0); - return result; - } - - MetaSlot getSlot(uint32_t index) { - assert(_memFile.get()); - vespalib::LazyFile file(_memFile->getFile().getPath(), 0); - MetaSlot result; - file.read(&result, sizeof(MetaSlot), - sizeof(Header) + sizeof(MetaSlot) * index); - return result; - } - - void setSlot(uint32_t index, MetaSlot slot, - bool updateFileChecksum = true) - { - (void)updateFileChecksum; - assert(_memFile.get()); - //if (updateFileChecksum) slot.updateFileChecksum(); - vespalib::LazyFile 
file(_memFile->getFile().getPath(), 0); - file.write(&slot, sizeof(MetaSlot), - sizeof(Header) + sizeof(MetaSlot) * index); - } - - void setHeader(const Header& header) { - assert(_memFile.get()); - vespalib::LazyFile file(_memFile->getFile().getPath(), 0); - file.write(&header, sizeof(Header), 0); - } - - void verifySlotFile(MemFileV1VerifierTest& util, - const std::string& expectedError, - const std::string& message, - int32_t remainingEntries, - bool includeContent = true, - bool includeHeader = true) - { - assert(_memFile.get()); - FileSpecification file(_memFile->getFile()); - _memFile.reset(); - _memFile = util.createMemFile(file, false); - std::ostringstream before; - try{ - util.env()._memFileMapper.loadFile(*_memFile, util.env(), false); - _memFile->print(before, true, ""); - } catch (vespalib::Exception& e) { - before << "Unknown. Exception during loadFile\n"; - } - std::ostringstream errors; - uint32_t flags = (includeContent ? 0 : Types::DONT_VERIFY_BODY) - | (includeHeader ? 
0 : Types::DONT_VERIFY_HEADER); - if (util.env()._memFileMapper.verify( - *_memFile, util.env(), errors, flags)) - { - _memFile->print(std::cerr, true, ""); - std::cerr << errors.str() << "\n"; - CPPUNIT_FAIL("verify() failed to detect: " + message); - } - CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(), - expectedError, errors.str()); - errors.str(""); - if (util.env()._memFileMapper.repair( - *_memFile, util.env(), errors, flags)) - { - CPPUNIT_FAIL("repair() failed to detect: " + message - + ": " + errors.str()); - } - CPPUNIT_ASSERT_CONTAIN_MESSAGE(message + "\nBefore: " + before.str(), - expectedError, errors.str()); - std::ostringstream remainingErrors; - if (!util.env()._memFileMapper.verify( - *_memFile, util.env(), remainingErrors, flags)) - { - CPPUNIT_FAIL("verify() returns issue after repair of: " - + message + ": " + remainingErrors.str()); - } - CPPUNIT_ASSERT_MESSAGE(remainingErrors.str(), - remainingErrors.str().size() == 0); - if (remainingEntries < 0) { - if (_memFile->fileExists()) { - CPPUNIT_FAIL(message + ": Expected file to not exist anymore"); - } - } else if (dynamic_cast<SimpleMemFileIOBuffer&>(_memFile->getMemFileIO()) - .getFileHandle().getFileSize() == 0) - { - std::ostringstream ost; - ost << "Expected " << remainingEntries << " to remain in file, " - << "but file does not exist\n"; - CPPUNIT_FAIL(message + ": " + ost.str()); - } else { - if (int64_t(_memFile->getSlotCount()) != remainingEntries) { - std::ostringstream ost; - ost << "Expected " << remainingEntries << " to remain in file, " - << "but found " << _memFile->getSlotCount() << "\n"; - ost << errors.str() << "\n"; - ost << "Before: " << before.str() << "\nAfter: "; - _memFile->print(ost, true, ""); - CPPUNIT_FAIL(message + ": " + ost.str()); - } - } - } -} - -void -MemFileV1VerifierTest::tearDown() -{ - _memFile.reset(0); - SingleDiskMemFileTestUtils::tearDown(); -}; - -void -MemFileV1VerifierTest::testVerify() -{ - BucketId bucket(16, 0xa); - 
std::unique_ptr<FileSpecification> file; - createTestBucket(bucket, 0); - - { - MemFilePtr memFilePtr(env()._cache.get(bucket, env(), env().getDirectory())); - file.reset(new FileSpecification(memFilePtr->getFile())); - env()._cache.clear(); - } - { // Ensure buildTestFile builds a valid file - // Initial file should be fine. - MemFile memFile(*file, env()); - std::ostringstream errors; - if (!env()._memFileMapper.verify(memFile, env(), errors)) { - memFile.print(std::cerr, false, ""); - CPPUNIT_FAIL("Slotfile failed verification: " + errors.str()); - } - } - // Header tests - prepareBucket(*this, *file); - Header orgheader(getHeader()); - { // Test wrong version - Header header(orgheader); - header.setVersion(0xc0edbabe); - header.updateChecksum(); - setHeader(header); - verifySlotFile(*this, - "400000000000000a.0 is of wrong version", - "Faulty version", - -1); - } - { // Test meta data list size bigger than file - prepareBucket(*this, *file); - Header header(orgheader); - header.setMetaDataListSize(0xFFFF); - header.updateChecksum(); - setHeader(header); - verifySlotFile(*this, - "indicates file is bigger than it physically is", - "Too big meta data list size", - -1); - } - { // Test header block size bigger than file - prepareBucket(*this, *file); - Header header(orgheader); - header.setHeaderBlockSize(0xFFFF); - header.updateChecksum(); - setHeader(header); - verifySlotFile(*this, - "Header indicates file is bigger than it physically is", - "Too big header block size", - -1); - } - { // Test wrong header crc - prepareBucket(*this, *file); - Header header(orgheader); - header.setMetaDataListSize(4); - setHeader(header); - verifySlotFile(*this, - "Header checksum mismatch", - "Wrong header checksum", - -1); - } - // Meta data tests - prepareBucket(*this, *file); - MetaSlot slot6(getSlot(6)); - { // Test extra removes - currently allowed - MetaSlot slot7(getSlot(7)); - MetaSlot s(slot7); - s.setTimestamp(Timestamp(s._timestamp.getTime() - 1)); - 
s.updateChecksum(); - setSlot(6, s); - s.setTimestamp(Timestamp(s._timestamp.getTime() + 1)); - s.updateChecksum(); - setSlot(7, s); - std::ostringstream errors; - if (!env()._memFileMapper.verify(*_memFile, env(), errors)) { - _memFile->print(std::cerr, false, ""); - std::cerr << errors.str() << "\n"; - CPPUNIT_FAIL("Supposed to be legal with multiple remove values"); - } - setSlot(7, slot7); - } - { - // Test metadata crc mismatch with "used" flag being accidentally - // flipped. Should not inhibit adding of subsequent slots. - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setUseFlag(false); - setSlot(6, s); - verifySlotFile(*this, - "Slot 6 at timestamp 2001 failed checksum verification", - "Crc failure with use flag", 23, false); - } - { // Test overlapping documents - MetaSlot s(slot6); - // Direct overlapping header - prepareBucket(*this, *file); - s.setHeaderPos(0); - s.setHeaderSize(51); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "overlaps with slot", - "Direct overlapping header", 6, false, false); - // Contained header - // (contained bit not valid header so fails on other error now) - prepareBucket(*this, *file); - s.setHeaderPos(176); - s.setHeaderSize(80); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "not big enough to contain a document id", - "Contained header", 7, false); - // Partly overlapping header - // (contained bit not valid header so fails on other error now) - prepareBucket(*this, *file); - s.setHeaderPos(191); - s.setHeaderSize(35); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "not big enough to contain a document id", - "Partly overlapping header", 7, false); - prepareBucket(*this, *file); - s.setHeaderPos(185); - s.setHeaderSize(33); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "not big enough to contain a document id", - "Partly overlapping header (2)", 7, false); - // Direct overlapping body - prepareBucket(*this, *file); - s = slot6; - 
s.setBodyPos(0); - s.setBodySize(136); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "Multiple slots with different gids use same body position", - "Directly overlapping body", 6, false); - // Contained body - prepareBucket(*this, *file); - s.setBodyPos(10); - s.setBodySize(50); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "overlaps with slot", - "Contained body", 6, false); - CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1)) == 0); - // Overlapping body - prepareBucket(*this, *file); - s.setBodyPos(160); - s.setBodySize(40); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "overlaps with slot", - "Overlapping body", 5, false); - CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(2)) == 0); - CPPUNIT_ASSERT(_memFile->getSlotAtTime(Timestamp(1501)) == 0); - // Overlapping body, verifying bodies - // (Bad body bit should be removed first, so only one slot needs - // removing) - prepareBucket(*this, *file); - setSlot(6, s); - verifySlotFile(*this, - "Body checksum mismatch", - "Overlapping body(2)", 7, true); - } - { // Test out of bounds - MetaSlot s(slot6); - - // Header out of bounds - prepareBucket(*this, *file); - s.setHeaderPos(500); - s.setHeaderSize(5000); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "goes out of bounds", - "Header out of bounds", 7, false, false); - // Body out of bounds - prepareBucket(*this, *file); - s = slot6; - s.setBodyPos(2400); - s.setBodySize(6000); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "goes out of bounds", - "Body out of bounds", 7, false); - } - { // Test timestamp collision - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setTimestamp(Timestamp(10002)); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "has same timestamp as slot 5", - "Timestamp collision", 6, false); - } - { // Test timestamp out of order - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setTimestamp(Timestamp(38)); - 
s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "Slot 6 is out of timestamp order", - "Timestamp out of order", 8, false); - } - { // Test metadata crc mismatch - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setTimestamp(Timestamp(40)); - setSlot(6, s); - verifySlotFile(*this, - "Slot 6 at timestamp 40 failed checksum verification", - "Crc failure", 7, false); - } - { // Test used after unused - // This might actually lose documents after the unused entries. - // The memfile will not know about the documents after unused entry. - // If the memfile contains changes and writes metadata back due to this, - // the following entries will be missing. - // (To prevent this repair would have to add metadata entries, but that - // may be problems if repair happens at a time where all header or body - // data in the file needs to be cached.) - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setUseFlag(false); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "Slot 7 found after unused entries", - "Used after unused", 6, false); - } - { // Test header blob corrupt - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setHeaderPos(519); - s.setHeaderSize(86); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "Header checksum mismatch", - "Corrupt header blob.", 7); - } - { // Test body blob corrupt - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setBodyPos(52); - s.setBodySize(18); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "Body checksum mismatch", - "Corrupt body blob.", 7); - } - { // Test too long name for header chunk - prepareBucket(*this, *file); - MetaSlot s(slot6); - s.setHeaderPos(160); - s.setHeaderSize(33); - s.updateChecksum(); - setSlot(6, s); - verifySlotFile(*this, - "header is not big enough to contain a document", - "Too long name in header.", 7); - } - { // Test wrong file checksum -// Currently disabled. 
Currently only possible to calculate file checksum from -// memfile now, and memfile object wont be valid. -/* - // First test if we actually have less entries at all.. - prepareBucket(*this, *file); - MetaSlot s(getSlot(7)); - s.setUseFlag(false); - s.updateChecksum(); - setSlot(7, s, false); - s = getSlot(8); - s.setUseFlag(false); - s.updateChecksum(); - setSlot(8, s, false); - verifySlotFile(*this, - "File checksum should have been", - "Wrong file checksum in file.", 7, false); -std::cerr << "U\n"; - // Then test with different timestamp in remaining document - prepareBucket(*this, *file); - s = getSlot(6); - s.setTimestamp(s._timestamp + 1); - s.updateChecksum(); - setSlot(6, s, false); - verifySlotFile(*this, - "File checksum should have been", - "Wrong file checksum in file.", 9, false); -std::cerr << "V\n"; - // Then check with different gid - prepareBucket(*this, *file); - s = getSlot(6); - s._gid = GlobalId("sdfsdfsedsdfsdfsd"); - s.updateChecksum(); - setSlot(6, s, false); - verifySlotFile(*this, - "File checksum should have been", - "Wrong file checksum in file.", 9, false, false); -*/ - } - { // Test that documents not belonging in a bucket is removed -// Currently disabled. Hard to test. Needs total rewrite -/* - prepareBucket(*this, *file); - Blob b(createBlob(43u, "userdoc::0:315", "header", "body")); - _memFile->write(b, 80); - CPPUNIT_ASSERT_EQUAL(4u, _memFile->getBlobCount()); - CPPUNIT_ASSERT(_memFile->read(b)); - verifySlotFile(*this, - "belongs in bucket", - "Document not belonging there", 9); - CPPUNIT_ASSERT_EQUAL(3u, _memFile->getBlobCount()); -*/ - } -} - -} -} diff --git a/memfilepersistence/src/tests/spi/options_builder.h b/memfilepersistence/src/tests/spi/options_builder.h deleted file mode 100644 index 7f04a02086c..00000000000 --- a/memfilepersistence/src/tests/spi/options_builder.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/vespalib/stllike/string.h> -#include <memory> - -namespace storage { -namespace memfile { - -class OptionsBuilder -{ - Options _newOptions; -public: - OptionsBuilder(const Options& opts) - : _newOptions(opts) - { - } - - OptionsBuilder& maximumReadThroughGap(uint32_t readThroughGap) { - _newOptions._maximumGapToReadThrough = readThroughGap; - return *this; - } - - OptionsBuilder& initialIndexRead(uint32_t bytesToRead) { - _newOptions._initialIndexRead = bytesToRead; - return *this; - } - - OptionsBuilder& revertTimePeriod(framework::MicroSecTime revertTime) { - _newOptions._revertTimePeriod = revertTime; - return *this; - } - - OptionsBuilder& defaultRemoveDocType(vespalib::stringref typeName) { - _newOptions._defaultRemoveDocType = typeName; - return *this; - } - - OptionsBuilder& maxDocumentVersions(uint32_t maxVersions) { - _newOptions._maxDocumentVersions = maxVersions; - return *this; - } - - std::unique_ptr<Options> build() const { - return std::unique_ptr<Options>(new Options(_newOptions)); - } -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/tests/spi/providerconformancetest.cpp b/memfilepersistence/src/tests/spi/providerconformancetest.cpp deleted file mode 100644 index 7ba91bde619..00000000000 --- a/memfilepersistence/src/tests/spi/providerconformancetest.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memfiletestutils.h" -#include <vespa/persistence/conformancetest/conformancetest.h> - -namespace storage { -namespace memfile { - -struct ProviderConformanceTest : public spi::ConformanceTest { - struct Factory : public PersistenceFactory { - framework::defaultimplementation::ComponentRegisterImpl _compRegister; - framework::defaultimplementation::RealClock _clock; - std::unique_ptr<MemFileCache> cache; - - Factory() - : _compRegister(), - _clock() - { - _compRegister.setClock(_clock); - } - - spi::PersistenceProvider::UP - getPersistenceImplementation(const std::shared_ptr<const document::DocumentTypeRepo>& repo, - const document::DocumenttypesConfig&) override - { - system("rm -rf vdsroot"); - system("mkdir -p vdsroot/disks/d0"); - vdstestlib::DirConfig config(getStandardConfig(true)); - - MemFilePersistenceProvider::UP result( - new MemFilePersistenceProvider( - _compRegister, - config.getConfigId())); - result->setDocumentRepo(*repo); - return spi::PersistenceProvider::UP(result.release()); - } - - bool - supportsRevert() const - { - return true; - } - }; - - ProviderConformanceTest() - : spi::ConformanceTest(PersistenceFactory::UP(new Factory)) {} - - CPPUNIT_TEST_SUITE(ProviderConformanceTest); - DEFINE_CONFORMANCE_TESTS(); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(ProviderConformanceTest); - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp b/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp deleted file mode 100644 index ee95cc0026d..00000000000 --- a/memfilepersistence/src/tests/spi/shared_data_location_tracker_test.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/memfilepersistence/memfile/shared_data_location_tracker.h> - -namespace storage { -namespace memfile { - -class SharedDataLocationTrackerTest : public CppUnit::TestFixture -{ -public: - void headerIsPassedDownToCacheAccessor(); - void bodyIsPassedDownToCacheAccessor(); - void firstInvocationReturnsNewLocation(); - void multipleInvocationsForSharedSlotReturnSameLocation(); - - CPPUNIT_TEST_SUITE(SharedDataLocationTrackerTest); - CPPUNIT_TEST(headerIsPassedDownToCacheAccessor); - CPPUNIT_TEST(bodyIsPassedDownToCacheAccessor); - CPPUNIT_TEST(firstInvocationReturnsNewLocation); - CPPUNIT_TEST(multipleInvocationsForSharedSlotReturnSameLocation); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(SharedDataLocationTrackerTest); - -namespace { - -using Params = std::pair<Types::DocumentPart, DataLocation>; -constexpr auto HEADER = Types::HEADER; -constexpr auto BODY = Types::BODY; - -/** - * A simple mock of a buffer cache which records all invocations - * and returns a location increasing by 100 for each invocation. - */ -struct MockBufferCacheCopier : BufferCacheCopier -{ - // This is practically _screaming_ for GoogleMock. 
- std::vector<Params> invocations; - - DataLocation doCopyFromSourceToLocal( - Types::DocumentPart part, - DataLocation sourceLocation) override - { - Params params(part, sourceLocation); - const size_t invocationsBefore = invocations.size(); - invocations.push_back(params); - return DataLocation(invocationsBefore * 100, - invocationsBefore * 100 + 100); - } -}; - -} - -void -SharedDataLocationTrackerTest::headerIsPassedDownToCacheAccessor() -{ - MockBufferCacheCopier cache; - SharedDataLocationTracker tracker(cache, HEADER); - tracker.getOrCreateSharedLocation({0, 100}); - CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); - CPPUNIT_ASSERT_EQUAL(Params(HEADER, {0, 100}), cache.invocations[0]); -} - -void -SharedDataLocationTrackerTest::bodyIsPassedDownToCacheAccessor() -{ - MockBufferCacheCopier cache; - SharedDataLocationTracker tracker(cache, BODY); - tracker.getOrCreateSharedLocation({0, 100}); - CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); - CPPUNIT_ASSERT_EQUAL(Params(BODY, {0, 100}), cache.invocations[0]); -} - -void -SharedDataLocationTrackerTest::firstInvocationReturnsNewLocation() -{ - MockBufferCacheCopier cache; - SharedDataLocationTracker tracker(cache, HEADER); - // Auto-incrementing per cache copy invocation. 
- CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), - tracker.getOrCreateSharedLocation({500, 600})); - CPPUNIT_ASSERT_EQUAL(DataLocation(100, 200), - tracker.getOrCreateSharedLocation({700, 800})); - - CPPUNIT_ASSERT_EQUAL(size_t(2), cache.invocations.size()); - CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]); - CPPUNIT_ASSERT_EQUAL(Params(HEADER, {700, 800}), cache.invocations[1]); -} - -void -SharedDataLocationTrackerTest - ::multipleInvocationsForSharedSlotReturnSameLocation() -{ - MockBufferCacheCopier cache; - SharedDataLocationTracker tracker(cache, HEADER); - CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), - tracker.getOrCreateSharedLocation({500, 600})); - // Same source location, thus we can reuse the same destination location - // as well. - CPPUNIT_ASSERT_EQUAL(DataLocation(0, 100), - tracker.getOrCreateSharedLocation({500, 600})); - - CPPUNIT_ASSERT_EQUAL(size_t(1), cache.invocations.size()); - CPPUNIT_ASSERT_EQUAL(Params(HEADER, {500, 600}), cache.invocations[0]); -} - -} // memfile -} // storage - diff --git a/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp b/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp deleted file mode 100644 index 7701df98661..00000000000 --- a/memfilepersistence/src/tests/spi/simplememfileiobuffertest.cpp +++ /dev/null @@ -1,656 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> -#include <vespa/document/repo/documenttyperepo.h> -#include <tests/spi/memfiletestutils.h> -#include <tests/spi/options_builder.h> - -namespace storage { -namespace memfile { - -class SimpleMemFileIOBufferTest : public SingleDiskMemFileTestUtils -{ - CPPUNIT_TEST_SUITE(SimpleMemFileIOBufferTest); - CPPUNIT_TEST(testAddAndReadDocument); - CPPUNIT_TEST(testNonExistingLocation); - CPPUNIT_TEST(testCopy); - CPPUNIT_TEST(testCacheLocation); - CPPUNIT_TEST(testPersist); - CPPUNIT_TEST(testGetSerializedSize); - CPPUNIT_TEST(testRemapLocations); - CPPUNIT_TEST(testAlignmentUtilFunctions); - CPPUNIT_TEST(testCalculatedCacheSize); - CPPUNIT_TEST(testSharedBuffer); - CPPUNIT_TEST(testSharedBufferUsage); - CPPUNIT_TEST(testHeaderChunkEncoderComputesSizesCorrectly); - CPPUNIT_TEST(testHeaderChunkEncoderSerializesIdCorrectly); - CPPUNIT_TEST(testHeaderChunkEncoderSerializesHeaderCorrectly); - CPPUNIT_TEST(testRemovesCanBeWrittenWithBlankDefaultDocument); - CPPUNIT_TEST(testRemovesCanBeWrittenWithIdInferredDoctype); - CPPUNIT_TEST(testRemovesWithInvalidDocTypeThrowsException); - CPPUNIT_TEST_SUITE_END(); - - using BufferType = SimpleMemFileIOBuffer::BufferType; - using BufferSP = BufferType::SP; - using BufferAllocation = SimpleMemFileIOBuffer::BufferAllocation; - using HeaderChunkEncoder = SimpleMemFileIOBuffer::HeaderChunkEncoder; - using SimpleMemFileIOBufferUP = std::unique_ptr<SimpleMemFileIOBuffer>; - - BufferAllocation allocateBuffer(size_t sz) { - return BufferAllocation(BufferSP(new BufferType(sz)), 0, sz); - } - - /** - * Create an I/O buffer instance with for a dummy bucket. If removeDocType - * is non-empty, remove entries will be written in backwards compatible - * mode. 
- */ - SimpleMemFileIOBufferUP createIoBufferWithDummySpec( - vespalib::stringref removeDocType = ""); - -public: - class DummyFileReader : public VersionSerializer { - public: - FileVersion getFileVersion() override { return FileVersion(); } - void loadFile(MemFile&, Environment&, Buffer&, uint64_t ) override {} - FlushResult flushUpdatesToFile(MemFile&, Environment&) override { - return FlushResult::TooSmall; - } - void rewriteFile(MemFile&, Environment&) override {} - bool verify(MemFile&, Environment&, std::ostream&, bool, uint16_t) override { return false; }; - void cacheLocations(MemFileIOInterface&, Environment&, const Options&, - DocumentPart, const std::vector<DataLocation>&) override {} - }; - - DummyFileReader dfr; - - void testAddAndReadDocument(); - void testNonExistingLocation(); - void testCopy(); - void testCacheLocation(); - void testPersist(); - void testGetSerializedSize(); - void testRemapLocations(); - void testAlignmentUtilFunctions(); - void testCalculatedCacheSize(); - void testSharedBuffer(); - void testSharedBufferUsage(); - void testHeaderChunkEncoderComputesSizesCorrectly(); - void testHeaderChunkEncoderSerializesIdCorrectly(); - void testHeaderChunkEncoderSerializesHeaderCorrectly(); - void testRemovesCanBeWrittenWithBlankDefaultDocument(); - void testRemovesCanBeWrittenWithIdInferredDoctype(); - void testRemovesWithInvalidDocTypeThrowsException(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(SimpleMemFileIOBufferTest); - - -void -SimpleMemFileIOBufferTest::testAddAndReadDocument() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - DataLocation h = buffer.addHeader(*doc); - DataLocation b = buffer.addBody(*doc); - - Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), h); - 
buffer.readBody(*getTypeRepo(), b, *newDoc); - - CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); - CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(h, HEADER)); - CPPUNIT_ASSERT_EQUAL(true, buffer.isCached(b, BODY)); - CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(h, BODY)); - CPPUNIT_ASSERT_EQUAL(false, buffer.isCached(b, HEADER)); - CPPUNIT_ASSERT_EQUAL(doc->getId(), buffer.getDocumentId(h)); -} - -void -SimpleMemFileIOBufferTest::testPersist() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - DataLocation h = buffer.addHeader(*doc); - DataLocation b = buffer.addBody(*doc); - - CPPUNIT_ASSERT(!buffer.isPersisted(h, HEADER)); - CPPUNIT_ASSERT(!buffer.isPersisted(b, BODY)); - - buffer.persist(HEADER, h, DataLocation(1000, h.size())); - buffer.persist(BODY, b, DataLocation(5000, b.size())); - - Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), DataLocation(1000, h.size())); - buffer.readBody(*getTypeRepo(), DataLocation(5000, b.size()), *newDoc); - - CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(1000, h.size()), HEADER)); - CPPUNIT_ASSERT(buffer.isPersisted(DataLocation(5000, b.size()), BODY)); - - CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); -} - -void -SimpleMemFileIOBufferTest::testCopy() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - for (uint32_t i = 0; i < 10; ++i) { - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - DataLocation h = buffer.addHeader(*doc); - DataLocation b = buffer.addBody(*doc); - - SimpleMemFileIOBuffer buffer2(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new 
FileInfo), - fileSpec, - env()); - - DataLocation h2 = buffer2.copyCache(buffer, HEADER, h); - DataLocation b2 = buffer2.copyCache(buffer, BODY, b); - - Document::UP newDoc = buffer2.getDocumentHeader(*getTypeRepo(), h2); - buffer2.readBody(*getTypeRepo(), b2, *newDoc); - - CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); - } -} - -void -SimpleMemFileIOBufferTest::testNonExistingLocation() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - DataLocation h = buffer.addHeader(*doc); - DataLocation b = buffer.addBody(*doc); - - buffer.clear(HEADER); - - try { - Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), h); - CPPUNIT_ASSERT(false); - } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) { - } - - buffer.clear(BODY); - - try { - document::Document newDoc; - buffer.readBody(*getTypeRepo(), b, newDoc); - CPPUNIT_ASSERT(false); - } catch (SimpleMemFileIOBuffer::PartNotCachedException& e) { - } -} - -void -SimpleMemFileIOBufferTest::testCacheLocation() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - FileInfo::UP(new FileInfo(100, 10000, 50000)), - fileSpec, - env()); - - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - BufferAllocation headerBuf = buffer.serializeHeader(*doc); - BufferAllocation bodyBuf = buffer.serializeBody(*doc); - - DataLocation hloc(1234, headerBuf.getSize()); - DataLocation bloc(5678, bodyBuf.getSize()); - - buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); - buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0); - - Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), hloc); - 
buffer.readBody(*getTypeRepo(), bloc, *newDoc); - - CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); -} - -void -SimpleMemFileIOBufferTest::testGetSerializedSize() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - FileInfo::UP(new FileInfo(100, 10000, 50000)), - fileSpec, - env()); - - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 456, - 789, - 1234)); - - BufferAllocation headerBuf = buffer.serializeHeader(*doc); - BufferAllocation bodyBuf = buffer.serializeBody(*doc); - - DataLocation hloc(1234, headerBuf.getSize()); - DataLocation bloc(5678, bodyBuf.getSize()); - - buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); - buffer.cacheLocation(BODY, bloc, bodyBuf.getSharedBuffer(), 0); - - vespalib::nbostream serializedHeader; - doc->serializeHeader(serializedHeader); - - vespalib::nbostream serializedBody; - doc->serializeBody(serializedBody); - - CPPUNIT_ASSERT_EQUAL(uint32_t(serializedHeader.size()), - buffer.getSerializedSize(HEADER, hloc)); - CPPUNIT_ASSERT_EQUAL(uint32_t(serializedBody.size()), - buffer.getSerializedSize(BODY, bloc)); -} - -// Test that remapping does not overwrite datalocations that it has -// already updated -void -SimpleMemFileIOBufferTest::testRemapLocations() -{ - FileSpecification fileSpec(BucketId(16, 123), env().getDirectory(), "testfile.0"); - - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - FileInfo::UP(new FileInfo(100, 10000, 50000)), - fileSpec, - env()); - - document::Document::SP doc(createRandomDocumentAtLocation( - 123, - 100, - 100)); - BufferAllocation headerBuf = buffer.serializeHeader(*doc); - BufferAllocation bodyBuf = buffer.serializeBody(*doc); - - document::Document::SP doc2(createRandomDocumentAtLocation( - 123, - 100, - 100)); - - BufferAllocation headerBuf2 = buffer.serializeHeader(*doc2); - BufferAllocation bodyBuf2 = buffer.serializeBody(*doc2); - - DataLocation 
hloc(30000, headerBuf.getSize()); - DataLocation hloc2(0, headerBuf2.getSize()); - DataLocation hloc3(10000, hloc2._size); - - buffer.cacheLocation(HEADER, hloc, headerBuf.getSharedBuffer(), 0); - buffer.cacheLocation(HEADER, hloc2, headerBuf2.getSharedBuffer(), 0); - - std::map<DataLocation, DataLocation> remapping; - remapping[hloc2] = hloc; - remapping[hloc] = hloc3; - - buffer.remapAndPersistAllLocations(HEADER, remapping); - - Document::UP newDoc = buffer.getDocumentHeader(*getTypeRepo(), hloc3); - document::ByteBuffer bbuf(bodyBuf.getBuffer(), bodyBuf.getSize()); - newDoc->deserializeBody(*getTypeRepo(), bbuf); - - CPPUNIT_ASSERT_EQUAL(*doc, *newDoc); - - Document::UP newDoc2 = buffer.getDocumentHeader(*getTypeRepo(), hloc); - document::ByteBuffer bbuf2(bodyBuf.getBuffer(), bodyBuf.getSize()); - newDoc2->deserializeBody(*getTypeRepo(), bbuf2); - CPPUNIT_ASSERT_EQUAL(*doc2, *newDoc2); -} - -/** - * Not technically a part of SimpleMemFileIOBuffer, but used by it and - * currently contained within its header file. Move test somewhere else - * if the code itself is moved. 
- */ -void -SimpleMemFileIOBufferTest::testAlignmentUtilFunctions() -{ - using namespace util; - CPPUNIT_ASSERT_EQUAL(size_t(0), alignUpPow2<4096>(0)); - CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(1)); - CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(512)); - CPPUNIT_ASSERT_EQUAL(size_t(4096), alignUpPow2<4096>(4096)); - CPPUNIT_ASSERT_EQUAL(size_t(8192), alignUpPow2<4096>(4097)); - CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<16>(20)); - CPPUNIT_ASSERT_EQUAL(size_t(32), alignUpPow2<32>(20)); - CPPUNIT_ASSERT_EQUAL(size_t(64), alignUpPow2<64>(20)); - CPPUNIT_ASSERT_EQUAL(size_t(128), alignUpPow2<128>(20)); - - CPPUNIT_ASSERT_EQUAL(uint32_t(0), nextPow2(0)); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), nextPow2(1)); - CPPUNIT_ASSERT_EQUAL(uint32_t(4), nextPow2(3)); - CPPUNIT_ASSERT_EQUAL(uint32_t(16), nextPow2(15)); - CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(40)); - CPPUNIT_ASSERT_EQUAL(uint32_t(64), nextPow2(64)); -} - -/** - * Test that allocated buffers are correctly reported with their sizes - * rounded up to account for mmap overhead. - */ -void -SimpleMemFileIOBufferTest::testCalculatedCacheSize() -{ - FileSpecification fileSpec(BucketId(16, 123), - env().getDirectory(), "testfile.0"); - SimpleMemFileIOBuffer buffer(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY)); - - // All buffers are on a 4k page granularity. 
- BufferAllocation sharedHeaderBuffer(allocateBuffer(1500)); // -> 4096 - buffer.cacheLocation(HEADER, DataLocation(0, 85), - sharedHeaderBuffer.getSharedBuffer(), 0); - CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER)); - - buffer.cacheLocation(HEADER, DataLocation(200, 100), - sharedHeaderBuffer.getSharedBuffer(), 85); - CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(HEADER)); - - BufferAllocation singleHeaderBuffer(allocateBuffer(200)); // -> 4096 - buffer.cacheLocation(HEADER, DataLocation(0, 100), - singleHeaderBuffer.getSharedBuffer(), 0); - CPPUNIT_ASSERT_EQUAL(size_t(8192), buffer.getCachedSize(HEADER)); - - BufferAllocation singleBodyBuffer(allocateBuffer(300)); // -> 4096 - buffer.cacheLocation(BODY, DataLocation(0, 100), - singleBodyBuffer.getSharedBuffer(), 0); - CPPUNIT_ASSERT_EQUAL(size_t(4096), buffer.getCachedSize(BODY)); - - buffer.clear(HEADER); - CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(HEADER)); - - buffer.clear(BODY); - CPPUNIT_ASSERT_EQUAL(size_t(0), buffer.getCachedSize(BODY)); -} - -void -SimpleMemFileIOBufferTest::testSharedBuffer() -{ - typedef SimpleMemFileIOBuffer::SharedBuffer SharedBuffer; - - { - SharedBuffer buf(1024); - CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getSize()); - CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getFreeSize()); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getUsedSize()); - CPPUNIT_ASSERT(buf.hasRoomFor(1024)); - CPPUNIT_ASSERT(!buf.hasRoomFor(1025)); - - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(13)); - // Allocation should be rounded up to nearest alignment. - // TODO: is this even necessary? 
- CPPUNIT_ASSERT_EQUAL(size_t(16), buf.getUsedSize()); - CPPUNIT_ASSERT_EQUAL(size_t(1008), buf.getFreeSize()); - CPPUNIT_ASSERT(buf.hasRoomFor(1008)); - CPPUNIT_ASSERT(!buf.hasRoomFor(1009)); - CPPUNIT_ASSERT_EQUAL(size_t(16), buf.allocate(1)); - CPPUNIT_ASSERT_EQUAL(size_t(24), buf.getUsedSize()); - - CPPUNIT_ASSERT_EQUAL(size_t(24), buf.allocate(999)); - CPPUNIT_ASSERT(!buf.hasRoomFor(1)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); - CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); - } - // Test exact fit. - { - SharedBuffer buf(1024); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(1024)); - CPPUNIT_ASSERT(!buf.hasRoomFor(1)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); - CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); - } - // Test 512-byte alignment. - { - SharedBuffer buf(1024); - CPPUNIT_ASSERT(buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.allocate(10)); - CPPUNIT_ASSERT(!buf.hasRoomFor(1000, SharedBuffer::ALIGN_512_BYTES)); - CPPUNIT_ASSERT(!buf.hasRoomFor(513, SharedBuffer::ALIGN_512_BYTES)); - CPPUNIT_ASSERT(buf.hasRoomFor(512, SharedBuffer::ALIGN_512_BYTES)); - CPPUNIT_ASSERT_EQUAL(size_t(512), buf.allocate(512, SharedBuffer::ALIGN_512_BYTES)); - CPPUNIT_ASSERT_EQUAL(size_t(0), buf.getFreeSize()); - CPPUNIT_ASSERT_EQUAL(size_t(1024), buf.getUsedSize()); - } -} - -void -SimpleMemFileIOBufferTest::testSharedBufferUsage() -{ - FileSpecification fileSpec(BucketId(16, 123), - env().getDirectory(), "testfile.0"); - SimpleMemFileIOBuffer ioBuf(dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env()); - - const size_t threshold = SimpleMemFileIOBuffer::WORKING_BUFFER_SIZE; - - // Brand new allocation - BufferAllocation ba(ioBuf.allocateBuffer(HEADER, 1)); - CPPUNIT_ASSERT(ba.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(1), ba.size); - // Should reuse buffer, but get other offset - BufferAllocation 
ba2(ioBuf.allocateBuffer(HEADER, 500)); - CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba2.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(8), ba2.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(500), ba2.size); - CPPUNIT_ASSERT_EQUAL(size_t(512), ba2.buf->getUsedSize()); - - // Allocate a buffer so big that it should get its own buffer instance - BufferAllocation ba3(ioBuf.allocateBuffer(HEADER, threshold)); - CPPUNIT_ASSERT(ba3.buf.get() != ba2.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba3.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(threshold), ba3.size); - - // But smaller allocs should still be done from working buffer - BufferAllocation ba4(ioBuf.allocateBuffer(HEADER, 512)); - CPPUNIT_ASSERT_EQUAL(ba.buf.get(), ba4.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(512), ba4.size); - CPPUNIT_ASSERT_EQUAL(size_t(1024), ba4.buf->getUsedSize()); - - // Allocate lots of smaller buffers from the same buffer until we run out. - while (true) { - BufferAllocation tmp(ioBuf.allocateBuffer(HEADER, 1024)); - CPPUNIT_ASSERT_EQUAL(ba.buf.get(), tmp.buf.get()); - if (!tmp.buf->hasRoomFor(2048)) { - break; - } - } - BufferAllocation ba5(ioBuf.allocateBuffer(HEADER, 2048)); - CPPUNIT_ASSERT(ba5.buf.get() != ba.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba5.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(2048), ba5.size); - - // Allocating for different part should get different buffer. - BufferAllocation ba6(ioBuf.allocateBuffer(BODY, 128)); - CPPUNIT_ASSERT(ba6.buf.get() != ba5.buf.get()); - CPPUNIT_ASSERT_EQUAL(uint32_t(0), ba6.pos); - CPPUNIT_ASSERT_EQUAL(uint32_t(128), ba6.size); -} - -void -SimpleMemFileIOBufferTest::testHeaderChunkEncoderComputesSizesCorrectly() -{ - document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); - - std::string idString = doc->getId().toString(); - HeaderChunkEncoder encoder(doc->getId()); - // Without document, payload is: 3x u32 + doc id string (no zero term). 
- CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size(), - static_cast<size_t>(encoder.encodedSize())); - - encoder.bufferDocument(*doc); - vespalib::nbostream stream; - doc->serializeHeader(stream); - // With document, add size of serialized document to the mix. - CPPUNIT_ASSERT_EQUAL(sizeof(uint32_t)*3 + idString.size() + stream.size(), - static_cast<size_t>(encoder.encodedSize())); -} - -SimpleMemFileIOBufferTest::SimpleMemFileIOBufferUP -SimpleMemFileIOBufferTest::createIoBufferWithDummySpec( - vespalib::stringref removeDocType) -{ - FileSpecification fileSpec(BucketId(16, 123), - env().getDirectory(), "testfile.0"); - // Override config. - auto options = env().acquireConfigReadLock().options(); - env().acquireConfigWriteLock().setOptions( - OptionsBuilder(*options) - .defaultRemoveDocType(removeDocType) - .build()); - - SimpleMemFileIOBufferUP ioBuf( - new SimpleMemFileIOBuffer( - dfr, - vespalib::LazyFile::UP(), - std::unique_ptr<FileInfo>(new FileInfo), - fileSpec, - env())); - return ioBuf; -} - -void -SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesIdCorrectly() -{ - document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); - HeaderChunkEncoder encoder(doc->getId()); - - SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec()); - - BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize())); - encoder.writeTo(buf); - DataLocation newLoc = ioBuf->addLocation(HEADER, buf); - document::DocumentId checkId = ioBuf->getDocumentId(newLoc); - - CPPUNIT_ASSERT_EQUAL(doc->getId(), checkId); -} - -void -SimpleMemFileIOBufferTest::testHeaderChunkEncoderSerializesHeaderCorrectly() -{ - document::Document::SP doc(createRandomDocumentAtLocation(123, 100, 100)); - HeaderChunkEncoder encoder(doc->getId()); - encoder.bufferDocument(*doc); - - SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec()); - BufferAllocation buf(ioBuf->allocateBuffer(HEADER, encoder.encodedSize())); - encoder.writeTo(buf); - DataLocation 
newLoc = ioBuf->addLocation(HEADER, buf); - Document::UP checkDoc = ioBuf->getDocumentHeader(*getTypeRepo(), newLoc); - - CPPUNIT_ASSERT_EQUAL(doc->getId(), checkDoc->getId()); - CPPUNIT_ASSERT_EQUAL(doc->getType(), checkDoc->getType()); -} - -void -SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithBlankDefaultDocument() -{ - SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); - - document::DocumentId id("userdoc:yarn:12345:fluff"); - DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); - // Despite adding with document id only, we should now actually have a - // valid document header. Will fail with a DeserializeException if no - // header has been written. - Document::UP removeWithHeader( - ioBuf->getDocumentHeader(*getTypeRepo(), loc)); - CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id); - CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(), - *getTypeRepo()->getDocumentType("testdoctype1")); -} - -void -SimpleMemFileIOBufferTest::testRemovesCanBeWrittenWithIdInferredDoctype() -{ - SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); - - document::DocumentId id("id:yarn:testdoctype2:n=12345:fluff"); - DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); - // Since document id contains an explicit document type, the blank remove - // document header should be written with that type instead of the one - // provided as default via config. 
- Document::UP removeWithHeader( - ioBuf->getDocumentHeader(*getTypeRepo(), loc)); - CPPUNIT_ASSERT_EQUAL(removeWithHeader->getId(), id); - CPPUNIT_ASSERT_EQUAL(removeWithHeader->getType(), - *getTypeRepo()->getDocumentType("testdoctype2")); -} - -void -SimpleMemFileIOBufferTest::testRemovesWithInvalidDocTypeThrowsException() -{ - SimpleMemFileIOBufferUP ioBuf(createIoBufferWithDummySpec("testdoctype1")); - - document::DocumentId id("id:yarn:nosuchtype:n=12345:fluff"); - try { - DataLocation loc(ioBuf->addDocumentIdOnlyHeader(id, *getTypeRepo())); - CPPUNIT_FAIL("No exception thrown on bad doctype"); - } catch (const vespalib::Exception& e) { - CPPUNIT_ASSERT(e.getMessage().find("Could not serialize document " - "for remove with unknown doctype " - "'nosuchtype'") - != std::string::npos); - } -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/spi/simulatedfailurefile.cpp b/memfilepersistence/src/tests/spi/simulatedfailurefile.cpp deleted file mode 100644 index b7da294f8eb..00000000000 --- a/memfilepersistence/src/tests/spi/simulatedfailurefile.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "simulatedfailurefile.h" -#include <vespa/vespalib/util/exceptions.h> - -namespace storage { -namespace memfile { - -vespalib::LazyFile::UP -SimulatedFailureLazyFile::Factory::createFile(const std::string& fileName) const { - return vespalib::LazyFile::UP( - new SimulatedFailureLazyFile(fileName, - vespalib::File::DIRECTIO, - _readOpsBeforeFailure, - _writeOpsBeforeFailure)); -} - -SimulatedFailureLazyFile::SimulatedFailureLazyFile( - const std::string& filename, - int flags, - int readOpsBeforeFailure, - int writeOpsBeforeFailure) - : LazyFile(filename, flags), - _readOpsBeforeFailure(readOpsBeforeFailure), - _writeOpsBeforeFailure(writeOpsBeforeFailure) -{ -} - -off_t -SimulatedFailureLazyFile::write(const void *buf, size_t bufsize, off_t offset) -{ - if (_writeOpsBeforeFailure == 0) { - throw vespalib::IoException( - "A simulated I/O write exception was triggered", - vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC); - } - --_writeOpsBeforeFailure; - return vespalib::LazyFile::write(buf, bufsize, offset); -} - -size_t -SimulatedFailureLazyFile::read(void *buf, size_t bufsize, off_t offset) const -{ - if (_readOpsBeforeFailure == 0) { - throw vespalib::IoException( - "A simulated I/O read exception was triggered", - vespalib::IoException::CORRUPT_DATA, VESPA_STRLOC); - } - --_readOpsBeforeFailure; - return vespalib::LazyFile::read(buf, bufsize, offset); -} - -} // ns memfile -} // ns storage - diff --git a/memfilepersistence/src/tests/spi/simulatedfailurefile.h b/memfilepersistence/src/tests/spi/simulatedfailurefile.h deleted file mode 100644 index e3dbd5e13e2..00000000000 --- a/memfilepersistence/src/tests/spi/simulatedfailurefile.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <tests/spi/memfiletestutils.h> -#include <tests/spi/logginglazyfile.h> - -namespace storage { -namespace memfile { - -class SimulatedFailureLazyFile : public vespalib::LazyFile -{ - mutable int _readOpsBeforeFailure; - mutable int _writeOpsBeforeFailure; -public: - class Factory : public Environment::LazyFileFactory { - public: - Factory() - : _readOpsBeforeFailure(-1), - _writeOpsBeforeFailure(0) - { } - vespalib::LazyFile::UP createFile(const std::string& fileName) const override; - - void setReadOpsBeforeFailure(int ops) { - _readOpsBeforeFailure = ops; - } - - void setWriteOpsBeforeFailure(int ops) { - _writeOpsBeforeFailure = ops; - } - private: - int _readOpsBeforeFailure; - int _writeOpsBeforeFailure; - }; - - SimulatedFailureLazyFile( - const std::string& filename, - int flags, - int readOpsBeforeFailure, - int writeOpsBeforeFailure); - - off_t write(const void *buf, size_t bufsize, off_t offset) override; - size_t read(void *buf, size_t bufsize, off_t offset) const override; -}; - -} // ns memfile -} // ns storage - diff --git a/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp b/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp deleted file mode 100644 index 89a7d0f6e03..00000000000 --- a/memfilepersistence/src/tests/spi/splitoperationhandlertest.cpp +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memfiletestutils.h" -#include <vespa/document/datatype/documenttype.h> -#include <vespa/persistence/spi/test.h> - -using document::DocumentType; -using storage::spi::test::makeSpiBucket; - -namespace storage { -namespace memfile { -namespace { - spi::LoadType defaultLoadType(0, "default"); -} - -class SplitOperationHandlerTest : public SingleDiskMemFileTestUtils -{ - - void doTestMultiDisk(uint16_t sourceDisk, - uint16_t targetDisk0, - uint16_t targetDisk1); - - - CPPUNIT_TEST_SUITE(SplitOperationHandlerTest); - CPPUNIT_TEST(testSimple); - CPPUNIT_TEST(testMultiDisk); - CPPUNIT_TEST(testMultiDiskNonZeroSourceIndex); - CPPUNIT_TEST(testExceptionDuringSplittingEvictsAllBuckets); - CPPUNIT_TEST_SUITE_END(); - -public: - void testSimple(); - void testMultiDisk(); - void testMultiDiskNonZeroSourceIndex(); - void testExceptionDuringSplittingEvictsAllBuckets(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(SplitOperationHandlerTest); - -void -SplitOperationHandlerTest::testSimple() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(1); - - for (uint32_t i = 0; i < 100; i++) { - uint32_t location = 4; - if (i % 2 == 0) { - location |= (1 << 16); - } - - doPut(location, Timestamp(1000 + i)); - } - flush(document::BucketId(16, 4)); - - env()._cache.clear(); - - document::BucketId sourceBucket = document::BucketId(16, 4); - document::BucketId target1 = document::BucketId(17, 4); - document::BucketId target2 = document::BucketId(17, 4 | (1 << 16)); - - SplitOperationHandler handler(env()); - spi::Result result = getPersistenceProvider().split( - makeSpiBucket(sourceBucket), - makeSpiBucket(target1), - makeSpiBucket(target2), - context); - - env()._cache.clear(); - - { - MemFilePtr file(handler.getMemFile(sourceBucket, 0)); - CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); - } - - { - MemFilePtr file(handler.getMemFile(target1, 0)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - for (uint32_t i = 0; i < 
file->getSlotCount(); ++i) { - file->getDocument((*file)[i], ALL); - } - } - - { - MemFilePtr file(handler.getMemFile(target2, 0)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - for (uint32_t i = 0; i < file->getSlotCount(); ++i) { - file->getDocument((*file)[i], ALL); - } - } -} - -void -SplitOperationHandlerTest::doTestMultiDisk(uint16_t sourceDisk, - uint16_t targetDisk0, - uint16_t targetDisk1) -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(3); - - for (uint32_t i = 0; i < 100; i++) { - uint32_t location = 4; - if (i % 2 == 0) { - location |= (1 << 16); - } - - doPutOnDisk(sourceDisk, location, Timestamp(1000 + i)); - } - flush(document::BucketId(16, 4)); - - env()._cache.clear(); - - document::BucketId sourceBucket = document::BucketId(16, 4); - document::BucketId target1 = document::BucketId(17, 4); - document::BucketId target2 = document::BucketId(17, 4 | (1 << 16)); - - SplitOperationHandler handler(env()); - spi::Result result = getPersistenceProvider().split( - makeSpiBucket(sourceBucket, spi::PartitionId(sourceDisk)), - makeSpiBucket(target1, spi::PartitionId(targetDisk0)), - makeSpiBucket(target2, spi::PartitionId(targetDisk1)), - context); - - env()._cache.clear(); - - { - MemFilePtr file(handler.getMemFile(sourceBucket, sourceDisk)); - CPPUNIT_ASSERT_EQUAL(0, (int)file->getSlotCount()); - } - - { - MemFilePtr file(handler.getMemFile(target1, targetDisk0)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - for (uint32_t i = 0; i < file->getSlotCount(); ++i) { - file->getDocument((*file)[i], ALL); - } - } - - { - MemFilePtr file(handler.getMemFile(target2, targetDisk1)); - CPPUNIT_ASSERT_EQUAL(50, (int)file->getSlotCount()); - for (uint32_t i = 0; i < file->getSlotCount(); ++i) { - file->getDocument((*file)[i], ALL); - } - } -} - -void -SplitOperationHandlerTest::testMultiDisk() -{ - doTestMultiDisk(0, 1, 2); -} - -void 
-SplitOperationHandlerTest::testMultiDiskNonZeroSourceIndex() -{ - doTestMultiDisk(1, 2, 0); -} - -void -SplitOperationHandlerTest::testExceptionDuringSplittingEvictsAllBuckets() -{ - spi::Context context(defaultLoadType, spi::Priority(0), - spi::Trace::TraceLevel(0)); - setupDisks(1); - - for (uint32_t i = 0; i < 100; i++) { - uint32_t location = 4; - if (i % 2 == 0) { - location |= (1 << 16); - } - - doPut(location, Timestamp(1000 + i)); - } - flush(document::BucketId(16, 4)); - - simulateIoErrorsForSubsequentlyOpenedFiles(); - - document::BucketId sourceBucket(16, 4); - document::BucketId target1(17, 4); - document::BucketId target2(17, 4 | (1 << 16)); - - try { - SplitOperationHandler handler(env()); - spi::Result result = getPersistenceProvider().split( - makeSpiBucket(sourceBucket), - makeSpiBucket(target1), - makeSpiBucket(target2), - context); - CPPUNIT_FAIL("Exception not thrown on flush failure"); - } catch (std::exception&) { - } - - CPPUNIT_ASSERT(!env()._cache.contains(sourceBucket)); - CPPUNIT_ASSERT(!env()._cache.contains(target1)); - CPPUNIT_ASSERT(!env()._cache.contains(target2)); - - unSimulateIoErrorsForSubsequentlyOpenedFiles(); - - // Source must not have been deleted - { - SplitOperationHandler handler(env()); - MemFilePtr file(handler.getMemFile(sourceBucket, 0)); - CPPUNIT_ASSERT_EQUAL(100, (int)file->getSlotCount()); - } -} - -} - -} diff --git a/memfilepersistence/src/tests/testrunner.cpp b/memfilepersistence/src/tests/testrunner.cpp deleted file mode 100644 index 7bd12fedce5..00000000000 --- a/memfilepersistence/src/tests/testrunner.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/vdstestlib/cppunit/cppunittestrunner.h> - -#include <vespa/log/log.h> -LOG_SETUP("persistencecppunittests"); - -int -main(int argc, const char *argv[]) -{ - vdstestlib::CppUnitTestRunner testRunner; - return testRunner.run(argc, argv); -} diff --git a/memfilepersistence/src/tests/tools/.gitignore b/memfilepersistence/src/tests/tools/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/tests/tools/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/tests/tools/CMakeLists.txt b/memfilepersistence/src/tests/tools/CMakeLists.txt deleted file mode 100644 index fdbe5f4cc01..00000000000 --- a/memfilepersistence/src/tests/tools/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_testtools - SOURCES - dumpslotfiletest.cpp - vdsdisktooltest.cpp - DEPENDS - memfilepersistence_testspi - memfilepersistence -) diff --git a/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp b/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp deleted file mode 100644 index 92c0736a10a..00000000000 --- a/memfilepersistence/src/tests/tools/dumpslotfiletest.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- - -#include <vespa/config/subscription/configuri.h> -#include <vespa/document/base/testdocrepo.h> -#include <vespa/memfilepersistence/tools/dumpslotfile.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/vespalib/util/programoptions_testutils.h> -#include <tests/spi/memfiletestutils.h> -#include <vespa/config/helper/configgetter.hpp> -#include <vespa/document/config/config-documenttypes.h> - -namespace storage { -namespace memfile { - -class DumpSlotFileTest : public SingleDiskMemFileTestUtils -{ - CPPUNIT_TEST_SUITE(DumpSlotFileTest); - CPPUNIT_TEST(testSimple); - CPPUNIT_TEST_SUITE_END(); - -public: - void testSimple(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(DumpSlotFileTest); - -#define ASSERT_MATCH(optstring, pattern) \ -{ \ - vespalib::AppOptions opts("dumpslotfile " optstring); \ - std::ostringstream out; \ - config::ConfigUri configUri(config::ConfigUri::createFromInstance( \ - document::TestDocRepo::getDefaultConfig())); \ - std::unique_ptr<document::DocumenttypesConfig> config = config::ConfigGetter<document::DocumenttypesConfig>::getConfig(configUri.getConfigId(), configUri.getContext()); \ - SlotFileDumper::dump(opts.getArgCount(), opts.getArguments(), \ - configUri, out, out); \ - CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \ - output = out.str(); \ -} - -void -DumpSlotFileTest::testSimple() -{ - std::string output; - // Test syntax page - ASSERT_MATCH("--help", ".*Usage: dumpslotfile.*"); - // Test non-existing file. (Handle as empty file) - ASSERT_MATCH("00a.0", - ".*BucketId\\(0x000000000000000a\\)" - ".*document count: 0.*non-existing.*"); - // Parse bucketid without extension. - ASSERT_MATCH("000000000000000a", - ".*BucketId\\(0x000000000000000a\\) " - "\\(extracted from filename\\).*"); - // Parse invalid bucket id. - ASSERT_MATCH("000010000000000g", - ".*Failed to extract bucket id from filename.*"); - // Test toXml with no data. 
Thus doesn't require doc config - ASSERT_MATCH("--toxml --documentconfig whatevah 000a.0", - ".*<vespafeed>.*"); - // Test invalid arguments - ASSERT_MATCH("--foobar", ".*Invalid option 'foobar'\\..*"); - // What to show in XML doesn't make sense in non-xml mode - ASSERT_MATCH("--includeremoveddocs 0.0", - ".*Options for what to include in XML makes no sense when not " - "printing XML content.*"); - ASSERT_MATCH("--includeremoveentries 0.0", - ".*Options for what to include in XML makes no sense when not " - "printing XML content.*"); - // To binary only works for single doc - ASSERT_MATCH("--tobinary 0.0", - ".*To binary option only works for a single document.*"); - - BucketId bid(1, 0); - createTestBucket(bid, 0); - ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0", - ".*" - "Unique document count: 8.*" - "Total document size: [0-9]+.*" - "Used size: [0-9]+.*" - "Filename: .*/d0/.*" - "Filesize: 12288.*" - "SlotFileHeader.*" - "[0-9]+ empty entries.*" - "Header block.*" - "Content block.*" - "Slotfile verified.*" - ); - ASSERT_MATCH("vdsroot/disks/d0/400000000000000.0", ".*ff ff ff ff.*"); - - // User friendly output - ASSERT_MATCH("--friendly -nN vdsroot/disks/d0/400000000000000.0", - ".*id:mail:testdoctype1:n=0:9380.html.*"); - - ASSERT_MATCH("--tobinary " - "--docid id:mail:testdoctype1:n=0:doesnotexisthere.html " - "vdsroot/disks/d0/400000000000000.0", - ".*No document with id id:mail:testdoctype1:n=0:doesnotexi.* " - "found.*"); - - // Should test XML with content.. But needs document config for it to work. - // Should be able to create programmatically from testdocman. 
- ASSERT_MATCH("--toxml --documentconfig '' " - "vdsroot/disks/d0/400000000000000.0", - ".*<vespafeed>\n" - "<document documenttype=\"testdoctype1\" " - "documentid=\"id:mail:testdoctype1:n=0:9639.html\">\n" - "<content>overwritten</content>\n" - "</document>.*"); - - // To binary - ASSERT_MATCH("--tobinary --docid id:mail:testdoctype1:n=0:9380.html " - "vdsroot/disks/d0/400000000000000.0", - ".*"); - { - TestDocMan docMan; - document::ByteBuffer buf(output.c_str(), output.size()); - document::Document doc(docMan.getTypeRepo(), buf); - CPPUNIT_ASSERT_EQUAL(std::string( - "<document documenttype=\"testdoctype1\" " - "documentid=\"id:mail:testdoctype1:n=0:9380.html\">\n" - "<content>To be, or not to be: that is the question:\n" - "Whether 'tis nobler in the mind to suffer\n" - "The slings and arrows of outrage</content>\n" - "</document>"), doc.toXml()); - } - - // Fail verification - { - vespalib::LazyFile file("vdsroot/disks/d0/400000000000000.0", 0); - file.write("corrupt", 7, 64); - } - ASSERT_MATCH("-nN vdsroot/disks/d0/400000000000000.0", - ".*lot 0 at timestamp [0-9]+ failed checksum verification.*"); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp b/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp deleted file mode 100644 index 1a28cae8d7b..00000000000 --- a/memfilepersistence/src/tests/tools/vdsdisktooltest.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include <vespa/config/subscription/configuri.h> -#include <vespa/memfilepersistence/tools/vdsdisktool.h> -#include <vespa/storageframework/defaultimplementation/clock/fakeclock.h> -#include <vespa/vdstestlib/cppunit/macros.h> -#include <vespa/vespalib/util/programoptions_testutils.h> -#include <tests/spi/memfiletestutils.h> - -namespace storage { -namespace memfile { - -struct VdsDiskToolTest : public SingleDiskMemFileTestUtils -{ - framework::defaultimplementation::FakeClock _clock; - - void setUp() override; - void setupRoot(); - - void testSimple(); - - CPPUNIT_TEST_SUITE(VdsDiskToolTest); - CPPUNIT_TEST(testSimple); - CPPUNIT_TEST_SUITE_END(); -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(VdsDiskToolTest); - -#define ASSERT_MATCH(optstring, pattern, exitcode) \ -{ \ - std::ostringstream out; \ - int result = 1; \ - try{ \ - vespalib::AppOptions opts("vdsdisktool " optstring); \ - result = VdsDiskTool::run(opts.getArgCount(), opts.getArguments(), \ - "vdsroot", out, out); \ - } catch (std::exception& e) { \ - out << "Application aborted with exception:\n" << e.what() << "\n"; \ - } \ - CPPUNIT_ASSERT_MATCH_REGEX(pattern, out.str()); \ - CPPUNIT_ASSERT_EQUAL(exitcode, result); \ -} - -namespace { - void createDisk(int i) { - std::ostringstream path; - path << "vdsroot/mycluster/storage/3/disks/d" << i; - CPPUNIT_ASSERT_EQUAL(0, system(("mkdir -p " + path.str()).c_str())); - } -} - -void -VdsDiskToolTest::setUp() -{ - system("rm -rf vdsroot"); -} - -void -VdsDiskToolTest::setupRoot() -{ - system("rm -rf vdsroot"); - createDisk(0); -} - -void -VdsDiskToolTest::testSimple() -{ - // Test syntax page - ASSERT_MATCH("--help", ".*Usage: vdsdisktool .*", 0); - // No VDS installation - ASSERT_MATCH("status", ".*No VDS installations found at all.*", 1); - // Common setup - setupRoot(); - ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" - "Disk 0: OK\\s*", 0); - // Two disks - system("mkdir -p vdsroot/mycluster/storage/3/disks/d1/"); - 
ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" - "Disk 0: OK\\s*" - "Disk 1: OK\\s*", 0); - // Two disks, non-continuous indexes - system("rm -rf vdsroot/mycluster/storage/3/disks/d1/"); - system("mkdir -p vdsroot/mycluster/storage/3/disks/d2/"); - ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" - "Disk 0: OK\\s*" - "Disk 1: NOT_FOUND - Disk not found during scan.*" - "Disk 2: OK\\s*", 0); - // Status file existing - setupRoot(); - createDisk(1); - MountPointList mountPoints("vdsroot/mycluster/storage/3", - std::vector<vespalib::string>(), - std::make_unique<DeviceManager>(std::make_unique<SimpleDeviceMapper>(), _clock)); - mountPoints.scanForDisks(); - CPPUNIT_ASSERT_EQUAL(2u, mountPoints.getSize()); - mountPoints[1].addEvent(Device::IO_FAILURE, "Bad", "Found in test"); - mountPoints.writeToFile(); - ASSERT_MATCH("status", ".*Disks on storage node 3 in cluster mycluster:\\s*" - "Disk 0: OK\\s*" - "Disk 1: IO_FAILURE - 0 Bad\\s*", 0); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/.gitignore deleted file mode 100644 index c43cd4d8c3b..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/.depend -/Makefile -/libmemfilepersistence.so.5.1 diff --git a/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt deleted file mode 100644 index ab44e62f06f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_library(memfilepersistence - SOURCES - $<TARGET_OBJECTS:memfilepersistence_device> - $<TARGET_OBJECTS:memfilepersistence_init> - $<TARGET_OBJECTS:memfilepersistence_mapper> - $<TARGET_OBJECTS:memfilepersistence_spi> - $<TARGET_OBJECTS:memfilepersistence_common> - $<TARGET_OBJECTS:memfilepersistence_memfile> - $<TARGET_OBJECTS:memfilepersistence_tools> - INSTALL lib64 - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt deleted file mode 100644 index b874aed6242..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_common OBJECT - SOURCES - environment.cpp - options.cpp - types.cpp - filespecification.cpp - exceptions.cpp - slotmatcher.cpp - config_lock_guard.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h deleted file mode 100644 index 629facd4360..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/config_aliases.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/config-stor-memfilepersistence.h> -#include <vespa/config-stor-devices.h> -#include <vespa/config-persistence.h> - -namespace storage { -namespace memfile { - -// Friendly aliases to painfully long config names. -using MemFilePersistenceConfig= vespa::config::storage::StorMemfilepersistenceConfig; -using PersistenceConfig = vespa::config::content::PersistenceConfig; -using DevicesConfig = vespa::config::storage::StorDevicesConfig; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp deleted file mode 100644 index a628b538cb1..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "config_lock_guard.h" -#include "environment.h" - -namespace storage { -namespace memfile { - -bool -ConfigLockGuardBase::hasPersistenceConfig() const noexcept -{ - return (_env->_persistenceConfig.get() != nullptr); -} - -std::shared_ptr<const PersistenceConfig> -ConfigLockGuardBase::persistenceConfig() const noexcept -{ - return _env->_persistenceConfig; -} - -bool -ConfigLockGuardBase::hasMemFilePersistenceConfig() const noexcept -{ - return (_env->_config.get() != nullptr); -} - -std::shared_ptr<const MemFilePersistenceConfig> -ConfigLockGuardBase::memFilePersistenceConfig() const noexcept -{ - return _env->_config; -} - -bool -ConfigLockGuardBase::hasDevicesConfig() const noexcept -{ - return (_env->_devicesConfig.get() != nullptr); -} - -std::shared_ptr<const DevicesConfig> -ConfigLockGuardBase::devicesConfig() const noexcept -{ - return _env->_devicesConfig; -} - -bool -ConfigLockGuardBase::hasOptions() const noexcept -{ - return (_env->_options.get() != nullptr); -} - -std::shared_ptr<const Options> 
-ConfigLockGuardBase::options() const noexcept -{ - return _env->_options; -} - -ConfigWriteLockGuard::ConfigWriteLockGuard(Environment& e) - : ConfigLockGuardBase(e), - _lock(e._configRWLock), - _mutableEnv(&e) -{ -} - -ConfigWriteLockGuard::ConfigWriteLockGuard(ConfigWriteLockGuard&& other) - : ConfigLockGuardBase(std::move(other)), - _lock(other._lock), // Implicit lock stealing, no explicit moving - _mutableEnv(other._mutableEnv) -{ - other._mutableEnv = nullptr; -} - -void -ConfigWriteLockGuard::setPersistenceConfig( - std::unique_ptr<PersistenceConfig> cfg) noexcept -{ - mutableEnv()._persistenceConfig = std::move(cfg); -} - -void -ConfigWriteLockGuard::setMemFilePersistenceConfig( - std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept -{ - mutableEnv()._config = std::move(cfg); -} - -void -ConfigWriteLockGuard::setDevicesConfig( - std::unique_ptr<DevicesConfig> cfg) noexcept -{ - mutableEnv()._devicesConfig = std::move(cfg); -} - -void -ConfigWriteLockGuard::setOptions(std::unique_ptr<Options> opts) -{ - mutableEnv()._options = std::move(opts); -} - -ConfigReadLockGuard::ConfigReadLockGuard(const Environment& e) - : ConfigLockGuardBase(e), - _lock(e._configRWLock) -{ -} - -ConfigReadLockGuard::ConfigReadLockGuard(ConfigReadLockGuard&& other) - : ConfigLockGuardBase(std::move(other)), - _lock(other._lock) // Implicit lock stealing, no explicit moving -{ -} - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h b/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h deleted file mode 100644 index 72d58120767..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/config_lock_guard.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include "config_aliases.h" -#include "options.h" -#include <vespa/vespalib/util/rwlock.h> - -namespace storage { -namespace memfile { - -class Environment; - -/** - * Shared guard base allowing read access to existing configs via both - * read and write guard subclasses. - */ -class ConfigLockGuardBase { -public: - explicit ConfigLockGuardBase(const Environment& e) - : _env(&e) - { - } - - ConfigLockGuardBase(ConfigLockGuardBase&& other) - : _env(other._env) - { - // If the source is attempted used after the move, ensure it nukes - // itself with a SIGSEGV. - other._env = nullptr; - } - - // To avoid circular dependencies, all access of Environment internals - // must be in separate .cpp file. - - bool hasPersistenceConfig() const noexcept; - std::shared_ptr<const PersistenceConfig> persistenceConfig() const noexcept; - - bool hasMemFilePersistenceConfig() const noexcept; - std::shared_ptr<const MemFilePersistenceConfig> - memFilePersistenceConfig() const noexcept; - - bool hasDevicesConfig() const noexcept; - std::shared_ptr<const DevicesConfig> devicesConfig() const noexcept; - - bool hasOptions() const noexcept; - std::shared_ptr<const Options> options() const noexcept; - - ConfigLockGuardBase(const ConfigLockGuardBase&) = delete; - ConfigLockGuardBase& operator=(const ConfigLockGuardBase&) = delete; - -private: - const Environment* _env; -}; - -class ConfigWriteLockGuard : public ConfigLockGuardBase { -public: - explicit ConfigWriteLockGuard(Environment& e); - /** - * Moving a guard transfers ownership of the lock to the move target. It - * is illegal and undefined behavior to attempt to access the environment - * configuration through a guard whose lock has been transferred away. - */ - ConfigWriteLockGuard(ConfigWriteLockGuard&& other); - - // By definition, configs can only be mutated when the writer lock - // is held. 
- void setPersistenceConfig(std::unique_ptr<PersistenceConfig> cfg) noexcept; - void setMemFilePersistenceConfig( - std::unique_ptr<MemFilePersistenceConfig> cfg) noexcept; - void setDevicesConfig(std::unique_ptr<DevicesConfig> cfg) noexcept; - void setOptions(std::unique_ptr<Options> opts); - -private: - vespalib::RWLockWriter _lock; - // This points to the exact same object as the const ref in the base - // and basically serves as an alternative to const_cast. - Environment* _mutableEnv; - - // Hide the fact that we're storing duplicate information to other - // methods. - Environment& mutableEnv() { return *_mutableEnv; } -}; - -class ConfigReadLockGuard : public ConfigLockGuardBase { -public: - explicit ConfigReadLockGuard(const Environment& e); - ConfigReadLockGuard(ConfigReadLockGuard&& other); - - // Config reader methods already implemented in base. - -private: - vespalib::RWLockReader _lock; -}; - - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h b/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h deleted file mode 100644 index d31bb0500b8..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/configkeeper.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class ConfigKeeper - * \class memfile - * - * \brief Utility function for live reconfiguration - * - * When many threads want the same config, we don't want each of these threads - * to subscribe on the same config because of the following reasons: - * - No need to put lots of extra load on the config system. - * - Application doesn't know whether all users have the same config version - * at any given time. - * - * This class implements a utility class for handling this. 
- */ -#pragma once - -#include <vespa/vespalib/util/sync.h> - -namespace storage { - -template<typename ConfigClass> -class ConfigKeeper { - vespalib::Monitor _configLock; - bool _configUpdated; // Set to true if updating config. - std::unique_ptr<ConfigClass> _nextConfig; - ConfigClass _config; - -public: - ConfigKeeper() : _configUpdated(false) {} - - void updateConfig(const ConfigClass& config) { - vespalib::MonitorGuard lock(_configLock); - _nextConfig.reset(new ConfigClass(config)); - _configUpdated = true; - } - - void activateNewConfig() { - if (!_configUpdated) return; - vespalib::MonitorGuard lock(_configLock); - _config = *_nextConfig; - _nextConfig.reset(0); - _configUpdated = false; - lock.signal(); - } - - void waitForAnyActivation() { - vespalib::MonitorGuard lock(_configLock); - while (_configUpdated) lock.wait(); - } - - ConfigClass* operator->() { return &_config; } - ConfigClass& operator*() { return _config; } -}; - -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp deleted file mode 100644 index 5cbaef2363f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/environment.cpp +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- - -#include "environment.h" -#include <vespa/vespalib/util/random.h> -#include <vespa/vespalib/util/stringfmt.h> -#include <vespa/config/helper/configgetter.hpp> -#include <vespa/config/subscription/configuri.h> -#include <vespa/vespalib/stllike/asciistream.h> - -using config::ConfigGetter; - -namespace storage::memfile { - -namespace { - -template <typename ConfigT> -std::shared_ptr<ConfigT> -resolveConfig(const config::ConfigUri& configUri) -{ - return {ConfigGetter<ConfigT>::getConfig( - configUri.getConfigId(), configUri.getContext())}; -} - -} - -vespalib::LazyFile::UP -DefaultLazyFileFactory::createFile(const std::string& fileName) const -{ - return vespalib::LazyFile::UP( - new vespalib::LazyFile( - fileName, vespalib::File::DIRECTIO | _flags)); -} - -Environment::Environment(const config::ConfigUri & configUri, - MemFileCache& cache, - MemFileMapper& mapper, - const document::DocumentTypeRepo& typeRepo, - const framework::Clock& clock, - bool ignoreDisks) - : _clock(clock), - _cache(cache), - _memFileMapper(mapper), - _bucketFactory(), - _lazyFileFactory(new DefaultLazyFileFactory( - ignoreDisks ? 
vespalib::File::READONLY : 0)), - _repo(&typeRepo), - _config(resolveConfig<MemFilePersistenceConfig>(configUri)), - _persistenceConfig(resolveConfig<PersistenceConfig>(configUri)), - _devicesConfig(resolveConfig<DevicesConfig>(configUri)), - _options(std::make_shared<Options>(*_config, *_persistenceConfig)) -{ - DeviceManager::UP manager( - new DeviceManager(DeviceMapper::UP(new SimpleDeviceMapper()), - _clock)); - - manager->setPartitionMonitorPolicy( - _devicesConfig->statfsPolicy, _devicesConfig->statfsPeriod); - _mountPoints.reset(new MountPointList(_devicesConfig->rootFolder, - _devicesConfig->diskPath, - std::move(manager))); - - if (!ignoreDisks) { - _mountPoints->init(0); - - // Update full disk setting for partition monitors - for (uint32_t i=0; i<_mountPoints->getSize(); ++i) { - Directory& dir(getDirectory(i)); - if (dir.getPartition().getMonitor() != 0) { - dir.getPartition().getMonitor()->setMaxFillness( - _options->_diskFullFactor); - } - } - } -} - -Types::String -Environment::calculatePathInDir(const Types::BucketId& id, Directory& dir) -{ - vespalib::asciistream os; - os << dir.getPath() << '/'; - // Directories created should only depend on bucket identifier. 
- document::BucketId::Type seed = id.getId(); - seed = seed ^ (seed >> 32); - vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11); - - for (uint32_t i = 1; i <= (uint32_t)_config->dirLevels; ++i) { - os << vespalib::make_string( - "%.4x/", - randomizer.nextUint32() % _config->dirSpread); - } - - os << vespalib::make_string("%.8" PRIx64 ".0", id.getId()); - return os.str(); -} - -Environment::~Environment() -{ -} - -Directory& Environment::getDirectory(uint16_t disk) -{ - return (*_mountPoints)[disk]; -} - -void -Environment::addModifiedBucket(const document::BucketId& bid) -{ - vespalib::LockGuard guard(_modifiedBucketsLock); - _modifiedBuckets.push_back(bid); -} - -void -Environment::swapModifiedBuckets(document::BucketId::List & ids) -{ - vespalib::LockGuard guard(_modifiedBucketsLock); - _modifiedBuckets.swap(ids); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/environment.h b/memfilepersistence/src/vespa/memfilepersistence/common/environment.h deleted file mode 100644 index 75568a707d7..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/environment.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::MemFileEnvironment - * \ingroup memfile - * - * \brief Keeps environment for MemFile operations - * - * The memfile layer needs quite a lot of stuff set up in order to work. Rather - * than passing all these bits around when creating new slotfiles, we rather - * have an environment where all the static pieces not related to single files - * will be kept. 
- */ - -#pragma once - -#include "options.h" -#include "types.h" -#include "config_lock_guard.h" -#include "config_aliases.h" -#include <vespa/memfilepersistence/device/mountpointlist.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/document/bucket/bucketidfactory.h> -#include <vespa/config/helper/configfetcher.h> - -namespace config { class ConfigUri; } - -namespace storage::memfile { - -class MemFileMapper; -class MemFileCache; - -struct Environment : public Types { - class LazyFileFactory { - public: - virtual ~LazyFileFactory() {}; - - virtual vespalib::LazyFile::UP - createFile(const std::string& fileName) const = 0; - }; - using UP = std::unique_ptr<Environment>; - - const framework::Clock& _clock; - MemFileCache& _cache; - MemFileMapper& _memFileMapper; - MountPointList::UP _mountPoints; - document::BucketIdFactory _bucketFactory; - std::unique_ptr<LazyFileFactory> _lazyFileFactory; - vespalib::Lock _modifiedBucketsLock; - document::BucketId::List _modifiedBuckets; - - Environment(const config::ConfigUri & configUri, - MemFileCache&, - MemFileMapper&, - const document::DocumentTypeRepo&, - const framework::Clock&, - bool ignoreDisks = false); - ~Environment(); - - String calculatePathInDir(const Types::BucketId& id, Directory& dir); - - vespalib::LazyFile::UP createFile(const std::string& fileName) const { - return _lazyFileFactory->createFile(fileName); - } - - Directory& getDirectory(uint16_t disk = 0); - - void addModifiedBucket(const document::BucketId&); - void swapModifiedBuckets(document::BucketId::List &); - - ConfigReadLockGuard acquireConfigReadLock() const { - return ConfigReadLockGuard(*this); - } - - ConfigWriteLockGuard acquireConfigWriteLock() { - return ConfigWriteLockGuard(*this); - } - - /** - * Get the currently assigned document repo in a data race free manner. 
- * Forms a release/acquire pair with setRepo() - */ - const document::DocumentTypeRepo& repo() const noexcept { - return *_repo.load(std::memory_order_acquire); - } - /** - * Sets the currently assigned document repo in a data race free manner. - * Forms a release/acquire pair with repo() - */ - void setRepo(const document::DocumentTypeRepo* typeRepo) noexcept { - _repo.store(typeRepo, std::memory_order_release); - } -private: - mutable vespalib::RWLock _configRWLock; - /** - * For simplicity, repos are currently kept alive for the duration of the - * process. This means we don't have to care about lifetime management, but - * we still have to ensure writes that set the repo are release/acquired - * paired with their reads. Repos are provided through the SPI and _not_ - * through regular provider-level config subscription, so we therefore do - * not require the config lock to be held when reading/writing. - */ - std::atomic<const document::DocumentTypeRepo*> _repo; - /** - * Configs are kept as shared_ptrs to allow lock window to remain as small - * as possible while still retaining thread safety during pointer - * reassignments. - */ - std::shared_ptr<const MemFilePersistenceConfig> _config; - std::shared_ptr<const PersistenceConfig> _persistenceConfig; - std::shared_ptr<const DevicesConfig> _devicesConfig; - /** - * Options is not a true config as per se, but is an aggregate of multiple - * other configs and must thus be protected as if it were. - */ - std::shared_ptr<const Options> _options; - // We entrust the config guards with access to our internals. 
- friend class ConfigLockGuardBase; - friend class ConfigWriteLockGuard; - friend class ConfigReadLockGuard; -}; - -struct DefaultLazyFileFactory - : public Environment::LazyFileFactory -{ - int _flags; - - DefaultLazyFileFactory(int flags) : _flags(flags) {} - - vespalib::LazyFile::UP createFile(const std::string& fileName) const override; -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp deleted file mode 100644 index 05fc40893d8..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "exceptions.h" - -namespace storage::memfile { - -VESPA_IMPLEMENT_EXCEPTION_SPINE(TimestampExistException); -VESPA_IMPLEMENT_EXCEPTION_SPINE(InconsistentSlotException); -VESPA_IMPLEMENT_EXCEPTION_SPINE(MemFileIoException); -VESPA_IMPLEMENT_EXCEPTION(NoDisksException, vespalib::Exception); - -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(SlotNotFoundException); -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidArgumentException); -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InvalidStateException); -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(CorruptMemFileException); -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(MemFileWrapperException); -VESPA_IMPLEMENT_MEMFILE_EXCEPTION(InconsistentException); - -MemFileException::MemFileException(const FileSpecification& file) - : _file(file) -{ } - -MemFileException::~MemFileException() {} - -TimestampExistException::TimestampExistException(const TimestampExistException &) = default; -TimestampExistException::~TimestampExistException() {} - -TimestampExistException::TimestampExistException( - const vespalib::string& message, const FileSpecification& file, - Types::Timestamp ts, const vespalib::string& location, int skipStack) - : Exception(message, location, skipStack + 1), - 
MemFileException(file), - _timestamp(ts) -{ -} - -InconsistentSlotException::InconsistentSlotException( - const vespalib::string& message, const FileSpecification& file, - const MemSlot& slot, const vespalib::string& location, int skipstack) - : InconsistentException(message, file, location, skipstack + 1), - _slot(slot) -{ -} - -InconsistentSlotException::InconsistentSlotException(const InconsistentSlotException &) = default; -InconsistentSlotException::~InconsistentSlotException() {} - -MemFileIoException::MemFileIoException( - const vespalib::string& msg, const FileSpecification& file, - Type type, const vespalib::string& location, int skipStack) - : IoException(msg, type, location, skipStack + 1), - MemFileException(file) -{ -} - -MemFileIoException::MemFileIoException(const MemFileIoException &) = default; -MemFileIoException::~MemFileIoException() {} - -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h b/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h deleted file mode 100644 index 6731794578a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/exceptions.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::IoException - * \ingroup memfile - * - * \brief Exception thrown by memfile layer for IO problems. - * - * Storage needs to know what disk was having issues for disk related problems, - * in case it needs to disable a non-working disk. Some information on what - * file was being operated on while one is having trouble is nice anyhow. Thus - * specific exceptions have been created to keep the file specification of the - * file in question. The MemFile layer may throw some exceptions that aren't - * MemFileExceptions though. These exceptions should not be disk/file related. 
- */ - -#pragma once - -#include "filespecification.h" -#include <vespa/memfilepersistence/memfile/memslot.h> -#include <vespa/vespalib/util/exceptions.h> - -#define VESPA_DEFINE_MEMFILE_EXCEPTION(name) \ - struct name : public vespalib::Exception, public MemFileException { \ - name(const vespalib::string& message, const FileSpecification& file, \ - const vespalib::string& location, int skipStack = 0); \ - name(const name &); \ - name & operator = (const name &); \ - name(name &&) = default; \ - name & operator = (name &&) = default; \ - ~name(); \ - VESPA_DEFINE_EXCEPTION_SPINE(name); \ -}; - -#define VESPA_IMPLEMENT_MEMFILE_EXCEPTION(name) \ - name::name(const vespalib::string& message, const FileSpecification& file, \ - const vespalib::string& location, int skipStack) \ - : vespalib::Exception(message, location, skipStack + 1), \ - MemFileException(file) {} \ - name::name(const name &) = default; \ - name & name::operator = (const name &) = default; \ - name::~name() {} \ - VESPA_IMPLEMENT_EXCEPTION_SPINE(name); - -namespace storage::memfile { - -VESPA_DEFINE_EXCEPTION(NoDisksException, vespalib::Exception); - -/** - * \class storage::memfile::MemFileException - * \ingroup memfile - * - * \brief Interface to implement for exceptions that contain a file specification specifying what memfile was problematic. 
- */ -class MemFileException : protected Types { - FileSpecification _file; - -public: - MemFileException(const FileSpecification&); - virtual ~MemFileException() = 0; - - const FileSpecification& getFile() const { return _file; } -}; - -VESPA_DEFINE_MEMFILE_EXCEPTION(SlotNotFoundException); -VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidArgumentException); -VESPA_DEFINE_MEMFILE_EXCEPTION(InvalidStateException); -VESPA_DEFINE_MEMFILE_EXCEPTION(CorruptMemFileException); -VESPA_DEFINE_MEMFILE_EXCEPTION(MemFileWrapperException); - -/** - * \class storage::InconsistentException - * \ingroup memfile - * - * \brief Thrown by MemFile::verifyConsistent() if inconsistent - */ -VESPA_DEFINE_MEMFILE_EXCEPTION(InconsistentException); - -/** - * @class storage::TimestampExistException - * @ingroup filestorage - * - * @brief Thrown by SlotFile::write() when timestamp given is already in use. - */ -class TimestampExistException : public vespalib::Exception, - public MemFileException -{ - Timestamp _timestamp; -public: - TimestampExistException(const vespalib::string& message, - const FileSpecification&, Timestamp ts, - const vespalib::string& location, int skipstack = 0); - TimestampExistException(const TimestampExistException &); - ~TimestampExistException(); - - VESPA_DEFINE_EXCEPTION_SPINE(TimestampExistException); - - Timestamp getTimestamp() const { return _timestamp; } -}; - -/** - * @class storage::InconsistentSlotException - * @ingroup filestorage - * - * @brief Thrown by MemFile::verifyConsistent() if a slot is inconsistent - */ -class InconsistentSlotException : public InconsistentException { - MemSlot _slot; - -public: - InconsistentSlotException(const vespalib::string& message, - const FileSpecification&, const MemSlot& slot, - const vespalib::string& location, int skipstack = 0); - InconsistentSlotException(const InconsistentSlotException &); - ~InconsistentSlotException(); - - VESPA_DEFINE_EXCEPTION_SPINE(InconsistentSlotException); -}; - -class MemFileIoException : public 
vespalib::IoException, - public MemFileException -{ -public: - MemFileIoException(const vespalib::string& msg, const FileSpecification&, - Type type, const vespalib::string& location, - int skipStack = 0); - MemFileIoException(const MemFileIoException &); - ~MemFileIoException(); - - VESPA_DEFINE_EXCEPTION_SPINE(MemFileIoException); -}; - -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp deleted file mode 100644 index 09a3205dec3..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - - -#include "filespecification.h" -#include <vespa/vespalib/util/exceptions.h> - -namespace storage { -namespace memfile { - -FileSpecification::FileSpecification(const BucketId& bucket, Directory& dir, - const String& path) - : _bucketId(bucket), - _dir(&dir), - _path(path), - _wantedVersion(TRADITIONAL_SLOTFILE) -{ - if (dir.getState() != Device::OK) { - throw vespalib::IllegalStateException( - "Attempt to create file specification for file on disk that " - "is not available: " + dir.toString(), VESPA_STRLOC); - } -} - -void -FileSpecification::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; (void) indent; - out << "FileSpecification(" << _bucketId << ", " << *_dir << ", " << _path - << ", wanted version 0x" << std::hex << _wantedVersion << std::dec - << ")"; -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h b/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h deleted file mode 100644 index e7898fad288..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/filespecification.h +++ /dev/null @@ -1,49 +0,0 
@@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::FileSpecification - * \ingroup memfile - * - * \brief Information about the file currently worked on. - * - * The file specification specifies what file a given MemFile should work on. - */ - -#pragma once - -#include "types.h" -#include <vespa/memfilepersistence/device/directory.h> -#include <vespa/vespalib/util/printable.h> - -namespace storage { -namespace memfile { - -class MemFileEnvironment; - -class FileSpecification : private Types, - public vespalib::Printable -{ - BucketId _bucketId; - Directory* _dir; - String _path; - FileVersion _wantedVersion; - -public: - FileSpecification(const BucketId&, Directory&, const String& path); - - void setWantedVersion(FileVersion v) { _wantedVersion = v; } - - const document::BucketId& getBucketId() const { return _bucketId; } - Directory& getDirectory() const { return *_dir; } - const String& getPath() const { return _path; } - FileVersion getWantedFileVersion() const { return _wantedVersion; } - - void print(std::ostream& out, bool verbose, const std::string& indent) const override; - - bool operator==(const FileSpecification& o) const { - return (_bucketId == o._bucketId && _dir == o._dir - && _path == o._path && _wantedVersion == o._wantedVersion); - } -}; - -} // storage -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp deleted file mode 100644 index 34b2f23ed65..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/options.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "options.h" -#include <vespa/vespalib/util/exceptions.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.slotfile.options"); - -namespace storage::memfile { - -Options::Options() - : _minimumFileMetaSlots(512), - _maximumFileMetaSlots(0), - _minimumFileHeaderBlockSize(102848), - _maximumFileHeaderBlockSize(0), - _minimumFileSize(1048576), - _maximumFileSize(0), - _fileBlockSize(4096), - _revertTimePeriod(300 * 1000000ull), - _keepRemoveTimePeriod(604800 * 1000000ull), - _maxDocumentVersions(5), - _cacheSize(0), - _initialIndexRead(65536), - _maximumGapToReadThrough(65536), - _diskFullFactor(0.98), - _growFactor(2.0), - _overrepresentMetaDataFactor(1.2), - _overrepresentHeaderBlockFactor(1.1), - _defaultRemoveDocType() -{ -} - -Options::Options(const Options &) noexcept = default; -Options & Options::operator=(const Options &) = default; -Options::~Options() { } - -Options::Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig, - const vespa::config::content::PersistenceConfig& newPersistenceConfig) - : _minimumFileMetaSlots(newConfig.minimumFileMetaSlots), - _maximumFileMetaSlots(newConfig.maximumFileMetaSlots), - _minimumFileHeaderBlockSize(newConfig.minimumFileHeaderBlockSize), - _maximumFileHeaderBlockSize(newConfig.maximumFileHeaderBlockSize), - _minimumFileSize(newConfig.minimumFileSize), - _maximumFileSize(newConfig.maximumFileSize), - _fileBlockSize(newConfig.fileBlockSize), - _revertTimePeriod(newPersistenceConfig.revertTimePeriod * 1000000ll), - _keepRemoveTimePeriod( - newPersistenceConfig.keepRemoveTimePeriod * 1000000ll), - _maxDocumentVersions( - newPersistenceConfig.maximumVersionsOfSingleDocumentStored), - _cacheSize(newConfig.cacheSize), - _initialIndexRead(newConfig.initialIndexRead), - _maximumGapToReadThrough(newConfig.maximumGapToReadThrough), - _diskFullFactor(newConfig.diskFullFactor), - _growFactor(newConfig.growFactor), - 
_overrepresentMetaDataFactor(newConfig.overrepresentMetaDataFactor), - _overrepresentHeaderBlockFactor(newConfig.overrepresentHeaderBlockFactor), - _defaultRemoveDocType( - newConfig.store50BackwardsCompatibleRemoveEntriesWithDoctype) -{ - validate(); -} - -bool -Options::operator==(const Options& options) const { - if (_minimumFileMetaSlots == options._minimumFileMetaSlots - && _maximumFileMetaSlots == options._maximumFileMetaSlots - && _minimumFileHeaderBlockSize - == options._minimumFileHeaderBlockSize - && _maximumFileHeaderBlockSize - == options._maximumFileHeaderBlockSize - && _minimumFileSize == options._minimumFileSize - && _maximumFileSize == options._maximumFileSize - && _fileBlockSize == options._fileBlockSize - && _revertTimePeriod == options._revertTimePeriod - && _maxDocumentVersions == options._maxDocumentVersions - && _keepRemoveTimePeriod == options._keepRemoveTimePeriod - && _cacheSize == options._cacheSize - && _initialIndexRead == options._initialIndexRead - && _maximumGapToReadThrough == options._maximumGapToReadThrough - && _diskFullFactor == options._diskFullFactor - && _defaultRemoveDocType == options._defaultRemoveDocType) - { - return true; - } - return false; -} - -namespace { - template<typename Number> - void verifyAligned(Number n, uint32_t alignSize, const char* name) { - if (n % alignSize != 0) { - std::ostringstream ost; - ost << name << " " << n - << " must be dividable by block alignment size " << alignSize; - throw vespalib::IllegalStateException( - ost.str(), VESPA_STRLOC); - } - } -} - -void Options::validate() -{ - uint32_t tmp32 = 0; - - // REVERT / KEEP REMOVE TIME PERIODS - if (_revertTimePeriod > _keepRemoveTimePeriod) { - LOG(warning, "Keep all time period (%" PRIu64 ") is set larger than keep " - "removes time period (%" PRIu64 ". 
Adjusting keep removes " - "period to match", - _revertTimePeriod.getTime(), _keepRemoveTimePeriod.getTime()); - _keepRemoveTimePeriod = _revertTimePeriod; - } - if (_maxDocumentVersions < 1) { - LOG(warning, "Max number of document versions attempted set to 0. " - "This is a bad idea for all the obvious reasons. Forcing " - "used value to be 1."); - _maxDocumentVersions = 1; - } - // MINIMUM FILE SIZES - if (_minimumFileMetaSlots < 1) { - LOG(warning, "Minimum file meta slots is not allowed to be less than " - "1. Setting it to 1."); - _minimumFileMetaSlots = 1; - } - if (_minimumFileMetaSlots > 1024*1024) { - LOG(warning, "Minimum file meta slots is not allowed to be more than " - "%u. Setting it to %u.", 1024*1024, 1024*1024); - _minimumFileMetaSlots = 1024*1024; - } - if (_minimumFileHeaderBlockSize > 2*1024*1024*1024u) { - LOG(warning, "Minimum file header block size is not allowed to be above" - " 2 GB. Altering it from %u B to 2 GB.", - _minimumFileHeaderBlockSize); - _minimumFileHeaderBlockSize = 2*1024*1024*1024u; - } - if (_minimumFileSize % _fileBlockSize != 0) { - tmp32 = _fileBlockSize - * ((_minimumFileSize + _fileBlockSize - 1) / _fileBlockSize); - LOG(warning, "Min file size %u not a multiplum of file block size %u. " - "Increasing minimum filesize to %u to match.", - _minimumFileSize, _fileBlockSize, tmp32); - _minimumFileSize = tmp32; - } - // MAXIMUM FILE SIZES - if (_maximumFileMetaSlots != 0 - && _maximumFileMetaSlots < _minimumFileMetaSlots) - { - LOG(warning, "Maximum file meta slots cannot be less than the minimum. " - "Adjusting it from %u to %u.", - _maximumFileMetaSlots, _minimumFileMetaSlots); - _maximumFileMetaSlots = _minimumFileMetaSlots; - } - if (_maximumFileHeaderBlockSize != 0 - && _maximumFileHeaderBlockSize < _minimumFileHeaderBlockSize) - { - LOG(warning, "Maximum file header block size cannot be less than the " - "minimum. 
Adjusting it from %u to %u.", - _maximumFileHeaderBlockSize, _minimumFileHeaderBlockSize); - _maximumFileHeaderBlockSize = _minimumFileHeaderBlockSize; - } - if (_maximumFileSize != 0 && _maximumFileSize < _minimumFileSize) { - LOG(warning, "Maximum file size cannot be less than the " - "minimum. Adjusting it from %u to %u.", - _maximumFileSize, _minimumFileSize); - _maximumFileSize = _minimumFileSize; - } - if (_maximumFileSize % _fileBlockSize != 0) { - tmp32 = _fileBlockSize - * ((_maximumFileSize + _fileBlockSize - 1) / _fileBlockSize); - LOG(warning, "Max file size %u not a multiplum of file block size %u. " - "Increasing maximum to %u bytes to match.", - _maximumFileSize, _fileBlockSize, tmp32); - _maximumFileSize = tmp32; - } - - if (_growFactor < 1.0 || _growFactor >= 100.0) { - throw vespalib::IllegalStateException( - "The grow factor needs to be in the range [1, 100].", - VESPA_STRLOC); - } - - if (!_defaultRemoveDocType.empty()) { - // Log the usage of this option to make it visible, as it is not - // something most people should use. - LOG(info, - "Will write remove entries in 5.0 backwards compatible mode. 
By " - "default this will be done using the '%s' document type unless " - "the document identifier specifies otherwise.", - _defaultRemoveDocType.c_str()); - } -} - -void Options::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; - std::string s("\n" + indent + " "); - - out << "SlotFile options:" - << s << "Minimum file meta slots: " << _minimumFileMetaSlots - << s << "Maximum file meta slots: " << _maximumFileMetaSlots - << s << "Minimum file header block size: " - << _minimumFileHeaderBlockSize << " b" - << s << "Maximum file header block size: " - << _maximumFileHeaderBlockSize << " b" - << s << "Minimum file size: " << _minimumFileSize << " b" - << s << "Maximum file size: " << _maximumFileSize << " b" - << s << "Filesystem block size: " << _fileBlockSize << " b" - << s << "Revert time period: " << _revertTimePeriod << " microsecs" - << s << "Keep remove time period: " - << _keepRemoveTimePeriod << "microsecs" - << s << "Max document versions: " << _maxDocumentVersions - << s << "Cache size: " << _cacheSize - << s << "Initial index read: " << _initialIndexRead << " b" - << s << "Maximum gap to read through: " - << _maximumGapToReadThrough << " b" - << s << "Disk full factor: " << _diskFullFactor - << s << "Grow factor: " << _growFactor - << s << "Overrepresent meta data factor: " - << _overrepresentMetaDataFactor - << s << "Overrepresent header block factor: " - << _overrepresentHeaderBlockFactor - << s << "Write removes with blank documents of default type: " - << _defaultRemoveDocType - << ""; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/options.h b/memfilepersistence/src/vespa/memfilepersistence/common/options.h deleted file mode 100644 index a7310c236a1..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/options.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. -/** - * @class storage::Options - * @ingroup filestorage - * - * @brief Options used by slotfiles - * - * To avoid the need for static variables which cannot be altered while the - * system is running, and which forces all slotfile instances to work with the - * same options, this options class has been created to contain all the options - * a slotfile will use. - * - * @author H�kon Humberset - * @date 2005-10-26 - */ - -#pragma once - -#include <vespa/config-stor-memfilepersistence.h> -#include <vespa/config-persistence.h> -#include <vespa/vespalib/util/printable.h> -#include <vespa/vespalib/stllike/string.h> -#include <vespa/storageframework/generic/clock/time.h> - -namespace storage::memfile { - -struct Options : public vespalib::Printable -{ - // Parameters from def file. See config file for comments. - - // FILE SIZE PARAMETERS - - uint32_t _minimumFileMetaSlots; - uint32_t _maximumFileMetaSlots; - uint32_t _minimumFileHeaderBlockSize; - uint32_t _maximumFileHeaderBlockSize; - uint32_t _minimumFileSize; - uint32_t _maximumFileSize; - uint32_t _fileBlockSize; - - // CONSISTENCY PARAMETERS - framework::MicroSecTime _revertTimePeriod; - framework::MicroSecTime _keepRemoveTimePeriod; - uint32_t _maxDocumentVersions; - - // PERFORMANCE PARAMETERS - uint64_t _cacheSize; - uint32_t _initialIndexRead; - uint32_t _maximumGapToReadThrough; - - double _diskFullFactor; - double _growFactor; - double _overrepresentMetaDataFactor; - double _overrepresentHeaderBlockFactor; - - // COMPATIBILITY PARAMETERS - // If non-empty, will cause remove entries to be written with a blank - // document containing only the document type and identifier rather than - // just writing a document id with no document at all. Note that if a - // document identifier contains a type string it will override this default - // value. - // This is a feature for backwards compatibility with 5.0, as it chokes - // when trying to read remove entries without a document. 
- vespalib::string _defaultRemoveDocType; - - /** - * Creates a new slotfile options instance. Implemented in header file, - * such that the current defaults can be easily viewed. - */ - Options(); - Options(const Options &) noexcept; - Options & operator=(const Options &); - ~Options(); - - Options(const vespa::config::storage::StorMemfilepersistenceConfig& newConfig, - const vespa::config::content::PersistenceConfig& newPersistenceConfig); - - void validate() const { const_cast<Options&>(*this).validate(); } - void validate(); - - /** Printable implementation */ - void print(std::ostream& out, bool verbose, - const std::string& indent) const override; - - bool operator==(const Options& options) const; -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp deleted file mode 100644 index 702f7388c48..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "slotmatcher.h" -#include <vespa/memfilepersistence/memfile/memfile.h> - -namespace storage { -namespace memfile { - -Types::Timestamp -SlotMatcher::Slot::getTimestamp() const -{ - return _slot.getTimestamp(); -} - -bool -SlotMatcher::Slot::isRemove() const -{ - return _slot.deleted(); -} - -const document::GlobalId& -SlotMatcher::Slot::getGlobalId() const -{ - return _slot.getGlobalId(); -} - -document::Document::UP -SlotMatcher::Slot::getDocument(bool headerOnly) const -{ - return _file.getDocument(_slot, headerOnly ? 
HEADER_ONLY : ALL); -} - -document::DocumentId -SlotMatcher::Slot::getDocumentId() const -{ - return _file.getDocumentId(_slot); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h deleted file mode 100644 index cb93dd678e8..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/slotmatcher.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::SlotMatcher - * \ingroup memfile - * - * \brief Implement this to create a filter for MemSlot instances. - * - * Many operations want to do something to a subset of the slots in a file. - * Such operations can retrieve the slots that matches using an implementation - * of this filter. - * - * Creating a slot matcher, you should give information of what type of data - * you want to preload from disk. Typically you want to preload entries you - * need such as to prevent many disk accesses, but if there is some data you - * only need for a few entries, you can use the functions supplied in the - * matcher to get these instances even though they are not cached for all - * entries. - */ - -#pragma once - -#include <vespa/memfilepersistence/memfile/memslot.h> - -namespace storage { -namespace memfile { - -class MemFile; - -class SlotMatcher : private Types { -public: - enum PreloadFlag { - PRELOAD_META_DATA_ONLY = 0x0, - PRELOAD_BODY = 0x1, - PRELOAD_HEADER = 0x3, - PRELOAD_DOC_ID = 0x7 - }; - -protected: - SlotMatcher(PreloadFlag preld) : _preload(preld) {} - - PreloadFlag _preload; - -public: - class Slot { - private: - const MemSlot& _slot; - const MemFile& _file; - - public: - Slot(const MemSlot& slot, const MemFile& file) - : _slot(slot), - _file(file) {}; - - /** - Returns the timestamp of the slot. 
- */ - Timestamp getTimestamp() const; - - /** - * Returns whether a slot is a remove, either regular - * or unrevertable. - */ - bool isRemove() const; - - /** - Returns the global id of the slot. - */ - const GlobalId& getGlobalId() const; - - /** - * Get the document, optionally just the header. If not preloaded, will load - * this document from disk. - */ - Document::UP getDocument(bool headerOnly) const; - - document::DocumentId getDocumentId() const; - }; - - virtual ~SlotMatcher() {} - - virtual bool match(const Slot&) = 0; - - /** Do what is needed to preload wanted content. */ - void preload(MemFile&) const {}; -}; - -} // storage -} // memfile - diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp b/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp deleted file mode 100644 index 19898674555..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/types.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "types.h" -#include <vespa/vespalib/util/exceptions.h> -#include <cassert> -#include <sstream> - -namespace storage::memfile { - -const framework::MicroSecTime Types::MAX_TIMESTAMP(framework::MicroSecTime::max()); -const framework::MicroSecTime Types::UNSET_TIMESTAMP(0); - -void -Types::verifyLegalFlags(uint32_t flags, uint32_t legal, const char* operation) -{ - if ((flags & legal) != flags) { - std::ostringstream ost; - ost << "Invalid flags given to operation " << operation << ". 
" - << std::hex << flags << " given, but only " << legal - << " are legal."; - throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC); - } -} - -std::ostream& -operator<<(std::ostream& os, const DataLocation& loc) -{ - os << "DataLocation(" - << std::dec - << loc._pos - << ", " - << loc._size - << ")"; - return os; -} - -const char* -Types::getMemFileFlagName(MemFileFlag flag) { - switch (flag) { - case FILE_EXIST: return "FILE_EXIST"; - case HEADER_BLOCK_READ: return "HEADER_BLOCK_READ"; - case BODY_BLOCK_READ: return "BODY_BLOCK_READ"; - case BUCKET_INFO_OUTDATED: return "BUCKET_INFO_OUTDATED"; - case SLOTS_ALTERED: return "SLOTS_ALTERED"; - case LEGAL_MEMFILE_FLAGS: assert(false); // Not a single flag - default: return "INVALID"; - } -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/common/types.h b/memfilepersistence/src/vespa/memfilepersistence/common/types.h deleted file mode 100644 index 09485b972a6..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/common/types.h +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::Types - * \ingroup memfile - * - * \brief This class defines and includes some types used in the slotfile layer. - * - * As many of the types are used many places in the layer, we define them here - * rather than in one random class using them. This also makes it easy to switch - * implementation by switching out which class to use here. - * - * This class should not have any members, virtual classes or anything. We don't - * want it to add to the memory footprint of classes, as it will be used also - * by classes kept many times in memory cache. 
- */ -#pragma once - - -#include <vespa/persistence/spi/bucketinfo.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/storageframework/generic/clock/time.h> - -namespace storage::memfile { - -/** - * \class storage::slotfile::DataLocation - * \ingroup memfile - * - * \brief Points to data in a file storing documents. - * - * This file stores info on where header and body parts of document are stored. - * It is really format specific data, but for now it is implemented globally. - * - * All unused locations should be size zero pointing to address zero. A size - * of zero with a non-zero position is invalid, and used to indicate that this - * value is not set yet. (Typically when data isn't persisted to disk yet) - */ -struct DataLocation { - uint32_t _pos; - uint32_t _size; - - DataLocation() : _pos(1), _size(0) {} // pos 1 size 0 is invalid value. - DataLocation(uint32_t pos, uint32_t sz) : _pos(pos), _size(sz) {} - - uint32_t size() const { return _size; } - - uint32_t endPos() const { return _pos + _size; } - - bool valid() const { return (_size > 0 || _pos == 0); } - - bool operator==(const DataLocation& other) const { - return (_pos == other._pos && _size == other._size); - } - bool operator!=(const DataLocation& other) const { - return ! 
(*this == other); - } - - bool operator<(const DataLocation& other) const { - if (_pos == other._pos) { - return _size < other._size; - } - - return _pos < other._pos; - } - - bool contains(const DataLocation& other) const { - return (_pos <= other._pos && _pos + _size >= other._pos + other._size); - } -}; - -std::ostream& operator<<(std::ostream&, const DataLocation&); - -struct Types { - typedef document::BucketId BucketId; - typedef document::Document Document; - typedef document::DocumentId DocumentId; - typedef document::GlobalId GlobalId; - typedef framework::MicroSecTime Timestamp; - typedef Timestamp RevertToken; - typedef vespalib::string String; - typedef spi::BucketInfo BucketInfo; - - static const framework::MicroSecTime MAX_TIMESTAMP; - static const framework::MicroSecTime UNSET_TIMESTAMP; - - enum FileVersion { - UNKNOWN = 0, - TRADITIONAL_SLOTFILE = 0xABCD0001 - }; - - enum SlotFlag { - IN_USE = 0x01, - DELETED = 0x02, - DELETED_IN_PLACE = 0x04, - LEGAL_PERSISTED_SLOT_FLAGS = 0x07, - - // States not stored in file. As we have set aside 16 bits for the - // flags in the fileformat, but use so few, we use some of the - // unused bits in the memory representation to store memory state. 
- ALTERED_IN_MEMORY = 0x02 << 8, - CHECKSUM_OUTDATED = 0x04 << 8, - - // Masks to check for multiple bits - UNUSED = 0xf8f8 - }; - - enum GetFlag { - ALL = 0, - HEADER_ONLY = 0x1, - LEGAL_GET_FLAGS = 0x1 - }; - - enum IteratorFlag { - ITERATE_GID_UNIQUE = 0x1, - ITERATE_REMOVED = 0x2, - LEGAL_ITERATOR_FLAGS = 0x3 - }; - - enum DocContentFlag { - HAS_HEADER_ONLY, - HAS_BODY - }; - - enum DocumentPart { - HEADER, - BODY - }; - - enum MemFileFlag { - FILE_EXIST = 0x0001, - HEADER_BLOCK_READ = 0x0002, - BODY_BLOCK_READ = 0x0004, - BUCKET_INFO_OUTDATED = 0x0008, - SLOTS_ALTERED = 0x0010, - LEGAL_MEMFILE_FLAGS = 0x001f - }; - - enum FileVerifyFlags { - DONT_VERIFY_HEADER = 0x0001, - DONT_VERIFY_BODY = 0x0002, - LEGAL_VERIFY_FLAGS = 0x0003 - }; - - enum FlushFlag { - NONE = 0, - CHECK_NON_DIRTY_FILE_FOR_SPACE = 1 - }; - - enum GetLocationsFlag { - NON_PERSISTED_LOCATIONS = 0x0001, - PERSISTED_LOCATIONS = 0x0002, - NO_SLOT_LIST = 0x0004 - }; - - enum DocumentCopyType { - DEEP_COPY, - SHALLOW_COPY - }; - - static const char* getDocumentPartName(DocumentPart part) { - switch (part) { - case HEADER: return "Header"; - case BODY: return "Body"; - default: return "Invalid"; - } - } - - static const char* getFileVersionName(FileVersion version) { - switch (version) { - case UNKNOWN: return "UNKNOWN"; - case TRADITIONAL_SLOTFILE: return "TRADITIONAL_SLOTFILE"; - default: return "INVALID"; - } - } - - static const char* getMemFileFlagName(MemFileFlag flag); - - static void verifyLegalFlags(uint32_t flags, uint32_t legal, const char* operation); -protected: - ~Types() {} // Noone should refer to objects as Types objects -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt deleted file mode 100644 index 88bb6513ba2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_device OBJECT - SOURCES - device.cpp - disk.cpp - partition.cpp - directory.cpp - devicemapper.cpp - devicemanager.cpp - ioevent.cpp - partitionmonitor.cpp - mountpointlist.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp deleted file mode 100644 index cd1fcafb52c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/device.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "device.h" -#include "ioevent.h" -#include <sstream> -#include <algorithm> - -namespace storage::memfile { - -Device::Device(DeviceManager& manager) - : _manager(manager) -{} - -Device::~Device() {} - -std::string Device::getStateString(State s) -{ - switch (s) { - case OK: return "OK"; - case TOO_MANY_OPEN_FILES: return "TOO_MANY_OPEN_FILES"; - case NOT_FOUND: return "NOT_FOUND"; - case PATH_FAILURE: return "PATH_FAILURE"; - case NO_PERMISSION: return "NO_PERMISSION"; - case IO_FAILURE: return "IO_FAILURE"; - case INTERNAL_FAILURE: return "INTERNAL_FAILURE"; - case DISABLED_BY_ADMIN: return "DISABLED_BY_ADMIN"; - default: - { - std::ostringstream ost; - ost << "UNKNOWN(" << s << ")"; - return ost.str(); - } - } -} - -void -Device::print(std::ostream& out, bool, const std::string&) const -{ - const IOEvent* event = getLastEvent(); - if (event == 0) { - out << Device::OK; - } else { - out << event->getState() << " "; - out << event->getTimestamp() << " "; - std::string desc = event->getDescription(); - std::replace(desc.begin(), desc.end(), '\n', ' '); - out << desc; - } -} - -void -Device::clearEvents() -{ - _events.clear(); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/device.h b/memfilepersistence/src/vespa/memfilepersistence/device/device.h deleted file mode 100644 index 63413e61b0a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/device.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::Device - * \ingroup persistence - * - * @brief Class holding information about a device. - * - * Base class for devices, such as directories, partitions and disks. 
- */ - -#pragma once - -#include <vespa/vespalib/util/printable.h> -#include <list> - -namespace storage { - -namespace memfile { - -class IOEvent; -class DeviceManager; - -class Device : public vespalib::Printable { -private: - // These objects are not possible to copy. They represents physical - // resources on a computer - Device(const Device&); - Device& operator=(Device&); - -protected: - DeviceManager& _manager; - std::list<IOEvent> _events; - - Device(DeviceManager& manager); - -public: - /** - * Storage device states. Most serious states are at the bottom of the - * list. If a single state is requested from the device, the one with - * the highest value wins through. - */ - enum State { - OK, - NOT_FOUND, // Not found - PATH_FAILURE, // Illegal path - NO_PERMISSION, // Permission problems - INTERNAL_FAILURE, // Probably problem with process. - IO_FAILURE, // Disk problems - TOO_MANY_OPEN_FILES, // Too many open files so we can't use disk. - // This is a global problem that will not be stored - // as disk state, but must exist in order to be - // able to report event. - DISABLED_BY_ADMIN // If disabled through admin tool - }; - - static std::string getStateString(State s); - - virtual ~Device(); - - virtual void addEvent(const IOEvent& e) = 0; - virtual void clearEvents(); - virtual const IOEvent* getLastEvent() const = 0; - - const std::list<IOEvent>& getEvents() const { return _events; } - void print(std::ostream& out, bool verbose, const std::string& indent) const override; - -}; - -} // memfile - -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp deleted file mode 100644 index bf03b798e1c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "devicemanager.h" -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/vespalib/util/xmlstream.h> - -namespace storage::memfile { - -DeviceManager::DeviceManager(DeviceMapper::UP mapper, - const framework::Clock& clock) - : _deviceMapper(std::move(mapper)), - _disks(), - _partitions(), - _directories(), - _eventListeners(), - _statPolicy(vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC), - _statPeriod(0), - _clock(clock) -{ -} - -DeviceManager::~DeviceManager() {} - -void -DeviceManager::setPartitionMonitorPolicy( - vespa::config::storage::StorDevicesConfig::StatfsPolicy policy, uint32_t period) -{ - _statPolicy = policy; - _statPeriod = period; - for (std::map<std::string, Partition::SP>::iterator it - = _partitions.begin(); it != _partitions.end(); ++it) - { - Partition& p(*it->second); - if (p.getMonitor() != 0) p.getMonitor()->setPolicy(policy, period); - } -} - -void DeviceManager::notifyDiskEvent(Disk& d, const IOEvent& e) -{ - for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); - it != _eventListeners.end(); ++it) - { - assert(*it != 0); - (*it)->handleDiskEvent(d, e); - } -} - -void -DeviceManager::notifyDirectoryEvent(Directory& dir, const IOEvent& e) -{ - for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); - it != _eventListeners.end(); ++it) - { - assert(*it != 0); - (*it)->handleDirectoryEvent(dir, e); - } -} - -void -DeviceManager::notifyPartitionEvent(Partition& part, const IOEvent& e) -{ - for (std::set<IOEventListener*>::iterator it = _eventListeners.begin(); - it != _eventListeners.end(); ++it) - { - assert(*it != 0); - (*it)->handlePartitionEvent(part, e); - } -} - -void -DeviceManager::addIOEventListener(IOEventListener& listener) -{ - _eventListeners.insert(&listener); -} - -void -DeviceManager::removeIOEventListener(IOEventListener& listener) -{ - _eventListeners.erase(&listener); -} - -Directory::SP -DeviceManager::getDirectory(const std::string& dir, uint16_t index) -{ - 
std::map<std::string, Directory::SP>::iterator it = - _directories.find(dir); - if (it != _directories.end()) { - return it->second; - } - Directory::SP d(new Directory(*this, index, dir)); - _directories[dir] = d; - return d; -} - -Directory::SP -DeviceManager::deserializeDirectory(const std::string& serialized) -{ - // Deserialize object - Directory::SP d(new Directory(serialized, *this)); - // If not existing, just add it. - std::map<std::string, Directory::SP>::iterator it = - _directories.find(d->getPath()); - if (it == _directories.end()) { - _directories[d->getPath()] = d; - return d; - } - // If already existing, merge info with existing entry. - it->second->addEvents(*d); - return it->second; -} - -Partition::SP -DeviceManager::getPartition(const std::string& path) -{ - try{ - std::string mountPoint(_deviceMapper->getMountPoint(path)); - uint64_t id = _deviceMapper->getPartitionId(mountPoint); - std::map<std::string, Partition::SP>::iterator it( - _partitions.find(mountPoint)); - if (it != _partitions.end()) { - return it->second; - } - Partition::SP part(new Partition(*this, id, mountPoint)); - if (part->getMonitor() != 0) { - part->getMonitor()->setPolicy(_statPolicy, _statPeriod); - } - _partitions[mountPoint] = part; - return part; - } catch (vespalib::IoException& e) { - // If we fail to create partition, due to having IO troubles getting - // partition id or mount point, create a partition that doesn't - // correspond to a physical device containing the error found. 
- Partition::SP part(new Partition(*this, -1, path)); - part->addEvent(IOEvent::createEventFromIoException( - e, - _clock.getTimeInSeconds().getTime())); - _partitions[path] = part; - return part; - } -} - -Disk::SP -DeviceManager::getDisk(const std::string& path) -{ - try{ - int devnr = _deviceMapper->getDeviceId(path); - std::map<int, Disk::SP>::iterator it = _disks.find(devnr); - if (it != _disks.end()) { - return it->second; - } - Disk::SP disk(new Disk(*this, devnr)); - _disks[devnr] = disk; - return disk; - } catch (vespalib::IoException& e) { - // Use negative ints for illegal ids. Make sure they don't already - // exist - int devnr = -1; - while (_disks.find(devnr) != _disks.end()) --devnr; - // If we fail to create partition, due to having IO troubles getting - // partition id or mount point, create a partition that doesn't - // correspond to a physical device containing the error found. - Disk::SP disk(new Disk(*this, devnr)); - disk->addEvent(IOEvent::createEventFromIoException( - e, - _clock.getTimeInSeconds().getTime())); - _disks[devnr] = disk; - return disk; - } -} - -void -DeviceManager::printXml(vespalib::XmlOutputStream& xos) const -{ - using namespace vespalib::xml; - xos << XmlTag("devicemanager"); - xos << XmlTag("mapper") << XmlAttribute("type", _deviceMapper->getName()) - << XmlEndTag(); - xos << XmlTag("devices"); - for (std::map<int, Disk::SP>::const_iterator diskIt = _disks.begin(); - diskIt != _disks.end(); ++diskIt) - { - xos << XmlTag("disk") << XmlAttribute("deviceId", diskIt->first); - for (std::map<std::string, Partition::SP>::const_iterator partIt - = _partitions.begin(); partIt != _partitions.end(); ++partIt) - { - if (partIt->second->getDisk() != *diskIt->second) continue; - xos << XmlTag("partition") - << XmlAttribute("id", partIt->second->getId()) - << XmlAttribute("mountpoint", partIt->second->getMountPoint()); - if (partIt->second->getMonitor() != 0) { - xos << *partIt->second->getMonitor(); - } - for (std::map<std::string, 
Directory::SP>::const_iterator dirIt - = _directories.begin(); dirIt != _directories.end(); - ++dirIt) - { - if (dirIt->second->getPartition() != *partIt->second) continue; - xos << XmlTag("directory") - << XmlAttribute("index", dirIt->second->getIndex()) - << XmlAttribute("path", dirIt->second->getPath()) - << XmlEndTag(); - } - xos << XmlEndTag(); - } - xos << XmlEndTag(); - } - xos << XmlEndTag() << XmlEndTag(); -} - -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h deleted file mode 100644 index b8bbb735769..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/devicemanager.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::DeviceManager - * \ingroup persistence - * - * \brief Class keeping information about all devices. - * - * This class keeps track of all the devices so they can be looked up. 
- */ -#pragma once - -#include "devicemapper.h" -#include "directory.h" -#include "disk.h" -#include "ioevent.h" -#include "partition.h" -#include <vespa/vespalib/util/xmlserializable.h> -#include <vespa/storageframework/generic/clock/clock.h> -#include <set> - -namespace storage::memfile { - -class DeviceManager : public vespalib::XmlSerializable { - using StatfsPolicy = vespa::config::storage::StorDevicesConfig::StatfsPolicy; - DeviceMapper::UP _deviceMapper; - std::map<int, Disk::SP> _disks; - std::map<std::string, Partition::SP> _partitions; - std::map<std::string, Directory::SP> _directories; - std::set<IOEventListener*> _eventListeners; - StatfsPolicy _statPolicy; - uint32_t _statPeriod; - const framework::Clock& _clock; - - void setFindDeviceFunction(); -public: - using UP = std::unique_ptr<DeviceManager>; - - DeviceManager(DeviceMapper::UP mapper, const framework::Clock& clock); - DeviceManager(const DeviceManager&) = delete; - DeviceManager& operator=(const DeviceManager&) = delete; - ~DeviceManager(); - - void setPartitionMonitorPolicy(StatfsPolicy, uint32_t period = 0); - - void notifyDiskEvent(Disk& disk, const IOEvent& e); - void notifyDirectoryEvent(Directory& dir, const IOEvent& e); - void notifyPartitionEvent(Partition& part, const IOEvent& e); - - void addIOEventListener(IOEventListener& listener); - void removeIOEventListener(IOEventListener& listener); - - Directory::SP getDirectory(const std::string& dir, uint16_t index); - Directory::SP deserializeDirectory(const std::string& serialized); - Partition::SP getPartition(const std::string& path); - Disk::SP getDisk(const std::string& path); - - std::vector<Directory::SP> getDirectories(const Disk& disk) const; - std::vector<Directory::SP> getDirectories(const Partition& part) const; - - StatfsPolicy getStatPolicy() const { return _statPolicy; } - uint32_t getStatPeriod() const { return _statPeriod; } - - void printXml(vespalib::XmlOutputStream&) const override; - - const framework::Clock& 
getClock() const { return _clock; } -}; - -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp deleted file mode 100644 index 53dd4375d8a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "devicemapper.h" -#include <vespa/vespalib/text/stringtokenizer.h> -#include <vespa/vespalib/util/exceptions.h> -#include <fstream> -#include <sstream> -#include <sys/stat.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.devicemapper"); - -namespace storage::memfile { - -namespace { - uint64_t getDevice(const std::string& path) { - struct stat info; - if (stat(path.c_str(), &info) != 0) { - std::ostringstream ost; - ost << "Failed to run stat to find data on file " << path - << ": errno(" << errno << ") - " << vespalib::getLastErrorString() << "."; - throw vespalib::IoException( - ost.str(), vespalib::IoException::getErrorType(errno), - VESPA_STRLOC); - } - return info.st_dev; - } -} - -AdvancedDeviceMapper::AdvancedDeviceMapper() - : _mountPoints() -{ - // Initialize the mount point map - std::ifstream is; - is.exceptions(std::ifstream::badbit); // Throw exception on failure - is.open("/proc/mounts"); - init(is); -} - -void -AdvancedDeviceMapper::init(std::istream& is) -{ - std::string line; - while (std::getline(is, line)) { - vespalib::StringTokenizer st(line, " \t\f\r\n", ""); - if (st[0] == "none") { - LOG(debug, "Ignoring special mount point '%s'.", line.c_str()); - continue; - } - if (st.size() < 3 || st[1][0] != '/') { - LOG(warning, "Found unexpected line in /proc/mounts: '%s'.", - line.c_str()); - continue; - } - std::string mountPoint(st[1]); - try{ - uint64_t deviceId = getDevice(mountPoint); - LOG(debug, "Added mountpoint '%s' with device id 
%" PRIu64 ".", - mountPoint.c_str(), deviceId); - _mountPoints[deviceId] = mountPoint; - } catch (vespalib::Exception& e) { - LOG(info, "Failed to get device of mountpoint %s. This is normal " - "for some special mountpoints, and doesn't matter unless " - "the device is used by VDS: %s", - mountPoint.c_str(), e.getMessage().c_str()); - } - } -} - -std::string -AdvancedDeviceMapper::getMountPoint(const std::string& fileOnFS) const -{ - uint64_t dev = getDevice(fileOnFS); - std::map<uint64_t, std::string>::const_iterator it(_mountPoints.find(dev)); - if (it == _mountPoints.end()) { - std::ostringstream ost; - ost << "Failed to find a device for file '" << fileOnFS << "'. Stat " - << "returned device " << dev << " but only the following devices " - << "are known:"; - for (it = _mountPoints.begin(); it != _mountPoints.end(); ++it) { - ost << " (" << it->first << " - " << it->second << ")"; - } - throw vespalib::IoException( - ost.str(), vespalib::IoException::INTERNAL_FAILURE, - VESPA_STRLOC); - } - return it->second; -} - -uint64_t -AdvancedDeviceMapper::getPartitionId(const std::string& fileOnFS) const -{ - return getDevice(fileOnFS); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h b/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h deleted file mode 100644 index e8fcef9ff8a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/devicemapper.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * The device mapper is used to get some interesting information for - * storage devies. - */ -#pragma once - -#include <iosfwd> -#include <map> -#include <string> -#include <memory> - -namespace storage { - -namespace memfile { - -/** - * @class DeviceMapper - * @ingroup persistence - * - * @brief Maps directories to partition and disk information. 
- */ -struct DeviceMapper { - typedef std::unique_ptr<DeviceMapper> UP; - - virtual ~DeviceMapper() {} - - virtual const char* getName() const = 0; - - virtual std::string getMountPoint(const std::string& fileOnFS) const = 0; - virtual uint64_t getPartitionId(const std::string& fileOnFS) const = 0; - virtual uint64_t getDeviceId(const std::string& fileOnFS) const = 0; -}; - -/** - * @class SimpleDeviceMapper - * @ingroup persistence - * - * @brief Simple device mapper, not trying to detect any information. - * - * This simple device mapper, assumes all directories used are actually - * mountpoints, and that all mountpoints are on separate disks. This returns - * dummy device numbers. - * - * Using this, each directory used will be handled separately, and there is no - * dependency on information to retrieve from OS. - */ -class SimpleDeviceMapper : public DeviceMapper { - mutable std::map<std::string, int> _devices; - mutable int _lastDevice; - - SimpleDeviceMapper(const SimpleDeviceMapper&); - SimpleDeviceMapper& operator=(const SimpleDeviceMapper&); - -public: - SimpleDeviceMapper() : _devices(), _lastDevice(0) {} - - uint64_t getPartitionId(const std::string& fileOnFS) const override { - std::map<std::string, int>::const_iterator it = _devices.find(fileOnFS); - if (it != _devices.end()) { - return it->second; - } - int dev = ++_lastDevice; - _devices[fileOnFS] = dev; - return dev; - } - std::string getMountPoint(const std::string& path) const override { return path; } - uint64_t getDeviceId(const std::string& fileOnFS) const override { - return getPartitionId(fileOnFS); - } - const char* getName() const override { return "Simple (All directories on individual fake devices)"; } -}; - -/** - * @class AdvancedDeviceMapper - * @ingroup persistence - * - * @brief Device mapper trying to find a real physical model using stat/statfs. - * - * Using this device mapper, stat/statfs will be used to try to find a real - * model. 
Directories mapping to common components wil cause all directories to - * fail if the common component fails. - */ -struct AdvancedDeviceMapper : public DeviceMapper { - std::map<uint64_t, std::string> _mountPoints; - - AdvancedDeviceMapper(); - void init(std::istream&); - - std::string getMountPoint(const std::string& fileOnFS) const override; - uint64_t getPartitionId(const std::string& fileOnFS) const override; - uint64_t getDeviceId(const std::string& fileOnFS) const override { - // Not found a way to detect partitions on common device. - // Returning partition ids for now. - return getPartitionId(fileOnFS); - } - const char* getName() const override { return "Advanced (Read devices attempted found)"; } -}; - -} - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp deleted file mode 100644 index ce5ddaf6e2d..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/directory.cpp +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "directory.h" -#include "devicemanager.h" -#include <vespa/vespalib/util/exceptions.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.device.directory"); - -namespace storage { - -namespace memfile { - -const IOEvent* -Directory::getLastEvent() const -{ - if (!_events.empty()) return &_events.back(); - return _partition->getLastEvent(); -} - -Device::State -Directory::getState() const -{ - const IOEvent* event = getLastEvent(); - return (event ? 
event->getState() : Device::OK); -} - -void -Directory::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - out << _path << " "; - Device::print(out, verbose, indent); -} - -Directory::Directory(DeviceManager& manager, uint16_t index, - const std::string& path) - : Device(manager), - _index(index), - _path(path), - _partition(manager.getPartition(path)) -{ - assert(_partition.get()); -} - -namespace { - struct Entry { - std::string path; - Device::State status; - std::string description; - Entry(); - ~Entry(); - }; - - Entry::Entry() {} - Entry::~Entry() {} - - Entry parseDirectoryString(const std::string& serialized) { - while (1) { - Entry e; - std::string::size_type pos1 = serialized.find(' '); - if (pos1 == std::string::npos) break; - e.path = serialized.substr(0, pos1); - std::string::size_type pos2 = serialized.find(' ', pos1 + 1); - std::string num = serialized.substr(pos1 + 1, pos2 - pos1 - 1); - char* c; - e.status = static_cast<Device::State>( - strtoul(num.c_str(), &c, 10)); - if (*c != '\0') break; - if (pos2 != std::string::npos) { - e.description = serialized.substr(pos2 + 1); - } - return e; - } - std::string msg = "Illegal line in disk status file: '" + serialized - + "'. 
Ignoring it."; - LOG(warning, "%s", msg.c_str()); - throw vespalib::IllegalArgumentException(msg, VESPA_STRLOC); - } -} - -Directory::Directory(const std::string& serialized, - DeviceManager& manager) - : Device(manager), - _index(0), - _path(parseDirectoryString(serialized).path), - _partition(manager.getPartition(_path)) -{ - assert(_partition.get()); - Entry e = parseDirectoryString(serialized); - if (e.status != Device::OK) { - addEvent(IOEvent(manager.getClock().getTimeInSeconds().getTime(), - e.status, e.description, VESPA_STRLOC)); - } -} - -void Directory::addEvent(const IOEvent& e) -{ - switch (e.getState()) { - case Device::IO_FAILURE: - _partition->addEvent(e); - break; - case Device::PATH_FAILURE: - case Device::NO_PERMISSION: - case Device::INTERNAL_FAILURE: - case Device::DISABLED_BY_ADMIN: - default: - if (!e.isGlobal()) { - _events.push_back(e); - } - _manager.notifyDirectoryEvent(*this, e); - } -} - -void -Directory::addEvent(Device::State s, - const std::string& description, - const std::string& location) -{ - addEvent(IOEvent( - _manager.getClock().getTimeInSeconds().getTime(), - s, - description, - location)); - -} - -void Directory::addEvents(const Directory& d) -{ - std::list<IOEvent> events; - events.insert(events.end(), d.getEvents().begin(), d.getEvents().end()); - events.insert(events.end(), d.getPartition().getEvents().begin(), - d.getPartition().getEvents().end()); - events.insert(events.end(), d.getPartition().getDisk().getEvents().begin(), - d.getPartition().getDisk().getEvents().end()); - for (std::list<IOEvent>::const_iterator it = events.begin(); - it != events.end(); ++it) - { - addEvent(*it); - } -} - -} // memfile - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/directory.h b/memfilepersistence/src/vespa/memfilepersistence/device/directory.h deleted file mode 100644 index c7df91f4a48..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/directory.h +++ /dev/null @@ -1,68 +0,0 
@@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::Directory - * \ingroup persistence - * - * \brief Class representing a directory used by Vespa storage. - * - * IMPORTANT: Directory objects may be generated for faulty directories too, - * thus creating the object must not result in a disk operation. - */ -#pragma once - -#include "partition.h" - -namespace storage { - -namespace memfile { - -class Directory : public Device { - uint16_t _index; - std::string _path; - Partition::SP _partition; - - // Only DeviceManager can create these objects, so we only need - // to cope with these constructors being so similar there. - Directory(DeviceManager&, uint16_t index, const std::string& path); - Directory(const std::string& serialized, DeviceManager& manager); - - void addEvents(const Directory& d); - - friend class DeviceManager; - -public: - using SP = std::shared_ptr<Directory>; - void setIndex(uint16_t index) { _index = index; } // Used when deserializing - - uint16_t getIndex() const { return _index; } - const std::string& getPath() const { return _path; } - Partition& getPartition() { return *_partition; } - const Partition& getPartition() const { return *_partition; } - - const IOEvent* getLastEvent() const override; - void addEvent(const IOEvent& e) override; - virtual void addEvent(Device::State s, - const std::string& description, - const std::string& location); - - State getState() const; - bool isOk() const { return (getLastEvent() == 0); } - void print(std::ostream& out, bool verbose, const std::string& indent) const override; - bool operator==(const Directory& d) const { return (_path == d._path); } - bool operator!=(const Directory& d) const { return (_path != d._path); } - - // Easy access functions, using the partition monitor to query state of - // partition - - /** Query whether partition is full after adding given amount of data. 
*/ - bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const { - return _partition->getMonitor() == 0 - || _partition->getMonitor()->isFull(afterAdding, maxFillRate); - } - -}; - -} // memfile - -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp deleted file mode 100644 index bc30098a877..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/disk.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "disk.h" -#include "devicemanager.h" - -namespace storage { - -namespace memfile { - -Disk::Disk(DeviceManager& manager, uint64_t id) - : Device(manager), - _id(id) -{ -} - -void Disk::addEvent(const IOEvent& e) -{ - if (!e.isGlobal()) { - _events.push_back(e); - } - _manager.notifyDiskEvent(*this, e); -} - -const IOEvent* -Disk::getLastEvent() const -{ - if (getEvents().size() > 0) - return &getEvents().back(); - return 0; -} - -void -Disk::print(std::ostream& out, bool verbose, const std::string& indent) const -{ - out << "Disk id: " << _id << " "; - Device::print(out, verbose, indent); -} - -} // memfile - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/disk.h b/memfilepersistence/src/vespa/memfilepersistence/device/disk.h deleted file mode 100644 index 3e15d31d93f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/disk.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::Disk - * \ingroup persistence - * - * \brief Class representing a storage unit on a node. - * - * Class representing a storage unit on a node, which can be a physical disk, or - * a device set up by a RAID controller or similar. 
- * - * IMPORTANT: Disk objects may be generated for faulty disks too, thus creating - * the object must not result in a disk operation. - */ - -#pragma once - -#include "device.h" - -namespace storage { - -namespace memfile { - -class Disk : public Device { - uint64_t _id; - - Disk(DeviceManager&, uint64_t id); - - friend class DeviceManager; - -public: - using SP = std::shared_ptr<Disk>; - - uint64_t getId() const { return _id; } - - void addEvent(const IOEvent& e) override; - const IOEvent* getLastEvent() const override; - - bool operator==(const Disk& disk) const { return (_id == disk._id); } - bool operator!=(const Disk& disk) const { return (_id != disk._id); } - void print(std::ostream& out, bool verbose, const std::string& indent) const override; -}; - -} // memfile - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp deleted file mode 100644 index ac1edea6a93..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "ioevent.h" -#include <vespa/vespalib/util/exceptions.h> -#include <ostream> - -namespace storage { - -namespace memfile { - -IOEvent::IOEvent() - : _state(Device::OK), - _description(), - _location(), - _global(false), - _timestamp(0) -{} - -IOEvent::IOEvent(const IOEvent &) = default; -IOEvent & IOEvent::operator = (const IOEvent &) = default; - -IOEvent::~IOEvent() {} - -namespace { - vespalib::string stripBacktrace(const vespalib::string& s) { - vespalib::string::size_type pos = s.find("Backtrace:"); - if (pos == vespalib::string::npos) return s; - while (pos > 0 && (s[pos - 1] == ' ' || s[pos - 1] == '\n')) { - --pos; - } - return s.substr(0, pos); - } -} - -IOEvent::IOEvent(uint32_t timestamp, Device::State s, const vespalib::string& description, - const vespalib::string& location, bool global) - : _state(s), - _description(stripBacktrace(description)), - _location(location), - _global(global), - _timestamp(timestamp) -{ -} - -IOEvent -IOEvent::createEventFromErrno(uint32_t timestamp, - int error, const vespalib::string& extraInfo, - const vespalib::string& location) -{ - vespalib::string err(vespalib::getErrorString(error)); - err += ": " + extraInfo; - switch (error) { - case ENOENT: - return IOEvent(timestamp, Device::NOT_FOUND, err, location); - case ENOTDIR: - case ENAMETOOLONG: - case ELOOP: - case EISDIR: // Using directory as file - case EOPNOTSUPP: // Operation not supported by filesystem - case EROFS: - case EMLINK: - case ENXIO: - case ESPIPE: // Descriptor is a pip/socket/fifo - return IOEvent(timestamp, Device::PATH_FAILURE, err, location); - case EACCES: - return IOEvent(timestamp, Device::NO_PERMISSION, err, location); - case EIO: // IO error occured. - case EINTR: // Read from slow device interrupted before any data. 
- return IOEvent(timestamp, Device::IO_FAILURE, err, location); - case EMFILE: - return IOEvent(timestamp, Device::TOO_MANY_OPEN_FILES, err, - location, true); - case EAGAIN: // Non-blocking read but no data available - case EBADF: // Invalid file descriptor - case EFAULT: // Buffer pointer invalid - case EINVAL: // Faulty input parameter - case ENFILE: - default: - return IOEvent(timestamp, Device::INTERNAL_FAILURE, err, location); - } -} - -IOEvent -IOEvent::createEventFromIoException(vespalib::IoException& e, uint32_t timestamp) -{ - Device::State type = Device::INTERNAL_FAILURE; - switch (e.getType()) { - case vespalib::IoException::NOT_FOUND: - type = Device::NOT_FOUND; break; - case vespalib::IoException::ILLEGAL_PATH: - type = Device::PATH_FAILURE; break; - case vespalib::IoException::NO_PERMISSION: - type = Device::NO_PERMISSION; break; - case vespalib::IoException::DISK_PROBLEM: - type = Device::IO_FAILURE; break; - case vespalib::IoException::TOO_MANY_OPEN_FILES: - type = Device::TOO_MANY_OPEN_FILES; break; - case vespalib::IoException::INTERNAL_FAILURE: - case vespalib::IoException::NO_SPACE: - case vespalib::IoException::CORRUPT_DATA: - case vespalib::IoException::DIRECTORY_HAVE_CONTENT: - case vespalib::IoException::FILE_FULL: - case vespalib::IoException::ALREADY_EXISTS: - case vespalib::IoException::UNSPECIFIED: - type = Device::INTERNAL_FAILURE; break; - } - return IOEvent(timestamp, type, e.getMessage(), e.getLocation()); -} - -void -IOEvent::print(std::ostream & os, bool verbose, const std::string& indent) const -{ - (void) indent; - os << "IOEvent("; - os << Device::getStateString(_state); - if (verbose) { - if (_description.size() > 0) { - os << ", " << _description; - } - if (_location.size() > 0) { - os << ", " << _location; - } - os << ", time " << _timestamp; - } - os << ")"; -} - -} // memfile - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h 
b/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h deleted file mode 100644 index c22c49e084c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/ioevent.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::IOEvent - * \ingroup persistence - * - * \brief Class representing an IO event. An event tied to a device. - */ -#pragma once - -#include "device.h" - -namespace vespalib { class IoException; } -namespace storage { - -namespace memfile { - -class IOEvent : public vespalib::Printable { -public: - IOEvent(); - - IOEvent(uint32_t timestamp, - Device::State s, - const vespalib::string & description, - const vespalib::string & location, - bool global = false); - IOEvent(const IOEvent &); - IOEvent & operator = (const IOEvent &); - IOEvent(IOEvent &&) = default; - IOEvent & operator = (IOEvent &&) = default; - - ~IOEvent(); - - static IOEvent createEventFromErrno(uint32_t timestamp, - int error, - const vespalib::string& extraInfo = "", - const vespalib::string& location = ""); - static IOEvent createEventFromIoException(vespalib::IoException& e, - uint32_t timestamp); - - Device::State getState() const { return _state; } - const vespalib::string& getDescription() const { return _description; } - - void print(std::ostream& out, bool verbose, - const std::string& indent) const override; - - /** - * Global events aren't tied to device they was found in. They should not - * be saved on each device or be a reason to disable one. 
- */ - bool isGlobal() const { return _global; } - - uint32_t getTimestamp() const { return _timestamp; } - -private: - Device::State _state; - vespalib::string _description; - vespalib::string _location; - bool _global; - uint32_t _timestamp; -}; - -class Directory; -class Partition; -class Disk; - -/** - * \class storage::IOEventListener - * \ingroup persistence - * - * \brief Interface to implement if you want IO events. Register at manager. - */ -struct IOEventListener { - virtual void handleDirectoryEvent(Directory& dir, const IOEvent& e) = 0; - virtual void handlePartitionEvent(Partition& part, const IOEvent& e) = 0; - virtual void handleDiskEvent(Disk& disk, const IOEvent& e) = 0; - - virtual ~IOEventListener() {} -}; - -} - -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp deleted file mode 100644 index 5e1d835e3a2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.cpp +++ /dev/null @@ -1,643 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "mountpointlist.h" -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/persistence/spi/exceptions.h> -#include <vespa/vdslib/state/nodestate.h> -#include <vespa/config/helper/configfetcher.h> -#include <vespa/config/common/exceptions.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/guard.h> -#include <vespa/vespalib/text/stringtokenizer.h> -#include <fstream> -#include <sys/stat.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.mountpointlist"); - -namespace storage::memfile { - -using vespalib::getLastErrorString; -using vespalib::DirPointer; - -MountPointList::MountPointList(const std::string& vdsRoot, - const std::vector<vespalib::string>& diskPath, - DeviceManager::UP manager) - : framework::XmlStatusReporter("mountpointlist", "Disk directories"), - _deviceManager(std::move(manager)), - _vdsRoot(vdsRoot), - _diskPath(diskPath), - _mountPoints(0) -{ -} - -MountPointList::~MountPointList() {} - -spi::PartitionStateList -MountPointList::getPartitionStates() const -{ - spi::PartitionStateList list(_mountPoints.size()); - for (uint32_t i=0; i<_mountPoints.size(); ++i) { - if (!(_mountPoints[i]->isOk())) { - const IOEvent* event = _mountPoints[i]->getLastEvent(); - list[i] = spi::PartitionState(spi::PartitionState::DOWN, event->getDescription()); - } - } - - return list; -} - -void -MountPointList::init(uint16_t diskCount) -{ - initDisks(); - scanForDisks(); - readFromFile(); - if (verifyHealthyDisks(diskCount == 0 ? -1 : diskCount)) { - // Initialize monitors after having initialized disks, such as to - // not create them for invalid disks. - initializePartitionMonitors(); - } - if (diskCount != 0 && _mountPoints.size() != diskCount) { - std::ostringstream ost; - ost << _mountPoints.size() - << " mount points found. 
Expected " << diskCount - << " mount points to exist."; - LOG(error, "%s", ost.str().c_str()); - throw config::InvalidConfigException(ost.str(), VESPA_STRLOC); - } -} - -void -MountPointList::initDisks() -{ - if (_diskPath.empty()) return; - - using vespalib::make_string; - - vespalib::string vdsDisksPath = make_string("%s/disks", _vdsRoot.c_str()); - vespalib::mkdir(vdsDisksPath); - - for (size_t diskIndex = 0; diskIndex < _diskPath.size(); ++diskIndex) { - auto disk_path = make_string( - "%s/d%zu", vdsDisksPath.c_str(), diskIndex); - if (pathExists(disk_path)) continue; - - vespalib::mkdir(_diskPath[diskIndex]); - - try { - vespalib::symlink(_diskPath[diskIndex], disk_path); - } catch (vespalib::IoException& dummy) { - // The above mkdir() created disk_path as a directory, or a - // subdirectory of disk_path, which is OK. - (void) dummy; - } - } -} - -void -MountPointList::initializePartitionMonitors() -{ - std::set<Partition*> seen; - for (uint32_t i=0; i<_mountPoints.size(); ++i) { - if (!(_mountPoints[i]->isOk())) continue; - Partition* part = &_mountPoints[i]->getPartition(); - std::set<Partition*>::const_iterator it(seen.find(part)); - if (it == seen.end()) { - part->initializeMonitor(); - seen.insert(part); - } - } -} - -void -MountPointList::scanForDisks() -{ - _mountPoints.clear(); - std::vector<Directory::SP> entries; - DirPointer dir(opendir((_vdsRoot + "/disks").c_str())); - struct dirent* entry; - if (dir) while ((entry = readdir(dir))) { - if (entry == 0) { - std::ostringstream ost; - ost << "Failed to read directory \"" << _vdsRoot << "/disks\", " - << "errno " << errno << ": " << getLastErrorString(); - throw vespalib::IoException(ost.str(), - vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); - } - std::string name(reinterpret_cast<char*>(&entry->d_name)); - assert(name.size() > 0); - if (name[0] == '.') continue; - // To be a valid d<digit> name, size must be at least 2 - if (name.size() < 2 || name[0] != 'd') { - LOG(warning, "File %s in disks 
directory is faulty named for a " - "disk directory, ignoring it.", name.c_str()); - continue; - } - char* endp; - uint32_t diskNr = strtoul(name.c_str()+1, &endp, 10); - // If rest of name is not a number, ignore - if (*endp != '\0') { - LOG(warning, "File %s in disks directory is faulty named for a " - "disk directory, ignoring it.", name.c_str()); - continue; - } - // If number is out of range, ignore.. - if (diskNr >= 254) { - LOG(warning, "Ignoring disk directory %s, as max directories have " - "been set to 254.", name.c_str()); - continue; - } - - // Valid disk directory.. Add entry.. - if (entries.size() <= diskNr) { - entries.resize(diskNr + 1); - } - LOG(debug, "Found disk directory %u: %s", diskNr, name.c_str()); - entries[diskNr] = _deviceManager->getDirectory( - _vdsRoot + "/disks/" + name, diskNr); - - // We only care about directories (or symlinks). DT_UNKNOWN must be handled explicitly. - if (entry->d_type != DT_DIR && entry->d_type != DT_LNK && entry->d_type != DT_UNKNOWN) { - std::ostringstream ost; - ost << "File " << name << " in disks directory is not a directory."; - LOG(warning, "%s", ost.str().c_str()); - entries[diskNr]->addEvent(Device::PATH_FAILURE, - ost.str(), VESPA_STRLOC); - } - - // Not all filesystems support d_type. Have to stat if this equals DT_UNKNOWN. - if (entry->d_type == DT_UNKNOWN) { - struct stat st; - lstat(entries[diskNr]->getPath().c_str(), &st); - if (!S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) { - std::ostringstream ost; - ost << "File " << name << " in disks directory is not a directory."; - LOG(warning, "%s", ost.str().c_str()); - entries[diskNr]->addEvent(Device::PATH_FAILURE, - ost.str(), VESPA_STRLOC); - } - } - } else if (errno == ENOENT) { - std::ostringstream ost; - ost << "Disk directory \"" << _vdsRoot << "/disks\" not created. VDS " - << "needs this to know which disks to use. 
See vespa doc."; - throw NoDisksException(ost.str(), VESPA_STRLOC); - } else { - std::ostringstream ost; - ost << "Failed to open directory \"" << _vdsRoot << "/disks\", errno " - << errno << ": " << getLastErrorString(); - throw vespalib::IoException(ost.str(), - vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); - } - // Assign found disks to the instance - _mountPoints.resize(entries.size()); - for (uint32_t i=0; i<_mountPoints.size(); ++i) { - if (!entries[i].get()) { - if (!_mountPoints[i].get() || - _mountPoints[i]->getState() == Device::OK) - { - std::ostringstream ost; - ost << _vdsRoot + "/disks/d" << i; - _mountPoints[i] = _deviceManager->getDirectory(ost.str(), i); - _mountPoints[i]->addEvent( - Device::NOT_FOUND, - "Disk not found during scanning of disks directory", - VESPA_STRLOC); - } - LOG(warning, "Disk %u was not found.", i); - } else if (!_mountPoints[i].get() || - _mountPoints[i]->getState() == Device::NOT_FOUND) - { - _mountPoints[i] = entries[i]; - } - } -} - -namespace { - /** - * Get the disk nr of the given mountpoint, - * or -1 if the mountpoint is illegal. 
- */ - int getDiskNr(const std::string& mountPoint) { - std::string::size_type pos1 = mountPoint.rfind('/'); - if (pos1 == std::string::npos || - pos1 + 2 >= mountPoint.size() || - mountPoint[pos1+1] != 'd') - { - return -1; - } - char* endp; - std::string digit(mountPoint.substr(pos1+2)); - const char* digitptr = digit.c_str(); - int diskNr = strtoul(digitptr, &endp, 10); - if (digitptr[0] == '\0' || *endp != '\0') return -1; - return diskNr; - } -} - -void -MountPointList::readFromFile() -{ - std::vector<Directory::SP> entries; - // Read entries from disk - std::ifstream is; - // Throw exception if failing to read file - is.exceptions(std::ifstream::badbit); - is.open(getDiskStatusFileName().c_str()); - std::string line("EOF"); - while (std::getline(is, line)) { - if (line == "EOF") { break; } - Directory::SP dir = _deviceManager->deserializeDirectory(line); - int diskNr = getDiskNr(dir->getPath()); - if (diskNr == -1) { - LOG(warning, "Found illegal disk entry '%s' in vds disk file %s.", - line.c_str(), getDiskStatusFileName().c_str()); - } else { - dir->setIndex(diskNr); - if (entries.size() <= static_cast<uint32_t>(diskNr)) { - entries.resize(diskNr + 1); - } - entries[diskNr] = dir; - } - } - if (line != "EOF" || std::getline(is, line)) { - LOG(warning, "Disk status file %s did not end in EOF.", - getDiskStatusFileName().c_str()); - } - // Assign entries to this instance - if (_mountPoints.size() < entries.size()) { - _mountPoints.resize(entries.size()); - } - for (uint32_t i=0; i<entries.size(); ++i) { - if (entries[i].get() && - entries[i]->getState() != Device::OK && - entries[i]->getState() != Device::NOT_FOUND) - { - _mountPoints[i] = entries[i]; - } - } -} - -void -MountPointList::writeToFile() const -{ - try{ - std::string filename(getDiskStatusFileName()); - std::string tmpFilename(filename + ".tmp"); - std::ofstream os(tmpFilename.c_str()); - if (os.fail()) { - LOG(warning, "Failed to open %s.tmp for writing. 
Not writing " - "disks.status file.", filename.c_str()); - return; - } - for (std::vector<Directory::SP>::const_iterator it - = _mountPoints.begin(); it != _mountPoints.end(); ++it) - { - if (it->get() && - (*it)->getState() != Device::OK) - { - os << **it << "\n"; - } - } - os << "EOF"; - os.close(); - if (os.fail()) { - LOG(warning, "Failed to write %s.tmp. disks.status file might now " - "be corrupt as we failed while writing it.", - filename.c_str()); - return; - } - vespalib::rename(tmpFilename, filename, false, false); - LOG(debug, "Mount point list saved to file %s.", filename.c_str()); - } catch (std::exception& e) { - LOG(warning, "Failed to write disk status file: %s", e.what()); - } -} - -namespace { - void testMountPoint(Directory& mountPoint) { - struct stat filestats; - if (stat(mountPoint.getPath().c_str(), &filestats) != 0) { - switch (errno) { - case ENOTDIR: - case ENAMETOOLONG: - case ENOENT: - case EACCES: - case ELOOP: - { - mountPoint.addEvent(Device::PATH_FAILURE, - getLastErrorString(), - VESPA_STRLOC); - return; - } - case EIO: - { - mountPoint.addEvent(Device::IO_FAILURE, - getLastErrorString(), VESPA_STRLOC); - return; - } - case EFAULT: - default: - assert(0); // Should never happen - } - } - // At this point we know the mount point exists.. 
- if (!(S_ISDIR(filestats.st_mode))) { - mountPoint.addEvent( - Device::PATH_FAILURE, - "The path exist, but is not a directory.", - VESPA_STRLOC); - } - } - - struct Chunk { - uint32_t nr; - uint32_t total; - - Chunk() : nr(0), total(0) {} // Invalid - bool valid() const { return (nr < total); } - }; - - Chunk getChunkDef(const std::string& mountPoint) { - vespalib::File file(mountPoint + "/chunkinfo"); - file.open(vespalib::File::READONLY); - std::string buffer; - buffer.resize(200, '\0'); - size_t read(file.read(&buffer[0], buffer.size(), 0)); - buffer.resize(read); - vespalib::StringTokenizer tokenizer(buffer, "\n", ""); - - Chunk chunk; - if (tokenizer.size() < 3) { - return chunk; - } - - char *c; - chunk.nr = strtoul(tokenizer[1].c_str(), &c, 10); - if (tokenizer[1].c_str() + tokenizer[1].size() != c) return Chunk(); - chunk.total = strtoul(tokenizer[2].c_str(), &c, 10); - if (tokenizer[2].c_str() + tokenizer[2].size() != c) return Chunk(); - return chunk; - } - - void writeChunkDef(Chunk c, const std::string& mountPoint) { - vespalib::File file(mountPoint + "/chunkinfo"); - file.open(vespalib::File::CREATE | vespalib::File::TRUNC, true); - std::ostringstream ost; - ost << "# This file tells VDS what data this mountpoint may contain.\n" - << c.nr << "\n" - << c.total << "\n"; - std::string content(ost.str()); - file.write(&content[0], content.size(), 0); - } - - Device::State getDeviceState(vespalib::IoException::Type type) { - using vespalib::IoException; - switch (type) { - case IoException::ILLEGAL_PATH: return Device::PATH_FAILURE; - case IoException::NO_PERMISSION: return Device::NO_PERMISSION; - case IoException::DISK_PROBLEM: return Device::IO_FAILURE; - case IoException::INTERNAL_FAILURE: return Device::INTERNAL_FAILURE; - default: ; - } - return Device::OK; - } - - bool emptyDir(Directory& dir) { - const std::string& path(dir.getPath()); - errno = 0; - DirPointer dirdesc(opendir(path.c_str())); - struct dirent* entry; - if (dirdesc) while ((entry = 
readdir(dirdesc))) { - if (errno) break; - std::string name(reinterpret_cast<char*>(&entry->d_name)); - if (name == "." || name == "..") continue; - return false; - } - if (dirdesc == 0 || errno) { - std::ostringstream ost; - ost << "Failed to read directory \"" << path << "\", " - << "errno " << errno << ": " << getLastErrorString(); - dir.addEvent(getDeviceState(vespalib::IoException::getErrorType(errno)), - ost.str(), - VESPA_STRLOC); - throw vespalib::IoException(ost.str(), - vespalib::IoException::DISK_PROBLEM, VESPA_STRLOC); - } - return true; - } - - struct WriteStatusFileIfFailing { - MountPointList& _list; - bool _failed; - - WriteStatusFileIfFailing(MountPointList& list) - : _list(list), _failed(false) {} - ~WriteStatusFileIfFailing() { - if (_failed) _list.writeToFile(); - } - - void reportFailure() { _failed = true; } - }; -} - -bool -MountPointList::verifyHealthyDisks(int mountPointCount) -{ - WriteStatusFileIfFailing statusWriter(*this); - int usable = 0, empty = 0; - std::map<uint32_t, Directory::SP> lackingChunkDef; - // Test disks and get chunkinfo - for (uint32_t i=0, n=_mountPoints.size(); i<n; ++i) { - Directory::SP dir(_mountPoints[i]); - // Insert NOT_FOUND disk if not found, such that operator[] - // can return only valid pointers - if (!dir.get()) { - std::ostringstream ost; - ost << _vdsRoot + "/disks/d" << i; - dir = _deviceManager->getDirectory(ost.str(), i); - dir->addEvent(Device::NOT_FOUND, - "Disk not found during scanning of disks directory", - VESPA_STRLOC); - _mountPoints[i] = dir; - statusWriter.reportFailure(); - } - if (dir->isOk()) { - testMountPoint(*dir); - if (!dir->isOk()) statusWriter.reportFailure(); - } - // Don't touch unhealthy or non-existing disks. 
- if (!dir->isOk()) { - std::ostringstream ost; - ost << "Not using disk " << i << " marked bad: "; - dir->getLastEvent()->print(ost, true, " "); - LOG(warning, "%s", ost.str().c_str()); - continue; - } - - // Read chunkinfo - using vespalib::IoException; - Chunk chunk; - try{ - chunk = getChunkDef(dir->getPath()); - } catch (IoException& e) { - chunk = Chunk(); - if (e.getType() == IoException::NOT_FOUND) { - if (!emptyDir(*dir)) { - dir->addEvent(Device::INTERNAL_FAILURE, - "Foreign data in mountpoint. New " - "mountpoints added should be empty.", ""); - } - } else { - LOG(warning, "Failed to read chunkinfo file from mountpoint %s", - dir->getPath().c_str()); - Device::State newState(getDeviceState(e.getType())); - if (newState != Device::OK) { - dir->addEvent(newState, e.what(), VESPA_STRLOC); - } - } - } catch (std::exception& e) { - LOG(warning, "Failed to read chunkinfo file from mountpoint %s", - dir->getPath().c_str()); - dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC); - } - - // If disk was found unusable, don't use it. - if (!dir->isOk()) { - LOG(warning, "Unusable disk %d: %s", - i, dir->getLastEvent()->toString(true).c_str()); - statusWriter.reportFailure(); - continue; - } - ++usable; - // Ensure disk fits in with the already detected ones. 
- if (!chunk.valid()) { - ++empty; - lackingChunkDef[i] = dir; - } else if (chunk.nr != i) { - std::ostringstream ost; - ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr - << " (instead of " << i << ")."; - LOG(error, "%s", ost.str().c_str()); - throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); - } else if (mountPointCount == -1) { - mountPointCount = chunk.total; - } else if (static_cast<uint32_t>(mountPointCount) != chunk.total) { - std::ostringstream ost; - ost << "Disk " << dir->getPath() << " thinks it's disk " << chunk.nr - << " of " << chunk.total << " (instead of " << i << " of " - << mountPointCount << ")."; - LOG(error, "%s", ost.str().c_str()); - throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); - } - } - if (empty == usable && usable != mountPointCount && mountPointCount != -1) { - std::ostringstream ost; - ost << "Found " << usable << " disks and config says we're " - << "supposed to have " << mountPointCount << ". Not initializing " - << "disks."; - throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); - } - bool retval = true; - // Handle case where no chunkinfo file present (none/unusable/new disks) - if (mountPointCount == -1) { - if (_mountPoints.size() == 0) { - LOG(error, "No disks configured for storage node. Disk " - "directories/symlinks for this node should be created " - "in %s/disks/. Please refer to VDS documentation to " - "learn how to add disks", _vdsRoot.c_str()); - throw spi::HandledException("No disks configured", VESPA_STRLOC); - } else if (usable == 0) { - LOG(error, "All of the configured disks are unusable. " - "Please refer to previous warnings and the VDS " - "documentation for troubleshooting"); - throw spi::HandledException("All disks unusable", VESPA_STRLOC); - } else { - mountPointCount = _mountPoints.size(); - LOG(info, "All disks empty. 
Setting up node to run with the %u " - "found disks.", mountPointCount); - retval = false; - } - } - // Write chunkdef files where these are missing - for (std::map<uint32_t, Directory::SP>::const_iterator it - = lackingChunkDef.begin(); it != lackingChunkDef.end(); ++it) - { - const Directory::SP &dir = it->second; - Chunk c; - c.nr = it->first; - c.total = mountPointCount; - if (c.nr >= c.total) { - LOG(warning, "Can't use disk %u of %u as the index is too high. " - "(Disks are indexed from zero)", c.nr, c.total); - continue; - } - if (!emptyDir(*dir)) { - LOG(warning, "Not creating chunkinfo file on disk %u as it already " - "contains data. If you want to include the disk, " - "create chunkinfo file manually.", c.nr); - assert(!dir->isOk()); - continue; - } - using vespalib::IoException; - try{ - writeChunkDef(c, dir->getPath()); - retval = true; - } catch (IoException& e) { - statusWriter.reportFailure(); - LOG(warning, "Failed to write chunkinfo file to mountpoint %s.", - dir->getPath().c_str()); - Device::State newState(getDeviceState(e.getType())); - if (newState != Device::OK) { - dir->addEvent(newState, e.what(), VESPA_STRLOC); - } - } catch (std::exception& e) { - statusWriter.reportFailure(); - LOG(warning, "Failed to write chunkinfo file to mountpoint %s", - dir->getPath().c_str()); - dir->addEvent(Device::INTERNAL_FAILURE, e.what(), VESPA_STRLOC); - } - } - // If we need more entries in mountpointlist, due to chunkinfo - // showing more indexes, add them. 
- for (int i = _mountPoints.size(); i < mountPointCount; ++i) { - std::ostringstream ost; - ost << _vdsRoot + "/disks/d" << i; - Directory::SP dir(_deviceManager->getDirectory(ost.str(), i)); - dir->addEvent(Device::NOT_FOUND, - "Disk not found during scanning of disks directory", - VESPA_STRLOC); - _mountPoints.push_back(dir); - } - if (static_cast<int>(_mountPoints.size()) > mountPointCount) { - _mountPoints.resize(mountPointCount); - } - return retval; -} - -uint16_t -MountPointList::findIndex(const Directory& dir) const -{ - for (uint16_t i = 0; i < _mountPoints.size(); ++i) { - if (_mountPoints[i].get() != 0 && dir == *_mountPoints[i]) return i; - } - throw vespalib::IllegalArgumentException( - "Could not find directory " + dir.toString(), VESPA_STRLOC); -} - -std::string -MountPointList::getDiskStatusFileName() const -{ - return _vdsRoot + "/disks.status"; -} - -vespalib::string -MountPointList::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, - const framework::HttpUrlPath&) const -{ - xos << *_deviceManager; - return ""; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h b/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h deleted file mode 100644 index 350e8f6e1d5..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/mountpointlist.h +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::MountPointList - * \ingroup persistence - * - * \brief Class holding information about the mount points used by storage - * - * We need to keep a list of mount points, to read and write the mount point - * file, and to access what mount points should be used and not. - * - * NOTE: A mountpoint is often referred to as a disk, even though you technicly - * can have multiple mountpoints per partition and multiple partitions per disk. 
- * - * IMPORTANT: Remember to call verifyHealthyDisks() before starting to use them. - */ - -#pragma once - -#include "devicemanager.h" -#include "directory.h" -#include <vespa/storageframework/generic/status/xmlstatusreporter.h> -#include <vespa/persistence/spi/persistenceprovider.h> -#include <vespa/vespalib/util/printable.h> - -namespace storage { -namespace lib { - class NodeState; -} - -namespace memfile { - -struct MountPointList : public framework::XmlStatusReporter { - typedef std::unique_ptr<MountPointList> UP; - - /** Create a mount point list. */ - MountPointList(const std::string& vdsRoot, - const std::vector<vespalib::string>& diskPath, - std::unique_ptr<DeviceManager>); - ~MountPointList(); - - DeviceManager& getDeviceManager() { return *_deviceManager; } - - /** - * Call init to initialize the mount point list in the regular fashion. - * @param diskCount Number of disks to find, or 0 to auto-detect. - * @return The number of usable disks found. - */ - void init(uint16_t diskCount); - - /** - * Initialize the disks, see description of diskPath config in - * stor-devices. Will be called as part of init(). - */ - void initDisks(); - - /** - * Scan disks directory for disks. Add entries found, which does not exist, - * or are marked NOT_FOUND to this instance. - * - * To prevent reading from possible bad disks, we cannot access the disks - * themselves. Thus, in case of symlinks, it assumes the symlink is to a - * directory. - */ - void scanForDisks(); - - /** - * Read the disk status file and adjust the list. - * Important that any entry marking a disk bad (except for NOT_FOUND if it - * should be in the file) overrides any disks marked ok in this instance. - * - * Similarily to scanForDisks(), this does not access the disks itself. - */ - void readFromFile(); - - /** - * Initialize the partition monitors within the partitions. Done after - * partition creation, as partition objects are generated for bad disks. 
- */ - void initializePartitionMonitors(); - - /** - * Write the current state of disks to the disk status file. - * Disks that are OK or NOT_FOUND does not need to be written to file. - */ - void writeToFile() const; - - /** - * Go through all the mountpoints marked ok, and check that they work. - * <ul> - * <li> Verify that symlinks point to a directory, not a file. - * <li> Read disk chunk files, stating mountpoint is number A/N. - * <li> Write disk chunk files on mountpoints missing these. - * - * IMPORTANT: This must be called before starting to use the disks. - * getSize() may not return correct size before this has been called. - * - * @return True if there are at least one mountpoint appearing healthy. - * @throws document::IllegalStateException If the mountpoint chunk files - * disagree on how many mountpoints there are. - */ - bool verifyHealthyDisks(int mountPointCount); - - /** Get how many mountpoints exist. */ - uint32_t getSize() const { return _mountPoints.size(); } - - /** Get the given mountpoint. */ - Directory& operator[](uint16_t i) - { assert(_mountPoints.size() > i); return *_mountPoints[i]; } - const Directory& operator[](uint16_t i) const - { assert(_mountPoints.size() > i); return *_mountPoints[i]; } - - uint16_t findIndex(const Directory& dir) const; - - vespalib::string reportXmlStatus(vespalib::xml::XmlOutputStream&, const framework::HttpUrlPath&) const override; - - /** - * Returns the current state of the mountpoints. - */ - spi::PartitionStateList getPartitionStates() const; - -private: - std::unique_ptr<DeviceManager> _deviceManager; - std::string _vdsRoot; - std::vector<vespalib::string> _diskPath; - std::vector<Directory::SP> _mountPoints; - - /** Get the name used for the disk status file. 
*/ - std::string getDiskStatusFileName() const; -}; - -} // memfile - -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp deleted file mode 100644 index 830c971f70a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/partition.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "partition.h" -#include "devicemanager.h" -#include <vespa/vespalib/util/exceptions.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.device.partition"); - -namespace storage::memfile { - -Partition::Partition(DeviceManager& manager, - uint64_t id, - const std::string& mountPoint) - : Device(manager), - _id(id), - _mountPoint(mountPoint), - _disk(manager.getDisk(mountPoint)), - _monitor() -{ - assert(_disk.get()); -} - -void Partition::initializeMonitor() -{ - try{ - _monitor.reset(new PartitionMonitor(_mountPoint)); - _monitor->setPolicy(_manager.getStatPolicy(), _manager.getStatPeriod()); - } catch (vespalib::IoException& e) { - std::ostringstream error; - error << "Failed to create partition monitor for partition " - << _mountPoint << ": " << e.getMessage(); - LOG(warning, "%s", error.str().c_str()); - addEvent(IOEvent(_manager.getClock().getTimeInSeconds().getTime(), - Device::IO_FAILURE, error.str(), VESPA_STRLOC)); - } -} - -void Partition::addEvent(const IOEvent& e) -{ - // No events yet defined that is partition specific - _disk->addEvent(e); -} - -const IOEvent* -Partition::getLastEvent() const -{ - if (!_events.empty()) return &_events.back(); - return _disk->getLastEvent(); -} - -void -Partition::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - out << "Partition: " << _id << " " << _mountPoint << " "; - Device::print(out, verbose, indent); -} - -} diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/device/partition.h b/memfilepersistence/src/vespa/memfilepersistence/device/partition.h deleted file mode 100644 index c90676192b9..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/partition.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::Partition - * \ingroup persistence - * - * \brief Class representing a disk partition. - * - * IMPORTANT: Partition objects may be generated for faulty partitions too, - * thus creating the object must not result in a disk operation. - */ - -#pragma once - -#include "disk.h" -#include "partitionmonitor.h" - -namespace storage { - -namespace memfile { - -class Partition : public Device { - uint64_t _id; - std::string _mountPoint; - Disk::SP _disk; - PartitionMonitor::UP _monitor; - - Partition(DeviceManager& manager, uint64_t id, - const std::string& mountPoint); - - friend class DeviceManager; - -public: - using SP = std::shared_ptr<Partition>; - - void initializeMonitor(); - - uint64_t getId() const { return _id; } - const std::string& getMountPoint() const { return _mountPoint; } - - Disk& getDisk() { return *_disk; } - const Disk& getDisk() const { return *_disk; } - - PartitionMonitor* getMonitor() { return _monitor.get(); } - const PartitionMonitor* getMonitor() const { return _monitor.get(); } - - void addEvent(const IOEvent& e) override; - const IOEvent* getLastEvent() const override; - void print(std::ostream& out, bool verbose, const std::string& indent) const override; - bool operator==(const Partition& p) const { return (_id == p._id); } - bool operator!=(const Partition& p) const { return (_id != p._id); } -}; - -} // memfile - -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp deleted 
file mode 100644 index 68b02f87a04..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.cpp +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "partitionmonitor.h" -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/xmlstream.h> -#include <ostream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.device.partition.monitor"); - -namespace storage::memfile { - -namespace { - - uint32_t getBlockSize(struct statvfs& info) { - // f_bsize have a strange name in man page, but as far as we've seen - // on actual file systems, it seems to correspond to block size. - return info.f_bsize; - } - - float calcRootOnlyRatio(struct statvfs& info) { - return (static_cast<uint64_t>(info.f_bfree) - - static_cast<uint64_t>(info.f_bavail)) - / info.f_blocks; - } - - struct RealStatter : public PartitionMonitor::Statter { - void statFileSystem(const std::string& file, struct statvfs& info) override { - if (statvfs(file.c_str(), &info) != 0) { - vespalib::asciistream ost; - ost << "Failed to run statvfs to find data on disk containing " - << "file " << file << ": errno(" << errno << ") - " - << vespalib::getLastErrorString() << "."; - throw vespalib::IoException( - ost.str(), vespalib::IoException::getErrorType(errno), - VESPA_STRLOC); - } - } - }; - -} - -uint64_t -PartitionMonitor::calcTotalSpace(struct statvfs& info) const { - // Ignore the part of the filesystem only root can write to. 
- uint64_t nonRootBlocksExisting( - static_cast<uint64_t>(info.f_blocks) - - static_cast<uint64_t>(info.f_bfree) - + static_cast<uint64_t>(info.f_bavail)); - return nonRootBlocksExisting * _blockSize; -} - -uint64_t -PartitionMonitor::calcUsedSpace(struct statvfs& info) const { - return (_partitionSize - info.f_bavail * _blockSize); -} - -float -PartitionMonitor::calcInodeFillRatio(struct statvfs& info) const { - uint64_t freeForRootOnly = info.f_ffree - info.f_favail; - uint64_t nonRootInodes = info.f_files - freeForRootOnly; - float freeInodesRatio = static_cast<float>(info.f_favail) / nonRootInodes; - return float(1.0) - freeInodesRatio; -} - -uint64_t -PartitionMonitor::calcDynamicPeriod() const -{ - uint32_t lastFillRate = (100 * _usedSpace / _partitionSize); - uint32_t maxFillRate = static_cast<uint32_t>(100 * _maxFillRate); - if (lastFillRate >= maxFillRate) { - return 1; - } else { - uint32_t fillDiff = (maxFillRate - lastFillRate); - return _period * fillDiff * fillDiff; - } -} - -PartitionMonitor::PartitionMonitor(const std::string& file) - : _fileOnPartition(file), - _fileSystemId(0), - _policy(STAT_PERIOD), - _blockSize(0), - _partitionSize(0), - _usedSpace(0), - _period(100), - _queriesSinceStat(0), - _maxFillRate(0.98), - _rootOnlyRatio(0), - _inodeFillRate(0), - _statter() -{ - setStatter(std::unique_ptr<Statter>(new RealStatter)); - LOG(debug, "%s: Monitor created with default setting of period at 100.", - _fileOnPartition.c_str()); -} - -PartitionMonitor::~PartitionMonitor() {} - -void -PartitionMonitor::setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy policy, - uint32_t period) -{ - switch (policy) { - case vespa::config::storage::StorDevicesConfig::STAT_ALWAYS: - setAlwaysStatPolicy(); break; - case vespa::config::storage::StorDevicesConfig::STAT_ONCE: - setStatOncePolicy(); break; - case vespa::config::storage::StorDevicesConfig::STAT_PERIOD: - if (period == 0) { - setStatPeriodPolicy(); - } else { - setStatPeriodPolicy(period); 
- } - break; - case vespa::config::storage::StorDevicesConfig::STAT_DYNAMIC: - if (period == 0) { - setStatDynamicPolicy(); - } else { - setStatDynamicPolicy(period); - } - break; - } -} - -void -PartitionMonitor::setAlwaysStatPolicy() -{ - _policy = ALWAYS_STAT; - LOG(debug, "%s: Set stat policy to always stat.", _fileOnPartition.c_str()); -} - -void -PartitionMonitor::setStatOncePolicy() -{ - _policy = STAT_ONCE; - LOG(debug, "%s: Set stat policy to stat once.", _fileOnPartition.c_str()); -} - -void -PartitionMonitor::setStatPeriodPolicy(uint32_t period) -{ - _policy = STAT_PERIOD; - _period = period; - LOG(debug, "%s: Set stat policy to stat every %u attempt.", - _fileOnPartition.c_str(), _period); -} - -void -PartitionMonitor::setStatDynamicPolicy(uint32_t basePeriod) -{ - _policy = STAT_DYNAMIC; - _period = basePeriod; - LOG(debug, "%s: Set stat policy to stat dynamicly with base %u.", - _fileOnPartition.c_str(), _period); -} - -void -PartitionMonitor::setStatter(std::unique_ptr<Statter> statter) -{ - vespalib::LockGuard lock(_updateLock); - _statter = std::move(statter); - struct statvfs info; - _statter->statFileSystem(_fileOnPartition, info); - _blockSize = getBlockSize(info); - _partitionSize = calcTotalSpace(info); - // Calculations further down assumes total size can be held within - // a signed 64 bit. 
- assert(_partitionSize - < static_cast<uint64_t>(std::numeric_limits<int64_t>::max())); - _usedSpace = calcUsedSpace(info); - _rootOnlyRatio = calcRootOnlyRatio(info); - _inodeFillRate = calcInodeFillRatio(info); - _fileSystemId = info.f_fsid; - LOG(debug, "FileSystem(%s): Total size: %" PRIu64 ", used: %" PRIu64 - ", root only %f, max fill rate %f, fill rate %f.", - _fileOnPartition.c_str(), - _partitionSize, - _usedSpace, - _rootOnlyRatio, - _maxFillRate, - static_cast<double>(_usedSpace) / _partitionSize); -} - -void -PartitionMonitor::updateIfNeeded() const -{ - uint32_t period = 0; - switch (_policy) { - case STAT_ONCE: period = std::numeric_limits<uint32_t>::max(); break; - case ALWAYS_STAT: period = 1; break; - case STAT_PERIOD: period = _period; break; - case STAT_DYNAMIC: period = calcDynamicPeriod(); break; - } - if (++_queriesSinceStat >= period) { - struct statvfs info; - try{ - _statter->statFileSystem(_fileOnPartition, info); - _usedSpace = calcUsedSpace(info); - _inodeFillRate = calcInodeFillRatio(info); - _queriesSinceStat = 0; - } catch (vespalib::Exception& e) { - LOG(warning, "Failed to stat filesystem with file %s. Using " - "last stored used space of %" PRIu64 ".", - _fileOnPartition.c_str(), _usedSpace); - } - } -} -uint64_t -PartitionMonitor::getUsedSpace() const -{ - vespalib::LockGuard lock(_updateLock); - updateIfNeeded(); - return _usedSpace; -} - -float -PartitionMonitor::getFillRate(int64_t afterAdding) const -{ - vespalib::LockGuard lock(_updateLock); - updateIfNeeded(); - float fillRate; - if (static_cast<int64_t>(_usedSpace) + afterAdding - >= static_cast<int64_t>(_partitionSize)) - { - fillRate = 1; - } else if (static_cast<int64_t>(_usedSpace) + afterAdding < 0) { - fillRate = 0; - } else { - fillRate = (static_cast<double>(_usedSpace) + afterAdding) - / _partitionSize; - } - if (fillRate < _inodeFillRate) { - fillRate = _inodeFillRate; - LOG(spam, "Inode fill rate is now %f. 
%u requests since last stat.", - fillRate, _queriesSinceStat); - } else { - LOG(spam, "Fill rate is now %f. %u requests since last stat.", - fillRate, _queriesSinceStat); - } - return fillRate; -} - -void -PartitionMonitor::setMaxFillness(float maxFill) -{ - if (maxFill <= 0 || maxFill > 1.0) { - vespalib::asciistream ost; - ost << "Max fill rate must be in the range <0,1]. Value of " - << maxFill << " is not legal."; - throw vespalib::IllegalArgumentException(ost.str(), VESPA_STRLOC); - } - _maxFillRate = maxFill; -} - -void -PartitionMonitor::addingData(uint64_t dataSize) -{ - vespalib::LockGuard lock(_updateLock); - _usedSpace = std::max(_usedSpace, _usedSpace + dataSize); -} - -void -PartitionMonitor::removingData(uint64_t dataSize) -{ - vespalib::LockGuard lock(_updateLock); - _usedSpace = (_usedSpace > dataSize ? _usedSpace - dataSize : 0); -} - -uint64_t -PartitionMonitor::getPartitionId(const std::string& fileOnPartition) -{ - RealStatter realStatter; - struct statvfs info; - realStatter.statFileSystem(fileOnPartition, info); - return info.f_fsid; -} - -namespace { - void printSize(std::ostream& out, uint64_t size) { - std::string s; - if (size < 10 * 1024) { - s = "B"; - } else { - size = size / 1024; - if (size < 10 * 1024) { - s = "kB"; - } else { - size = size / 1024; - if (size < 10 * 1024) { - s = "MB"; - } else { - size = size / 1024; - if (size < 10 * 1024) { - s = "GB"; - } else { - size = size / 1024; - s = "TB"; - } - } - } - } - out << " (" << size << " " << s << ")"; - } -} - -void -PartitionMonitor::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - vespalib::LockGuard lock(_updateLock); - out << "PartitionMonitor(" << _fileOnPartition; - if (verbose) { - out << ") {" - << "\n" << indent << " Fill rate: " - << (100.0 * _usedSpace / _partitionSize) - << " %" - << "\n" << indent << " Inode fill rate: " << (100 * _inodeFillRate) - << " %" - << "\n" << indent << " Detected block size: " << _blockSize - << "\n" << indent 
<< " File system id: " << _fileSystemId - << "\n" << indent << " Total size: " << _partitionSize; - printSize(out, _partitionSize); - out << "\n" << indent << " Used size: " << _usedSpace; - printSize(out, _usedSpace); - out << "\n" << indent << " Queries since last stat: " - << _queriesSinceStat - << "\n" << indent << " Monitor policy: "; - } else { - out << ", "; - } - switch (_policy) { - case STAT_ONCE: out << "STAT_ONCE"; break; - case ALWAYS_STAT: out << "ALWAYS_STAT"; break; - case STAT_PERIOD: out << "STAT_PERIOD(" << _period << ")"; break; - case STAT_DYNAMIC: out << "STAT_DYNAMIC(" << calcDynamicPeriod() << ")"; - break; - } - if (verbose) { - if (_policy == STAT_DYNAMIC) { - out << "\n" << indent << " Period at current fillrate " - << calcDynamicPeriod(); - } - out << "\n" << indent << " Root only ratio " << _rootOnlyRatio - << "\n" << indent << " Max fill rate " << (100 * _maxFillRate) - << " %" - << "\n" << indent << "}"; - } else { - bool inodesFill = false; - double fillRate = static_cast<double>(_usedSpace) / _partitionSize; - if (_inodeFillRate > fillRate) { - inodesFill = true; - fillRate = _inodeFillRate; - } - - out << ", " << _usedSpace << "/" << _partitionSize << " used - " - << (100 * fillRate) << " % full" << (inodesFill ? " (inodes)" : "") - << ")"; - } -} - -void -PartitionMonitor::printXml(vespalib::XmlOutputStream& xos) const -{ - using namespace vespalib::xml; - xos << XmlTag("partitionmonitor") - << XmlContent(toString(true)) - << XmlEndTag(); -} - -void -PartitionMonitor::overrideRealStat(uint32_t blockSize, uint32_t totalBlocks, - uint32_t blocksUsed, float inodeFillRate) -{ - vespalib::LockGuard lock(_updateLock); - if (_policy != STAT_ONCE) { - throw vespalib::IllegalStateException( - "Makes no sense to override real stat if policy isnt set to " - "STAT_ONCE. 
Values will just be set back to real values again.", - VESPA_STRLOC); - } - _blockSize = blockSize; - _partitionSize = totalBlocks * blockSize; - _usedSpace = blocksUsed * blockSize; - _inodeFillRate = inodeFillRate; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h b/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h deleted file mode 100644 index c7c339091cb..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/device/partitionmonitor.h +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::PartitionMonitor - * \ingroup persistence - * - * \brief Monitors how full a file system is. - * - * This class is used by the persistence layer to monitor how full a disk is. - * It remembers how full the disk is, and can also take hints, such that it - * can give reasonable correct answers cheaply. - */ -#pragma once - -#include <vespa/config-stor-devices.h> -#include <vespa/vespalib/util/sync.h> -#include <vespa/vespalib/util/xmlserializable.h> -#include <vespa/vespalib/util/printable.h> -#include <sys/statvfs.h> - - -namespace storage::memfile { - -class PartitionMonitorTest; - -class PartitionMonitor : public vespalib::Printable, - public vespalib::XmlSerializable -{ -public: - using UP = std::unique_ptr<PartitionMonitor>; - - /** - * Use an object to stat through, such that unit tests can fake stat - * responses. 
- */ - struct Statter { - virtual ~Statter() {} - virtual void statFileSystem(const std::string& file, - struct statvfs& info) = 0; - }; - -private: - enum MonitorPolicy { ALWAYS_STAT, STAT_ONCE, STAT_PERIOD, STAT_DYNAMIC }; - - vespalib::Lock _updateLock; - std::string _fileOnPartition; - uint64_t _fileSystemId; - MonitorPolicy _policy; - uint32_t _blockSize; - uint64_t _partitionSize; - mutable uint64_t _usedSpace; - uint32_t _period; - mutable uint32_t _queriesSinceStat; - float _maxFillRate; - float _rootOnlyRatio; - mutable float _inodeFillRate; - std::unique_ptr<Statter> _statter; - - void setStatter(std::unique_ptr<Statter> statter); - uint64_t calcTotalSpace(struct statvfs& info) const; - uint64_t calcUsedSpace(struct statvfs& info) const; - uint64_t calcDynamicPeriod() const; - float calcInodeFillRatio(struct statvfs& info) const; - - friend class PartitionMonitorTest; - -public: - /** Default policy is STAT_PERIOD(100). Default max fill rate 0.98. */ - PartitionMonitor(const std::string& fileOnFileSystem); - ~PartitionMonitor(); - - /** Set monitor policy from config. */ - void setPolicy(vespa::config::storage::StorDevicesConfig::StatfsPolicy, uint32_t period); - - /** Always stat on getFillRate() requests. */ - void setAlwaysStatPolicy(); - /** - * Stat only once, then depend on addingData/removingData hints to provide - * correct answers. - */ - void setStatOncePolicy(); - /** - * Run stat each period getFillRate() request. Depend on hints to keep value - * sane within a period. - */ - void setStatPeriodPolicy(uint32_t period = 100); - /** - * Run stat often when close to full, but seldom when there is lots of free - * space. In current algorithm, we will check each percentage diff from full - * multiplied itself times the baseperiod request. - */ - void setStatDynamicPolicy(uint32_t basePeriod = 10); - - /** Get the file system id of this instance. 
*/ - uint64_t getFileSystemId() const { return _fileSystemId; } - - float getRootOnlyRatio() const { return _rootOnlyRatio; } - - uint64_t getPartitionSize() const { return _partitionSize; } - - uint64_t getUsedSpace() const; - - /** - * Get the fill rate of the file system. Where 0 is empty and 1 is 100% - * full. - */ - float getFillRate(int64_t afterAdding = 0) const; - - /** Set the limit where the file system is considered full. (0-1) */ - void setMaxFillness(float maxFill); - - /** Query whether disk fill rate is high enough to be considered full. */ - bool isFull(int64_t afterAdding = 0, double maxFillRate = -1) const - { - if (maxFillRate == -1) { - maxFillRate = _maxFillRate; - } - return (getFillRate(afterAdding) >= maxFillRate); - } - - /** - * To keep the monitor more up to date without having to do additional stat - * commands, give clues when you add or remove data from the file system. - */ - void addingData(uint64_t dataSize); - - /** - * To keep the monitor more up to date without having to do additional stat - * commands, give clues when you add or remove data from the file system. - */ - void removingData(uint64_t dataSize); - - void print(std::ostream& out, bool verbose, const std::string& indent) const override ; - - /** - * Calculate the file system id for a given file. Used when wanting an - * instance for a new file, but you're unsure whether you already have a - * tracker for that file system. - */ - static uint64_t getPartitionId(const std::string& fileOnPartition); - - /** Used in unit testing only. 
*/ - void overrideRealStat(uint32_t blockSize, uint32_t totalBlocks, - uint32_t blocksUsed, float inodeFillRate = 0.1); - - void printXml(vespalib::XmlOutputStream&) const override; -private: - void updateIfNeeded() const; - -}; - -} // memfile diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/init/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt deleted file mode 100644 index 0d75e0c31dc..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/init/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_init OBJECT - SOURCES - filescanner.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp deleted file mode 100644 index d369c3c7ada..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "filescanner.h" -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/exceptions.h> -#include <iomanip> - -#include <vespa/log/bufferedlogger.h> -LOG_SETUP(".persistence.memfile.filescanner"); - -namespace storage::memfile { - -FileScanner::Metrics::Metrics(framework::Clock& clock) - : metrics::MetricSet("dbinit.filescan", "", - "Metrics for the memfile filescanner"), - _alienFiles(), - _alienFileCounter("alienfiles", "", - "Unknown files found during disk scanning.", this), - _temporaryFilesDeleted("tempfilesdeleted", "", - "Temporary files found and deleted during initialization.", this), - _multipleBucketsSameDisk("multiplebucketssamedisk", "", - "Multiple buckets found on same disk.", this), - _wrongDir("wrongdir", "", - "Number of buckets moved from wrong to right directory.", this), - _wrongDisk("wrongdisk", "", - "Number of buckets found on non-ideal disk.", this), - _dirsListed("dirslisted", "", - "Directories listed in list step of initialization.", this), - _startTime(clock), - _listLatency("listlatency", "", - "Time used until list phase is done. 
(in ms)", this) -{ -} - -FileScanner::Metrics::~Metrics() {} - -FileScanner::FileScanner(framework::ComponentRegister& reg, - const MountPointList& mountPoints, - uint32_t directoryLevels, - uint32_t directorySpread) - : framework::Component(reg, "filescanner"), - _directoryMapper(directoryLevels, directorySpread), - _mountPoints(mountPoints), - _dirLevels(directoryLevels), - _dirSpread(directorySpread), - _globalLock(), - _globalMetrics(getClock()) -{ - registerMetric(_globalMetrics); -} - -FileScanner::~FileScanner() {} - -void -FileScanner::buildBucketList(document::BucketId::List & list, - uint16_t partition, - uint16_t part, uint16_t totalParts) -{ - Context context(_mountPoints[partition], getClock()); - std::vector<uint32_t> path(_dirLevels); - if (_dirLevels > 0) { - // If we have dirlevels, split into parts on top level only - for (uint32_t i=0, n=_dirSpread; i<n; ++i) { - if (i % totalParts == part) { - path[0] = i; - buildBucketList(list, context, path, 1); - } - } - } else if (part == 0) { - // If we don't have dirlevels, send all data in part 0 - buildBucketList(list, context, path); - } - // Grab lock and update metrics - vespalib::LockGuard lock(_globalLock); - std::vector<metrics::Metric::UP> newMetrics; - context._metrics.addToSnapshot(_globalMetrics, newMetrics); - assert(newMetrics.empty()); -} - -void -FileScanner::buildBucketList(document::BucketId::List & list, - Context& context, - std::vector<uint32_t>& path, - uint32_t dirLevel) -{ - if (dirLevel >= _dirLevels) { - buildBucketList(list, context, path); - return; - } - for (uint32_t i=0, n=_dirSpread; i<n; ++i) { - path[dirLevel] = i; - buildBucketList(list, context, path, dirLevel + 1); - } -} - -std::string -FileScanner::getPathName(Context& context, std::vector<uint32_t>& path, - const document::BucketId* bucket) const -{ - std::ostringstream ost; - ost << context._dir.getPath() << std::hex << std::setfill('0'); - for (uint32_t i=0, n=path.size(); i<n; ++i) { - ost << '/' << std::setw(4) 
<< path[i]; - } - if (bucket != 0) { - ost << '/' << std::setw(16) - << bucket->stripUnused().getRawId() << ".0"; - } - return ost.str(); -} - -void -FileScanner::buildBucketList(document::BucketId::List & list, - Context& context, - std::vector<uint32_t>& path) -{ - std::string pathName(getPathName(context, path)); - if (!vespalib::fileExists(pathName)) { - LOG(spam, "Directory %s does not exist.", pathName.c_str()); - return; - } - LOG(spam, "Listing directory %s", pathName.c_str()); - vespalib::DirectoryList dir(vespalib::listDirectory(pathName)); - for (uint32_t i=0; i<dir.size(); ++i) { - if (!processFile(list, context, path, pathName, dir[i])) { - // To only process alien files once, we lock rather than use - // context object. Should be few (none) alien files so shouldn't - // matter from a performance point of view - vespalib::LockGuard lock(_globalLock); - _globalMetrics._alienFileCounter.inc(); - if (_globalMetrics._alienFiles.size() - <= _config._maxAlienFilesLogged) - { - LOG(spam, "Detected alien file %s/%s", - pathName.c_str(), dir[i].c_str()); - _globalMetrics._alienFiles.push_back(pathName + "/" + dir[i]); - } - } - } - context._metrics._dirsListed.inc(); -} - - -// Always called from lister thread (which might be worker thread) -bool -FileScanner::processFile(document::BucketId::List & list, - Context& context, - std::vector<uint32_t>& path, - const std::string& pathName, - const std::string& name) -{ - if (name == "." || name == ".." - || name == "chunkinfo" || name == "creationinfo") - { - LOG(spam, "Ignoring expected file that is not a slotfile '%s'.", - name.c_str()); - return true; - } - document::BucketId bucket(extractBucketId(name)); - if (bucket.getRawId() == 0) { - // Delete temporary files generated by storage - if (name.size() > 4 && name.substr(name.size() - 4) == ".tmp") { - context._metrics._temporaryFilesDeleted.inc(); - LOG(debug, "Deleting temporary file found '%s'. 
Assumed it was " - "generated by storage temporarily while processing a " - "request and process or disk died before operation " - "completed.", - (pathName + "/" + name).c_str()); - vespalib::unlink(pathName + "/" + name); - return true; - } - return false; - } - if (handleBadLocation(bucket, context, path)) { - LOG(spam, "Adding bucket %s.", bucket.toString().c_str()); - list.push_back(bucket); - } - return true; -} - -document::BucketId -FileScanner::extractBucketId(const std::string& name) const -{ - if (name.size() < 9) return document::BucketId(); - std::string::size_type pos = name.find('.'); - if (pos == std::string::npos || pos > 16) return document::BucketId(); - char *endPtr; - document::BucketId::Type idnum = strtoull(&name[0], &endPtr, 16); - if (endPtr != &name[pos]) return document::BucketId(); - uint32_t fileNr = strtol(&name[pos + 1], &endPtr, 16); - if (*endPtr != '\0') return document::BucketId(); - // Check for deprecated name types - if (fileNr != 0) { - LOG(warning, "Found buckets split with old file splitting system. Have " - "you upgraded from VDS version < 3.1 to >= 3.1 ? This " - "requires a refeed as files stored are not backward " - "compatible."); - return document::BucketId(); - } - return document::BucketId(idnum); -} - -bool -FileScanner::handleBadLocation(const document::BucketId& bucket, - Context& context, - std::vector<uint32_t>& path) -{ - std::vector<uint32_t> expectedPath(_directoryMapper.getPath(bucket)); - - // If in wrong directory on disk, do a rename to move it where VDS will - // access it. - if (expectedPath != path) { - std::string source(getPathName(context, path, &bucket)); - std::string target(getPathName(context, expectedPath, &bucket)); - - if (vespalib::fileExists(target)) { - std::ostringstream err; - err << "Cannot move file from wrong directory " << source - << " to " << target << " as file already exist. Multiple " - << "instances of bucket on same disk. Should not happen. 
" - << "Ignoring file at in bad location."; - LOG(warning, "%s", err.str().c_str()); - context._metrics._multipleBucketsSameDisk.inc(); - return false; - } - if (!vespalib::rename(source, target, false, true)) { - std::ostringstream err; - err << "Cannot move file from " << source << " to " << target - << " as source file does not exist. Should not happen."; - LOG(error, "%s", err.str().c_str()); - throw vespalib::IllegalStateException(err.str(), VESPA_STRLOC); - } - LOGBP(warning, "Found bucket in wrong directory. Moved %s to %s.", - source.c_str(), target.c_str()); - context._metrics._wrongDir.inc(); - } - return true; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h b/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h deleted file mode 100644 index c9610bcc28e..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/init/filescanner.h +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::FileScanner - * \ingroup memfile - * - * \brief Scans a directory for memfiles. - * - * When storage starts up, we need to know what data already exist. This process - * will identify what buckets we have data for. 
- */ - -#pragma once - -#include <vespa/memfilepersistence/device/mountpointlist.h> -#include <vespa/memfilepersistence/mapper/bucketdirectorymapper.h> -#include <vespa/metrics/metrics.h> -#include <vespa/storageframework/generic/component/component.h> -#include <vespa/storageframework/generic/clock/timer.h> - -namespace document { - class BucketId; -} - -namespace storage::memfile { - -class FileScanner : private framework::Component { -public: - typedef std::unique_ptr<FileScanner> UP; - - struct Config { - uint32_t _maxAlienFilesLogged; - Config() - : _maxAlienFilesLogged(10) {} - }; - struct Metrics : public metrics::MetricSet { - std::vector<std::string> _alienFiles; - metrics::LongCountMetric _alienFileCounter; - metrics::LongCountMetric _temporaryFilesDeleted; - metrics::LongCountMetric _multipleBucketsSameDisk; - metrics::LongCountMetric _wrongDir; - metrics::LongCountMetric _wrongDisk; - metrics::LongCountMetric _dirsListed; - framework::MilliSecTimer _startTime; - metrics::LongAverageMetric _listLatency; - - Metrics(framework::Clock&); - ~Metrics(); - }; - -private: - struct Context { - const Directory& _dir; - Metrics _metrics; - - Context(const Directory& d, framework::Clock& c) - : _dir(d), _metrics(c) {} - }; - - BucketDirectoryMapper _directoryMapper; - const MountPointList& _mountPoints; - Config _config; - uint32_t _dirLevels; - uint32_t _dirSpread; - // As there is only one FileScanner instance in storage, we need a - // lock to let multiple threads update global data in the scanner. - // Each operation will typically keep a Context object it can use - // without locking and then grab lock to update global data after - // completion. 
- vespalib::Lock _globalLock; - Metrics _globalMetrics; - -public: - FileScanner(framework::ComponentRegister&, const MountPointList&, - uint32_t dirLevels, uint32_t dirSpread); - ~FileScanner(); - - void buildBucketList(document::BucketId::List & list, - uint16_t partition, - uint16_t part, uint16_t totalParts); - - const Metrics& getMetrics() const { return _globalMetrics; } - - -private: - void buildBucketList(document::BucketId::List & list, - Context&, - std::vector<uint32_t>& path, - uint32_t dirLevel); - std::string getPathName(Context&, std::vector<uint32_t>& path, - const document::BucketId* bucket = 0) const; - void buildBucketList(document::BucketId::List & list, - Context&, - std::vector<uint32_t>& path); - bool processFile(document::BucketId::List & list, - Context&, - std::vector<uint32_t>& path, - const std::string& pathName, - const std::string& name); - document::BucketId extractBucketId(const std::string& name) const; - bool handleBadLocation(const document::BucketId& bucket, - Context&, - std::vector<uint32_t>& path); -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt deleted file mode 100644 index 16dc225e828..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_library(memfilepersistence_mapper OBJECT - SOURCES - buffer.cpp - memfilemapper.cpp - serializationmetrics.cpp - memfile_v1_serializer.cpp - memfile_v1_verifier.cpp - locationreadplanner.cpp - simplememfileiobuffer.cpp - fileinfo.cpp - locationreadplanner.cpp - bufferedfilewriter.cpp - bucketdirectorymapper.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp deleted file mode 100644 index 865b62e006d..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "bucketdirectorymapper.h" -#include <vespa/document/bucket/bucketid.h> -#include <vespa/vespalib/util/random.h> - -namespace storage { -namespace memfile { - -BucketDirectoryMapper::BucketDirectoryMapper(uint32_t dirLevels, - uint32_t dirSpread) - : _dirLevels(dirLevels), - _dirSpread(dirSpread) -{ -} - -std::vector<uint32_t> -BucketDirectoryMapper::getPath(const document::BucketId& bucket) -{ - document::BucketId::Type seed = bucket.getId(); - seed = seed ^ (seed >> 32); - vespalib::RandomGen randomizer(static_cast<uint32_t>(seed) ^ 0xba5eba11); - std::vector<uint32_t> position(_dirLevels); - for (uint32_t i=0; i<_dirLevels; ++i) { - position[i] = randomizer.nextUint32() % _dirSpread; - } - return position; -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h deleted file mode 100644 index 5fad368cf55..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/bucketdirectorymapper.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::BucketDirectoryMapper - * \ingroup memfile - * - * \brief Maps buckets to directories on disk. - * - * To avoid having too many files in one directory, we want to map buckets to - * different directories. As these are all in the same partition anyways, we - * don't really need the distribution to be different based on node indexes or - * disk indexes. - * - * This class hides a simple function for distributing buckets between - * directories. - */ - -#pragma once - -#include <vector> -#include <cstdint> - -namespace document { - class BucketId; -} - -namespace storage { -namespace memfile { - -class BucketDirectoryMapper { - uint32_t _dirLevels; - uint32_t _dirSpread; - -public: - BucketDirectoryMapper(uint32_t dirLevels, uint32_t dirSpread); - - std::vector<uint32_t> getPath(const document::BucketId&); -}; - -} -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp deleted file mode 100644 index 119ea4b47a8..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "buffer.h" -#include <cstring> - -using vespalib::alloc::MemoryAllocator; -using vespalib::alloc::Alloc; - -namespace storage { -namespace memfile { - -// Use AutoAlloc to transparently use mmap for large buffers. -// It is crucial that any backing buffer type returns an address that is -// 512-byte aligned, or direct IO will scream at us and fail everything. 
-Buffer::Buffer(size_t size) - : _buffer(Alloc::alloc(size, MemoryAllocator::HUGEPAGE_SIZE, 512)), - _size(size) -{ -} - -void -Buffer::resize(size_t size) -{ - Alloc buffer = _buffer.create(size); - size_t commonSize(std::min(size, _size)); - memcpy(buffer.get(), _buffer.get(), commonSize); - _buffer.swap(buffer); - _size = size; -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h deleted file mode 100644 index e753de95c18..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/buffer.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::Buffer - * \ingroup memfile - * - * \brief Simple wrapper class to contain an aligned buffer. - * - * For direct IO operations, we need to use 512 byte aligned buffers. This is - * a simple wrapper class to get such a buffer. - */ - -#pragma once - -#include <vespa/vespalib/util/alloc.h> - -namespace storage { -namespace memfile { - -class Buffer -{ - vespalib::alloc::Alloc _buffer; - // Actual, non-aligned size (as opposed to _buffer.size()). - size_t _size; - -public: - using UP = std::unique_ptr<Buffer>; - - Buffer(const Buffer &) = delete; - Buffer & operator = (const Buffer &) = delete; - Buffer(size_t size); - - /** - * Resize buffer while keeping data that exists in the intersection of - * the old and new buffers' sizes. 
- */ - void resize(size_t size); - - char* getBuffer() noexcept { - return static_cast<char*>(_buffer.get()); - } - const char* getBuffer() const noexcept { - return static_cast<const char*>(_buffer.get()); - } - size_t getSize() const noexcept { - return _size; - } - - operator char*() noexcept { return getBuffer(); } - -}; - -} // storage -} // memfile - - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp deleted file mode 100644 index c4f4e7e8bc1..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.cpp +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "bufferedfilewriter.h" -#include <vespa/vespalib/util/guard.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/exceptions.h> -#include <sstream> -#include <cassert> - -#include <vespa/log/bufferedlogger.h> -LOG_SETUP(".persistence.memfile.bufferedfilewriter"); - -namespace storage { - -namespace memfile { - -using vespalib::ValueGuard; - -BufferedFileWriter:: -BufferedFileWriter(vespalib::File& file, char* buffer, uint32_t bufferSize) - : _file(file), - _buffer(buffer), - _bufferSize(bufferSize), - _bufferedData(0), - _filePosition(0), - _writeCount(0), - _cache(0), - _cacheDirtyUpTo(0), - _writing(false) -{ - // Since we normally use direct IO for writing, we want to have - // 512b aligned buffers - if (bufferSize < 512) { - // Only warn for this. Used in testing. 
- LOGBP(warning, "Using buffer smaller than 512b"); - } else if (bufferSize % 512 != 0) { - std::ostringstream ost; - ost << "Buffered file writer got buffer of length " << bufferSize - << " (Not dividable by 512)"; - throw vespalib::IllegalArgumentException(ost.str()); - } - LOG(spam, "Using buffer in writer of %u bytes", bufferSize); -} - -BufferedFileWriter::~BufferedFileWriter() -{ - if (LOG_WOULD_LOG(debug) && _bufferedData != 0) { - LOG(debug, "Discarding %u bytes of buffered, unflushed data", - _bufferedData); - } -} - -void -BufferedFileWriter::setMemoryCache(Cache* cache) -{ - _cache = cache; - _cacheDirtyUpTo = 0; - if (cache == 0) { - LOG(spam, "No longer using a memory cache"); - } else { - LOG(spam, "Using memory cache of %u bytes", _cache->getCachedAmount()); - } -} - -void BufferedFileWriter::write(const char* data, uint32_t size, uint32_t pos) -{ - _writing = true; - // If at least parts of data written is cached in slotfileimage, update - // cache rather than write to file. - if (_cache != 0 && _cache->getCachedAmount() > pos) { - uint32_t len = std::min(size, _cache->getCachedAmount() - pos); - _cache->setData(data, len, pos); - if (_cache->duplicateCacheWrite()) { - len = 0; - } - if (len != size) { // Write remaining directly to disk - LOG(spam, "Writing remainder after cache, bypassing buffer. " - "%u bytes at pos %u.", size - len, pos + len); - _file.write(data + len, size - len, pos + len); - ++_writeCount; - } else { - LOG(spam, "Writing %u bytes to memory cache at position %u.", - size, pos); - } - _cacheDirtyUpTo = std::max(_cacheDirtyUpTo, pos + len); - } else { - LOG(spam, "Writing directly to file, bypassing buffer. %u" - " bytes at pos %u", size, pos); - _file.write(data, size, pos); - ++_writeCount; - } - _writing = false; -} - -void BufferedFileWriter::flush() -{ - if (_bufferedData == 0) return; - LOG(spam, "Flushing buffer. 
Writing %u at pos %u.", - _bufferedData, _filePosition); - write(_buffer, _bufferedData, _filePosition); - _filePosition += _bufferedData; - _bufferedData = 0; -} - -void BufferedFileWriter::write(const void *buffer, size_t size) -{ - LOG(spam, "Writing %" PRIu64 " bytes to buffer at position %u.", - size, _filePosition + _bufferedData); - if (!_buffer) { // If we don't use a buffer, just write to file. - write(static_cast<const char*>(buffer), size, _filePosition); - _filePosition += size; - return; - } - // In case of exception later, reset state to original state - ValueGuard<uint32_t> bufIndexGuard(_bufferedData); - ValueGuard<uint32_t> filePositionGuard(_filePosition); - // Buffer may contain data prior to this write call. If this is - // successfully written to disk, we need to update state to revert - // to such that we don't lose that write. - - if (_bufferedData + size >= _bufferSize) { - size_t part = _bufferSize - _bufferedData; - memcpy(_buffer + _bufferedData, buffer, part); - _bufferedData = _bufferSize; - buffer = static_cast<const char*>(buffer) + part; - flush(); - bufIndexGuard = 0; - filePositionGuard = _filePosition + _bufferSize - part; - size -= part; - } - - if (_bufferedData + size >= _bufferSize) { - if (reinterpret_cast<unsigned long>(buffer)%0x200 == 0) { - // Write the big part that is a multiple of _bufferSize to the file. 
- size_t part((size/_bufferSize)*_bufferSize); - write(static_cast<const char*>(buffer), part, _filePosition); - _filePosition += part; - buffer = static_cast<const char*>(buffer) + part; - size -= part; - } else { - for (; _bufferedData + size >= _bufferSize; size -= _bufferSize, buffer = static_cast<const char*>(buffer) + _bufferSize) { - memcpy(_buffer, buffer, _bufferSize); - _bufferedData = _bufferSize; - flush(); - } - } - } - - // We now have room for the rest of the data in buffer - assert(_bufferedData + size < _bufferSize); - memcpy(_buffer + _bufferedData, buffer, size); - _bufferedData += size; - // Finished successfully, deactivate guards - bufIndexGuard.deactivate(); - filePositionGuard.deactivate(); -} - -void BufferedFileWriter::writeGarbage(uint32_t size) { - LOG(spam, "Writing %u bytes of garbage at position %u.", - size, _filePosition + _bufferedData); - if (!_buffer) { - ValueGuard<uint32_t> filePositionGuard(_filePosition); - uint32_t maxBufferSize = 0xFFFF; - uint32_t bufSize = (size > maxBufferSize ? maxBufferSize : size); - std::unique_ptr<char[]> buf(new char[bufSize]); - while (size > 0) { - uint32_t part = (size > bufSize ? bufSize : size); - write(&buf[0], part, _filePosition); - _filePosition += part; - size -= part; - } - filePositionGuard.deactivate(); - return; - } - // In case of exception later, reset state to original state - ValueGuard<uint32_t> bufIndexGuard(_bufferedData); - ValueGuard<uint32_t> filePositionGuard(_filePosition); - - if (_bufferedData + size >= _bufferSize) { - size_t part = _bufferSize - _bufferedData; - memset(_buffer + _bufferedData, 0xFF, part); - _bufferedData += part; // Use any garbage data already there. 
- flush(); - bufIndexGuard = 0; - filePositionGuard = _filePosition + _bufferSize - part; - size -= part; - } - - memset(_buffer + _bufferedData, 0xFF, std::min(_bufferSize-_bufferedData, size)); - - for (;_bufferedData + size >= _bufferSize; size -= _bufferSize) { - _bufferedData = _bufferSize; - flush(); - } - - // We now have room for the rest of the data in buffer - assert(_bufferedData + size < _bufferSize); - _bufferedData += size; // Use any garbage data already there. - // Finished successfully, deactivate guards - bufIndexGuard.deactivate(); - filePositionGuard.deactivate(); -} - -void BufferedFileWriter::setFilePosition(uint32_t pos) -{ - if (pos != _filePosition + _bufferedData) { - flush(); - _filePosition = pos; - } -} - -uint32_t BufferedFileWriter::getFilePosition() const -{ - return _filePosition + _bufferedData; -} - -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h deleted file mode 100644 index 993004c01f0..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/bufferedfilewriter.h +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * @class storage::BufferedFileWriter - * @ingroup filestorage - * - * @brief A utility class for buffered writing to a file. - * - * To minimize the number of system calls, and to minimize the chance of - * fragmentation, files should be written to disk in large chunks. Since - * it's easier to write algorithms which writes files in smaller pieces, this - * class exists to buffer such writes and send them to disk at a later time. 
- * - * @author H�kon Humberset - * @date 2005-11-03 - */ - -#pragma once - -#include <vector> -#include <cstring> -#include <cstdint> - -namespace vespalib { - class File; -} - -namespace storage::memfile { - -class BufferedFileWriter { -public: - struct Cache { - virtual ~Cache() {} - virtual uint32_t getCachedAmount() const = 0; - /** Index given must be within [0 - getCachedAmount()> */ - virtual char* getCache(uint32_t atIndex) = 0; - /** If true, write to both cache and file, else, write to cache only. */ - virtual bool duplicateCacheWrite() const = 0; - /** Function for updating content in cache. Implemented in cache as new - * core overrides it to ignore data ahead of a given index. */ - virtual void setData(const char* data, size_t len, uint64_t pos) - { memcpy(getCache(pos), data, len); } - }; - -private: - vespalib::File& _file; - char* _buffer; - uint32_t _bufferSize; - uint32_t _bufferedData; - uint32_t _filePosition; - uint32_t _writeCount; - Cache* _cache; - uint32_t _cacheDirtyUpTo; - bool _writing; - -public: - BufferedFileWriter(const BufferedFileWriter &) = delete; - BufferedFileWriter & operator = (const BufferedFileWriter &) = delete; - /** - * Create a new buffered file writer. - * - * @param filedescriptor Write to this file which should already be open for - * writing. - * @param buffer Pointer to the buffer to use in this writer. Note that - * if buffer is 0, fakemode will be used, where all writes - * are sent on to OS. This mode can be used to test difference - * in performance of using this class or not. - * @param bufferSize The size of the buffer to keep. - */ - BufferedFileWriter(vespalib::File&, char* buffer, uint32_t bufferSize); - /** - * Destructor does not flush(). Make sure to call flush() manually. 
- * (flush() can fail, and destructors should not throw exceptions) - */ - ~BufferedFileWriter(); - - uint32_t getBufferSize() const { return _bufferSize; } - - /** - * If set, write portion written inside of memory cache here instead of - * to file. - */ - void setMemoryCache(Cache* cache); - - bool isMemoryCacheDirty() const { return (_cacheDirtyUpTo != 0); } - - uint32_t getLastDirtyIndex() const { return _cacheDirtyUpTo; } - - void tagCacheClean() { _cacheDirtyUpTo = 0; } - - /** Write all buffered data to disk. */ - void flush(); - - // Functions using the held file position. - - /** Writes the given data to file and increases the file position. */ - void write(const void *buffer, size_t size); - - /** Writes undefined data of given size to file and increases position. */ - void writeGarbage(uint32_t size); - - /** Set the file position to the given value. (Flushes before changing) */ - void setFilePosition(uint32_t pos); - - /** Get the current file position. */ - uint32_t getFilePosition() const; - - uint32_t getBufferedSize() const { return _bufferedData; } - - /** Get how many times this writer has flushed data to disk. */ - uint32_t getWriteCount() const { return _writeCount; } - -private: - void write(const char* data, uint32_t size, uint32_t pos); -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp deleted file mode 100644 index 37befa86aaf..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "fileinfo.h" -#include <vespa/vespalib/stllike/asciistream.h> - -namespace storage { - -namespace memfile { - -void -MetaSlot::print(std::ostream & out) const { - vespalib::asciistream tmp; - print(tmp); - out << tmp.str(); -} - -void -MetaSlot::print(vespalib::asciistream & out) const { - out << "Slot(" << std::dec << _timestamp << ", " << _gid << ", " - << _headerPos << " - " << _headerSize << ", " << _bodyPos - << " - " << _bodySize << ", 0x" << std::hex << _flags << ", 0x" - << _checksum << ")" << std::dec; -} - -std::ostream& operator<<(std::ostream& out, const MetaSlot& slot) { - vespalib::asciistream tmp; - slot.print(tmp); - return out << tmp.str(); -} -vespalib::asciistream& operator<<(vespalib::asciistream & out, const MetaSlot& slot) { - slot.print(out); return out; -} - -void -Header::print(std::ostream& out, const std::string& indent) const { - out << indent << "SlotFileHeader(\n" - << indent << " version: " << std::hex << _version << std::dec << "\n" - << indent << " meta data list size: " << _metaDataListSize << "\n" - << indent << " header block size: " << _headerBlockSize << "b\n" - << indent << " checksum: " << std::hex << _checksum - << indent << (verify() ? 
" (OK)\n" : " (MISMATCH)\n") - << indent << " file checksum: " << _fileChecksum << "\n" - << indent << ")"; -} - -FileInfo::FileInfo() - : _metaDataListSize(0), - _headerBlockSize(0), - _bodyBlockSize(0) -{ } - -FileInfo::FileInfo(uint32_t metaDataListSize, - uint32_t headerBlockSize, - uint32_t bodyBlockSize) - : _metaDataListSize(metaDataListSize), - _headerBlockSize(headerBlockSize), - _bodyBlockSize(bodyBlockSize) -{ } - - -FileInfo::FileInfo(const Header& header, size_t fileSize) - : _metaDataListSize(header._metaDataListSize), - _headerBlockSize(header._headerBlockSize), - _bodyBlockSize( - fileSize - header._headerBlockSize - - sizeof(MetaSlot) * header._metaDataListSize - sizeof(Header)) -{ } - -FileInfo::~FileInfo() { } - -uint32_t -FileInfo::getHeaderBlockStartIndex() const -{ - return sizeof(Header) + _metaDataListSize * sizeof(MetaSlot); -} - -uint32_t -FileInfo::getBodyBlockStartIndex() const -{ - return getHeaderBlockStartIndex() + _headerBlockSize; -} - -uint32_t -FileInfo::getFileSize() const -{ - return getBodyBlockStartIndex() + _bodyBlockSize; -} - -std::string -FileInfo::toString() const -{ - vespalib::asciistream ost; - ost << "FileInfo(" - << "meta_size " << _metaDataListSize - << " header_start " << getHeaderBlockStartIndex() - << " body_start " << getBodyBlockStartIndex() - << ")"; - return ost.str(); -} - -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h deleted file mode 100644 index 71d6e4e4f3d..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/fileinfo.h +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/vespalib/util/crc.h> - -namespace storage { - -namespace memfile { - -struct MetaSlot : private Types { - Timestamp _timestamp; - GlobalId _gid; - uint32_t _headerPos; - uint32_t _headerSize; - uint32_t _bodyPos; - uint32_t _bodySize; - uint16_t _flags; - uint16_t _checksum; - - MetaSlot() : _timestamp(0), _headerPos(0), _headerSize(0), - _bodyPos(0), _bodySize(0), _flags(0), _checksum(39859) - { - //_checksum = calcSlotChecksum(); - //std::cerr << "Empty checksum " << _checksum << "\n"; - } - - uint16_t calcSlotChecksum() const { - static uint32_t size(sizeof(MetaSlot) - sizeof(_checksum)); - vespalib::crc_32_type calculator; - calculator.process_bytes(this, size); - return calculator.checksum() & 0xffff; - - } - - bool inUse() const { - return (_flags & IN_USE); - } - - void print(std::ostream & out) const; - void print(vespalib::asciistream & out) const; - - // Functions used by unit tests (avoid renaming all old func usage) - void updateChecksum() { _checksum = calcSlotChecksum(); } - void setTimestamp(Timestamp ts) { _timestamp = ts; } - void setHeaderPos(uint32_t p) { _headerPos = p; } - void setHeaderSize(uint32_t sz) { _headerSize = sz; } - void setBodyPos(uint32_t p) { _bodyPos = p; } - void setBodySize(uint32_t sz) { _bodySize = sz; } - void setUseFlag(bool isInUse) - { _flags = (isInUse ? _flags | IN_USE : _flags & ~IN_USE); } -}; - -std::ostream& operator<<(std::ostream& out, const MetaSlot& slot); -vespalib::asciistream& operator<<(vespalib::asciistream & out, const MetaSlot& slot); - -/** - * Represents a slotfile header. - */ -struct Header { - uint32_t _version; - uint32_t _metaDataListSize; - uint32_t _headerBlockSize; - uint32_t _checksum; - uint32_t _fileChecksum; - uint32_t _notInUse0; // Some reserved bits, which we can use later if - uint64_t _notInUse1; // needed without altering the file format. 
- uint64_t _notInUse2; - uint64_t _notInUse3; - uint64_t _notInUse4; - uint64_t _notInUse5; - - Header() - : _version(Types::TRADITIONAL_SLOTFILE), - _metaDataListSize(0), - _headerBlockSize(0), - _checksum(0), - _fileChecksum(0), - _notInUse0(0), _notInUse1(0), _notInUse2(0), - _notInUse3(0), _notInUse4(0), _notInUse5(0) - { - } - - uint32_t calcHeaderChecksum() const { - vespalib::crc_32_type calculator; - calculator.process_bytes(this, 12); - return calculator.checksum(); - } - bool verify() const { - return (_version == Types::TRADITIONAL_SLOTFILE - && _checksum == calcHeaderChecksum()); - } - // Functions used by unit tests (avoid renaming all old func usage) - void updateChecksum() { _checksum = calcHeaderChecksum(); } - void setVersion(uint32_t version) { _version = version; } - void setMetaDataListSize(uint32_t sz) { _metaDataListSize = sz; } - void setHeaderBlockSize(uint32_t sz) { _headerBlockSize = sz; } - - void print(std::ostream& out, const std::string& indent = "") const; - }; - -struct FileInfo { - typedef std::unique_ptr<FileInfo> UP; - - uint32_t _metaDataListSize; - uint32_t _headerBlockSize; - uint32_t _bodyBlockSize; - - // Cached header bytes to write in addition to metadata when - // needing to write back metadata 512 byte aligned - std::vector<char> _firstHeaderBytes; - - FileInfo(); - FileInfo(uint32_t metaDataListSize, uint32_t headerBlockSize, uint32_t bodyBlockSize); - FileInfo(const Header& header, size_t fileSize); - ~FileInfo(); - - uint32_t getBlockSize(Types::DocumentPart part) const { - return (part == Types::BODY ? _bodyBlockSize : _headerBlockSize); - } - uint32_t getBlockIndex(Types::DocumentPart part) const { - return (part == Types::BODY ? 
getBodyBlockStartIndex() - : getHeaderBlockStartIndex()); - } - uint32_t getHeaderBlockStartIndex() const; - uint32_t getBodyBlockStartIndex() const; - uint32_t getFileSize() const; - std::string toString() const; -}; - -} - -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp deleted file mode 100644 index 381e5d17766..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "locationreadplanner.h" -#include <vespa/memfilepersistence/memfile/memfileiointerface.h> -#include <algorithm> - -namespace storage::memfile { - -LocationDiskIoPlanner::LocationDiskIoPlanner( - const MemFileIOInterface& io, - DocumentPart part, - const std::vector<DataLocation>& desiredLocations, - uint32_t maxGap, - uint32_t blockStartIndex) - : _io(io), - _operations(), - _part(part), - _blockStartIndex(blockStartIndex) -{ - processLocations(desiredLocations, maxGap); -} - -namespace { - uint32_t alignDown(uint32_t value) { - uint32_t blocks = value / 512; - return blocks * 512; - }; - - uint32_t alignUp(uint32_t value) { - uint32_t blocks = (value + 512 - 1) / 512; - return blocks * 512; - }; -} - -void -LocationDiskIoPlanner::scheduleLocation(DataLocation loc, - std::vector<DataLocation>& ops) -{ - if (!_io.isCached(loc, _part) && loc._size) { - // Convert the relative location from the buffer to an - // absolute location. 
- ops.push_back(DataLocation(loc._pos + _blockStartIndex, - loc._size)); - } -} - -void -LocationDiskIoPlanner::processLocations( - const std::vector<DataLocation>& desiredLocations, - uint32_t maxGap) -{ - // Build list of disk read operations to do - std::vector<DataLocation> allOps; - - // Create list of all locations we need to read - for (std::size_t i = 0; i < desiredLocations.size(); ++i) { - scheduleLocation(desiredLocations[i], allOps); - } - - // Sort list, and join elements close together into single IO ops - std::sort(allOps.begin(), allOps.end()); - for (size_t i = 0; i < allOps.size(); ++i) { - uint32_t start = alignDown(allOps[i]._pos); - uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size); - if (i != 0) { - uint32_t lastStop = _operations.back()._pos - + _operations.back()._size; - if (lastStop >= start || start - lastStop < maxGap) { - _operations.back()._size += (stop - lastStop); - continue; - } - } - - _operations.push_back(DataLocation(start, stop - start)); - } -} - -uint32_t -LocationDiskIoPlanner::getTotalBufferSize() const -{ - uint32_t totalSize = 0; - for (size_t i = 0; i < _operations.size(); ++i) { - totalSize += _operations[i]._size; - } - return totalSize; -} - -void -LocationDiskIoPlanner::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; (void) indent; - for (std::size_t i = 0; i < _operations.size(); ++i) { - if (i > 0) out << ","; - out << "[" << _operations[i]._pos << "," - << (_operations[i]._size + _operations[i]._pos) << "]"; - } -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h deleted file mode 100644 index e8bd129b004..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/locationreadplanner.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. -/** - * \class storage::memfile::LocationDiskIoPlanner - * \ingroup memfile - * - * \brief Creates list of minimal IO operations to do versus disk. - * - * When accessing many locations on disk, it is not necessarily ideal to do a - * disk access per location. This class creates a minimal set of locations to - * access to avoid accessing more than a maximum gap of uninteresting data. - */ -#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/vespalib/util/printable.h> - -namespace storage::memfile { - -class MemSlot; - -class MemFileIOInterface; - -class LocationDiskIoPlanner : public Types, public vespalib::Printable -{ -public: - LocationDiskIoPlanner(const MemFileIOInterface& io, - DocumentPart part, - const std::vector<DataLocation>& desiredLocations, - uint32_t maxGap, - uint32_t blockStartIndex); - - const std::vector<DataLocation>& getIoOperations() const { - return _operations; - } - - /** - * Get the total amount of space needed to hold all the data from all - * locations identified to be accessed. Useful to create a buffer of correct - * size. - */ - uint32_t getTotalBufferSize() const; - - void print(std::ostream& out, bool verbose, - const std::string& indent) const override; - -private: - const MemFileIOInterface& _io; - std::vector<DataLocation> _operations; - DocumentPart _part; - uint32_t _blockStartIndex; - - void processLocations( - const std::vector<DataLocation>& desiredLocations, - uint32_t maxGap); - - void scheduleLocation(DataLocation loc, std::vector<DataLocation>&); -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h deleted file mode 100644 index 3f42756abf1..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/mapperslotoperation.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::MapperSlotOperation - * \ingroup memfile - * - * \brief Utility class to wrap const casting. - * - * The MemFile objects wants to track all changes done to them by clients, such - * that they can track internally whether they are altered in memory from the - * physical file. Thus, only const MemSlot objects are exposed. If one wants to - * alter the MemFile slots one has to call functions in MemFile to do it. - * - * But the mapper code need to alter some information in the MemFile and MemSlot - * objects. For instance, it has to clear altered tag after flushing content to - * disk. The mappers thus need to alter the objects in a way regular clients - * should not be allowed to. - * - * To implement this, we use this class, which contains only the functionality - * needed by the mappers, and which const cast to let the mapper change the - * state it needs to. - */ - -#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/memfilepersistence/memfile/memfile.h> - -namespace storage { -namespace memfile { - -struct MapperSlotOperation : protected Types { - static void setFlag(const MemFile& file, uint32_t flags) - { - const_cast<MemFile&>(file).setFlag(flags); - } - static void clearFlag(const MemFile& file, uint32_t flags) - { - const_cast<MemFile&>(file).clearFlag(flags); - } - static void setFlag(const MemSlot& slot, uint32_t flags) - { - const_cast<MemSlot&>(slot).setFlag(flags); - } - static void clearFlag(const MemSlot& slot, uint32_t flags) - { - const_cast<MemSlot&>(slot).clearFlag(flags); - } - static void setLocation(const MemSlot& slot, DocumentPart part, - const DataLocation& dl) - { - const_cast<MemSlot&>(slot).setLocation(part, dl); - } - static void setChecksum(const MemSlot& slot, uint16_t checksum) - { - const_cast<MemSlot&>(slot).setChecksum(checksum); - } -}; - -} // memfile -} // storage - diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp deleted file mode 100644 index aa0a0d4a0c2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.cpp +++ /dev/null @@ -1,1026 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfile_v1_serializer.h" -#include "memfile_v1_verifier.h" - -#include "locationreadplanner.h" -#include "uniqueslotgenerator.h" -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/storageframework/generic/clock/timer.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfilev1"); - -namespace storage::memfile { - -namespace { - -void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) { - uint32_t blocks = (value + offset + block - 1) / block; - value = blocks * block - offset; -} - -int32_t getBufferPos( - const DataLocation& location, - const std::vector<DataLocation>& locations) -{ - uint32_t posNow = 0; - for (uint32_t i = 0; i < locations.size(); ++i) { - if (locations[i].contains(location)) { - return posNow + location._pos - locations[i]._pos; - } - - posNow += locations[i]._size; - } - - return -1; -} - -} - -MemFileV1Serializer::MemFileV1Serializer(ThreadMetricProvider& metricProvider) - : _metricProvider(metricProvider) -{ -} - -namespace { - -class SlotValidator -{ -public: - SlotValidator(uint32_t headerBlockOffset, - uint32_t bodyBlockOffset, - uint32_t fileSize) - : _headerBlockOffset(headerBlockOffset), - _bodyBlockOffset(bodyBlockOffset), - _fileSize(fileSize) - { - } - - bool slotHasValidInformation(const MetaSlot& ms) const { - const uint16_t slotCrc(ms.calcSlotChecksum()); - const bool 
checksumOk(slotCrc == ms._checksum); - return (checksumOk && slotLocationsWithinFileBounds(ms)); - } - -private: - bool slotLocationsWithinFileBounds(const MetaSlot& ms) const { - // The reason for checking header location bounds against file size - // instead of body block offset is that the latter is computed from the - // file meta header information and will thus be entirely unaware of - // any file truncations. - return (_headerBlockOffset + ms._headerPos + ms._headerSize <= _fileSize - && _bodyBlockOffset + ms._bodyPos + ms._bodySize <= _fileSize); - } - - const uint32_t _headerBlockOffset; - const uint32_t _bodyBlockOffset; - const uint32_t _fileSize; -}; - -} - -void -MemFileV1Serializer::loadFile(MemFile& file, Environment& env, - Buffer& buffer, uint64_t bytesRead) -{ - SerializationMetrics& metrics(getMetrics().serialization); - SimpleMemFileIOBuffer& ioBuf( - static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); - - vespalib::LazyFile* lf = &ioBuf.getFileHandle(); - - assert(file.getSlotCount() == 0); - assert(bytesRead >= 64); - - const Header* header(reinterpret_cast<const Header*>(buffer.getBuffer())); - if (header->_checksum != header->calcHeaderChecksum()) { - std::ostringstream error; - error << "Header checksum mismatch. Stored checksum " << std::hex - << header->_checksum << " does not match calculated checksum " - << header->calcHeaderChecksum(); - throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC); - } - uint32_t headerBlockIndex = sizeof(Header) - + header->_metaDataListSize * sizeof(MetaSlot); - - // Read all we need including first header bytes until alignment - uint32_t firstAlignedHeaderByte = headerBlockIndex; - alignUp(firstAlignedHeaderByte); - if (firstAlignedHeaderByte > bytesRead) { - framework::MilliSecTimer timer(env._clock); - LOG(spam, - "Only read %zu of required %u header bytes. 
" - "Resizing buffer and reading remaining data", - bytesRead, - firstAlignedHeaderByte); - buffer.resize(firstAlignedHeaderByte); - header = reinterpret_cast<const Header*>(buffer.getBuffer()); - off_t moreBytesRead = lf->read( - buffer + bytesRead, - firstAlignedHeaderByte - bytesRead, - bytesRead); - bytesRead += moreBytesRead; - if (bytesRead != firstAlignedHeaderByte) { - size_t fileSize = lf->getFileSize(); - if (firstAlignedHeaderByte > fileSize) { - std::ostringstream error; - error << "Header indicates file is bigger than it " - << "physically is. First aligned byte in header block " - << "starts at byte " << firstAlignedHeaderByte - << " while file is " << fileSize << " bytes long."; - throw CorruptMemFileException(error.str(), file.getFile(), VESPA_STRLOC); - - } - assert(bytesRead == firstAlignedHeaderByte); - } - metrics.tooLargeMetaReadLatency.addValue(timer.getElapsedTimeAsDouble()); - } - - FileInfo::UP data(new FileInfo); - data->_metaDataListSize = header->_metaDataListSize; - data->_headerBlockSize = header->_headerBlockSize; - const uint32_t headerBlockOffset( - sizeof(Header) + data->_metaDataListSize * sizeof(MetaSlot)); - const uint32_t bodyBlockOffset = headerBlockOffset + data->_headerBlockSize; - const uint32_t fileSize = lf->getFileSize(); - - // Avoid underflow in case of truncation. - const uint32_t bodyBlockSize( - fileSize > bodyBlockOffset ? 
fileSize - bodyBlockOffset : 0); - - data->_bodyBlockSize = bodyBlockSize; - data->_firstHeaderBytes.resize(firstAlignedHeaderByte - headerBlockIndex); - memcpy(&data->_firstHeaderBytes[0], buffer.getBuffer() + headerBlockIndex, - data->_firstHeaderBytes.size()); - - LOG(debug, - "File %s header info: metaDataListSize=%u, " - "headerBlockSize=%u, bodyBlockSize=%u", - file.getFile().getPath().c_str(), - data->_metaDataListSize, - data->_headerBlockSize, - data->_bodyBlockSize); - - ioBuf.setFileInfo(std::move(data)); - - uint32_t metaEntriesRead(header->_metaDataListSize); - bool foundBadSlot = false; - uint32_t lastBadSlot = 0; - SlotValidator validator(headerBlockOffset, bodyBlockOffset, fileSize); - - for (uint32_t i = 0; i < metaEntriesRead; ++i) { - const MetaSlot* ms(reinterpret_cast<const MetaSlot*>( - buffer + sizeof(Header) + i * sizeof(MetaSlot))); - - if (!validator.slotHasValidInformation(*ms)) { - foundBadSlot = true; - lastBadSlot = i; - continue; // Don't add bad slots. - } - - if (!ms->inUse()) { - break; - } - - MemSlot slot(ms->_gid, - ms->_timestamp, - DataLocation(ms->_headerPos, ms->_headerSize), - DataLocation(ms->_bodyPos, ms->_bodySize), - ms->_flags, - ms->_checksum); - - file.addSlot(slot); - } - - // We bail here instead of doing so inside the loop because this allows us - // to add all healthy slots to the file prior to throwing the exception. - // Any caller code that wants/need to inspect the good slots is then able - // to do so. It is not a given that this is a strong requirement; the check - // may be moved inside the loop if it can be established that no caller code - // expects the good slots to be present after a loadFile exception. - if (foundBadSlot) { - std::ostringstream error; - error << "Found bad slot in file '" - << file.getFile().getPath() - << "' at slot index " << lastBadSlot - << ", forcing repair of file. 
Details of file " - "corruption to follow."; - throw CorruptMemFileException(error.str(), file.getFile(), - VESPA_STRLOC); - } - - file.clearFlag(SLOTS_ALTERED); - - LOG(spam, "After loading file, its state is %s", file.toString(true).c_str()); -} - -void -MemFileV1Serializer::cacheLocationsForPart(SimpleMemFileIOBuffer& cache, - DocumentPart part, - uint32_t blockIndex, - const std::vector<DataLocation>& locationsToCache, - const std::vector<DataLocation>& locationsRead, - SimpleMemFileIOBuffer::BufferAllocation& buf) -{ - vespalib::asciistream error; - for (uint32_t i = 0; i < locationsToCache.size(); ++i) { - DataLocation loc(locationsToCache[i]); - assert(loc.valid()); - - if (loc._size == 0) { - LOG(spam, "Bailing since location size is 0"); - continue; - } - - loc._pos += blockIndex; - int32_t bufferPos = getBufferPos(loc, locationsRead); - - assert(bufferPos != -1); - - MemFileV1Verifier verifier; - if (!verifier.verifyBlock(part, locationsToCache[i]._pos, - error, - buf.getBuffer() + bufferPos, - loc._size)) - { - throw CorruptMemFileException( - error.str(), cache.getFileSpec(), VESPA_STRLOC); - } - - cache.cacheLocation(part, - locationsToCache[i], - buf.getSharedBuffer(), - buf.getBufferPosition() + bufferPos); - } -} - -void -MemFileV1Serializer::cacheLocations(MemFileIOInterface& io, - Environment& env, - const Options& options, - DocumentPart part, - const std::vector<DataLocation>& locations) -{ - SimpleMemFileIOBuffer& cache(static_cast<SimpleMemFileIOBuffer&>(io)); - - const FileInfo& data(cache.getFileInfo()); - uint32_t blockStartIndex(part == HEADER - ? data.getHeaderBlockStartIndex() - : data.getBodyBlockStartIndex()); - - LOG(spam, "%s: cacheLocations for %s with %zu locations. 
" - "max read-through gap is %u", - cache.getFileHandle().getFilename().c_str(), - getDocumentPartName(part), - locations.size(), - options._maximumGapToReadThrough); - - LocationDiskIoPlanner planner( - cache, - part, - locations, - options._maximumGapToReadThrough, - blockStartIndex); - - if (planner.getIoOperations().empty()) { - LOG(spam, "%s: no disk read operations required for %zu %s locations", - cache.getFileHandle().getFilename().c_str(), - locations.size(), - getDocumentPartName(part)); - return; - } - - const std::vector<DataLocation>& readLocations(planner.getIoOperations()); - - const size_t bufferSize = planner.getTotalBufferSize(); - assert(bufferSize % 512 == 0); - const SimpleMemFileIOBuffer::SharedBuffer::Alignment align512( - SimpleMemFileIOBuffer::SharedBuffer::ALIGN_512_BYTES); - - SimpleMemFileIOBuffer::BufferAllocation buf( - cache.allocateBuffer(part, bufferSize, align512)); - assert(reinterpret_cast<size_t>(buf.getBuffer()) % 512 == 0); - LOG(spam, - "Allocated %u bytes with offset %u from shared buffer %p " - "(of total %zu bytes, %zu bytes used, %zu bytes free)", - buf.getSize(), - buf.getBufferPosition(), - buf.getSharedBuffer().get(), - buf.getSharedBuffer()->getSize(), - buf.getSharedBuffer()->getUsedSize(), - buf.getSharedBuffer()->getFreeSize()); - - framework::MilliSecTimer readTimer(env._clock); - SerializationMetrics& metrics(getMetrics().serialization); - - uint64_t total(read(cache.getFileHandle(), buf.getBuffer(), readLocations)); - - metrics::DoubleAverageMetric& latency( - part == HEADER ? metrics.headerReadLatency - : metrics.bodyReadLatency); - metrics::LongAverageMetric& sz(part == HEADER ? 
metrics.headerReadSize - : metrics.bodyReadSize); - latency.addValue(readTimer.getElapsedTimeAsDouble()); - sz.addValue(total); - - framework::MilliSecTimer cacheUpdateTimer(env._clock); - cacheLocationsForPart(cache, part, blockStartIndex, locations, - readLocations, buf); - - metrics.cacheUpdateAndImplicitVerifyLatency.addValue( - cacheUpdateTimer.getElapsedTimeAsDouble()); -} - -uint64_t -MemFileV1Serializer::read(vespalib::LazyFile& file, - char* buf, - const std::vector<DataLocation>& readOps) -{ - uint32_t currPos = 0; - uint64_t totalRead = 0; - - for (uint32_t i = 0; i < readOps.size(); i++) { - file.read(buf + currPos, readOps[i]._size, readOps[i]._pos); - currPos += readOps[i]._size; - totalRead += readOps[i]._size; - } - return totalRead; -} - -void -MemFileV1Serializer::ensureFormatSpecificDataSet(const MemFile& ) -{ -/* - if (file.getFormatSpecificData() == 0) { - assert(!file.fileExists()); - file.setFormatSpecificData(MemFile::FormatSpecificData::UP(new Data)); - } -*/ -} - -uint32_t -MemFileV1Serializer::writeMetaData(BufferedFileWriter& writer, - const MemFile& file) -{ - const SimpleMemFileIOBuffer& ioBuf( - static_cast<const SimpleMemFileIOBuffer&>(file.getMemFileIO())); - uint32_t lastPos = writer.getFilePosition(); - const FileInfo& data(ioBuf.getFileInfo()); - - // Create the header - Header header; - header._version = file.getCurrentVersion(); - header._metaDataListSize = data._metaDataListSize; - header._headerBlockSize = data._headerBlockSize; - header.updateChecksum(); - header._fileChecksum = file.getBucketInfo().getChecksum(); - writer.write(&header, sizeof(Header)); - for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) { - MetaSlot meta; - if (i < file.getSlotCount()) { - const MemSlot& slot(file[i]); - assert(i == 0 || (file[i].getTimestamp() - > file[i-1].getTimestamp())); - meta._timestamp = slot.getTimestamp(); - meta._gid = slot.getGlobalId(); - meta._flags = slot.getPersistedFlags(); - meta._headerPos = 
slot.getLocation(HEADER)._pos; - meta._headerSize = slot.getLocation(HEADER)._size; - meta._bodyPos = slot.getLocation(BODY)._pos; - meta._bodySize = slot.getLocation(BODY)._size; - meta.updateChecksum(); - } - writer.write(&meta, sizeof(MetaSlot)); - } - return (writer.getFilePosition() - lastPos); -} - -// TODO: make exception safe -MemFileV1Serializer::FlushResult -MemFileV1Serializer::flushUpdatesToFile(MemFile& file, Environment& env) -{ - framework::MilliSecTimer totalWriteTimer(env._clock); - MemFilePersistenceThreadMetrics& metrics(getMetrics()); - SerializationWriteMetrics& writeMetrics(metrics.serialization.partialWrite); - SimpleMemFileIOBuffer& ioBuf( - static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); - const FileInfo& data(ioBuf.getFileInfo()); - BucketId bid(file.getFile().getBucketId()); - - LOG(spam, - "Attempting partial write of file %s", - file.getFile().getPath().c_str()); - - if (file.getSlotCount() > data._metaDataListSize) { - LOG(debug, - "Cannot do partial write of file %s as its " - "in-memory slot count (%u) is greater than its " - "persisted metadata list size (%u)", - file.getFile().getPath().c_str(), - file.getSlotCount(), data._metaDataListSize); - return FlushResult::TooFewMetaEntries; - } - - // TODO: replace this with multimap to avoid vector allocations - // for every single unique location? Could potentially also use - // a Boost.Intrusive rbtree with a pool-based allocation scheme - // to avoid multiple allocations even for the nodes themselves. 
- typedef MemFile::LocationMap LocationMap; - LocationMap headersToWrite, bodiesToWrite; - LocationMap existingHeaders, existingBodies; - - file.getLocations(headersToWrite, bodiesToWrite, - NON_PERSISTED_LOCATIONS); - - // We don't need the slot list for this, just using it to find a - // gap in the file - file.getLocations(existingHeaders, existingBodies, - PERSISTED_LOCATIONS | NO_SLOT_LIST); - - // Figure out total size of unwritten data for each part and - // whether or not there exists a single continuous gap in the - // part's block in which we can fit the data. Also keep track - // of the total amount of data we actually use so we can check - // if file should be downsized afterwards. - uint32_t totalSpaceUsed[2] = { 0 }; - uint32_t maxUsedExtent[2] = { 0 }; - uint32_t bytesToWrite[2] = { 0 }; - - for (uint32_t partId = 0; partId < 2; ++partId) { - DocumentPart part(static_cast<DocumentPart>(partId)); - LocationMap& unwritten(part == HEADER ? headersToWrite : bodiesToWrite); - LocationMap& existing(part == HEADER ? existingHeaders : existingBodies); - - for (LocationMap::iterator it(unwritten.begin()), e(unwritten.end()); - it != e; ++it) - { - bytesToWrite[partId] += it->first._size; - } - alignUp(bytesToWrite[partId]); - for (LocationMap::iterator it(existing.begin()), e(existing.end()); - it != e; ++it) - { - totalSpaceUsed[partId] += it->first._size; - maxUsedExtent[partId] = std::max(maxUsedExtent[partId], - it->first._pos + it->first._size); - } - LOG(spam, "Max used %s extent before align: %u", - getDocumentPartName(part), - maxUsedExtent[partId]); - - assert(maxUsedExtent[partId] <= data.getBlockSize(part)); - alignUp(maxUsedExtent[partId]); - - if (maxUsedExtent[partId] > data.getBlockSize(part) - || (bytesToWrite[partId] - > (data.getBlockSize(part) - maxUsedExtent[partId]))) - { - LOG(debug, "Could not find sufficient free space in %s to " - "perform a partial write for %s. 
Only %u bytes available, " - "but need at least %u bytes; rewriting entire file.", - getDocumentPartName(part), - file.getFile().getPath().c_str(), - (data.getBlockSize(part) >= maxUsedExtent[partId] - ? data.getBlockSize(part) - maxUsedExtent[partId] - : 0), - bytesToWrite[partId]); - return FlushResult::TooSmall; - } - } - if (LOG_WOULD_LOG(debug)) { - for (int partId = 0; partId < 2; ++partId) { - DocumentPart part(static_cast<DocumentPart>(partId)); - LOG(debug, - "%s: block %s has totalSpaceUsed=%u, maxUsedExtent=%u " - "bytesToWrite=%u blockIndex=%u blockSize=%u", - bid.toString().c_str(), - getDocumentPartName(part), - totalSpaceUsed[part], - maxUsedExtent[part], - bytesToWrite[part], - data.getBlockIndex(part), - data.getBlockSize(part)); - } - } - // Verify not too much free space. Remember to include bytes to write - // currently, and count free space forced added for alignment and to - // overrepresent blocks as used. - // TODO: are the overrepresent factors correct wrt. new data added? - std::shared_ptr<const MemFilePersistenceConfig> memFileCfg; - { - auto guard = env.acquireConfigReadLock(); - memFileCfg = guard.memFilePersistenceConfig(); - } - { - uint32_t usedSpace = static_cast<uint32_t>( - sizeof(Header) - + sizeof(MetaSlot) * file.getSlotCount() - * memFileCfg->overrepresentMetaDataFactor - + totalSpaceUsed[HEADER] - * memFileCfg->overrepresentHeaderBlockFactor - + totalSpaceUsed[BODY] - + bytesToWrite[HEADER] - + bytesToWrite[BODY]); - alignUp(usedSpace, 0, memFileCfg->fileBlockSize); - alignUp(usedSpace, 0, memFileCfg->minimumFileSize); - if (double(usedSpace) / data.getFileSize() < memFileCfg->minFillRate) { - LOG(debug, "File %s only uses %u of %u bytes (%f %%), which is " - "less than min fill rate of %f %%. 
" - "Resizing file to become smaller.", - file.getFile().getPath().c_str(), - usedSpace, data.getFileSize(), - 100.0 * usedSpace / data.getFileSize(), - 100.0 * memFileCfg->minFillRate); - return FlushResult::TooLarge; - } - } - // At this point, we've checked if we can downsize the file with - // a no-go outcome. If there are no altered slots, we can safely - // do an early exit here to avoid rewriting metadata needlessly. - if (!file.slotsAltered()) { - LOG(spam, - "No slots in %s altered, returning without writing anything.", - bid.toString().c_str()); - assert(bytesToWrite[HEADER] == 0); - assert(bytesToWrite[BODY] == 0); - return FlushResult::UnAltered; - } - - // Persist dirty locations to disk, updating all slots as we go. - // NOTE: it is assumed that the buffered data blocks contain pre- - // serialized checksums, document ids etc as appropriate since - // we only write the raw data to disk. - Buffer buffer(1024 * 1024); - BufferedFileWriter writer(ioBuf.getFileHandle(), buffer, buffer.getSize()); - - for (uint32_t partId = 0; partId < 2; ++partId) { - framework::MilliSecTimer writeTimer(env._clock); - DocumentPart part(static_cast<DocumentPart>(partId)); - LocationMap& locations(part == HEADER ? 
headersToWrite : bodiesToWrite); - - uint32_t realPos = data.getBlockIndex(part) + maxUsedExtent[partId]; - alignUp(realPos); - uint32_t pos = realPos - data.getBlockIndex(part); - - LOG(spam, - "%s: writing data for part %d, index %d, max " - "used extent %d, block size %d", - bid.toString().c_str(), - part, - data.getBlockIndex(part), - maxUsedExtent[partId], - data.getBlockSize(part)); - - writer.setFilePosition(realPos); - for (LocationMap::iterator it(locations.begin()), e(locations.end()); - it != e; ++it) - { - uint32_t size = it->first._size; - writer.write(ioBuf.getBuffer(it->first, part), size); - DataLocation newSlotLocation(pos, size); - ioBuf.persist(part, it->first, newSlotLocation); - - LOG(spam, - "%s: wrote location %d,%d to disk, resulting location was %d,%d", - bid.toString().c_str(), - it->first._pos, - it->first._size, - newSlotLocation._pos, - newSlotLocation._size); - - std::vector<const MemSlot*>& slots(it->second.slots); - for (uint32_t j = 0; j < slots.size(); ++j) { - LOG(spam, "%s: setting %s location for slot %s to %u,%u", - bid.toString().c_str(), - getDocumentPartName(part), - slots[j]->toString().c_str(), - newSlotLocation._pos, - newSlotLocation._size); - MapperSlotOperation::setLocation(*slots[j], part, newSlotLocation); - } - pos += size; - } - pos = writer.getFilePosition(); - alignUp(pos); - assert(part == BODY || pos <= data.getBlockIndex(BODY)); - writer.writeGarbage(pos - writer.getFilePosition()); - - metrics::DoubleAverageMetric& latency( - part == HEADER ? writeMetrics.headerLatency - : writeMetrics.bodyLatency); - metrics::LongAverageMetric& sz( - part == HEADER ? 
writeMetrics.headerSize - : writeMetrics.bodySize); - latency.addValue(writeTimer.getElapsedTimeAsDouble()); - sz.addValue(bytesToWrite[part]); - } - - framework::MilliSecTimer metaWriteTimer(env._clock); - // Write metadata back to file - writer.setFilePosition(0); - writeMetaData(writer, file); - writer.write(&data._firstHeaderBytes[0], data._firstHeaderBytes.size()); - writer.flush(); - MapperSlotOperation::clearFlag(file, SLOTS_ALTERED); - - writeMetrics.metaLatency.addValue(metaWriteTimer.getElapsedTimeAsDouble()); - writeMetrics.totalLatency.addValue(totalWriteTimer.getElapsedTimeAsDouble()); - writeMetrics.metaSize.addValue(writer.getFilePosition()); - return FlushResult::ChangesWritten; -} - -namespace { - uint32_t - getMetaSlotCount(uint32_t usedSlotCount, - const FileSpecification& file, - const MemFilePersistenceConfig& cfg, - const Options& options) - { - uint32_t wanted = static_cast<uint32_t>( - usedSlotCount * options._growFactor - * options._overrepresentMetaDataFactor); - if (wanted < uint32_t(cfg.minimumFileMetaSlots)) { - wanted = cfg.minimumFileMetaSlots; - } - if (wanted > uint32_t(cfg.maximumFileMetaSlots)) { - if (uint32_t(cfg.maximumFileMetaSlots) >= usedSlotCount) { - wanted = cfg.maximumFileMetaSlots; - } else { - std::ostringstream ost; - ost << "Need " << usedSlotCount << " slots and want " - << wanted << " slots in file, but max slots is " - << cfg.maximumFileMetaSlots; - throw MemFileIoException( - ost.str(), file, MemFileIoException::FILE_FULL, - VESPA_STRLOC); - } - } - return wanted; - } - - uint32_t - getHeaderBlockSize(uint32_t minBytesNeeded, - uint32_t startBlockIndex, - const FileSpecification& file, - const MemFilePersistenceConfig& cfg, - const Options& options) - { - uint32_t wanted = static_cast<uint32_t>( - minBytesNeeded * options._growFactor - * options._overrepresentHeaderBlockFactor); - if (wanted < uint32_t(cfg.minimumFileHeaderBlockSize)) { - wanted = cfg.minimumFileHeaderBlockSize; - } - if (wanted > 
uint32_t(cfg.maximumFileHeaderBlockSize)) { - if (uint32_t(cfg.maximumFileHeaderBlockSize) - >= minBytesNeeded) - { - wanted = cfg.maximumFileHeaderBlockSize; - } else { - std::ostringstream ost; - ost << "Need " << minBytesNeeded << " header bytes and want " - << wanted << " header bytes in file, but max is " - << cfg.maximumFileHeaderBlockSize; - throw MemFileIoException( - ost.str(), file, MemFileIoException::FILE_FULL, - VESPA_STRLOC); - } - } - alignUp(wanted, startBlockIndex); - return wanted; - } - - uint32_t - getBodyBlockSize(uint32_t minBytesNeeded, - uint32_t startBlockIndex, - const FileSpecification& file, - const MemFilePersistenceConfig& cfg, - const Options& options) - { - assert(startBlockIndex % 512 == 0); - uint32_t wanted = static_cast<uint32_t>( - minBytesNeeded * options._growFactor); - if (wanted + startBlockIndex < uint32_t(cfg.minimumFileSize)) { - wanted = cfg.minimumFileSize - startBlockIndex; - } - if (wanted + startBlockIndex > uint32_t(cfg.maximumFileSize)) { - if (uint32_t(cfg.maximumFileSize) - >= minBytesNeeded + startBlockIndex) - { - wanted = cfg.maximumFileSize - startBlockIndex; - } else { - std::ostringstream ost; - ost << "Need " << minBytesNeeded << " body bytes and want " - << wanted << " body bytes in file, but max is " - << (cfg.maximumFileSize - startBlockIndex) - << " as the body block starts at index " << startBlockIndex; - throw MemFileIoException( - ost.str(), file, MemFileIoException::FILE_FULL, - VESPA_STRLOC); - } - } - alignUp(wanted, startBlockIndex, cfg.fileBlockSize); - return wanted; - } - - struct TempCache : public BufferedFileWriter::Cache { - uint32_t _headerBlockIndex; - std::vector<char> _buffer; - - TempCache(uint32_t headerBlockIndex) - : _headerBlockIndex(headerBlockIndex), - _buffer() - { - uint32_t firstAligned = _headerBlockIndex; - alignUp(firstAligned); - _buffer.resize(firstAligned - _headerBlockIndex); - } - - uint32_t getCachedAmount() const override { return _buffer.size() + 
_headerBlockIndex; } - char* getCache(uint32_t pos) override { - // We should never get requests to write prior to header block - // index. - assert(pos >= _headerBlockIndex); - return (&_buffer[0] + (pos - _headerBlockIndex)); - } - - bool duplicateCacheWrite() const override { return true; } - void setData(const char* data, size_t len, uint64_t pos) override { - if (pos < _headerBlockIndex) { - if (len <= _headerBlockIndex - pos) return; - uint32_t diff = (_headerBlockIndex - pos); - len -= diff; - pos += diff; - data += diff; - } - Cache::setData(data, len, pos); - } - }; - -} - -// Iterate and write locations in timestamp order. Keep track of what -// locations have already been written and what their new location -// is in the rewritten file. Returns total number of bytes written -// for all unique locations. Modifies slot locations in-place in MemFile. -uint32_t -MemFileV1Serializer::writeAndUpdateLocations( - MemFile& file, - SimpleMemFileIOBuffer& ioBuf, - BufferedFileWriter& writer, - DocumentPart part, - const MemFile::LocationMap& locationsToWrite, - const Environment& env) -{ - framework::MilliSecTimer timer(env._clock); - BucketId bid(file.getFile().getBucketId()); - std::map<DataLocation, DataLocation> writtenLocations; - uint32_t index = 0; - for (uint32_t i = 0; i < file.getSlotCount(); ++i) { - const MemSlot& slot(file[i]); - - DataLocation originalLoc(slot.getLocation(part)); - if (originalLoc._size == 0) { - LOG(spam, "Slot %s has empty %s, not writing anything", - slot.toString().c_str(), - getDocumentPartName(part)); - assert(originalLoc._pos == 0); - continue; - } - - MemFile::LocationMap::const_iterator it( - locationsToWrite.find(originalLoc)); - assert(it != locationsToWrite.end()); - std::map<DataLocation, DataLocation>::iterator written( - writtenLocations.find(originalLoc)); - - DataLocation loc; - if (written == writtenLocations.end()) { - uint32_t size = it->first._size; - loc = DataLocation(index, size); - - LOG(spam, "%s: writing %s 
for slot %s to location (%u, %u)", - file.getFile().getBucketId().toString().c_str(), - getDocumentPartName(part), - slot.toString().c_str(), - index, size); - - writer.write(ioBuf.getBuffer(originalLoc, part), size); - index += size; - writtenLocations[originalLoc] = loc; - } else { - LOG(spam, "%s: %s already written for slot %s; " - "updating to location (%u, %u)", - file.getFile().getBucketId().toString().c_str(), - getDocumentPartName(part), - slot.toString().c_str(), - written->second._pos, written->second._size); - loc = written->second; - } - assert(loc.valid()); - MapperSlotOperation::setLocation(slot, part, loc); - } - // Move in cache. Cannot be done inside loop. - ioBuf.remapAndPersistAllLocations(part, writtenLocations); - - SerializationWriteMetrics& writeMetrics( - getMetrics().serialization.fullWrite); - metrics::DoubleAverageMetric& latency( - part == HEADER ? writeMetrics.headerLatency - : writeMetrics.bodyLatency); - metrics::LongAverageMetric& sz( - part == HEADER ? writeMetrics.headerSize - : writeMetrics.bodySize); - latency.addValue(timer.getElapsedTimeAsDouble()); - sz.addValue(index); // Equal to written size. 
- - return index; -} - -void -MemFileV1Serializer::rewriteFile(MemFile& file, Environment& env) -{ - framework::MilliSecTimer totalWriteTimer(env._clock); - SerializationWriteMetrics& writeMetrics( - getMetrics().serialization.fullWrite); - file.ensureHeaderAndBodyBlocksCached(); - - SimpleMemFileIOBuffer& ioBuf( - static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); - - const FileSpecification& oldSpec(file.getFile()); - std::string newPath = oldSpec.getPath() + ".new"; - - LOG(debug, "Rewriting entire file %s", oldSpec.getPath().c_str()); - ioBuf.getFileHandle().close(); - vespalib::LazyFile::UP newFile = env.createFile(newPath); - newFile->open(ioBuf.getFileHandle().getFlags() - | vespalib::File::CREATE | vespalib::File::TRUNC, true); - MapperSlotOperation::setFlag(file, FILE_EXIST); - - FileInfo::UP data(new FileInfo); - Buffer buffer(32 * 1024 * 1024); - BufferedFileWriter writer(*newFile, buffer, buffer.getSize()); - - std::shared_ptr<const MemFilePersistenceConfig> memFileCfg; - std::shared_ptr<const Options> options; - { - auto guard = env.acquireConfigReadLock(); - memFileCfg = guard.memFilePersistenceConfig(); - options = guard.options(); - } - - // Create the header - Header header; - header._version = getFileVersion(); - header._metaDataListSize = getMetaSlotCount( - file.getSlotCount(), file.getFile(), *memFileCfg, *options); - data->_metaDataListSize = header._metaDataListSize; - header._fileChecksum = file.getBucketInfo().getChecksum(); - - // Dump header and metadata to writer, so we can start writing header - // and bodies. If buffer is too small causing this to be written, we - // need to write it again after updating it. 
- writer.write(&header, sizeof(Header)); - LOG(spam, "Writing garbage for %u meta entries", - header._metaDataListSize); - writer.writeGarbage(sizeof(MetaSlot) * header._metaDataListSize); - - TempCache tempCache(writer.getFilePosition()); - writer.setMemoryCache(&tempCache); - - typedef MemFile::LocationMap LocationMap; - LocationMap headersToWrite, bodiesToWrite; - // Don't need the slot list, we update that implicitly - file.getLocations(headersToWrite, bodiesToWrite, - PERSISTED_LOCATIONS - | NON_PERSISTED_LOCATIONS - | NO_SLOT_LIST); - - uint32_t headerIndex = writeAndUpdateLocations( - file, ioBuf, writer, HEADER, headersToWrite, env); - - header._headerBlockSize = getHeaderBlockSize( - headerIndex, - data->getHeaderBlockStartIndex(), - file.getFile(), - *memFileCfg, - *options); - header._checksum = header.calcHeaderChecksum(); - data->_headerBlockSize = header._headerBlockSize; - - if (headerIndex < header._headerBlockSize) { - LOG(spam, "Writing %u bytes of header garbage filler", - header._headerBlockSize - headerIndex); - writer.writeGarbage(header._headerBlockSize - headerIndex); - } - - uint32_t bodyIndex = writeAndUpdateLocations( - file, ioBuf, writer, BODY, bodiesToWrite, env); - - data->_bodyBlockSize = getBodyBlockSize( - bodyIndex, - data->getBodyBlockStartIndex(), - file.getFile(), - *memFileCfg, - *options); - if (bodyIndex < data->_bodyBlockSize) { - writer.writeGarbage(data->_bodyBlockSize - bodyIndex); - } - - framework::MilliSecTimer metaWriteTimer(env._clock); - // Update meta entries - std::vector<MetaSlot> writeSlots(header._metaDataListSize); - - for (uint32_t i = 0; i < file.getSlotCount(); ++i) { - const MemSlot& slot(file[i]); - MetaSlot& meta(writeSlots[i]); - - DataLocation headerLoc = slot.getLocation(HEADER); - assert(headerLoc.valid()); - DataLocation bodyLoc = slot.getLocation(BODY); - assert(bodyLoc.valid()); - assert(i == 0 || (file[i].getTimestamp() > file[i - 1].getTimestamp())); - - meta._timestamp = slot.getTimestamp(); - 
meta._gid = slot.getGlobalId(); - meta._flags = slot.getPersistedFlags(); - meta._headerPos = headerLoc._pos; - meta._headerSize = headerLoc._size; - meta._bodyPos = bodyLoc._pos; - meta._bodySize = bodyLoc._size; - assert(meta.inUse()); - - meta.updateChecksum(); - MapperSlotOperation::setChecksum(slot, meta._checksum); - } - - if (writer.getWriteCount() != 0) { - // If we didn't have large enough buffer to hold entire file, reposition - // to start to write meta data after updates. - writer.setFilePosition(0); - writer.write(&header, sizeof(Header)); - writer.write(&writeSlots[0], writeSlots.size() * sizeof(MetaSlot)); - writer.write(&tempCache._buffer[0], tempCache._buffer.size()); - } else { - // Otherwise, just update the content in the write buffer. - memcpy(buffer, &header, sizeof(Header)); - memcpy(buffer + sizeof(Header), - &writeSlots[0], writeSlots.size() * sizeof(MetaSlot)); - } - - writer.flush(); - data->_firstHeaderBytes.swap(tempCache._buffer); - int64_t sizeDiff = 0; - if (file.getFormatSpecificData() != 0) { - sizeDiff = ioBuf.getFileInfo().getFileSize(); - } - sizeDiff = static_cast<int64_t>(data->getFileSize()) - sizeDiff; - - //file.setFormatSpecificData(MemFile::FormatSpecificData::UP(data.release())); - ioBuf.setFileInfo(std::move(data)); - file.setCurrentVersion(TRADITIONAL_SLOTFILE); - newFile->close(); - vespalib::rename(newPath, oldSpec.getPath()); - - ioBuf.getFileHandle().open( - ioBuf.getFileHandle().getFlags(), - true); - - // Update partitionmonitor with size usage. - PartitionMonitor* partitionMonitor( - file.getFile().getDirectory().getPartition().getMonitor()); - if (partitionMonitor == 0) { - // Only report if monitor exist. 
- } else if (sizeDiff > 0) { - partitionMonitor->addingData(static_cast<uint32_t>(sizeDiff)); - } else if (sizeDiff < 0) { - partitionMonitor->removingData(static_cast<uint32_t>(-1 * sizeDiff)); - } - MapperSlotOperation::clearFlag(file, SLOTS_ALTERED); - - writeMetrics.metaLatency.addValue(metaWriteTimer.getElapsedTimeAsDouble()); - writeMetrics.totalLatency.addValue(totalWriteTimer.getElapsedTimeAsDouble()); - writeMetrics.metaSize.addValue(sizeof(MetaSlot) * header._metaDataListSize); -} - -bool -MemFileV1Serializer::verify(MemFile& file, Environment& env, - std::ostream& reportStream, - bool repairErrors, uint16_t fileVerifyFlags) -{ - MemFileV1Verifier verifier; - SerializationMetrics& metrics(getMetrics().serialization); - framework::MilliSecTimer timer(env._clock); - - bool ok(verifier.verify(file, env, reportStream, repairErrors, fileVerifyFlags)); - - metrics.verifyLatency.addValue(timer.getElapsedTimeAsDouble()); - return ok; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h deleted file mode 100644 index 1f61c114827..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_serializer.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include "bufferedfilewriter.h" -#include "versionserializer.h" -#include "fileinfo.h" -#include "simplememfileiobuffer.h" -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/memfilepersistence/spi/threadmetricprovider.h> - -namespace storage { -namespace memfile { - -class MemFileV1Serializer : public VersionSerializer -{ - ThreadMetricProvider& _metricProvider; - MemFilePersistenceThreadMetrics& getMetrics() { - return _metricProvider.getMetrics(); - } -public: - using UP = std::unique_ptr<MemFileV1Serializer>; - - MemFileV1Serializer(ThreadMetricProvider&); - - FileVersion getFileVersion() override { return TRADITIONAL_SLOTFILE; } - void loadFile(MemFile& file, Environment&, Buffer& buffer, uint64_t bytesRead) override; - - void cacheLocationsForPart(SimpleMemFileIOBuffer& cache, DocumentPart part, uint32_t blockIndex, - const std::vector<DataLocation>& locationsToCache, - const std::vector<DataLocation>& locationsRead, - SimpleMemFileIOBuffer::BufferAllocation& buf); - - void cacheLocations(MemFileIOInterface& cache, Environment& env, const Options& options, - DocumentPart part, const std::vector<DataLocation>& locations) override; - - FlushResult flushUpdatesToFile(MemFile&, Environment&) override; - void rewriteFile(MemFile&, Environment&) override; - bool verify(MemFile&, Environment&, std::ostream& errorReport, - bool repairErrors, uint16_t fileVerifyFlags) override; - - uint64_t read(vespalib::LazyFile& file, char* buf, const std::vector<DataLocation>& readOps); - void ensureFormatSpecificDataSet(const MemFile& file); - uint32_t writeMetaData(BufferedFileWriter& writer, const MemFile& file); - - uint32_t writeAndUpdateLocations( - MemFile& file, - SimpleMemFileIOBuffer& ioBuf, - BufferedFileWriter& writer, - DocumentPart part, - const MemFile::LocationMap& locationsToWrite, - const Environment& env); -}; - -} // memfile -} // storage diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp deleted file mode 100644 index 39eb68315d2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.cpp +++ /dev/null @@ -1,694 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfile_v1_verifier.h" -#include "memfilemapper.h" -#include "simplememfileiobuffer.h" -#include <vespa/storageframework/generic/clock/timer.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/stllike/hash_set.hpp> -#include <sstream> -#include <algorithm> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfilev1.verifier"); - -namespace storage::memfile { - -namespace { - -void alignUp(uint32_t& value, uint32_t offset = 0, uint32_t block = 512) { - uint32_t blocks = (value + offset + block - 1) / block; - value = blocks * block - offset; -} - -struct TimestampSlotOrder - : public std::binary_function<MetaSlot*, MetaSlot*, bool> -{ - bool operator()(const MetaSlot* slot1, - const MetaSlot* slot2) const - { - return (slot1->_timestamp < slot2->_timestamp); - } -}; - -struct HeaderSlotOrder - : public std::binary_function<MetaSlot*, - MetaSlot*, bool> -{ - bool operator()(const MetaSlot* slot1, - const MetaSlot* slot2) const - { - if (slot1->_headerPos == slot2->_headerPos) { - return (slot1->_headerSize < slot2->_headerSize); - } - return (slot1->_headerPos < slot2->_headerPos); - } -}; - -struct BodySlotOrder - : public std::binary_function<MetaSlot*, - MetaSlot*, bool> -{ - bool operator()(const MetaSlot* slot1, - const MetaSlot* slot2) const - { - if (slot1->_bodyPos == slot2->_bodyPos) { - return (slot1->_bodySize < slot2->_bodySize); - } - return (slot1->_bodyPos < slot2->_bodyPos); - } -}; - -uint32_t calculateChecksum(const void* pos, uint32_t size) { - 
vespalib::crc_32_type calculator; - calculator.process_bytes(pos, size); - return calculator.checksum(); -} - -template<typename T> -bool verifyBodyBlock(const T& id, vespalib::asciistream & error, - const char* data, uint32_t size) -{ - uint32_t bodyLen = size - sizeof(uint32_t); - const char* bodyCrcPos = data + bodyLen; - const uint32_t bodyCrc = *reinterpret_cast<const uint32_t*>(bodyCrcPos); - uint32_t calculatedChecksum = calculateChecksum(data, bodyLen); - if (calculatedChecksum != bodyCrc) { - error << "Body checksum mismatch for " << id - << ": Stored checksum is 0x" << std::hex << bodyCrc - << " while calculated one is 0x" << calculatedChecksum << "."; - return false; - } - return true; -} - -template<typename T> -bool verifyHeaderBlock(const T& id, vespalib::asciistream & error, - const char* data, uint32_t size, - Types::DocumentId* documentId = 0) -{ - if (size <= 3 * sizeof(uint32_t)) { - error << "Error in header for " << id << ": " << size << " byte " - << "header is too small to contain required data."; - return false; - } - const char* nameCrcPos = data + size - sizeof(uint32_t); - const uint32_t nameCrc = *reinterpret_cast<const uint32_t*>(nameCrcPos); - const char* nameLenPos = nameCrcPos - sizeof(uint32_t); - const uint32_t nameLen = *reinterpret_cast<const uint32_t*>(nameLenPos); - if (size < 3 * sizeof(uint32_t) + nameLen) { - error << "Error in header for " << id << ": " << size << " byte " - << "header is not big enough to contain a document " - << "identifier " << nameLen << " bytes long."; - return false; - } - const char *namePos = nameLenPos - nameLen; - uint32_t calculatedNameCrc( - calculateChecksum(namePos, nameLen + sizeof(uint32_t))); - if (calculatedNameCrc != nameCrc) { - error << "Document identifier checksum mismatch for " << id - << ": Stored checksum is 0x" << std::hex << nameCrc - << " while calculated one is 0x" << calculatedNameCrc << "."; - return false; - } - const char* blobCrcPos = namePos - sizeof(uint32_t); - const 
uint32_t blobCrc = *reinterpret_cast<const uint32_t*>(blobCrcPos); - uint32_t blobLen = size - nameLen - 3 * sizeof(uint32_t); - uint32_t calculatedChecksum = calculateChecksum(data, blobLen); - if (calculatedChecksum != blobCrc) { - error << "Header checksum mismatch for " << id - << ": Stored checksum is 0x" << std::hex << blobCrc - << " while calculated one is 0x" << calculatedChecksum << "."; - return false; - } - if (documentId != 0) { - *documentId = Types::DocumentId(Types::String(namePos, nameLen)); - } - return true; -} - -} - -// Utility classes for simplifying creating report from verify -struct MemFileV1Verifier::ReportCreator { - bool _ok; - const MemFile& _file; - std::ostream& _report; - - ReportCreator(const MemFile& file, std::ostream& out) - : _ok(true), _file(file), _report(out) {} - - void addMessage(const std::string& msg) { - LOG(warning, "verify(%s): %s", - _file.getFile().getPath().c_str(), msg.c_str()); - _report << msg << "\n"; - _ok = false; - } -}; - -namespace { - struct ReportMessage { - MemFileV1Verifier::ReportCreator& _report; - mutable std::ostringstream _ost; - - ReportMessage(MemFileV1Verifier::ReportCreator& rc) - : _report(rc), _ost() {} - ~ReportMessage() { - _report.addMessage(_ost.str()); - } - // Copy constructor must exist for compiler not to complain - ReportMessage(const ReportMessage& o) : _report(o._report), _ost() {} - }; - - std::ostream& getReportStream(const ReportMessage& m) { return m._ost; } -} - -#define REPORT(report) getReportStream(ReportMessage(report)) - -bool -MemFileV1Verifier::verifyBlock(Types::DocumentPart part, - uint32_t id, - vespalib::asciistream & error, - const char* data, uint32_t size) -{ - return (part == Types::HEADER - ? 
verifyHeaderBlock(id, error, data, size) - : verifyBodyBlock(id, error, data, size)); -} - -const Header* -MemFileV1Verifier::verifyHeader(ReportCreator& report, - const Buffer& buffer, size_t fileSize) const -{ - const Header& header = *reinterpret_cast<const Header*>(buffer.getBuffer()); - if (header._checksum != header.calcHeaderChecksum()) { - REPORT(report) << "Header checksum mismatch. Was " << std::hex - << header.calcHeaderChecksum() << ", stored " - << header._checksum; - return 0; - } - FileInfo data(header, fileSize); - if (data.getBodyBlockStartIndex() > fileSize) { - REPORT(report) << "Header indicates file is bigger than it physically " - << "is. File size is " << fileSize << " bytes, but " - << "header reports that it contains " - << header._metaDataListSize - << " meta data entries and a headerblock of " - << header._headerBlockSize << " bytes, thus the minimum " - << "file size is " - << (header._metaDataListSize * sizeof(MetaSlot) - + sizeof(Header) + header._headerBlockSize); - return 0; - } - return &header; -} - -bool -MemFileV1Verifier::verifyDocumentBody( - ReportCreator& report, const MetaSlot& slot, const Buffer& buffer, - uint32_t blockIndex, uint32_t blockSize) const -{ - if (slot._bodySize == 0) return true; - if (slot._bodyPos > blockSize || - slot._bodyPos + slot._bodySize > blockSize || - slot._bodyPos + slot._bodySize < slot._bodyPos) - { - REPORT(report) << slot << " has body size/pos not contained within " - << "body block of size " << blockSize << "."; - return false; - } - if (slot._bodySize <= sizeof(uint32_t)) { - REPORT(report) << slot << " body is not big enough to possibly " - << "contain a body."; - return false; - } - vespalib::asciistream error; - if (!verifyBodyBlock(slot, error, - buffer.getBuffer() + blockIndex + slot._bodyPos, - slot._bodySize)) - { - REPORT(report) << error.str(); - return false; - } - return true; -} - -void -MemFileV1Verifier::verifyMetaDataBlock( - ReportCreator& report, const Buffer& buffer, - 
const Header& header, const BucketInfo& info, - std::vector<const MetaSlot*>& slots) const -{ - assert(slots.size() == 0); - slots.reserve(header._metaDataListSize); - Timestamp lastTimestamp(0); - bool foundNotInUse = false; - bool foundUsedAfterUnused = false; - bool wrongOrder = false; - for (uint32_t i=0, n=header._metaDataListSize; i<n; ++i) { - const MetaSlot& slot(*reinterpret_cast<const MetaSlot*>( - buffer.getBuffer() + sizeof(Header) + i * sizeof(MetaSlot))); - if (slot._checksum != slot.calcSlotChecksum()) { - REPORT(report) << "Slot " << i << " at timestamp " - << slot._timestamp << " failed checksum " - << "verification. Was " << std::hex - << slot.calcSlotChecksum() - << ", stored " << slot._checksum; - continue; - } - if (!slot.inUse()) { - foundNotInUse = true; - continue; - } - if (foundNotInUse) { - if (!foundUsedAfterUnused) { - REPORT(report) << "Slot " << i << " found after unused entries"; - } - foundUsedAfterUnused = true; - } - // Handle timestamp collisions later - if (slot._timestamp < lastTimestamp) { - wrongOrder = true; - REPORT(report) << "Slot " << i << " is out of timestamp order. (" - << slot._timestamp << " <= " << lastTimestamp - << ")"; - } - slots.push_back(&slot); - lastTimestamp = slot._timestamp; - } - if (info.getChecksum() != header._fileChecksum) { - REPORT(report) << "File checksum should have been 0x" << std::hex - << info.getChecksum() << " according to metadata found, but is set " - << "to 0x" << header._fileChecksum << "."; - } - if (wrongOrder) { - std::sort(slots.begin(), slots.end(), TimestampSlotOrder()); - } -} - -void -MemFileV1Verifier::verifyInBounds( - ReportCreator& report, const Header& header, bool doHeader, - const FileInfo& data, std::vector<const MetaSlot*>& slots) const -{ - // Gather all information different for header and body parts, - // to avoid differences further down. - uint32_t blockSize = (doHeader ? header._headerBlockSize - : data._bodyBlockSize); - uint32_t minSize = (doHeader ? 
3*sizeof(uint32_t) : 0); - std::string part(doHeader ? "Header" : "Body"); - std::vector<const MetaSlot*> okSlots; - okSlots.reserve(slots.size()); - // Go through all slots ordered, and remove illegal ones. - for (uint32_t i=0, n=slots.size(); i<n; ++i) { - uint32_t pos(doHeader ? slots[i]->_headerPos : slots[i]->_bodyPos); - uint32_t size(doHeader ? slots[i]->_headerSize : slots[i]->_bodySize); - if (size < minSize) { - REPORT(report) << part << " of slot (" << *slots[i] << ") " - << "is too small to be valid"; - } else if (size != 0 && - (pos >= blockSize || pos + size > blockSize || - pos + size < pos)) // 3 checks as + can overflow - { - REPORT(report) << part << " of slot (" << *slots[i] << ") goes out " - << "of bounds. (Blocksize " << blockSize << ")"; - } else if (size == 0 && pos != 0) { - REPORT(report) << part << " of slot (" << *slots[i] << ") " - << "has size 0 but is not positioned at pos 0 " - << "as zero sized blocks should be"; - } else { - okSlots.push_back(slots[i]); - } - } - okSlots.swap(slots); -} - -void -MemFileV1Verifier::verifyDataBlock( - ReportCreator& report, Environment& env, const Buffer& buffer, - const FileInfo& data, const BucketId& bucket, - std::vector<const MetaSlot*>& slots, bool doHeader) const -{ - std::vector<const MetaSlot*> okSlots; - okSlots.reserve(slots.size()); - for (uint32_t i=0, n=slots.size(); i<n; ++i) { - if (!doHeader && slots[i]->_bodySize == 0) { - okSlots.push_back(slots[i]); - continue; - } - if (doHeader) { - DocumentId id; - if (!verifyDocumentHeader(report, *slots[i], buffer, id, - data.getHeaderBlockStartIndex(), - data._headerBlockSize)) - { - continue; - } - BucketId foundBucket(env._bucketFactory.getBucketId(id)); - foundBucket.setUsedBits(bucket.getUsedBits()); - foundBucket = foundBucket.stripUnused(); - if (id.getGlobalId() != slots[i]->_gid) { - REPORT(report) << *slots[i] - << " has gid " << slots[i]->_gid.toString() - << " but its header block contains document id " - << id << " with " << 
id.getGlobalId().toString(); - } - else if (bucket == foundBucket) { - okSlots.push_back(slots[i]); - } else { - REPORT(report) << "Slot " << *slots[i] - << " belongs to bucket " << foundBucket - << " not in bucket " << bucket; - } - } else { - if (!verifyDocumentBody(report, *slots[i], buffer, - data.getBodyBlockStartIndex(), - data._bodyBlockSize)) - { - continue; - } - okSlots.push_back(slots[i]); - } - } - slots.swap(okSlots); -} - -bool -MemFileV1Verifier::verifyDocumentHeader( - ReportCreator& report, const MetaSlot& slot, const Buffer& buffer, - DocumentId& did, uint32_t blockIndex, uint32_t blockSize) const -{ - if (slot._headerPos > blockSize || - slot._headerPos + slot._headerSize > blockSize || - slot._headerPos + slot._headerSize < slot._headerPos) - { - REPORT(report) << slot << " has header size/pos not contained within " - << "header block of size " << blockSize << "."; - return false; - } - vespalib::asciistream error; - if (!verifyHeaderBlock(slot, error, - buffer.getBuffer() + blockIndex + slot._headerPos, - slot._headerSize, &did)) - { - REPORT(report) << error.str(); - return false; - } - return true; -} - -namespace { -// Helper function for verifyNonOverlap - void verifySlotsAtSamePosition( - MemFileV1Verifier::ReportCreator& report, - bool header, - std::vector<const MetaSlot*>& slots, - vespalib::hash_set<const MetaSlot*, - vespalib::hash<void *> >& faultySlots) - { - const Types::GlobalId& gid(slots[0]->_gid); - for (uint32_t i=1; i<slots.size(); ++i) { - if (slots[i]->_gid != gid) { - REPORT(report) << "Multiple slots with different gids use same " - << (header ? "header" : "body") - << " position. For instance slot " - << *slots[0] << " and " << *slots[i] - << ". 
Repairing will delete all " << slots.size() - << " slots using this position, as we don't " - << "know who is correct."; - for (uint32_t j=0; j<slots.size(); ++j) { - faultySlots.insert(slots[j]); - } - break; - } - } - } -} - -void -MemFileV1Verifier::verifyNonOverlap( - ReportCreator& report, bool doHeader, - std::vector<const MetaSlot*>& slots) const -{ - // Gather all information different for header and body parts, - // to avoid differences further down. - std::string part(doHeader ? "Header" : "Body"); - std::vector<const MetaSlot*> order(slots); - // Using stable sort to sort slots, makes slots in same position - // keep timestamp order. (Thus we can use that if we want to remove - // oldest or newest illegally at same timestamp) - if (doHeader) { - std::stable_sort(order.begin(), order.end(), HeaderSlotOrder()); - } else { - std::stable_sort(order.begin(), order.end(), BodySlotOrder()); - } - // Temporary store slots that need to be removed - vespalib::hash_set<const MetaSlot*, vespalib::hash<void *> > failedSlots; - // Slots that points to the same area within a block. - std::vector<const MetaSlot*> local; - uint32_t lastPos = 0, lastSize = 0; - // Go through all slots ordered, and remove illegal ones. - for (uint32_t i=0, n=order.size(); i<n; ++i) { - uint32_t pos(doHeader ? order[i]->_headerPos : order[i]->_bodyPos); - uint32_t size(doHeader ? 
order[i]->_headerSize : order[i]->_bodySize); - if (size == 0) { - // Ignore zero sized entries - } else if (pos == lastPos && size == lastSize) { - local.push_back(order[i]); - } else if (pos < lastPos + lastSize) { - std::ostringstream ost; - if (!local.empty()) { - for (uint32_t j=0; j<local.size(); ++j) { - failedSlots.insert(local[j]); - if (j != 0) ost << ", "; - ost << *local[j]; - } - } - failedSlots.insert(order[i]); - if (local.empty()) { - REPORT(report) << part << " of slot(" << *order[i] << ") " - << "overlaps with previously removed slots."; - } else { - REPORT(report) << part << " of slot (" << *order[i] << ") " - << "overlaps with " - << (local.size() == 1 ? "slot" - : "the following slots") - << " " << ost.str() << "."; - } - local.clear(); - lastPos = pos; - lastSize = size; - } else { - if (local.size() > 1) { - verifySlotsAtSamePosition(report, doHeader, local, failedSlots); - } - local.clear(); - local.push_back(order[i]); - lastPos = pos; - lastSize = size; - } - } - if (local.size() > 1) { - verifySlotsAtSamePosition(report, doHeader, local, failedSlots); - } - if (failedSlots.size() == 0) return; - std::vector<const MetaSlot*> okSlots; - okSlots.reserve(slots.size() - failedSlots.size()); - for (uint32_t i=0, n=slots.size(); i<n; ++i) { - if (failedSlots.find(slots[i]) == failedSlots.end()) { - okSlots.push_back(slots[i]); - } - } - okSlots.swap(slots); -} - - - -bool -MemFileV1Verifier::verify(MemFile& file, Environment& env, - std::ostream& reportStream, - bool repairErrors, uint16_t fileVerifyFlags) -{ - bool verifyHeaderData = ((fileVerifyFlags & DONT_VERIFY_HEADER) == 0); - bool verifyBodyData = ((fileVerifyFlags & DONT_VERIFY_BODY) == 0); - - LOG(debug, "verify(%s%s%s%s)", - file.getFile().toString().c_str(), - repairErrors ? ", repairing errors" : "", - verifyHeaderData ? ", verifying header block" : "", - verifyBodyData ? 
", verifying body block" : ""); - - SimpleMemFileIOBuffer& ioBuf( - static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); - - framework::MilliSecTimer startTimer(env._clock); - ReportCreator report(file, reportStream); - file.verifyConsistent(); - if (!file.fileExists()) return report._ok; - - // First read at least the header from disk - size_t fileSize = ioBuf.getFileHandle().getFileSize(); - if (fileSize < sizeof(Header)) { - REPORT(report) << "File was only " << fileSize - << " B long and cannot be valid. Delete file to repair."; - if (repairErrors) { - env._memFileMapper.deleteFile(file, env); - } - return report._ok; - } - const size_t initialIndexRead( - env.acquireConfigReadLock().options()->_initialIndexRead); - Buffer buffer(std::min(fileSize, initialIndexRead)); - size_t readBytes = ioBuf.getFileHandle().read(buffer, buffer.getSize(), 0); - - // Exception should have been thrown by read if mismatch here. - assert(readBytes == buffer.getSize()); - - // Ensure slotfile header is ok. If not just delete whole file. - const Header* header = verifyHeader(report, buffer, fileSize); - if (header == 0) { - if (repairErrors) { - env._memFileMapper.deleteFile(file, env); - } - return report._ok; - } - - FileInfo data(*header, fileSize); - - // Read remaining data needed in check, if any - size_t lastNeededByte = sizeof(Header) - + sizeof(MetaSlot) * header->_metaDataListSize; - if (verifyBodyData) { - lastNeededByte = fileSize; - } else if (verifyHeaderData) { - lastNeededByte += header->_headerBlockSize; - } - if (buffer.getSize() < lastNeededByte) { - buffer.resize(lastNeededByte); - header = reinterpret_cast<const Header*>(buffer.getBuffer()); - } - if (lastNeededByte > readBytes) { - readBytes += ioBuf.getFileHandle().read( - buffer + readBytes, buffer.getSize() - readBytes, readBytes); - } - - // Exception should have been thrown by read if mismatch here. - assert(readBytes == buffer.getSize()); - - // Build list of slots. Do simple checking. 
- std::vector<const MetaSlot*> slots; - verifyMetaDataBlock(report, buffer, *header, file.getBucketInfo(), slots); - verifyInBounds(report, *header, true, data, slots); - verifyInBounds(report, *header, false, data, slots); - - // Check header and body blocks if wanted - if (verifyHeaderData) { - verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(), - slots, true); - } - if (verifyBodyData) { - verifyDataBlock(report, env, buffer, data, file.getFile().getBucketId(), - slots, false); - } - // Check for overlapping slots last, in case only one of the slots - // actually overlapped pointed to a legal document, we may have - // already removed the problem. - verifyNonOverlap(report, true, slots); - verifyNonOverlap(report, false, slots); - verifyUniqueTimestamps(report, slots); - // If the slotlist is altered from what we read from disk, we need - // to write it back if we're gonna repair the errors. - if (!report._ok && repairErrors) { - // Remove bad entries from the memfile instance - // Entries that are cached in full may be removed from file and just - // tagged not in file anymore in cache. 
- std::vector<Timestamp> keep; - for (uint32_t i=0; i<slots.size(); ++i) { - keep.push_back(slots[i]->_timestamp); - } - env._memFileMapper.removeAllSlotsExcept( - const_cast<MemFile&>(file), keep); - - // Edit header and metadata part of buffer to only keep wanted data - // Since both source and target is the same buffer, create new meta - // data in new buffer and memcpy back afterwards - Buffer metaData(header->_metaDataListSize * sizeof(MetaSlot)); - BucketInfo info(file.getBucketInfo()); - const_cast<Header*>(header)->_fileChecksum = info.getChecksum(); - for (uint32_t i=0; i<header->_metaDataListSize; ++i) { - MetaSlot* slot(reinterpret_cast<MetaSlot*>( - metaData.getBuffer() + i * sizeof(MetaSlot))); - if (i >= slots.size()) { - *slot = MetaSlot(); - } else if (slot != slots[i]) { - *slot = *slots[i]; - } - } - memcpy(buffer.getBuffer() + sizeof(Header), metaData.getBuffer(), - metaData.getSize()); - // Then rewrite metadata section to disk leaving out bad entries - uint32_t dataToWrite(sizeof(Header) - + sizeof(MetaSlot) * header->_metaDataListSize); - alignUp(dataToWrite); - ioBuf.getFileHandle().write(buffer, dataToWrite, 0); - - // Tag memfile up to date - uint32_t memFileFlags = FILE_EXIST - | HEADER_BLOCK_READ - | BODY_BLOCK_READ; - for (MemFile::const_iterator it = file.begin(ITERATE_REMOVED); - it != file.end(); ++it) - { - if (!ioBuf.isCached(it->getLocation(BODY), BODY)) { - memFileFlags &= ~BODY_BLOCK_READ; - } - if (!ioBuf.isCached(it->getLocation(HEADER), HEADER)) { - memFileFlags &= ~HEADER_BLOCK_READ; - } - - if (!ioBuf.isPersisted(it->getLocation(BODY), BODY) - || !ioBuf.isPersisted(it->getLocation(HEADER), HEADER)) - { - memFileFlags |= SLOTS_ALTERED; - } - - if (it->alteredInMemory()) { - memFileFlags |= SLOTS_ALTERED; - } - } - assert(file.fileExists()); - const_cast<MemFile&>(file).clearFlag(LEGAL_MEMFILE_FLAGS); - const_cast<MemFile&>(file).setFlag(memFileFlags); - LOG(warning, "verify(%s): Errors repaired", file.toString().c_str()); - } 
else if (report._ok) { - LOG(debug, "verify(%s): Ok", file.toString().c_str()); - } else { - LOG(debug, "verify(%s): Not repairing errors", file.toString().c_str()); - } - -// env._metrics.slotfileMetrics._verifyLatencyTotal.addValue(startTimer); - return report._ok; -} - -void -MemFileV1Verifier::verifyUniqueTimestamps( - ReportCreator& report, std::vector<const MetaSlot*>& slots) const -{ - std::vector<const MetaSlot*> okSlots; - okSlots.reserve(slots.size()); - // Slots should already be in order as verifyMetaDataBlock has run - Timestamp last(0); - for (uint32_t i=0, n=slots.size(); i<n; ++i) { - if (slots[i]->_timestamp == last && i != 0) { - REPORT(report) << "Slot " << i << " (" << *slots[i] - << ") has same timestamp as slot " << (i-1) - << " (" << *slots[i-1] << ")."; - } else { - okSlots.push_back(slots[i]); - last = slots[i]->_timestamp; - } - } - okSlots.swap(slots); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h deleted file mode 100644 index 158646af3eb..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfile_v1_verifier.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/memfilepersistence/mapper/fileinfo.h> -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/memfilepersistence/common/environment.h> - -namespace storage { - - -namespace memfile { - -class MemFile; -class Environment; -class Buffer; - -class MemFileV1Verifier : public Types -{ -public: - bool verify(MemFile&, - Environment&, - std::ostream& errorReport, - bool repairErrors, - uint16_t fileVerifyFlags); - - bool verifyBlock(Types::DocumentPart part, - uint32_t id, - vespalib::asciistream & error, - const char* data, - uint32_t size); - - - class ReportCreator; - -private: - const Header* verifyHeader(ReportCreator& report, - const Buffer& buffer, - size_t fileSize) const; - - void verifyMetaDataBlock(ReportCreator& report, - const Buffer& buffer, - const Header& header, - const BucketInfo& info, - std::vector<const MetaSlot*>& slots) const; - - void verifyInBounds(ReportCreator& report, - const Header& header, - bool doHeader, - const FileInfo& data, - std::vector<const MetaSlot*>& slots) const; - - void verifyDataBlock(ReportCreator& report, - Environment& env, - const Buffer& buffer, - const FileInfo& data, - const BucketId& bucket, - std::vector<const MetaSlot*>& slots, - bool doHeader) const; - - void verifyNonOverlap(ReportCreator& report, - bool doHeader, - std::vector<const MetaSlot*>& slots) const; - - bool verifyDocumentHeader(ReportCreator& report, - const MetaSlot& slot, - const Buffer& buffer, - DocumentId& did, - uint32_t blockIndex, - uint32_t blockSize) const; - - bool verifyDocumentBody(ReportCreator& report, - const MetaSlot& slot, - const Buffer& buffer, - uint32_t blockIndex, - uint32_t blockSize) const; - - void verifyUniqueTimestamps(ReportCreator& report, - std::vector<const MetaSlot*>& slots) const; -}; - -} - -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp deleted file mode 
100644 index 71d6a6b6fcf..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.cpp +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfilemapper.h" -#include "memfile_v1_serializer.h" -#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/vdslib/distribution/distribution.h> -#include <vespa/storageframework/generic/clock/timer.h> -#include <sstream> -#include <algorithm> - -#include <vespa/log/bufferedlogger.h> -LOG_SETUP(".persistence.memfile.mapper"); - -namespace storage::memfile { - -// Repair defined in macro, such that log entries will be unique for the various -// instances calling it (different file line numbers) -#define VESPA_REPAIR_MEMFILE(file) \ -{ \ - std::ostringstream memFileErrors; \ - bool memFileRepairResult = repair(file, env, memFileErrors); \ - if (!memFileRepairResult) { \ - LOG(warning, "Repaired %s: %s", \ - file.toString().c_str(), memFileErrors.str().c_str()); \ - sendNotifyBucketCommand(file, env); \ - } else { \ - LOGBP(warning, "Repair for %s triggered but found nothing to repair.", \ - file.toString().c_str()); \ - } \ -} - -// To avoid duplicating code, this macro is used when autoRepair is on, and -// call itself with autorepair off, handling the autorepair. -#define VESPA_HANDLE_AUTOREPAIR(file, func) { \ - try{ \ - func; \ - } catch (CorruptMemFileException& e) { \ - LOGBP(warning, "Corrupt file %s: %s", \ - file.toString().c_str(), e.what()); \ - VESPA_REPAIR_MEMFILE(file); \ - func; \ - } \ - return; \ -} - -void -MemFileMapper::sendNotifyBucketCommand(const MemFile&, - Environment&) -{ -/* TODO: Move to service layer. 
- BucketInfo info(file.getBucketInfo()); - // Send notify bucket change command to update distributor - api::NotifyBucketChangeCommand::SP msg( - new api::NotifyBucketChangeCommand(file.getFile().getBucketId(), - info)); - uint16_t distributor( - env._storageServer.getDistribution()->getIdealDistributorNode( - *env._storageServer.getStateUpdater().getSystemState(), - file.getFile().getBucketId())); - msg->setAddress(api::StorageMessageAddress( - env._storageServer.getClusterName(), - lib::NodeType::DISTRIBUTOR, - distributor)); - msg->setSourceIndex(env._nodeIndex); - env._fileStorHandler.sendCommand(msg); -*/ -} - -void -MemFileMapper::addVersionSerializer(VersionSerializer::UP serializer) -{ - FileVersion version = serializer->getFileVersion(); - if (_serializers.find(version) != _serializers.end()) { - std::ostringstream error; - error << "A serializer for version " << version - << " is already registered."; - throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC); - } - _serializers[version] = std::move(serializer); -} - -VersionSerializer& -MemFileMapper::getVersionSerializer(const MemFile& file) -{ - std::map<FileVersion, VersionSerializer::UP>::iterator it( - _serializers.find(file.getCurrentVersion())); - if (it == _serializers.end()) { - std::ostringstream ost; - ost << "Unknown serialization version " - << getFileVersionName(file.getCurrentVersion()) - << " (" << file.getCurrentVersion() << ")\n"; - throw CorruptMemFileException(ost.str(), file.getFile(), VESPA_STRLOC); - } - return *it->second; -} - -MemFileMapper::MemFileMapper(ThreadMetricProvider& metricProvider) - : _metricProvider(metricProvider) -{ - addVersionSerializer(VersionSerializer::UP(new MemFileV1Serializer(metricProvider))); -} - -void -MemFileMapper::setDefaultMemFileIO(MemFile& file, - vespalib::LazyFile::UP lf, - const Environment& env) -{ - std::map<FileVersion, VersionSerializer::UP>::iterator serializer( - _serializers.find(file.getFile().getWantedFileVersion())); - 
assert(serializer != _serializers.end()); - - file.setMemFileIO( - std::unique_ptr<MemFileIOInterface>( - new SimpleMemFileIOBuffer( - *serializer->second, - std::move(lf), - FileInfo::UP(new FileInfo()), - file.getFile(), - env))); -} - -void -MemFileMapper::loadFileImpl(MemFile& file, Environment& env) -{ - framework::MilliSecTimer timer(env._clock); - - if (file.getSlotCount() != 0 || file.getCurrentVersion() != UNKNOWN) { - throw InvalidStateException("File is already loaded", file.getFile(), - VESPA_STRLOC); - } - - vespalib::LazyFile::UP f = env.createFile(file.getFile().getPath()); - vespalib::LazyFile* lf = f.get(); - - setDefaultMemFileIO(file, std::move(f), env); - - // Early exit for file not found to avoid having to use - // exception for common control path - if (!vespalib::fileExists(file.getFile().getPath())) { - LOG(debug, "Cannot load file '%s' as it does not exist", - file.getFile().getPath().c_str()); - file.setFlag(HEADER_BLOCK_READ | BODY_BLOCK_READ); - return; - } - file.setFlag(FILE_EXIST); - - Buffer buffer(env.acquireConfigReadLock().options()->_initialIndexRead); - off_t readBytes = lf->read(buffer, buffer.getSize(), 0); - - if (readBytes < 4) { - std::ostringstream err; - err << "Only " << readBytes << " bytes read from file. 
Not enough to " - << "get a file version."; - throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC); - } - SerializationMetrics& metrics(getMetrics().serialization); - metrics.initialMetaReadLatency.addValue(timer.getElapsedTimeAsDouble()); - - file.setFlag(BUCKET_INFO_OUTDATED); - - FileVersion version = static_cast<FileVersion>( - *reinterpret_cast<uint32_t*>(buffer.getBuffer())); - std::map<FileVersion, VersionSerializer::UP>::iterator serializer( - _serializers.find(version)); - file.setCurrentVersion(version); - if (serializer == _serializers.end()) { - std::ostringstream err; - err << "Unknown file version " << std::hex << version; - throw CorruptMemFileException(err.str(), file.getFile(), VESPA_STRLOC); - } - serializer->second->loadFile(file, env, buffer, readBytes); - - metrics.totalLoadFileLatency.addValue(timer.getElapsedTimeAsDouble()); -} - -void -MemFileMapper::loadFile(MemFile& file, Environment& env, bool autoRepair) -{ - try { - loadFileImpl(file, env); - } catch (CorruptMemFileException& e) { - LOGBP(warning, "Corrupt file %s: %s", - file.toString().c_str(), e.what()); - if (autoRepair) { - VESPA_REPAIR_MEMFILE(file); - // Must reset version info, slots etc to avoid getting errors - // that file is already loaded. - file.resetMetaState(); - loadFileImpl(file, env); - } - // Add bucket to set of modified buckets so service layer can request - // new bucket info. 
- env.addModifiedBucket(file.getFile().getBucketId()); - } -} - -void -MemFileMapper::flush(MemFile& f, Environment& env, bool autoRepair) -{ - (void) autoRepair; - if (f.fileExists()) { - VersionSerializer& serializer(getVersionSerializer(f)); - typedef VersionSerializer::FlushResult FlushResult; - FlushResult result = serializer.flushUpdatesToFile(f, env); - if (result == FlushResult::TooSmall) { - f.compact(); - result = serializer.flushUpdatesToFile(f, env); - } - if (result == FlushResult::ChangesWritten - || result == FlushResult::UnAltered) - { - return; - } - MemFilePersistenceThreadMetrics& metrics(_metricProvider.getMetrics()); - switch (result) { - case FlushResult::TooFewMetaEntries: - metrics.serialization.fullRewritesDueToTooSmallFile.inc(); - break; - case FlushResult::TooSmall: - metrics.serialization.fullRewritesDueToTooSmallFile.inc(); - break; - case FlushResult::TooLarge: - metrics.serialization.fullRewritesDueToDownsizingFile.inc(); - break; - default: - break; - } - } else { - // If a file does not yet exist, its content by definition exists - // entirely in memory. Consequently it costs next to nothing to run - // compaction since there is no need to read any meta/header blocks - // from disk. However, the gains from compacting may be significant if - // the bucket e.g. contains many versions of the same document. 
- f.compact(); - } - - // If we get here we failed to write updates only and will rewrite - std::map<FileVersion, VersionSerializer::UP>::iterator serializer( - _serializers.find(f.getFile().getWantedFileVersion())); - assert(serializer != _serializers.end()); - - serializer->second->rewriteFile(f, env); -} - -bool -MemFileMapper::verify(MemFile& file, Environment& env, - std::ostream& errorReport, bool repairErrors, - uint16_t fileVerifyFlags) -{ - if (file.fileExists()) { - std::map<FileVersion, VersionSerializer::UP>::iterator serializer( - _serializers.find(file.getCurrentVersion())); - if (serializer != _serializers.end()) { - bool wasOk = serializer->second->verify( - file, env, errorReport, repairErrors, fileVerifyFlags); - if (!wasOk) sendNotifyBucketCommand(file, env); - return wasOk; - } - // If we get here, version is corrupted. Delete file if repairing. - errorReport << "Header read from " << file.getFile().getPath() - << " is of wrong version " - << getFileVersionName(file.getCurrentVersion()) - << "(0x" << std::hex << file.getCurrentVersion() << std::dec - << "). 
Corrupt file or unsupported format."; - if (repairErrors) { - deleteFile(file, env); - } - sendNotifyBucketCommand(file, env); - return false; - } - return true; -} - -void -MemFileMapper::deleteFile(const MemFile& constFile, Environment& env) -{ - MemFile& file(const_cast<MemFile&>(constFile)); - framework::MilliSecTimer timer(env._clock); - std::vector<Timestamp> keep; - file.clearFlag(FILE_EXIST); - file.setCurrentVersion(UNKNOWN); - - SimpleMemFileIOBuffer& ioBuf( - static_cast<SimpleMemFileIOBuffer&>(file.getMemFileIO())); - - uint32_t fileSize = ioBuf.getFileHandle().getFileSize(); - ioBuf.getFileHandle().unlink(); - - // Indicate we get free space to partition monitor - PartitionMonitor& partitionMonitor( - *constFile.getFile().getDirectory().getPartition().getMonitor()); - partitionMonitor.removingData(fileSize); - getMetrics().serialization.deleteFileLatency.addValue( - timer.getElapsedTimeAsDouble()); -} - -void -MemFileMapper::removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep) -{ - std::vector<const MemSlot*> slotsToRemove; - MemFile::const_iterator orgIt(file.begin(ITERATE_REMOVED)); - std::vector<Timestamp>::reverse_iterator keepIt(keep.rbegin()); - - // Linear merge of vectors to extract inverse set of `keep`; these will - // be the slots we should remove. The output of this is pretty much what - // std::set_symmetric_difference would've given us, but can't use that - // algorithm directly due to our non-implicitly convertible mixing of - // iterator value types. - // Note that iterator ranges are sorted in _descending_ order. - while (orgIt != file.end()) { - if (keepIt == keep.rend() || orgIt->getTimestamp() > *keepIt) { - slotsToRemove.push_back(&*orgIt); - ++orgIt; - } else if (orgIt->getTimestamp() == *keepIt) { - ++orgIt; - ++keepIt; - } else { - // The case where the verifier knows of a slot that the MemFile - // does not _may_ happen in the case of corruptions causing apparent - // timestamp collisions. 
In this case, sending in timestamps to - // keep could lead to ambiguities, but in general we can assume that - // one of the slots will be removed before this due to a mismatching - // checksum. - LOG(warning, - "Verifier code requested to keep slot at time %zu in " - "file %s, but that slot does not exist in the internal state. " - "Assuming this is due to corruption which will be fixed " - "automatically.", - keepIt->getTime(), - file.getFile().getPath().c_str()); - ++keepIt; - } - } - std::reverse(slotsToRemove.begin(), slotsToRemove.end()); - file.removeSlots(slotsToRemove); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h deleted file mode 100644 index a04ee462fce..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/memfilemapper.h +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::MemFileMapper - * \ingroup memfile - * - * \brief Maps memory representation of files to and from physical files. - * - * The mapper can map to and from all file formats supported. It keeps track - * of all possible formats and call the implementation of these as needed. This - * global class is needed such that files can seemlessly change file format when - * one wants to start using another than one used before. - * - * Note that there will be one MemFileMapper instance per disk thread, such that - * the mapper doesn't have to worry about being threadsafe with multiple - * threads calling it at the same time. 
- */ - -#pragma once - -#include "versionserializer.h" -#include <vespa/memfilepersistence/spi/threadmetricprovider.h> - -namespace storage { -namespace memfile { - -class MemFileMapper : private Types { -private: - std::map<FileVersion, VersionSerializer::UP> _serializers; - ThreadMetricProvider& _metricProvider; - void setDefaultMemFileIO(MemFile& file, - vespalib::LazyFile::UP lf, - const Environment& env); - -public: - MemFileMapper(ThreadMetricProvider&); - - /** - * Initialize a MemFile entry with the data found in corresponding file. - * This sets: - * - Flag whether file exist or not. - * - If file exist, sets header data in file, such as: - * - File version - * - Meta entry count - * - Header block size - * - Body block size - * - File checksum - */ - void loadFile(MemFile&, Environment&, bool autoRepair = true); - - /** - * Flushes all content in MemFile that is not already persisted to disk. - * This might require a rewrite of the file, if the size of the file need - * to change. Flush updates the following in the MemFile: - * - Updates state saying all is persisted. - * - If file was rewritten and was in unwanted version, file version may - * have changed to wanted version. - * - Sizes of blocks in the file may have changed. - * - Rewrite file if changes would leave the file too empty. (Thus, - * memfile given might not be dirty but still a write may be needed) - */ - void flush(MemFile&, Environment&, bool autoRepair = true); - - /** - * Verify that file is not corrupt. - * @return True if file is fine. - */ - bool verify(MemFile& file, Environment& env, - std::ostream& errorReport, uint16_t fileVerifyFlags = 0) - { return verify(file, env, errorReport, false, fileVerifyFlags); } - - /** - * Verify that file is not corrupt and repair it if it is. - * @return True if file was fine. False if any errors were fixed. 
- */ - bool repair(MemFile& file, Environment& env, - std::ostream& errorReport, uint16_t fileVerifyFlags = 0) - { return verify(file, env, errorReport, true, fileVerifyFlags); } - - /** - * Utility functions used by verify to remove data from memfile that is no - * longer pointing to valid data. - */ - void deleteFile(const MemFile& file, Environment& env); - void removeAllSlotsExcept(MemFile& file, std::vector<Timestamp>& keep); - -private: - void addVersionSerializer(VersionSerializer::UP); - VersionSerializer& getVersionSerializer(const MemFile& file); - - void loadFileImpl(MemFile&, Environment&); - - /** - * Check file for errors, generate report of errors. Fix if repairErrors - * is set. Returns true if no failures were found. - */ - bool verify(MemFile& file, Environment&, - std::ostream& errorReport, bool repairErrors, - uint16_t fileVerifyFlags); - - MemFilePersistenceThreadMetrics& getMetrics() const { - return _metricProvider.getMetrics(); - } - - void sendNotifyBucketCommand(const MemFile&, Environment&); -}; - -} // storage -} // memfile - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.cpp deleted file mode 100644 index c52cd0b359d..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "serializationmetrics.h" - -namespace storage { -namespace memfile { - -SerializationWriteMetrics::SerializationWriteMetrics(const std::string& name, MetricSet& owner) - : MetricSet(name, "", - "Write metrics for memfile persistence engine", - &owner), - headerLatency("header_latency", "", - "Time spent writing a single contiguous header location " - "on the disk.", this), - headerSize("header_size", "", - "Average size of contiguous header disk writes", this), - bodyLatency("body_latency", "", - "Time spent writing a single contiguous body location " - "on the disk.", this), - bodySize("body_size", "", - "Average size of contiguous body disk writes", this), - metaLatency("meta_latency", "", - "Time spent writing file header and slot metadata", this), - metaSize("meta_size", "", - "Size of file header and metadata writes", this), - totalLatency("total_latency", "", - "Total time spent performing slot file writing", this) -{ } - -SerializationWriteMetrics::~SerializationWriteMetrics() { } - -SerializationMetrics::SerializationMetrics(const std::string& name, MetricSet* owner) - : MetricSet(name, "", - "(De-)serialization I/O metrics for memfile " - "persistence engine", owner), - initialMetaReadLatency( - "initial_meta_read_latency", "", - "Time spent doing the initial read of " - "the file header and most (or all) of metadata", - this), - tooLargeMetaReadLatency( - "too_large_meta_read_latency", "", - "Time spent doing additional read for " - "metadata too large to be covered by initial " - "read", this), - totalLoadFileLatency( - "total_load_file_latency", "", - "Total time spent initially loading a " - "file from disk", this), - verifyLatency( - "verify_latency", "", - "Time spent performing file verification", this), - deleteFileLatency( - "delete_file_latency", "", - "Time spent deleting a file from disk", this), - headerReadLatency( - "header_read_latency", "", - "Time spent reading a single contiguous header location " - "on the disk (may span 
many document blobs)", this), - headerReadSize( - "header_read_size", "", - "Size of contiguous header disk location reads", this), - bodyReadLatency( - "body_read_latency", "", - "Time spent reading a single contiguous body location " - "on the disk (may span many document blobs)", this), - bodyReadSize( - "body_read_size", "", - "Size of contiguous body disk location reads", this), - cacheUpdateAndImplicitVerifyLatency( - "cache_update_and_implicit_verify_latency", "", - "Time spent updating memory cache structures and verifying " - "read data blocks for corruptions", this), - fullRewritesDueToDownsizingFile( - "full_rewrites_due_to_downsizing_file", "", - "Number of times a file was rewritten fully because the " - "original file had too low fill rate", this), - fullRewritesDueToTooSmallFile( - "full_rewrites_due_to_too_small_file", "", - "Number of times a file was rewritten fully because the " - "original file did not have sufficient free space for a " - "partial write", this), - partialWrite("partialwrite", *this), - fullWrite("fullwrite", *this) -{ } - -SerializationMetrics::~SerializationMetrics() { } - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h deleted file mode 100644 index 82d347d0b5b..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/serializationmetrics.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/metrics/metrics.h> - -namespace storage { -namespace memfile { - -class SerializationWriteMetrics : public metrics::MetricSet -{ -public: - metrics::DoubleAverageMetric headerLatency; - metrics::LongAverageMetric headerSize; - metrics::DoubleAverageMetric bodyLatency; - metrics::LongAverageMetric bodySize; - metrics::DoubleAverageMetric metaLatency; - metrics::LongAverageMetric metaSize; - metrics::DoubleAverageMetric totalLatency; - - SerializationWriteMetrics(const std::string& name, MetricSet& owner); - ~SerializationWriteMetrics(); -}; - -class SerializationMetrics : public metrics::MetricSet -{ -public: - metrics::DoubleAverageMetric initialMetaReadLatency; - metrics::DoubleAverageMetric tooLargeMetaReadLatency; - metrics::DoubleAverageMetric totalLoadFileLatency; - metrics::DoubleAverageMetric verifyLatency; - metrics::DoubleAverageMetric deleteFileLatency; - metrics::DoubleAverageMetric headerReadLatency; - metrics::LongAverageMetric headerReadSize; - metrics::DoubleAverageMetric bodyReadLatency; - metrics::LongAverageMetric bodyReadSize; - metrics::DoubleAverageMetric cacheUpdateAndImplicitVerifyLatency; - metrics::LongCountMetric fullRewritesDueToDownsizingFile; - metrics::LongCountMetric fullRewritesDueToTooSmallFile; - SerializationWriteMetrics partialWrite; - SerializationWriteMetrics fullWrite; - - SerializationMetrics(const std::string& name, MetricSet* owner = 0); - ~SerializationMetrics(); -}; - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp deleted file mode 100644 index 5277ae561b1..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.cpp +++ /dev/null @@ -1,546 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "simplememfileiobuffer.h" -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/document/util/bytebuffer.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".memfile.simpleiobuffer"); - -namespace storage::memfile { - -namespace { - -uint32_t calculateChecksum(const void* pos, uint32_t size) { - vespalib::crc_32_type calculator; - calculator.process_bytes(pos, size); - return calculator.checksum(); -} - -} - -constexpr size_t SimpleMemFileIOBuffer::WORKING_BUFFER_SIZE; - -SimpleMemFileIOBuffer::SimpleMemFileIOBuffer( - VersionSerializer& reader, - vespalib::LazyFile::UP file, - FileInfo::UP info, - const FileSpecification& fileSpec, - const Environment& env) - : _reader(reader), - _data(2), - _workingBuffers(2), - _file(std::move(file)), - _fileInfo(std::move(info)), - _fileSpec(fileSpec), - _env(env), - _options(env.acquireConfigReadLock().options()) -{ -} - -SimpleMemFileIOBuffer::~SimpleMemFileIOBuffer() {} - -void -SimpleMemFileIOBuffer::close() -{ - if (_file->isOpen()) { - _file->close(); - } -} - -const SimpleMemFileIOBuffer::Data& -SimpleMemFileIOBuffer::getData(DocumentPart part, DataLocation loc) const -{ - DataMap::const_iterator iter = _data[part].find(loc); - - if (iter == _data[part].end()) { - std::ostringstream ost; - ost << "Location " << loc - << " was not found for " << (part == HEADER ? 
"Header" : "Body"); - throw PartNotCachedException(ost.str(), VESPA_STRLOC); - } - - return iter->second; -} - -document::Document::UP -SimpleMemFileIOBuffer::getDocumentHeader( - const document::DocumentTypeRepo& repo, - DataLocation loc) const -{ - const Data& data = getData(HEADER, loc); - - Document::UP doc(new Document()); - document::ByteBuffer buf(data.buf->getBuffer() + data.pos, - data.buf->getSize() - data.pos); - - doc->deserializeHeader(repo, buf); - return doc; -} - -document::DocumentId -SimpleMemFileIOBuffer::getDocumentId(DataLocation loc) const -{ - const Data& data = getData(HEADER, loc); - - const char* buf = data.buf->getBuffer() + data.pos + loc._size; - buf -= 2 * sizeof(uint32_t); - - uint32_t nameLen = *(const uint32_t*)(buf); - buf -= nameLen; - - return document::DocumentId(vespalib::stringref(buf, nameLen)); -} - -void -SimpleMemFileIOBuffer::readBody( - const document::DocumentTypeRepo& repo, - DataLocation loc, - Document& doc) const -{ - const Data& data = getData(BODY, loc); - - document::ByteBuffer buf(data.buf->getBuffer() + data.pos, - data.buf->getSize() - data.pos); - - doc.deserializeBody(repo, buf); -} - -DataLocation -SimpleMemFileIOBuffer::addLocation(DocumentPart part, - BufferAllocation newData) -{ - if (!newData.getSharedBuffer().get()) { - LOG(spam, "Not adding location since data is null"); - return DataLocation(0, 0); - } - - DataMap& target = _data[part]; - DataLocation loc = DataLocation(_fileInfo->getBlockSize(part), newData.getSize()); - - DataMap::reverse_iterator iter = target.rbegin(); - if (iter != target.rend() && iter->first.endPos() > loc._pos) { - loc = DataLocation(iter->first.endPos(), newData.getSize()); - } - - std::pair<DataMap::iterator, bool> existing( - target.insert(std::make_pair(loc, Data(newData.getSharedBuffer(), - newData.getBufferPosition(), - false)))); - if (!existing.second) { - LOG(error, "%s: addLocation attempted %s insert with location %u,%u, " - "but that location already exists", - 
_fileSpec.toString().c_str(), - getDocumentPartName(part), - loc._pos, - loc._size); - assert(false); - } - - LOG(spam, "%s: added %s at location %u,%u (buffer %p, position %u)", - _fileSpec.getBucketId().toString().c_str(), - getDocumentPartName(part), - loc._pos, - loc._size, - newData.getSharedBuffer().get(), - newData.getBufferPosition()); - return loc; -} - -void -SimpleMemFileIOBuffer::HeaderChunkEncoder::bufferDocument(const Document& doc) -{ - assert(_serializedDoc.empty()); - doc.serializeHeader(_serializedDoc); -} - -SimpleMemFileIOBuffer::HeaderChunkEncoder::HeaderChunkEncoder(const document::DocumentId& docId) - : _serializedDoc(DEFAULT_STREAM_ALLOC_SIZE), - _docId(docId.toString()) -{ } -SimpleMemFileIOBuffer::HeaderChunkEncoder::~HeaderChunkEncoder() {} - -/** - * Buffer is comprised of the following: - * - Document header blob (n bytes) - * - CRC32 of header blob (4 bytes) - * - Document Id (n bytes) - * - Length of document id (4 bytes) - * - CRC32 of document id and length (4 bytes) - * - * To a reader, the length of the header blob is inferred from length of - * total buffer chunk minus the overhead by the doc id string and metadata in - * the chunk trailer. - */ -void -SimpleMemFileIOBuffer::HeaderChunkEncoder::writeTo(BufferAllocation& buf) const -{ - assert(buf.getSize() >= encodedSize()); - // Note that docSize may be zero throughout this function. 
- const uint32_t docSize = _serializedDoc.size(); - const uint32_t docChecksum = calculateChecksum( - _serializedDoc.peek(), docSize); - const uint32_t idLen = _docId.size(); - - vespalib::crc_32_type nameChecksum; - nameChecksum.process_bytes(_docId.c_str(), idLen); - nameChecksum.process_bytes(reinterpret_cast<const char*>(&idLen), - sizeof(uint32_t)); - const uint32_t trailerChecksum = nameChecksum.checksum(); - - memcpy(buf.getBuffer(), _serializedDoc.peek(), docSize); - char* trailer = buf.getBuffer() + docSize; - memcpy(trailer, &docChecksum, sizeof(uint32_t)); - trailer += sizeof(uint32_t); - memcpy(trailer, _docId.c_str(), idLen); - trailer += idLen; - memcpy(trailer, &idLen, sizeof(uint32_t)); - trailer += sizeof(uint32_t); - memcpy(trailer, &trailerChecksum, sizeof(uint32_t)); -} - -bool -SimpleMemFileIOBuffer::writeBackwardsCompatibleRemoves() const -{ - return !_options->_defaultRemoveDocType.empty(); -} - -document::Document::UP -SimpleMemFileIOBuffer::generateBlankDocument( - const DocumentId& id, - const document::DocumentTypeRepo& repo) const -{ - vespalib::string typeName( - id.hasDocType() ? 
id.getDocType() - : _options->_defaultRemoveDocType); - const document::DocumentType* docType(repo.getDocumentType(typeName)); - if (!docType) { - throw vespalib::IllegalArgumentException( - "Could not serialize document for remove with unknown " - "doctype '" + typeName + "'"); - } - return std::unique_ptr<Document>(new Document(*docType, id)); -} - -SimpleMemFileIOBuffer::BufferAllocation -SimpleMemFileIOBuffer::serializeHeader(const Document& doc) -{ - HeaderChunkEncoder encoder(doc.getId()); - encoder.bufferDocument(doc); - BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize())); - encoder.writeTo(buf); - - return buf; -} - -SimpleMemFileIOBuffer::BufferAllocation -SimpleMemFileIOBuffer::serializeDocumentIdOnlyHeader( - const DocumentId& id, - const document::DocumentTypeRepo& repo) -{ - HeaderChunkEncoder encoder(id); - if (writeBackwardsCompatibleRemoves()) { - Document::UP blankDoc(generateBlankDocument(id, repo)); - encoder.bufferDocument(*blankDoc); - } - BufferAllocation buf(allocateBuffer(HEADER, encoder.encodedSize())); - encoder.writeTo(buf); - - return buf; -} - -DataLocation -SimpleMemFileIOBuffer::addDocumentIdOnlyHeader( - const DocumentId& docId, - const document::DocumentTypeRepo& repo) -{ - return addLocation(HEADER, serializeDocumentIdOnlyHeader(docId, repo)); -} - -DataLocation -SimpleMemFileIOBuffer::addHeader(const Document& doc) -{ - return addLocation(HEADER, serializeHeader(doc)); -} - -SimpleMemFileIOBuffer::BufferAllocation -SimpleMemFileIOBuffer::serializeBody(const Document& doc) -{ - vespalib::nbostream output(5 * 1024); - doc.serializeBody(output); - - if (output.empty()) { - return BufferAllocation(); - } - - BufferAllocation val(allocateBuffer(BODY, output.size() + sizeof(uint32_t))); - memcpy(val.getBuffer(), output.peek(), output.size()); - - // Also append CRC32 of body block to buffer - uint32_t checksum = calculateChecksum(output.peek(), output.size()); - char* trailer = val.getBuffer() + output.size(); - 
memcpy(trailer, &checksum, sizeof(uint32_t)); - - return val; -} - -SimpleMemFileIOBuffer::BufferAllocation -SimpleMemFileIOBuffer::allocateBuffer(DocumentPart part, - uint32_t sz, - SharedBuffer::Alignment align) -{ - // If the requested size is greater than or equal to our working buffer - // size, simply allocate a separate buffer for it. - if (sz >= WORKING_BUFFER_SIZE) { - return BufferAllocation(std::make_shared<SharedBuffer>(sz), 0, sz); - } - - SharedBuffer::SP &bufSP(_workingBuffers[part]); - bool requireNewBlock = false; - if (!bufSP.get()) { - requireNewBlock = true; - } else if (!bufSP->hasRoomFor(sz, align)) { - requireNewBlock = true; - } - - if (!requireNewBlock) { - return BufferAllocation(bufSP, - static_cast<uint32_t>(bufSP->allocate(sz, align)), - sz); - } else { - auto newBuf = std::make_shared<SharedBuffer>(WORKING_BUFFER_SIZE); - bufSP = newBuf; - return BufferAllocation(newBuf, - static_cast<uint32_t>(newBuf->allocate(sz, align)), - sz); - } -} - -DataLocation -SimpleMemFileIOBuffer::addBody(const Document& doc) -{ - return addLocation(BODY, serializeBody(doc)); -} - -void -SimpleMemFileIOBuffer::clear(DocumentPart part) -{ - LOG(debug, "%s: cleared all data for part %s", - _fileSpec.getBucketId().toString().c_str(), - getDocumentPartName(part)); - _data[part].clear(); -} - -bool -SimpleMemFileIOBuffer::verifyConsistent() const -{ - return true; -} - -void -SimpleMemFileIOBuffer::move(const FileSpecification& target) -{ - LOG(debug, "Moving %s -> %s", - _file->getFilename().c_str(), - target.getPath().c_str()); - _file->close(); - - if (vespalib::fileExists(_file->getFilename())) { - vespalib::rename(_file->getFilename(), target.getPath(), true, true); - } - - _file.reset( - new vespalib::LazyFile(target.getPath(), vespalib::File::DIRECTIO, true)); -} - -DataLocation -SimpleMemFileIOBuffer::copyCache(const MemFileIOInterface& source, - DocumentPart part, - DataLocation loc) -{ - if (loc._size == 0) { - return loc; - } - - const 
SimpleMemFileIOBuffer& srcBuf( - static_cast<const SimpleMemFileIOBuffer&>(source)); - Data data = srcBuf.getData(part, loc); - - BufferAllocation val(allocateBuffer(part, loc._size)); - memcpy(val.getBuffer(), data.buf->getBuffer() + data.pos, loc._size); - - LOG(spam, - "Copied cached data from %s to %s for location %u,%u buffer pos=%u", - srcBuf._fileSpec.getBucketId().toString().c_str(), - _fileSpec.getBucketId().toString().c_str(), - loc._pos, - loc._size, - data.pos); - - return addLocation(part, val); -} - - -void -SimpleMemFileIOBuffer::cacheLocation(DocumentPart part, - DataLocation loc, - BufferType::SP buf, - uint32_t bufferPos) -{ - LOG(spam, - "%s: added existing %s buffer at location %u,%u " - "buffer=%p buffer pos=%u", - _fileSpec.toString().c_str(), - getDocumentPartName(part), - loc._pos, - loc._size, - buf.get(), - bufferPos); - _data[part][loc] = Data(std::move(buf), bufferPos, true); -} - -bool -SimpleMemFileIOBuffer::isCached(DataLocation loc, - DocumentPart type) const -{ - if (loc._size == 0) { - // Count zero-sized locations as cached - return true; - } - - return _data[type].find(loc) != _data[type].end(); -} - -bool -SimpleMemFileIOBuffer::isPersisted(DataLocation loc, - DocumentPart type) const -{ - DataMap::const_iterator iter = _data[type].find(loc); - - // If the buffer doesn't know about the data at all, - // we must assume it is already persisted. How else would the file - // know about the location? 
- if (iter == _data[type].end()) { - return true; - } - - return iter->second.persisted; -} - -void -SimpleMemFileIOBuffer::ensureCached(Environment& env, - DocumentPart part, - const std::vector<DataLocation>& locations) -{ - std::vector<DataLocation> nonCached; - nonCached.reserve(locations.size()); - - for (uint32_t i = 0; i < locations.size(); ++i) { - if (_data[part].find(locations[i]) == _data[part].end()) { - nonCached.push_back(locations[i]); - } - } - - _reader.cacheLocations(*this, env, *_options, part, nonCached); -} - -void -SimpleMemFileIOBuffer::persist(DocumentPart part, - DataLocation oldLoc, - DataLocation newLoc) -{ - Data newData = getData(part, oldLoc); - newData.persisted = true; - size_t erased = _data[part].erase(oldLoc); - assert(erased > 0); - (void) erased; - _data[part][newLoc] = newData; - - LOG(spam, "%s: persisted %s for %u,%u -> %u,%u", - _fileSpec.getBucketId().toString().c_str(), - getDocumentPartName(part), - oldLoc._pos, oldLoc._size, - newLoc._pos, newLoc._size); -} - -void -SimpleMemFileIOBuffer::remapAndPersistAllLocations( - DocumentPart part, - const std::map<DataLocation, DataLocation>& locs) -{ - DataMap remappedData; - - typedef std::map<DataLocation, DataLocation>::const_iterator Iter; - for (Iter it(locs.begin()), e(locs.end()); it != e; ++it) { - DataLocation oldLoc = it->first; - DataLocation newLoc = it->second; - - LOG(spam, "%s: remapping %u,%u -> %u,%u", - _fileSpec.getBucketId().toString().c_str(), - oldLoc._pos, oldLoc._size, - newLoc._pos, newLoc._size); - - Data newData = getData(part, oldLoc); - newData.persisted = true; - std::pair<DataMap::iterator, bool> inserted( - remappedData.insert(std::make_pair(newLoc, newData))); - assert(inserted.second); - } - _data[part].swap(remappedData); - - LOG(debug, - "%s: remapped %zu locations. 
Discarded %zu locations that " - "had no new mapping", - _fileSpec.getBucketId().toString().c_str(), - locs.size(), - _data[part].size() - locs.size()); -} - -const char* -SimpleMemFileIOBuffer::getBuffer(DataLocation loc, DocumentPart part) const -{ - const Data& data = getData(part, loc); - return data.buf->getBuffer() + data.pos; -} - -uint32_t -SimpleMemFileIOBuffer::getSerializedSize(DocumentPart part, - DataLocation loc) const -{ - if (part == HEADER) { - const Data& data = getData(part, loc); - assert(loc._size > sizeof(uint32_t)*3); - const char* bufEnd = data.buf->getBuffer() + data.pos + loc._size; - uint32_t docIdLen = *reinterpret_cast<const uint32_t*>( - bufEnd - sizeof(uint32_t)*2); - return loc._size - sizeof(uint32_t)*3 - docIdLen; - } else { - return loc._size - sizeof(uint32_t); - } -} - -size_t -SimpleMemFileIOBuffer::getCachedSize(DocumentPart part) const -{ - const DataMap& dm(_data[part]); - vespalib::hash_set<const void*> seenBufs(dm.size()); - size_t ret = 0; - for (DataMap::const_iterator it(dm.begin()), e(dm.end()); it != e; ++it) { - if (seenBufs.find(it->second.buf->getBuffer()) != seenBufs.end()) { - continue; - } - - size_t bufSize = it->second.buf->getSize(); - // Account for (approximate) mmap overhead. - bufSize = util::alignUpPow2<4096>(bufSize); - ret += bufSize; - seenBufs.insert(it->second.buf->getBuffer()); - } - return ret; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h deleted file mode 100644 index ccee518a1c2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/simplememfileiobuffer.h +++ /dev/null @@ -1,330 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include "buffer.h" -#include "fileinfo.h" -#include "versionserializer.h" -#include <vespa/memfilepersistence/memfile/memfileiointerface.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/exception.h> -#include <vespa/vespalib/util/alloc.h> -#include <vespa/vespalib/objects/nbostream.h> - -namespace storage { -namespace memfile { - -namespace util { - -/** - * @param Alignment (template) must be a power of two. - * @return val aligned up so that retval >= val && retval % Alignment == 0 - */ -template <size_t Alignment> -size_t -alignUpPow2(const size_t val) -{ - const size_t mask = Alignment - 1; - return (val + mask) & ~mask; -} - -/** - * Round any non-power of two value up to the nearest power of two. E.g: - * nextPow2(3) -> 4 - * nextPow2(15) -> 16 - * nextPow2(40) -> 64 - * nextPow2(64) -> 64 - * - * From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - */ -inline uint32_t -nextPow2(uint32_t v) -{ - --v; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - ++v; - return v; -} - -} - -/** - * Implements a simple buffered cache for a memfile. - */ -class SimpleMemFileIOBuffer : public MemFileIOInterface -{ -public: - /** - * Any buffer requests >= than this size will get their own, separately - * allocated buffer. For all other cases, we'll attempt to squeeze as many - * documents as possible into the same (shared) buffer. 
- */ - static constexpr size_t WORKING_BUFFER_SIZE = 16*1024; // TODO(vekterli): make configurable - - class SharedBuffer - { - public: - static const size_t ALLOC_ALIGNMENT = 8; - enum Alignment { - NO_ALIGN, - ALIGN_512_BYTES - }; - - using SP = std::shared_ptr<SharedBuffer>; - explicit SharedBuffer(size_t totalSize) - : _buf(vespalib::alloc::Alloc::allocMMap(totalSize)), - _totalSize(totalSize), - _usedSize(0) - { } - - size_t getSize() const { return _totalSize; } - size_t getUsedSize() const { return _usedSize; } - size_t getFreeSize() const { return getSize() - getUsedSize(); } - bool hasRoomFor(size_t sz, Alignment align = NO_ALIGN) const { - return (align == ALIGN_512_BYTES - ? util::alignUpPow2<512>(_usedSize) - : _usedSize) + sz <= getSize(); - } - - /** - * Returns an offset into the shared buffer which is valid to use for - * sz bytes. If align is ALIGN_512_BYTES, the returned offset will be - * aligned on a 512-byte boundary. It is the responsibility of the - * caller to ensure buffers used for Direct I/O are allocated with a - * size that is also evenly 512-byte divisible. 
- */ - size_t allocate(size_t sz, Alignment align = NO_ALIGN) { - if (align == ALIGN_512_BYTES) { - _usedSize = util::alignUpPow2<512>(_usedSize); - } - assert(hasRoomFor(sz)); - size_t ret = _usedSize; - _usedSize += util::alignUpPow2<ALLOC_ALIGNMENT>(sz); - return ret; - } - - char* getBuffer() { - return static_cast<char*>(_buf.get()); - } - const char* getBuffer() const { - return static_cast<const char*>(_buf.get()); - } - private: - vespalib::alloc::Alloc _buf; - size_t _totalSize; - size_t _usedSize; - }; - - struct BufferAllocation - { - BufferAllocation() : pos(0), size(0) {} - - BufferAllocation(SharedBuffer::SP b, uint32_t p, uint32_t sz) - : buf(std::move(b)), pos(p), size(sz) { } - - /** - * Get buffer area available to this specific allocation - */ - char* getBuffer() { return buf->getBuffer() + pos; } - const char* getBuffer() const { return buf->getBuffer() + pos; } - - /** - * Get buffer that is (potentially) shared between many individual - * allocations. - */ - SharedBuffer::SP getSharedBuffer() { return buf; } - uint32_t getBufferPosition() const { return pos; } - uint32_t getSize() const { return size; } - - SharedBuffer::SP buf; - uint32_t pos; - uint32_t size; - }; - - /** - * Utility class for fully encoding a chunk of file data for a document - * header in a slotfile. Supports writing header chunks with and without - * a document payload. - */ - class HeaderChunkEncoder - { - vespalib::nbostream _serializedDoc; - vespalib::string _docId; - public: - static const size_t DEFAULT_STREAM_ALLOC_SIZE = 5 * 2014; - - HeaderChunkEncoder(const document::DocumentId& docId); - ~HeaderChunkEncoder(); - - /** - * Serializes header chunk to buf, which must have at least a size - * of encodedSize() bytes available. - */ - void writeTo(BufferAllocation& buf) const; - - /** - * Assign (and buffer) document that should be written to the chunk. 
- * If this method is not called on an encoder prior to writeTo(), the - * chunk will contain only a document ID but no payload. This is - * perfectly fine for 5.1+, but is not supported by 5.0 readers. - * It is safe for the provided document to go out of scope after having - * called this method. - * Since this method buffers it may only be called once per encoder. - */ - void bufferDocument(const document::Document&); - - /** - * Compute total size of chunk as it will reside on disk, including - * document blob/id payload and metadata overhead. - * Max doc size is <=64M so we cannot possibly exceed 32 bits. - */ - uint32_t encodedSize() const { - return (_serializedDoc.size() + trailerLength()); - } - private: - static constexpr uint32_t fixedTrailerLength() { - // CRC32 of doc blob + u32 doc id length + CRC32 of doc id. - return (sizeof(uint32_t) * 3); - } - uint32_t trailerLength() const { - return (fixedTrailerLength() + _docId.size()); - } - }; - - typedef SharedBuffer BufferType; - - class PartNotCachedException : public vespalib::Exception { - public: - PartNotCachedException(const std::string& msg, const std::string& location) - : vespalib::Exception(msg, location) {}; - }; - - SimpleMemFileIOBuffer( - VersionSerializer& reader, - vespalib::LazyFile::UP file, - FileInfo::UP fileInfo, - const FileSpecification& fileSpec, - const Environment& env); - ~SimpleMemFileIOBuffer(); - - Document::UP getDocumentHeader(const document::DocumentTypeRepo& repo, DataLocation loc) const override; - document::DocumentId getDocumentId(DataLocation loc) const override; - void readBody(const document::DocumentTypeRepo& repo, DataLocation loc, Document& doc) const override; - DataLocation addDocumentIdOnlyHeader(const DocumentId& id, const document::DocumentTypeRepo& repo) override; - DataLocation addHeader(const Document& doc) override; - DataLocation addBody(const Document& doc) override; - void clear(DocumentPart type) override; - bool verifyConsistent() const override; - 
- /** - * Moves the underlying file to another location. - */ - void move(const FileSpecification& target) override; - void close() override; - DataLocation copyCache(const MemFileIOInterface& source, DocumentPart part, DataLocation loc) override; - - /** - * Add a location -> buffer mapping - */ - void cacheLocation(DocumentPart part, DataLocation loc, BufferType::SP buf, uint32_t bufferPos); - - /** - * @return Returns true if the given location is cached. - */ - bool isCached(DataLocation loc, DocumentPart type) const override; - - /** - * @return Returns true if the given location has been persisted to disk. - */ - - bool isPersisted(DataLocation loc, DocumentPart type) const override; - uint32_t getSerializedSize(DocumentPart part, DataLocation loc) const override; - void ensureCached(Environment& env, DocumentPart part, const std::vector<DataLocation>& locations) override; - - /** - * Moves the given location into the persisted data area. - * oldLoc must be outside the persisted data area, and newLoc must be within. - */ - void persist(DocumentPart part, DataLocation oldLoc, DataLocation newLoc); - - /** - * Remaps every single location for the given part. - * WARNING: All existing locations that are not remapped will be discarded! 
- */ - void remapAndPersistAllLocations(DocumentPart part, - const std::map<DataLocation, DataLocation>& locs); - - vespalib::LazyFile& getFileHandle() { return *_file; }; - const vespalib::LazyFile& getFileHandle() const { return *_file; }; - - const FileInfo& getFileInfo() const { return *_fileInfo; } - void setFileInfo(FileInfo::UP fileInfo) { _fileInfo = std::move(fileInfo); } - - const FileSpecification& getFileSpec() const { return _fileSpec; } - - const char* getBuffer(DataLocation loc, DocumentPart part) const; - - size_t getCachedSize(DocumentPart part) const override; - - BufferAllocation allocateBuffer(DocumentPart part, - uint32_t sz, - SharedBuffer::Alignment align - = SharedBuffer::NO_ALIGN); - - /** - * Whether removes should be written with a document header payload in - * order to be backwards-compatible with VDS 5.0. This is in order to - * support a scenario where a cluster is downgraded from 5.1+ -> 5.0. - */ - bool writeBackwardsCompatibleRemoves() const; - - /** - * Generate a document with no content which stores the given document ID - * and is of the type inferred by the ID. If the ID is of legacy format - * (and thus without a type), the default configured type will be used. - */ - Document::UP generateBlankDocument(const DocumentId&, - const document::DocumentTypeRepo&) const; - -private: - struct Data { - Data() : pos(0), persisted(false) {} - - Data(BufferType::SP b, uint32_t p, bool isPersisted) - : buf(std::move(b)), pos(p), persisted(isPersisted) {} - - BufferType::SP buf; - uint32_t pos; - bool persisted; - }; - - typedef std::map<DataLocation, Data> DataMap; - - VersionSerializer& _reader; - std::vector<DataMap> _data; - std::vector<SharedBuffer::SP> _workingBuffers; - vespalib::LazyFile::UP _file; - FileInfo::UP _fileInfo; - FileSpecification _fileSpec; - const Environment& _env; - // Same memfile config is used during entire lifetime of buffer object. 
- // This makes live reconfigs kick in for all files only when all buckets - // have been evicted from the cache post-reconfig, but greatly simplifies - // the reasoning about a given bucket in the face of such actions. - std::shared_ptr<const Options> _options; - - DataLocation addLocation(DocumentPart part, BufferAllocation newData); - const Data& getData(DocumentPart part, DataLocation loc) const; - BufferAllocation serializeDocumentIdOnlyHeader(const DocumentId& id, const document::DocumentTypeRepo&); - BufferAllocation serializeHeader(const Document& doc); - BufferAllocation serializeBody(const Document& doc); - - friend class SimpleMemFileIOBufferTest; -}; - -} -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp deleted file mode 100644 index 1aff070e72f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "slotreadplanner.h" -#include <storage/persistence/memfile/memfile/memslot.h> - -namespace storage { -namespace memfile { - -SlotDiskIoPlanner::SlotDiskIoPlanner( - const std::vector<const MemSlot*> desiredSlots, - DocumentPart highestPartNeeded, - uint32_t maxGap, - uint32_t headerBlockStartIndex, - uint32_t bodyBlockStartIndex) - : _operations(), - _startIndexes(2, 0) -{ - _startIndexes[HEADER] = headerBlockStartIndex; - _startIndexes[BODY] = bodyBlockStartIndex; - processSlots(desiredSlots, highestPartNeeded, maxGap); -} - -namespace { - uint32_t alignDown(uint32_t value) { - uint32_t blocks = value / 512; - return blocks * 512; - }; - - uint32_t alignUp(uint32_t value) { - uint32_t blocks = (value + 512 - 1) / 512; - return blocks * 512; - }; -} - -void -SlotDiskIoPlanner::scheduleLocation(const MemSlot& slot, - DocumentPart type, - std::vector<DataLocation>& ops) -{ - if (!slot.partAvailable(type) && slot.getLocation(type)._size) { - ops.push_back(DataLocation( - slot.getLocation(type)._pos + _startIndexes[type], - slot.getLocation(type)._size)); - } -} - -void -SlotDiskIoPlanner::processSlots( - const std::vector<const MemSlot*> desiredSlots, - DocumentPart highestPartNeeded, - uint32_t maxGap) -{ - // Build list of disk read operations to do - std::vector<DataLocation> allOps; - // Create list of all locations we need to read - for (std::size_t i = 0; i < desiredSlots.size(); ++i) { - for (uint32_t p = 0; p <= uint32_t(highestPartNeeded); ++p) { - scheduleLocation(*desiredSlots[i], (DocumentPart) p, allOps); - } - } - // Sort list, and join elements close together into single IO ops - std::sort(allOps.begin(), allOps.end()); - for (size_t i = 0; i < allOps.size(); ++i) { - uint32_t start = alignDown(allOps[i]._pos); - uint32_t stop = alignUp(allOps[i]._pos + allOps[i]._size); - if (i != 0) { - uint32_t lastStop = _operations.back()._pos - + _operations.back()._size; - if (lastStop >= start || start - lastStop < maxGap) { - 
_operations.back()._size += (stop - lastStop); - continue; - } - } - _operations.push_back(DataLocation(start, stop - start)); - } -} - -uint32_t -SlotDiskIoPlanner::getTotalBufferSize() const -{ - uint32_t totalSize = 0; - for (size_t i = 0; i < _operations.size(); ++i) { - totalSize += _operations[i]._size; - } - return totalSize; -} - -void -SlotDiskIoPlanner::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; (void) indent; - for (std::size_t i = 0; i < _operations.size(); ++i) { - if (i > 0) out << ","; - out << "[" << _operations[i]._pos << "," - << (_operations[i]._size + _operations[i]._pos) << "]"; - } -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h deleted file mode 100644 index 4622b088f0f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/slotreadplanner.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::SlotDiskIoPlanner - * \ingroup memfile - * - * \brief Creates list of minimal IO operations to do versus disk. - * - * When accessing many locations on disk, it is not necessarily ideal to do a - * disk access per location. This class creates a minimal set of locations to - * access to avoid accessing more than a maximum gap of uninteresting data. 
- */ -#pragma once - -#include <storage/persistence/memfile/common/types.h> - -namespace storage { -namespace memfile { - -class MemSlot; - -class SlotDiskIoPlanner : public Types, public vespalib::Printable -{ -public: - SlotDiskIoPlanner(const std::vector<const MemSlot*> desiredSlots, - DocumentPart highestPartNeeded, - uint32_t maxGap, - uint32_t headerBlockStartIndex, - uint32_t bodyBlockStartIndex); - - const std::vector<DataLocation>& getIoOperations() const { - return _operations; - } - - /** - * Get the total amount of space needed to hold all the data from all - * locations identified to be accessed. Useful to create a buffer of correct - * size. - */ - uint32_t getTotalBufferSize() const; - - void print(std::ostream& out, bool verbose, - const std::string& indent) const; - -private: - std::vector<DataLocation> _operations; - std::vector<uint32_t> _startIndexes; - - void processSlots( - const std::vector<const MemSlot*> desiredSlots, - DocumentPart highestPartNeeded, - uint32_t maxGap); - - void scheduleLocation(const MemSlot&, DocumentPart, - std::vector<DataLocation>&); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp deleted file mode 100644 index 9653cfe35b6..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "uniqueslotgenerator.h" -#include <vespa/memfilepersistence/memfile/memfile.h> -#include <vespa/memfilepersistence/memfile/doccache.h> - -namespace storage { - -namespace memfile { - -bool -UniqueSlotGenerator::ContentLocation::operator==( - const ContentLocation& other) const -{ - if (_loc.valid() && other._loc.valid()) return _loc == other._loc; - return _content == other._content; -} - -bool -UniqueSlotGenerator::ContentLocation::operator<( - const ContentLocation& other) const -{ - if (_loc.valid() && other._loc.valid()) return _loc < other._loc; - if (other._loc.valid()) return false; - if (_loc.valid()) return true; - return _content < other._content; -} - -void -UniqueSlotGenerator::ContentLocation::print(std::ostream& out, bool, - const std::string&) const -{ - out << "ContentLocation(" << _loc << ", " - << std::hex << _content << std::dec << ")"; -} - -UniqueSlotGenerator::UniqueSlotGenerator(const MemFile& memFile) - : _slots(2), - _slotsInOrder(2) -{ - for (uint32_t i = 0; i < memFile.getSlotCount(); i++) { - const MemSlot& slot = memFile[i]; - addSlot(HEADER, slot); - if (slot.hasBodyContent()) addSlot(BODY, slot); - } -} - -void -UniqueSlotGenerator::addSlot(DocumentPart part, const MemSlot& slot) -{ - ContentLocation contentLoc(slot.getLocation(part)); - if (slot.getDocCache() != NULL) { - contentLoc._content = slot.getDocCache()->getPart(part).get(); - } - SlotList& loc = _slots[part][contentLoc]; - loc.push_back(&slot); - if (loc.size() == 1) { - _slotsInOrder[part].push_back(&loc); - } -} - -void -UniqueSlotGenerator::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - (void) verbose; - for (uint32_t i=0; i<2; ++i) { - DocumentPart part(static_cast<DocumentPart>(i)); - out << getDocumentPartName(part) << ":"; - const OrderedSlotList& list = _slotsInOrder[part]; - for (uint32_t j = 0; j < list.size(); ++j) { - const SlotList& slotList = *list[j]; - out << "\n" << indent << slotList[0]->getLocation(part) << ": 
"; - for (uint32_t k = 0; k < slotList.size(); ++k) { - if (k > 0) out << ", "; - out << slotList[k]->getTimestamp(); - } - } - if (i == 0) out << "\n"; - } -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h deleted file mode 100644 index c961f137b94..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/uniqueslotgenerator.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::UniqueSlotGenerator - * \ingroup memfile - * - * Generates a mapping from unique content locations on disk - * (or unique documents if not persisted) to a list of slots. - */ -#pragma once - -#include <vespa/memfilepersistence/common/types.h> - -namespace storage { -namespace memfile { - -class MemSlot; -class MemFile; - -class UniqueSlotGenerator : private Types, public vespalib::Printable -{ -public: - typedef std::vector<const MemSlot*> SlotList; - -private: - struct ContentLocation : public vespalib::Printable { - DataLocation _loc; - const document::StructFieldValue* _content; - - ContentLocation(const DataLocation& loc) : _loc(loc), _content(0) {} - - ContentLocation(const DataLocation& loc, - const document::StructFieldValue* content) - : _loc(loc), _content(content) {} - - bool operator<(const ContentLocation& other) const; - bool operator==(const ContentLocation& other) const; - - void print(std::ostream& out, bool verbose, - const std::string& indent) const override; - }; - - void addSlot(DocumentPart, const MemSlot&); - - typedef std::map<ContentLocation, SlotList> LocationToSlotMap; - typedef std::vector<SlotList*> OrderedSlotList; - - std::vector<LocationToSlotMap> _slots; - std::vector<OrderedSlotList> _slotsInOrder; - -public: - UniqueSlotGenerator(const MemFile& memFile); - - 
uint32_t getNumUnique(DocumentPart part) const { - return _slotsInOrder[part].size(); - } - - const SlotList& getSlots(DocumentPart part, uint32_t uniqueIndex) const { - return *_slotsInOrder[part][uniqueIndex]; - } - - void print(std::ostream&, bool verbose, const std::string& indent) const override; - -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h b/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h deleted file mode 100644 index 6da840baea7..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/mapper/versionserializer.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::VersionSerializer - * \ingroup memfile - * - * \brief Super class for file mappers implementing a file format. - * - * An implementation of this handles all specifics of reading and writing - * a file format. - */ - -#pragma once - -#include "buffer.h" -#include "mapperslotoperation.h" -#include <vespa/memfilepersistence/memfile/memfile.h> -#include <vespa/memfilepersistence/memfile/memfileiointerface.h> -#include <vespa/memfilepersistence/common/types.h> - - -namespace storage { -namespace memfile { - -// Avoid circular dependencies -class MemFileEnvironment; -class Options; - -struct VersionSerializer : protected Types { - using UP = std::unique_ptr<VersionSerializer>; - - virtual ~VersionSerializer() {} - - /** Returns the file version this implementation handles. */ - virtual FileVersion getFileVersion() = 0; - - /** - * The MemFileMapper main class reads file header to figure out what version - * it is in. Then loadFile is called on correct implementation to interpret - * the file. The part of the file already read is given to loadFile to avoid - * a re-read of the initial data. 
- */ - virtual void loadFile(MemFile& file, Environment&, - Buffer& buffer, uint64_t bytesRead) = 0; - - /** - * Flushes all content in MemFile that is altered or not persisted to disk - * to the physical file. This function should not handle file rewriting. If - * updates cannot be done to the existing file it needs to return in case - * we then want to rewrite the file in another format. - * - * Flush must update the following in the MemFile: - * - Update state saying all is persisted and nothing is altered - * - All block position and sizes need to be correct after flush. - * - * @return True if written successfully, false if file rewrite is required. - */ - enum class FlushResult { - ChangesWritten, - TooFewMetaEntries, - TooSmall, - TooLarge, - UnAltered - }; - virtual FlushResult flushUpdatesToFile(MemFile&, Environment&) = 0; - - /** - * This function is typically called when file doesn't already exist or - * flushUpdatesToFile return false, indicating that file need a total - * rewrite. Before calling this function, all data must be cached in the - * MemFile instance. - */ - virtual void rewriteFile(MemFile&, Environment&) = 0; - - /** - * Check file for errors, generate report of errors. Fix if repairErrors - * is set. Returns true if no failures were found or no errors were fixed. - */ - virtual bool verify(MemFile&, Environment&, - std::ostream& errorReport, bool repairErrors, - uint16_t fileVerifyFlags) = 0; - - - /** - * Cache locations into the given buffer. 
- */ - virtual void cacheLocations(MemFileIOInterface& buffer, - Environment& env, - const Options& options, - DocumentPart part, - const std::vector<DataLocation>& locations) = 0; - -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt deleted file mode 100644 index d3441fdc4bc..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(memfilepersistence_memfile OBJECT - SOURCES - memslot.cpp - memfile.cpp - slotiterator.cpp - memfilecompactor.cpp - memfilecache.cpp - shared_data_location_tracker.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp deleted file mode 100644 index f655192a387..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.cpp +++ /dev/null @@ -1,1117 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memfile.h" -#include "memfilecompactor.h" -#include "shared_data_location_tracker.h" -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/mapper/simplememfileiobuffer.h> -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/document/util/stringutil.h> -#include <vespa/vespalib/util/crc.h> -#include <ext/algorithm> -#include <iomanip> - -#include <vespa/log/bufferedlogger.h> -LOG_SETUP(".persistence.memfile.memfile"); - -namespace { - -template<class A> -std::vector<A> toVector(A entry) { - std::vector<A> entries; - entries.push_back(entry); - return entries; -}; - -} - -#define FAIL_INCONSISTENT(msg, slot) \ -{ \ - std::ostringstream error; \ - error << msg; \ - throw InconsistentSlotException(slot.toString() + ": " + error.str(), \ - _file, slot, VESPA_STRLOC); \ -} -#define FAIL_INCONSISTENT_FILE(msg) \ -{ \ - std::ostringstream error; \ - error << msg; \ - throw InconsistentException(error.str(), _file, VESPA_STRLOC); \ -} - -#define RETHROW_NON_MEMFILE_EXCEPTIONS \ - catch (MemFileException& exceptionToRethrow) { \ - throw; \ - } catch (vespalib::IoException& exceptionToRethrow) { \ - std::ostringstream wrappedMessage; \ - wrappedMessage << "Got IO exception while processing within " \ - << "memfile. Wrapping in memfile exception: "; \ - const std::string& sourceExceptionMessage( \ - exceptionToRethrow.getMessage()); \ - size_t pos = sourceExceptionMessage.find(':'); \ - wrappedMessage << sourceExceptionMessage.substr(pos + 2); \ - throw MemFileIoException(wrappedMessage.str(), _file, \ - exceptionToRethrow.getType(), VESPA_STRLOC) \ - .setCause(exceptionToRethrow); \ - } catch (vespalib::Exception& exceptionToRethrow) { \ - throw MemFileWrapperException( \ - "Got generic exception while processing within " \ - "memfile. 
Wrapping in memfile exception: " \ - + std::string(exceptionToRethrow.getMessage()), \ - _file, VESPA_STRLOC).setCause(exceptionToRethrow); \ - } - -namespace storage { -namespace memfile { - -MemFile::MemFile(const FileSpecification& file, - Environment& env, - const LoadOptions& opts) - : _flags(BUCKET_INFO_OUTDATED), - _info(), - _entries(), - _file(file), - _currentVersion(UNKNOWN), - _env(env) -{ - try{ - env._memFileMapper.loadFile(*this, env, opts.autoRepair); - } RETHROW_NON_MEMFILE_EXCEPTIONS; -} - -MemFile::~MemFile() {} - -MemFile::MemFile(const FileSpecification& file, Environment& env, - bool callLoadFile) - : _flags(BUCKET_INFO_OUTDATED), - _info(), - _entries(), - _file(file), - _currentVersion(UNKNOWN), - _env(env) -{ - if (callLoadFile) { - env._memFileMapper.loadFile(*this, env, false); - } -} - -void -MemFile::verifyConsistent() const -{ - _buffer->verifyConsistent(); -} - -uint16_t -MemFile::getDisk() const -{ - return _file.getDirectory().getIndex(); -} - -void -MemFile::move(const FileSpecification& file) -{ - // Any given bucket can either be moved to a more specific or less - // specific bucket in the same subtree. 
- assert(file.getBucketId().contains(_file.getBucketId()) - || _file.getBucketId().contains(file.getBucketId())); - _buffer->move(file); - _file = file; -} - -uint32_t -MemFile::getSlotCount() const -{ - return _entries.size(); -} - -const MemSlot* -MemFile::getSlotWithId(const document::DocumentId& id, - framework::MicroSecTime maxTimestamp) const -{ - for (uint32_t n=_entries.size(), i=n-1; i<n; --i) { - if (_entries[i].getTimestamp() > maxTimestamp) continue; - if (id.getGlobalId() != _entries[i].getGlobalId()) continue; - if (getDocumentId(_entries[i]) == id) return &_entries[i]; - } - return 0; -} - -namespace { - -struct MemSlotTimestampPredicate -{ - bool operator()(const MemSlot& a, Types::Timestamp time) const - { - return a.getTimestamp() < time; - } -}; - -} - -const MemSlot* -MemFile::getSlotAtTime(Timestamp time) const -{ - std::vector<MemSlot>::const_iterator it( - std::lower_bound(_entries.begin(), _entries.end(), - time, MemSlotTimestampPredicate())); - if (it != _entries.end() && it->getTimestamp() == time) { - return &*it; - } - return 0; -} - -void -MemFile::getSlotsByTimestamp( - const std::vector<Timestamp>& timestamps, - std::vector<const MemSlot*>& returned) const -{ - assert(__gnu_cxx::is_sorted(timestamps.begin(), timestamps.end())); - - std::size_t source = 0; - std::size_t target = 0; - - while (source < _entries.size() && target < timestamps.size()) { - if (_entries[source].getTimestamp() == timestamps[target]) { - returned.push_back(&_entries[source]); - ++source; - ++target; - } else if (_entries[source].getTimestamp() < timestamps[target]) { - ++source; - } else { - ++target; - } - } -} - -document::Document::UP -MemFile::getDocument(const MemSlot& slot, GetFlag getFlag) const -{ - LOG(spam, - "%s: getDocument(%s, %s)", - _file.getBucketId().toString().c_str(), - slot.toString().c_str(), - getFlag == HEADER_ONLY ? 
"header only" : "full document"); - ensureDocumentCached(slot, getFlag == HEADER_ONLY); - - auto& repo = _env.repo(); - Document::UP doc = _buffer->getDocumentHeader( - repo, slot.getLocation(HEADER)); - - if (doc.get() && getFlag == ALL && slot.getLocation(BODY)._size > 0) { - _buffer->readBody(repo, slot.getLocation(BODY), *doc); - } - - return doc; -} - -document::DocumentId -MemFile::getDocumentId(const MemSlot& slot) const -{ - LOG(spam, - "%s: getDocumentId(%s)", - _file.getBucketId().toString().c_str(), - slot.toString().c_str()); - ensureDocumentCached(slot, true); - - return _buffer->getDocumentId(slot.getLocation(HEADER)); -} - -void -MemFile::assertSlotContainedInThisBucket(const MemSlot& slot) const -{ - document::BucketId fileBucket(getBucketId()); - // Non-orderdoc documents should pass this first (very cheap) test. - if (slot.getGlobalId().containedInBucket(fileBucket)) { - return; - } - // Expensive path: get doc id and check against it instead. - DocumentId id(getDocumentId(slot)); - document::BucketIdFactory factory; - document::BucketId slotBucket(factory.getBucketId(id)); - - LOG(spam, - "%s: slot %s has GID not contained in bucket, checking against id %s", - fileBucket.toString().c_str(), - slot.toString().c_str(), - id.toString().c_str()); - - if (!fileBucket.contains(slotBucket)) { - LOG(error, - "Slot %s with document ID %s is not contained in %s. 
Terminating " - "in order to avoid bucket corruption.", - slot.toString().c_str(), - id.toString().c_str(), - fileBucket.toString().c_str()); - assert(false); - } -} - -void -MemFile::addPutSlot(const Document& doc, Timestamp time) -{ - DataLocation headerLoc = _buffer->addHeader(doc); - DataLocation bodyLoc = _buffer->addBody(doc); - - addSlot(MemSlot(doc.getId().getGlobalId(), - time, - headerLoc, - bodyLoc, - IN_USE | CHECKSUM_OUTDATED, - 0)); -} - -void -MemFile::addUpdateSlot(const Document& header, const MemSlot& body, Timestamp time) -{ - if (!body.getLocation(BODY).valid()) { - LOG(error, - "Slot %s has invalid body location while not " - "having body cached. This is an invalid state.", - body.toString().c_str()); - assert(false); - } - - DataLocation headerLoc = _buffer->addHeader(header); - DataLocation bodyLoc = body.getLocation(BODY); - - addSlot(MemSlot(header.getId().getGlobalId(), - time, - headerLoc, - bodyLoc, - IN_USE | CHECKSUM_OUTDATED, - 0)); -} - -void -MemFile::addRemoveSlot(const MemSlot& header, Timestamp time) -{ - addSlot(MemSlot(header.getGlobalId(), - time, - header.getLocation(HEADER), - DataLocation(0,0), - DELETED | IN_USE | CHECKSUM_OUTDATED, - 0)); -} - -void -MemFile::addRemoveSlotForNonExistingEntry(const DocumentId& docId, - Timestamp time, - RemoveType removeType) -{ - addSlot(MemSlot(docId.getGlobalId(), - time, - _buffer->addDocumentIdOnlyHeader(docId, _env.repo()), - DataLocation(0,0), - DELETED - | IN_USE - | CHECKSUM_OUTDATED - | (removeType == UNREVERTABLE_REMOVE ? DELETED_IN_PLACE : 0), - 0)); -} - -void -MemFile::addSlot(const MemSlot& slot) -{ - LOG(spam, - "%s: adding %s to memfile", - _file.getBucketId().toString().c_str(), - slot.toString().c_str()); - // TODO: Add exception here? 
- //assert(slot.partAvailable(BODY)); - assert(slot.getLocation(HEADER).valid()); - assert(slot.getLocation(BODY).valid()); - // Don't let full disk block remove entries or entries that - // are already fully persisted - - if (!slot.deleted() - && !slot.deletedInPlace() - && !(partPersisted(slot, HEADER) - && partPersisted(slot, BODY))) - { - verifyDiskNotFull(); - } - - // Optimize common case where slot we're adding has a higher - // timestamp than the last slot already stored. - if (!_entries.empty() - && slot.getTimestamp() > _entries.back().getTimestamp()) - { - _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; - _entries.push_back(slot); - return; - } - - std::vector<MemSlot> entries; - entries.reserve(_entries.size() + 1); - bool inserted = false; - for (uint32_t i=0; i<_entries.size(); ++i) { - if (_entries[i].getTimestamp() == slot.getTimestamp()) { - std::ostringstream err; - err << "Attempt of adding slot at timestamp " - << slot.getTimestamp() << " which already exist in file. 
" - << "Call modifySlot instead."; - LOG(error, "%s", err.str().c_str()); - assert(false); - } - if (!inserted && _entries[i].getTimestamp() > slot.getTimestamp()) { - inserted = true; - entries.push_back(slot); - } - entries.push_back(_entries[i]); - } - if (!inserted) { - entries.push_back(slot); - } - _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; - _entries.swap(entries); -} - -void -MemFile::copySlot(const MemFile& source, const MemSlot& slot) -{ - addSlot(MemSlot(slot.getGlobalId(), - slot.getTimestamp(), - _buffer->copyCache(*source._buffer, HEADER, slot.getLocation(HEADER)), - _buffer->copyCache(*source._buffer, BODY, slot.getLocation(BODY)), - slot.getFlags(), - slot.getChecksum())); -} - -class MemFile::MemFileBufferCacheCopier : public BufferCacheCopier -{ -public: - MemFileBufferCacheCopier(MemFile& target, const MemFile& source) - : _target(target), - _source(source) - { - } - -private: - DataLocation doCopyFromSourceToLocal( - Types::DocumentPart part, - DataLocation sourceLocation) override - { - return _target._buffer->copyCache( - *_source._buffer, part, sourceLocation); - } - - MemFile& _target; - const MemFile& _source; -}; - -void -MemFile::copySlotsFrom( - const MemFile& source, - const std::vector<const MemSlot*>& sourceSlots) -{ - // TODO we probably want a pre-allocation hint here to avoid many mmaps - MemFileBufferCacheCopier cacheCopier(*this, source); - SharedDataLocationTracker headerTracker(cacheCopier, HEADER); - SharedDataLocationTracker bodyTracker(cacheCopier, BODY); - - for (auto slot : sourceSlots) { - auto headerLoc = headerTracker.getOrCreateSharedLocation( - slot->getLocation(HEADER)); - auto bodyLoc = bodyTracker.getOrCreateSharedLocation( - slot->getLocation(BODY)); - addSlot(MemSlot(slot->getGlobalId(), - slot->getTimestamp(), - headerLoc, - bodyLoc, - slot->getFlags(), - slot->getChecksum())); - } -} - -void -MemFile::removeSlot(const MemSlot& slot) -{ - _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; - 
std::vector<MemSlot>::iterator it( - std::lower_bound(_entries.begin(), _entries.end(), - slot.getTimestamp(), - MemSlotTimestampPredicate())); - if (it != _entries.end() - && it->getTimestamp() == slot.getTimestamp()) - { - _entries.erase(it); - } else { - LOG(error, - "Attempted to remove a slot that does not exist: %s", - slot.toString().c_str()); - assert(false); - } -} - -void -MemFile::removeSlots(const std::vector<const MemSlot*>& slotsToRemove) -{ - if (slotsToRemove.empty()) return; - // Optimized way of removing slots. Should not throw exceptions, - // (and is not exception safe) - std::vector<MemSlot> slots( - _entries.size() - slotsToRemove.size(), - MemSlot(GlobalId(), Timestamp(0), DataLocation(), DataLocation(), - 0, 0)); - uint32_t r=0; - for (uint32_t i=0,j=0; i<_entries.size(); ++i) { - if (r >= slotsToRemove.size() || slotsToRemove[r] != &_entries[i]) { - _entries[i].swap(slots[j]); - ++j; - } else { - ++r; - } - } - _entries.swap(slots); - if (_entries.size() != slots.size()) { - _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; - } - // Verify that we found all slots to remove - if (r < slotsToRemove.size()) { - Timestamp ts(0); - for (uint32_t i=0; i<slotsToRemove.size(); ++i) { - assert(slotsToRemove[i]->getTimestamp() > ts); - ts = slotsToRemove[i]->getTimestamp(); - } - LOG(error, - "Slot %s wasn't in the file. 
Only existing slots may be " - "given to removeSlots as non-existing slot stops other " - "slots from being removed.", - slotsToRemove[r]->toString().c_str()); - assert(false); - } -} - -void -MemFile::modifySlot(const MemSlot& slot) -{ - _flags |= BUCKET_INFO_OUTDATED | SLOTS_ALTERED; - // MemSlot actually pointed to by const MemSlot* is non-const - // in entries-vector, so this should be well defined according - // to the C++ ISO standard - MemSlot* slotToModify = const_cast<MemSlot*>( - getSlotAtTime(slot.getTimestamp())); - - assert(slotToModify != NULL); - - LOG(spam, "Modifying %s -> %s", - slotToModify->toString().c_str(), - slot.toString().c_str()); - *slotToModify = slot; -} - -void -MemFile::matchLocationWithFlags(LocationMap& result, - DocumentPart part, - const MemSlot* slot, - uint32_t flags) const -{ - DataLocation loc = slot->getLocation(part); - bool isPersisted = _buffer->isPersisted(loc, part); - - if ((flags & NON_PERSISTED_LOCATIONS) && !isPersisted) { - result[loc].slots.push_back(slot); - } else if ((flags & PERSISTED_LOCATIONS) && isPersisted) { - result[loc].slots.push_back(slot); - } -} - -void -MemFile::getLocations(LocationMap& headers, - LocationMap& bodies, - uint32_t flags) const -{ - for (uint32_t i = 0; i < _entries.size(); ++i) { - matchLocationWithFlags(headers, HEADER, &_entries[i], flags); - matchLocationWithFlags(bodies, BODY, &_entries[i], flags); - } -} - -bool -MemFile::compact() -{ - auto options = _env.acquireConfigReadLock().options(); - MemFileCompactor compactor( - _env._clock.getTimeInMicros(), - CompactionOptions() - .revertTimePeriod(options->_revertTimePeriod) - .keepRemoveTimePeriod(options->_keepRemoveTimePeriod) - .maxDocumentVersions(options->_maxDocumentVersions)); - std::vector<const MemSlot*> slotsToRemove( - compactor.getSlotsToRemove(*this)); - removeSlots(slotsToRemove); - return !slotsToRemove.empty(); -} - -MemFile::const_iterator -MemFile::begin(uint32_t iteratorFlags, - Timestamp fromTimestamp, - 
Timestamp toTimestamp) const -{ - if (iteratorFlags & ITERATE_GID_UNIQUE) { - return const_iterator(SlotIterator::CUP(new GidUniqueSlotIterator( - *this, iteratorFlags & ITERATE_REMOVED, - fromTimestamp, toTimestamp))); - } else { - return const_iterator(SlotIterator::CUP(new AllSlotsIterator( - *this, iteratorFlags & ITERATE_REMOVED, - fromTimestamp, toTimestamp))); - } -} - -void -MemFile::ensureDocumentIdCached(const MemSlot& slot) const -{ - _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER))); -} - -void -MemFile::ensureDocumentCached(const MemSlot& slot, bool headerOnly) const -{ - _buffer->ensureCached(_env, HEADER, toVector(slot.getLocation(HEADER))); - if (!headerOnly) { - _buffer->ensureCached(_env, BODY, toVector(slot.getLocation(BODY))); - } -} - -void -MemFile::ensureDocumentCached(const std::vector<Timestamp>& timestamps, - bool headerOnly) const -{ - LOG(spam, "ensureDocumentCached with %zu timestamps", - timestamps.size()); - if (!fileExists()) { - return; - } - try{ - std::vector<const MemSlot*> slots; - getSlotsByTimestamp(timestamps, slots); - - std::vector<DataLocation> headerLocations; - headerLocations.reserve(timestamps.size()); - std::vector<DataLocation> bodyLocations; - if (!headerOnly) { - bodyLocations.reserve(timestamps.size()); - } - for (uint32_t i = 0; i < slots.size(); ++i) { - headerLocations.push_back(slots[i]->getLocation(HEADER)); - - if (!headerOnly) { - bodyLocations.push_back(slots[i]->getLocation(BODY)); - } - } - - _buffer->ensureCached(_env, HEADER, headerLocations); - if (!headerOnly) { - _buffer->ensureCached(_env, BODY, bodyLocations); - } - } RETHROW_NON_MEMFILE_EXCEPTIONS; -} - -void -MemFile::ensureEntriesCached(bool includeBody) const -{ - if (!fileExists()) { - return; - } - - try{ - std::vector<DataLocation> headerLocations; - std::vector<DataLocation> bodyLocations; - - for (uint32_t i = 0; i < _entries.size(); ++i) { - headerLocations.push_back(_entries[i].getLocation(HEADER)); - - if 
(includeBody) { - bodyLocations.push_back(_entries[i].getLocation(BODY)); - } - } - - _buffer->ensureCached(_env, HEADER, headerLocations); - if (includeBody) { - _buffer->ensureCached(_env, BODY, bodyLocations); - } - } RETHROW_NON_MEMFILE_EXCEPTIONS; -} - -void -MemFile::ensureHeaderBlockCached() const -{ - ensureEntriesCached(false); -} - -void -MemFile::ensureBodyBlockCached() const -{ - ensureEntriesCached(true); -} - -/** - * Functionally this is the same as ensureBodyBlockCached, but with - * clearer semantics. - */ -void -MemFile::ensureHeaderAndBodyBlocksCached() const -{ - ensureEntriesCached(true); -} - -bool -MemFile::documentIdAvailable(const MemSlot& slot) const -{ - return partAvailable(slot, HEADER); -} - -bool -MemFile::partAvailable(const MemSlot& slot, DocumentPart part) const -{ - return _buffer->isCached(slot.getLocation(part), part); -} - -bool -MemFile::partPersisted(const MemSlot& slot, DocumentPart part) const -{ - assert(_buffer.get()); - - return _buffer->isPersisted(slot.getLocation(part), part); -} - -uint32_t -MemFile::getSerializedSize(const MemSlot& slot, DocumentPart part) const { - DataLocation loc = slot.getLocation(part); - return _buffer->getSerializedSize(part, loc); -} - -const Types::BucketInfo& -MemFile::getBucketInfo() const -{ - if (_flags & BUCKET_INFO_OUTDATED) { - uint32_t uniqueCount = 0, uniqueSize = 0, usedSize = 0; - uint32_t checksum = 0; - - typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap; - SeenMap seen(_entries.size() * 2); - uint32_t maxHeaderExtent = 0, maxBodyExtent = 0; - - MemSlotVector::const_reverse_iterator e(_entries.rend()); - for (MemSlotVector::const_reverse_iterator it(_entries.rbegin()); - it != e; ++it) - { - const MemSlot& slot(*it); - // We now always write sequentially within the blocks, so used size - // for one block is effectively the max location extent seen within - // it. 
- maxHeaderExtent = std::max(maxHeaderExtent, - slot.getLocation(HEADER)._pos - + slot.getLocation(HEADER)._size); - maxBodyExtent = std::max(maxBodyExtent, - slot.getLocation(BODY)._pos - + slot.getLocation(BODY)._size); - - SeenMap::insert_result inserted(seen.insert(slot.getGlobalId())); - if (!inserted.second) { - continue; - } - if (slot.deleted()) continue; - - const uint32_t slotSize = slot.getLocation(HEADER)._size - + slot.getLocation(BODY)._size; - uniqueSize += slotSize; - ++uniqueCount; - - vespalib::crc_32_type calculator; - calculator.process_bytes(slot.getGlobalId().get(), - GlobalId::LENGTH); - Timestamp time = slot.getTimestamp(); - calculator.process_bytes(&time, sizeof(Timestamp)); - checksum ^= calculator.checksum(); - } - - if (uniqueCount > 0 && checksum < 2) { - checksum += 2; - } - - // Only set used size if we have any entries at all. - if (!_entries.empty()) { - usedSize = 64 + 40 * _entries.size() - + maxHeaderExtent + maxBodyExtent; - } - - spi::BucketInfo info(spi::BucketChecksum(checksum), - uniqueCount, - uniqueSize, - _entries.size(), - usedSize, - BucketInfo::READY, - BucketInfo::NOT_ACTIVE); - - _info = info; - _flags &= ~BUCKET_INFO_OUTDATED; - } - return _info; -} - -void -MemFile::flushToDisk(FlushFlag flag) -{ - if ((flag == CHECK_NON_DIRTY_FILE_FOR_SPACE) || (_flags & SLOTS_ALTERED)) { - LOG(spam, "Flushing %s to disk since flags is %x", toString().c_str(), _flags); - try{ - _env._memFileMapper.flush(*this, _env); - } RETHROW_NON_MEMFILE_EXCEPTIONS; - } else { - LOG(spam, "Not flushing %s as it is not altered", toString().c_str()); - } - - // For now, close all files after done flushing, to avoid getting - // too many open at the same time. 
Later cache may cache limited - // amount of file handles - getMemFileIO().close(); -} - -void -MemFile::clearCache(DocumentPart part) -{ - _buffer->clear(part); - if (part == HEADER) { - _cacheSizeOverride.headerSize = 0; - } else { - _cacheSizeOverride.bodySize = 0; - } -} - -bool -MemFile::repair(std::ostream& errorReport, uint32_t verifyFlags) -{ - try{ - return _env._memFileMapper.repair( - *this, _env, errorReport, verifyFlags); - } RETHROW_NON_MEMFILE_EXCEPTIONS; -} - -void -MemFile::resetMetaState() -{ - LOG(debug, "Resetting meta state for MemFile"); - _flags = BUCKET_INFO_OUTDATED; - _currentVersion = UNKNOWN; - _info = BucketInfo(); - _entries.clear(); -} - -MemSlot::MemoryUsage -MemFile::getCacheSize() const -{ - assert(_buffer.get()); - - if (_cacheSizeOverride.sum() > 0) { - return _cacheSizeOverride; - } - - MemSlot::MemoryUsage retVal; - retVal.metaSize = sizeof(MemSlot) * _entries.size(); - retVal.headerSize += _buffer->getCachedSize(HEADER); - retVal.bodySize += _buffer->getCachedSize(BODY); - return retVal; -} - -void -MemFile::verifyDiskNotFull() -{ - const double maxFillRate( - _env.acquireConfigReadLock().options()->_diskFullFactor); - - Directory& dir = _file.getDirectory(); - - if (dir.getPartition().getMonitor() == 0) { - LOG(warning, "No partition monitor found for directory %s. Skipping " - "disk full test.", dir.toString(true).c_str()); - } else if (dir.isFull(0, maxFillRate)) { - std::ostringstream token; - token << dir << " is full"; - std::ostringstream ost; - ost << "Disallowing operation on file " << getFile().getPath() - << " because disk is or would be " - << (100 * dir.getPartition().getMonitor() - ->getFillRate()) << " % full, which is " - << "more than the max setting of " - << 100 * maxFillRate << " % full." - << " (Note that this may be both due to space or inodes. 
" - << "Check \"df -i\" too if manually checking)" - << " (" << dir.toString(true) << ")"; - LOGBT(warning, token.str(), "%s", ost.str().c_str()); - throw vespalib::IoException( - ost.str(), vespalib::IoException::NO_SPACE, VESPA_STRLOC); - } else { - LOG(spam, "Disk will only be %f %% full after operation, which " - "is below limit of %f %%; allowing it to go through.", - 100.0 * dir.getPartition().getMonitor() - ->getFillRate(), - 100.0 * maxFillRate); - } -} - -bool -MemFile::operator==(const MemFile& other) const -{ - if (_info == other._info && - _entries.size() == other._entries.size() && - _file == other._file && - _currentVersion == other._currentVersion) - { - for (uint32_t i=0, n=_entries.size(); i<n; ++i) { - if (_entries[i] != other._entries[i]) return false; - } - return true; - } - return false; -} - -namespace { - void printMemFlags(std::ostream& out, uint32_t flags) { - bool anyPrinted = false; - for (uint32_t val=1,i=1; i<=32; ++i, val *= 2) { - if (flags & val) { - if (anyPrinted) { out << "|"; } - anyPrinted = true; - const char* name = Types::getMemFileFlagName( - static_cast<Types::MemFileFlag>(val)); - if (strcmp(name, "INVALID") == 0) { - out << "INVALID(" << std::hex << val << std::dec << ")"; - } else { - out << name; - } - } - } - if (!anyPrinted) out << "none"; - } -} - -void -MemFile::printHeader(std::ostream& out, bool verbose, - const std::string& indent) const -{ - if (!verbose) { - out << "MemFile(" << _file.getBucketId() << ", dir " - << _file.getDirectory().getIndex(); - } else { - out << "MemFile(" << _file.getBucketId() - << "\n" << indent << " Path(\"" - << _file.getPath() << "\")" - << "\n" << indent << " Wanted version(" - << Types::getFileVersionName(_file.getWantedFileVersion()) - << "(" << std::hex << _file.getWantedFileVersion() << "))" - << "\n" << indent << " Current version(" - << Types::getFileVersionName(_currentVersion) - << "(" << std::hex << _currentVersion << "))" - << "\n" << indent << " " << getBucketInfo() - << 
"\n" << indent << " Flags "; - printMemFlags(out, _flags); - - if (_formatData.get()) { - out << "\n" << indent << " " << _formatData->toString(); - } - } -} - -void -MemFile::printEntries(std::ostream& out, bool verbose, - const std::string& indent) const -{ - if (verbose && !_entries.empty()) { - out << ") {"; - for (uint32_t i=0; i<_entries.size(); ++i) { - out << "\n" << indent << " "; - print(_entries[i], out, false, indent + " "); - } - out << "\n" << indent << "}"; - } else { - out << ", " << _entries.size() << " entries)"; - } -} - -void -MemFile::printEntriesState(std::ostream& out, bool verbose, - const std::string& indent) const -{ - for (uint32_t i=0; i<_entries.size(); ++i) { - if (verbose) { - printUserFriendly(_entries[i], out, indent); - } else { - print(_entries[i], out, false, indent); - } - out << "\n" << indent; - } - const SimpleMemFileIOBuffer& ioBuf( - static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO())); - const FileInfo& fileInfo(ioBuf.getFileInfo()); - - unsigned int emptyCount = fileInfo._metaDataListSize - _entries.size(); - if (emptyCount > 0) { - out << std::dec << emptyCount << " empty entries.\n" << indent; - } -} - -void -MemFile::print(std::ostream& out, bool verbose, - const std::string& indent) const -{ - printHeader(out, verbose, indent); - printEntries(out, verbose, indent); -} - -void -MemFile::printUserFriendly(const MemSlot& slot, - std::ostream& out, - const std::string& indent) const -{ - out << "MemSlot(" << slot.getGlobalId().toString() - << std::setfill(' ') - << std::dec << "\n" - << indent << " Header pos: " - << std::setw(10) << slot.getLocation(HEADER)._pos - << " - " << std::setw(10) << slot.getLocation(HEADER)._size - << ", Body pos: " << std::setw(10) << slot.getLocation(BODY)._pos - << " - " << std::setw(10) << slot.getLocation(BODY)._size << "\n" << indent - << " Timestamp: " << slot.getTimestamp().toString() - << " (" << slot.getTimestamp().getTime() << ")\n" - << indent << " Checksum: 0x" - << std::hex 
<< std::setw(4) << slot.getChecksum() - << std::setfill(' ') << "\n" << indent << " Flags: 0x" - << std::setw(4) << slot.getFlags(); - std::list<std::string> flags; - - if ((slot.getFlags() & IN_USE) == 0) flags.push_back("NOT IN USE"); - if ((slot.getFlags() & DELETED) != 0) flags.push_back("DELETED"); - if ((slot.getFlags() & DELETED_IN_PLACE) != 0) flags.push_back("DELETED_IN_PLACE"); - if ((slot.getFlags() & CHECKSUM_OUTDATED) != 0) flags.push_back("CHECKSUM_OUTDATED"); - - for (std::list<std::string>::iterator it = flags.begin(); - it != flags.end(); ++it) - { - out << ", " << *it; - } - - const document::DocumentId id = getDocumentId(slot); - - out << "\n" << indent << " Name: " << id; - document::BucketIdFactory factory; - document::BucketId bucket( - factory.getBucketId( - document::DocumentId(id))); - out << "\n" << indent << " Bucket: " << bucket; - out << ")"; -} - -void -MemFile::print(const MemSlot& slot, - std::ostream& out, - bool verbose, - const std::string& indent) const -{ - if (verbose) { - out << "MemSlot("; - } - out << std::dec << slot.getTimestamp() << ", " << slot.getGlobalId().toString() << ", h " - << slot.getLocation(HEADER)._pos << " - " << slot.getLocation(HEADER)._size << ", b " - << slot.getLocation(BODY)._pos << " - " << slot.getLocation(BODY)._size << ", f " - - << std::hex << slot.getFlags() << ", c " << slot.getChecksum() - << ", C(" << (documentIdAvailable(slot) ? "D" : "") - << (partAvailable(slot, HEADER) ? "H" : "") - << (partAvailable(slot, BODY) ? 
"B" : "") - << ")"; - if (verbose) { - out << ") {"; - if (documentIdAvailable(slot)) { - out << "\n" << indent << " "; - - getDocument(slot, ALL) - ->print(out, true, indent + " "); - } else { - out << "\n" << indent << " Nothing cached beyond metadata."; - } - out << "\n" << indent << "}"; - } -} - -void -MemFile::printState(std::ostream& out, bool userFriendlyOutput, - bool printBody, bool printHeader2, - //SlotFile::MetaDataOrder order, - const std::string& indent) const -{ - const SimpleMemFileIOBuffer& ioBuf( - static_cast<const SimpleMemFileIOBuffer&>(getMemFileIO())); - const FileInfo& fileInfo(ioBuf.getFileInfo()); - - out << "\n" << indent << "Filename: '" << getFile().getPath() << "'"; - if (!fileExists()) { - out << " (non-existing)"; - return; - } else if (ioBuf.getFileHandle().isOpen()) { - out << " (fd " << ioBuf.getFileHandle().getFileDescriptor() << ")"; - } - out << "\n"; - - uint32_t filesize = ioBuf.getFileHandle().getFileSize(); - out << "Filesize: " << filesize << "\n"; - Buffer buffer(filesize); - char* buf = buffer.getBuffer(); - uint32_t readBytes = ioBuf.getFileHandle().read(buf, filesize, 0); - if (readBytes != filesize) { - out << "Failed to read whole file of size " << filesize - << ". 
Adjusting file size to " << readBytes - << " we managed to read."; - filesize = readBytes; - } - - const Header* header(reinterpret_cast<const Header*>(buf)); - header->print(out); - out << "\n" << indent; - - if (filesize < fileInfo.getHeaderBlockStartIndex()) - { - out << "File not big enough to contain all " - << fileInfo._metaDataListSize << " meta data entries.\n" - << indent; - } else { - printEntriesState(out, userFriendlyOutput, indent); - } - - if (filesize < fileInfo.getBodyBlockStartIndex()) - { - out << "File not big enough to contain the whole " - << fileInfo._headerBlockSize << " byte header block.\n" << indent; - } else { - out << "Header block: (" << std::dec << fileInfo._headerBlockSize - << "b)"; - if (printHeader2) { - const char* start = &buf[0] + fileInfo.getHeaderBlockStartIndex(); - out << "\n" << indent; - document::StringUtil::printAsHex( - out, start, fileInfo._headerBlockSize, 16, false); - } - out << "\n" << indent; - } - - if (filesize < fileInfo.getFileSize()) - { - out << "File not big enough to contain the whole " - << fileInfo._bodyBlockSize << " byte content block.\n" << indent; - } else { - out << "Content block: (" << std::dec << fileInfo._bodyBlockSize << "b)"; - if (printBody) { - const char* start = &buf[0] + fileInfo.getBodyBlockStartIndex(); - out << "\n" << indent; - document::StringUtil::printAsHex( - out, start, fileInfo._bodyBlockSize, 16, false); - } - out << "\n" << indent; - } -} - - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h deleted file mode 100644 index 449fcf56258..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfile.h +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-/** - * \class storage::MemFile - * \ingroup memfile - * - * \brief Class representing a file storing documents in slots. - * - * This is a memory representation of the file, that isn't constricted by the - * actual file format. - * - * A memfile must contains: - * - A header with generic information about the file, including version of - * file format. - * - * A memfile may also contain: - * - Cached meta data describing contents. - * - Cached document header content. - * - Cached document body content. - * - * The representation knows what parts of it that are persisted in a file, and - * what parts exist only in memory. - * - * For ease of use, information is loaded into the cache automatically by the - * MemFile implementation. Thus, the memfile needs a pointer to the file mapper - * implementation. - */ - -#pragma once - -#include "memslot.h" -#include "slotiterator.h" -#include "memfileiointerface.h" -#include <vespa/memfilepersistence/common/filespecification.h> -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/vespalib/io/fileutil.h> - -namespace storage { -namespace memfile { - -class Environment; -//class SlotFileV1SerializerTest; - -class MemFile : private Types, - public vespalib::Printable -{ -public: - struct FormatSpecificData { - typedef std::unique_ptr<FormatSpecificData> UP; - - virtual ~FormatSpecificData() {} - - virtual std::string toString() const = 0; - }; - - typedef IteratorWrapper const_iterator; - - struct LocationContent { - std::vector<const MemSlot*> slots; - }; - typedef std::map<DataLocation, LocationContent> LocationMap; - typedef std::vector<MemSlot> MemSlotVector; - -private: - void verifyDiskNotFull(); - - mutable uint32_t _flags; - mutable BucketInfo _info; - MemFileIOInterface::UP _buffer; - MemSlotVector _entries; - FileSpecification _file; - mutable FileVersion _currentVersion; - Environment& _env; - mutable FormatSpecificData::UP _formatData; - MemSlot::MemoryUsage _cacheSizeOverride; - - friend class 
MemFilePtr; - friend class MemCacheTest; - class MemFileBufferCacheCopier; - - /** - * Verify tests need to be able to create memfiles that hasn't called - * loadfile, and possibly call loadFile without autorepair set. Such - * memfiles are invalid as many functions require header+metadata to be - * cached. Should only be used for unit tests. - */ - friend class MemFileV1VerifierTest; - MemFile(const FileSpecification&, Environment&, bool callLoadFile); - - // Ensures that all entries are cached. - // If includeBody is true, caches the body as well. - void ensureEntriesCached(bool includeBody) const; - - // Put the given location in the result map if the - // location is persisted according to the given flags. - void matchLocationWithFlags(LocationMap& result, - DocumentPart part, - const MemSlot* slot, - uint32_t flags) const; - -public: - struct LoadOptions { - bool autoRepair; - LoadOptions() : autoRepair(true) - {} - }; - - MemFile(const FileSpecification& spec, - Environment& env, - const LoadOptions& opts = LoadOptions()); - ~MemFile(); - - const FileSpecification& getFile() const { return _file; } - const document::BucketId& getBucketId() const noexcept { - return _file.getBucketId(); - } - FileVersion getCurrentVersion() const { return _currentVersion; } - - bool empty() const { return _entries.empty(); } - bool fileExists() const { return (_flags & FILE_EXIST); } - bool headerBlockCached() const { return (_flags & HEADER_BLOCK_READ); } - bool bodyBlockCached() const { return (_flags & BODY_BLOCK_READ); } - bool slotsAltered() const { return _flags & SLOTS_ALTERED; } - - /** - * Called by the mapper when it has to call loadFile a second - * time due to corruption repairs. Must NOT be called by anyone - * else! - */ - void resetMetaState(); - - void verifyConsistent() const; - - /** Moves the physical file on disk (if any) to the new file name. 
*/ - void move(const FileSpecification& newFileName); - - uint16_t getDisk() const; - - FormatSpecificData* getFormatSpecificData() const - { return _formatData.get(); } - void setFormatSpecificData(FormatSpecificData::UP d) const - { _formatData = std::move(d); } - void setCurrentVersion(FileVersion ver) const { _currentVersion = ver; } - - uint32_t getSlotCount() const; - const MemSlot& operator[](uint32_t index) const { return _entries[index]; } - const MemSlot* getSlotWithId(const document::DocumentId&, - Timestamp maxTimestamp = MAX_TIMESTAMP) const; - const MemSlot* getSlotAtTime(Timestamp) const; - - void getSlotsByTimestamp(const std::vector<Timestamp>&, - std::vector<const MemSlot*>& returned) const; - - // Get flags are defined in types.h (GetFlag) - Document::UP getDocument(const MemSlot& slot, GetFlag getFlag) const; - - document::DocumentId getDocumentId(const MemSlot& slot) const; - - /** - * Returns the number of bytes required by this memfile while - * in cache. - * - * @return Returns the cache size. - */ - MemSlot::MemoryUsage getCacheSize() const; - - void addPutSlot(const Document& doc, Timestamp time); - - void addUpdateSlot(const Document& header, - const MemSlot& body, - Timestamp time); - - void addRemoveSlot(const MemSlot& header, Timestamp time); - - enum RemoveType - { - REGULAR_REMOVE, - UNREVERTABLE_REMOVE - }; - - void addRemoveSlotForNonExistingEntry(const DocumentId& docId, - Timestamp time, - RemoveType removeType); - - void addSlot(const MemSlot&); - void removeSlot(const MemSlot&); - - void setMemFileIO(MemFileIOInterface::UP buffer) { - _buffer = std::move(buffer); - } - MemFileIOInterface& getMemFileIO() { return *_buffer; } - const MemFileIOInterface& getMemFileIO() const { return *_buffer; } - - void getLocations(LocationMap& headers, - LocationMap& bodies, - uint32_t flags) const; - - /** - * Copies a slot from another memfile. 
- */ - void copySlot(const MemFile& source, const MemSlot&); - - void copySlotsFrom(const MemFile& source, - const std::vector<const MemSlot*>& sourceSlots); - - /** Remove given slots. Slots must exist and be in rising timestamp order */ - void removeSlots(const std::vector<const MemSlot*>&); - void modifySlot(const MemSlot&); - - void setFlag(uint32_t flags) { - verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::setFlag"); - _flags |= flags; - } - - void clearFlag(uint32_t flags) { - verifyLegalFlags(flags, LEGAL_MEMFILE_FLAGS, "MemFile::clearFlags"); - _flags &= ~flags; - } - - /** - * Removes entries overwritten after revert time period and remove - * entries older than keep remove period. - * - * @return True if anything was compacted - */ - bool compact(); - - const_iterator begin(uint32_t iteratorFlags = 0, - Timestamp fromTimestamp = UNSET_TIMESTAMP, - Timestamp toTimestamp = UNSET_TIMESTAMP) const; - - const_iterator end() const { return const_iterator(); } - - void ensureDocumentIdCached(const MemSlot&) const; - void ensureDocumentCached(const MemSlot&, bool headerOnly) const; - void ensureHeaderBlockCached() const; - void ensureBodyBlockCached() const; - void ensureHeaderAndBodyBlocksCached() const; - void ensureDocumentCached(const std::vector<Timestamp>&, - bool headerOnly) const; - - /** - * Assert that a given slot is contained in the bucket this MemFile has - * been created for (i.e. output of getBucketId()). In the common case, - * only the slot GID will be consulted, but in the case of orderdoc docs - * the document ID may have to be fetched. - * - * Precondition: `slot` must have its data blocks already added to the - * file's buffer cache. This means any fetches of the document ID should - * not require disk access, but will incur cache lookup and heap - * allocation overhead. - * Postcondition: no side effects if `slot` is contained in bucket. Logs - * error and dumps core otherwise. 
- */ - void assertSlotContainedInThisBucket(const MemSlot& slot) const; - - bool documentIdAvailable(const MemSlot&) const; - bool partAvailable(const MemSlot&, DocumentPart part) const; - bool partPersisted(const MemSlot&, DocumentPart) const; - - uint32_t getSerializedSize(const MemSlot&, DocumentPart part) const; - - /** - * Fetches the bucket info. If metadata is altered, info will be - * recalculated, and bucket database updated. - */ - const BucketInfo& getBucketInfo() const; - - void flushToDisk(FlushFlag flags = NONE); - - void clearCache(DocumentPart part); - - /** - * Repair any errors found in this slotfile. - * If given, stuff error report into given ostream. - * - * @return True if file was fine. False if any errors were repaired. - */ - bool repair(std::ostream& errorReport, uint32_t fileVerifyFlags = 0); - - /** - * Tests for equality of memfiles. Equality requires MemFile to look equal - * for clients. It will not read data from file, so the same parts of the - * file must be cached for objects to be equal. Non-persistent flags need - * not be equal (The same parts need not be persisted to backend files) - * - * Used in unit testing only. - */ - bool operator==(const MemFile& other) const; - - /** Stat wants control of printing of slots. */ - void printHeader(std::ostream& out, bool verbose, - const std::string& indent) const; - void printEntries(std::ostream& out, bool verbose, - const std::string& indent) const; - void printEntriesState(std::ostream& out, bool verbose, - const std::string& indent) const; - void print(std::ostream& out, bool verbose, - const std::string& indent) const override; - - /** Stat wants control of printing of slots. */ - void printUserFriendly(const MemSlot& slot, - std::ostream& out, - const std::string& indent) const; - void print(const MemSlot& slot, - std::ostream& out, bool verbose, - const std::string& indent) const; - - /** Debug function to print state. 
*/ - void printState(std::ostream& out, bool userFriendlyOutput = false, - bool printBody = true, bool printHeader = true, - //MetaDataOrder order = DEFAULT, - const std::string& indent = "") const; -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp deleted file mode 100644 index cfd73d1a7cb..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.cpp +++ /dev/null @@ -1,515 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfilecache.h" -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> -#include <vespa/vespalib/util/exception.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.cache"); - -namespace storage::memfile { - -void -MemFileCache::Entry::setInUse(bool inUse) { - LOG(debug, "Setting in use to %d for file %s", inUse, _file.toString().c_str()); - _inUse = inUse; -} - -void -MemFileCache::returnToCache(MemFileCache::Entry& entry) -{ - // Ensure file descriptor is closed before returning to cache - entry._file.getMemFileIO().close(); - vespalib::LockGuard lock(_cacheLock); - - BucketInfo info(entry._file.getBucketInfo()); - BucketId id(entry._file.getFile().getBucketId()); - - LOG(debug, "%s being returned to cache", id.toString().c_str()); - - MemoryUsage newUsage = entry._file.getCacheSize(); - - if (_cacheLimit.sum() == 0 || newUsage.sum() == 0) { - entry._file.flushToDisk(); - eraseNoLock(id); - return; - } - - // File must be flushed before being returned to the cache. 
- assert(!entry._file.slotsAltered()); - entry.setInUse(false); - - Entry* ptr = 0; - { - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - BucketIdx::iterator it(bucketIdx.find(id)); - assert(it != bucketIdx.end()); - ptr = it->_ptr.get(); - - if (entry._returnToCacheWhenFinished) { - EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id); - _entries.replace(it, wrp); - _memoryUsage.add(newUsage); - entry._cacheSize = newUsage; - } else { - _entries.erase(it); - } - } - - LOG(spam, - "Bucket %s, ptr %p returned to cache: %s with %s. " - "Total cache size after return: %s", - id.toString().c_str(), - ptr, - info.toString().c_str(), - newUsage.toString().c_str(), - _memoryUsage.toString().c_str()); - - evictWhileFull(); -} - -void -MemFileCache::done(MemFileCache::Entry& entry) -{ - LOG(spam, "Finished with file %s", - entry._file.getFile().toString().c_str()); - - try { - entry._file.verifyConsistent(); - } catch (vespalib::Exception e) { - LOG(debug, - "Verification of cache entry %s failed: %s", - entry._file.getFile().toString().c_str(), - e.getMessage().c_str()); - - entry.setInUse(false); - throw; - } - - assert(entry.isInUse()); - - returnToCache(entry); -} - -struct MemFileCache::CacheEntryGuard : public MemFilePtr::EntryGuard { - MemFileCache& _cache; - Environment& _env; - MemFileCache::Entry* _entry; - - CacheEntryGuard( - MemFileCache& cache, - Environment& env, - MemFileCache::Entry& entry) - : MemFilePtr::EntryGuard(entry._file), - _cache(cache), - _env(env), - _entry(&entry) - { - } - ~CacheEntryGuard() { - if (_entry) { - _cache.done(*_entry); - } - } - - MemFile& getFile() { - return _entry->_file; - } - - void deleteFile() override { - LOG(debug, "Cache entry guard deleting %s", _file->toString().c_str()); - _env._memFileMapper.deleteFile(*_file, _env); - erase(); - } - - void erase() override { - LOG(debug, "Cache entry guard erasing %s from cache", - _file->toString().c_str()); - 
_cache.erase(document::BucketId(_entry->_file.getFile().getBucketId())); - _entry = 0; - } - - void move(EntryGuard& target) override { - LOG(debug, "Cache entry guard moving %s", _file->toString().c_str()); - _cache.move(*this, static_cast<CacheEntryGuard&>(target)); - } - - void moveState(CacheEntryGuard& target) { - // Move state over to target. - target._entry = _entry; - target._file = _file; - - // Invalidate this. - _entry = NULL; - _file = NULL; - } - - MemFile* operator->() { - return &_entry->_file; - } -}; - -MemFileCache::MemFileCache(framework::ComponentRegister& componentRegister, - MemFilePersistenceCacheMetrics& metrics) - : Component(componentRegister, "memfilecache"), - _lastUsedCounter(0), - _metrics(metrics), - _bodyEvicter(_metrics.body_evictions), - _headerEvicter(_metrics.header_evictions), - _metaDataEvicter(_metrics.meta_evictions) -{ -}; - -void -MemFileCache::setCacheSize(MemoryUsage cacheSize) -{ - vespalib::LockGuard lock(_cacheLock); - - _cacheLimit = cacheSize; - evictWhileFull(); -} - -MemFilePtr -MemFileCache::get(const BucketId& id, Environment& env, Directory& dir, - bool createIfNotExisting) -{ - vespalib::LockGuard lock(_cacheLock); - - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - - BucketIdx::iterator it(bucketIdx.find(id)); - if (it == bucketIdx.end()) { - LOG(debug, - "Bucket %s was not in cache. Creating cache entry.", - id.toString().c_str()); - - FileSpecification file(id, dir, env.calculatePathInDir(id, dir)); - const uint64_t counter(++_lastUsedCounter); - lock.unlock(); - // Create memfile outside lock, since this will involve disk reads - // in the common case that there's a bucket file on the disk. The - // content layer shall guarantee that no concurrent operations happen - // for a single bucket, so this should be fully thread safe. 
- auto entry = std::make_shared<Entry>(file, env, createIfNotExisting); - - vespalib::LockGuard reLock(_cacheLock); - std::pair<LRUCache::iterator, bool> inserted( - _entries.insert(EntryWrapper(entry, counter, id))); - assert(inserted.second); - _metrics.misses.inc(); - - return MemFilePtr(MemFilePtr::EntryGuard::SP( - new CacheEntryGuard(*this, env, *entry))); - } else { - if (it->_ptr->isInUse()) { - LOG(error, - "Bug! File %s, ptr %p was in use while in the file cache", - it->_ptr->_file.toString(true).c_str(), it->_ptr.get()); - assert(false); - } - - it->_ptr->setInUse(true); - _memoryUsage.sub(it->_ptr->_cacheSize); - EntryWrapper wrp(it->_ptr, ++_lastUsedCounter, id); - _entries.replace(it, wrp); - _metrics.hits.inc(); - } - LOG(debug, - "Bucket %s was already in cache. Returning cache entry with " - "memory usage %s, new total memory usage: %s", - id.toString().c_str(), - it->_ptr->_cacheSize.toString().c_str(), - _memoryUsage.toString().c_str()); - - return MemFilePtr(MemFilePtr::EntryGuard::SP( - new CacheEntryGuard(*this, env, *it->_ptr))); -} - -// TODO: can this be removed?? -MemFileCache::BucketInfoMap -MemFileCache::flushDirtyEntries() -{ - vespalib::LockGuard lock(_cacheLock); - BucketInfoMap retVal; - - uint32_t total = 0, count = 0; - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - for (BucketIdx::iterator it = bucketIdx.begin(); it != bucketIdx.end(); ++it) { - ++total; - if (!it->_ptr->isInUse()) { - retVal[it->_ptr->_file.getFile().getBucketId()] = - it->_ptr->_file.getBucketInfo(); - - it->_ptr->_file.flushToDisk(); - // For now, close all files after done flushing, to avoid getting - // too many open at the same time. Later cache may cache limited - // amount of file handles - it->_ptr->_file.getMemFileIO().close(); - - ++count; - } - } - LOG(debug, "Flushed %u of %u entries in cache. 
Rest are in use", count, total); - - return retVal; -} - -void -MemFileCache::clear() -{ - vespalib::LockGuard lock(_cacheLock); - - uint32_t total = 0, count = 0; - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - for (BucketIdx::iterator it = bucketIdx.begin(); - it != bucketIdx.end();) - { - ++total; - if (!it->_ptr->isInUse()) { - // Any file not in use should have been flushed to disk already. - assert(!it->_ptr->_file.slotsAltered()); - _memoryUsage.sub(it->_ptr->_cacheSize); - it = bucketIdx.erase(it); - ++count; - } else { - ++it; - } - } - LOG(debug, "Flushed and cleared %u of %u entries in cache. Rest are in use", - count, total); -} - -void -MemFileCache::eraseNoLock(const document::BucketId& id) -{ - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - BucketIdx::iterator iter = bucketIdx.find(id); - - assert(iter != bucketIdx.end()); - assert(iter->_ptr->isInUse()); - //assert(!iter->_ptr->_file.slotsAltered()); - LOG(debug, "Removing %s from cache", id.toString().c_str()); - bucketIdx.erase(iter); -} - -void -MemFileCache::erase(const document::BucketId& id) { - vespalib::LockGuard lock(_cacheLock); - eraseNoLock(id); -} - -void -MemFileCache::move(CacheEntryGuard& source, CacheEntryGuard& target) -{ - vespalib::LockGuard lock(_cacheLock); - assert(target->empty()); - - document::BucketId sourceId = source->getFile().getBucketId(); - document::BucketId targetId = target->getFile().getBucketId(); - - LOG(debug, "Renaming file %s to %s", - source->toString().c_str(), - target->toString().c_str()); - source->move(target->getFile()); - source.moveState(target); - - BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - BucketIdx::iterator sourceIt(bucketIdx.find(sourceId)); - BucketIdx::iterator targetIt(bucketIdx.find(targetId)); - assert(sourceIt != bucketIdx.end()); - assert(targetIt != bucketIdx.end()); - - EntryWrapper wrp(sourceIt->_ptr, sourceIt->_lastUsed, targetId); - bucketIdx.erase(sourceIt); - 
_entries.replace(targetIt, wrp); -} - -MemFileCache::TimeIdx::iterator -MemFileCache::getLeastRecentlyUsedBucket() -{ - return boost::multi_index::get<1>(_entries).begin(); - -} - -uint64_t -MemFileCache::size() const -{ - LOG(spam, "memory usage is now %s (total is %zu)", - _memoryUsage.toString().c_str(), _memoryUsage.sum()); - return _memoryUsage.sum(); -} - -bool -MemFileCache::contains(const document::BucketId& bucketId) const -{ - vespalib::LockGuard lock(_cacheLock); - const BucketIdx& bucketIdx = boost::multi_index::get<0>(_entries); - return bucketIdx.find(bucketId) != bucketIdx.end(); -} - -MemFileCache::TimeIdx::iterator -MemFileCache::MetaDataEvictionPolicy::evict( - MemFileCache::TimeIdx& lruIndex, - MemFileCache::TimeIdx::iterator& it, - MemFileCache::MemoryUsage& curUsage) -{ - LOG(debug, "Evicting entire memfile for %s from cache. %s held", - it->_bid.toString().c_str(), - it->_ptr->_cacheSize.toString().c_str()); - curUsage.sub(it->_ptr->_cacheSize); - _evictionMetric.inc(); - return lruIndex.erase(it); -} - -MemFileCache::TimeIdx::iterator -MemFileCache::BodyEvictionPolicy::evict( - MemFileCache::TimeIdx& /*lruIndex*/, - MemFileCache::TimeIdx::iterator& it, - MemFileCache::MemoryUsage& curUsage) -{ - LOG(debug, "Removing body of %s from cache. %s held", - it->_bid.toString().c_str(), - it->_ptr->_cacheSize.toString().c_str()); - - if (it->_ptr->_cacheSize.bodySize) { - it->_ptr->_file.clearCache(BODY); - curUsage.bodySize -= it->_ptr->_cacheSize.bodySize; - it->_ptr->_cacheSize.bodySize = 0; - _evictionMetric.inc(); - } - return ++it; -} - -MemFileCache::TimeIdx::iterator -MemFileCache::HeaderEvictionPolicy::evict( - MemFileCache::TimeIdx& /*lruIndex*/, - MemFileCache::TimeIdx::iterator& it, - MemFileCache::MemoryUsage& curUsage) -{ - LOG(debug, "Removing header and body of %s from cache. 
%s held", - it->_bid.toString().c_str(), - it->_ptr->_cacheSize.toString().c_str()); - - if (it->_ptr->_cacheSize.headerSize) { - it->_ptr->_file.clearCache(HEADER); - it->_ptr->_file.clearCache(BODY); - curUsage.headerSize -= it->_ptr->_cacheSize.headerSize; - curUsage.bodySize -= it->_ptr->_cacheSize.bodySize; - it->_ptr->_cacheSize.headerSize = 0; - it->_ptr->_cacheSize.bodySize = 0; - _evictionMetric.inc(); - } - return ++it; -} - -template <typename EvictionPolicy> -void -MemFileCache::executeCacheEvictionPolicy(EvictionPolicy& policy) -{ - MemFileCache::TimeIdx& timeIdx = boost::multi_index::get<1>(_entries); - for (MemFileCache::TimeIdx::iterator - i(timeIdx.upper_bound(policy.getEvictionCursor())), - e(timeIdx.end()); - i != e;) - { - if (_memoryUsage.sum() <= _cacheLimit.sum() - || (policy.getValue(_memoryUsage) - <= policy.getValue(_cacheLimit))) - { - LOG(spam, "Aborting current policy because " - "memory usage %s is less than soft limit %s", - _memoryUsage.toString().c_str(), - _cacheLimit.toString().c_str()); - - return; - } - - LOG(spam, "Need to evict more data as memory usage is %zu, hard limit is %zu", - _memoryUsage.sum(), _cacheLimit.sum()); - - // If memfile is in use, skip. It will be readded with new - // timestamp once it's done being used, which means the - // invariant of there not being any files < the cursor holding - // cached data of the policy's type will be maintained. 
- if (i->_ptr->isInUse()) { - LOG(spam, "Not evicting %s as it is currently active", - i->_bid.toString().c_str()); - ++i; - continue; - } - policy.setEvictionCursor(i->_lastUsed); - i = policy.evict(timeIdx, i, _memoryUsage); - } -} - -void -MemFileCache::executeEvictionPolicies() -{ - executeCacheEvictionPolicy(_bodyEvicter); - if (_memoryUsage.sum() <= _cacheLimit.sum()) { - return; - } - executeCacheEvictionPolicy(_headerEvicter); - if (_memoryUsage.sum() <= _cacheLimit.sum()) { - return; - } - executeCacheEvictionPolicy(_metaDataEvicter); -} - -void -MemFileCache::evictWhileFull() -{ - if (size() > _cacheLimit.sum()) { - LOG(debug, "Before cache eviction, cache usage was %s" - ", new max size is %" PRIu64, - _memoryUsage.toString().c_str(), _cacheLimit.sum()); - - executeEvictionPolicies(); - - LOG(spam, "After cache eviction, memory usage is %s", - _memoryUsage.toString().c_str()); - } else { - LOG(spam, "Max cache size is %" PRIu64 " bytes, but cache " - "only using %" PRIu64 " bytes, so not evicting anything", - _cacheLimit.sum(), _memoryUsage.sum()); - } - - _metrics.files.set(_entries.size()); - _metrics.meta.set(_memoryUsage.metaSize); - _metrics.header.set(_memoryUsage.headerSize); - _metrics.body.set(_memoryUsage.bodySize); -} - -MemFileCache::Statistics -MemFileCache::getCacheStats() const -{ - vespalib::LockGuard lock(_cacheLock); - return Statistics(_memoryUsage, _cacheLimit.sum(), _entries.size()); -} - -void -MemFileCache::printCacheEntriesHtml(std::ostream& out) const -{ - vespalib::LockGuard lock(_cacheLock); - out << "<p>Cache entries (most recently used first):</p>\n" - << "<ol>\n"; - const MemFileCache::TimeIdx& timeIdx(boost::multi_index::get<1>(_entries)); - for (MemFileCache::TimeIdx::const_reverse_iterator - it(timeIdx.rbegin()), e(timeIdx.rend()); - it != e; ++it) - { - out << "<li>"; - out << it->_bid << ": "; - if (!it->_ptr->isInUse()) { - out << it->_ptr->_cacheSize.toString(); - } else { - out << "<em>(in use)</em>"; - } - out << 
"</li>\n"; - } - out << "</ol>\n"; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h deleted file mode 100644 index 36e6e6b641a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecache.h +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::MemFileCache - * \ingroup memfile - * - * \brief Cache holding onto all mem file objects in memory. - * - * This is the global memory file cache keeping track of all the memory files - * in memory. - */ - -#pragma once - -#include <vespa/metrics/metrics.h> -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/memfilepersistence/memfile/memfile.h> -#include <vespa/memfilepersistence/memfile/memfileptr.h> -#include <boost/multi_index_container.hpp> -#include <boost/multi_index/identity.hpp> -#include <boost/multi_index/member.hpp> -#include <boost/multi_index/mem_fun.hpp> -#include <boost/multi_index/ordered_index.hpp> -#include <boost/multi_index/sequenced_index.hpp> -#include <vespa/storageframework/generic/component/component.h> - - -namespace storage::memfile { - -class MemFilePersistenceCacheMetrics; -class Environment; // Avoid cyclic dependency with environment - -class MemFileCache : private framework::Component, - private Types -{ -public: - typedef MemSlot::MemoryUsage MemoryUsage; - - struct Statistics - { - MemoryUsage _memoryUsage; - size_t _cacheSize; - size_t _numEntries; - - Statistics(const MemoryUsage& memoryUsage, - size_t cacheSize, - size_t numEntries) - : _memoryUsage(memoryUsage), - _cacheSize(cacheSize), - _numEntries(numEntries) - {} - }; -private: - class Entry { - public: - using SP = std::shared_ptr<Entry>; - - MemFile _file; - MemoryUsage _cacheSize; - Environment& _env; - bool _inUse; - bool 
_returnToCacheWhenFinished; - - Entry(const Entry &) = delete; - Entry & operator = (const Entry &) = delete; - Entry(FileSpecification& file, Environment& env, - bool returnToCacheWhenFinished = true) - : _file(file, env), _env(env), _inUse(true), - _returnToCacheWhenFinished(returnToCacheWhenFinished) - {} - - bool isInUse() const { - return _inUse; - } - - void setInUse(bool inUse); - }; - - struct EntryWrapper { - EntryWrapper( - Entry::SP ptr, - uint64_t lastUsed, - const document::BucketId& bid) - : _ptr(std::move(ptr)), _lastUsed(lastUsed), _bid(bid) {} - - const Entry* operator->() const { - return _ptr.get(); - }; - - Entry* operator->() { - return _ptr.get(); - }; - - Entry::SP _ptr; - uint64_t _lastUsed; - document::BucketId _bid; - }; - - struct CacheEntryGuard; - - vespalib::Lock _cacheLock; - - typedef boost::multi_index::ordered_unique< - boost::multi_index::member<EntryWrapper, BucketId, &EntryWrapper::_bid> - > BucketIdOrder; - - typedef boost::multi_index::ordered_non_unique< - boost::multi_index::member<EntryWrapper, uint64_t, &EntryWrapper::_lastUsed> - > TimeOrder; - - typedef boost::multi_index::multi_index_container< - EntryWrapper, - boost::multi_index::indexed_by< - BucketIdOrder, - TimeOrder - > - > LRUCache; - - typedef boost::multi_index::nth_index<LRUCache, 0>::type BucketIdx; - typedef boost::multi_index::nth_index<LRUCache, 1>::type TimeIdx; - - class CacheEvictionPolicy - { - uint64_t _evictionCursor; - protected: - metrics::LongCountMetric& _evictionMetric; - public: - CacheEvictionPolicy(metrics::LongCountMetric& evictionMetric) - : _evictionCursor(0), - _evictionMetric(evictionMetric) - {} - - uint64_t getEvictionCursor() const { - return _evictionCursor; - } - void setEvictionCursor(uint64_t cursor) { - _evictionCursor = cursor; - } - }; - - class MetaDataEvictionPolicy : public CacheEvictionPolicy - { - public: - MetaDataEvictionPolicy(metrics::LongCountMetric& evictionMetric) - : CacheEvictionPolicy(evictionMetric) {} - - 
TimeIdx::iterator evict( - TimeIdx& lruIndex, - TimeIdx::iterator& it, - MemoryUsage& curUsage); - - uint64_t getValue(const MemoryUsage& usage) const { - return usage.sum(); - } - }; - - class BodyEvictionPolicy : public CacheEvictionPolicy - { - public: - BodyEvictionPolicy(metrics::LongCountMetric& evictionMetric) - : CacheEvictionPolicy(evictionMetric) {} - - TimeIdx::iterator evict( - TimeIdx& lruIndex, - TimeIdx::iterator& it, - MemoryUsage& curUsage); - - uint64_t getValue(const MemoryUsage& usage) const { - return usage.bodySize; - } - }; - - class HeaderEvictionPolicy : public CacheEvictionPolicy - { - public: - HeaderEvictionPolicy(metrics::LongCountMetric& evictionMetric) - : CacheEvictionPolicy(evictionMetric) {} - - TimeIdx::iterator evict( - TimeIdx& lruIndex, - TimeIdx::iterator& it, - MemoryUsage& curUsage); - - uint64_t getValue(const MemoryUsage& usage) const { - return usage.headerSize + usage.bodySize; - } - }; - - - MemoryUsage _memoryUsage; - - LRUCache _entries; - uint64_t _lastUsedCounter; - - MemFilePersistenceCacheMetrics& _metrics; - - BodyEvictionPolicy _bodyEvicter; - HeaderEvictionPolicy _headerEvicter; - MetaDataEvictionPolicy _metaDataEvicter; - - void done(Entry&); - void move(CacheEntryGuard& source, CacheEntryGuard& target); - void evictWhileFull(); - void executeEvictionPolicies(); - void returnToCache(MemFileCache::Entry& entry); - - TimeIdx::iterator getLeastRecentlyUsedBucket(); - - /** - * @return Returns the current size of the cache. 
- */ - uint64_t size() const; - - void eraseNoLock(const document::BucketId& id); - - friend class CacheEntryGuard; - friend class MemCacheTest; - - template <typename EvictionPolicy> - void - executeCacheEvictionPolicy(EvictionPolicy& policy); - - MemoryUsage _cacheLimit; - -public: - typedef std::unique_ptr<MemFileCache> UP; - - MemFileCache(framework::ComponentRegister& componentRegister, - MemFilePersistenceCacheMetrics& metrics); - - /** - * Get a memfile for the given bucket on the given disk. - * @param env Needed for cache to be able to create non-existing entries. - * @param dir If not given, use the default directory from the environment. - * @param createIfNotInCache If false, the bucket won't be inserted into the - * cache after, unless it was already cached before this operation. - */ - MemFilePtr get(const BucketId&, - Environment& env, - Directory& dir, - bool createIfNotInCache = true); - - /** - * Removes the given bucket id from cache. Bucket must be in use, - * so erase() will as a consequence not subtract the bucket's cache - * usage from the total cache usage as that has already been done - * upon retrieving the bucket in the first place. - */ - void erase(const document::BucketId& id); - - typedef std::map<document::BucketId, BucketInfo> BucketInfoMap; - - /** - * This function exists just temporarily for memfile layer to flush all - * dirty entries found after each operation. This will be removed in favor - * of another mechanism later. - */ - BucketInfoMap flushDirtyEntries(); - - /** - * Clears the cache of all non-active entries (flushing dirty entries - * as necessary). - */ - void clear(); - - /** - * @return Returns true if the given bucket exists in the cache. - */ - bool contains(const document::BucketId& bucketId) const; - - /** - * Used for unit testing only. - */ - const MemFilePersistenceCacheMetrics& getMetrics() const { - return _metrics; - } - - /** - * Set maximum cache size. 
- */ - void setCacheSize(MemoryUsage limits); - - uint64_t getCacheSize() { return _cacheLimit.sum(); } - - /** - * NOTE: takes lock, never call from within memfilecache code. - * @return Statistics over cache memory usage and entry counts - */ - Statistics getCacheStats() const; - - /** - * Dump all cache entries as a most recently used-ordered list. - * Used for verbose status page printing. - */ - void printCacheEntriesHtml(std::ostream& out) const; -}; - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp deleted file mode 100644 index 7d84fd3dde5..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfilecompactor.h" -#include "memfile.h" -#include <vespa/vespalib/stllike/hash_map.hpp> -#include <algorithm> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.compactor"); - -namespace storage::memfile { - -struct DocumentVersionInfo { - document::DocumentId _id; - uint32_t _versions; - bool _tombstoned; - - DocumentVersionInfo(const document::DocumentId& docId, bool tombstoned) - : _id(docId), - _versions(1), - _tombstoned(tombstoned) - { } - - bool newerVersionExists() const noexcept { - return (_versions != 1); - } -}; - -namespace { - -bool -isTombstone(const MemSlot& slot) -{ - return slot.deleted(); -} - -// Deduct with underflow protection -template<typename T> -T deduct(T a, T b) { - return (a > b ? 
a - b : T(0)); -} - -struct CompactSlotInfo : private Types { - typedef std::list<DocumentVersionInfo> DocList; - typedef vespalib::hash_map<GlobalId, DocList, GlobalId::hash> Map; - Map _info; - const MemFile& _memFile; - - CompactSlotInfo(const MemFile& memFile) - : _info(2 * memFile.getSlotCount()), - _memFile(memFile) - { - } - - /** - * Registers a particular document version as having been seen in the file, - * keeping track of how many newer versions have been observed thus far and - * whether at least one of these was a tombstone (remove entry). - * - * Potential GID collisions are handled by utilizing the actual document - * ID to track specific documents. - * - * Returns a reference to the currently tracked version state for the - * document the slot is for. Returned reference is valid until the next - * invocation of registerSeen() or the owning CompactSlotInfo instance - * is destructed. - */ - DocumentVersionInfo& registerSeen(const MemSlot& slot) { - document::DocumentId id = _memFile.getDocumentId(slot); - DocList& gidDocs(_info[slot.getGlobalId()]); - auto matchesId = [&](const DocumentVersionInfo& doc) { - return (id == doc._id); - }; - auto existing = std::find_if(gidDocs.begin(), gidDocs.end(), matchesId); - - if (existing == gidDocs.end()) { // (Very) common case - gidDocs.emplace_back(id, isTombstone(slot)); - return gidDocs.back(); - } else { - ++existing->_versions; - if (isTombstone(slot)) { - existing->_tombstoned = true; - } - return *existing; - } - } -}; - -class DecreasingTimestampSlotRange -{ -public: - DecreasingTimestampSlotRange(const MemFile& memFile) - : _memFile(memFile) - { - } - MemFile::const_iterator begin() const { - return _memFile.begin(Types::ITERATE_REMOVED); - } - MemFile::const_iterator end() const { - return _memFile.end(); - } -private: - const MemFile& _memFile; -}; - -DecreasingTimestampSlotRange -allSlotsInDecreasingTimestampOrder(const MemFile& memFile) -{ - return {memFile}; -} - -} - 
-MemFileCompactor::MemFileCompactor( - framework::MicroSecTime currentTime, - const CompactionOptions& options) - : _options(options), - _currentTime(currentTime), - _revertTimePoint(deduct(currentTime, options._revertTimePeriod)), - _keepRemoveTimePoint(deduct(currentTime, options._keepRemoveTimePeriod)) -{ - assert(_options._maxDocumentVersions != 0); -} - -/* - * Cases to handle: - * - Document has too many versions; always remove slot - * - But otherwise, only remove if older than revert time. - * - Remove entry is too old; remove slot if older than revert time AND keep - * remove time. - * - Tombstoned entries are not resurrected as they are either compacted - * away due to being outside the revert time period or their tombstone - * survives by being inside the revert time period. The "keep remove - * time" period is also forced to be at least as high as the revert time - * period at configuration time. - * - Otherwise, keep the slot. - */ -MemFileCompactor::SlotList -MemFileCompactor::getSlotsToRemove(const MemFile& memFile) -{ - memFile.ensureHeaderBlockCached(); - - std::vector<const MemSlot*> removeSlots; - CompactSlotInfo slots(memFile); - - LOG(spam, - "Running compact on %s. Using revertTime=%zu, " - "keepRemoveTime=%zu, maxDocumentVersions=%u", - memFile.toString(true).c_str(), - _revertTimePoint.getTime(), - _keepRemoveTimePoint.getTime(), - _options._maxDocumentVersions); - - for (auto& slot : allSlotsInDecreasingTimestampOrder(memFile)) { - DocumentVersionInfo& info(slots.registerSeen(slot)); - - if (exceededVersionCount(info)) { - alwaysCompact(slot, removeSlots); - } else if (info.newerVersionExists()) { - // A tombstone also counts as a newer version. - compactIfNotRevertible(slot, removeSlots); - } else if (isTombstone(slot) && keepRemoveTimeExpired(slot)) { - compactIfNotRevertible(slot, removeSlots); - } // else: keep slot since it's the newest or within revert period. 
- } - - std::reverse(removeSlots.begin(), removeSlots.end()); - return removeSlots; -} - -bool -MemFileCompactor::exceededVersionCount( - const DocumentVersionInfo& info) const noexcept -{ - return (info._versions > _options._maxDocumentVersions); -} - -bool -MemFileCompactor::keepRemoveTimeExpired(const MemSlot& slot) const noexcept -{ - return (slot.getTimestamp() < _keepRemoveTimePoint); -} - -void -MemFileCompactor::compactIfNotRevertible( - const MemSlot& slot, - SlotList& slotsToRemove) const -{ - // May compact slot away if its timestamp is older than the point in time - // where we expect reverts may be sent. - if (slot.getTimestamp() < _revertTimePoint) { - alwaysCompact(slot, slotsToRemove); - } -} - -void -MemFileCompactor::alwaysCompact(const MemSlot& slot, - SlotList& slotsToRemove) const -{ - LOG(spam, "Compacting slot %s", slot.toString().c_str()); - slotsToRemove.push_back(&slot); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h deleted file mode 100644 index 1a11df15ced..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfilecompactor.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::MemFileCompactor - * \ingroup memfile - * - * \brief Class containing logic to find what slots in a memfile can be removed. 
- */ -#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <limits> - -namespace storage { -namespace memfile { - -class MemFile; -class MemSlot; - -struct CompactionOptions -{ - framework::MicroSecTime _revertTimePeriod; - framework::MicroSecTime _keepRemoveTimePeriod; - uint32_t _maxDocumentVersions {std::numeric_limits<uint32_t>::max()}; - - CompactionOptions& revertTimePeriod(framework::MicroSecTime t) { - _revertTimePeriod = t; - return *this; - } - - CompactionOptions& keepRemoveTimePeriod(framework::MicroSecTime t) { - _keepRemoveTimePeriod = t; - return *this; - } - - CompactionOptions& maxDocumentVersions(uint32_t maxVersions) { - _maxDocumentVersions = maxVersions; - return *this; - } -}; - -class DocumentVersionInfo; - -class MemFileCompactor : public Types -{ -public: - using SlotList = std::vector<const MemSlot*>; - - MemFileCompactor(framework::MicroSecTime currentTime, - const CompactionOptions& options); - - SlotList getSlotsToRemove(const MemFile& memFile); -private: - bool exceededVersionCount(const DocumentVersionInfo&) const noexcept; - bool keepRemoveTimeExpired(const MemSlot& slot) const noexcept; - void compactIfNotRevertible(const MemSlot& slot, - SlotList& slotsToRemove) const; - void alwaysCompact(const MemSlot& slot, SlotList& slotsToRemove) const; - - CompactionOptions _options; - framework::MicroSecTime _currentTime; - framework::MicroSecTime _revertTimePoint; - framework::MicroSecTime _keepRemoveTimePoint; -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h deleted file mode 100644 index d6108001c93..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileiointerface.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/memfilepersistence/common/filespecification.h> - -namespace storage { - -namespace memfile { - -class Environment; - -class MemFileIOInterface : public Types { -public: - virtual ~MemFileIOInterface() {} - - typedef std::unique_ptr<MemFileIOInterface> UP; - - /** - * Deserializes the data in the given location (must already be read from disk), - * into a document object. If the data is not already read from disk, returns NULL. - */ - virtual Document::UP getDocumentHeader( - const document::DocumentTypeRepo&, - DataLocation loc) const = 0; - - virtual document::DocumentId getDocumentId(DataLocation loc) const = 0; - - /** - * Deserializes the given document's body part with the data in the given data - * location. - */ - virtual void readBody( - const document::DocumentTypeRepo&, - DataLocation loc, - Document& doc) const = 0; - - virtual DataLocation addDocumentIdOnlyHeader( - const DocumentId&, - const document::DocumentTypeRepo&) = 0; - - virtual DataLocation addHeader(const Document& doc) = 0; - - virtual DataLocation addBody(const Document& doc) = 0; - - virtual void clear(DocumentPart part) = 0; - - virtual bool verifyConsistent() const = 0; - - virtual void move(const FileSpecification& target) = 0; - - virtual DataLocation copyCache(const MemFileIOInterface& source, - DocumentPart part, - DataLocation loc) = 0; - - virtual void ensureCached(Environment& env, - DocumentPart part, - const std::vector<DataLocation>& locations) = 0; - - virtual bool isCached(DataLocation loc, DocumentPart part) const = 0; - - virtual bool isPersisted(DataLocation loc, DocumentPart part) const = 0; - - virtual uint32_t getSerializedSize(DocumentPart part, - DataLocation loc) const = 0; - - virtual void close() = 0; - - virtual size_t getCachedSize(DocumentPart part) const = 0; - - void clear() { - clear(HEADER); - clear(BODY); - } -}; - -} - -} - diff --git 
a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h deleted file mode 100644 index c1b85e8700d..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memfileptr.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::slotfile::MemFilePtr - * \ingroup memfile - * - * \brief Utility class for managing an entry taken from cache. - * - * To be able to seamlessly return copy by value objects from the cache that - * can be used, and automatically return to the cache on destruction, this - * wrapper class exist to ensure that then the last user stops using it, it - * will be released. - * - * This object is created by the cache and returned to the disk thread using it. - * A linked pointer should thus be safe as we assume all users of it will be in - * the same thread. It assumes the cache itself has a lifetime longer than this - * object. - */ - -#pragma once - -#include <memory> - -namespace storage { -namespace memfile { - -class MemFile; - -class MemFilePtr { -public: - /** - * Utility class to ensure we call done() on cache after all cache - * pointers are deleted. The cache implements a subclass of this class - * doing it, to prevent cyclic dependency with cache. - */ - struct EntryGuard { - using SP = std::shared_ptr<EntryGuard>; - - MemFile* _file; - - EntryGuard(MemFile& file) : _file(&file) {} - virtual ~EntryGuard() {} - - virtual void erase() = 0; - virtual void deleteFile() = 0; - virtual void move(EntryGuard& target) = 0; - }; - -private: - EntryGuard::SP _entry; - -public: - MemFilePtr() {}; - MemFilePtr(EntryGuard::SP entry) : _entry(std::move(entry)) {} - - // Behave like pointer to MemFile for ease of use. 
- MemFile* operator->() { return _entry->_file; } - MemFile& operator*() { return *_entry->_file; } - MemFile* get() { - return (_entry.get() != 0 ? _entry->_file : 0); - } - const MemFile* operator->() const { return _entry->_file; } - const MemFile& operator*() const { return *_entry->_file; } - const MemFile* get() const { - return (_entry.get() != 0 ? _entry->_file : 0); - } - - /** Removes the entry from cache and deletes the underlying file. */ - void deleteFile() { _entry->deleteFile(); } - - /** - * Erases the entry from the cache. Does not touch the underlying file so - * therefore requires the memfile's alteredSlots() to return false. - */ - void eraseFromCache() { _entry->erase(); } - - /** - * Removes the entry from cache and renames the underlying file. - * The end result is that this mem file now points to the renamed file. - * The target MemFilePtr is invalid after this operation. - * - * @return Returns false if the target file already existed. - */ - void move(MemFilePtr& target) { - _entry->move(*target._entry); - } -}; - -} // storage -} // memfile - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp deleted file mode 100644 index f128132c53e..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "memslot.h" -#include <iostream> -#include <iomanip> - -#define ASSERT_FLAG(a) \ -{ \ - if (!(_flags & a)) { \ - std::ostringstream error; \ - error << "Expected one of flags " << std::hex << a << " to be set at " \ - << "this point, but only the given flags are set: " << _flags \ - << ", " << toString(true); \ - throw vespalib::IllegalStateException(error.str(), VESPA_STRLOC); \ - } \ -} - -namespace storage { -namespace memfile { - -MemSlot::MemSlot(const MemSlot& other) - : _timestamp(other._timestamp), - _header(other._header), - _body(other._body), - _gid(other._gid), - _flags(other._flags), - _checksum(other._checksum) -{ } - -MemSlot::MemSlot(const GlobalId& gid, Timestamp time, - DataLocation header, DataLocation body, - uint16_t flags, uint16_t checksum) - : _timestamp(time), - _header(header), - _body(body), - _gid(gid), - _flags(flags), - _checksum(checksum) -{ } - -MemSlot::~MemSlot() { } - -MemSlot::MemoryUsage -MemSlot::getCacheSize() const -{ - MemoryUsage retVal; - retVal.metaSize = sizeof(MemSlot); - retVal.headerSize = _header._size; - retVal.bodySize = _body._size; - return retVal; -} - -MemSlot& -MemSlot::operator=(const MemSlot& other) -{ - _timestamp = other._timestamp; - _header = other._header; - _body = other._body; - _gid = other._gid; - _checksum = other._checksum; - - // Flags must be copied after cache. 
- _flags = other._flags; - return *this; -} - -void -MemSlot::swap(MemSlot& other) -{ - std::swap(_timestamp, other._timestamp); - std::swap(_header, other._header); - std::swap(_body, other._body); - std::swap(_gid, other._gid); - std::swap(_checksum, other._checksum); - std::swap(_flags, other._flags); -} - -bool -MemSlot::hasBodyContent() const -{ - return _body._size > 0; -} - -bool -MemSlot::operator==(const MemSlot& other) const -{ - if (_checksum != other._checksum - || _timestamp != other._timestamp - || _header != other._header - || _body != other._body - || _flags != other._flags - || _gid != other._gid) - { - return false; - } - return true; -} - -void -MemSlot::print(std::ostream& out, bool verbose, - const std::string& /*indent*/) const -{ - if (verbose) { - out << "MemSlot("; - } - out << std::dec << _timestamp << ", " << _gid.toString() << ", h " - << _header._pos << " - " << _header._size << ", b " - << _body._pos << " - " << _body._size << ", f " - - << std::hex << _flags << ", c " << _checksum; - if (verbose) { - out << ")"; - } -} - -std::string -MemSlot::MemoryUsage::toString() const -{ - std::ostringstream ss; - ss << "MemoryUsage(meta=" << metaSize - << ", header=" << headerSize - << ", body=" << bodySize - << ")"; - return ss.str(); -} - -std::string -MemSlot::toString(bool verbose) const { - std::ostringstream ost; - print(ost, verbose, ""); - return ost.str(); -} - -std::ostream& -operator<<(std::ostream& out, const MemSlot& slot) { - slot.print(out, false, ""); - return out; -} - - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h deleted file mode 100644 index 1fceeadb992..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/memslot.h +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-/** - * \class storage::MemSlot - * \ingroup memfile - * - * \brief Class representing a slot in a MemFile. - * - * The MemSlot class keeps all the data we need for a single entry in the - * slotfile. - * - * Note that a lot of these instances will be kept in the memory cache. It is - * important that the memory footprint of this class is really small, such that - * we can fit many entries in the cache. The layout of the class is thus a bit - * specialized to keep a low footprint. - * - * Currently, 40 bytes are used for metadata. - * - * A note about constness. The cached part are considered mutable, such that - * all read access can be const. Only operations causing the slot to change on - * disk (given a flush) is non-const. - */ -#pragma once - -#include <vespa/memfilepersistence/common/types.h> - -namespace storage { -namespace memfile { - -class MemFile; - -class MemSlot : private Types -{ - // Metadata for slot we need to keep. - Timestamp _timestamp; // 64 bit - 8 bytes timestamp - DataLocation _header; // 2x32 bit - 8 bytes header location - DataLocation _body; // 2x32 bit - 8 bytes body location - GlobalId _gid; // 96 bit - 12 bytes - uint16_t _flags; // 16 bit - 2 bytes flag - uint16_t _checksum; // 16 bit - 2 bytes checksum - - friend class MemFileTest; - - // used by tests to simulate gid collision. 
- void setGlobalId(const GlobalId& gid) { - _gid = gid; - } - -public: - struct MemoryUsage { - MemoryUsage() : - headerSize(0), - bodySize(0), - metaSize(0) {} - - MemoryUsage(uint64_t metaSz, uint64_t headerSz, uint64_t bodySz) - : headerSize(headerSz), - bodySize(bodySz), - metaSize(metaSz) - {} - - uint64_t headerSize; - uint64_t bodySize; - uint64_t metaSize; - - uint64_t sum() const { - return headerSize + bodySize + metaSize; - } - - void add(const MemoryUsage& usage) { - headerSize += usage.headerSize; - bodySize += usage.bodySize; - metaSize += usage.metaSize; - } - - void sub(const MemoryUsage& usage) { - headerSize -= usage.headerSize; - bodySize -= usage.bodySize; - metaSize -= usage.metaSize; - } - - std::string toString() const; - }; - - using UP = std::unique_ptr<MemSlot>; - - MemSlot(const MemSlot&); - /** Constructor used by mappers reading from file. */ - MemSlot(const GlobalId& gid, Timestamp time, - DataLocation header, DataLocation body, - uint16_t flags, uint16_t checksum); - ~MemSlot(); - - MemSlot& operator=(const MemSlot&); - void swap(MemSlot&); - - Timestamp getTimestamp() const { return _timestamp; } - const GlobalId& getGlobalId() const { return _gid; } - - DataLocation getLocation(DocumentPart part) const - { return (part == HEADER ? _header : _body); } - - bool inUse() const { return (_flags & IN_USE); } - bool deleted() const { return (_flags & DELETED); } - bool deletedInPlace() const { return (_flags & DELETED_IN_PLACE); } - - bool checksumOutdated() const { return (_flags & CHECKSUM_OUTDATED); } - - bool alteredInMemory() const { return (_flags & SLOTS_ALTERED); } - - bool usingUnusedFlags() const { return (_flags & UNUSED); } - - uint16_t getFlags() const { return _flags; } - - bool hasBodyContent() const; - - uint16_t getPersistedFlags() const - { return (_flags & LEGAL_PERSISTED_SLOT_FLAGS); } - - /** - * Returns the number of bytes required to keep this slot - * in memory. 
- */ - MemoryUsage getCacheSize() const; - - void setFlag(uint32_t flags) - { _flags |= flags | (flags & 0xff ? CHECKSUM_OUTDATED : 0); } - - void clearFlag(uint32_t flags) { _flags &= ~flags; } - - void setLocation(DocumentPart part, DataLocation location) { - if (part == HEADER) { - _header = location; - } else { - _body = location; - } - _flags |= CHECKSUM_OUTDATED; - } - - void setChecksum(uint16_t checksum) - { _checksum = checksum; _flags &= ~CHECKSUM_OUTDATED; } - - uint16_t getChecksum() const { return _checksum; } - - void clearPersistence() { - _header = DataLocation(); - if (_body._size > 0) { - _body = DataLocation(); - } - _flags |= CHECKSUM_OUTDATED; - } - - void turnToUnrevertableRemove() { - if (_flags & DELETED_IN_PLACE) return; - _body = DataLocation(0, 0); - _flags |= DELETED | DELETED_IN_PLACE; - _flags |= ALTERED_IN_MEMORY | CHECKSUM_OUTDATED; - } - - /** - * Tests for equality of memfiles. Equality requires MemFile to look equal - * for clients. It will not read data from file, so the same parts of the - * file must be cached for objects to be equal. Non-persistent flags need - * not be equal (The same parts need not be persisted to backend files) - * - * Used in unit testing only. - */ - bool operator==(const MemSlot& other) const; - bool operator!=(const MemSlot& other) const { - return ! 
(*this == other); - } - - // Implement print functions so we can be used similar to as we were - // a document::Printable (Don't want inheritance in this class) - void print(std::ostream& out, bool verbose, - const std::string& indent) const; - - std::string toString(bool verbose = false) const; -}; - -std::ostream& operator<<(std::ostream& out, const MemSlot& slot); - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp deleted file mode 100644 index 434f4eece0a..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "shared_data_location_tracker.h" - -namespace storage { -namespace memfile { - -DataLocation -SharedDataLocationTracker::getOrCreateSharedLocation( - DataLocation sourceLocation) -{ - DataLocation& bufferedLoc(_trackedLocations[sourceLocation]); - if (!bufferedLoc.valid()) { - bufferedLoc = _cacheCopier.copyFromSourceToLocal(_part, sourceLocation); - } - return bufferedLoc; -} - - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h deleted file mode 100644 index 4708ca58d06..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/shared_data_location_tracker.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <map> - -namespace storage { -namespace memfile { - -class BufferCacheCopier; - -/** - * Simple utility to track locations copied between files and to help - * ensure locations that were shared in the source file will also be shared - * in the destination file. - */ -class SharedDataLocationTracker -{ -public: - SharedDataLocationTracker(BufferCacheCopier& cacheCopier, - Types::DocumentPart part) - : _cacheCopier(cacheCopier), - _part(part), - _trackedLocations() - { - } - - /** - * Get a location to data contained in the destination which points at the - * exact same data as that given by sourceLocation in the source. Multiple - * requests to the same source location will return the same destination - * location. - */ - DataLocation getOrCreateSharedLocation(DataLocation sourceLocation); -private: - BufferCacheCopier& _cacheCopier; - Types::DocumentPart _part; - std::map<DataLocation, DataLocation> _trackedLocations; -}; - -/** - * Interface for copying data between individual MemFile buffer caches. - */ -class BufferCacheCopier -{ - virtual DataLocation doCopyFromSourceToLocal( - Types::DocumentPart part, - DataLocation sourceLocation) = 0; -public: - virtual ~BufferCacheCopier() {} - - /** - * Copy a given file part location from a source cache into a new location - * in the destination cache. Returns new location in destination cache. - * It is assumed that locations returned by this method will be unique. 
- */ - DataLocation copyFromSourceToLocal(Types::DocumentPart part, - DataLocation sourceLocation) - { - return doCopyFromSourceToLocal(part, sourceLocation); - } -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp deleted file mode 100644 index 8296b2b205e..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "slotiterator.h" -#include <vespa/memfilepersistence/memfile/memfile.h> - -namespace storage { -namespace memfile { - -GidUniqueSlotIterator::GidUniqueSlotIterator(const MemFile& file, - bool iterateRemoves, - Timestamp fromTimestamp, - Timestamp toTimestamp) - : _file(file), - _seen(2 * file.getSlotCount()), - _iterateRemoves(iterateRemoves), - _fromTimestamp(fromTimestamp), - _toTimestamp(toTimestamp), - _currentIndex(file.getSlotCount()) -{ - iterate(); -} - -void -GidUniqueSlotIterator::iterate() const -{ - for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) { - // To avoid separate implementations for const and non-const - // iterators we do a const cast here. For const iterators, only - // const MemSlot entries will be exposed externally, so no - // modifications will be allowed for those. 
- MemSlot& slot(const_cast<MemSlot&>(_file[i])); - if (_fromTimestamp != Timestamp(0) && - slot.getTimestamp() < _fromTimestamp) continue; - if (_toTimestamp != Timestamp(0) && - slot.getTimestamp() > _toTimestamp) continue; - - SeenMap::insert_result inserted(_seen.insert(slot.getGlobalId())); - if (!inserted.second) { - continue; - } - if (slot.deleted() && !_iterateRemoves) continue; - _current = &slot; - _currentIndex = i; - return; - } - _current = 0; - _currentIndex = 0; -} - -SlotIterator* -GidUniqueSlotIterator::clone() const { - GidUniqueSlotIterator* sit( - new GidUniqueSlotIterator(_file, _iterateRemoves, - _fromTimestamp, _toTimestamp)); - sit->_seen = _seen; - sit->_currentIndex = _currentIndex; - sit->_current = _current; - return sit; -} - -AllSlotsIterator::AllSlotsIterator(const MemFile& file, - bool iterateRemoves, - Timestamp fromTimestamp, - Timestamp toTimestamp) - : _file(file), - _iterateRemoves(iterateRemoves), - _fromTimestamp(fromTimestamp), - _toTimestamp(toTimestamp), - _currentIndex(file.getSlotCount()) -{ - iterate(); -} - -SlotIterator* -AllSlotsIterator::clone() const { - AllSlotsIterator* sit = new AllSlotsIterator(_file, _iterateRemoves, - _fromTimestamp, _toTimestamp); - sit->_currentIndex = _currentIndex; - sit->_current = _current; - return sit; -} - -void -AllSlotsIterator::iterate() const -{ - for (uint32_t i = _currentIndex - 1; i < _currentIndex; --i) { - // To avoid seprate implementations for const and non-const - // iterators we do a const cast here. For const iterators, only - // const MemSlot entries will be exposed externally, so no - // modifications will be allowed for those. 
- MemSlot& slot(const_cast<MemSlot&>(_file[i])); - if (_fromTimestamp != Timestamp(0) && - slot.getTimestamp() < _fromTimestamp) continue; - if (_toTimestamp != Timestamp(0) && - slot.getTimestamp() > _toTimestamp) continue; - if (slot.deleted() && !_iterateRemoves) continue; - _current = &slot; - _currentIndex = i; - return; - } - _current = 0; - _currentIndex = 0; -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h b/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h deleted file mode 100644 index 48b5e525fd9..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/memfile/slotiterator.h +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::SlotIterator - * \ingroup memfile - * - * \brief Utility class for iterating slots in a MemFile. - * - * When needing to iterate the slots, sometimes one want to iterate only unique - * slots and sometimes you want to iterate deleted slots. Iterating only unique - * slots adds a CPU cost, so one would want to avoid adding that cost if - * iterating all. - * - * To simplify code iterating slots, they can use a SlotIterator, such that they - * don't have to reimplement the iteration. - * - * The typical way of creating such an iterator, is by calling MemFile's - * getSlotIterator function, which will give you an iterator of suitable - * implementation. Do not use these directly. - */ - -#pragma once - -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/vespalib/stllike/hash_set.h> - -namespace storage { -namespace memfile { - -class MemFile; // MemFile depends on this file. 
Don't want circular dependency -class MemSlot; - -class SlotIterator : protected Types { -protected: - mutable MemSlot* _current; - - virtual void iterate() const = 0; - SlotIterator() : _current(0) {} - -public: - typedef std::unique_ptr<SlotIterator> UP; - typedef std::unique_ptr<const SlotIterator> CUP; - - virtual ~SlotIterator() {} - - virtual SlotIterator* clone() const = 0; - - MemSlot* getCurrent() { return _current; } - const MemSlot* getCurrent() const { return _current; } - - const MemSlot& operator++() const { iterate(); return *_current; } -}; - -class GidUniqueSlotIterator : public SlotIterator { - const MemFile& _file; - typedef vespalib::hash_set<GlobalId, GlobalId::hash> SeenMap; - mutable SeenMap _seen; - bool _iterateRemoves; - Timestamp _fromTimestamp; - Timestamp _toTimestamp; - mutable uint32_t _currentIndex; - -public: - GidUniqueSlotIterator(const MemFile& file, - bool iterateRemoves, - Timestamp fromTimestamp, - Timestamp toTimestamp); - - void iterate() const override; - SlotIterator* clone() const override; -}; - -class AllSlotsIterator : public SlotIterator { - const MemFile& _file; - bool _iterateRemoves; - Timestamp _fromTimestamp; - Timestamp _toTimestamp; - mutable uint32_t _currentIndex; - -public: - AllSlotsIterator(const MemFile& file, - bool iterateRemoves, - Timestamp fromTimestamp, - Timestamp toTimestamp); - - void iterate() const override; - SlotIterator* clone() const override; -}; - -/** - * \class storage::memfile::IteratorWrapper - * \ingroup memfile - * - * \brief Wrapper class for iterators, such that we can return by value. - * - * Iterators use inheritance, so we need a wrapper class to wrap the - * implementation in order to be able to return iterators by value, as one is - * acustomed to in the standard library. - */ -class IteratorWrapper { - SlotIterator::CUP _it; - -public: - IteratorWrapper() {} // Creates end() iterator. 
- IteratorWrapper(SlotIterator::CUP it) : _it(std::move(it)) {} - // Override to clone implementation - IteratorWrapper(const IteratorWrapper& o) : _it(o._it->clone()) {} - IteratorWrapper& operator=(const IteratorWrapper& o) { - _it.reset(0); - if (o._it.get() != 0) _it.reset(o._it->clone()); - return *this; - } - - bool operator==(const IteratorWrapper& o) const { - const MemSlot* slot(_it.get() == 0 ? 0 : _it->getCurrent()); - const MemSlot* slot2(o._it.get() == 0 ? 0 : o._it->getCurrent()); - return (slot == slot2); - } - bool operator!=(const IteratorWrapper& o) const { - return ! (*this == o); - } - - const MemSlot& operator*() const { return *_it->getCurrent(); } - const MemSlot* operator->() const { return _it->getCurrent(); } - const MemSlot& operator++() const { return ++*_it; } -}; - - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore deleted file mode 100644 index 7e7c0fe7fae..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.depend -/Makefile diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt deleted file mode 100644 index 5731e7e33c2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_library(memfilepersistence_spi OBJECT - SOURCES - memfilepersistence.cpp - memfilepersistenceprovider.cpp - memfilepersistenceprovidermetrics.cpp - operationhandler.cpp - iteratorhandler.cpp - joinoperationhandler.cpp - splitoperationhandler.cpp - visitorslotmatcher.cpp - threadlocals.cpp - cacheevictionguard.cpp - DEPENDS -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp deleted file mode 100644 index a693dda0e83..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "cacheevictionguard.h" -#include <vespa/memfilepersistence/memfile/memfile.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.cacheevictionguard"); - -namespace storage { -namespace memfile { - -MemFileCacheEvictionGuard::~MemFileCacheEvictionGuard() -{ - if (!_ok) { - LOG(debug, - "Clearing %s from cache to force reload " - "of file on next access.", - _ptr->getFile().getBucketId().toString().c_str()); - // Throw away all non-persisted changes to file and clear it from the - // cache to force a full reload on next access. This is the safest - // option, as all operations that are not yet persisted should fail - // back to the client automatically. - _ptr->clearFlag(Types::SLOTS_ALTERED); - _ptr.eraseFromCache(); // nothrow - } -} - -} -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h b/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h deleted file mode 100644 index 4cdaaab7eec..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/cacheevictionguard.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. -#pragma once - -#include <vespa/memfilepersistence/memfile/memfileptr.h> -#include <cassert> - -namespace storage { -namespace memfile { - -/** - * Guard which will forcefully un-mark a file as being modified and evict - * it from the cache if an exception occurs before it is destructed (more - * specifically, if unguard() is never invoked on it). - * - * Any data not yet persisted when the memfile is evicted will be lost. - * It's up to the caller to ensure that this does not actually cause - * any true data loss. - */ -class MemFileCacheEvictionGuard -{ -public: - MemFileCacheEvictionGuard(const MemFilePtr& ptr) - : _ptr(ptr), - _ok(false) - { - assert(_ptr.get()); - } - ~MemFileCacheEvictionGuard(); - - MemFile* operator->() { return _ptr.get(); } - MemFile& operator*() { return *_ptr; } - const MemFile* operator->() const { return _ptr.get(); } - const MemFile& operator*() const { return *_ptr; } - - const MemFilePtr& get() const { return _ptr; } - MemFilePtr& get() { return _ptr; } - - void unguard() { _ok = true; } -private: - MemFilePtr _ptr; - bool _ok; -}; - -} -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp deleted file mode 100644 index 9fe396a27d6..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "iteratorhandler.h" -#include "visitorslotmatcher.h" -#include "cacheevictionguard.h" -#include <vespa/document/select/bodyfielddetector.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.handler.iterator"); - -namespace storage::memfile { - -CachePrefetchRequirements -CachePrefetchRequirements::createFromSelection(const document::DocumentTypeRepo& repo, - const document::select::Node& sel) -{ - CachePrefetchRequirements ret; - document::select::BodyFieldDetector bfd(repo); - sel.visit(bfd); - ret.setHeaderPrefetchRequired(bfd.foundHeaderField); - ret.setBodyPrefetchRequired(bfd.foundBodyField); - return ret; -} - - -IteratorState::IteratorState(const spi::Bucket& bucket, const spi::Selection& sel, document::FieldSet::UP fieldSet, - spi::IncludedVersions versions, std::unique_ptr<document::select::Node> docSel, - const CachePrefetchRequirements& prefetchRequirements) - : _bucket(bucket), - _selection(sel), - _fieldSet(std::move(fieldSet)), - _documentSelection(std::move(docSel)), - _remaining(), - _versions(versions), - _prefetchRequirements(prefetchRequirements), - _isActive(false), - _isCompleted(false) -{} - -IteratorState::~IteratorState() {} - -IteratorHandler::IteratorHandler(Environment& env) - : OperationHandler(env) -{} - -IteratorHandler::~IteratorHandler() {} - -void -IteratorHandler::sanityCheckActiveIteratorCount() -{ - if (_sharedState._iterators.size() - >= SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT - && !_sharedState._hasWarnedLargeIteratorCount) - { - LOG(warning, "Number of active iterators has reached warn-limit " - "of %zu. Potential iterator leak? 
destroyIterator() must be " - "invoked for each successful createIterator() invocation.", - SharedIteratorHandlerState::WARN_ACTIVE_ITERATOR_COUNT); - _sharedState._hasWarnedLargeIteratorCount = true; - } -} - -spi::CreateIteratorResult -IteratorHandler::createIterator(const spi::Bucket& bucket, - const document::FieldSet& fields, - const spi::Selection& sel, - spi::IncludedVersions versions) -{ - uint64_t id; - // By default, no explicit prefetching is required. - CachePrefetchRequirements prefetcher; - - std::unique_ptr<document::select::Node> docSelection; - if (!sel.getDocumentSelection().getDocumentSelection().empty()) { - docSelection.reset( - parseDocumentSelection( - sel.getDocumentSelection().getDocumentSelection(), - true).release()); - if (!docSelection.get()) { - return spi::CreateIteratorResult( - spi::Result::PERMANENT_ERROR, - "Got invalid/unparseable document selection string"); - } - prefetcher = CachePrefetchRequirements::createFromSelection( - _env.repo(), *docSelection); - // NOTE: Suboptimal behavior; since the field detector doesn't - // understand that ID-related selections require header reads, - // we take the safest route here and simply always require the - // header to be prefetched if we have _any_ kind of non-empty - // document selection. 
- prefetcher.setHeaderPrefetchRequired(true); - } - prefetcher.setFromTimestamp(Timestamp(sel.getFromTimestamp().getValue())); - prefetcher.setToTimestamp(Timestamp(sel.getToTimestamp().getValue())); - - { - vespalib::LockGuard lock(_sharedState._stateLock); - id = _sharedState._nextId; - - std::pair<IteratorStateMap::iterator, bool> inserted( - _sharedState._iterators.insert( - IteratorStateMap::value_type( - id, - IteratorState(bucket, sel, document::FieldSet::UP(fields.clone()), - versions, std::move(docSelection), prefetcher)))); - - assert(inserted.second); // Should never have duplicates - ++_sharedState._nextId; - sanityCheckActiveIteratorCount(); - } - LOG(debug, "Created new iterator state for bucket %s " - "with iterator id %zu", - bucket.getBucketId().toString().c_str(), - id); - return spi::CreateIteratorResult(spi::IteratorId(id)); -} - -spi::Result -IteratorHandler::destroyIterator(spi::IteratorId id) -{ - vespalib::LockGuard lock(_sharedState._stateLock); - uint64_t iterId = id; - IteratorStateMap::iterator iter( - _sharedState._iterators.find(iterId)); - if (iter == _sharedState._iterators.end()) { - std::ostringstream ss; - ss << "destroyIterator called with unknown iterator id (" - << iterId << ")"; - LOG(error, "%s", ss.str().c_str()); - return spi::Result(); - } - LOG(debug, "Destroying iterator state for iterator id %zu", iterId); - assert(!iter->second.isActive()); - _sharedState._iterators.erase(iter); - return spi::Result(); -} - -spi::DocEntry::SizeType -IteratorHandler::getDocumentSize(const MemFile& file, - const MemSlot& slot, - bool headerOnly) const -{ - spi::DocEntry::SizeType size = file.getSerializedSize(slot, HEADER); - if (!headerOnly) { - size += file.getSerializedSize(slot, BODY); - } - return size; -} - -spi::DocEntry::SizeType -IteratorHandler::getEntrySize(spi::DocEntry::SizeType docSize) const -{ - return docSize + sizeof(spi::DocEntry); -} - -void -IteratorHandler::prefetch(const CachePrefetchRequirements& requirements, - 
MemFile& file) const -{ - if (requirements.noPrefetchRequired()) { - LOG(spam, "%s: no prefetching required", - file.getFile().getBucketId().toString().c_str()); - return; - } - // Let body prefetching also imply header prefetching, at least for now. - // If this changes, so must the explicit caching of remaining timestamps - // in iterate(). - bool headerOnly = !requirements.isBodyPrefetchRequired(); - if (requirements.prefetchEntireBlocks()) { - LOG(spam, "%s: prefetching entire blocks for header: yes, body: %s", - file.getFile().getBucketId().toString().c_str(), - headerOnly ? "no" : "yes"); - if (headerOnly) { - file.ensureHeaderBlockCached(); - } else { - file.ensureHeaderAndBodyBlocksCached(); - } - } else { - std::vector<Timestamp> timestamps; - for (size_t i = 0; i < file.getSlotCount(); ++i) { - const MemSlot& slot(file[i]); - // TODO(vekterli): replace this sub-optimal code with a lower bound search - if (slot.getTimestamp() < requirements.getFromTimestamp()) { - continue; - } - if (slot.getTimestamp() > requirements.getToTimestamp()) { - break; - } - timestamps.push_back(slot.getTimestamp()); - } - LOG(spam, "%s: prefetching %zu slots in timestamp range [%zu, %zu]", - file.getFile().getBucketId().toString().c_str(), - timestamps.size(), - requirements.getFromTimestamp().getTime(), - requirements.getToTimestamp().getTime()); - file.ensureDocumentCached(timestamps, headerOnly); - } -} - -std::vector<Types::Timestamp>& -IteratorHandler::getOrFillRemainingTimestamps(MemFile& file, - IteratorState& state) -{ - std::vector<Types::Timestamp>& remaining(state.getRemaining()); - if (remaining.empty()) { - if (state.getSelection().getTimestampSubset().empty()) { - VisitorSlotMatcher matcher( - _env.repo(), state.getDocumentSelectionPtr()); - - int flags = 0; - switch (state.getIncludedVersions()) { - case spi::NEWEST_DOCUMENT_ONLY: - flags = ITERATE_GID_UNIQUE; - break; - case spi::NEWEST_DOCUMENT_OR_REMOVE: - flags = ITERATE_GID_UNIQUE | ITERATE_REMOVED; - break; 
- case spi::ALL_VERSIONS: - flags = ITERATE_REMOVED; - break; - } - - remaining = select( - file, - matcher, - flags, - Timestamp(state.getSelection().getFromTimestamp()), - Timestamp(state.getSelection().getToTimestamp())); - } else { - const std::vector<spi::Timestamp>& subset( - state.getSelection().getTimestampSubset()); - remaining.reserve(subset.size()); - for (size_t i = 0; i < subset.size(); ++i) { - // Ensure timestamps are strictly increasing - assert(i == 0 || subset[i] > subset[i - 1]); - remaining.push_back(Types::Timestamp(subset[i])); - } - - state.setIncludedVersions(spi::ALL_VERSIONS); - } - } - return remaining; -} - -bool -IteratorHandler::addMetaDataEntry(spi::IterateResult::List& result, - const MemSlot& slot, - uint64_t& totalSize, - uint64_t maxByteSize) const -{ - size_t entrySize = getEntrySize(0); - if (totalSize + entrySize >= maxByteSize && !result.empty()) { - return false; - } - totalSize += entrySize; - - int metaFlags = (slot.deleted() || slot.deletedInPlace()) ? 
spi::REMOVE_ENTRY : 0; - spi::DocEntry::UP docEntry( - new spi::DocEntry( - spi::Timestamp(slot.getTimestamp().getTime()), - metaFlags)); - result.push_back(std::move(docEntry)); - return true; -} - -bool -IteratorHandler::addRemoveEntry(spi::IterateResult::List& results, - const MemFile& file, - const MemSlot& slot, - uint64_t& totalSize, - uint64_t maxByteSize) const -{ - DocumentId did = file.getDocumentId(slot); - size_t idSize = did.getSerializedSize(); - size_t entrySize = getEntrySize(idSize); - - if (totalSize + entrySize >= maxByteSize && !results.empty()) { - return false; - } - totalSize += entrySize; - - spi::DocEntry::UP docEntry( - new spi::DocEntry( - spi::Timestamp(slot.getTimestamp().getTime()), - spi::REMOVE_ENTRY, - did)); - results.push_back(std::move(docEntry)); - return true; -} - -bool -IteratorHandler::addPutEntry(spi::IterateResult::List& results, - const MemFile& file, - const MemSlot& slot, - bool headerOnly, - const document::FieldSet& fieldsToKeep, - uint64_t& totalSize, - uint64_t maxByteSize) const -{ - size_t docSize = getDocumentSize(file, slot, headerOnly); - size_t entrySize = getEntrySize(docSize); - if (totalSize + entrySize >= maxByteSize && !results.empty()) { - return false; - } - Document::UP doc( - file.getDocument(slot, headerOnly ? HEADER_ONLY : ALL)); - totalSize += entrySize; - // If we want either the full doc or just the header, don't waste time - // stripping unwanted document fields. 
- if (fieldsToKeep.getType() != document::FieldSet::ALL - && fieldsToKeep.getType() != document::FieldSet::HEADER) - { - document::FieldSet::stripFields(*doc, fieldsToKeep); - } - spi::DocEntry::UP docEntry( - new spi::DocEntry(spi::Timestamp(slot.getTimestamp().getTime()), - 0, - std::move(doc), - docSize)); - results.push_back(std::move(docEntry)); - return true; -} - -spi::IterateResult -IteratorHandler::iterate(spi::IteratorId id, uint64_t maxByteSize) -{ - spi::IterateResult::List results; - - IteratorState* state; - { - vespalib::LockGuard lock(_sharedState._stateLock); - IteratorStateMap::iterator iter( - _sharedState._iterators.find(id)); - if (iter == _sharedState._iterators.end()) { - LOG(error, "Invoked iterate(id=%zu, maxByteSize=%zu) " - "with unknown id", - uint64_t(id), - maxByteSize); - - return spi::IterateResult(spi::Result::PERMANENT_ERROR, - "Unknown iterator ID"); - } - assert(!iter->second.isActive()); - state = &iter->second; - if (state->isCompleted()) { - return spi::IterateResult(std::move(results), true); - } - state->setActive(true); - } - - ActiveGuard activeGuard(*state); - MemFileCacheEvictionGuard file(getMemFile(state->getBucket())); - - const document::FieldSet& fields(state->getFields()); - bool metaDataOnly = (fields.getType() == document::FieldSet::NONE); - bool headerOnly = true; - - // Ensure we have relevant parts of the file prefetched if this is required. - const CachePrefetchRequirements& prefetchRequirements( - state->getCachePrefetchRequirements()); - prefetch(prefetchRequirements, *file); - - std::vector<Timestamp>& remaining( - getOrFillRemainingTimestamps(*file, *state)); - - if (!metaDataOnly) { - document::HeaderFields h; - headerOnly = h.contains(fields); - // Don't bother doing duplicate work if we've already prefetched - // everything we need. 
- if (!((headerOnly && prefetchRequirements.isHeaderPrefetchRequired()) - || prefetchRequirements.isBodyPrefetchRequired())) - { - LOG(spam, "Caching %zu remaining slots from disk for %s", - remaining.size(), - state->getBucket().getBucketId().toString().c_str()); - file->ensureDocumentCached(remaining, headerOnly); - } - } else { - LOG(spam, "Not caching any of the %zu remaining slots from disk " - "for %s since iteration is metadata only", - remaining.size(), - state->getBucket().getBucketId().toString().c_str()); - } - - size_t totalSize = 0; - while (!remaining.empty()) { - Timestamp ts = remaining.back(); - const MemSlot* slot = file->getSlotAtTime(ts); - - if (slot) { - if (metaDataOnly) { - if (!addMetaDataEntry(results, *slot, totalSize, maxByteSize)) { - break; - } - } else if (slot->deleted() || slot->deletedInPlace()) { - if (state->getIncludedVersions() == spi::NEWEST_DOCUMENT_ONLY) { - // Probably altered by unrevertable remove between time - // of timestamp gathering and actual iteration. - remaining.pop_back(); - continue; - } - if (!addRemoveEntry(results, *file, *slot, - totalSize, maxByteSize)) - { - break; - } - } else { - if (!addPutEntry(results, *file, *slot, - headerOnly, fields, totalSize, maxByteSize)) - { - break; - } - } - } - remaining.pop_back(); - } - - file.unguard(); - - LOG(debug, "Iteration of bucket %s returned result with %zu entries " - "and %zu bytes. 
Remaining docs: %zu", - state->getBucket().getBucketId().toString().c_str(), - results.size(), - totalSize, - remaining.size()); - - if (remaining.empty()) { - state->setCompleted(); - return spi::IterateResult(std::move(results), true); - } - - return spi::IterateResult(std::move(results), false); -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h deleted file mode 100644 index 53a4c1973d2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/iteratorhandler.h +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::IteratorHandler - * \ingroup memfile - * - * \brief Class exposing iterators over a bucket - */ -#pragma once - -#include "operationhandler.h" -#include <vespa/persistence/spi/persistenceprovider.h> -#include <vespa/document/fieldset/fieldsetrepo.h> -#include <vespa/document/select/node.h> -#include <map> - -namespace document { - -class FieldSet; - -} - -namespace storage { - -class GetIterCommand; - -namespace memfile { - -class CachePrefetchRequirements : public Types -{ -public: - CachePrefetchRequirements() - : _headerPrefetchRequired(false), - _bodyPrefetchRequired(false), - _fromTimestamp(0), - _toTimestamp(UINT64_MAX) - { - } - - bool noPrefetchRequired() const { - return !_headerPrefetchRequired && !_bodyPrefetchRequired; - } - - bool isHeaderPrefetchRequired() const { return _headerPrefetchRequired; } - void setHeaderPrefetchRequired(bool required) { _headerPrefetchRequired = required; } - - bool isBodyPrefetchRequired() const { return _bodyPrefetchRequired; } - void setBodyPrefetchRequired(bool required) { _bodyPrefetchRequired = required; } - - bool prefetchEntireBlocks() const { - return (_fromTimestamp == Timestamp(0) - && _toTimestamp == Timestamp(UINT64_MAX)); - } - - Timestamp 
getFromTimestamp() const { return _fromTimestamp; } - void setFromTimestamp(Timestamp fromTimestamp) { _fromTimestamp = fromTimestamp; } - Timestamp getToTimestamp() const { return _toTimestamp; } - void setToTimestamp(Timestamp toTimestamp) { _toTimestamp = toTimestamp; } - - static CachePrefetchRequirements createFromSelection( - const document::DocumentTypeRepo& repo, - const document::select::Node& sel); -private: - // Whether or not document selection requires header/body to be read - // beforehand to work efficiently. - bool _headerPrefetchRequired; - bool _bodyPrefetchRequired; - - Timestamp _fromTimestamp; - Timestamp _toTimestamp; -}; - -class IteratorState -{ - spi::Bucket _bucket; - spi::Selection _selection; - std::unique_ptr<document::FieldSet> _fieldSet; - std::unique_ptr<document::select::Node> _documentSelection; - std::vector<Types::Timestamp> _remaining; - spi::IncludedVersions _versions; - CachePrefetchRequirements _prefetchRequirements; - bool _isActive; - bool _isCompleted; - std::map<std::string, bool> _headerOnlyForDocumentType; - -public: - IteratorState(const spi::Bucket& bucket, const spi::Selection& sel, document::FieldSet::UP fieldSet, - spi::IncludedVersions versions, std::unique_ptr<document::select::Node> docSel, - const CachePrefetchRequirements& prefetchRequirements); - IteratorState(IteratorState &&) = default; - IteratorState & operator = (IteratorState &&) = default; - ~IteratorState(); - - const spi::Bucket& getBucket() const { return _bucket; } - - const CachePrefetchRequirements& getCachePrefetchRequirements() const { - return _prefetchRequirements; - } - - bool isActive() const { return _isActive; } - void setActive(bool active) { _isActive = active; } - - bool isCompleted() const { return _isCompleted; } - void setCompleted(bool completed = true) { _isCompleted = completed; } - - const spi::Selection& getSelection() const { return _selection; } - spi::Selection& getSelection() { return _selection; } - const 
document::FieldSet& getFields() const { return *_fieldSet; } - - spi::IncludedVersions getIncludedVersions() const { return _versions; } - void setIncludedVersions(spi::IncludedVersions versions) { _versions = versions; } - bool hasDocumentSelection() const { return _documentSelection.get() != 0; } - - /** - * Can only be called if hasDocumentSelection() == true - */ - const document::select::Node& getDocumentSelection() const - { - return *_documentSelection; - } - /** - * @return pointer to doc selection if one has been given, NULL otherwise. - */ - const document::select::Node* getDocumentSelectionPtr() const - { - return _documentSelection.get(); - } - const std::vector<Types::Timestamp>& getRemaining() const { return _remaining; } - std::vector<Types::Timestamp>& getRemaining() { return _remaining; } -}; - -class SharedIteratorHandlerState -{ -public: - typedef std::map<uint64_t, IteratorState> IteratorStateMap; -private: - IteratorStateMap _iterators; - uint64_t _nextId; - vespalib::Lock _stateLock; - // Debugging aid: - static const size_t WARN_ACTIVE_ITERATOR_COUNT = 2048; - bool _hasWarnedLargeIteratorCount; - - friend class IteratorHandler; - friend class IteratorHandlerTest; -public: - SharedIteratorHandlerState() : _nextId(1) {} -}; - -class IteratorHandler : public OperationHandler -{ -private: - typedef SharedIteratorHandlerState::IteratorStateMap IteratorStateMap; - - class ActiveGuard - { - IteratorState& _state; - public: - ActiveGuard(IteratorState& state) : _state(state) {} - ~ActiveGuard() { - _state.setActive(false); - } - }; - - /** - * Get the serialized size of a document, only counting the header if - * headerOnly is true. - */ - spi::DocEntry::SizeType getDocumentSize(const MemFile&, - const MemSlot&, - bool headerOnly) const; - /** - * Get the in-memory size of a single DocEntry object to more accurately - * limit per-iteration memory usage. 
- */ - spi::DocEntry::SizeType getEntrySize(spi::DocEntry::SizeType docSize) const; - /** - * Populate the state's remaining timestamps-vector, either from an - * explicitly specified timestamp subset in the selection, or from its - * document selection if no timestamp subset is given. - * @return mutable reference to the state's remaining-vector. - */ - std::vector<Types::Timestamp>& getOrFillRemainingTimestamps( - MemFile& file, - IteratorState&); - - /** - * If header/body precaching is required, cache _all_ documents in the - * required part(s) for the file. Otherwise, do nothing. - */ - void prefetch(const CachePrefetchRequirements& requirements, - MemFile& file) const; - - bool addMetaDataEntry(spi::IterateResult::List& result, - const MemSlot& slot, - uint64_t& totalSize, - uint64_t maxByteSize) const; - bool addRemoveEntry(spi::IterateResult::List& result, - const MemFile& file, - const MemSlot& slot, - uint64_t& totalSize, - uint64_t maxByteSize) const; - bool addPutEntry(spi::IterateResult::List& result, - const MemFile& file, - const MemSlot& slot, - bool headerOnly, - const document::FieldSet& fieldsToKeep, - uint64_t& totalSize, - uint64_t maxByteSize) const; - - /** - * Sanity checking to ensure we don't leak iterators. Checks if the number - * of active iterators exceeds a predefined Large Number(tm) and warns - * if this is the case. Mutates shared state (sets a "has warned" flag), - * so must only be called when holding shared state mutex. 
- */ - void sanityCheckActiveIteratorCount(); - -public: - typedef std::unique_ptr<IteratorHandler> UP; - - SharedIteratorHandlerState _sharedState; - - IteratorHandler(Environment&); - ~IteratorHandler(); - - spi::CreateIteratorResult createIterator(const spi::Bucket& bucket, - const document::FieldSet& fieldSet, - const spi::Selection& sel, - spi::IncludedVersions versions); - spi::Result destroyIterator(spi::IteratorId id); - spi::IterateResult iterate(spi::IteratorId id, uint64_t maxByteSize); - - const SharedIteratorHandlerState& getState() const { - return _sharedState; - } -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp deleted file mode 100644 index f29e391352c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "joinoperationhandler.h" -#include "cacheevictionguard.h" -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/vespalib/util/stringfmt.h> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.handler.join"); - -namespace storage { -namespace memfile { - -JoinOperationHandler::JoinOperationHandler(Environment& env) - : OperationHandler(env), - _env(env) -{ -} - -void -JoinOperationHandler::copySlots(MemFile& sourceFile, MemFile& targetFile) -{ - sourceFile.ensureBodyBlockCached(); - LOG(spam, - "Moving data from %s to %s", - sourceFile.toString().c_str(), - targetFile.toString().c_str()); - - std::vector<const MemSlot*> slotsToCopy; - slotsToCopy.reserve(sourceFile.getSlotCount()); - - for (uint32_t j = 0; j < sourceFile.getSlotCount(); j++) { - const MemSlot* slot(&sourceFile[j]); - - if (!targetFile.getSlotAtTime(slot->getTimestamp())) { - slotsToCopy.push_back(slot); - } - } - targetFile.copySlotsFrom(sourceFile, slotsToCopy); - LOG(spam, "Moved data from %s to %s", - sourceFile.toString().c_str(), targetFile.toString().c_str()); -} - -spi::Result -JoinOperationHandler::join( - const spi::Bucket& source1, - const spi::Bucket& source2, - const spi::Bucket& target) -{ - if ((source1.getBucketId() == source2.getBucketId()) - && (target.getBucketId() == source1.getBucketId())) - { - return singleJoin(source1, target); - } - - MemFileCacheEvictionGuard targetFile( - getMemFile(target.getBucketId(), target.getPartition(), false)); - - std::vector<spi::Bucket> sources; - sources.push_back(source1); - if (source1.getBucketId() != source2.getBucketId()) { - sources.push_back(source2); - } - - for (uint32_t i = 0; i < sources.size(); i++) { - MemFileCacheEvictionGuard sourceFile( - getMemFile(sources[i].getBucketId(), - sources[i].getPartition(), - false)); - - if (targetFile->empty()) { - LOG(spam, "Renaming %s to %s", - sourceFile->toString().c_str(), targetFile->toString().c_str()); - // It is assumed that if this fails, the 
nature of the exception is - // such that it will cause the disk to automatically be marked as - // down and for the process to restart, meaning we should not get - // out of sync between the service and persistence layers. - sourceFile.get().move(targetFile.get()); - } else { - copySlots(*sourceFile, *targetFile); - targetFile->flushToDisk(); - sourceFile.get().deleteFile(); - } - sourceFile.unguard(); - } - targetFile.unguard(); - - return spi::Result(); -} - -void -JoinOperationHandler::clearBucketFromCache(const spi::Bucket& bucket) -{ - getMemFile(bucket.getBucketId(), bucket.getPartition(), false) - .eraseFromCache(); -} - -/* - * Moving same bucket between partitions, potentially joining data - * if target file already exists. - */ -spi::Result -JoinOperationHandler::singleJoin( - const spi::Bucket& source, - const spi::Bucket& target) -{ - assert(source.getBucketId() == target.getBucketId()); - assert(source.getPartition() != target.getPartition()); - // Internal joins sidestep the cache completely, so we have to ensure - // the bucket is cleared from it before commencing. Otherwise, it's - // possible that the cached file offsets will not reflect what's actually - // stored on disk, leading to potential data corruption! The bucket shall - // not have been taken out of the cache before this point. 
- clearBucketFromCache(target); - - Directory& toJoinDir = _env.getDirectory(source.getPartition()); - FileSpecification toJoinSpec( - source.getBucketId(), toJoinDir, - _env.calculatePathInDir(source.getBucketId(), toJoinDir)); - - MemFile toJoin(toJoinSpec, _env); - - Directory& toKeepDir = _env.getDirectory(target.getPartition()); - FileSpecification toKeepSpec( - source.getBucketId(), toKeepDir, - _env.calculatePathInDir(source.getBucketId(), toKeepDir)); - assert(toJoinDir != toKeepDir); - - const double maxFillRate( - _env.acquireConfigReadLock().memFilePersistenceConfig() - ->diskFullFactorMove); - if (source.getPartition() != target.getPartition() && - toKeepDir.isFull(0, maxFillRate)) - { - std::string failure = - vespalib::make_string("Not moving bucket %s to directory %s because it's " - "fill rate is %G (>%G)", - source.getBucketId().toString().c_str(), - toKeepDir.toString().c_str(), - toKeepDir.getPartition().getMonitor()->getFillRate(), - maxFillRate); - - LOG(debug, "%s", failure.c_str()); - - return spi::Result(spi::Result::TRANSIENT_ERROR, failure); - } - - MemFile toKeep(toKeepSpec, _env); - - copySlots(toJoin, toKeep); - toKeep.flushToDisk(); - - // Delete original file. - _env._memFileMapper.deleteFile(toJoin, _env); - - return spi::Result(); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h deleted file mode 100644 index d1feab43adb..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/joinoperationhandler.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-/** - * \class storage::memfile::JoinHandler - * \ingroup memfile - */ -#pragma once - -#include <vespa/memfilepersistence/spi/operationhandler.h> -#include <vespa/persistence/spi/persistenceprovider.h> - -namespace storage { - -namespace memfile { - -class JoinOperationHandler : public OperationHandler { -public: - typedef std::unique_ptr<JoinOperationHandler> UP; - - JoinOperationHandler(Environment&); - - spi::Result join(const spi::Bucket& source1, - const spi::Bucket& source2, - const spi::Bucket& target); - - spi::Result singleJoin(const spi::Bucket& source, - const spi::Bucket& target); - -private: - Environment& _env; - - void copySlots(MemFile& source, MemFile& target); - void clearBucketFromCache(const spi::Bucket&); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp deleted file mode 100644 index b3a675e9c9c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "memfilepersistence.h" - -namespace storage { -namespace memfile { - - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h deleted file mode 100644 index b49f4cd1454..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistence.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-/** - * \class storage::memfile::MemFilePersistence - * \ingroup memfile - * - * \brief Top class in memfile persistence actually implementing the SPI - */ - -#pragma once - -namespace storage { -namespace memfile { - -struct MemFilePersistence -{ -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp deleted file mode 100644 index 87eea4e2c9f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.cpp +++ /dev/null @@ -1,914 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - - -#include "memfilepersistenceprovider.h" -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/document/update/documentupdate.h> -#include <vespa/config/helper/configgetter.hpp> -#include <vespa/storageframework/generic/status/htmlstatusreporter.h> -#include <vespa/document/bucket/fixed_bucket_spaces.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".memfilepersistenceprovider"); - -#define TRACE(context, level, func, message) \ -{ \ - if ((context).getTrace().shouldTrace(level)) { \ - vespalib::string messageToTrace( \ - vespalib::make_string("MemFilePP.%s: %s", func, message)); \ - (context).getTrace().trace(level, messageToTrace); \ - } \ -} -#define TRACEGENERIC(context, type) \ -if ((context).getTrace().shouldTrace(9)) { \ - vespalib::string messageToTrace( \ - vespalib::make_string("MemFilePP.%s: Load type %s, priority %u.", \ - type, (context).getLoadType().toString().c_str(), \ - (uint32_t) (context).getPriority())); \ - (context).getTrace().trace(9, messageToTrace); \ -} - -namespace storage::memfile { - -namespace { - -Device::State -mapIoExceptionToDeviceState(MemFileIoException::Type type) -{ - using vespalib::IoException; - switch (type) { - case 
IoException::ILLEGAL_PATH: - return Device::PATH_FAILURE; - case IoException::NO_PERMISSION: - return Device::NO_PERMISSION; - case IoException::DISK_PROBLEM: - return Device::IO_FAILURE; - case IoException::TOO_MANY_OPEN_FILES: - return Device::TOO_MANY_OPEN_FILES; - default: - return Device::OK; - } -} - -} // end of anonymous namespace - -MemFilePtr& -MemFilePersistenceProvider::getThreadLocalMemFile() const -{ - return _threadLocals.get()._memFile; -} - -MemFilePersistenceThreadMetrics& -MemFilePersistenceProvider::getMetrics() const -{ - ThreadContext& context = _threadLocals.get(); - if (context._metrics == NULL) { - context._metrics = _metrics.addThreadMetrics(); - } - - return *context._metrics; -} - -bool -MemFilePersistenceProvider::hasCachedMemFile() const -{ - return _threadLocals.get()._memFile.get(); -} - -MemFilePtr -MemFilePersistenceProvider::getMemFile(const spi::Bucket& b, - bool keepInCache) const -{ - assert(b.getBucketSpace() == document::FixedBucketSpaces::default_space()); - MemFilePtr& ptr = getThreadLocalMemFile(); - - if (ptr.get()) { - assert(ptr->getFile().getBucketId() == b); - - MemFilePtr retVal = ptr; - ptr = MemFilePtr(); - return retVal; - } - - return _env->_cache.get(b.getBucketId(), - *_env, - _env->getDirectory(b.getPartition()), - keepInCache); -} - -void -MemFilePersistenceProvider::setActiveMemFile(MemFilePtr ptr, - const char* user) const -{ - LOG(spam, "Inserting active memfile %s for user %s", - ptr->getFile().getBucketId().toString().c_str(), - user); - getThreadLocalMemFile() = ptr; -} - -void -MemFilePersistenceProvider::clearActiveMemFile(spi::Context* context) const -{ - LOG(spam, "Clearing active memfile"); - MemFilePtr& ptr = getThreadLocalMemFile(); - assert(ptr.get() == NULL || !ptr->slotsAltered()); - ptr = MemFilePtr(); - if (context != 0) { - TRACE(*context, 9, "clearActiveMemFile", "Done clearing"); - } -} - -enum MemFileAccessGuardScopeExitAction { - REINSERT_AS_ACTIVE = 0x1, -}; - -/** - * The MemFile 
access guard provides a simple scope guard for providing - * exception safety for operations toward MemFiles. - * The guard will always evict a file from the cache iff the guard has not - * been dismissed upon destruction. This will throw away all non-persisted - * changes to file and clear it from the cache to force a full reload on next - * access. This is the safest option, as all operations that are not yet - * persisted should fail back to the client automatically. - * - * The current MemFile will be reinserted as the thread's active MemFile - * iff REINSERT_AS_ACTIVE has specified as a guard construction flag and - * the guard was dismissed before destruction. - */ -class MemFileAccessGuard : public Types -{ - MemFileAccessGuard(const MemFileAccessGuard&); - MemFileAccessGuard& operator=(const MemFileAccessGuard&); -public: - MemFileAccessGuard(const MemFilePersistenceProvider& spi, - const MemFilePtr& ptr, - const char* user, - uint32_t flags = 0) - : _spi(spi), - _ptr(ptr), - _user(user), - _flags(flags), - _dismissed(false) - { - assert(_ptr.get()); - } - - ~MemFileAccessGuard() { - if (!_dismissed) { - LOG(debug, - "Access guard in %s not dismissed on scope exit, clearing %s" - " from cache to force reload of file on next access.", - _user, - _ptr->getFile().getBucketId().toString().c_str()); - - _ptr->clearFlag(SLOTS_ALTERED); - _ptr.eraseFromCache(); // nothrow - } - if ((_flags & REINSERT_AS_ACTIVE) && _dismissed) { - _spi.setActiveMemFile(_ptr, _user); - } else { - _spi.clearActiveMemFile(); - } - } - - // Misc accessors - MemFile* operator->() { - return _ptr.get(); - } - MemFile& operator*() { - return *_ptr; - } - const MemFile* operator->() const { - return _ptr.get(); - } - const MemFile& operator*() const { - return *_ptr; - } - MemFilePtr& getMemFilePtr() { - return _ptr; - } - const MemFilePtr& getMemFilePtr() const { - return _ptr; - } - - /** - * If all access towards the MemFile has been successfully performed, - * calling dismiss() will 
ensure that the specified cleanup actions - * are not taken upon scope exit. - */ - void dismiss() { - _dismissed = true; - } - -private: - const MemFilePersistenceProvider& _spi; - MemFilePtr _ptr; - const char* _user; - const uint32_t _flags; - bool _dismissed; -}; - -void -MemFilePersistenceProvider::handleBucketCorruption(const FileSpecification& file) const -{ - spi::Bucket fixBucket(document::Bucket(document::FixedBucketSpaces::default_space(), - file.getBucketId()), - spi::PartitionId(file.getDirectory().getIndex())); - - // const_cast is nasty, but maintain() must necessarily be able to - // modify state... - MemFilePersistenceProvider& mutableSelf( - const_cast<MemFilePersistenceProvider&>(*this)); - - spi::Result maintainResult(mutableSelf.maintain(fixBucket, spi::HIGH)); - if (maintainResult.getErrorCode() != spi::Result::NONE) { - LOG(warning, - "Failed to successfully repair %s after corruptions: %s", - fixBucket.toString().c_str(), - maintainResult.toString().c_str()); - } - - // Add bucket to set of modified buckets so service layer can request - // new bucket info. 
- _env->addModifiedBucket(file.getBucketId()); -} - -template<typename C> -C MemFilePersistenceProvider::handleException(const std::exception& e, - bool canRepairBucket) const -{ - LOG(debug, "Handling exception caught during processing: %s", e.what()); - - const MemFileIoException* io = dynamic_cast<const MemFileIoException*>(&e); - if (io != NULL) { - std::ostringstream error; - error << "Exception caught processing operation for " - << io->getFile().getPath() << ": " << io->getMessage(); - - Device::State deviceState( - mapIoExceptionToDeviceState(io->getType())); - - if (deviceState != Device::OK) { - io->getFile().getDirectory().addEvent( - deviceState, - io->getMessage(), - VESPA_STRLOC); - - _env->_mountPoints->writeToFile(); - - return C(spi::Result::FATAL_ERROR, error.str()); - } - if (io->getType() == vespalib::IoException::CORRUPT_DATA - && canRepairBucket) - { - handleBucketCorruption(io->getFile()); - } - - return C(spi::Result::TRANSIENT_ERROR, error.str()); - } - const CorruptMemFileException* ce( - dynamic_cast<const CorruptMemFileException*>(&e)); - if (ce != 0) { - std::ostringstream error; - error << "Exception caught processing operation for " - << ce->getFile().getPath() << ": " << ce->getMessage(); - if (canRepairBucket) { - handleBucketCorruption(ce->getFile()); - } - return C(spi::Result::TRANSIENT_ERROR, error.str()); - } - - const TimestampExistException* ts = - dynamic_cast<const TimestampExistException*>(&e); - if (ts != NULL) { - return C(spi::Result::TIMESTAMP_EXISTS, ts->getMessage()); - } - - return C(spi::Result::PERMANENT_ERROR, e.what()); -} - -MemFilePersistenceProvider::MemFilePersistenceProvider( - framework::ComponentRegister& compReg, - const config::ConfigUri & configUri) - : framework::Component(compReg, "memfilepersistenceprovider"), - framework::StatusReporter("memfilepersistenceprovider", - "VDS Persistence Provider"), - _componentRegister(compReg), - _configUri(configUri), - 
_config(*config::ConfigGetter<vespa::config::storage::StorMemfilepersistenceConfig>::getConfig(configUri.getConfigId(), - configUri.getContext())), - _memFileMapper(*this), - _repo(0), - _metrics(*this), - _threadLocals(1024) -{ - registerMetric(_metrics); - registerStatusPage(*this); -} - -MemFilePersistenceProvider::~MemFilePersistenceProvider() -{ -} - -void -MemFilePersistenceProvider::setDocumentRepo(const document::DocumentTypeRepo& repo) -{ - _repo = &repo; - if (_env.get()) { - _env->setRepo(_repo); - } -} - -using MemFilePersistenceConfig - = vespa::config::storage::StorMemfilepersistenceConfig; -using PersistenceConfig = vespa::config::content::PersistenceConfig; - -namespace { - -MemFileCache::MemoryUsage -getCacheLimits(const MemFilePersistenceConfig& cfg) -{ - MemFileCache::MemoryUsage cacheLimits; - cacheLimits.metaSize = cfg.cacheSize * cfg.cacheSizeMetaPercentage / 100; - cacheLimits.headerSize = cfg.cacheSize * cfg.cacheSizeHeaderPercentage / 100; - cacheLimits.bodySize = cfg.cacheSize * cfg.cacheSizeBodyPercentage / 100; - return cacheLimits; -} - -std::unique_ptr<Options> -makeOptions(const MemFilePersistenceConfig& memFileCfg, - const PersistenceConfig& persistenceCfg) -{ - return std::unique_ptr<Options>(new Options(memFileCfg, persistenceCfg)); -} - -} - -void -MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> cfg) -{ - assert(cfg.get() != nullptr); - auto guard = _env->acquireConfigWriteLock(); - - guard.setMemFilePersistenceConfig(std::move(cfg)); - - if (guard.hasPersistenceConfig()) { - guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(), - *guard.persistenceConfig())); - } - - // Data race free; acquires internal cache lock. 
- _cache->setCacheSize(getCacheLimits(*guard.memFilePersistenceConfig())); -} - -void -MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> cfg) -{ - assert(cfg.get() != nullptr); - auto guard = _env->acquireConfigWriteLock(); - - guard.setPersistenceConfig(std::move(cfg)); - - if (guard.hasMemFilePersistenceConfig()) { - guard.setOptions(makeOptions(*guard.memFilePersistenceConfig(), - *guard.persistenceConfig())); - } -} - -void -MemFilePersistenceProvider::setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> cfg) -{ - assert(cfg.get() != nullptr); - auto guard = _env->acquireConfigWriteLock(); - guard.setDevicesConfig(std::move(cfg)); -} - -spi::PartitionStateListResult -MemFilePersistenceProvider::getPartitionStates() const -{ - // Lazily initialize to ensure service layer has set up enough for us - // to use all we need (memory manager for instance) - if (_env.get() == 0) { - assert(_repo != 0); - _cache.reset(new MemFileCache(_componentRegister, - _metrics._cache)); - _cache->setCacheSize(getCacheLimits(_config)); - try{ - _env.reset(new Environment( - _configUri, *_cache, _memFileMapper, *_repo, getClock())); - } catch (NoDisksException& e) { - return spi::PartitionStateListResult(spi::PartitionStateList( - spi::PartitionId::Type(0))); - } - _fileScanner.reset(new FileScanner( - _componentRegister, *_env->_mountPoints, - _config.dirLevels, _config.dirSpread)); - _util.reset(new OperationHandler(*_env)); - _iteratorHandler.reset(new IteratorHandler(*_env)); - _joinOperationHandler.reset(new JoinOperationHandler(*_env)); - _splitOperationHandler.reset(new SplitOperationHandler(*_env)); - } - return _env->_mountPoints->getPartitionStates(); -} - -spi::BucketIdListResult -MemFilePersistenceProvider::listBuckets(BucketSpace space, spi::PartitionId partition) const -{ - spi::BucketIdListResult::List buckets; - if (space == document::FixedBucketSpaces::default_space()) { - 
_fileScanner->buildBucketList(buckets, partition, 0, 1); - } - return spi::BucketIdListResult(buckets); -} - -spi::BucketIdListResult -MemFilePersistenceProvider::getModifiedBuckets(BucketSpace space) const -{ - document::BucketId::List modified; - if (space == document::FixedBucketSpaces::default_space()) { - _env->swapModifiedBuckets(modified); // Atomic op - } - return spi::BucketIdListResult(modified); -} - -spi::BucketInfoResult -MemFilePersistenceProvider::getBucketInfo(const spi::Bucket& bucket) const -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - LOG(spam, "getBucketInfo(%s)", bucket.toString().c_str()); - try { - bool retainMemFile = hasCachedMemFile(); - MemFileAccessGuard file(*this, - getMemFile(bucket, false), - "getBucketInfo", - retainMemFile ? REINSERT_AS_ACTIVE : 0); - - spi::BucketInfo info = file->getBucketInfo(); - - file.dismiss(); - return spi::BucketInfoResult(info); - } catch (std::exception& e) { - return handleException<spi::BucketInfoResult>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::put(const spi::Bucket& bucket, spi::Timestamp ts, - const document::Document::SP& doc, - spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "put"); - LOG(spam, "put(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), - doc->getId().toString().c_str()); - try { - TRACE(context, 9, "put", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "put", - REINSERT_AS_ACTIVE); - TRACE(context, 9, "put", "Altering file in memory"); - _util->write(*file, *doc, Timestamp(ts)); - - TRACE(context, 9, "put", "Dismissing file"); - file.dismiss(); - return spi::Result(); - } catch (std::exception& e) { - return handleException<spi::Result>(e, true); - } -} - -spi::RemoveResult -MemFilePersistenceProvider::remove(const spi::Bucket& bucket, spi::Timestamp ts, - const DocumentId& id, spi::Context& context) -{ - 
assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "remove"); - LOG(spam, "remove(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), - id.toString().c_str()); - try { - TRACE(context, 9, "remove", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "remove", - REINSERT_AS_ACTIVE); - TRACE(context, 9, "remove", "Altering file in memory"); - spi::Timestamp oldTs(_util->remove(*file, - id, Timestamp(ts), - OperationHandler::ALWAYS_PERSIST_REMOVE).getTime()); - TRACE(context, 9, "remove", "Dismissing file"); - file.dismiss(); - return spi::RemoveResult(oldTs > 0); - } catch (std::exception& e) { - return handleException<spi::RemoveResult>(e, true); - } -} - -spi::RemoveResult -MemFilePersistenceProvider::removeIfFound(const spi::Bucket& bucket, - spi::Timestamp ts, - const DocumentId& id, - spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "removeIfFound"); - LOG(spam, "removeIfFound(%s, %zu, %s)", bucket.toString().c_str(), - uint64_t(ts), id.toString().c_str()); - try { - TRACE(context, 9, "removeIfFound", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "removeiffound", - REINSERT_AS_ACTIVE); - TRACE(context, 9, "removeIfFound", "Altering file in memory"); - spi::Timestamp oldTs(_util->remove(*file, - id, Timestamp(ts), - OperationHandler::PERSIST_REMOVE_IF_FOUND).getTime()); - TRACE(context, 9, "removeIfFound", "Dismissing file"); - file.dismiss(); - return spi::RemoveResult(oldTs > 0); - } catch (std::exception& e) { - return handleException<spi::RemoveResult>(e, true); - } -} - -spi::UpdateResult -MemFilePersistenceProvider::MemFilePersistenceProvider::update( - const spi::Bucket& bucket, spi::Timestamp ts, - const document::DocumentUpdate::SP& upd, spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - 
TRACEGENERIC(context, "update"); - LOG(spam, "update(%s, %zu, %s)", bucket.toString().c_str(), uint64_t(ts), - upd->getId().toString().c_str()); - try { - TRACE(context, 9, "update", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "update", - REINSERT_AS_ACTIVE); - TRACE(context, 9, "update", "Reading old entry"); - bool headerOnly = !upd->affectsDocumentBody(); - OperationHandler::ReadResult ret = _util->read( - *file, - upd->getId(), - Timestamp(ts), - headerOnly ? HEADER_ONLY : ALL); - - Document::UP doc = ret.getDoc(); - if (!doc.get()) { - if (upd->getCreateIfNonExistent()) { - TRACE(context, 9, "update", "Doc did not exist, creating one"); - doc.reset(new Document(upd->getType(), upd->getId())); - upd->applyTo(*doc); - _util->write(*file, *doc, Timestamp(ts)); - file.dismiss(); - return spi::UpdateResult(spi::Timestamp(ts)); - } else { - TRACE(context, 9, "update", "Doc did not exist"); - file.dismiss(); - return spi::UpdateResult(); - } - } - - if (Timestamp(ts) == ret._ts) { - file.dismiss(); - if (doc->getId() == upd->getId()) { - TRACE(context, 9, "update", "Timestamp exist same doc"); - return spi::UpdateResult(spi::Result::TRANSIENT_ERROR, - "Update was already performed."); - } else { - // TODO: Assert-fail if we ever get here?? 
- TRACE(context, 9, "update", "Timestamp exist other doc"); - std::ostringstream error; - error << "Update of " << upd->getId() - << ": There already exists a document" - << " with timestamp " << ts; - - return spi::UpdateResult(spi::Result::TIMESTAMP_EXISTS, error.str()); - } - } - - TRACE(context, 9, "update", "Altering file in memory"); - upd->applyTo(*doc); - if (headerOnly) { - TRACE(context, 9, "update", "Writing new header entry"); - _util->update(*file, *doc, Timestamp(ts), Timestamp(ret._ts)); - } else { - TRACE(context, 9, "update", "Writing new doc entry"); - _util->write(*file, *doc, Timestamp(ts)); - } - if (headerOnly) { - ++getMetrics().headerOnlyUpdates; - } - - TRACE(context, 9, "update", "Dismissing file"); - file.dismiss(); - return spi::UpdateResult(spi::Timestamp(ret._ts.getTime())); - } catch (std::exception& e) { - return handleException<spi::UpdateResult>(e, true); - } -} - -spi::GetResult -MemFilePersistenceProvider::get(const spi::Bucket& bucket, - const document::FieldSet& fieldSet, - const DocumentId& id, - spi::Context& context) const -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "get"); - LOG(spam, "get(%s, %s)", bucket.toString().c_str(), id.toString().c_str()); - try { - TRACE(context, 9, "get", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "get"); - document::HeaderFields headerFields; - bool headerOnly = headerFields.contains(fieldSet); - - TRACE(context, 9, "get", "Reading from file."); - OperationHandler::ReadResult ret = - _util->read(*file, id, Timestamp(0), - headerOnly ? 
HEADER_ONLY : ALL); - - file.dismiss(); - if (!ret._doc.get()) { - TRACE(context, 9, "get", "Doc not found"); - return spi::GetResult(); - } - if (headerOnly) { - TRACE(context, 9, "get", "Retrieved doc header only"); - ++getMetrics().headerOnlyGets; - } - // Don't create unnecessary copy if we want the full doc or header - if (fieldSet.getType() == document::FieldSet::ALL - || fieldSet.getType() == document::FieldSet::HEADER) - { - TRACE(context, 9, "get", "Returning doc"); - return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime())); - } else { - TRACE(context, 9, "get", "Returning stripped doc"); - document::FieldSet::stripFields(*ret._doc, fieldSet); - return spi::GetResult(ret.getDoc(), spi::Timestamp(ret._ts.getTime())); - } - } catch (std::exception& e) { - return handleException<spi::GetResult>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::flush(const spi::Bucket& bucket, - spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "flush"); - LOG(spam, "flush(%s)", bucket.toString().c_str()); - try { - TRACE(context, 9, "flush", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "flush"); - - LOG(spam, "Attempting to auto-flush %s", - file->getFile().toString().c_str()); - TRACE(context, 9, "flush", "Flushing to disk"); - file->flushToDisk(); - - TRACE(context, 9, "flush", "Dismissing file"); - file.dismiss(); - return spi::Result(); - } catch (std::exception& e) { - return handleException<spi::Result>(e, true); - } -} - -spi::CreateIteratorResult -MemFilePersistenceProvider::createIterator(const spi::Bucket& b, - const document::FieldSet& fieldSet, - const spi::Selection& sel, - spi::IncludedVersions versions, - spi::Context& context) -{ - assert(b.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "createIterator"); - LOG(spam, "createIterator(%s)", b.toString().c_str()); - try { - 
clearActiveMemFile(); - return _iteratorHandler->createIterator(b, fieldSet, sel, versions); - } catch (std::exception& e) { - return handleException<spi::CreateIteratorResult>(e, true); - } -} - -spi::IterateResult -MemFilePersistenceProvider::iterate(spi::IteratorId iterId, - uint64_t maxByteSize, - spi::Context& context) const -{ - TRACEGENERIC(context, "iterate"); - try { - clearActiveMemFile(&context); - spi::IterateResult result( - _iteratorHandler->iterate(iterId, maxByteSize)); - TRACE(context, 9, "iterate", "Done filling iterator"); - return result; - } catch (std::exception& e) { - return handleException<spi::IterateResult>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::destroyIterator(spi::IteratorId iterId, - spi::Context& context) -{ - TRACEGENERIC(context, "destroyIterator"); - try { - return _iteratorHandler->destroyIterator(iterId); - } catch (std::exception& e) { - return handleException<spi::IterateResult>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::deleteBucket(const spi::Bucket& bucket, - spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "deleteBucket"); - LOG(spam, "deleteBucket(%s)", bucket.toString().c_str()); - try { - TRACE(context, 9, "deleteBucket", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "deleteBucket"); - TRACE(context, 9, "deleteBucket", "Deleting it"); - file.getMemFilePtr().deleteFile(); - // It is assumed guard will only kick in if deleteFile has failed - // _before_ it erases the bucket from the cache (since this should - // be a nothrow op). Otherwise, this will crash trying to deref a - // null ptr. 
- TRACE(context, 9, "deleteBucket", "Dismissing file"); - file.dismiss(); - return spi::Result(); - } catch (std::exception& e) { - return handleException<spi::IterateResult>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::split(const spi::Bucket& source, - const spi::Bucket& target1, - const spi::Bucket& target2, - spi::Context& context) -{ - assert(source.getBucketSpace() == document::FixedBucketSpaces::default_space()); - assert(target1.getBucketSpace() == document::FixedBucketSpaces::default_space()); - assert(target2.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "split"); - LOG(spam, "split(%s -> %s, %s)", source.toString().c_str(), - target1.toString().c_str(), target2.toString().c_str()); - try { - clearActiveMemFile(); - return _splitOperationHandler->split(source, target1, target2); - } catch (std::exception& e) { - return handleException<spi::Result>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::join(const spi::Bucket& source1, - const spi::Bucket& source2, - const spi::Bucket& target, - spi::Context& context) -{ - assert(source1.getBucketSpace() == document::FixedBucketSpaces::default_space()); - assert(source2.getBucketSpace() == document::FixedBucketSpaces::default_space()); - assert(target.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "join"); - LOG(spam, "join(%s, %s -> %s)", source1.toString().c_str(), - source2.toString().c_str(), target.toString().c_str()); - try { - clearActiveMemFile(); - return _joinOperationHandler->join(source1, source2, target); - } catch (std::exception& e) { - return handleException<spi::Result>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::removeEntry(const spi::Bucket& bucket, - spi::Timestamp ts, - spi::Context& context) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - TRACEGENERIC(context, "removeEntry"); - LOG(spam, "removeEntry(%s, %zu)", 
bucket.toString().c_str(), uint64_t(ts)); - try { - TRACE(context, 9, "removeEntry", "Grabbing memfile"); - MemFileAccessGuard file(*this, getMemFile(bucket), "revert", - REINSERT_AS_ACTIVE); - const MemSlot* slot = file->getSlotAtTime(Timestamp(ts)); - if (slot) { - TRACE(context, 9, "removeEntry", "Removing slot"); - file->removeSlot(*slot); - } - - TRACE(context, 9, "removeEntry", "Dismissing file"); - file.dismiss(); - return spi::Result(); - } catch (std::exception& e) { - return handleException<spi::Result>(e, true); - } -} - -spi::Result -MemFilePersistenceProvider::maintain(const spi::Bucket& bucket, - spi::MaintenanceLevel level) -{ - assert(bucket.getBucketSpace() == document::FixedBucketSpaces::default_space()); - LOG(spam, "maintain(%s)", bucket.toString().c_str()); - try { - MemFileAccessGuard file(*this, getMemFile(bucket, false), "maintain"); - assert(!file->slotsAltered()); - if (!file->fileExists()) { - LOG(debug, - "maintain(%s): file '%s' does not exist, nothing to maintain. " - "Assuming file was corrupted and auto-deleted.", - bucket.toString().c_str(), - file->getFile().getPath().c_str()); - return spi::Result(); - } - - std::ostringstream report; - const uint32_t verifyFlags((level == spi::HIGH) ? 0 : DONT_VERIFY_BODY); - if (!file->repair(report, verifyFlags)) { - LOG(debug, - "repair() on %s indicated errors, evicting from cache to " - "force reload of file with altered metadata", - bucket.toString().c_str()); - return spi::Result(); // No dismissal of guard; auto-evict. - } - assert(!file->slotsAltered()); - file->compact(); - file->flushToDisk(CHECK_NON_DIRTY_FILE_FOR_SPACE); - - file.dismiss(); - return spi::Result(); - } catch (std::exception& e) { - // Failing maintain() cannot cause an auto-repair since this will - // in turn call maintain(). 
- return handleException<spi::Result>(e, false); - } -} - -vespalib::string -MemFilePersistenceProvider::getReportContentType(const framework::HttpUrlPath&) const -{ - return "text/html"; -} - -namespace { - -void -printMemoryUsage(std::ostream& out, - const char* part, - uint64_t usage, - uint64_t total) -{ - out << "<li>" << part << ": " << usage; - if (total > 0) { - out << " (" << ((static_cast<double>(usage) / total) * 100.0) << "%)"; - } - out << "</li>\n"; -} - -} - -bool -MemFilePersistenceProvider::reportStatus(std::ostream& out, - const framework::HttpUrlPath& path) const -{ - framework::PartlyHtmlStatusReporter htmlReporter(*this); - htmlReporter.reportHtmlHeader(out, path); - - out << "<h1>Mem file persistence provider status page</h1>\n"; - bool printVerbose = path.hasAttribute("verbose"); - if (!printVerbose) { - out << "<p><a href=\"memfilepersistenceprovider?verbose\">" - "More verbose</a></p>\n"; - } else { - out << "<p><a href=\"memfilepersistenceprovider\">" - "Less verbose</a></p>\n"; - } - - MemFileCache::Statistics cacheStats(_env->_cache.getCacheStats()); - const MemFileCache::MemoryUsage& memUsage(cacheStats._memoryUsage); - out << "<p>Cache with " << cacheStats._numEntries - << " entries using " << memUsage.sum() - << " of max " << cacheStats._cacheSize - << " bytes</p>\n"; - out << "<ul>\n"; - printMemoryUsage(out, "Meta", memUsage.metaSize, memUsage.sum()); - printMemoryUsage(out, "Header", memUsage.headerSize, memUsage.sum()); - printMemoryUsage(out, "Body", memUsage.bodySize, memUsage.sum()); - out << "</ul>\n"; - out << "</p>\n"; - - if (printVerbose) { - _env->_cache.printCacheEntriesHtml(out); - } - - htmlReporter.reportHtmlFooter(out, path); - - return true; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h deleted file mode 100644 index f706fabc20c..00000000000 --- 
a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovider.h +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "operationhandler.h" -#include "iteratorhandler.h" -#include "joinoperationhandler.h" -#include "splitoperationhandler.h" -#include "memfilepersistenceprovidermetrics.h" -#include "threadmetricprovider.h" -#include "threadlocals.h" -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/init/filescanner.h> -#include <vespa/persistence/spi/abstractpersistenceprovider.h> -#include <vespa/storageframework/generic/status/httpurlpath.h> - -#include <vespa/config/config.h> - -namespace storage::memfile { - -class ThreadContext { -public: - MemFilePtr _memFile; - MemFilePersistenceThreadMetrics* _metrics; - - ThreadContext() - : _metrics(NULL) - {} -}; - -class MemFilePersistenceProvider : public spi::AbstractPersistenceProvider, - public framework::Component, - public Types, - public framework::StatusReporter, - public ThreadMetricProvider -{ -public: - typedef std::unique_ptr<MemFilePersistenceProvider> UP; - - MemFilePersistenceProvider(framework::ComponentRegister& reg, const config::ConfigUri & configUri); - ~MemFilePersistenceProvider(); - - spi::PartitionStateListResult getPartitionStates() const override; - spi::BucketIdListResult listBuckets(BucketSpace bucketSpace, spi::PartitionId) const override; - spi::BucketIdListResult getModifiedBuckets(BucketSpace bucketSpace) const override; - spi::BucketInfoResult getBucketInfo(const spi::Bucket&) const override; - spi::Result put(const spi::Bucket&, spi::Timestamp, - const spi::DocumentSP&, spi::Context&) override; - - spi::RemoveResult remove(const spi::Bucket&, spi::Timestamp, - const DocumentId&, spi::Context&) override; - - spi::RemoveResult removeIfFound(const 
spi::Bucket&, spi::Timestamp, - const DocumentId&, spi::Context&) override; - - spi::UpdateResult update(const spi::Bucket&, spi::Timestamp, - const spi::DocumentUpdateSP&, spi::Context&) override; - - spi::GetResult get(const spi::Bucket&, const document::FieldSet&, - const spi::DocumentId&, spi::Context&) const override; - - spi::Result flush(const spi::Bucket&, spi::Context&) override; - - spi::CreateIteratorResult createIterator(const spi::Bucket&, const document::FieldSet&, const spi::Selection&, - spi::IncludedVersions versions, spi::Context&) override; - - spi::IterateResult iterate(spi::IteratorId, uint64_t maxByteSize, spi::Context&) const override; - spi::Result destroyIterator(spi::IteratorId, spi::Context&) override; - spi::Result deleteBucket(const spi::Bucket&, spi::Context&) override; - spi::Result split(const spi::Bucket& source, const spi::Bucket& target1, - const spi::Bucket& target2, spi::Context&) override; - - spi::Result join(const spi::Bucket& source1, const spi::Bucket& source2, - const spi::Bucket& target, spi::Context&) override; - - spi::Result removeEntry(const spi::Bucket&, spi::Timestamp, spi::Context&) override; - spi::Result maintain(const spi::Bucket&, spi::MaintenanceLevel level) override; - - Environment& getEnvironment() { return *_env; } - - vespalib::string getReportContentType(const framework::HttpUrlPath&) const override; - bool reportStatus(std::ostream&, const framework::HttpUrlPath&) const override; - - /** - Used by unit tests. 
- */ - void clearActiveMemFile(spi::Context* = 0) const; - const IteratorHandler& getIteratorHandler() const { return *_iteratorHandler; } - - MemFilePersistenceThreadMetrics& getMetrics() const override; - - void setDocumentRepo(const document::DocumentTypeRepo& repo); - void setConfig(std::unique_ptr<vespa::config::storage::StorMemfilepersistenceConfig> config); - void setConfig(std::unique_ptr<vespa::config::content::PersistenceConfig> config); - void setConfig(std::unique_ptr<vespa::config::storage::StorDevicesConfig> config); -private: - framework::ComponentRegister& _componentRegister; - - config::ConfigUri _configUri; - vespa::config::storage::StorMemfilepersistenceConfig _config; - mutable MemFileMapper _memFileMapper; - - const document::DocumentTypeRepo* _repo; - mutable MemFileCache::UP _cache; - mutable Environment::UP _env; - mutable FileScanner::UP _fileScanner; - mutable OperationHandler::UP _util; - mutable IteratorHandler::UP _iteratorHandler; - mutable JoinOperationHandler::UP _joinOperationHandler; - mutable SplitOperationHandler::UP _splitOperationHandler; - mutable MemFilePersistenceMetrics _metrics; - - mutable ThreadLocals<ThreadContext> _threadLocals; - - std::pair<spi::Result::ErrorType, vespalib::string> getErrorFromException(const std::exception& e); - - MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true) const; - void setActiveMemFile(MemFilePtr ptr, const char* user) const; - bool hasCachedMemFile() const; - - template<typename C> C handleException(const std::exception& e, bool canRepairBucket) const; - - void handleBucketCorruption(const FileSpecification& file) const; - - //void addBucketToNotifySet(const MemFile& file) const; - - MemFilePtr& getThreadLocalMemFile() const; - - friend class MemFileAccessGuard; -}; - -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.cpp 
deleted file mode 100644 index a9ec8a06f39..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "memfilepersistenceprovidermetrics.h" -#include <vespa/metrics/summetric.hpp> -#include <vespa/storageframework/generic/component/component.h> - -namespace storage { -namespace memfile { - -using metrics::MetricSet; - -MemFilePersistenceThreadMetrics::MemFilePersistenceThreadMetrics(const std::string& name, MetricSet& owner) - : MetricSet(name, "partofsum thread", "Metrics for a worker thread using memfile persistence provider", &owner), - headerOnlyGets("headeronlygets", "", "Number of gets that only read header", this), - headerOnlyUpdates("headeronlyupdates", "", "Number of updates that only wrote header", this), - serialization("serialization", this) -{ } - -MemFilePersistenceThreadMetrics::~MemFilePersistenceThreadMetrics() { } - -MemFilePersistenceCacheMetrics::MemFilePersistenceCacheMetrics(MetricSet& owner) - : MetricSet("cache", "", "Metrics for the VDS persistence cache", &owner), - files("files", "", "Number of files cached", this), - meta("meta", "", "Bytes of file metadata cached", this), - header("header", "", "Bytes of file header parts cached", this), - body("body", "", "Bytes of file body parts cached", this), - hits("hits", "", "Number of times a bucket was attempted fetched " - "from the cache and it was already present", this), - misses("misses", "", "Number of times a bucket was attempted fetched " - "from the cache and it could not be found, requiring a load", this), - meta_evictions("meta_evictions", "", "Bucket meta data evictions", this), - header_evictions("header_evictions", "", "Bucket header (and " - "implicitly body, if present) data evictions", this), - body_evictions("body_evictions", "", "Bucket body data evictions", this) -{ 
} - -MemFilePersistenceCacheMetrics::~MemFilePersistenceCacheMetrics() { } - -MemFilePersistenceMetrics::MemFilePersistenceMetrics(framework::Component& component) - : MetricSet("memfilepersistence", "", "Metrics for the VDS persistence layer"), - _component(component), - _cache(*this) -{ } - -MemFilePersistenceMetrics::~MemFilePersistenceMetrics() { } - -MemFilePersistenceThreadMetrics* -MemFilePersistenceMetrics::addThreadMetrics() { - vespalib::MonitorGuard metricLock(_component.getMetricManagerLock()); - vespalib::LockGuard guard(_threadMetricsLock); - - if (!_sumMetric.get()) { - _sumMetric.reset(new metrics::SumMetric<MemFilePersistenceThreadMetrics> - ("allthreads", "sum", "", this)); - } - - std::string name = vespalib::make_string("thread_%zu", _threadMetrics.size()); - MemFilePersistenceThreadMetrics * metrics = new MemFilePersistenceThreadMetrics(name, *this); - _threadMetrics.emplace_back(metrics); - _sumMetric->addMetricToSum(*metrics); - return metrics; -} - -} -} - -template class metrics::SumMetric<storage::memfile::MemFilePersistenceThreadMetrics>; diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h b/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h deleted file mode 100644 index b76a6340d07..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#pragma once - -#include <vespa/metrics/metrics.h> -#include <vespa/memfilepersistence/mapper/serializationmetrics.h> -#include <vespa/vespalib/util/sync.h> -#include <list> - -namespace storage { - -namespace framework { class Component; } - -namespace memfile { - -class MemFilePersistenceThreadMetrics : public metrics::MetricSet -{ -public: - metrics::LongCountMetric headerOnlyGets; - metrics::LongCountMetric headerOnlyUpdates; - SerializationMetrics serialization; - - MemFilePersistenceThreadMetrics(const std::string& name, metrics::MetricSet& owner); - ~MemFilePersistenceThreadMetrics(); -}; - -class MemFilePersistenceCacheMetrics : public metrics::MetricSet -{ -public: - metrics::LongValueMetric files; - metrics::LongValueMetric meta; - metrics::LongValueMetric header; - metrics::LongValueMetric body; - metrics::LongCountMetric hits; - metrics::LongCountMetric misses; - metrics::LongCountMetric meta_evictions; - metrics::LongCountMetric header_evictions; - metrics::LongCountMetric body_evictions; - - MemFilePersistenceCacheMetrics(metrics::MetricSet& owner); - ~MemFilePersistenceCacheMetrics(); -}; - -class MemFilePersistenceMetrics : public metrics::MetricSet -{ - framework::Component& _component; - -public: - vespalib::Lock _threadMetricsLock; - std::list<std::unique_ptr<MemFilePersistenceThreadMetrics> > _threadMetrics; - - std::unique_ptr<metrics::SumMetric<MemFilePersistenceThreadMetrics> > _sumMetric; - MemFilePersistenceCacheMetrics _cache; - - MemFilePersistenceMetrics(framework::Component& component); - ~MemFilePersistenceMetrics(); - MemFilePersistenceThreadMetrics* addThreadMetrics(); -}; - -} -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp deleted file mode 100644 index ebc60246a50..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.cpp +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright 2017 Yahoo 
Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "operationhandler.h" -#include <vespa/memfilepersistence/common/exceptions.h> -#include <vespa/document/select/parser.h> -#include <sstream> - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.handler.operation"); - -namespace storage::memfile { - -OperationHandler::OperationHandler(Environment& env) - : _env(env) -{ -} - -OperationHandler::ReadResult -OperationHandler::read(MemFile& file, const DocumentId& id, - Timestamp maxTimestamp, GetFlag getFlags) const -{ - if (maxTimestamp == Timestamp(0)) { - maxTimestamp = MAX_TIMESTAMP; - } - const MemSlot* slot(file.getSlotWithId(id, maxTimestamp)); - if (slot == 0 || slot->deleted()) { - return ReadResult(Document::UP(), Timestamp(0)); - } - return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp()); -} - -OperationHandler::ReadResult -OperationHandler::read(MemFile& file, Timestamp timestamp, - GetFlag getFlags) const -{ - const MemSlot* slot(file.getSlotAtTime(timestamp)); - if (slot == 0 || slot->deleted()) { - return ReadResult(Document::UP(), Timestamp(0)); - } - - return ReadResult(file.getDocument(*slot, getFlags), slot->getTimestamp()); -} - -Types::Timestamp -OperationHandler::remove(MemFile& file, - const DocumentId& id, - Timestamp timestamp, - RemoveType persistRemove) -{ - LOG(debug, "remove(%s, %s, %zu, %s)", - file.getFile().getPath().c_str(), - id.toString().c_str(), - timestamp.getTime(), - persistRemove ? 
"always persist" : "persist only if put is found"); - - const MemSlot* slotAtTime(file.getSlotAtTime(timestamp)); - if (slotAtTime) { - if (slotAtTime->deleted()) { - LOG(spam, - "Slot %s already existed at timestamp %zu but was already " - "deleted; not doing anything", - slotAtTime->toString().c_str(), - timestamp.getTime()); - return Timestamp(0); - } - LOG(spam, - "Slot %s already existed at timestamp %zu, delegating to " - "unrevertableRemove", - slotAtTime->toString().c_str(), - timestamp.getTime()); - return unrevertableRemove(file, id, timestamp); - } - - const MemSlot* slot(file.getSlotWithId(id)); - - if (slot == 0 || slot->getTimestamp() > timestamp) { - LOG(spam, "No slot existed, or timestamp was higher"); - - if (persistRemove == ALWAYS_PERSIST_REMOVE) { - file.addRemoveSlotForNonExistingEntry( - id, timestamp, MemFile::REGULAR_REMOVE); - } - return Timestamp(0); - } - - if (slot->deleted()) { - LOG(spam, "Document %s was already deleted.", - id.toString().c_str()); - - if (persistRemove == ALWAYS_PERSIST_REMOVE) { - file.addRemoveSlot(*slot, timestamp); - } - - return Timestamp(0); - } - - Timestamp oldTs(slot->getTimestamp()); - file.addRemoveSlot(*slot, timestamp); - return oldTs; -} - -Types::Timestamp -OperationHandler::unrevertableRemove(MemFile& file, - const DocumentId& id, - Timestamp timestamp) -{ - LOG(debug, "unrevertableRemove(%s, %s, %zu)", - file.getFile().getPath().c_str(), - id.toString().c_str(), - timestamp.getTime()); - - const MemSlot* slot(file.getSlotAtTime(timestamp)); - if (slot == 0) { - file.addRemoveSlotForNonExistingEntry( - id, timestamp, MemFile::UNREVERTABLE_REMOVE); - return Timestamp(0); - } - if (slot->getGlobalId() != id.getGlobalId()) { - // Should Not Happen(tm) case: given timestamp+document id does not - // match the document ID stored on file for the timestamp. In this - // case we throw out the old slot and insert a new unrevertable remove - // slot with the new document ID. 
- LOG(error, "Unrevertable remove for timestamp %zu with document id %s " - "does not match the document id %s of the slot stored at this " - "timestamp! Existing slot: %s. Removing old slot to get in sync.", - timestamp.getTime(), - id.toString().c_str(), - file.getDocumentId(*slot).toString().c_str(), - slot->toString().c_str()); - file.removeSlot(*slot); - file.addRemoveSlotForNonExistingEntry( - id, timestamp, MemFile::UNREVERTABLE_REMOVE); - return timestamp; - } - - MemSlot newSlot(*slot); - newSlot.turnToUnrevertableRemove(); - file.modifySlot(newSlot); - return timestamp; -} - -void -OperationHandler::write(MemFile& file, const Document& doc, Timestamp time) -{ - const MemSlot* slot(file.getSlotAtTime(time)); - if (slot != 0) { - if (doc.getId().getGlobalId() == slot->getGlobalId() && - !slot->deleted()) - { - LOG(debug, "Tried to put already existing document %s at time " - "%zu into file %s. Probably sent here by merge from other " - "copy. Flagging put ok and doing nothing.", - doc.getId().toString().c_str(), - time.getTime(), - file.getFile().getPath().c_str()); - return; - } else { - std::ostringstream ost; - ost << "Failed adding document " << doc.getId().toString() - << " to slotfile '" << file.getFile().getPath() - << "'. 
Entry " << *slot << " already exists at that timestamp"; - LOG(warning, "%s", ost.str().c_str()); - throw TimestampExistException( - ost.str(), file.getFile(), time, VESPA_STRLOC); - } - } - - file.addPutSlot(doc, time); -} - -bool -OperationHandler::update(MemFile& file, const Document& header, - Timestamp newTime, Timestamp existingTime) -{ - const MemSlot* slot; - if (existingTime == Timestamp(0)) { - slot = file.getSlotWithId(header.getId()); - } else { - slot = file.getSlotAtTime(existingTime); - if (slot == NULL) { - return false; - } - - DocumentId docId = file.getDocumentId(*slot); - if (docId != header.getId()) { - std::ostringstream ost; - ost << "Attempted update of doc " << header.getId() << " with " - << "timestamp " << existingTime << " failed as non-matching " - << "doc " << docId << " existed at timestamp."; - throw MemFileIoException(ost.str(), file.getFile(), - MemFileIoException::INTERNAL_FAILURE, VESPA_STRLOC); - } - } - if (slot == 0 || slot->deleted()) return false; - - file.addUpdateSlot(header, *slot, newTime); - return true; -} - -std::vector<Types::Timestamp> -OperationHandler::select(MemFile& file, - SlotMatcher& checker, - uint32_t iteratorFlags, - Timestamp fromTimestamp, - Timestamp toTimestamp) -{ - verifyLegalFlags(iteratorFlags, LEGAL_ITERATOR_FLAGS, "select"); - checker.preload(file); - std::vector<Timestamp> result; - result.reserve(file.getSlotCount()); - for (MemFile::const_iterator it = file.begin(iteratorFlags, - fromTimestamp, - toTimestamp); - it != file.end(); ++it) - { - if (checker.match(SlotMatcher::Slot(*it, file))) { - result.push_back(it->getTimestamp()); - } - } - reverse(result.begin(), result.end()); - return result; -} - -void -OperationHandler::verifyBucketMapping(const DocumentId& id, - const BucketId& bucket) const -{ - BucketId docBucket(_env._bucketFactory.getBucketId(id)); - docBucket.setUsedBits(bucket.getUsedBits()); - if (bucket != docBucket) { - docBucket = _env._bucketFactory.getBucketId(id); - throw 
vespalib::IllegalStateException("Document " + id.toString() - + " (bucket " + docBucket.toString() + ") does not belong in " - + "bucket " + bucket.toString() + ".", VESPA_STRLOC); - } -} - -MemFilePtr -OperationHandler::getMemFile(const spi::Bucket& b, bool keepInCache) -{ - return getMemFile(b.getBucketId(), b.getPartition(), keepInCache); -} - -MemFilePtr -OperationHandler::getMemFile(const document::BucketId& id, Directory& dir, - bool keepInCache) { - return _env._cache.get(id, _env, dir, keepInCache); -} - -MemFilePtr -OperationHandler::getMemFile(const document::BucketId& id, uint16_t diskIndex, - bool keepInCache) -{ - return getMemFile(id, _env.getDirectory(diskIndex), keepInCache); -} - -document::FieldSet::UP -OperationHandler::parseFieldSet(const std::string& fieldSet) -{ - document::FieldSetRepo fsr; - return fsr.parse(_env.repo(), fieldSet); -} - -std::unique_ptr<document::select::Node> -OperationHandler::parseDocumentSelection( - const std::string& documentSelection, bool allowLeaf) -{ - std::unique_ptr<document::select::Node> ret; - try { - document::select::Parser parser( - _env.repo(), _env._bucketFactory); - ret = parser.parse(documentSelection); - } catch (document::select::ParsingFailedException& e) { - LOG(debug, "Failed to parse document selection '%s': %s", - documentSelection.c_str(), e.getMessage().c_str()); - return std::unique_ptr<document::select::Node>(); - } - if (ret->isLeafNode() && !allowLeaf) { - LOG(debug, "Document selection results in a single leaf node: '%s'", - documentSelection.c_str()); - return std::unique_ptr<document::select::Node>(); - } - return ret; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h deleted file mode 100644 index c06fcce24b9..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/operationhandler.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2017 Yahoo Holdings. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::OperationHandler - * \ingroup memfile - * - * \brief Super class for operation handlers. - * - * The operation handler superclass provides common functionality needed to - * operation handlers. - */ -#pragma once - -#include <vespa/memfilepersistence/memfile/memfile.h> -#include <vespa/memfilepersistence/memfile/memfilecache.h> -#include <vespa/memfilepersistence/memfile/memfileptr.h> -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/memfilepersistence/common/filespecification.h> -#include <vespa/memfilepersistence/common/slotmatcher.h> -#include <vespa/memfilepersistence/common/types.h> -#include <vespa/persistence/spi/bucketinfo.h> -#include <vespa/document/fieldset/fieldsetrepo.h> - -namespace document { - namespace select { - class Node; - } -} - -namespace storage { -namespace memfile { - -class OperationHandler : protected Types -{ -protected: - Environment& _env; - -public: - typedef std::unique_ptr<OperationHandler> UP; - - OperationHandler(const OperationHandler &) = delete; - OperationHandler & operator = (const OperationHandler &) = delete; - OperationHandler(Environment&); - virtual ~OperationHandler() {} - - struct ReadResult : private Types { - ReadResult(Document::UP doc, - Timestamp ts) - : _doc(std::move(doc)), - _ts(ts) {}; - - ReadResult(ReadResult&& other) - : _doc(std::move(other._doc)), - _ts(other._ts) {}; - - Document::UP _doc; - Timestamp _ts; - - Document::UP getDoc() { return std::move(_doc); } - }; - - ReadResult read(MemFile&, - const DocumentId&, - Timestamp maxTimestamp, - GetFlag getFlags) const; - - ReadResult read(MemFile&, Timestamp timestamp, GetFlag getFlags) const; - - enum RemoveType - { - ALWAYS_PERSIST_REMOVE, - PERSIST_REMOVE_IF_FOUND - }; - - Types::Timestamp remove(MemFile&, - const DocumentId&, - Timestamp, - RemoveType); - - Types::Timestamp unrevertableRemove(MemFile&, - const 
DocumentId&, - Timestamp); - - void write(MemFile&, const Document& doc, Timestamp); - - bool update(MemFile&, - const Document& headerToOverwrite, - Timestamp newTime, - Timestamp existingTime = Timestamp(0)); - - /** - * Get the slots matching a given matcher. - * - * @return The timestamps of the matching slots, ordered in rising - * timestamp order. - */ - std::vector<Timestamp> select(MemFile&, SlotMatcher&, - uint32_t iteratorFlags, - Timestamp fromTimestamp = Timestamp(0), - Timestamp toTimestamp = Timestamp(0)); - - /** Verify that a document id belongs to a given bucket. */ - void verifyBucketMapping(const DocumentId&, const BucketId&) const; - - MemFilePtr getMemFile(const spi::Bucket& b, bool keepInCache = true); - - MemFilePtr getMemFile(const document::BucketId& id, Directory& dir, - bool keepInCache = true); - - MemFilePtr getMemFile(const document::BucketId& id, uint16_t disk, - bool keepInCache = true); - - document::FieldSet::UP parseFieldSet(const std::string& fieldSet); - - std::unique_ptr<document::select::Node> - parseDocumentSelection(const std::string& documentSelection, - bool allowLeaf); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp deleted file mode 100644 index 30e2fb19e0c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "splitoperationhandler.h" -#include "cacheevictionguard.h" - -#include <vespa/log/log.h> -LOG_SETUP(".persistence.memfile.handler.split"); - -namespace storage { -namespace memfile { - -SplitOperationHandler::SplitOperationHandler(Environment& env) - : OperationHandler(env) -{ -} - -namespace { - -struct BucketMatcher : public SlotMatcher { - const document::BucketIdFactory& _factory; - document::BucketId _bid; - - BucketMatcher(const document::BucketIdFactory& factory, const document::BucketId& bid) - : SlotMatcher(PRELOAD_HEADER), - _factory(factory), - _bid(bid) {} - - bool match(const Slot& slot) override { - document::DocumentId id(slot.getDocumentId()); - document::BucketId bucket = _factory.getBucketId(id); - bucket.setUsedBits(_bid.getUsedBits()); - - if (bucket.stripUnused() == _bid.stripUnused()) { - return true; - } else { - return false; - } - } -}; - -} - -void -SplitOperationHandler::copyTimestamps( - const MemFile& source, - MemFile& target, - const std::vector<Timestamp>& timestamps) -{ - std::vector<const MemSlot*> slotsToCopy; - slotsToCopy.reserve(timestamps.size()); - for (uint32_t i = 0; i < timestamps.size(); i++) { - const MemSlot* slot = source.getSlotAtTime(timestamps[i]); - - if (!target.getSlotAtTime(timestamps[i])) { - slotsToCopy.push_back(slot); - } - } - target.copySlotsFrom(source, slotsToCopy); -} - -uint32_t -SplitOperationHandler::splitIntoFile(MemFile& source, - const spi::Bucket& target) -{ - BucketMatcher matcher(_env._bucketFactory, target.getBucketId()); - - std::vector<Timestamp> ts = select(source, matcher, ITERATE_REMOVED); - - MemFileCacheEvictionGuard targetFile(getMemFile(target, false)); - - LOG(debug, - "Found %zu slots to move from file %s to file %s", - ts.size(), - source.getFile().toString().c_str(), - targetFile->getFile().toString().c_str()); - - copyTimestamps(source, *targetFile, ts); - - targetFile->flushToDisk(); - targetFile.unguard(); - return ts.size(); -} - -spi::Result 
-SplitOperationHandler::split(const spi::Bucket& source, - const spi::Bucket& target1, - const spi::Bucket& target2) -{ - MemFileCacheEvictionGuard file(getMemFile(source, false)); - file->ensureBodyBlockCached(); - - uint32_t totalDocsMoved = 0; - totalDocsMoved += splitIntoFile(*file, target1); - if (target2.getBucketId().getRawId() != 0) { - totalDocsMoved += splitIntoFile(*file, target2); - } - if (file->getBucketInfo().getEntryCount() != totalDocsMoved) { - LOG(error, "Split(%s) code moved only %u of %u entries out of source " - "file.", - source.getBucketId().toString().c_str(), - totalDocsMoved, file->getBucketInfo().getEntryCount()); - assert(false); - } - file.get().deleteFile(); - file.unguard(); - return spi::Result(); -} - -} // memfile -} // storage diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h b/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h deleted file mode 100644 index 2fa7547d2bc..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/splitoperationhandler.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -/** - * \class storage::memfile::SplitHandler - * \ingroup memfile - * - * \brief Class used to do basic operations to memfiles. - */ -#pragma once - -#include <vespa/memfilepersistence/spi/operationhandler.h> -#include <vespa/persistence/spi/persistenceprovider.h> - -namespace storage { - -namespace memfile { - -class SplitOperationHandler : public OperationHandler { -public: - typedef std::unique_ptr<SplitOperationHandler> UP; - - SplitOperationHandler(Environment&); - - spi::Result split(const spi::Bucket& source, - const spi::Bucket& target1, - const spi::Bucket& target2); - -private: - /** - * Copies the slots designated by the given list of timestamps from one mem - * file to another. 
If the target already has a slot at any of the given - * timestamps, those timestamps aren't copied. - */ - void copyTimestamps(const MemFile& source, MemFile& target, - const std::vector<Timestamp>& timestamps); - - uint32_t splitIntoFile(MemFile& source, const spi::Bucket& target); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp deleted file mode 100644 index b3116983d79..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "threadlocals.h" - -namespace storage { - -namespace memfile { - -vespalib::Lock ThreadStatic::_threadLock; -uint16_t ThreadStatic::_nextThreadIdx = 0; -__thread int ThreadStatic::_threadIdx = -1; - -void ThreadStatic::initThreadIndex() -{ - if (_threadIdx == -1) { - vespalib::LockGuard guard(_threadLock); - _threadIdx = _nextThreadIdx; - ++_nextThreadIdx; - } -} - -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h deleted file mode 100644 index d5ceb50cd2f..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/threadlocals.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vespalib/util/sync.h> -#include <vector> - -namespace storage { - -namespace memfile { - -class ThreadStatic { -public: - static vespalib::Lock _threadLock; - static uint16_t _nextThreadIdx; - static __thread int _threadIdx; - - void initThreadIndex(); -}; - -/** - * This class takes ownership of a set of thread local - * variables. 
The maximum number of unique threads the - * class can use must be predetermined on construction. - */ -template<typename T> -class ThreadLocals : public ThreadStatic { - static const size_t CACHE_LINE_SIZE = 64; // Architectural assumption. - struct CacheLinePaddedValue - { - T _data; - private: - // Ensure addressing the data of one entry does not touch the cache - // line of any following entries. Could make this an exact fit, but - // not very important since there are very few TLS entries in total. - char _padding[CACHE_LINE_SIZE]; - }; -public: - mutable std::vector<CacheLinePaddedValue> _contexts; - - ThreadLocals(uint32_t maxThreadCount) - : _contexts(maxThreadCount) - { - } - - T& get() { - initThreadIndex(); - assert(_threadIdx < (int)_contexts.size()); - return _contexts[_threadIdx]._data; - } -}; - -} - -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h b/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h deleted file mode 100644 index 27516078ebf..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/threadmetricprovider.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -namespace storage { -namespace memfile { - -class MemFilePersistenceThreadMetrics; - -class ThreadMetricProvider -{ -public: - virtual ~ThreadMetricProvider() {} - - virtual MemFilePersistenceThreadMetrics& getMetrics() const = 0; -}; - -} -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp deleted file mode 100644 index 18e70b98020..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "visitorslotmatcher.h" -#include <vespa/document/select/bodyfielddetector.h> -#include <vespa/document/select/node.h> - -namespace storage { -namespace memfile { - -namespace { - -SlotMatcher::PreloadFlag -getCacheRequirements(const document::select::Node* selection, - const document::DocumentTypeRepo& repo) { - if (!selection) { - return SlotMatcher::PRELOAD_META_DATA_ONLY; - } - - document::select::BodyFieldDetector detector(repo); - selection->visit(detector); - - if (detector.foundBodyField) { - return SlotMatcher::PRELOAD_BODY; - } else { - return SlotMatcher::PRELOAD_HEADER; - } -} - -bool needDocument(const document::select::Node* selection) -{ - if (selection) { - document::select::NeedDocumentDetector detector; - selection->visit(detector); - return detector.needDocument(); - } else { - return false; - } -} - -} // namespace - -VisitorSlotMatcher::VisitorSlotMatcher( - const document::DocumentTypeRepo& repo, - const document::select::Node* selection) - : SlotMatcher(getCacheRequirements(selection, repo)), - _selection(selection), - _needDocument(needDocument(selection)) -{ -} - -bool -VisitorSlotMatcher::match(const Slot& slot) { - if (_selection) { - if (!slot.isRemove() && _needDocument) { - document::Document::UP doc( - slot.getDocument(!(_preload == PRELOAD_BODY))); - return (_selection->contains(*doc) - == document::select::Result::True); - } else { - document::DocumentId docId(slot.getDocumentId()); - return (_selection->contains(docId) - == document::select::Result::True); - } - } - - return true; -} - -} -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h b/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h deleted file mode 100644 index a62fdc380a2..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/spi/visitorslotmatcher.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. -#pragma once - -#include <vespa/memfilepersistence/common/slotmatcher.h> - -namespace document { - namespace select { - class Node; - } -} - -namespace storage { -namespace memfile { - -class VisitorSlotMatcher : public SlotMatcher -{ -private: - const document::select::Node* _selection; - bool _needDocument; - -public: - VisitorSlotMatcher(const document::DocumentTypeRepo& repo, - const document::select::Node* selection); - - bool match(const Slot& slot) override; - -}; - -} -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore b/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore deleted file mode 100644 index 9163367bfa9..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/.depend -/Makefile -vespa-vds-dump-slotfile -vespa-vds-disktool-bin diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt b/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt deleted file mode 100644 index 72938ca2b73..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_library(memfilepersistence_tools OBJECT - SOURCES - dumpslotfile.cpp - vdsdisktool.cpp - DEPENDS -) -vespa_add_executable(memfilepersistence_dumpslotfile_app - SOURCES - dumpslotfileapp.cpp - OUTPUT_NAME vespa-vds-dump-slotfile - INSTALL bin - DEPENDS - memfilepersistence -) -vespa_add_executable(memfilepersistence_vdsdisktool_app - SOURCES - vdsdiskapp.cpp - OUTPUT_NAME vespa-vds-disktool-bin - INSTALL bin - DEPENDS - memfilepersistence -) diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp deleted file mode 100644 index b267fff0ab0..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.cpp +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "dumpslotfile.h" -#include <vespa/config/helper/configgetter.h> -#include <vespa/document/config/config-documenttypes.h> -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/document/document.h> -#include <vespa/memfilepersistence/common/environment.h> -#include <vespa/memfilepersistence/mapper/memfilemapper.h> -#include <vespa/memfilepersistence/memfile/memfilecache.h> -#include <vespa/memfilepersistence/spi/memfilepersistenceprovidermetrics.h> -#include <vespa/storageframework/defaultimplementation/clock/realclock.h> -#include <vespa/storageframework/defaultimplementation/component/componentregisterimpl.h> -#include <vespa/vespalib/util/programoptions.h> -#include <vespa/config/helper/configgetter.hpp> -#include <vespa/config/subscription/configuri.h> - -using config::ConfigGetter; -using document::DocumenttypesConfig; -using config::FileSpec; -using document::DocumentTypeRepo; - -namespace storage::memfile { - -namespace { - std::ostream* cout; - std::ostream* cerr; - - struct CmdOptions : public vespalib::ProgramOptions { - bool showSyntaxPage; - bool 
userFriendlyOutput; - bool printHeader; - bool printBody; - bool toXml; - bool toBinary; - bool includeRemovedDocs; - bool includeRemoveEntries; -// std::string metaDataSort; - std::string documentManConfigId; - std::string filename; - uint64_t timestampToShow; - std::string docId; -// bool useConstructor; - - CmdOptions(int argc, const char* const* argv); - ~CmdOptions(); - - }; - - CmdOptions::CmdOptions(int argc, const char* const* argv) - : vespalib::ProgramOptions(argc, argv), - showSyntaxPage(false) - { - setSyntaxMessage( - "Utility program for showing the contents of the slotfiles " - "used by Vespa Document Storage in a user readable format. " - "Intended for debugging purposes." - ); - addOption("h help", showSyntaxPage, false, - "Shows this help page"); - addOption("n noheader", printHeader, true, - "If given, the header block content is not shown"); - addOption("N nobody", printBody, true, - "If given, the body block content is not shown"); - addOption("f friendly", userFriendlyOutput, false, - "Gives less compact, but more user friendly output"); - addOption("x toxml", toXml, false, - "Print document XML of contained documents"); - addOption("b tobinary", toBinary, false, - "Print binary representations of contained documents"); - addOption("includeremoveddocs", includeRemovedDocs, false, - "When showing XML, include documents that are still in " - "the file, but have been removed."); - addOption("includeremoveentries", includeRemoveEntries, false, - "When showing XML, include remove entries."); - addOption("c documentconfig", documentManConfigId, - std::string("client"), - "The document config to use, needed if deserializing " - "documents."); -// addOption("s sort", metaDataSort, std::string("none"), -// "How to sort metadatalist. 
Valid arguments: " -// "bodypos, headerpos & none."); - addOption("t time", timestampToShow, uint64_t(0), - "If set, only present data related to this timestamp, " - "when outputting XML or binary data."); - addOption("docid", docId, std::string(""), - "Retrieve single document using get semantics"); -// addOption("useconstructor", useConstructor, false, "Debug option"); - addArgument("slotfile", filename, "The slotfile to dump."); - } - CmdOptions::~CmdOptions() { } - - void printDoc(document::Document& doc, CmdOptions& o) { - if (o.toXml) { - *cout << doc.toXml() << "\n"; - } else { - document::ByteBuffer::UP bbuf(doc.serialize()); - *cout << std::string(bbuf->getBuffer(), bbuf->getLength()); - } - } - - void printFailure(const std::string& failure) { - *cerr << failure << "\n"; - } - - uint64_t extractBucketId(const std::string& path) { - size_t slashPos = path.find_last_of('/'); - bool foundSlash = true; - if (slashPos == std::string::npos) { - foundSlash = false; - } - - size_t dotPos = path.find_last_of('.'); - if (dotPos == std::string::npos - || (foundSlash && (slashPos > dotPos))) - { - dotPos = path.size(); - } - - std::string bucketIdAsHex; - if (foundSlash) { - bucketIdAsHex.assign(path.begin() + slashPos + 1, - path.begin() + dotPos); - } else { - bucketIdAsHex.assign(path.begin(), - path.begin() + dotPos); - } - - char* endp; - uint64_t bucketId = strtoull(bucketIdAsHex.c_str(), &endp, 16); - if (*endp != '\0') { - return 0; - } - return bucketId; - } - - struct EnvironmentImpl : ThreadMetricProvider { - framework::defaultimplementation::ComponentRegisterImpl _compReg; - framework::Component _component; - framework::defaultimplementation::RealClock _clock; - MemFilePersistenceMetrics _metrics; - MemFilePersistenceThreadMetrics* _threadMetrics; - std::unique_ptr<MemFileCache> _cache; - MemFileMapper _mapper; - DeviceManager _deviceManager; - document::DocumentType _docType; - std::shared_ptr<const DocumentTypeRepo> _repo; - 
vespa::config::storage::StorMemfilepersistenceConfigBuilder _memFileConfig; - vespa::config::content::PersistenceConfigBuilder _persistenceConfig; - vespa::config::storage::StorDevicesConfigBuilder _deviceConfig; - config::ConfigSet _configSet; - config::IConfigContext::SP _configContext; - std::unique_ptr<config::ConfigUri> _internalConfig; - std::unique_ptr<Environment> _env; - - EnvironmentImpl(config::ConfigUri& externalConfig, - const char* documentConfigId); - ~EnvironmentImpl(); - - MemFilePersistenceThreadMetrics& getMetrics() const override { - return *_threadMetrics; - } - - }; - - EnvironmentImpl::EnvironmentImpl(config::ConfigUri& externalConfig, const char* documentConfigId) - : _compReg(), - _component(_compReg, "dumpslotfile"), - _clock(), - _metrics(_component), - _threadMetrics(_metrics.addThreadMetrics()), - _cache(), - _mapper(*this), - _deviceManager(DeviceMapper::UP(new SimpleDeviceMapper), _clock), - _docType("foo", 1) - { - _compReg.setClock(_clock); - _cache.reset(new MemFileCache(_compReg, _metrics._cache)); - if (documentConfigId == 0) { - _repo.reset(new DocumentTypeRepo(_docType)); - } else { - config::ConfigUri uri( - externalConfig.createWithNewId(documentConfigId)); - std::unique_ptr<document::DocumenttypesConfig> config( - ConfigGetter<DocumenttypesConfig>::getConfig( - uri.getConfigId(), uri.getContext())); - _repo.reset(new DocumentTypeRepo(*config)); - } - _deviceConfig.rootFolder = "."; - std::string configId("defaultId"); - _configSet.addBuilder(configId, &_memFileConfig); - _configSet.addBuilder(configId, &_persistenceConfig); - _configSet.addBuilder(configId, &_deviceConfig); - _configContext.reset(new config::ConfigContext(_configSet)); - _internalConfig.reset( - new config::ConfigUri(configId, _configContext)); - _env.reset(new Environment( - *_internalConfig, *_cache, _mapper, *_repo, _clock, true)); - } - EnvironmentImpl::~EnvironmentImpl() {} - -} - -int SlotFileDumper::dump(int argc, const char * const * argv, - 
config::ConfigUri& config, - std::ostream& out, std::ostream& err) -{ - cout = &out; - cerr = &err; - CmdOptions o(argc, argv); - try{ - o.parse(); - } catch (vespalib::InvalidCommandLineArgumentsException& e) { - if (!o.showSyntaxPage) { - err << e.getMessage() << "\n\n"; - o.writeSyntaxPage(err); - err << "\n"; - return 1; - } - } - if (o.showSyntaxPage) { - o.writeSyntaxPage(err); - err << "\n"; - return 0; - } - if (!o.toXml && (o.includeRemovedDocs || o.includeRemoveEntries)) { - err << "Options for what to include in XML makes no sense when " - "not printing XML content.\n\n"; - o.writeSyntaxPage(err); - err << "\n"; - return 1; - } - if (o.toBinary && o.timestampToShow == 0 && o.docId == "") { - err << "To binary option only works for a single document. " - "Use --time or --docid options.\n\n"; - o.writeSyntaxPage(err); - err << "\n"; - return 1; - } -// if (o.metaDataSort != "none" && o.metaDataSort != "bodypos") { -// err << "Illegal value for metadata sorting: '" << o.metaDataSort -// << "'. Legal values are:\n" -// << " none - Keep order on disk (currently timestamp)\n" -// << " bodypos - Reorder metadata by position of body\n" -// << " headerpos - Reorder metadata by position of header\n\n"; -// o.writeSyntaxPage(err); -// err << "\n"; -// return 1; -// } - - EnvironmentImpl env(config, o.toXml ? 
o.documentManConfigId.c_str() : ""); - - document::BucketId bucket(extractBucketId(o.filename)); - Directory::SP dir(env._deviceManager.getDirectory(o.filename, 0)); - FileSpecification fileSpec(bucket, *dir, o.filename); - - MemFile::LoadOptions opts; - opts.autoRepair = false; - MemFile memFile(fileSpec, *env._env, opts); - - if (!o.toXml && !o.toBinary) { - spi::BucketInfo info; - info = memFile.getBucketInfo(); - if (bucket.getRawId() == 0) { - out << "Failed to extract bucket id from filename\n"; - } else { - out << bucket << " (extracted from filename)\n"; - } - out << "Unique document count: " << info.getDocumentCount() - << "\nTotal document size: " - << info.getDocumentSize() << "\n"; - out << "Used size: " << info.getUsedSize() << "\n"; - out << "Entry count: " << info.getEntryCount() << "\n"; - -/* - SlotFile::MetaDataOrder order = SlotFile::DEFAULT; - if (o.metaDataSort == "bodypos") { - order = SlotFile::BODYPOS; - } else if (o.metaDataSort == "headerpos") { - order = SlotFile::HEADERPOS; - } -*/ - memFile.printState(out, o.userFriendlyOutput, o.printBody, - o.printHeader/*, order*/); - out << "\n"; - std::ostringstream ost; - uint16_t verifyFlags = 0; // May verify only header/body - if (env._mapper.verify(memFile, *env._env, ost, verifyFlags)) { - out << "Slotfile verified.\n"; - } else { - out << "Slotfile failed verification.\n"; - out << ost.str() << "\n"; - } - } else { - std::ostringstream ost; - uint16_t verifyFlags = 0; // May verify only header/body - if (!env._mapper.verify(memFile, *env._env, ost, verifyFlags)) { - out << "Slotfile failed verification.\n"; - out << ost.str() << "\n"; - return 1; - } - - if (o.toXml) { - out << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; - out << "<vespafeed>\n"; - } - if (o.docId != "") { - const MemSlot* slot( - memFile.getSlotWithId(document::DocumentId(o.docId))); - if (slot != 0 && !slot->deleted()) { - document::Document::UP doc(memFile.getDocument(*slot, - o.printBody ? 
- Types::ALL : Types::HEADER_ONLY)); - if (doc.get()) { - printDoc(*doc, o); - } else { - printFailure("No document with id " + o.docId + - " found."); - } - } else { - printFailure("No document with id " + o.docId + " found."); - } - } else { - uint32_t iteratorFlags = o.includeRemoveEntries ? - Types::ITERATE_REMOVED : 0; - if (!o.includeRemovedDocs) { - iteratorFlags |= Types::ITERATE_GID_UNIQUE; - } - for (MemFile::const_iterator it = memFile.begin(iteratorFlags); - it != memFile.end(); ++it) - { - if (o.timestampToShow == 0 - || (Types::Timestamp)o.timestampToShow - == it->getTimestamp()) - { - if (it->deleted() || it->deletedInPlace()) { - printFailure("Found remove entry"); - } else { - document::Document::UP doc(memFile.getDocument(*it, - o.printBody ? - Types::ALL : Types::HEADER_ONLY)); - if (doc.get()) { - printDoc(*doc, o); - } else { - printFailure("Unable to get document in " + it->toString(true)); - } - } - } - } - } - if (o.toXml) { - out << "</vespafeed>\n"; - } - } - return 0; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h deleted file mode 100644 index a54cbb9f400..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfile.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#pragma once - -#include <iosfwd> - -namespace config { - class ConfigUri; -} - -namespace storage { -namespace memfile { - -struct SlotFileDumper { - static int dump(int argc, const char * const * argv, - config::ConfigUri& config, - std::ostream& out, std::ostream& err); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp deleted file mode 100644 index 004fd4ea244..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/dumpslotfileapp.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "dumpslotfile.h" -#include <vespa/config/subscription/configuri.h> -#include <vespa/fastos/app.h> -#include <iostream> - -namespace { - -struct DumpSlotFileApp : public FastOS_Application { - int Main() override { - try{ - config::ConfigUri config(""); - return storage::memfile::SlotFileDumper::dump( - _argc, _argv, config, std::cout, std::cerr); - } catch (std::exception& e) { - std::cerr << "Aborting due to exception:\n" << e.what() << "\n"; - return 1; - } - } -}; - -} // anonymous - -int main(int argc, char **argv) { - DumpSlotFileApp app; - return app.Entry(argc, argv); -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp deleted file mode 100644 index 601888c88cc..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdiskapp.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
- -#include "vdsdisktool.h" -#include <vespa/defaults.h> -#include <vespa/fastos/app.h> -#include <iostream> - -namespace { - struct DiskApp : public FastOS_Application { - int Main() override { - try { - std::string dir = vespa::Defaults::underVespaHome("var/db/vespa/vds"); - return storage::memfile::VdsDiskTool::run( - _argc, _argv, dir.c_str(), - std::cout, std::cerr); - } catch (std::exception& e) { - std::cerr << "Application aborted with exception:\n" << e.what() - << "\n"; - return 1; - } - } - }; -} // anonymous - -int main(int argc, char **argv) { - DiskApp app; - return app.Entry(argc, argv); -} - diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp deleted file mode 100644 index 4372f0a67d3..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.cpp +++ /dev/null @@ -1,518 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/document/util/stringutil.h> -#include <vespa/fnet/frt/frt.h> -#include <vespa/memfilepersistence/device/mountpointlist.h> -#include <vespa/memfilepersistence/tools/vdsdisktool.h> -#include <vespa/storageframework/defaultimplementation/clock/realclock.h> -#include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/programoptions.h> -#include <vespa/vespalib/util/exceptions.h> -#include <csignal> -#include <dirent.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vdsdiskapp"); - -using std::vector; - -namespace storage::memfile { - -using vespalib::getLastErrorString; - -namespace { - - struct Sorter { - bool operator()(const std::pair<std::string, std::string>& first, - const std::pair<std::string, std::string>& second) - { return (first.first < second.first); } - }; - - /** - * Read pid from pid file. 
In case we want to extend pid file to contain - * more information later, accept multiple lines in file as long as pid is - * in first, and allow a pid: prefix to the pid. - */ - uint32_t readPid(const std::string& pidFile) { - vespalib::LazyFile lf(pidFile, vespalib::File::READONLY); - vector<char> data(32); - size_t read = lf.read(&data[0], 32, 0); - // If pid file has been extended to have more data, ignore it. - for (uint32_t i=0; i<32; ++i) { - if (data[i] == '\n') { - data[i] = '\0'; - read = i; - break; - } - } - // Allow a "pid:" prefix if it exists. - int start = 0; - if (strncmp("pid:", &data[0], 4) == 0) { - start = 4; - } - // Fail unless the first line was just a number with the pid - char* endp; - uint32_t pid = strtoull(&data[start], &endp, 10); - if (*endp != '\0' || read >= 32) { - throw vespalib::IllegalStateException( - "Unexpected content in pid file " + pidFile, - VESPA_STRLOC); - } - if (pid == 0) { - throw vespalib::IllegalStateException( - "Read pid 0 from pidfile which is illegal.", - VESPA_STRLOC); - } - return pid; - } -} - -struct CmdLineOptions : public vespalib::ProgramOptions { - std::ostream& _err; - std::string _rootpath; - bool _showSyntax; - std::string _cluster; - uint32_t _nodeIndex; - std::string _mode; - uint32_t _diskIndex; - std::string _message; - /* - std::string _slobrokConfigId; - std::string _slobrokConnectionSpec; - */ - - CmdLineOptions(int argc, const char * const * argv, - const std::string& rootpath, std::ostream& err) - : vespalib::ProgramOptions(argc, argv), - _err(err), - _rootpath(rootpath) - { - setSyntaxMessage( - "This tool is used to stop VDS from using a given partition " - "you no longer want it to use, or to reenable use of a partition " - "that previously have been disabled. Note that currently, this " - "requires a restart of the storage node, which this tool will " - "do automatically. 
Note that the tool must be run on the storage " - "node where you want to enable/disable a partition.\n\n" - "Examples:\n" - " vdsdisktool disable 2 \"Seeing a lot of smart warnings on this one\"\n" - " vdsdisktool -c mycluster -i 3 disable 0 \"Shouldn't have put this on OS drive\"\n" - " vdsdisktool enable 2\n" - ); - addOption("h help", _showSyntax, false, - "Show this help page."); - addOption("c cluster", _cluster, std::string(""), - "Which cluster the storage node whose disks should be " - "adjusted. If only data from one cluster is detected " - "on the node, this does not have to be specified"); - addOption("i index", _nodeIndex, uint32_t(0xffffffff), - "The node index of the storage node whose disks should be " - "adjusted. If only data from one storage node is detected " - "on the node, this does not have to be specified"); - addArgument("Mode", _mode, - "There are three modes. They are status, enable and disable" - ". The status mode is used to just query current disk " - "status without. The enable and disable modes will enable " - "or disable a disk."); - addArgument("Disk Index", _diskIndex, uint32_t(0xffffffff), - "The disk index which you want to enable/disable. Not " - "specified in status mode, but required otherwise."); - addArgument("Reason", _message, std::string(""), - "Give a reason for why we're enabling or disabling a disk. 
" - "Required when disabling a disk, such that other " - "administrators can see why it has happened."); - } - ~CmdLineOptions(); - - vector<std::string> listDir(const std::string& dir) { - DIR* dirp = opendir(dir.c_str()); - struct dirent* entry; - vector<std::string> result; - if (dirp) while ((entry = readdir(dirp))) { - if (entry == 0) { - std::ostringstream ost; - ost << "Failed to read directory '" << dir << "', errno " - << errno << ": " << getLastErrorString() << "\n"; - int tmp = closedir(dirp); - assert(tmp == 0); - (void) tmp; - throw vespalib::IllegalStateException(ost.str(), VESPA_STRLOC); - } - std::string name(reinterpret_cast<char*>(&entry->d_name)); - assert(name.size() > 0); - if (name[0] == '.') continue; - result.push_back(name); - } - int tmp = closedir(dirp); - assert(tmp == 0); - (void) tmp; - return result; - } - - std::set<std::string> detectPossibleClusters() { - if (!vespalib::fileExists(_rootpath)) { - throw vespalib::IllegalStateException( - "No VDS installations found at all in " + _rootpath, - VESPA_STRLOC); - } - vector<std::string> files(listDir(_rootpath)); - std::set<std::string> result(files.begin(), files.end()); - return result; - } - - std::set<uint16_t> - detectPossibleNodeIndexes(const std::string& cluster) - { - std::string dir = _rootpath + "/" + cluster + "/storage"; - if (!vespalib::fileExists(dir)) { - throw vespalib::IllegalStateException( - "No VDS installations found at all in " + dir, - VESPA_STRLOC); - } - vector<std::string> files(listDir(dir)); - std::set<uint16_t> result; - for (uint32_t i=0; i<files.size(); ++i) { - char* endp; - uint64_t index = strtoull(files[i].c_str(), &endp, 10); - if (*endp != '\0' || index > 0xffff) { - _err << "Found strange file in directory supposed to " - << "contain node indexes: '" << files[i] << "'.\n"; - } else { - result.insert(index); - } - } - return result; - } - - bool validate() { - // Validate that cluster was in fact found. 
Uses storage disk - // directories to scan for legal targets. - LOG(debug, "Detecting clusters"); - std::set<std::string> clusters(detectPossibleClusters()); - if (clusters.size() == 0) { - _err << "No VDS clusters at all detected on this node.\n"; - return false; - } - bool clusterFound = false; - if (_cluster != "") { - if (clusters.find(_cluster) == clusters.end()) { - _err << "No cluster named '" << _cluster - << "' found.\n"; - } else { - clusterFound = true; - } - } else if (clusters.size() != 1u) { - _err << "Cluster must be specified as there are multiple " - "targets.\n"; - } else { - _cluster = *clusters.begin(); - clusterFound = true; - } - if (!clusterFound) { - _err << "Detected cluster names on local node:\n"; - for (std::set<std::string>::const_iterator it = clusters.begin(); - it != clusters.end(); ++it) - { - _err << " " << *it << "\n"; - } - return false; - } - // Validate that node index was in fact found. Uses storage disk - // directories to scan for legal targets. - LOG(debug, "Detecting node indexes"); - std::set<uint16_t> nodeIndexes( - detectPossibleNodeIndexes(_cluster)); - if (nodeIndexes.size() == 0) { - _err << "No node indexes at all detected on this node in " - "cluster '" << _cluster << ".\n"; - return false; - } - bool indexFound = false; - if (_nodeIndex != uint32_t(0xffffffff)) { - if (_nodeIndex > 0xffff) { - _err << "Illegal node index " << _nodeIndex - << " specified. 
Nodes must be in the range of " - << "0-65535.\n"; - return false; - } - if (nodeIndexes.find(_nodeIndex) == nodeIndexes.end()) { - _err << "No node with index " << _nodeIndex - << " found in cluster '" << _cluster - << "'.\n"; - } else { - indexFound = true; - } - } else if (nodeIndexes.size() != 1u) { - _err << "Node index must be specified as there are multiple " - "targets.\n"; - } else { - _nodeIndex = *nodeIndexes.begin(); - indexFound = true; - } - if (!indexFound) { - _err << "Detected node indexes on local node in cluster '" - << _cluster << "':\n"; - for (std::set<uint16_t>::const_iterator it = nodeIndexes.begin(); - it != nodeIndexes.end(); ++it) - { - _err << " " << *it << "\n"; - } - return false; - } - // Validate modes - if (_mode != "enable" && _mode != "disable" && _mode != "status") { - _err << "Illegal mode '" << _mode << "'.\n"; - return false; - } - // Warn if senseless options are given in status mode - if (_mode == "status" && (_diskIndex != 0xffffffff || _message != "")) { - _err << "Warning: Disk index and/or reason makes no sense in " - << "status mode.\n"; - } - if ((_mode == "enable" || _mode == "disable") - && _diskIndex == 0xffffffff) - { - _err << "A disk index must be given to specify which disk to " - << _mode << ".\n"; - return false; - } - if (_mode == "disable" && _message == "") { - _err << "A reason must be given for why you are disabling the " - "disk.\n"; - return false; - } - if (_mode == "enable" || _mode == "disable") { - std::ostringstream dir; - dir << _rootpath << "/" << _cluster << "/storage/" << _nodeIndex - << "/disks/d" << _diskIndex; - if (!vespalib::fileExists(dir.str())) { - _err << "Cannot " << _mode << " missing disk " - << _diskIndex << ". 
No disk detected at " - << dir.str() << "\n"; - return false; - } - } - return true; - } - - vector<uint16_t> getNodeIndexes() { - vector<uint16_t> indexes; - indexes.push_back(_nodeIndex); - return indexes; - } - - std::string getNodePath(uint16_t nodeIndex) { - std::ostringstream ost; - ost << _rootpath << "/" << _cluster << "/storage/" << nodeIndex; - return ost.str(); - } - - std::string getPidFile(uint16_t nodeIndex) { - return getNodePath(nodeIndex) + "/pidfile"; - } - -}; - -CmdLineOptions::~CmdLineOptions() {} - -int -VdsDiskTool::run(int argc, const char * const * argv, - const std::string& rootPath, - std::ostream& out, std::ostream& err) -{ - CmdLineOptions options(argc, argv, rootPath, err); - try{ - LOG(debug, "Parsing command line options"); - options.parse(); - } catch (vespalib::InvalidCommandLineArgumentsException& e) { - LOG(debug, "Failed parsing command line options"); - if (!options._showSyntax) { - err << e.getMessage() << "\n"; - options.writeSyntaxPage(err, false); - err << "\n"; - return 1; - } - } - if (options._showSyntax) { - options.writeSyntaxPage(err, false); - err << "\n"; - return 0; - } - LOG(debug, "Validating options"); - if (!options.validate()) { - LOG(debug, "Options failed validation"); - options.writeSyntaxPage(err, false); - return 1; - } - LOG(debug, "Iterate over all nodes to operate on"); - // Iterate over all node indexes to operate on. - for (uint32_t indexIterator = 0; - indexIterator < options.getNodeIndexes().size(); ++indexIterator) - { - uint16_t nodeIndex = options.getNodeIndexes()[indexIterator]; - std::string pidFile = options.getPidFile(nodeIndex); - - // Read pid if process is running - uint32_t pid = 0; - try{ - if (vespalib::fileExists(pidFile)) { - pid = readPid(pidFile); - if (kill(pid, 0) != 0) { - err << "Failed to signal process with pid " - << pid << " (" << errno << "): " - << getLastErrorString() << ". 
If storage node is " - << "running it needs to be manually restarted" - << " before changes take effect.\n"; - } else if (options._mode == "status") { - out << "Storage node " << nodeIndex - << " in cluster " << options._cluster - << " is running with pid " << pid << ".\n"; - } - } - } catch (vespalib::IoException& e) { - err << "Failed to read pid file: " << e.getMessage() - << "\n"; - if (options._mode != "status") { - err << "Not restarting storage node after changes.\n"; - } - } - framework::defaultimplementation::RealClock clock; - // Read the disk status file. - DeviceManager::UP devMan(new DeviceManager( - DeviceMapper::UP(new SimpleDeviceMapper), - clock)); - MountPointList mountPointList(options.getNodePath(nodeIndex), - vector<vespalib::string>(), - std::move(devMan)); - mountPointList.scanForDisks(); - if (options._mode == "enable" || options._mode == "disable") { - if (mountPointList.getSize() <= options._diskIndex - || mountPointList[options._diskIndex].getState() - == Device::NOT_FOUND) - { - err << "Disk " << options._diskIndex << " on node " - << nodeIndex << " in cluster " - << options._cluster << " does not exist. " - << "Cannot enable or disable a non-existing " - << "disk.\n"; - return 1; - } - if (mountPointList[options._diskIndex].getState() - != Device::OK) - { - err << "Disk " << options._diskIndex << " on node " - << nodeIndex << " in cluster " - << options._cluster << " fails pre-initialize " - << "routine. 
Cannot enable or disable disk with " - << "such a problem: " - << mountPointList[options._diskIndex] << "\n"; - return 1; - } - } - vector<Device::State> preFileStates( - mountPointList.getSize()); - for (uint32_t i=0; i<mountPointList.getSize(); ++i) { - preFileStates[i] = mountPointList[i].getState(); - } - mountPointList.readFromFile(); - if (options._mode == "enable") { - Directory& dir(mountPointList[options._diskIndex]); - if (dir.getState() == Device::OK) { - out << "Disk " << options._diskIndex << " on node " - << nodeIndex << " in cluster " - << options._cluster << " is already enabled. " - << "Nothing to do.\n"; - continue; - } - // Shouldn't be null when state is not OK - assert(dir.getLastEvent() != 0); - IOEvent oldEvent(*dir.getLastEvent()); - dir.clearEvents(); - dir.getPartition().clearEvents(); - dir.getPartition().getDisk().clearEvents(); - if (preFileStates[options._diskIndex] != Device::OK) { - out << "Cannot enable disk " << options._diskIndex - << " on node " << nodeIndex << " in cluster " - << options._cluster << ", as it has a failure " - << "that must be fixed by an admin.\n"; - if (preFileStates[options._diskIndex] - != oldEvent.getState()) - { - out << "Clearing any stored state such that the " - << "disk will work once admin fixes\n" - << "the current error.\n"; - } - } else { - out << "Reactivating disk " << options._diskIndex - << " on node " << nodeIndex << " in cluster " - << options._cluster << ". Removed stored event: " - << oldEvent << "\n"; - } - } else if (options._mode == "disable") { - Directory& dir(mountPointList[options._diskIndex]); - if (dir.getState() != Device::OK) { - // Shouldn't be null when state is not OK - assert(dir.getLastEvent() != 0); - IOEvent oldEvent(*dir.getLastEvent()); - out << "Disk " << options._diskIndex << " on node " - << nodeIndex << " in cluster " - << options._cluster << " is already disabled. 
" - << "Overriding old event: " << oldEvent << "\n"; - } - dir.clearEvents(); - dir.getPartition().clearEvents(); - dir.getPartition().getDisk().clearEvents(); - IOEvent newEvent(clock.getTimeInSeconds().getTime(), - Device::DISABLED_BY_ADMIN, - options._message, "vdsdisktool"); - dir.addEvent(newEvent); - out << "Deactivated disk " << options._diskIndex - << " on node " << nodeIndex << " in cluster " - << options._cluster << ". Added event: " - << newEvent << "\n"; - } else if (options._mode == "status") { - out << "Disks on storage node " << nodeIndex - << " in cluster " << options._cluster << ":\n"; - if (mountPointList.getSize() == 0) { - out << " No disks at all are set up.\n"; - } - for (uint32_t i=0; i<mountPointList.getSize(); ++i) { - out << " Disk " << i << ": "; - Directory& dir(mountPointList[i]); - if (dir.isOk()) { - out << "OK\n"; - } else { - const IOEvent* event(dir.getLastEvent()); - assert(event != 0); // If so disk is ok - out << Device::getStateString( - event->getState()) - << " - " << event->getDescription() << "\n"; - } - } - } - if (options._mode == "enable" || options._mode == "disable") { - out << "Writing disk status file to disk\n"; - mountPointList.writeToFile(); - if (pid != 0) { - out << "Killing node such that it reads new data\n"; - int result = kill(pid, SIGTERM); - if (result != 0) { - if (errno == EINVAL) { - err << "Signal SIGTERM not recognized.\n"; - } else if (errno == EPERM) { - err << "No permission to send kill signal to " - "storage process\n"; - } else if (errno == ESRCH) { - err << "No process or process group found " - "using pid " << pid << "\n"; - } - } - } - out << "Done\n"; - continue; - } - } - return 0; -} - -} diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h deleted file mode 100644 index 98fb5c3505c..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.h +++ /dev/null @@ -1,18 
+0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <iosfwd> - -namespace storage { -namespace memfile { - -struct VdsDiskTool { - static int run(int argc, const char * const * argv, - const std::string& rootPath, - std::ostream& out, std::ostream& err); -}; - -} // memfile -} // storage - diff --git a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl b/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl deleted file mode 100644 index f04ce619ccc..00000000000 --- a/memfilepersistence/src/vespa/memfilepersistence/tools/vdsdisktool.pl +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/perl -w -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -use strict; - -# Simple wrapper for executing vdsdisktool-bin - -my $args = &getArgs(); -&run("vdsdisktool-bin $args"); - -exit(0); - -sub isHelpRequest { - foreach my $arg (@ARGV) { - if ($arg eq '-h' || $arg eq '--help') { - return 1; - } - } - return 0; -} - -sub getArgs { - my @args; - foreach my $arg (@ARGV) { - $arg =~ s/([ \t\f])/\\$1/g; - push @args, $arg; - } - return join(' ', @args); -} - -sub isDebugRun { - foreach my $arg (@ARGV) { - if ($arg eq '--debug-perl-wrapper') { - return 1; - } - } - return 0; -} - -sub run { - my ($cmd) = @_; - if (&isDebugRun()) { - print "Debug: Would have executed '$cmd'.\n"; - } else { - exec($cmd); - } -} |