From 72231250ed81e10d66bfe70701e64fa5fe50f712 Mon Sep 17 00:00:00 2001
From: Jon Bratseth <bratseth@yahoo-inc.com>
Date: Wed, 15 Jun 2016 23:09:44 +0200
Subject: Publish

---
 fsa/doc/.gitignore           |    2 +
 fsa/doc/Doxyfile             | 1099 ++++++++++++++++++++++++++++++++++++++++++
 fsa/doc/docbook/.gitignore   |    4 +
 fsa/doc/docbook/fsadump.xml  |  205 ++++++++
 fsa/doc/docbook/fsainfo.xml  |  177 +++++++
 fsa/doc/docbook/makefsa.xml  |  224 +++++++++
 fsa/doc/fsa_file_format.html |   69 +++
 fsa/doc/permute_query.stats  |   18 +
 8 files changed, 1798 insertions(+)
 create mode 100644 fsa/doc/.gitignore
 create mode 100644 fsa/doc/Doxyfile
 create mode 100644 fsa/doc/docbook/.gitignore
 create mode 100644 fsa/doc/docbook/fsadump.xml
 create mode 100644 fsa/doc/docbook/fsainfo.xml
 create mode 100644 fsa/doc/docbook/makefsa.xml
 create mode 100644 fsa/doc/fsa_file_format.html
 create mode 100644 fsa/doc/permute_query.stats

(limited to 'fsa/doc')

diff --git a/fsa/doc/.gitignore b/fsa/doc/.gitignore
new file mode 100644
index 00000000000..2ec816f3ef2
--- /dev/null
+++ b/fsa/doc/.gitignore
@@ -0,0 +1,2 @@
+html
+latex
diff --git a/fsa/doc/Doxyfile b/fsa/doc/Doxyfile
new file mode 100644
index 00000000000..6b47c186cb4
--- /dev/null
+++ b/fsa/doc/Doxyfile
@@ -0,0 +1,1099 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Doxyfile 1.3.5
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists, items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = fsa
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or 
+# if some version control system is used.
+
+PROJECT_NUMBER         = 2.0.1
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
+# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location 
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = 
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
+# documentation generated by doxygen is written. Doxygen will use this 
+# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: 
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, 
+# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en 
+# (Japanese with English messages), Korean, Norwegian, Polish, Portuguese, 
+# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE        = English
+
+# This tag can be used to specify the encoding used in the generated output. 
+# The encoding is not always determined by the language that is chosen, 
+# but also whether or not the output is meant for Windows or non-Windows users. 
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES 
+# forces the Windows encoding (this is the default for the Windows binary), 
+# whereas setting the tag to NO uses a Unix-style encoding (the default for 
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING   = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
+# include brief member descriptions after the members that are listed in 
+# the file and class documentation (similar to JavaDoc). 
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
+# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator 
+# that is used to form the text in various listings. Each string 
+# in this list, if found as the leading text of the brief description, will be 
+# stripped from the text and the result after processing the whole list, is used 
+# as the annotated text. Otherwise, the brief description is used as-is. If left 
+# blank, the following values are used ("$name" is automatically replaced with the 
+# name of the entity): "The $name class" "The $name widget" "The $name file" 
+# "is" "provides" "specifies" "contains" "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = 
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
+# Doxygen will generate a detailed section even if there is only a brief 
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited 
+# members of a class in the documentation of that class as if those members were 
+# ordinary class members. Constructors, destructors and assignment operators of 
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
+# path before file names in the file list and in the header files. If set 
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
+# can be used to strip a user-defined part of the path. Stripping is 
+# only done if one of the specified strings matches the left-hand part of 
+# the path. It is allowed to use relative paths in the argument list.
+
+STRIP_FROM_PATH        = 
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
+# (but less readable) file names. This can be useful if your file system 
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
+# will interpret the first line (until the first dot) of a JavaDoc-style 
+# comment as the brief description. If set to NO, the JavaDoc 
+# comments will behave just like the Qt-style comments (thus requiring an 
+# explicit @brief command for a brief description).
+
+JAVADOC_AUTOBRIEF      = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
+# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
+# comments) as a brief description. This used to be the default behaviour. 
+# The new default is to treat a multi-line C++ comment block as a detailed 
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen 
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member 
+# documentation.
+
+DETAILS_AT_TOP         = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
+# member inherits the documentation from any documented member that it 
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
+# tag is set to YES, then doxygen will reuse the documentation of the first 
+# member in the group (if any) for the other members of the group. By default 
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that act 
+# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
+# put the command \sideeffect (or @sideeffect) in the documentation, which 
+# will result in a user-defined paragraph with heading "Side Effects:". 
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources 
+# only. Doxygen will then generate output that is more tailored for C. 
+# For instance, some of the names that are used will be different. The list 
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources 
+# only. Doxygen will then generate output that is more tailored for Java. 
+# For instance, namespaces will be presented as packages, qualified scopes 
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
+# the same type (for instance a group of public functions) to be put as a 
+# subgroup of that type (e.g. under the Public Functions section). Set it to 
+# NO to prevent subgrouping. Alternatively, this can be done per class using 
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
+# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless 
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
+# defined locally in source files will be included in the documentation. 
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
+# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the 
+# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
+# undocumented classes that are normally visible in the class hierarchy. 
+# If set to NO (the default) these classes will be included in the various 
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
+# friend (class|struct|union) declarations. 
+# If set to NO (the default) these declarations will be included in the 
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
+# documentation blocks found inside the body of a function. 
+# If set to NO (the default) these blocks will be appended to the 
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation 
+# that is typed after a \internal command is included. If the tag is set 
+# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
+# file names in lower-case letters. If set to YES upper-case letters are also 
+# allowed. This is useful if you have classes or files whose names only differ 
+# in case and if your file system supports case sensitive file names. Windows 
+# users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
+# will show members with their full class and namespace scopes in the 
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
+# will put a list of the files that are included by a file in the documentation 
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
+# will sort the (detailed) documentation of file and class members 
+# alphabetically by member name. If set to NO the members will appear in 
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or 
+# disable (NO) the todo list. This list is created by putting \todo 
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or 
+# disable (NO) the test list. This list is created by putting \test 
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or 
+# disable (NO) the bug list. This list is created by putting \bug 
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
+# disable (NO) the deprecated list. This list is created by putting 
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional 
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
+# the initial value of a variable or define consists of for it to appear in 
+# the documentation. If the initializer consists of more lines than specified 
+# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and defines in the 
+# documentation can be controlled using \showinitializer or \hideinitializer 
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
+# at the bottom of the documentation of classes and structs. If set to YES the 
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated 
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are 
+# generated by doxygen. Possible values are YES and NO. If left blank 
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
+# potential errors in the documentation, such as not documenting some 
+# parameters in a documented function, or documenting parameters that 
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that 
+# doxygen can produce. The string should contain the $file, $line, and $text 
+# tags, which will be replaced by the file and line number from which the 
+# warning originated and the warning text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning 
+# and error messages should be written. If left blank the output is written 
+# to stderr.
+
+WARN_LOGFILE           = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain 
+# documented source files. You may enter file names like "myfile.cpp" or 
+# directories like "/usr/src/myproject". Separate the files or directories 
+# with spaces.
+
+INPUT                  = ../src
+
+# If the value of the INPUT tag contains directories, you can use the 
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank the following patterns are tested: 
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp 
+# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc
+
+FILE_PATTERNS          = *.h *.cpp
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories 
+# should be searched for input files as well. Possible values are YES and NO. 
+# If left blank NO is used.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should 
+# be excluded from the INPUT source files. This way you can easily exclude a 
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                = 
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories 
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the 
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
+# certain files from those directories.
+
+EXCLUDE_PATTERNS       = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or 
+# directories that contain example code fragments that are included (see 
+# the \include command).
+
+EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included.
+
+EXAMPLE_PATTERNS       = 
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
+# searched for input files to be used with the \include or \dontinclude 
+# commands irrespective of the value of the RECURSIVE tag. 
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or 
+# directories that contain images that are included in the documentation (see 
+# the \image command).
+
+IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should 
+# invoke to filter for each input file. Doxygen will invoke the filter program 
+# by executing (via popen()) the command <filter> <input-file>, where <filter> 
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
+# input file. Doxygen will then use the output that the filter program writes 
+# to standard output.
+
+INPUT_FILTER           = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
+# INPUT_FILTER) will be used to filter the input files when producing source 
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
+# be generated. Documented entities will be cross-referenced with these sources. 
+# Note: To get rid of all source code in the generated output, make sure also 
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body 
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
+# doxygen to hide any special comment blocks from generated source code 
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default) 
+# then for each documented function all documented 
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default) 
+# then for each documented function all documented entities 
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = YES
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
+# will generate a verbatim copy of the header file for each class for 
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
+# of all compounds will be generated. Enable this if the project 
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 3
+
+# In case all classes in a project start with a common prefix, all 
+# classes will be put under the same header in the alphabetical index. 
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard header.
+
+HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard footer.
+
+HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
+# style sheet that is used by each HTML page. It can be used to 
+# fine-tune the look of the HTML output. If the tag is left blank doxygen 
+# will generate a default style sheet. Note that doxygen will try to copy 
+# the style sheet file to the HTML output directory, so don't put your own 
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        = 
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 
+# files or namespaces will be aligned in HTML using tables. If set to 
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
+# will be generated that can be used as input for tools like the 
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) 
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
+# be used to specify the file name of the resulting .chm file. You 
+# can add a path in front of the file if the result should not be 
+# written to the html output directory.
+
+CHM_FILE               = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
+# be used to specify the location (absolute path including file name) of 
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           = 
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
+# controls if a separate .chi index file is generated (YES) or that 
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
+# controls whether a binary table of contents is generated (YES) or a 
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members 
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at 
+# top of each HTML page. The value NO (the default) enables the index and 
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20]) 
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that 
+# is generated for HTML Help). For this to work a browser that supports 
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, 
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are 
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
+# used to set the initial width (in pixels) of the frame in which the tree 
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# generate LaTeX output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
+# generate index for LaTeX. If left blank `makeindex' will be used as the 
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
+# LaTeX documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used 
+# by the printer. Possible values are: a4, a4wide, letter, legal and 
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
+# the generated latex document. The header should contain everything until 
+# the first chapter. If it is left blank doxygen will generate a 
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
+# plain latex in the generated Makefile. Set this option to YES to get a 
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode 
+# command to the generated LaTeX files. This will instruct LaTeX to keep 
+# running if errors occur, instead of asking the user for help. 
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
+# include the index chapters (such as File Index, Compound Index, etc.) 
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. 
+# The RTF output is optimized for Word 97 and may not look very pretty with 
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
+# RTF documents. This may be useful for small projects and may help to 
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
+# will contain hyperlink fields. The RTF file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using WORD or other 
+# programs which support those fields. 
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's 
+# config file, i.e. a series of assignments. You only have to provide 
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an rtf document. 
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    = 
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to 
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
+# then it will generate one additional man file for each entity 
+# documented in the real man page(s). These additional files 
+# only source the real man page, but without them the man command 
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will 
+# generate an XML file that captures the structure of 
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify an XML DTD, 
+# which can be used by a validating XML parser to check the 
+# syntax of the XML files.
+
+XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
+# dump the program listings (including syntax highlighting 
+# and cross-referencing information) to the XML output. Note that 
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
+# generate an AutoGen Definitions (see autogen.sf.net) file 
+# that captures the structure of the code including all 
+# documentation. Note that this feature is still experimental 
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
+# generate a Perl module file that captures the structure of 
+# the code including all documentation. Note that this 
+# feature is still experimental and incomplete at the 
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
+# nicely formatted so it can be parsed by a human reader.  This is useful 
+# if you want to understand what is going on.  On the other hand, if this 
+# tag is set to NO the size of the Perl module output will be much smaller 
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file 
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
+# This is useful so different doxyrules.make files included by the same 
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
+# evaluate all C-preprocessor directives found in the sources and include 
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
+# names in the source code. If set to NO (the default) only conditional 
+# compilation will be performed. Macro expansion can be done in a controlled 
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
+# then the macro expansion is limited to the macros specified with the 
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files 
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that 
+# contain include files that are not input files but should be processed by 
+# the preprocessor.
+
+INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
+# patterns (like *.h and *.hpp) to filter out the header-files in the 
+# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# be used.
+
+INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that 
+# are defined before the preprocessor is started (similar to the -D option of 
+# gcc). The argument of the tag is a list of macros of the form: name 
+# or name=definition (no spaces). If the definition and the = are 
+# omitted =1 is assumed.
+
+PREDEFINED             = 
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
+# this tag can be used to specify a list of macro names that should be expanded. 
+# The macro definition that is found in the sources will be used. 
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
+# doxygen's preprocessor will remove all function-like macros that are alone 
+# on a line, have an all uppercase name, and do not end with a semicolon. Such 
+# function macros are typically used for boiler-plate code, and will confuse the 
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references   
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. 
+# Optionally an initial location of the external documentation 
+# can be added for each tagfile. The format of a tag file without 
+# this location is as follows: 
+#   TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: 
+#   TAGFILES = file1=loc1 "file2 = loc2" ... 
+# where "loc1" and "loc2" can be relative or absolute paths or 
+# URLs. If a location is present for each tag, the installdox tool 
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen 
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
+# in the class index. If set to NO only the inherited external classes 
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
+# in the modules index. If set to NO, only the current project's groups will 
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script 
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base or 
+# super classes. Setting the tag to NO turns the diagrams off. Note that this 
+# option is superseded by the HAVE_DOT option below. This is only a fallback. It is 
+# recommended to install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide 
+# inheritance and usage relations if the target is undocumented 
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
+# available from the path. This tool is part of Graphviz, a graph visualization 
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = YES
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect inheritance relations. Setting this tag to YES will force the 
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect implementation dependencies (inheritance, containment, and 
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
+# collaboration diagrams in a style similar to the OMG's Unified Modeling 
+# Language.
+
+UML_LOOK               = YES
+
+# If set to YES, the inheritance and collaboration graphs will show the 
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
+# tags are set to YES then doxygen will generate a graph for each documented 
+# file showing the direct and indirect include dependencies of the file with 
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
+# documented header file showing the documented files that directly or 
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will 
+# generate a call dependency graph for every global function or class method. 
+# Note that enabling this option will significantly increase the time of a run. 
+# So in most cases it will be better to enable call graphs for selected 
+# functions only using the \callgraph command.
+
+CALL_GRAPH             = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that 
+# contain dot files that are included in the documentation (see the 
+# \dotfile command).
+
+DOTFILE_DIRS           = 
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images.
+
+MAX_DOT_GRAPH_WIDTH    = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT   = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
+# graphs generated by dot. A depth value of 3 means that only nodes reachable 
+# from the root by following a path via at most 3 edges will be shown. Nodes that 
+# lie further from the root node will be omitted. Note that setting this option to 
+# 1 or 2 may greatly reduce the computation time needed for large code bases. Also 
+# note that a graph may be further truncated if the graph's image dimensions are 
+# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT). 
+# If 0 is used for the depth value (the default), the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
+# generate a legend page explaining the meaning of the various boxes and 
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
+# remove the intermediate dot files that are used to generate 
+# the various graphs.
+
+DOT_CLEANUP            = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine   
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be 
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE           = NO
diff --git a/fsa/doc/docbook/.gitignore b/fsa/doc/docbook/.gitignore
new file mode 100644
index 00000000000..56d5bd4fda9
--- /dev/null
+++ b/fsa/doc/docbook/.gitignore
@@ -0,0 +1,4 @@
+*.1
+*.html
+manpage.links
+manpage.refs
diff --git a/fsa/doc/docbook/fsadump.xml b/fsa/doc/docbook/fsadump.xml
new file mode 100644
index 00000000000..c4a72a157a2
--- /dev/null
+++ b/fsa/doc/docbook/fsadump.xml
@@ -0,0 +1,205 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="fsadump">
+
+<refmeta>
+<refentrytitle>fsadump</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>fsadump</refname>
+<refpurpose>dump the contents of finite state automata files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+  <command>fsadump</command>
+  <arg>OPTIONS</arg>
+  <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>fsadump</command> dumps the contents of fsa files to standard
+out in one of several different formats (some of which can be directly
+used as input for <command>makefsa</command>).
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-e</option></term>
+<listitem>
+<para>
+text output format, with empty meta info (default)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-t</option></term>
+<listitem>
+<para>
+text output format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-b</option></term>
+<listitem>
+<para>
+binary output format, with base64 encoded meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-B</option></term>
+<listitem>
+<para>
+binary output format with raw meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-n</option></term>
+<listitem>
+<para>
+text output with numerical meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-p</option></term>
+<listitem>
+<para>
+text output format, with the perfect hash value instead of meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-d</option></term>
+<listitem>
+<para>
+dot output format (for visualization using graphviz)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+
+<refsect1><title>Output formats</title>
+<para>
+<variablelist>
+<varlistentry>
+<term>Text output format with empty meta info (<option>-e</option>)</term>
+<listitem>
+<para>
+The input strings are terminated with '\n', and may not contain '\0',
+'\0xff' or '\n' characters. This is the default.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format (<option>-t</option>)</term>
+<listitem>
+<para>
+Lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input and meta strings may not contain '\0',
+'\0xff', '\n' or '\t' characters. A terminating '\0' (if found) is
+removed from the end of the meta info.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format with numerical info (<option>-n</option>)</term>
+<listitem>
+<para>
+Lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input strings may not contain '\0', '\0xff', '\n'
+or '\t' characters. Meta strings are unsigned integers ([0-9]+), which
+are retrieved from the binary representation in the
+automaton. Valid data sizes are 1, 2 or 4 bytes; for sizes other than
+these only the first 2 or 4 bytes are used.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary output format, with base64 encoded meta info (<option>-b</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info is base64 encoded, as it may contain any character.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary output format with raw meta info (<option>-B</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info must not contain '\0'.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format, with the perfect hash value instead of meta
+info (<option>-p</option>)</term>
+<listitem>
+<para>
+The format is the same as for text output with numerical info, but the
+perfect hash value for each string is used instead of meta info.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Dot output format for visualization using GraphViz (<option>-d</option>)</term>
+<listitem>
+<para>
+Output a dot format graph, with the start and final states marked and
+edges labeled with transition symbols. The <command>dot</command> tool
+can be used to generate graphical output (e.g. PostScript) of the
+graph. Use this format for small automata only (a few hundred states
+or less), as the graph soon becomes quite complex.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+makefsa, fsainfo.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/docbook/fsainfo.xml b/fsa/doc/docbook/fsainfo.xml
new file mode 100644
index 00000000000..d0315b112ba
--- /dev/null
+++ b/fsa/doc/docbook/fsainfo.xml
@@ -0,0 +1,177 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="fsainfo">
+
+<refmeta>
+<refentrytitle>fsainfo</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>fsainfo</refname>
+<refpurpose>display information about finite state automata files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+  <command>fsainfo</command>
+  <arg>OPTIONS</arg>
+  <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>fsainfo</command> displays information about fsa files,
+mainly based on the fsa header. In addition,
+<command>fsainfo</command> tries to load the fsa file and reports
+whether loading succeeded. The following information is presented:
+</para>
+<para>
+<variablelist>
+<varlistentry>
+<term>Header size</term>
+<listitem>
+<para>
+Size of the fsa header (usually 256 bytes).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Magic</term>
+<listitem>
+<para>
+Magic number identifying fsa files (2038637673). No attempt is made to
+load files with a wrong magic number.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Version</term>
+<listitem>
+<para>
+Version of the fsa library used for building the fsa file (e.g. 1.0.2).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Serial number</term>
+<listitem>
+<para>
+Serial number of the fsa file.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Checksum</term>
+<listitem>
+<para>
+Checksum for verifying the integrity of the fsa file. If the checksum
+verification fails, the fsa file will refuse to load.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>FSA size</term>
+<listitem>
+<para>
+Size of the automaton (in number of cells and bytes).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Start state</term>
+<listitem>
+<para>
+Index of the start state.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Data size</term>
+<listitem>
+<para>
+Size of data storage used for storing meta information for final states.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Data item type</term>
+<listitem>
+<para>
+Type of meta data items (fixed or variable size).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Fixed item size</term>
+<listitem>
+<para>
+Size of meta data items, if fixed size.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Perfect hash</term>
+<listitem>
+<para>
+Indication whether the fsa was built with perfect hash (yes/no).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Perfect hash size</term>
+<listitem>
+<para>
+Perfect hash size, if the fsa was built with perfect hash.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Total size</term>
+<listitem>
+<para>
+Full size of the fsa file (header + automaton + meta data + perfect hash).
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+makefsa, fsadump.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/docbook/makefsa.xml b/fsa/doc/docbook/makefsa.xml
new file mode 100644
index 00000000000..4673b06f78d
--- /dev/null
+++ b/fsa/doc/docbook/makefsa.xml
@@ -0,0 +1,224 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="makefsa">
+
+<refmeta>
+<refentrytitle>makefsa</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>makefsa</refname>
+<refpurpose>create finite state automata files from text or binary input</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+  <command>makefsa</command>
+  <arg>OPTIONS</arg>
+  <arg>input_file</arg>
+  <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>makefsa</command> creates a finite state automaton file from
+text or binary input. If <option>input_file</option> is not specified,
+standard input is used. The input must be sorted and must not contain
+duplicate input strings (unsorted or duplicate entries will be
+ignored).
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-e</option></term>
+<listitem>
+<para>
+use text input format, with empty meta info (default)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-t</option></term>
+<listitem>
+<para>
+use text input format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-b</option></term>
+<listitem>
+<para>
+use binary input format, with base64 encoded meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-B</option></term>
+<listitem>
+<para>
+use binary input format with raw meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-n</option></term>
+<listitem>
+<para>
+use text input with numerical meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-s size</option></term>
+<listitem>
+<para>
+data size for numerical meta info (default=4)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-i</option></term>
+<listitem>
+<para>
+ignore meta info regardless of input format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-p</option></term>
+<listitem>
+<para>
+build the automaton with a perfect hash
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-S num</option></term>
+<listitem>
+<para>
+set serial number of automaton (default=0)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-v</option></term>
+<listitem>
+<para>
+be verbose, display progress information and statistics
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+
+<refsect1><title>Input formats</title>
+<para>
+<variablelist>
+<varlistentry>
+<term>Text input format with empty meta info (<option>-e</option>)</term>
+<listitem>
+<para>
+The input strings are terminated with '\n', and may not contain '\0',
+'\0xff' or '\n' characters. This is the default.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text input format (<option>-t</option>)</term>
+<listitem>
+<para>
+Input lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input and meta strings may not contain '\0',
+'\0xff', '\n' or '\t' characters. A terminating '\0' is added to the
+meta info when stored in the automaton.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text input format with numerical info (<option>-n</option>)</term>
+<listitem>
+<para>
+Input lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input strings may not contain '\0', '\0xff', '\n'
+or '\t' characters. Meta strings are unsigned integers ([0-9]+), which
+will be stored in binary representation in the automaton. The size of
+the data can be controlled by the <option>-s</option> option, valid
+values are 1, 2 or 4 bytes, corresponding to uint8_t, uint16_t and
+uint32_t, respectively. (Default is 4 bytes.)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary input format, with base64 encoded meta info (<option>-b</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info is base64 encoded, as it may contain any character.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary input format with raw meta info (<option>-B</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info must not contain '\0'.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect1>
+
+<refsect1><title>Perfect hashes</title>
+<para>
+Automata built with perfect hash (<option>-p</option>) will contain
+an additional data structure which provides a mapping from the strings
+stored in the automaton to unique integers in the range [0,n-1] where
+n is the number of accepted strings. The size of the fsa file will
+increase by up to 80%. Lookup time is slightly longer if the hash
+value needs to be retrieved (but still O(m), where m is the length of
+the input). Reverse lookup is also possible, though it is more
+expensive (also O(m), but with a much higher constant).
+</para>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+fsainfo, fsadump.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/fsa_file_format.html b/fsa/doc/fsa_file_format.html
new file mode 100644
index 00000000000..077edd627c3
--- /dev/null
+++ b/fsa/doc/fsa_file_format.html
@@ -0,0 +1,69 @@
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<html>
+<head>
+<title>fsa file format</title>
+</head>
+<body>
+<h2>fsa file format</h2>
+<table border="1" cellpadding="2" cellspacing="0">
+<tr><td>header</td><td>256 bytes</td></tr>
+<tr><td>symbol table</td><td><em>size</em> bytes</td></tr>
+<tr><td>state table</td><td><em>size</em>*4 bytes</td></tr>
+<tr><td>data table</td><td><em>data_size</em> bytes</td></tr>
+<tr><td>perfect hash table (optional)</td><td><em>size</em>*4 bytes</td></tr>
+</table>
+
+<h3>header</h3>
+<table border="1" cellpadding="2" cellspacing="0">
+<tr><th>field</th><th>offset</th><th>size</th><th>description</th></tr>
+<tr><td>magic</td><td>0</td><td>4 (uint32)</td><td>Magic number (0x79832469)</td></tr>
+<tr><td>version</td><td>4</td><td>4 (uint32)</td><td>Version number of
+the fsa library used for building this fsa</td></tr>
+<tr><td>checksum</td><td>8</td><td>4 (uint32)</td><td>Checksum</td></tr>
+<tr><td>size</td><td>12</td><td>4 (uint32)</td><td>Size of fsa (cells)</td></tr>
+<tr><td>start</td><td>16</td><td>4 (uint32)</td><td>Start state</td></tr>
+<tr><td>data_size</td><td>20</td><td>4 (uint32)</td><td>Size of data (bytes)</td></tr>
+<tr><td>data_type</td><td>24</td><td>4 (uint32)</td><td>Type of data
+items (0=variable size, 1=fixed size)</td></tr>
+<tr><td>fixed_data_size</td><td>28</td><td>4 (uint32)</td><td>Data item size if fixed</td></tr>
+<tr><td>has_perfect_hash</td><td>32</td><td>4
+(uint32)</td><td>Indicator for perfect hash (0=no, 1=yes)</td></tr>
+<tr><td>serial</td><td>36</td><td>4 (uint32)</td><td>Serial number</td></tr>
+<tr><td>reserved</td><td>40</td><td>216 (54*uint32)</td><td>Reserved (pads size to 256 bytes)</td></tr>
+</table>
+
+<h3>symbol table and state table</h3>
+The symbol table and state table contain the transitions of the
+automaton; each 1-byte entry in the symbol table corresponds to a
+uint32 entry in the state table. For each state, a list of at most 254
+transitions is stored, as the symbol set is 8-bit characters, with
+0x00 and 0xff reserved. Each state id is in fact an offset into these
+tables. For a given state <em>state</em>, there exists a valid
+transition for symbol <em>sym</em> if the symbol table contains
+<em>sym</em> at offset <em>state</em>+<em>sym</em>. 0x00 means the
+cell is empty, 0xff is a special symbol meaning that the given state
+is a final state, anything else means invalid transition (i.e. the
+cell is in use by some other state). For valid transitions, the
+corresponding entry in the state table yields the next state. For 0xff
+transitions, the state table entry contains the offset of the data
+item within the data store.
+
+<h3>data store</h3>
+The data store contains the data items for the final states. The 'new
+state' entry of a final state transition in the state table (corresponding to the
+special final state symbol 0xff) contains the data store offset of the data item
+corresponding to that final state. If fixed size items are used, each
+item takes fixed_data_size bytes as defined in the header. Variable
+size items take 4 bytes (uint32 item_size) plus <em>item_size</em>
+bytes. The size of the data store is given in the header.
+
+<h3>perfect hash table</h3>
+The perfect hash table has one uint32 entry for each transition in the
+symbol/state table, thus the size of the perfect hash table equals the
+size of the state table. The perfect hash value for a final state is
+calculated by adding all values in this table for the transitions
+along the path from the start state to the final state. 
+
+</body>
+</html>
+
diff --git a/fsa/doc/permute_query.stats b/fsa/doc/permute_query.stats
new file mode 100644
index 00000000000..3515bb9a631
--- /dev/null
+++ b/fsa/doc/permute_query.stats
@@ -0,0 +1,18 @@
+
+Statistics:
+
+  Empty or single term:       6815022    5.6768%
+  Too long:                    864794    0.7204%
+  Length  2 (grams   1):     27184017   22.6438%
+  Length  3 (grams   4):     32461067   27.0395%
+  Length  4 (grams  11):     24369083   20.2990%
+  Length  5 (grams  26):     14157811   11.7932%
+  Length  6 (grams  57):      7269208    6.0551%
+  Length  7 (grams 119):      3612039    3.0088%
+  Length  8 (grams 238):      1822986    1.5185%
+  Length  9 (grams 456):       962163    0.8015%
+  Length 10 (grams 837):       532530    0.4436%
+  Total:                    120050720
+
+Average number of grams per query: 24.62
+
-- 
cgit v1.2.3