summaryrefslogtreecommitdiffstats
path: root/fsa
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /fsa
Publish
Diffstat (limited to 'fsa')
-rw-r--r--fsa/.gitignore4
-rw-r--r--fsa/CMakeLists.txt14
-rw-r--r--fsa/OWNERS1
-rw-r--r--fsa/README2
-rw-r--r--fsa/TODO7
-rw-r--r--fsa/doc/.gitignore2
-rw-r--r--fsa/doc/Doxyfile1099
-rw-r--r--fsa/doc/docbook/.gitignore4
-rw-r--r--fsa/doc/docbook/fsadump.xml205
-rw-r--r--fsa/doc/docbook/fsainfo.xml177
-rw-r--r--fsa/doc/docbook/makefsa.xml224
-rw-r--r--fsa/doc/fsa_file_format.html69
-rw-r--r--fsa/doc/permute_query.stats18
-rw-r--r--fsa/pom.xml46
-rw-r--r--fsa/queryproc/.gitignore9
-rw-r--r--fsa/queryproc/count_plain_grams.cpp89
-rw-r--r--fsa/queryproc/count_sorted_grams.cpp78
-rw-r--r--fsa/queryproc/p2s_ratio.cpp59
-rw-r--r--fsa/queryproc/permute_query.cpp110
-rw-r--r--fsa/queryproc/sort_grams.cpp29
-rw-r--r--fsa/src/.gitignore6
-rw-r--r--fsa/src/alltest/.gitignore15
-rw-r--r--fsa/src/alltest/CMakeLists.txt70
-rwxr-xr-xfsa/src/alltest/alltest.sh11
-rw-r--r--fsa/src/alltest/conceptnet_test.cpp80
-rw-r--r--fsa/src/alltest/conceptnet_test.out4
-rw-r--r--fsa/src/alltest/detector_test.cpp50
-rw-r--r--fsa/src/alltest/detector_test.out26
-rwxr-xr-xfsa/src/alltest/detector_test.sh3
-rw-r--r--fsa/src/alltest/fsa_create_test.cpp94
-rw-r--r--fsa/src/alltest/fsa_perftest.cpp77
-rw-r--r--fsa/src/alltest/fsa_test.cpp114
-rw-r--r--fsa/src/alltest/fsa_test.out60
-rwxr-xr-xfsa/src/alltest/fsa_test.sh3
-rw-r--r--fsa/src/alltest/fsamanager_test.cpp25
-rw-r--r--fsa/src/alltest/lookup_test.cpp49
-rw-r--r--fsa/src/alltest/lookup_test.out41
-rwxr-xr-xfsa/src/alltest/lookup_test.sh3
-rw-r--r--fsa/src/alltest/ngram_test.cpp57
-rw-r--r--fsa/src/alltest/ngram_test.out72
-rwxr-xr-xfsa/src/alltest/ngram_test.sh3
-rw-r--r--fsa/src/alltest/segmenter_test.cpp74
-rw-r--r--fsa/src/alltest/segmenter_test.out332
-rwxr-xr-xfsa/src/alltest/segmenter_test.sh3
-rw-r--r--fsa/src/alltest/testinput.txt41
-rw-r--r--fsa/src/alltest/vectorizer_perftest.cpp95
-rw-r--r--fsa/src/alltest/vectorizer_test.cpp40
-rw-r--r--fsa/src/alltest/vectorizer_test.out26
-rwxr-xr-xfsa/src/alltest/vectorizer_test.sh3
-rw-r--r--fsa/src/apps/.gitignore3
-rw-r--r--fsa/src/apps/fsadump/.gitignore1
-rw-r--r--fsa/src/apps/fsadump/CMakeLists.txt9
-rw-r--r--fsa/src/apps/fsadump/fsadump.cpp186
-rw-r--r--fsa/src/apps/fsainfo/.gitignore1
-rw-r--r--fsa/src/apps/fsainfo/CMakeLists.txt9
-rw-r--r--fsa/src/apps/fsainfo/fsainfo.cpp124
-rw-r--r--fsa/src/apps/makefsa/.gitignore1
-rw-r--r--fsa/src/apps/makefsa/CMakeLists.txt9
-rw-r--r--fsa/src/apps/makefsa/makefsa.cpp295
-rw-r--r--fsa/src/libfsa/.gitignore6
-rw-r--r--fsa/src/libfsa/automaton-alternate.h998
-rw-r--r--fsa/src/libfsamanagers/.gitignore6
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/FSA.java636
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/MetaData.java217
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/conceptnet/ConceptNet.java384
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/package-info.java7
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java42
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java137
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java313
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/topicpredictor/PredictedTopic.java65
-rw-r--r--fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java180
-rw-r--r--fsa/src/test/fsa/test-data.fsabin0 -> 1674 bytes
-rw-r--r--fsa/src/test/fsa/test-fsa.fsabin0 -> 1717 bytes
-rw-r--r--fsa/src/test/fsa/test-iterator.fsabin0 -> 1954 bytes
-rw-r--r--fsa/src/test/fsa/utf8.fsabin0 -> 1632 bytes
-rw-r--r--fsa/src/test/input/test-data-input.txt4
-rw-r--r--fsa/src/test/input/test-fsa-input.txt3
-rw-r--r--fsa/src/test/input/test-iterator-input.txt12
-rw-r--r--fsa/src/test/input/utf8.txt1
-rw-r--r--fsa/src/test/java/com/yahoo/fsa/test/FSADataTestCase.java104
-rw-r--r--fsa/src/test/java/com/yahoo/fsa/test/FSAIteratorTestCase.java119
-rw-r--r--fsa/src/test/java/com/yahoo/fsa/test/FSATestCase.java100
-rw-r--r--fsa/src/test/java/com/yahoo/fsa/test/UTF8TestCase.java97
-rw-r--r--fsa/src/util/.gitignore2
-rwxr-xr-xfsa/src/util/cn_txt2xml625
-rwxr-xr-xfsa/src/util/cn_xml2dat218
-rw-r--r--fsa/src/util/fsadump/.gitignore5
-rw-r--r--fsa/src/util/fsainfo/.gitignore5
-rw-r--r--fsa/src/util/makefsa/.gitignore5
-rw-r--r--fsa/src/vespa/.gitignore3
-rw-r--r--fsa/src/vespa/fsa/CMakeLists.txt44
-rw-r--r--fsa/src/vespa/fsa/automaton-alternate.cpp846
-rw-r--r--fsa/src/vespa/fsa/automaton.cpp824
-rw-r--r--fsa/src/vespa/fsa/automaton.h970
-rw-r--r--fsa/src/vespa/fsa/base64.cpp142
-rw-r--r--fsa/src/vespa/fsa/base64.h58
-rw-r--r--fsa/src/vespa/fsa/blob.cpp54
-rw-r--r--fsa/src/vespa/fsa/blob.h140
-rw-r--r--fsa/src/vespa/fsa/checksum.h58
-rw-r--r--fsa/src/vespa/fsa/conceptnet.cpp512
-rw-r--r--fsa/src/vespa/fsa/conceptnet.h371
-rw-r--r--fsa/src/vespa/fsa/detector.cpp102
-rw-r--r--fsa/src/vespa/fsa/detector.h131
-rw-r--r--fsa/src/vespa/fsa/file.h29
-rw-r--r--fsa/src/vespa/fsa/fsa.cpp413
-rw-r--r--fsa/src/vespa/fsa/fsa.h2312
-rw-r--r--fsa/src/vespa/fsa/metadata.cpp137
-rw-r--r--fsa/src/vespa/fsa/metadata.h177
-rw-r--r--fsa/src/vespa/fsa/ngram.cpp285
-rw-r--r--fsa/src/vespa/fsa/ngram.h433
-rw-r--r--fsa/src/vespa/fsa/permuter.cpp135
-rw-r--r--fsa/src/vespa/fsa/permuter.h65
-rw-r--r--fsa/src/vespa/fsa/segmenter.cpp279
-rw-r--r--fsa/src/vespa/fsa/segmenter.h636
-rw-r--r--fsa/src/vespa/fsa/selector.cpp77
-rw-r--r--fsa/src/vespa/fsa/selector.h105
-rw-r--r--fsa/src/vespa/fsa/timestamp.h84
-rw-r--r--fsa/src/vespa/fsa/tokenizer.h69
-rw-r--r--fsa/src/vespa/fsa/unicode.cpp532
-rw-r--r--fsa/src/vespa/fsa/unicode.h483
-rw-r--r--fsa/src/vespa/fsa/unicode_charprops.cpp1688
-rw-r--r--fsa/src/vespa/fsa/unicode_lowercase.cpp656
-rw-r--r--fsa/src/vespa/fsa/unicode_tables.cpp162
-rw-r--r--fsa/src/vespa/fsa/vectorizer.cpp92
-rw-r--r--fsa/src/vespa/fsa/vectorizer.h642
-rw-r--r--fsa/src/vespa/fsa/wordchartokenizer.cpp101
-rw-r--r--fsa/src/vespa/fsa/wordchartokenizer.h109
-rw-r--r--fsa/src/vespa/fsamanagers/CMakeLists.txt25
-rw-r--r--fsa/src/vespa/fsamanagers/conceptnethandle.h123
-rw-r--r--fsa/src/vespa/fsamanagers/conceptnetmanager.cpp105
-rw-r--r--fsa/src/vespa/fsamanagers/conceptnetmanager.h104
-rw-r--r--fsa/src/vespa/fsamanagers/fsahandle.h191
-rw-r--r--fsa/src/vespa/fsamanagers/fsamanager.cpp187
-rw-r--r--fsa/src/vespa/fsamanagers/fsamanager.h140
-rw-r--r--fsa/src/vespa/fsamanagers/metadatahandle.h130
-rw-r--r--fsa/src/vespa/fsamanagers/metadatamanager.cpp105
-rw-r--r--fsa/src/vespa/fsamanagers/metadatamanager.h99
-rw-r--r--fsa/src/vespa/fsamanagers/mutex.cpp82
-rw-r--r--fsa/src/vespa/fsamanagers/mutex.h73
-rw-r--r--fsa/src/vespa/fsamanagers/refcountable.h111
-rw-r--r--fsa/src/vespa/fsamanagers/rwlock.cpp99
-rw-r--r--fsa/src/vespa/fsamanagers/rwlock.h95
-rw-r--r--fsa/src/vespa/fsamanagers/singleton.cpp89
-rw-r--r--fsa/src/vespa/fsamanagers/singleton.h172
144 files changed, 24538 insertions, 0 deletions
diff --git a/fsa/.gitignore b/fsa/.gitignore
new file mode 100644
index 00000000000..be0452bed21
--- /dev/null
+++ b/fsa/.gitignore
@@ -0,0 +1,4 @@
+/target
+/pom.xml.build
+Makefile
+Testing
diff --git a/fsa/CMakeLists.txt b/fsa/CMakeLists.txt
new file mode 100644
index 00000000000..7b621a6f111
--- /dev/null
+++ b/fsa/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_define_module(
+ LIBS
+ src/vespa/fsa
+ src/vespa/fsamanagers
+
+ APPS
+ src/apps/fsadump
+ src/apps/fsainfo
+ src/apps/makefsa
+
+ TESTS
+ src/alltest
+)
diff --git a/fsa/OWNERS b/fsa/OWNERS
new file mode 100644
index 00000000000..7ae1acb1be9
--- /dev/null
+++ b/fsa/OWNERS
@@ -0,0 +1 @@
+geirst
diff --git a/fsa/README b/fsa/README
new file mode 100644
index 00000000000..a5437e8b651
--- /dev/null
+++ b/fsa/README
@@ -0,0 +1,2 @@
+This is the FSA library and tools, an implementation of finite state
+automata (FSA) and related algorithms.
diff --git a/fsa/TODO b/fsa/TODO
new file mode 100644
index 00000000000..7244fc3f002
--- /dev/null
+++ b/fsa/TODO
@@ -0,0 +1,7 @@
+* Improve performance of traversing fsa (iterator) by limiting the number of
+ possible transitions which need to be checked.
+
+* Extend makefsa -v (verbose) option to include statistics (memory usage etc.)
+
+* Make memory consumption and allocation strategy in Automaton more efficient.
+
diff --git a/fsa/doc/.gitignore b/fsa/doc/.gitignore
new file mode 100644
index 00000000000..2ec816f3ef2
--- /dev/null
+++ b/fsa/doc/.gitignore
@@ -0,0 +1,2 @@
+html
+latex
diff --git a/fsa/doc/Doxyfile b/fsa/doc/Doxyfile
new file mode 100644
index 00000000000..6b47c186cb4
--- /dev/null
+++ b/fsa/doc/Doxyfile
@@ -0,0 +1,1099 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Doxyfile 1.3.5
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = fsa
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = 2.0.1
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch,
+# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en
+# (Japanese with English messages), Korean, Norwegian, Polish, Portuguese,
+# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is used
+# as the annotated text. Otherwise, the brief description is used as-is. If left
+# blank, the following values are used ("$name" is automatically replaced with the
+# name of the entity): "The $name class" "The $name widget" "The $name file"
+# "is" "provides" "specifies" "contains" "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited
+# members of a class in the documentation of that class as if those members were
+# ordinary class members. Constructors, destructors and assignment operators of
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. It is allowed to use relative paths in the argument list.
+
+STRIP_FROM_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names, as on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description).
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will treat all entities as
+# documented, even if no documentation is available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = ../src
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp
+# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc
+
+FILE_PATTERNS = *.h *.cpp
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+
+INPUT_FILTER =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 3
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse the
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off. Note that this
+# option is superseded by the HAVE_DOT option below. This is only a fallback. It is
+# recommended to install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = YES
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes that
+# lie further from the root node will be omitted. Note that setting this option to
+# 1 or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that a graph may be further truncated if the graph's image dimensions are
+# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT).
+# If 0 is used for the depth value (the default), the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/fsa/doc/docbook/.gitignore b/fsa/doc/docbook/.gitignore
new file mode 100644
index 00000000000..56d5bd4fda9
--- /dev/null
+++ b/fsa/doc/docbook/.gitignore
@@ -0,0 +1,4 @@
+*.1
+*.html
+manpage.links
+manpage.refs
diff --git a/fsa/doc/docbook/fsadump.xml b/fsa/doc/docbook/fsadump.xml
new file mode 100644
index 00000000000..c4a72a157a2
--- /dev/null
+++ b/fsa/doc/docbook/fsadump.xml
@@ -0,0 +1,205 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="fsadump">
+
+<refmeta>
+<refentrytitle>fsadump</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>fsadump</refname>
+<refpurpose>dump the contents of finite state automata files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+ <command>fsadump</command>
+ <arg>OPTIONS</arg>
+ <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>fsadump</command> dumps the contents of fsa files to standard
+out in one of several different formats (some of which can be directly
+used as input for <command>makefsa</command>).
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-e</option></term>
+<listitem>
+<para>
+text output format, with empty meta info (default)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-t</option></term>
+<listitem>
+<para>
+text output format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-b</option></term>
+<listitem>
+<para>
+binary output format, with base64 encoded meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-B</option></term>
+<listitem>
+<para>
+binary output format with raw meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-n</option></term>
+<listitem>
+<para>
+text output with numerical meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-p</option></term>
+<listitem>
+<para>
+text output format, with the perfect hash value instead of meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-d</option></term>
+<listitem>
+<para>
+dot output format (for visualization using graphviz)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+
+<refsect1><title>Output formats</title>
+<para>
+<variablelist>
+<varlistentry>
+<term>Text output format with empty meta info (<option>-e</option>)</term>
+<listitem>
+<para>
+The input strings are terminated with '\n', and may not contain '\0',
+'\0xff' or '\n' characters. This is the default.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format (<option>-t</option>)</term>
+<listitem>
+<para>
+Lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input and meta strings may not contain '\0',
+'\0xff', '\n' or '\t' characters. A terminating '\0' (if found) is
+removed from the end of the meta info.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format with numerical info (<option>-n</option>)</term>
+<listitem>
+<para>
+Lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input strings may not contain '\0', '\0xff', '\n'
+or '\t' characters. Meta strings are unsigned integers ([0-9]+), which
+are retrieved from the binary representation in the
+automaton. Valid data sizes are 1, 2 or 4 bytes; for sizes other than
+these only the first 2 or 4 bytes are used.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary output format, with base64 encoded meta info (<option>-b</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info is base64 encoded, as it may contain any character.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary output format with raw meta info (<option>-B</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\0xff'. The
+meta info must not contain '\0'.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text output format, with the perfect hash value instead of meta
+info (<option>-p</option>)</term>
+<listitem>
+<para>
+The format is the same as for text output with numerical info, but the
+perfect hash value for each string is used instead of meta info.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Dot output format for visualization using GraphViz (<option>-d</option>)</term>
+<listitem>
+<para>
+Output a dot format graph, with the start and final states marked and
+edges labeled with transition symbols. The <command>dot</command> tool
+can be used to generate graphical output (e.g. PostScript) of the
+graph. Use this format for small automata only (a few hundred states
+or less), as the graph soon becomes quite complex.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+makefsa, fsainfo.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/docbook/fsainfo.xml b/fsa/doc/docbook/fsainfo.xml
new file mode 100644
index 00000000000..d0315b112ba
--- /dev/null
+++ b/fsa/doc/docbook/fsainfo.xml
@@ -0,0 +1,177 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="fsainfo">
+
+<refmeta>
+<refentrytitle>fsainfo</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>fsainfo</refname>
+<refpurpose>display information about finite state automata files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+ <command>fsainfo</command>
+ <arg>OPTIONS</arg>
+ <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>fsainfo</command> displays information about fsa files,
+mainly based on the fsa header. In addition,
+<command>fsainfo</command> tries to load the fsa file and reports
+whether loading succeeded. The following information is presented:
+</para>
+<para>
+<variablelist>
+<varlistentry>
+<term>Header size</term>
+<listitem>
+<para>
+Size of the fsa header (usually 256 bytes).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Magic</term>
+<listitem>
+<para>
+Magic number identifying fsa files (2038637673). No attempt is made to
+load files with the wrong magic.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Version</term>
+<listitem>
+<para>
+Version of the fsa library used for building the fsa file (e.g. 1.0.2).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Serial number</term>
+<listitem>
+<para>
+Serial number of the fsa file.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Checksum</term>
+<listitem>
+<para>
+Checksum for verifying the integrity of the fsa file. If the checksum
+verification fails, the fsa file will not be loaded.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>FSA size</term>
+<listitem>
+<para>
+Size of the automaton (in number of cells and bytes).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Start state</term>
+<listitem>
+<para>
+Index of the start state.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Data size</term>
+<listitem>
+<para>
+Size of data storage used for storing meta information for final states.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Data item type</term>
+<listitem>
+<para>
+Type of meta data items (fixed or variable size).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Fixed item size</term>
+<listitem>
+<para>
+Size of meta data items, if fixed size.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Perfect hash</term>
+<listitem>
+<para>
+Indication whether the fsa was built with perfect hash (yes/no).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Perfect hash size</term>
+<listitem>
+<para>
+Perfect hash size, if the fsa was built with perfect hash.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Total size</term>
+<listitem>
+<para>
+Full size of the fsa file (header + automaton + meta data + perfect hash).
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+makefsa, fsadump.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/docbook/makefsa.xml b/fsa/doc/docbook/makefsa.xml
new file mode 100644
index 00000000000..4673b06f78d
--- /dev/null
+++ b/fsa/doc/docbook/makefsa.xml
@@ -0,0 +1,224 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V3.1//EN">
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<refentry id="makefsa">
+
+<refmeta>
+<refentrytitle>makefsa</refentrytitle>
+<manvolnum>1</manvolnum>
+</refmeta>
+
+<refnamediv>
+<refname>makefsa</refname>
+<refpurpose>create finite state automata files from text or binary input</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+<cmdsynopsis>
+ <command>makefsa</command>
+ <arg>OPTIONS</arg>
+ <arg>input_file</arg>
+ <arg choice='plain'>fsa_file</arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+
+<refsect1><title>Description</title>
+<para>
+<command>makefsa</command> creates a finite state automaton file from
+text or binary input. If <option>input_file</option> is not specified,
+standard input is used. The input must be sorted and must not contain
+duplicate input strings (unsorted or duplicate entries will be
+ignored).
+</para>
+<refsect2><title>Options</title>
+<para>
+<variablelist>
+<varlistentry>
+<term><option>-e</option></term>
+<listitem>
+<para>
+use text input format, with empty meta info (default)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-t</option></term>
+<listitem>
+<para>
+use text input format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-b</option></term>
+<listitem>
+<para>
+use binary input format, with base64 encoded meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-B</option></term>
+<listitem>
+<para>
+use binary input format with raw meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-n</option></term>
+<listitem>
+<para>
+use text input with numerical meta info
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-s size</option></term>
+<listitem>
+<para>
+data size for numerical meta info (default=4)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-i</option></term>
+<listitem>
+<para>
+ignore meta info regardless of input format
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-p</option></term>
+<listitem>
+<para>
+build the automaton with a perfect hash
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-S num</option></term>
+<listitem>
+<para>
+set serial number of automaton (default=0)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-v</option></term>
+<listitem>
+<para>
+be verbose, display progress information and statistics
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-h</option></term>
+<listitem>
+<para>
+display usage help
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term><option>-V</option></term>
+<listitem>
+<para>
+display version number
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect2>
+</refsect1>
+
+
+<refsect1><title>Input formats</title>
+<para>
+<variablelist>
+<varlistentry>
+<term>Text input format with empty meta info (<option>-e</option>)</term>
+<listitem>
+<para>
+The input strings are terminated with '\n', and may not contain '\0',
+'\xff' or '\n' characters. This is the default.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text input format (<option>-t</option>)</term>
+<listitem>
+<para>
+Input lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input and meta strings may not contain '\0',
+'\xff', '\n' or '\t' characters. A terminating '\0' is added to the
+meta info when stored in the automaton.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Text input format with numerical info (<option>-n</option>)</term>
+<listitem>
+<para>
+Input lines are terminated with '\n', input string and meta info are
+separated by '\t'. Input strings may not contain '\0', '\xff', '\n'
+or '\t' characters. Meta strings are unsigned integers ([0-9]+), which
+will be stored in binary representation in the automaton. The size of
+the data can be controlled by the <option>-s</option> option, valid
+values are 1, 2 or 4 bytes, corresponding to uint8_t, uint16_t and
+uint32_t, respectively. (Default is 4 bytes.)
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary input format, with base64 encoded meta info (<option>-b</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\xff'. The
+meta info is base64 encoded, as it may contain any character.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>Binary input format with raw meta info (<option>-B</option>)</term>
+<listitem>
+<para>
+Both the input string and meta info are terminated by '\0'. The input
+string must not contain the reserved characters '\0' and '\xff'. The
+meta info must not contain '\0'.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+</refsect1>
+
+<refsect1><title>Perfect hashes</title>
+<para>
+Automata built with perfect hash (<option>-p</option>) will contain
+an additional data structure which provides a mapping from the strings
+stored in the automaton to unique integers in the range [0,n-1] where
+n is the number of accepted strings. The size of the fsa file will
+increase by up to 80%. Lookup time is slightly longer if the hash
+value needs to be retrieved (but still O(m), where m is the length of
+the input). Reverse lookup is also possible, though it is more
+expensive (also O(m), but with a much higher constant).
+</para>
+</refsect1>
+
+<refsect1><title>See also</title>
+<para>
+fsainfo, fsadump.
+</para>
+</refsect1>
+
+<refsect1><title>Author</title>
+<para>
+Written by Peter Boros.
+</para>
+</refsect1>
+
+</refentry>
diff --git a/fsa/doc/fsa_file_format.html b/fsa/doc/fsa_file_format.html
new file mode 100644
index 00000000000..077edd627c3
--- /dev/null
+++ b/fsa/doc/fsa_file_format.html
@@ -0,0 +1,69 @@
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<html>
+<head>
+<title>fsa file format</title>
+</head>
+<body>
+<h2>fsa file format</h2>
+<table border="1" cellpadding="2" cellspacing="0">
+<tr><td>header</td><td>256 bytes</td></tr>
+<tr><td>symbol table</td><td><em>size</em> bytes</td></tr>
+<tr><td>state table</td><td><em>size</em>*4 bytes</td></tr>
+<tr><td>data table</td><td><em>data_size</em> bytes</td></tr>
+<tr><td>perfect hash table (optional)</td><td><em>size</em>*4 bytes</td></tr>
+</table>
+
+<h3>header</h3>
+<table border="1" cellpadding="2" cellspacing="0">
+<tr><th>field</th><th>offset</th><th>size</th><th>description</th></tr>
+<tr><td>magic</td><td>0</td><td>4 (uint32)</td><td>Magic number (0x79832469)</td></tr>
+<tr><td>version</td><td>4</td><td>4 (uint32)</td><td>Version number of
+the fsa library used for building this fsa</td></tr>
+<tr><td>checksum</td><td>8</td><td>4 (uint32)</td><td>Checksum</td></tr>
+<tr><td>size</td><td>12</td><td>4 (uint32)</td><td>Size of fsa (cells)</td></tr>
+<tr><td>start</td><td>16</td><td>4 (uint32)</td><td>Start state</td></tr>
+<tr><td>data_size</td><td>20</td><td>4 (uint32)</td><td>Size of data (bytes)</td></tr>
+<tr><td>data_type</td><td>24</td><td>4 (uint32)</td><td>Type of data
+items (0=variable size, 1=fixed size)</td></tr>
+<tr><td>fixed_data_size</td><td>28</td><td>4 (uint32)</td><td>Data item size if fixed</td></tr>
+<tr><td>has_perfect_hash</td><td>32</td><td>4
+(uint32)</td><td>Indicator for perfect hash (0=no, 1=yes)</td></tr>
+<tr><td>serial</td><td>36</td><td>4 (uint32)</td><td>Serial number</td></tr>
+<tr><td>reserved</td><td>40</td><td>216 (54*uint32)</td><td>Reserved (pads size to 256 bytes)</td></tr>
+</table>
+
+<h3>symbol table and state table</h3>
+The symbol table and state table contain the transitions of the
+automaton, each 1-byte entry in the symbol table corresponds to an
+uint32 entry in the state table. For each state, a list of at most 254
+transitions is stored, as the symbol set is 8-bit characters, with
+0x00 and 0xff reserved. Each state id is in fact an offset into these
+tables. For a given state <em>state</em>, there exists a valid
+transition for symbol <em>sym</em> if the symbol table contains
+<em>sym</em> at offset <em>state</em>+<em>sym</em>. 0x00 means the
+cell is empty, 0xff is a special symbol meaning that the given state
+is a final state, anything else means invalid transition (i.e. the
+cell is in use by some other state). For valid transitions, the
+corresponding entry in the state table yields the next state. For 0xff
+transitions, the state table entry contains the offset of the data
+item within the data store.
+
+<h3>data store</h3>
+The data store contains the data items for the final states. The 'new
+state' entry of a final state transition in the state table (corresponding to the
+special final state symbol 0xff) contains the data store offset of the data item
+corresponding to that final state. If fixed size items are used, each
+item takes fixed_data_size bytes as defined in the header. Variable
+size items take 4 bytes (uint32 item_size) plus <em>item_size</em>
+bytes. The size of the data store is given in the header.
+
+<h3>perfect hash table</h3>
+The perfect hash table has one uint32 entry for each transition in the
+symbol/state table, thus the size of the perfect hash table equals the
+size of the state table. The perfect hash value for a final state is
+calculated by adding all values in this table for the transitions
+along the path from the start state to the final state.
+
+</body>
+</html>
+
diff --git a/fsa/doc/permute_query.stats b/fsa/doc/permute_query.stats
new file mode 100644
index 00000000000..3515bb9a631
--- /dev/null
+++ b/fsa/doc/permute_query.stats
@@ -0,0 +1,18 @@
+
+Statistics:
+
+ Empty or single term: 6815022 5.6768%
+ Too long: 864794 0.7204%
+ Length 2 (grams 1): 27184017 22.6438%
+ Length 3 (grams 4): 32461067 27.0395%
+ Length 4 (grams 11): 24369083 20.2990%
+ Length 5 (grams 26): 14157811 11.7932%
+ Length 6 (grams 57): 7269208 6.0551%
+ Length 7 (grams 119): 3612039 3.0088%
+ Length 8 (grams 238): 1822986 1.5185%
+ Length 9 (grams 456): 962163 0.8015%
+ Length 10 (grams 837): 532530 0.4436%
+ Total: 120050720
+
+Average number of grams per query: 24.62
+
diff --git a/fsa/pom.xml b/fsa/pom.xml
new file mode 100644
index 00000000000..aef9682deaa
--- /dev/null
+++ b/fsa/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!-- Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>6-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>fsa</artifactId>
+ <packaging>container-plugin</packaging>
+ <version>6-SNAPSHOT</version>
+ <description>
+ This is the FSA library and tools, an implementation of finite state
+ automata (FSA) and related algorithms.
+ </description>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>vespajlib</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>annotations</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/fsa/queryproc/.gitignore b/fsa/queryproc/.gitignore
new file mode 100644
index 00000000000..a073ef1dd72
--- /dev/null
+++ b/fsa/queryproc/.gitignore
@@ -0,0 +1,9 @@
+.deps
+.libs
+Makefile
+Makefile.in
+count_plain_grams
+count_sorted_grams
+p2s_ratio
+permute_query
+sort_grams
diff --git a/fsa/queryproc/count_plain_grams.cpp b/fsa/queryproc/count_plain_grams.cpp
new file mode 100644
index 00000000000..197c958149b
--- /dev/null
+++ b/fsa/queryproc/count_plain_grams.cpp
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <string>
+
+#include "fsa.h"
+#include "permuter.h"
+#include "selector.h"
+#include "ngram.h"
+#include "base64.h"
+#include "wordchartokenizer.h"
+
+using namespace fsa;
+
+/**
+ * Number of term combinations ("grams") of size 2..mg that can be
+ * selected from q terms. For mg <= q this is the sum of the binomial
+ * coefficients C(q,k) for k = 2..mg.
+ *
+ * @param mg largest gram size to count.
+ * @param q  number of terms to select from.
+ * @return the accumulated count (plain unsigned int arithmetic).
+ */
+unsigned int gram_count(unsigned int mg, unsigned int q)
+{
+  unsigned int total=0;
+
+  for(unsigned int k=2;k<=mg;++k){
+    // C(q,k) = (m+1)*...*q / (q-m)!, where m is the larger of k and q-k.
+    unsigned int lower=q-k+1;
+    if(k>q-k)
+      lower=k+1;
+    unsigned int numer=1;
+    unsigned int denom=1;
+    for(unsigned int f=lower;f<=q;++f){
+      numer*=f;
+      denom*=(q-f)+1;
+    }
+    total+=numer/denom;
+  }
+  return total;
+}
+
+/**
+ * Count how often each gram accepted by an automaton occurs in a query
+ * log read from standard input.
+ *
+ * Usage: count_plain_grams fsa_file
+ *
+ * Each input line is tokenized; token 0 is converted with atoi() and
+ * used as the frequency of the line, the remaining tokens form the query
+ * (presumably the log format is "frequency term term ..." -- confirm
+ * against NGram::set()). For queries of 2..MAXQUERY terms, every run of
+ * 2..MAXGRAM consecutive terms which ends in a final state of the
+ * automaton is credited with the frequency of the line, at most once per
+ * line. The accumulated "gram<TAB>count" pairs are written to standard
+ * output.
+ *
+ * @return 0 on success, 1 on wrong usage.
+ */
+int main(int argc, char **argv)
+{
+  const unsigned int MAXQUERY = 10;
+  const unsigned int MAXGRAM = 6;
+
+  // NOTE(review): p and s are not used below; they are kept because the
+  // effects of their constructors are not visible from this file.
+  Permuter p;
+  NGram freq_s,query;
+  WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_WHITESPACEONLY);
+  unsigned int freq;
+  Selector s;
+  std::string qstr;
+  unsigned int qlen;
+
+  if(argc!=2){
+    std::cerr << "usage: " << argv[0] << " fsa_file" << std::endl;
+    return 1;
+  }
+
+  FSA fsa(argv[1]);
+  FSA::State state(fsa);
+  std::map<std::string,unsigned int> grams,gq;
+  std::map<std::string,unsigned int>::iterator grams_it,gq_it;
+  std::string gram_str;
+
+  // Loop on the result of getline() itself: testing eof() before reading
+  // handles one extra empty line at end of input and never terminates if
+  // the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,qstr)){
+    query.set(qstr,tokenizer,1,-1);
+    qlen = query.length();
+    if(2<=qlen && qlen<=MAXQUERY){
+      freq_s.set(qstr,tokenizer,0,1);
+      freq=atoi(freq_s[0].c_str());
+      // gq collects the grams of this line only, so that a gram repeated
+      // within one query is credited once.
+      gq.clear();
+      for(unsigned int i=0;i<qlen-1;i++){
+        for(unsigned int j=2;j<=MAXGRAM&&i+j<=qlen;j++){
+          state.startWord(query[i]);
+          for(unsigned int k=1;state.isValid()&&k<j;k++){
+            state.deltaWord(query[i+k]);
+          }
+          if(state.isFinal()){
+            gram_str = query.join(" ",i,j);
+            gq[gram_str]=freq;
+          }
+        }
+      }
+      // operator[] value-initializes a missing counter to 0.
+      for(gq_it=gq.begin();gq_it!=gq.end();++gq_it)
+        grams[gq_it->first]+=gq_it->second;
+    }
+  }
+
+  for(grams_it=grams.begin();grams_it!=grams.end();++grams_it)
+    std::cout << grams_it->first << '\t' << grams_it->second << std::endl;
+
+  return 0;
+}
diff --git a/fsa/queryproc/count_sorted_grams.cpp b/fsa/queryproc/count_sorted_grams.cpp
new file mode 100644
index 00000000000..58be4548fac
--- /dev/null
+++ b/fsa/queryproc/count_sorted_grams.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <string>
+
+#include "fsa.h"
+#include "permuter.h"
+#include "selector.h"
+#include "ngram.h"
+#include "base64.h"
+#include "wordchartokenizer.h"
+
+using namespace fsa;
+
+/**
+ * Count how often each sorted gram accepted by an automaton occurs in a
+ * query log read from standard input.
+ *
+ * Usage: count_sorted_grams sorted_fsa_file
+ *
+ * Each input line is tokenized; token 0 is converted with atoi() and
+ * used as the frequency of the line, the remaining tokens form the query
+ * (presumably "frequency term term ..." -- confirm against NGram::set()).
+ * Queries of 2..MAXQUERY terms are sorted and de-duplicated, then every
+ * combination of 2..MAXGRAM of the remaining terms is looked up in the
+ * automaton; combinations ending in a final state are credited with the
+ * frequency of the line. The accumulated "gram<TAB>count" pairs are
+ * written to standard output.
+ *
+ * @return 0 on success, 1 on wrong usage.
+ */
+int main(int argc, char **argv)
+{
+  const unsigned int MAXQUERY = 10;
+  const unsigned int MAXGRAM = 6;
+
+  // NOTE(review): p is not used below (only static Permuter members are
+  // called); kept because the effects of its constructor are not visible
+  // from this file.
+  Permuter p;
+  NGram freq_s,query,gram;
+  WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_WHITESPACEONLY);
+  unsigned int freq;
+  Selector s;
+  std::string qstr;
+  unsigned int qlen;
+
+  if(argc!=2){
+    std::cerr << "usage: " << argv[0] << " sorted_fsa_file" << std::endl;
+    return 1;
+  }
+
+  FSA fsa(argv[1]);
+  FSA::State state(fsa);
+  std::map<std::string,unsigned int> grams;
+  std::map<std::string,unsigned int>::iterator grams_it;
+  std::string gram_str;
+
+  // Loop on the result of getline() itself: testing eof() before reading
+  // handles one extra empty line at end of input and never terminates if
+  // the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,qstr)){
+    query.set(qstr,tokenizer,1,-1);
+    qlen = query.length();
+    if(2<=qlen && qlen<=MAXQUERY){
+      freq_s.set(qstr,tokenizer,0,1);
+      freq=atoi(freq_s[0].c_str());
+      query.sort();
+      qlen = query.uniq();
+      unsigned int glen=qlen<MAXGRAM?qlen:MAXGRAM;
+      for(unsigned int n=2;n<=glen;n++){
+        // firstComb()/nextComb() yield 0 once the combinations of size n
+        // are exhausted.
+        unsigned int c=Permuter::firstComb(n,qlen);
+        while(c>0){
+          s.clear();
+          s.set(c);
+          gram.set(query,s);
+          state.startWord(gram[0]);
+          for(unsigned int i=1;state.isValid()&&i<gram.size();i++){
+            state.deltaWord(gram[i]);
+          }
+          if(state.isFinal()){
+            gram_str = gram.join(" ");
+            // operator[] value-initializes a missing counter to 0.
+            grams[gram_str]+=freq;
+          }
+          c=Permuter::nextComb(c,qlen);
+        }
+      }
+    }
+  }
+
+  for(grams_it=grams.begin();grams_it!=grams.end();++grams_it)
+    std::cout << grams_it->first << '\t' << grams_it->second << std::endl;
+
+  return 0;
+}
diff --git a/fsa/queryproc/p2s_ratio.cpp b/fsa/queryproc/p2s_ratio.cpp
new file mode 100644
index 00000000000..cbc61c45d53
--- /dev/null
+++ b/fsa/queryproc/p2s_ratio.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+#include <iomanip>
+#include <map>
+#include <string>
+
+#include "fsa.h"
+#include "permuter.h"
+#include "ngram.h"
+#include "base64.h"
+
+using namespace fsa;
+
+/**
+ * Print the ratio of plain to sorted gram counts.
+ *
+ * Usage: p2s_ratio plain_count_fsa_file sorted_count_fsa_file
+ *
+ * Every line of standard input is taken as a gram. Grams of more than
+ * one term are looked up as given in the first automaton, and in sorted,
+ * de-duplicated form in the second one. If both lookups end in a final
+ * state, "gram<TAB>c1,c2,c1/c2" is written to standard output, where c1
+ * and c2 are the unsigned int values found at the data pointers of the
+ * two states.
+ *
+ * @return 0 on success, 1 on wrong usage.
+ */
+int main(int argc, char **argv)
+{
+  // NOTE(review): p, freq_s and s are not used below; they are kept
+  // because the effects of their constructors are not visible from this
+  // file.
+  Permuter p;
+  NGram freq_s,gram,sorted_gram;
+  Selector s(10);
+  std::string gstr;
+
+  if(argc!=3){
+    std::cerr << "usage: " << argv[0] << " plain_count_fsa_file sorted_count_fsa_file" << std::endl;
+    return 1;
+  }
+
+  FSA plain_fsa(argv[1]);
+  FSA sorted_fsa(argv[2]);
+  FSA::State state1(plain_fsa),state2(sorted_fsa);
+
+  // Loop on the result of getline() itself: testing eof() before reading
+  // handles one extra empty line at end of input and never terminates if
+  // the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,gstr)){
+    gram.set(gstr);
+    if(gram.length()>1){
+      sorted_gram.set(gram);
+      sorted_gram.sort();
+      sorted_gram.uniq();
+      state1.startWord(gram[0]);
+      for(unsigned int i=1;state1.isValid()&&i<gram.length();i++){
+        state1.deltaWord(gram[i]);
+      }
+      state2.startWord(sorted_gram[0]);
+      for(unsigned int i=1;state2.isValid()&&i<sorted_gram.length();i++){
+        state2.deltaWord(sorted_gram[i]);
+      }
+      if(state1.isFinal() && state2.isFinal()){
+        // NOTE(review): assumes both automata store an unsigned int sized,
+        // suitably aligned data item per final state -- confirm.
+        unsigned int c1,c2;
+        c1=*((unsigned int*)state1.data());
+        c2=*((unsigned int*)state2.data());
+        std::cout << gram << "\t" << c1 << "," << c2 << "," << (double)c1/(double)c2 << std::endl;
+      }
+    }
+  }
+
+  return 0;
+}
diff --git a/fsa/queryproc/permute_query.cpp b/fsa/queryproc/permute_query.cpp
new file mode 100644
index 00000000000..7645e864a44
--- /dev/null
+++ b/fsa/queryproc/permute_query.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+#include <iomanip>
+
+#include "permuter.h"
+#include "selector.h"
+#include "ngram.h"
+#include "base64.h"
+
+#if (__GNUG__ <3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+namespace std {
+const char *fixed = "";
+}
+#endif
+
+using namespace fsa;
+
+/**
+ * Number of term combinations ("grams") of size 2..mg that can be
+ * selected from q terms. For mg <= q this is the sum of the binomial
+ * coefficients C(q,k) for k = 2..mg.
+ *
+ * @param mg largest gram size to count.
+ * @param q  number of terms to select from.
+ * @return the accumulated count (plain unsigned int arithmetic).
+ */
+unsigned int gram_count(unsigned int mg, unsigned int q)
+{
+  unsigned int total=0;
+
+  for(unsigned int k=2;k<=mg;++k){
+    // C(q,k) = (m+1)*...*q / (q-m)!, where m is the larger of k and q-k.
+    unsigned int lower=q-k+1;
+    if(k>q-k)
+      lower=k+1;
+    unsigned int numer=1;
+    unsigned int denom=1;
+    for(unsigned int f=lower;f<=q;++f){
+      numer*=f;
+      denom*=(q-f)+1;
+    }
+    total+=numer/denom;
+  }
+  return total;
+}
+
+/**
+ * List the sorted term combinations of each query read from standard
+ * input and report query length statistics.
+ *
+ * Queries of 2..MAXQUERY terms are echoed to standard output, then
+ * sorted and de-duplicated, and every combination of 2..MAXGRAM of the
+ * remaining terms is printed. When verbose is set (it always is), a
+ * length histogram and the average number of grams per query are written
+ * to standard error.
+ *
+ * @return always 0.
+ */
+int main(int argc, char **argv)
+{
+  const unsigned int MAXQUERY = 10;
+  const unsigned int MAXGRAM = 6;
+
+  // NOTE(review): p is not used below (only static Permuter members are
+  // called); kept because the effects of its constructor are not visible
+  // from this file.
+  Permuter p;
+  NGram query,gram;
+  Selector s;
+  std::string qstr;
+  unsigned int qlen;
+  bool verbose=true;
+  unsigned int i;
+  double total,ctotal;
+  // stats[0]: queries with fewer than 2 terms, stats[1]: queries with
+  // more than MAXQUERY terms, stats[n]: queries with exactly n terms.
+  int stats[MAXQUERY+1];
+
+  for(i=0;i<=MAXQUERY;i++)
+    stats[i]=0;
+  // Loop on the result of getline() itself: testing eof() before reading
+  // counts one phantom empty query at end of input and never terminates
+  // if the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,qstr)){
+    query.set(qstr,1);
+    qlen = query.length();
+    if(2<=qlen && qlen<=MAXQUERY){
+      // The histogram uses the length before duplicates are removed.
+      stats[qlen]++;
+      std::cout << "QUERY: " << query << std::endl;
+      query.sort();
+      qlen = query.uniq();
+      unsigned int glen=qlen<MAXGRAM?qlen:MAXGRAM;
+      for(unsigned int n=2;n<=glen;n++){
+        // firstComb()/nextComb() yield 0 once the combinations of size n
+        // are exhausted.
+        unsigned int c=Permuter::firstComb(n,qlen);
+        while(c>0){
+          s.clear();
+          s.set(c);
+          gram.set(query,s);
+          std::cout << " " << gram << std::endl;
+          c=Permuter::nextComb(c,qlen);
+        }
+      }
+    }
+    else{
+      if(qlen<2)
+        stats[0]++;
+      else
+        stats[1]++;
+    }
+  }
+
+
+
+  if(verbose){
+    total=0.0;ctotal=0.0;
+    for(i=0;i<=MAXQUERY;i++)
+      total+=stats[i];
+    // With no input at all every counter is 0; divide by 1 instead of 0
+    // so that the report shows zeros rather than NaN.
+    const double denom=(total>0.0)?total:1.0;
+    std::cerr << std::fixed << std::setprecision(4) << std::endl;
+    std::cerr << "Statistics:" << std::endl;
+    std::cerr << std::endl;
+    std::cerr << " Empty or single term: " <<
+      std::setw(12) << stats[0] << " " <<
+      std::setw(7) << double(stats[0])*100.0/denom << "%" << std::endl;
+    std::cerr << " Too long: " <<
+      std::setw(12) << stats[1] << " " <<
+      std::setw(7) << double(stats[1])*100.0/denom << "%" << std::endl;
+    for(i=2;i<=MAXQUERY;i++){
+      std::cerr << " Length " << std::setw(2) << i << " (grams " << std::setw(3) <<
+        gram_count(i<MAXGRAM?i:MAXGRAM,i) << "): " <<
+        std::setw(12) << stats[i] << " " <<
+        std::setw(7) << double(stats[i])*100.0/denom << "%" << std::endl;
+      ctotal+=stats[i]*gram_count(i<MAXGRAM?i:MAXGRAM,i);
+    }
+    std::cerr << " Total: " <<
+      std::setw(12) << std::setprecision(0) << total << std::endl;
+    std::cerr << std::endl;
+    std::cerr << "Average number of grams per query: " <<
+      std::setprecision(2) << ctotal/denom << std::endl;
+    std::cerr << std::endl;
+  }
+
+  return 0;
+}
diff --git a/fsa/queryproc/sort_grams.cpp b/fsa/queryproc/sort_grams.cpp
new file mode 100644
index 00000000000..427dba129ff
--- /dev/null
+++ b/fsa/queryproc/sort_grams.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+#include <iomanip>
+
+#include "permuter.h"
+#include "ngram.h"
+#include "base64.h"
+#include "wordchartokenizer.h"
+
+using namespace fsa;
+
+/**
+ * Read grams from standard input, one per line, and write each of them
+ * to standard output with its tokens sorted and de-duplicated.
+ *
+ * @return always 0.
+ */
+int main(int argc, char **argv)
+{
+
+  NGram query;
+  WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_WHITESPACEONLY);
+  std::string qstr;
+
+  // Loop on the result of getline() itself: testing eof() before reading
+  // emits one spurious empty line at end of input and never terminates if
+  // the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,qstr)){
+    query.set(qstr,tokenizer,0,-1);
+    query.sort();
+    query.uniq();
+    std::cout << query << std::endl;
+  }
+
+
+  return 0;
+}
diff --git a/fsa/src/.gitignore b/fsa/src/.gitignore
new file mode 100644
index 00000000000..65ad4d24f75
--- /dev/null
+++ b/fsa/src/.gitignore
@@ -0,0 +1,6 @@
+.depend
+*_test
+test.out
+/Makefile.ini
+/config_command.sh
+/fsa.mak
diff --git a/fsa/src/alltest/.gitignore b/fsa/src/alltest/.gitignore
new file mode 100644
index 00000000000..c950caba857
--- /dev/null
+++ b/fsa/src/alltest/.gitignore
@@ -0,0 +1,15 @@
+Makefile
+.depend
+__testfsa__.__fsa__
+fsa_conceptnet_test_app
+fsa_detector_test_app
+fsa_fsa_create_test_app
+fsa_fsa_perf_test_app
+fsa_fsa_test_app
+fsa_fsamanager_test_app
+fsa_lookup_test_app
+fsa_ngram_test_app
+fsa_segmenter_test_app
+fsa_vectorizer_perf_test_app
+fsa_vectorizer_test_app
+*.output
diff --git a/fsa/src/alltest/CMakeLists.txt b/fsa/src/alltest/CMakeLists.txt
new file mode 100644
index 00000000000..d82ca400405
--- /dev/null
+++ b/fsa/src/alltest/CMakeLists.txt
@@ -0,0 +1,70 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(fsa_conceptnet_test_app
+ SOURCES
+ conceptnet_test.cpp
+ DEPENDS
+ fsamanagers
+ fsa
+)
+vespa_add_executable(fsa_detector_test_app
+ SOURCES
+ detector_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_test_app
+ SOURCES
+ fsa_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_create_test_app
+ SOURCES
+ fsa_create_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsa_perf_test_app
+ SOURCES
+ fsa_perftest.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_fsamanager_test_app
+ SOURCES
+ fsamanager_test.cpp
+ DEPENDS
+ fsamanagers
+ fsa
+)
+vespa_add_executable(fsa_lookup_test_app
+ SOURCES
+ lookup_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_ngram_test_app
+ SOURCES
+ ngram_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_segmenter_test_app
+ SOURCES
+ segmenter_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_vectorizer_test_app
+ SOURCES
+ vectorizer_test.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_executable(fsa_vectorizer_perf_test_app
+ SOURCES
+ vectorizer_perftest.cpp
+ DEPENDS
+ fsa
+)
+vespa_add_test(NAME fsa_vectorizer_perf_test_app NO_VALGRIND COMMAND sh alltest.sh)
diff --git a/fsa/src/alltest/alltest.sh b/fsa/src/alltest/alltest.sh
new file mode 100755
index 00000000000..37274721e25
--- /dev/null
+++ b/fsa/src/alltest/alltest.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+./detector_test.sh
+./fsa_test.sh
+./fsa_fsa_create_test_app
+./fsa_fsa_perf_test_app
+./fsa_fsamanager_test_app . __testfsa__.__fsa__
+./lookup_test.sh
+./ngram_test.sh
+./segmenter_test.sh
+./vectorizer_test.sh
+./fsa_vectorizer_perf_test_app
diff --git a/fsa/src/alltest/conceptnet_test.cpp b/fsa/src/alltest/conceptnet_test.cpp
new file mode 100644
index 00000000000..38c020aa511
--- /dev/null
+++ b/fsa/src/alltest/conceptnet_test.cpp
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <vespa/fsa/conceptnet.h>
+#include <vespa/fsamanagers/conceptnetmanager.h>
+
+using namespace fsa;
+
+/**
+ * Look up units in a concept net and print what is stored for them.
+ *
+ * Usage: conceptnet_test [-aec] DOMAIN [UNIT ...]
+ *
+ * The concept net is loaded from DOMAIN.fsa and DOMAIN.dat. For every
+ * UNIT its index, frequencies, score and strength are printed; -e, -a
+ * and -c additionally list its extensions, associations and categories.
+ *
+ * @return 0 on success; exits with 1 on bad usage or load failure.
+ */
+int main(int argc, char **argv)
+{
+  // getopt() returns an int. Storing it in a plain char makes the
+  // comparison with -1 below always true where char is unsigned, so the
+  // option loop would never end.
+  int opt;
+  //extern char *optarg;
+  extern int optind;
+
+  bool do_ext = false, do_assoc = false, do_cat = false;
+
+  while((opt=getopt(argc,argv,"aec")) != -1){
+    switch(opt){
+    case 'a':
+      do_assoc = true;
+      break;
+    case 'e':
+      do_ext = true;
+      break;
+    case 'c':
+      do_cat = true;
+      break;
+    case '?':
+      fprintf(stderr,"conceptnet_test: unrecognized option\n");
+      exit(1);
+    }
+  }
+
+  if(optind>=argc){
+    fprintf(stderr,"usage: conceptnet_test [-aec] DOMAIN [UNIT ...]\n");
+    exit(1);
+  }
+
+  std::string domain = argv[optind];
+
+  if(!ConceptNetManager::instance().load(domain,
+                                         domain + ".fsa",
+                                         domain + ".dat")){
+    fprintf(stderr,"failed to load concept net %s\n",domain.c_str());
+    exit(1);
+  }
+
+  ConceptNet::Handle* cn = ConceptNetManager::instance().get(domain);
+
+  if(cn!=NULL){
+    // Remaining arguments after DOMAIN are the units to look up.
+    for(int i=optind+1;i<argc;i++){
+      int idx = (*cn)->lookup(argv[i]);
+      printf("%s(%d) : (%d,%d,%d,%d) (%f,%f)\n",argv[i],idx,
+             (*cn)->frq(idx),(*cn)->cFrq(idx),(*cn)->qFrq(idx),(*cn)->sFrq(idx),
+             (*cn)->score(idx),(*cn)->strength(idx));
+      if(do_ext){
+        for(int e = 0; e<(*cn)->numExt(idx); e++){
+          printf(" %s, %d\n",(*cn)->lookup((*cn)->ext(idx,e)),(*cn)->extFrq(idx,e));
+        }
+      }
+      if(do_assoc){
+        for(int a = 0; a<(*cn)->numAssoc(idx); a++){
+          printf(" %s, %d\n",(*cn)->lookup((*cn)->assoc(idx,a)),(*cn)->assocFrq(idx,a));
+        }
+      }
+      if(do_cat){
+        for(int c = 0; c<(*cn)->numCat(idx); c++){
+          printf(" %s\n",(*cn)->catName((*cn)->cat(idx,c)));
+        }
+      }
+    }
+  }
+  else {
+    fprintf(stderr,"failed to load concept net %s\n",domain.c_str());
+    exit(1);
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/conceptnet_test.out b/fsa/src/alltest/conceptnet_test.out
new file mode 100644
index 00000000000..9f3570cebf1
--- /dev/null
+++ b/fsa/src/alltest/conceptnet_test.out
@@ -0,0 +1,4 @@
+new york(841954) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+sunnyvale(1139231) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+gibson(479780) : (-1,-1,-1,-1) (-1.000000,-1.000000)
+metallica(770993) : (-1,-1,-1,-1) (-1.000000,-1.000000)
diff --git a/fsa/src/alltest/detector_test.cpp b/fsa/src/alltest/detector_test.cpp
new file mode 100644
index 00000000000..1942c4ba7a6
--- /dev/null
+++ b/fsa/src/alltest/detector_test.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file detector_test.cpp
+ * @brief Test for the detector class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/detector.h>
+#include <vespa/fsa/ngram.h>
+
+using namespace fsa;
+
+/**
+ * Hit receiver for the detector test: writes every reported hit to
+ * standard output.
+ */
+class MyHits : public Detector::Hits{
+public:
+  MyHits() {}
+  ~MyHits() {}
+
+  /**
+   * Print the token range and the text of one hit.
+   *
+   * @param text   tokenized text the hit was found in.
+   * @param from   index of the first token of the hit.
+   * @param length number of tokens in the hit.
+   */
+  void add(const NGram &text,
+           unsigned int from, int length,
+           const FSA::State &)
+  {
+    // Index of the last token of the hit (inclusive range is printed).
+    const unsigned int last = from+length-1;
+    std::cout << "detected: [" << from << "," << last << "], '";
+    std::cout << text.join(" ",from,length);
+    std::cout << "'\n";
+  }
+};
+
+/**
+ * Run the detector over every line of standard input and print the hits.
+ *
+ * The dictionary automaton is read from the file named by the first
+ * argument, or from "__testfsa__.__fsa__" if no argument is given.
+ *
+ * @return always 0.
+ */
+int main(int argc, char **argv)
+{
+  FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+
+  Detector d(dict);
+  MyHits h;
+
+  std::string text;
+  // Loop on the result of getline() itself: testing eof() before reading
+  // handles one extra empty line at end of input and never terminates if
+  // the stream fails without reaching end-of-file.
+  while(std::getline(std::cin,text)){
+    d.detect(text,h);
+  }
+
+  return 0;
+}
diff --git a/fsa/src/alltest/detector_test.out b/fsa/src/alltest/detector_test.out
new file mode 100644
index 00000000000..c5dbbdd08f1
--- /dev/null
+++ b/fsa/src/alltest/detector_test.out
@@ -0,0 +1,26 @@
+detected: [0,0], 'apple'
+detected: [0,0], 'apricot'
+detected: [0,0], 'artichoke'
+detected: [0,0], 'banana'
+detected: [0,0], 'cabbage'
+detected: [0,0], 'carrot'
+detected: [0,0], 'cherry'
+detected: [0,0], 'chili'
+detected: [0,0], 'cucumber'
+detected: [0,0], 'eggplant'
+detected: [0,0], 'grapes'
+detected: [0,0], 'lettuce'
+detected: [0,0], 'onion'
+detected: [0,0], 'paprika'
+detected: [0,1], 'passion fruit'
+detected: [0,0], 'pea'
+detected: [0,0], 'peach'
+detected: [0,0], 'pear'
+detected: [0,0], 'pineapple'
+detected: [0,0], 'plum'
+detected: [0,0], 'potato'
+detected: [0,0], 'pumpkin'
+detected: [0,1], 'sour cherry'
+detected: [1,1], 'cherry'
+detected: [0,0], 'squash'
+detected: [0,0], 'tomato'
diff --git a/fsa/src/alltest/detector_test.sh b/fsa/src/alltest/detector_test.sh
new file mode 100755
index 00000000000..dd6f650a35c
--- /dev/null
+++ b/fsa/src/alltest/detector_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_detector_test_app < testinput.txt > detector_test.output
+diff detector_test.output detector_test.out
diff --git a/fsa/src/alltest/fsa_create_test.cpp b/fsa/src/alltest/fsa_create_test.cpp
new file mode 100644
index 00000000000..c72ea900aad
--- /dev/null
+++ b/fsa/src/alltest/fsa_create_test.cpp
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/automaton.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+/**
+ * Build a small automaton of city, fruit and vegetable names, add a
+ * perfect hash, write it to "__testfsa__.__fsa__", and print the timings
+ * together with the result of one lookup in the FSA obtained from the
+ * builder.
+ *
+ * @return always 0.
+ */
+int main(int, char**)
+{
+
+  Automaton *aut = new Automaton;
+
+  Blob fruit("Fruit"), veggie("Vegetable"), city("City");
+
+  // Input strings in the order they are inserted, each with the meta data
+  // blob stored for it.
+  struct Entry {
+    const char *word;
+    Blob *meta;
+  };
+  Entry entries[] = {
+    {"Cupertino",&city},
+    {"Foster City",&city},
+    {"Los Altos",&city},
+    {"Menlo Park",&city},
+    {"Mountain View",&city},
+    {"Palo Alto",&city},
+    {"San Francisco",&city},
+    {"San Jose",&city},
+    {"Santa Clara",&city},
+    {"Saratoga",&city},
+    {"Sunnyvale",&city},
+    {"apple",&fruit},
+    {"apricot",&fruit},
+    {"artichoke",&veggie},
+    {"banana",&fruit},
+    {"cabbage",&veggie},
+    {"carrot",&veggie},
+    {"cherry",&fruit},
+    {"chili",&veggie},
+    {"cucumber",&veggie},
+    {"eggplant",&veggie},
+    {"grapes",&fruit},
+    {"lettuce",&veggie},
+    {"onion",&veggie},
+    {"paprika",&veggie},
+    {"passion fruit",&fruit},
+    {"pea",&veggie},
+    {"peach",&fruit},
+    {"pear",&fruit},
+    {"pineapple",&fruit},
+    {"plum",&fruit},
+    {"potato",&veggie},
+    {"pumpkin",&veggie},
+    {"sour cherry",&fruit},
+    {"squash",&veggie},
+    {"tomato",&veggie}
+  };
+  const unsigned int numEntries = sizeof(entries)/sizeof(entries[0]);
+
+  TimeStamp t;
+
+  aut->init();
+
+  for(unsigned int i=0;i<numEntries;i++)
+    aut->insertSortedString(entries[i].word,*entries[i].meta);
+
+  aut->finalize();
+
+  double d1 = t.elapsed();
+
+  aut->addPerfectHash();
+
+  double d2 = t.elapsed();
+
+  aut->write("__testfsa__.__fsa__");
+
+  double d3 = t.elapsed();
+
+  FSA *fsa = aut->getFSA();
+
+  double d4 = t.elapsed();
+
+  // Elapsed times are scaled by 1000 for the "ms" figures printed below.
+  const double buildMs = 1000*d1;
+  const double hashMs = 1000*(d2-d1);
+  const double retrievalMs = 1000*(d4-d3);
+  const char *outcome = (fsa!=NULL) ? "succeded" : "failed";
+  std::cout << "Automoaton build finished (" << buildMs << "ms," << hashMs << "ms)";
+  std::cout << ", fsa retrieval (" << retrievalMs << "ms) " << outcome << ".\n";
+
+  if(fsa!=NULL){
+    FSA::State fs(*fsa);
+    const unsigned char *pb = fs.lookup("cucumber");
+    std::cout << "Lookup(\"cucumber\") -> ";
+    if(pb==NULL){
+      std::cout << "not found.";
+    }
+    else{
+      std::cout << "\"" << pb << "\"";
+    }
+    std::cout << "\n";
+  }
+
+  delete aut;
+  delete fsa;
+
+  return 0;
+}
diff --git a/fsa/src/alltest/fsa_perftest.cpp b/fsa/src/alltest/fsa_perftest.cpp
new file mode 100644
index 00000000000..90d2c042b07
--- /dev/null
+++ b/fsa/src/alltest/fsa_perftest.cpp
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+int main(int, char**) // Times 10M "cucumber" lookups for each FSA state class.
+{
+ FSA f("__testfsa__.__fsa__");
+ FSA::State s(f);
+ FSA::HashedState hs(f);
+ FSA::MemoryState ms(f);
+ FSA::HashedMemoryState hms(f);
+ FSA::CounterState cs(f);
+ std::string input("cucumber");
+ unsigned int count=10000000,i;
+ // Report the benchmark parameters before timing anything.
+ std::cout << "Number of lookups: " << count << std::endl;
+ std::cout << "Input string length: " << input.length() << std::endl;
+ std::cout << std::endl;
+ // elapsed() differences are scaled by 1000 and labelled "ms" below, i.e. treated as seconds.
+ TimeStamp t;
+ double t0,t1;
+ // Each section runs start()+lookup(input) count times, then prints the time and count*length/t1 as "delta/sec".
+ t0=t.elapsed();
+ for(i=0;i<count;i++){
+ s.start();
+ s.lookup(input);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "State: " << t1*1000 << " ms" << "\t"
+ << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+ // Same measurement with HashedState.
+ t0=t.elapsed();
+ for(i=0;i<count;i++){
+ hs.start();
+ hs.lookup(input);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "HashedState: " << t1*1000 << " ms"<< "\t"
+ << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+ // Same measurement with MemoryState.
+ t0=t.elapsed();
+ for(i=0;i<count;i++){
+ ms.start();
+ ms.lookup(input);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "MemoryState: " << t1*1000 << " ms"<< "\t"
+ << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+ // Same measurement with HashedMemoryState.
+ t0=t.elapsed();
+ for(i=0;i<count;i++){
+ hms.start();
+ hms.lookup(input);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "HashedMemoryState: " << t1*1000 << " ms"<< "\t"
+ << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+ // Same measurement with CounterState.
+ t0=t.elapsed();
+ for(i=0;i<count;i++){
+ cs.start();
+ cs.lookup(input);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "CounterState: " << t1*1000 << " ms"<< "\t"
+ << (unsigned int)(count*input.length()/t1) << " delta/sec" << std::endl;
+
+ return 0;
+}
diff --git a/fsa/src/alltest/fsa_test.cpp b/fsa/src/alltest/fsa_test.cpp
new file mode 100644
index 00000000000..5bc95f20430
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+
+using namespace fsa;
+
+int main(int, char**) // FSA state smoke test; fsa_test.sh diffs stdout against fsa_test.out.
+{
+ FSA *f = new FSA("__testfsa__.__fsa__", FILE_ACCESS_MMAP);
+ FSA::State *fs = new FSA::State(*f);
+ // start/delta test: feed "cucumber" in three pieces and expect a final state.
+ std::string s("cucu");
+ fs->start(s);
+ fs->delta('m');
+ fs->delta("ber");
+ if(fs->isFinal()){
+ printf("start/delta test: string(\"cucu\")+'m'+\"ber\" is accepted\n");
+ printf(" data size: %d\n",fs->dataSize());
+ printf(" data string: \"%-*.*s\"\n",fs->dataSize(),fs->dataSize(),fs->data());
+ }
+ else {
+ printf("start/delta test failed.\n");
+ }
+ // lookup test: a NULL result is reported as "not found".
+ const unsigned char *pb = fs->lookup("cucumber");
+ if(pb!=NULL){
+ printf("lookup test: \"cucumber\" -> \"%s\"\n",pb);
+ }
+ else{
+ printf("lookup test: \"cucumber\" not found.\n");
+ }
+
+ // copy hashed state test: advance one state by "pe", copy it twice, then extend each separately.
+ FSA::HashedState *fs1 = new FSA::HashedState(*f);
+
+
+ fs1->delta("pe");
+
+ FSA::HashedState *fs2 = new FSA::HashedState(*fs1);
+ FSA::HashedState *fs3 = new FSA::HashedState(*fs1);
+
+
+
+ fs1->delta("a");
+ fs2->delta("ach");
+ fs3->delta("ar");
+ // hash()/data() of all three states are printed below, so all three must be final.
+ if(fs1->isFinal() && fs2->isFinal() && fs3->isFinal()){
+ printf("copy hashed state test:\n");
+ printf(" \"pe\"+\"a\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs1->hash(),fs1->dataSize(),fs1->dataSize(),fs1->dataSize(),fs1->data());
+ printf(" \"pe\"+\"ach\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs2->hash(),fs2->dataSize(),fs2->dataSize(),fs2->dataSize(),fs2->data());
+ printf(" \"pe\"+\"ar\": hash=%d, data_size=%d, data string=\"%-*.*s\"\n",
+ fs3->hash(),fs3->dataSize(),fs3->dataSize(),fs3->dataSize(),fs3->data());
+
+ }
+ else {
+ printf("copy hashed state test failed.\n");
+ }
+ // revLookup test: map 0..99 back to strings until an empty one comes back, re-hashing each.
+ printf("revLookup test:\n");
+ unsigned int i=0;
+ std::string res;
+ while(i<100){
+ res=fs2->revLookup(i);
+ if(res.size()==0)
+ break;
+ fs2->lookup(res);
+ printf(" %u -> %s -> %d\n",i,res.c_str(),fs2->hash());
+ i++;
+ }
+ // iterator test: list every continuation reachable after 'p'.
+ printf("iterator test:\n");
+ fs1->start('p');
+ printf(" possible continuations from \"p\":\n");
+ for(FSA::iterator it(*fs1); it!=fs1->end(); ++it){
+ printf(" \"p\" + \"%s\"\n",it->str().c_str());
+ }
+
+ delete fs;
+ delete fs1;
+ delete fs2;
+ delete fs3;
+
+ // counter/memory state test: print memory() and counter() after start() on three inputs.
+ printf("counter/memory state test\n");
+ FSA::CounterState *cs = new FSA::CounterState(*f);
+ FSA::MemoryState *ms = new FSA::MemoryState(*f);
+
+ cs->start("cucu");
+ ms->start("cucu");
+ printf(" \"cucu\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ cs->start("cucumber");
+ ms->start("cucumber");
+ printf(" \"cucumber\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ cs->start("cucumber slumber");
+ ms->start("cucumber slumber");
+ printf(" \"cucumber slumber\" -> %s:%d\n",ms->memory().c_str(),cs->counter());
+
+ delete cs;
+ delete ms;
+ delete f;
+
+ return 0;
+}
diff --git a/fsa/src/alltest/fsa_test.out b/fsa/src/alltest/fsa_test.out
new file mode 100644
index 00000000000..b9c96e5b795
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.out
@@ -0,0 +1,60 @@
+start/delta test: string("cucu")+'m'+"ber" is accepted
+ data size: 10
+ data string: "Vegetable "
+lookup test: "cucumber" -> "Vegetable"
+copy hashed state test:
+ "pe"+"a": hash=26, data_size=10, data string="Vegetable "
+ "pe"+"ach": hash=27, data_size=6, data string="Fruit "
+ "pe"+"ar": hash=28, data_size=6, data string="Fruit "
+revLookup test:
+ 0 -> Cupertino -> 0
+ 1 -> Foster City -> 1
+ 2 -> Los Altos -> 2
+ 3 -> Menlo Park -> 3
+ 4 -> Mountain View -> 4
+ 5 -> Palo Alto -> 5
+ 6 -> San Francisco -> 6
+ 7 -> San Jose -> 7
+ 8 -> Santa Clara -> 8
+ 9 -> Saratoga -> 9
+ 10 -> Sunnyvale -> 10
+ 11 -> apple -> 11
+ 12 -> apricot -> 12
+ 13 -> artichoke -> 13
+ 14 -> banana -> 14
+ 15 -> cabbage -> 15
+ 16 -> carrot -> 16
+ 17 -> cherry -> 17
+ 18 -> chili -> 18
+ 19 -> cucumber -> 19
+ 20 -> eggplant -> 20
+ 21 -> grapes -> 21
+ 22 -> lettuce -> 22
+ 23 -> onion -> 23
+ 24 -> paprika -> 24
+ 25 -> passion fruit -> 25
+ 26 -> pea -> 26
+ 27 -> peach -> 27
+ 28 -> pear -> 28
+ 29 -> pineapple -> 29
+ 30 -> plum -> 30
+ 31 -> potato -> 31
+ 32 -> pumpkin -> 32
+ 33 -> sour cherry -> 33
+ 34 -> squash -> 34
+ 35 -> tomato -> 35
+iterator test:
+ possible continuations from "p":
+ "p" + "aprika"
+ "p" + "assion fruit"
+ "p" + "ea"
+ "p" + "each"
+ "p" + "ear"
+ "p" + "ineapple"
+ "p" + "lum"
+ "p" + "otato"
+ "p" + "umpkin"
+counter/memory state test
+ "cucu" -> cucu:4
+ "cucumber" -> cucumber:8
+ "cucumber slumber" -> cucumber:8
diff --git a/fsa/src/alltest/fsa_test.sh b/fsa/src/alltest/fsa_test.sh
new file mode 100755
index 00000000000..497fd291c4d
--- /dev/null
+++ b/fsa/src/alltest/fsa_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_fsa_test_app > fsa_test.output
+diff fsa_test.output fsa_test.out
diff --git a/fsa/src/alltest/fsamanager_test.cpp b/fsa/src/alltest/fsamanager_test.cpp
new file mode 100644
index 00000000000..7ca4a2d8e8a
--- /dev/null
+++ b/fsa/src/alltest/fsamanager_test.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsamanagers/fsamanager.h>
+
+#include <iostream>
+#include <string>
+#include <stdlib.h>
+
+using namespace fsa;
+
+int main(int argc, char** argv)
+{
+ // A cache directory plus at least one automaton to load are required.
+ if(argc<3){
+ std::cerr << "usage: fsamanager_test cache_dir fsa_file_or_url [fsa_file_or_url ...]\n";
+ return 1;
+ }
+ FSAManager::instance().setCacheDir(argv[1]);
+ for(char** arg=argv+2; arg!=argv+argc; ++arg){
+ std::cerr << "Loading " << *arg << " ... ";
+ const bool loaded = FSAManager::instance().load(*arg,*arg);
+ std::cerr << (loaded ? "ok":"failed") << "\n";
+ }
+ return 0;
+}
diff --git a/fsa/src/alltest/lookup_test.cpp b/fsa/src/alltest/lookup_test.cpp
new file mode 100644
index 00000000000..6ff4e3063d4
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <stdlib.h>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <vespa/fsa/fsa.h>
+
+#if (__GNUG__ <3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+namespace std {
+const char *left = "";
+}
+#endif
+
+using namespace fsa;
+
+int main(int argc, char** argv)
+{
+ // Looks up each stdin line in the given FSA file and reports hash and data for accepted lines.
+ if(argc!=2){
+ std::cerr << "usage: lookup_test fsafile <input >output" << std::endl;
+ exit(1);
+ }
+
+ FSA f(argv[1]);
+ FSA::HashedState fs(f);
+ std::string input;
+ // Loop on the read itself: an eof() pre-check never ends on a non-EOF stream error.
+ while(std::getline(std::cin,input)){
+
+ // Empty lines carry no query; skip them.
+ if(input.size()>0){
+ fs.start(input);
+ if(fs.isFinal()){
+ std::cout << "'" << input << "'" << " is accepted, hash value: " << fs.hash()
+ << ", data size: " << fs.dataSize()
+ << ", data string: \""
+ << std::setw(fs.dataSize()) << std::left << fs.data()
+ << "\"" << std::endl;
+ }
+ else{
+ std::cout << "'" << input << "'" << " is not accepted." << std::endl;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/fsa/src/alltest/lookup_test.out b/fsa/src/alltest/lookup_test.out
new file mode 100644
index 00000000000..b7dd9b4da4b
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.out
@@ -0,0 +1,41 @@
+'Cupertino' is accepted, hash value: 0, data size: 5, data string: "City "
+'Foster City' is accepted, hash value: 1, data size: 5, data string: "City "
+'Los Altos' is accepted, hash value: 2, data size: 5, data string: "City "
+'Menlo Park' is accepted, hash value: 3, data size: 5, data string: "City "
+'Mountain View' is accepted, hash value: 4, data size: 5, data string: "City "
+'Palo Alto' is accepted, hash value: 5, data size: 5, data string: "City "
+'San Francisco' is accepted, hash value: 6, data size: 5, data string: "City "
+'San Jose' is accepted, hash value: 7, data size: 5, data string: "City "
+'Santa Clara' is accepted, hash value: 8, data size: 5, data string: "City "
+'Saratoga' is accepted, hash value: 9, data size: 5, data string: "City "
+'Sunnyvale' is accepted, hash value: 10, data size: 5, data string: "City "
+'apple' is accepted, hash value: 11, data size: 6, data string: "Fruit "
+'apricot' is accepted, hash value: 12, data size: 6, data string: "Fruit "
+'artichoke' is accepted, hash value: 13, data size: 10, data string: "Vegetable "
+'banana' is accepted, hash value: 14, data size: 6, data string: "Fruit "
+'cabbage' is accepted, hash value: 15, data size: 10, data string: "Vegetable "
+'carrot' is accepted, hash value: 16, data size: 10, data string: "Vegetable "
+'cherry' is accepted, hash value: 17, data size: 6, data string: "Fruit "
+'chili' is accepted, hash value: 18, data size: 10, data string: "Vegetable "
+'cucumber' is accepted, hash value: 19, data size: 10, data string: "Vegetable "
+'eggplant' is accepted, hash value: 20, data size: 10, data string: "Vegetable "
+'grapes' is accepted, hash value: 21, data size: 6, data string: "Fruit "
+'lettuce' is accepted, hash value: 22, data size: 10, data string: "Vegetable "
+'onion' is accepted, hash value: 23, data size: 10, data string: "Vegetable "
+'paprika' is accepted, hash value: 24, data size: 10, data string: "Vegetable "
+'passion fruit' is accepted, hash value: 25, data size: 6, data string: "Fruit "
+'pea' is accepted, hash value: 26, data size: 10, data string: "Vegetable "
+'peach' is accepted, hash value: 27, data size: 6, data string: "Fruit "
+'pear' is accepted, hash value: 28, data size: 6, data string: "Fruit "
+'pineapple' is accepted, hash value: 29, data size: 6, data string: "Fruit "
+'plum' is accepted, hash value: 30, data size: 6, data string: "Fruit "
+'potato' is accepted, hash value: 31, data size: 10, data string: "Vegetable "
+'pumpkin' is accepted, hash value: 32, data size: 10, data string: "Vegetable "
+'sour cherry' is accepted, hash value: 33, data size: 6, data string: "Fruit "
+'squash' is accepted, hash value: 34, data size: 10, data string: "Vegetable "
+'tomato' is accepted, hash value: 35, data size: 10, data string: "Vegetable "
+'alpha' is not accepted.
+'beta' is not accepted.
+'gamma' is not accepted.
+'delta' is not accepted.
+'epsilon' is not accepted.
diff --git a/fsa/src/alltest/lookup_test.sh b/fsa/src/alltest/lookup_test.sh
new file mode 100755
index 00000000000..394baecc78a
--- /dev/null
+++ b/fsa/src/alltest/lookup_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_lookup_test_app __testfsa__.__fsa__ < testinput.txt > lookup_test.output
+diff lookup_test.output lookup_test.out
diff --git a/fsa/src/alltest/ngram_test.cpp b/fsa/src/alltest/ngram_test.cpp
new file mode 100644
index 00000000000..7f0be7769e1
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <iostream>
+
+#include <vespa/fsa/permuter.h>
+#include <vespa/fsa/selector.h>
+#include <vespa/fsa/ngram.h>
+#include <vespa/fsa/base64.h>
+#include <vespa/fsa/wordchartokenizer.h>
+
+using namespace fsa;
+
+int main(int, char **) // NGram/Permuter/Base64 smoke test; ngram_test.sh diffs stdout against ngram_test.out.
+{
+ Permuter p;
+ // q2 and q3 are each built from an NGram, the Permuter and an integer — presumably a permutation id; TODO confirm.
+ NGram q1("a b c d e f"), q2(q1,p,10), q3(q2,p,13);
+
+ Selector s;
+
+ std::string s1("this is a test"), s2;
+ // Base64 round trip: encode s1 into s2, then decode s2 back into s1, printing each step.
+ Base64::encode(s1,s2);
+ std::cout << "'" << s1 << "'" << std::endl;
+ std::cout << "'" << s2 << "'" << std::endl;
+ Base64::decode(s2,s1);
+ std::cout << "'" << s1 << "'" << std::endl;
+
+
+ std::cout << q1 << std::endl;
+ std::cout << q2 << std::endl;
+ std::cout << q3 << std::endl;
+ // sort() and then reverse() are applied to q2, printing after each.
+ q2.sort();
+ std::cout << q2 << std::endl;
+ q2.reverse();
+ std::cout << q2 << std::endl;
+ // For n=1..6, step from firstComb(n,6) via nextComb until 0; c is printed in hex next to the selection it yields.
+ std::cout << std::hex;
+ for(unsigned int n=1;n<=6;n++){
+ unsigned int c=Permuter::firstComb(n,6);
+ while(c>0){
+ s.clear();
+ s.set(c);
+ q2.set(q1,s);
+ std::cout << c << ": " << q2 << std::endl;
+ c=Permuter::nextComb(c,6);
+ }
+ }
+ std::cout << std::dec;
+ // Tokenizer check: per ngram_test.out, "PUNCT" tokens appear where the comma and the full stop were.
+ WordCharTokenizer tokenizer(WordCharTokenizer::PUNCTUATION_SMART,"PUNCT");
+
+ NGram q4("test, wordchar tokenizer. does it work?",tokenizer);
+
+ std::cout << q4.join(" -|- ") << std::endl;
+
+}
diff --git a/fsa/src/alltest/ngram_test.out b/fsa/src/alltest/ngram_test.out
new file mode 100644
index 00000000000..d826e3173dd
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.out
@@ -0,0 +1,72 @@
+'this is a test'
+'dGhpcyBpcyBhIHRlc3Q='
+'this is a test'
+a b c d e f
+b d a c e f
+a b c d e f
+a b c d e f
+f e d c b a
+1: a
+2: b
+4: c
+8: d
+10: e
+20: f
+3: a b
+5: a c
+6: b c
+9: a d
+a: b d
+c: c d
+11: a e
+12: b e
+14: c e
+18: d e
+21: a f
+22: b f
+24: c f
+28: d f
+30: e f
+7: a b c
+b: a b d
+d: a c d
+e: b c d
+13: a b e
+15: a c e
+16: b c e
+19: a d e
+1a: b d e
+1c: c d e
+23: a b f
+25: a c f
+26: b c f
+29: a d f
+2a: b d f
+2c: c d f
+31: a e f
+32: b e f
+34: c e f
+38: d e f
+f: a b c d
+17: a b c e
+1b: a b d e
+1d: a c d e
+1e: b c d e
+27: a b c f
+2b: a b d f
+2d: a c d f
+2e: b c d f
+33: a b e f
+35: a c e f
+36: b c e f
+39: a d e f
+3a: b d e f
+3c: c d e f
+1f: a b c d e
+2f: a b c d f
+37: a b c e f
+3b: a b d e f
+3d: a c d e f
+3e: b c d e f
+3f: a b c d e f
+test -|- PUNCT -|- wordchar -|- tokenizer -|- PUNCT -|- does -|- it -|- work
diff --git a/fsa/src/alltest/ngram_test.sh b/fsa/src/alltest/ngram_test.sh
new file mode 100755
index 00000000000..85559d6e391
--- /dev/null
+++ b/fsa/src/alltest/ngram_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_ngram_test_app > ngram_test.output
+diff ngram_test.output ngram_test.out
diff --git a/fsa/src/alltest/segmenter_test.cpp b/fsa/src/alltest/segmenter_test.cpp
new file mode 100644
index 00000000000..3b80fe3390e
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file segmenter_test.cpp
+ * @brief Test for the Segmenter class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/segmenter.h>
+
+using namespace fsa;
+
+int main(int argc, char **argv)
+{
+ FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+ // Segments each stdin line against dict; segmenter_test.sh diffs stdout with segmenter_test.out.
+ Segmenter segmenter(dict);
+ Segmenter::Segments segments;
+ const Segmenter::Segmentation *segmentation;
+ // Loop on the read itself: an eof() pre-check never ends on a non-EOF stream error.
+ std::string text;
+ while(std::getline(std::cin,text)){
+
+ // Lines of three characters or fewer are skipped ("pea" has no entry in segmenter_test.out).
+ if(text.size()>3){
+
+ segmenter.segment(text,segments);
+
+ std::cout << "List of all segments:" << std::endl;
+ for(unsigned int i=0; i<segments.size(); i++){
+ std::cout << " "
+ << segments.sgm(i) << ":" << segments.conn(i) << " ["
+ << segments.beg(i) << "," << segments.end(i)-1 << "]"
+ << std::endl;
+ }
+
+ segmentation=segments.segmentation(Segmenter::SEGMENTATION_WEIGHTED);
+
+ std::cout << "Weighted segmentation:" << std::endl << " ";
+ for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+ it!=segmentation->end();++it){
+ std::cout << "(" << segments.sgm(*it) << ")";
+ }
+ std::cout << std::endl;
+
+ segmentation=segments.segmentation(Segmenter::SEGMENTATION_RIGHTMOST_LONGEST);
+
+ std::cout << "Rightmost-longest segmentation:" << std::endl << " ";
+ for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+ it!=segmentation->end();++it){
+ std::cout << "(" << segments.sgm(*it) << ")";
+ }
+ std::cout << std::endl;
+
+ segmentation=segments.segmentation(Segmenter::SEGMENTATION_LEFTMOST_LONGEST);
+ // "Lefttmost" (sic) is matched verbatim by segmenter_test.out; change both together or not at all.
+ std::cout << "Lefttmost-longest segmentation:" << std::endl << " ";
+ for(Segmenter::SegmentationConstIterator it=segmentation->begin();
+ it!=segmentation->end();++it){
+ std::cout << "(" << segments.sgm(*it) << ")";
+ }
+ std::cout << std::endl;
+
+ }
+
+ }
+
+ return 0;
+}
diff --git a/fsa/src/alltest/segmenter_test.out b/fsa/src/alltest/segmenter_test.out
new file mode 100644
index 00000000000..d8c42cfacce
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.out
@@ -0,0 +1,332 @@
+List of all segments:
+ cupertino:0 [0,0]
+Weighted segmentation:
+ (cupertino)
+Rightmost-longest segmentation:
+ (cupertino)
+Lefttmost-longest segmentation:
+ (cupertino)
+List of all segments:
+ foster:0 [0,0]
+ city:0 [1,1]
+Weighted segmentation:
+ (foster)(city)
+Rightmost-longest segmentation:
+ (foster)(city)
+Lefttmost-longest segmentation:
+ (foster)(city)
+List of all segments:
+ los:0 [0,0]
+ altos:0 [1,1]
+Weighted segmentation:
+ (los)(altos)
+Rightmost-longest segmentation:
+ (los)(altos)
+Lefttmost-longest segmentation:
+ (los)(altos)
+List of all segments:
+ menlo:0 [0,0]
+ park:0 [1,1]
+Weighted segmentation:
+ (menlo)(park)
+Rightmost-longest segmentation:
+ (menlo)(park)
+Lefttmost-longest segmentation:
+ (menlo)(park)
+List of all segments:
+ mountain:0 [0,0]
+ view:0 [1,1]
+Weighted segmentation:
+ (mountain)(view)
+Rightmost-longest segmentation:
+ (mountain)(view)
+Lefttmost-longest segmentation:
+ (mountain)(view)
+List of all segments:
+ palo:0 [0,0]
+ alto:0 [1,1]
+Weighted segmentation:
+ (palo)(alto)
+Rightmost-longest segmentation:
+ (palo)(alto)
+Lefttmost-longest segmentation:
+ (palo)(alto)
+List of all segments:
+ san:0 [0,0]
+ francisco:0 [1,1]
+Weighted segmentation:
+ (san)(francisco)
+Rightmost-longest segmentation:
+ (san)(francisco)
+Lefttmost-longest segmentation:
+ (san)(francisco)
+List of all segments:
+ san:0 [0,0]
+ jose:0 [1,1]
+Weighted segmentation:
+ (san)(jose)
+Rightmost-longest segmentation:
+ (san)(jose)
+Lefttmost-longest segmentation:
+ (san)(jose)
+List of all segments:
+ santa:0 [0,0]
+ clara:0 [1,1]
+Weighted segmentation:
+ (santa)(clara)
+Rightmost-longest segmentation:
+ (santa)(clara)
+Lefttmost-longest segmentation:
+ (santa)(clara)
+List of all segments:
+ saratoga:0 [0,0]
+Weighted segmentation:
+ (saratoga)
+Rightmost-longest segmentation:
+ (saratoga)
+Lefttmost-longest segmentation:
+ (saratoga)
+List of all segments:
+ sunnyvale:0 [0,0]
+Weighted segmentation:
+ (sunnyvale)
+Rightmost-longest segmentation:
+ (sunnyvale)
+Lefttmost-longest segmentation:
+ (sunnyvale)
+List of all segments:
+ apple:1769304646 [0,0]
+Weighted segmentation:
+ (apple)
+Rightmost-longest segmentation:
+ (apple)
+Lefttmost-longest segmentation:
+ (apple)
+List of all segments:
+ apricot:1769304646 [0,0]
+Weighted segmentation:
+ (apricot)
+Rightmost-longest segmentation:
+ (apricot)
+Lefttmost-longest segmentation:
+ (apricot)
+List of all segments:
+ artichoke:1701274966 [0,0]
+Weighted segmentation:
+ (artichoke)
+Rightmost-longest segmentation:
+ (artichoke)
+Lefttmost-longest segmentation:
+ (artichoke)
+List of all segments:
+ banana:1769304646 [0,0]
+Weighted segmentation:
+ (banana)
+Rightmost-longest segmentation:
+ (banana)
+Lefttmost-longest segmentation:
+ (banana)
+List of all segments:
+ cabbage:1701274966 [0,0]
+Weighted segmentation:
+ (cabbage)
+Rightmost-longest segmentation:
+ (cabbage)
+Lefttmost-longest segmentation:
+ (cabbage)
+List of all segments:
+ carrot:1701274966 [0,0]
+Weighted segmentation:
+ (carrot)
+Rightmost-longest segmentation:
+ (carrot)
+Lefttmost-longest segmentation:
+ (carrot)
+List of all segments:
+ cherry:1769304646 [0,0]
+Weighted segmentation:
+ (cherry)
+Rightmost-longest segmentation:
+ (cherry)
+Lefttmost-longest segmentation:
+ (cherry)
+List of all segments:
+ chili:1701274966 [0,0]
+Weighted segmentation:
+ (chili)
+Rightmost-longest segmentation:
+ (chili)
+Lefttmost-longest segmentation:
+ (chili)
+List of all segments:
+ cucumber:1701274966 [0,0]
+Weighted segmentation:
+ (cucumber)
+Rightmost-longest segmentation:
+ (cucumber)
+Lefttmost-longest segmentation:
+ (cucumber)
+List of all segments:
+ eggplant:1701274966 [0,0]
+Weighted segmentation:
+ (eggplant)
+Rightmost-longest segmentation:
+ (eggplant)
+Lefttmost-longest segmentation:
+ (eggplant)
+List of all segments:
+ grapes:1769304646 [0,0]
+Weighted segmentation:
+ (grapes)
+Rightmost-longest segmentation:
+ (grapes)
+Lefttmost-longest segmentation:
+ (grapes)
+List of all segments:
+ lettuce:1701274966 [0,0]
+Weighted segmentation:
+ (lettuce)
+Rightmost-longest segmentation:
+ (lettuce)
+Lefttmost-longest segmentation:
+ (lettuce)
+List of all segments:
+ onion:1701274966 [0,0]
+Weighted segmentation:
+ (onion)
+Rightmost-longest segmentation:
+ (onion)
+Lefttmost-longest segmentation:
+ (onion)
+List of all segments:
+ paprika:1701274966 [0,0]
+Weighted segmentation:
+ (paprika)
+Rightmost-longest segmentation:
+ (paprika)
+Lefttmost-longest segmentation:
+ (paprika)
+List of all segments:
+ passion:0 [0,0]
+ fruit:0 [1,1]
+ passion fruit:1769304646 [0,1]
+Weighted segmentation:
+ (passion fruit)
+Rightmost-longest segmentation:
+ (passion fruit)
+Lefttmost-longest segmentation:
+ (passion fruit)
+List of all segments:
+ peach:1769304646 [0,0]
+Weighted segmentation:
+ (peach)
+Rightmost-longest segmentation:
+ (peach)
+Lefttmost-longest segmentation:
+ (peach)
+List of all segments:
+ pear:1769304646 [0,0]
+Weighted segmentation:
+ (pear)
+Rightmost-longest segmentation:
+ (pear)
+Lefttmost-longest segmentation:
+ (pear)
+List of all segments:
+ pineapple:1769304646 [0,0]
+Weighted segmentation:
+ (pineapple)
+Rightmost-longest segmentation:
+ (pineapple)
+Lefttmost-longest segmentation:
+ (pineapple)
+List of all segments:
+ plum:1769304646 [0,0]
+Weighted segmentation:
+ (plum)
+Rightmost-longest segmentation:
+ (plum)
+Lefttmost-longest segmentation:
+ (plum)
+List of all segments:
+ potato:1701274966 [0,0]
+Weighted segmentation:
+ (potato)
+Rightmost-longest segmentation:
+ (potato)
+Lefttmost-longest segmentation:
+ (potato)
+List of all segments:
+ pumpkin:1701274966 [0,0]
+Weighted segmentation:
+ (pumpkin)
+Rightmost-longest segmentation:
+ (pumpkin)
+Lefttmost-longest segmentation:
+ (pumpkin)
+List of all segments:
+ sour:0 [0,0]
+ cherry:1769304646 [1,1]
+ sour cherry:1769304646 [0,1]
+Weighted segmentation:
+ (sour cherry)
+Rightmost-longest segmentation:
+ (sour cherry)
+Lefttmost-longest segmentation:
+ (sour cherry)
+List of all segments:
+ squash:1701274966 [0,0]
+Weighted segmentation:
+ (squash)
+Rightmost-longest segmentation:
+ (squash)
+Lefttmost-longest segmentation:
+ (squash)
+List of all segments:
+ tomato:1701274966 [0,0]
+Weighted segmentation:
+ (tomato)
+Rightmost-longest segmentation:
+ (tomato)
+Lefttmost-longest segmentation:
+ (tomato)
+List of all segments:
+ alpha:0 [0,0]
+Weighted segmentation:
+ (alpha)
+Rightmost-longest segmentation:
+ (alpha)
+Lefttmost-longest segmentation:
+ (alpha)
+List of all segments:
+ beta:0 [0,0]
+Weighted segmentation:
+ (beta)
+Rightmost-longest segmentation:
+ (beta)
+Lefttmost-longest segmentation:
+ (beta)
+List of all segments:
+ gamma:0 [0,0]
+Weighted segmentation:
+ (gamma)
+Rightmost-longest segmentation:
+ (gamma)
+Lefttmost-longest segmentation:
+ (gamma)
+List of all segments:
+ delta:0 [0,0]
+Weighted segmentation:
+ (delta)
+Rightmost-longest segmentation:
+ (delta)
+Lefttmost-longest segmentation:
+ (delta)
+List of all segments:
+ epsilon:0 [0,0]
+Weighted segmentation:
+ (epsilon)
+Rightmost-longest segmentation:
+ (epsilon)
+Lefttmost-longest segmentation:
+ (epsilon)
diff --git a/fsa/src/alltest/segmenter_test.sh b/fsa/src/alltest/segmenter_test.sh
new file mode 100755
index 00000000000..d36a6d10057
--- /dev/null
+++ b/fsa/src/alltest/segmenter_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_segmenter_test_app < testinput.txt > segmenter_test.output
+diff segmenter_test.output segmenter_test.out
diff --git a/fsa/src/alltest/testinput.txt b/fsa/src/alltest/testinput.txt
new file mode 100644
index 00000000000..fa4afece710
--- /dev/null
+++ b/fsa/src/alltest/testinput.txt
@@ -0,0 +1,41 @@
+Cupertino
+Foster City
+Los Altos
+Menlo Park
+Mountain View
+Palo Alto
+San Francisco
+San Jose
+Santa Clara
+Saratoga
+Sunnyvale
+apple
+apricot
+artichoke
+banana
+cabbage
+carrot
+cherry
+chili
+cucumber
+eggplant
+grapes
+lettuce
+onion
+paprika
+passion fruit
+pea
+peach
+pear
+pineapple
+plum
+potato
+pumpkin
+sour cherry
+squash
+tomato
+alpha
+beta
+gamma
+delta
+epsilon
diff --git a/fsa/src/alltest/vectorizer_perftest.cpp b/fsa/src/alltest/vectorizer_perftest.cpp
new file mode 100644
index 00000000000..582652ec66d
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_perftest.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer_perftest.cpp
+ * @brief Performance test for the vectorizer class
+ *
+ */
+
+#include <string>
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/vectorizer.h>
+#include <vespa/fsa/timestamp.h>
+
+using namespace fsa;
+
+int main(int argc, char **argv) // Times Vectorizer::vectorize on a fixed text; dictionary path comes from argv[1] or a default.
+{
+ FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+
+ Vectorizer v(dict);
+ Vectorizer::TermVector tv;
+
+ // Fixed benchmark document; the adjacent literals concatenate into a single string.
+ std::string text =
+ "belfast northern ireland protestant extremists crashed a forklift "
+ "truck into a belfast pub packed with catholics early friday and tossed "
+ "gasoline bombs into the building on a road on the front line of "
+ "tensions between the two communities "
+ "no one was hurt in the attack police said, though the forklift came "
+ "crashing through a window just above a bench where a patron had been "
+ "sitting seconds earlier the bar s owner sean conlon said "
+ "the customer had just gotten up to go to the toilet so it s really "
+ "just by the grace of god still he s here today at all conlon said "
+ "a protestant gang used the stolen vehicle to smash down a heavy metal "
+ "security grill on a window at around 12 45 a m then to toss three "
+ "gasoline bombs inside the pub on the crumlin road an especially "
+ "polarized part of north belfast where catholic protestant tensions "
+ "have repeatedly flared "
+ "no group claimed responsibility for the attack on the thirty two "
+ "degrees north pub a catholic frequented bar across the street from a "
+ "hard line protestant district but catholic leaders blamed the largest "
+ "illegal protestant group the ulster defense association "
+ "firefighters quickly doused the flames caused by the gasoline "
+ "bombs the forklift remained wedged into the pub friday afternoon as "
+ "engineers and architects discussed whether the newly refurbished pub "
+ "would have to be partly demolished "
+ "the uda is supposed to be observing a cease fire in support of "
+ "northern ireland s 1998 peace accord but britain no longer recognizes "
+ "the validity of the uda truce because the anti catholic group has "
+ "violated it so often "
+ "the crumlin road area of north belfast has suffered some of northern "
+ "ireland s most graphic sectarian trouble in recent years while both "
+ "sides complain of suffering harassment and stone throwing protestants "
+ "in particular accuse the expanding catholic community of seeking to "
+ "force them from the area a charge the catholics deny. "
+ "protestant mobs in 2001 and 2002 blocked catholics from taking their "
+ "children to the local catholic elementary school which is in the "
+ "predominantly protestant part of the area "
+ "on july 12 hundreds of catholics from the area s ardoyne district "
+ "swarmed over police and british soldiers protecting a protestant "
+ "parade that had just passed down crumlin road dozens were wounded "
+ "demographic tensions lie at the heart of the northern ireland "
+ "conflict which was founded 84 years ago as a british territory with a "
+ "70 percent protestant majority the most recent census in 2001 put the "
+ "sectarian split at nearer 55 percent protestant and 45 percent "
+ "catholic and confirmed that belfast now has a catholic majority";
+ // Tokenize once up front so the timed loop below calls vectorize() only.
+ NGram tokenized_text(text);
+
+ TimeStamp t;
+ double t0,t1;
+ unsigned int count=1000;
+
+ std::cout << "Number of iterations: " << count << std::endl;
+ std::cout << "Input string length: " << text.length() << std::endl;
+ std::cout << "Number of input tokens: " << tokenized_text.length() << std::endl;
+ std::cout << std::endl;
+ // Timed section: t1 is printed as seconds and count/t1 as documents per second.
+ t0=t.elapsed();
+ for(unsigned int i=0; i<count; ++i){
+ v.vectorize(tokenized_text,tv);
+ }
+ t1=t.elapsed()-t0;
+ std::cout << "Vectorizer performance: \t" << t1 << " sec" << "\t\t"
+ << count/t1 << " document/sec" << std::endl;
+ for(unsigned int i=0; i<tv.size(); i++){
+ std::cout << tv[i].term() << ", " << tv[i].weight() << std::endl;
+ }
+
+ return 0;
+}
diff --git a/fsa/src/alltest/vectorizer_test.cpp b/fsa/src/alltest/vectorizer_test.cpp
new file mode 100644
index 00000000000..e3bcf236455
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer_test.cpp
+ * @brief Test for the vectorizer class
+ *
+ */
+
+#include <iostream>
+#include <iomanip>
+
+#include <vespa/fsa/vectorizer.h>
+
+using namespace fsa;
+
+int main(int argc, char **argv)
+{
+ FSA dict(argc>=2? argv[1] : "__testfsa__.__fsa__");
+ // Vectorizes each stdin line against dict; vectorizer_test.sh diffs stdout with vectorizer_test.out.
+ Vectorizer v(dict);
+ Vectorizer::TermVector tv;
+
+ std::string text;
+ NGram tokenized_text;
+ // Loop on the read itself: an eof() pre-check never ends on a stream error and runs once more after the last line.
+ while(std::getline(std::cin,text)){
+
+ // Tokenize the line, vectorize it, and print every (term, weight) pair.
+ tokenized_text.set(text);
+ v.vectorize(tokenized_text,tv);
+
+ for(unsigned int i=0; i<tv.size(); i++){
+ std::cout << tv[i].term() << ", " << tv[i].weight() << std::endl;
+ }
+ }
+
+ return 0;
+}
diff --git a/fsa/src/alltest/vectorizer_test.out b/fsa/src/alltest/vectorizer_test.out
new file mode 100644
index 00000000000..aa30421a2bf
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.out
@@ -0,0 +1,26 @@
+apple, 0
+apricot, 0
+artichoke, 0
+banana, 0
+cabbage, 0
+carrot, 0
+cherry, 0
+chili, 0
+cucumber, 0
+eggplant, 0
+grapes, 0
+lettuce, 0
+onion, 0
+paprika, 0
+passion fruit, 0
+pea, 0
+peach, 0
+pear, 0
+pineapple, 0
+plum, 0
+potato, 0
+pumpkin, 0
+cherry, 0
+sour cherry, 0
+squash, 0
+tomato, 0
diff --git a/fsa/src/alltest/vectorizer_test.sh b/fsa/src/alltest/vectorizer_test.sh
new file mode 100755
index 00000000000..03d794fc6e8
--- /dev/null
+++ b/fsa/src/alltest/vectorizer_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./fsa_vectorizer_test_app < testinput.txt > vectorizer_test.output
+diff vectorizer_test.output vectorizer_test.out
diff --git a/fsa/src/apps/.gitignore b/fsa/src/apps/.gitignore
new file mode 100644
index 00000000000..85c014ca23b
--- /dev/null
+++ b/fsa/src/apps/.gitignore
@@ -0,0 +1,3 @@
+Makefile
+.depend
+vespa-*-*
diff --git a/fsa/src/apps/fsadump/.gitignore b/fsa/src/apps/fsadump/.gitignore
new file mode 100644
index 00000000000..36c86d6022c
--- /dev/null
+++ b/fsa/src/apps/fsadump/.gitignore
@@ -0,0 +1 @@
+fsadump
diff --git a/fsa/src/apps/fsadump/CMakeLists.txt b/fsa/src/apps/fsadump/CMakeLists.txt
new file mode 100644
index 00000000000..069bdfb379b
--- /dev/null
+++ b/fsa/src/apps/fsadump/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(fsa_fsadump_app
+ SOURCES
+ fsadump.cpp
+ OUTPUT_NAME fsadump
+ INSTALL bin
+ DEPENDS
+ fsa
+)
diff --git a/fsa/src/apps/fsadump/fsadump.cpp b/fsa/src/apps/fsadump/fsadump.cpp
new file mode 100644
index 00000000000..a713b5dd30f
--- /dev/null
+++ b/fsa/src/apps/fsadump/fsadump.cpp
@@ -0,0 +1,186 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <iostream>
+#include <fstream>
+
+#include <vespa/fsa/base64.h>
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/automaton.h>
+
+using namespace fsa;
+
+/**
+ * Output formats supported by fsadump; main() selects one from the
+ * command line options.
+ * NOTE(review): the type is called FSA_Input_Format although every
+ * enumerator describes an output format - presumably carried over from
+ * makefsa; confirm before renaming.
+ */
+enum FSA_Input_Format {
+ // No option given yet; main() replaces this with OUTPUT_TEXT_EMPTY.
+ OUTPUT_UNDEF,
+ // -t: string, tab, meta info as text (one trailing NUL stripped).
+ OUTPUT_TEXT,
+ // -e: string only, one per line.
+ OUTPUT_TEXT_EMPTY,
+ // -n: string, tab, meta info printed as an unsigned number.
+ OUTPUT_TEXT_NUM,
+ // -b: string, NUL, Base64 encoded meta info, NUL.
+ OUTPUT_BINARY,
+ // -B: string, NUL, raw meta info bytes, NUL.
+ OUTPUT_BINARY_RAW,
+ // -p: string, tab, running index of the string in iteration order.
+ OUTPUT_PHASH,
+ // -d: whole automaton via FSA::printDot().
+ OUTPUT_DOT
+};
+
+/**
+ * Write "<name>: <errormsg>" followed by a newline to stderr.
+ *
+ * @param name     Program name used as the message prefix.
+ * @param errormsg Message text; when NULL nothing is printed.
+ */
+void error(const char *name, const char *errormsg = NULL)
+{
+  if(errormsg==NULL){
+    return;
+  }
+  fprintf(stderr,"%s: %s\n",name,errormsg);
+}
+
+/**
+ * Print an optional error message followed by the fsadump usage text
+ * to stderr.
+ *
+ * @param name     Program name shown in the usage line.
+ * @param errormsg Optional error message printed first; NULL for none.
+ */
+void usage(const char *name, const char *errormsg = NULL)
+{
+  error(name,errormsg);
+  fprintf(stderr,"usage:\n");
+  fprintf(stderr," %s [OPTIONS] fsafile\n",name);
+  fprintf(stderr,"\n");
+  fprintf(stderr," Valid options are:\n");
+  fprintf(stderr," -h display this help\n");
+  fprintf(stderr," -b use binary output format with Base64 encoded info\n");
+  fprintf(stderr," -B use binary output format with raw info\n");
+  fprintf(stderr," -e use text output format with no info (default)\n");
+  fprintf(stderr," -n use text output format with (unsigned) numerical info\n");
+  // fsadump only writes data, so -t selects an output format (the help
+  // text used to say "input").
+  fprintf(stderr," -t use text output format\n");
+  fprintf(stderr," -p use perfect hash value instead of meta info (text output)\n");
+  fprintf(stderr," -d output dot format\n");
+  fprintf(stderr," -V display version number\n");
+  fprintf(stderr,"\n");
+}
+
+/**
+ * Print the fsadump version (major.minor.patch decoded from FSA::VER)
+ * to stdout. The library version is appended in parentheses when it
+ * differs from the version the tool was compiled against.
+ */
+void version()
+{
+  std::cout << "fsadump "
+            << FSA::VER/1000000 << "."
+            << (FSA::VER/1000)%1000 << "."
+            << FSA::VER%1000;
+
+  if(FSA::VER == FSA::libVER()){
+    std::cout << std::endl;
+    return;
+  }
+
+  std::cout << " (library "
+            << FSA::libVER()/1000000 << "."
+            << (FSA::libVER()/1000)%1000 << "."
+            << FSA::libVER()%1000
+            << ")";
+  std::cout << std::endl;
+}
+
+/**
+ * fsadump entry point: dumps the strings (and optionally the meta info)
+ * stored in an fsa file to stdout in the format chosen on the command
+ * line.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Options followed by exactly one fsa file name.
+ * @return 0 on success; the process exits with 1 on bad arguments or
+ *         when the fsa file cannot be loaded.
+ */
+int main(int argc, char** argv)
+{
+  FSA_Input_Format format = OUTPUT_UNDEF;
+  const char *input_file;
+
+  // getopt() returns int. Storing the result in a plain char breaks the
+  // comparison with -1 on platforms where char is unsigned, turning the
+  // option loop into an endless loop.
+  int opt;
+  extern int optind;
+
+  while((opt=getopt(argc,argv,"ebBhntpdV")) != -1){
+    switch(opt){
+    case 'b':
+      format = OUTPUT_BINARY;
+      break;
+    case 'B':
+      format = OUTPUT_BINARY_RAW;
+      break;
+    case 'h':
+      usage(argv[0]);
+      exit(0);
+    case 'V':
+      version();
+      exit(0);
+    case 't':
+      format = OUTPUT_TEXT;
+      break;
+    case 'n':
+      format = OUTPUT_TEXT_NUM;
+      break;
+    case 'e':
+      format = OUTPUT_TEXT_EMPTY;
+      break;
+    case 'p':
+      format = OUTPUT_PHASH;
+      break;
+    case 'd':
+      format = OUTPUT_DOT;
+      break;
+    case '?':
+      usage(argv[0],"unrecognized option");
+      exit(1);
+    }
+  }
+
+  // Exactly one positional argument (the fsa file) is expected.
+  if(optind!=argc-1){
+    usage(argv[0],"required parameter(s) missing");
+    exit(1);
+  }
+
+  if(format==OUTPUT_UNDEF) // use default format (warning?)
+    format=OUTPUT_TEXT_EMPTY;
+
+  input_file = argv[optind];
+
+  FSA fsa(input_file);
+
+  if(!fsa.isOk()){
+    std::cerr << "Failed to open fsa file (" << input_file << ")" << std::endl;
+    exit(1);
+  }
+
+  std::string meta,temp;
+  uint32_t num_meta;
+  uint32_t lines=0;   // number of strings dumped so far
+
+  if(format!=OUTPUT_DOT){
+
+    for(FSA::iterator it(fsa); it!=fsa.end(); ++it){
+
+      switch(format){
+      case OUTPUT_BINARY:
+        temp.assign((const char *)(it->data()),it->dataSize());
+        Base64::encode(temp,meta);
+        std::cout << it->str() << '\0' << meta << '\0';
+        break;
+      case OUTPUT_BINARY_RAW:
+        meta.assign((const char *)(it->data()),it->dataSize());
+        std::cout << it->str() << '\0' << meta << '\0';
+        break;
+      case OUTPUT_TEXT:
+        meta.assign((const char *)(it->data()),it->dataSize());
+        // Drop a single trailing NUL so it does not end up in the text.
+        if(meta.size()>0 && meta[meta.size()-1]==0){
+          meta.resize(meta.size()-1);
+        }
+        std::cout << it->str() << '\t' << meta << '\n';
+        break;
+      case OUTPUT_TEXT_NUM:
+        switch(it->dataSize()){
+        case 0:
+          // No data bytes to read; do not touch it->data() at all.
+          num_meta = 0;
+          break;
+        case 1:
+          num_meta = *((const uint8_t*)it->data());
+          break;
+        case 2:
+        case 3:
+          num_meta = *((const uint16_t*)it->data());
+          break;
+        case 4:
+        default:
+          // Larger items: only the first four bytes are interpreted.
+          num_meta = *((const uint32_t*)it->data());
+          break;
+        }
+        std::cout << it->str() << '\t' << num_meta << '\n';
+        break;
+      case OUTPUT_PHASH:
+        // The running index of the string is printed as its hash value.
+        std::cout << it->str() << '\t' << lines << '\n';
+        break;
+      case OUTPUT_TEXT_EMPTY:
+        std::cout << it->str() << '\n';
+        break;
+      default:
+        assert(0);
+        break;
+      }
+
+      ++lines;
+    }
+  }
+
+  else {
+    fsa.printDot();
+  }
+
+  return 0;
+}
diff --git a/fsa/src/apps/fsainfo/.gitignore b/fsa/src/apps/fsainfo/.gitignore
new file mode 100644
index 00000000000..fc50ebfe566
--- /dev/null
+++ b/fsa/src/apps/fsainfo/.gitignore
@@ -0,0 +1 @@
+fsainfo
diff --git a/fsa/src/apps/fsainfo/CMakeLists.txt b/fsa/src/apps/fsainfo/CMakeLists.txt
new file mode 100644
index 00000000000..c16332ed20b
--- /dev/null
+++ b/fsa/src/apps/fsainfo/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(fsa_fsainfo_app
+ SOURCES
+ fsainfo.cpp
+ OUTPUT_NAME fsainfo
+ INSTALL bin
+ DEPENDS
+ fsa
+)
diff --git a/fsa/src/apps/fsainfo/fsainfo.cpp b/fsa/src/apps/fsainfo/fsainfo.cpp
new file mode 100644
index 00000000000..efbe6075331
--- /dev/null
+++ b/fsa/src/apps/fsainfo/fsainfo.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <iostream>
+#include <fstream>
+
+#include <vespa/fsa/fsa.h>
+
+using namespace fsa;
+
+/**
+ * Print an optional error message followed by the fsainfo usage text
+ * to stderr.
+ *
+ * @param name     Program name shown in the usage line.
+ * @param errormsg Optional error message printed first; NULL for none.
+ */
+void usage(const char *name, const char *errormsg = NULL)
+{
+  if(errormsg!=NULL){
+    fprintf(stderr,"%s: %s\n",name,errormsg);
+  }
+  fputs("usage:\n",stderr);
+  fprintf(stderr," %s [OPTIONS] fsa\n",name);
+  fputs("\n"
+        " Valid options are:\n"
+        " -h display this help\n"
+        " -V display version number\n"
+        "\n",stderr);
+}
+
+/**
+ * Print the fsainfo version (major.minor.patch decoded from FSA::VER)
+ * to stdout. The library version is appended in parentheses when it
+ * differs from the version the tool was compiled against.
+ */
+void version()
+{
+  std::cout << "fsainfo "
+            << FSA::VER/1000000 << "."
+            << (FSA::VER/1000)%1000 << "."
+            << FSA::VER%1000;
+
+  if(FSA::VER == FSA::libVER()){
+    std::cout << std::endl;
+    return;
+  }
+
+  std::cout << " (library "
+            << FSA::libVER()/1000000 << "."
+            << (FSA::libVER()/1000)%1000 << "."
+            << FSA::libVER()%1000
+            << ")";
+  std::cout << std::endl;
+}
+
+/**
+ * fsainfo entry point: reads the header of an fsa file, prints its
+ * fields to stdout and finally tries to load the file with the FSA
+ * class.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Options followed by exactly one fsa file name.
+ * @return 1 if the file cannot be opened, 0 otherwise (including the
+ *         case where the file format is not recognized); the process
+ *         exits with 1 on bad arguments.
+ */
+int main(int argc, char** argv)
+{
+  const char *fsa_file;
+
+  // getopt() returns int. Storing the result in a plain char breaks the
+  // comparison with -1 on platforms where char is unsigned.
+  int opt;
+  extern int optind;
+
+  while((opt=getopt(argc,argv,"hV")) != -1){
+    switch(opt){
+    case 'h':
+      usage(argv[0]);
+      exit(0);
+    case 'V':
+      version();
+      exit(0);
+    case '?':
+      usage(argv[0],"unrecognized option");
+      exit(1);
+    }
+  }
+
+  if(optind!=argc-1){
+    usage(argv[0],"required parameter fsa is missing");
+    exit(1);
+  }
+
+  fsa_file = argv[optind];
+
+  FSA::Header header;
+
+  int fd = ::open(fsa_file,O_RDONLY);
+  if(fd<0){
+    std::cerr << "Failed to open fsa file (" << fsa_file << ")" << std::endl;
+    return 1;
+  }
+
+  // read() returns ssize_t and -1 on error. Keeping the result signed
+  // makes sure a failed read is rejected instead of being mistaken for
+  // a huge byte count, which would let the uninitialised header through.
+  const ssize_t r = ::read(fd,&header,sizeof(header));
+  ::close(fd);
+
+  if(r<0 || (size_t)r<sizeof(header) || header._magic!=FSA::MAGIC){
+    std::cout << "Unrecognized file format (" << fsa_file << ")\n";
+  }
+  else if(header._version<1000){
+    std::cout << "Obsolete fsa file (" << fsa_file << ")\n";
+  }
+  else {
+    std::cout << "Information about " << fsa_file << ":\n";
+    std::cout << " Header size: " << sizeof(header) << " bytes" <<std::endl;
+    std::cout << " Magic: " << header._magic << std::endl;
+    std::cout << " Version: " << header._version/1000000 << "."
+              << (header._version%1000000)/1000 << "."
+              << header._version%1000 << std::endl;
+    std::cout << " Serial number: " << header._serial << std::endl;
+    std::cout << " Checksum: " << header._checksum << std::endl;
+    std::cout << " FSA size: " << header._size << " cells" <<std::endl;
+    // One symbol byte plus one unsigned int per cell.
+    std::cout << " " << header._size*(sizeof(unsigned char)+sizeof(unsigned int))
+              << " bytes" <<std::endl;
+    std::cout << " Start state: " << header._start << std::endl;
+    std::cout << " Data size: " << header._data_size << " bytes" << std::endl;
+    std::cout << " Data item type: " << (header._data_type==FSA::DATA_FIXED?
+                                          "fixed size":"variable size") << std::endl;
+    if(header._data_type==FSA::DATA_FIXED)
+      std::cout << " Fixed item size: " << header._fixed_data_size << std::endl;
+    std::cout << " Perfect hash: " << (header._has_perfect_hash?
+                                        "yes":"no") << std::endl;
+    if(header._has_perfect_hash)
+      std::cout << " Perfect hash size: " << header._size*sizeof(unsigned int) << " bytes" << std::endl;
+    // Cells (with a second unsigned int per cell when a perfect hash is
+    // present) + data + header.
+    std::cout << " Total size: "
+              << (header._size*(sizeof(unsigned char)+
+                                sizeof(unsigned int)*(header._has_perfect_hash?2:1)) +
+                  header._data_size +
+                  sizeof(header))
+              << " bytes" << std::endl;
+    std::cout << " Trying to load FSA ... " << std::flush;
+
+    FSA fsa(fsa_file);
+    std::cout << (fsa.version()==header._version ? "succeeded.":"failed.") << std::endl;
+  }
+
+  return 0;
+}
diff --git a/fsa/src/apps/makefsa/.gitignore b/fsa/src/apps/makefsa/.gitignore
new file mode 100644
index 00000000000..1ea7393bec3
--- /dev/null
+++ b/fsa/src/apps/makefsa/.gitignore
@@ -0,0 +1 @@
+makefsa
diff --git a/fsa/src/apps/makefsa/CMakeLists.txt b/fsa/src/apps/makefsa/CMakeLists.txt
new file mode 100644
index 00000000000..80002338479
--- /dev/null
+++ b/fsa/src/apps/makefsa/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(fsa_makefsa_app
+ SOURCES
+ makefsa.cpp
+ OUTPUT_NAME makefsa
+ INSTALL bin
+ DEPENDS
+ fsa
+)
diff --git a/fsa/src/apps/makefsa/makefsa.cpp b/fsa/src/apps/makefsa/makefsa.cpp
new file mode 100644
index 00000000000..b27485a851e
--- /dev/null
+++ b/fsa/src/apps/makefsa/makefsa.cpp
@@ -0,0 +1,295 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <iostream>
+#include <fstream>
+
+#include <vespa/fsa/base64.h>
+#include <vespa/fsa/fsa.h>
+#include <vespa/fsa/automaton.h>
+
+using namespace fsa;
+
+/**
+ * Input formats understood by makefsa; main() selects one from the
+ * command line options.
+ */
+enum FSA_Input_Format {
+ // No option given yet; main() replaces this with INPUT_TEXT_EMPTY.
+ INPUT_UNDEF,
+ // -t: one entry per line, string and text meta info separated by a tab.
+ INPUT_TEXT,
+ // -e: one string per line, no meta info.
+ INPUT_TEXT_EMPTY,
+ // -n: one entry per line, string and number separated by a tab.
+ INPUT_TEXT_NUM,
+ // -b: NUL terminated string followed by NUL terminated Base64 meta info.
+ INPUT_BINARY,
+ // -B: NUL terminated string followed by raw meta info (fixed size with
+ // -z, NUL terminated otherwise).
+ INPUT_BINARY_RAW };
+
+/**
+ * Print an optional error message followed by the makefsa usage text
+ * to stderr.
+ *
+ * @param name     Program name shown in the usage line.
+ * @param errormsg Optional error message printed first; NULL for none.
+ */
+void usage(const char *name, const char *errormsg = NULL)
+{
+  if(errormsg!=NULL){
+    fprintf(stderr,"%s: %s\n",name,errormsg);
+  }
+  fprintf(stderr,"usage:\n");
+  fprintf(stderr," %s [OPTIONS] [input_file] output_file\n",name);
+  fprintf(stderr,"\n");
+  fprintf(stderr," Valid options are:\n");
+  fprintf(stderr," -h display this help\n");
+  fprintf(stderr," -b use binary input format with Base64 encoded info\n");
+  // The description used to stop at "with raw"; completed to match the
+  // wording of the -b line.
+  fprintf(stderr," -B use binary input format with raw info\n");
+  fprintf(stderr," -e use text input format with no info (default)\n");
+  fprintf(stderr," -n use text input format with (unsigned) numerical info\n");
+  fprintf(stderr," -s bytes data size for numerical info: 1,2 or 4(default)\n");
+  fprintf(stderr," -z bytes data size for binary info (-B) (0 means NUL terminated)\n");
+  fprintf(stderr," -t use text input format\n");
+  fprintf(stderr," -p build automaton with perfect hash\n");
+  fprintf(stderr," -i ignore info string, regardless of input format\n");
+  fprintf(stderr," -S serial serial number\n");
+  fprintf(stderr," -v be verbose\n");
+  fprintf(stderr," -V display version number\n");
+  fprintf(stderr,"\n");
+  fprintf(stderr," If input_file is not specified, standard input is used.\n");
+}
+
+/**
+ * Print the makefsa version (major.minor.patch decoded from FSA::VER)
+ * to stdout. The library version is appended in parentheses when it
+ * differs from the version the tool was compiled against.
+ */
+void version()
+{
+  std::cout << "makefsa "
+            << FSA::VER/1000000 << "."
+            << (FSA::VER/1000)%1000 << "."
+            << FSA::VER%1000;
+
+  if(FSA::VER == FSA::libVER()){
+    std::cout << std::endl;
+    return;
+  }
+
+  std::cout << " (library "
+            << FSA::libVER()/1000000 << "."
+            << (FSA::libVER()/1000)%1000 << "."
+            << FSA::libVER()%1000
+            << ")";
+  std::cout << std::endl;
+}
+
+
+/**
+ * makefsa entry point: reads sorted strings (with optional meta info)
+ * from a file or from standard input, builds an Automaton from them and
+ * writes the result to the output file.
+ *
+ * Empty, unsorted and duplicate lines, and lines lacking the meta info
+ * required by the chosen format, are skipped with a warning on stderr.
+ *
+ * @param argc Number of command line arguments.
+ * @param argv Options followed by "[input_file] output_file".
+ * @return 0 on success, 1 if the input file cannot be opened or the
+ *         output file cannot be written; the process exits with 1 on
+ *         bad arguments.
+ */
+int main(int argc, char** argv)
+{
+  FSA_Input_Format format = INPUT_UNDEF;
+  unsigned int num_size = 4;
+  unsigned int info_size_binary = 0;
+  bool build_phash = false;
+  const char *input_file;
+  const char *output_file;
+  uint32_t serial = 0;
+  bool ignore_info = false;
+  bool verbose = false;
+  unsigned int lines=0,count = 0;
+
+  // getopt() returns int. Storing the result in a plain char breaks the
+  // comparison with -1 on platforms where char is unsigned.
+  int opt;
+  extern char *optarg;
+  extern int optind;
+
+  while((opt=getopt(argc,argv,"ebBhns:z:tpS:ivV")) != -1){
+    switch(opt){
+    case 'b':
+      format = INPUT_BINARY;
+      break;
+    case 'B':
+      format = INPUT_BINARY_RAW;
+      break;
+    case 'h':
+      usage(argv[0]);
+      exit(0);
+    case 'V':
+      version();
+      exit(0);
+    case 't':
+      format = INPUT_TEXT;
+      break;
+    case 'n':
+      format = INPUT_TEXT_NUM;
+      break;
+    case 's':
+      num_size = strtoul(optarg,NULL,0);
+      if(num_size!=1 && num_size!=2 && num_size!=4){
+        usage(argv[0],"invalid numerical info size (-s)");
+        exit(1);
+      }
+      break;
+    case 'z':
+      info_size_binary = strtoul(optarg,NULL,0);
+      break;
+    case 'S':
+      serial = strtoul(optarg,NULL,0);
+      break;
+    case 'e':
+      format = INPUT_TEXT_EMPTY;
+      break;
+    case 'p':
+      build_phash = true;
+      break;
+    case 'i':
+      ignore_info = true;
+      break;
+    case 'v':
+      verbose = true;
+      break;
+    case '?':
+      usage(argv[0],"unrecognized option");
+      exit(1);
+    }
+  }
+
+  if(format==INPUT_UNDEF) // use default format (warning?)
+    format=INPUT_TEXT_EMPTY;
+
+  // Two positional arguments: input and output; one: output only (input
+  // is then read from standard input).
+  if(optind+2==argc){
+    input_file = argv[optind];
+    output_file = argv[optind+1];
+  }
+  else if(optind+1==argc){
+    input_file = NULL;
+    output_file = argv[optind];
+  }
+  else{
+    usage(argv[0],"required parameter(s) missing");
+    exit(1);
+  }
+
+  Automaton automaton;
+
+  std::string input,last_input,meta,temp;
+  union{
+    uint8_t u1;
+    uint16_t u2;
+    uint32_t u4;
+  } num_meta;
+  std::ifstream infile;
+  std::istream *in;
+  size_t split;
+  bool empty_meta_str = false;
+
+  if(verbose) version();
+
+  if(verbose) std::cerr << "Initializing automaton ...";
+  automaton.init();
+  if(verbose) std::cerr << " done." << std::endl;
+
+  if(input_file!=NULL){
+    infile.open(input_file);
+    if (infile.fail()) {
+      std::cerr << "Error: Could not open file \"" << input_file << "\"\n";
+      return(1);
+    }
+    in=&infile;
+  }
+  else{
+    in=&std::cin;
+  }
+  if(verbose) std::cerr << "Inserting lines ...";
+  while(!in->eof()){
+    switch(format){
+    case INPUT_BINARY:
+      getline(*in,input,'\0');
+      getline(*in,temp,'\0');
+      Base64::decode(temp,meta);
+      break;
+    case INPUT_BINARY_RAW:
+      getline(*in,input,'\0');
+      if (info_size_binary) {
+        // Read the fixed-size info straight into meta. This replaces a
+        // variable-length stack array (not standard C++, and sized by
+        // the user through -z); a short read leaves zero padding
+        // instead of stale bytes.
+        meta.assign(info_size_binary,'\0');
+        in->read(&meta[0], info_size_binary);
+      }
+      else
+        getline(*in,meta,'\0');
+      break;
+    case INPUT_TEXT:
+      getline(*in,temp,'\n');
+      split = temp.find_first_of('\t');
+      input = temp.substr(0, split);
+      if (split == std::string::npos) {
+        empty_meta_str = true;
+        break;
+      }
+      meta = temp.substr(split + 1);
+      meta+='\0';
+      break;
+    case INPUT_TEXT_NUM:
+      getline(*in,temp,'\n');
+      split = temp.find_first_of('\t');
+      input = temp.substr(0, split);
+      if (split == std::string::npos) {
+        empty_meta_str = true;
+        break;
+      }
+      temp = temp.substr(split + 1);
+      switch(num_size){
+      case 1:
+        num_meta.u1=strtoul(temp.c_str(),NULL,0);
+        meta.assign((const char*)&num_meta,1);
+        break;
+      case 2:
+        num_meta.u2=strtoul(temp.c_str(),NULL,0);
+        meta.assign((const char*)&num_meta,2);
+        break;
+      case 4:
+      default:
+        num_meta.u4=strtoul(temp.c_str(),NULL,0);
+        meta.assign((const char*)&num_meta,4);
+        break;
+      }
+      break;
+    case INPUT_TEXT_EMPTY:
+      getline(*in,input,'\n');
+      break;
+    case INPUT_UNDEF:
+      assert(0);
+      break;
+    }
+
+    ++lines;
+
+    if(input.length()>0){
+      if(last_input>input){
+        // An unsorted line is dropped and must not become the new
+        // reference: otherwise a following line that sorts after it,
+        // but before the last inserted string, would be accepted and
+        // handed to insertSortedString() out of order.
+        std::cerr << "warning: ignoring unsorted line " << lines << ", \"" << input << "\"\n";
+      }
+      else{
+        if(last_input==input){
+          std::cerr << "warning: ignoring duplicate line " << lines << ", \"" << input << "\"\n";
+        }
+        else if(empty_meta_str) {
+          std::cerr << "warning: ignoring line " << lines << ", \"" << input << "\" with missing meta info\n";
+        }
+        else{
+          if(format==INPUT_TEXT_EMPTY || ignore_info){
+            automaton.insertSortedString(input);
+          }
+          else{
+            automaton.insertSortedString(input,meta);
+          }
+          if(verbose){
+            ++count;
+            if(count%1000==0)
+              std::cerr << "\rInserting lines ... (inserted " << count << " lines)";
+          }
+        }
+        last_input=input;
+      }
+    }
+    empty_meta_str = false;
+  }
+  // lines-1: the loop runs once more after the last record before eof()
+  // turns true.
+  if(verbose) std::cerr << "\rInserting lines ... (inserted " << count << "/" << (lines-1) << " lines) ... done.\n";
+  if(input_file!=NULL){
+    infile.close();
+  }
+
+
+  if(verbose) std::cerr << "Finalizing ...";
+  automaton.finalize();
+  if(verbose) std::cerr << " done." << std::endl;
+
+
+  if(build_phash){
+    if(verbose) std::cerr << "Adding perfect hash ...";
+    automaton.addPerfectHash();
+    if(verbose) std::cerr << " done." << std::endl;
+  }
+
+
+  if(verbose) std::cerr << "Writing fsa file ...";
+  if (!automaton.write(output_file,serial)) {
+    std::cerr << "Failed to write fsa file '" << std::string(output_file) << "'. Please check write permissions" << std::endl;
+    return 1;
+  }
+  if(verbose) std::cerr << " done." << std::endl;
+
+
+  return 0;
+}
diff --git a/fsa/src/libfsa/.gitignore b/fsa/src/libfsa/.gitignore
new file mode 100644
index 00000000000..9fb98574200
--- /dev/null
+++ b/fsa/src/libfsa/.gitignore
@@ -0,0 +1,6 @@
+*.la
+*.lo
+.deps
+.libs
+Makefile
+Makefile.in
diff --git a/fsa/src/libfsa/automaton-alternate.h b/fsa/src/libfsa/automaton-alternate.h
new file mode 100644
index 00000000000..20cc8f933eb
--- /dev/null
+++ b/fsa/src/libfsa/automaton-alternate.h
@@ -0,0 +1,998 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @file automaton.h
+ * @brief Definition of the classes used for %FSA (%Finite %State %Automaton) construction
+ *
+ */
+
+#pragma once
+
+#include <map>
+#include <list>
+#include <string>
+#include <vector>
+#include <assert.h>
+#include <sys/mman.h> // for mmap() etc
+
+#include "blob.h"
+#include "fsa.h"
+
+namespace fsa {
+
+
+// {{{ Automaton
+/**
+ * @class Automaton
+ * @brief %FSA (%Finite %State %Automaton) construction class.
+ *
+ * The Automaton class provides the methods and data structures needed
+ * for constructing a %Finite %State %Automaton from input strings. (The
+ * current implementation requires the input to be sorted; this
+ * requirement may be relaxed in future releases.)
+ *
+ * The constructed %FSA, when stored in a compact representation, can
+ * be used for lookups, etc. via the FSA class. The compact %FSA cannot
+ * be modified anymore.
+ */
+class Automaton {
+
+public:
+ /**
+ * Empty data item for final states without assigned data. Contains
+ * a zero terminated empty string.
+ */
+ static const Blob EMPTY_BLOB;
+
+private:
+
+ class State;
+
+ // {{{ Automaton::Transition
+ /**
+ * @struct Transition
+ * @brief Struct for storing a single transition.
+ *
+ * A transition consists of an input symbol and a new state.
+ */
+ struct Transition {
+ symbol_t _symbol; /**< Input symbol. */
+ State *_state; /**< New state. */
+ };
+ // }}}
+
+ // {{{ Automaton::TransitionList
+ /**
+ * @class TransitionList
+ * @brief Class representing all transitions from a state.
+ *
+ * This class is used for the internal representation of the
+ * automaton. A state can be represented by the list of all
+ * possible transitions from that state. Two states are
+ * equivalent, if both are final (with the same meta info) or both
+ * are not final, and their transition list matches, that is they
+ * have the same number of out-transitions, these correspond to the
+ * same set of input symbols, and for each of these symbols the new
+ * states are equal. In the internal representation, final states
+ * are implemented by means of a special transition, so transition
+ * list equivalence implies state equivalence.
+ */
+ class TransitionList {
+
+ friend class State;
+
+ private:
+ Transition* _trans; /**< Transition array. */
+ unsigned int _size; /**< Used size. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Default constructor, creates an empty transition list.
+ */
+ TransitionList() : _trans(NULL), _size(0) {};
+
+ /**
+ * @brief Destructor.
+ */
+ ~TransitionList()
+ { if(_trans!=NULL) free(_trans); }
+
+ /**
+ * @brief Copy constructor.
+ *
+ * @param tl Reference to transition list object.
+ */
+ TransitionList(const TransitionList& tl) : _trans(NULL), _size(tl._size)
+ {
+   // Only allocate and copy when there is something to copy: calling
+   // memcpy() with a NULL destination is undefined even for a length
+   // of zero, which is what used to happen for an empty source list.
+   if(_size>0){
+     _trans = (Transition*)malloc(_size*sizeof(Transition));
+     assert(_trans!=NULL);
+     memcpy(_trans, tl._trans, sizeof(_trans[0]) * _size);
+   }
+ }
+
+ /**
+  * @brief Assignment operator.
+  *
+  * Replaces the contents of this list with a deep copy of tl. Without
+  * it the compiler-generated assignment would copy the array pointer
+  * only, and both lists would free the same array on destruction.
+  *
+  * @param tl Reference to transition list object.
+  * @return Reference to this object.
+  */
+ TransitionList& operator=(const TransitionList& tl)
+ {
+   if(this!=&tl){
+     Transition* copy = NULL;
+     if(tl._size>0){
+       copy = (Transition*)malloc(tl._size*sizeof(Transition));
+       assert(copy!=NULL);
+       memcpy(copy, tl._trans, sizeof(copy[0]) * tl._size);
+     }
+     if(_trans!=NULL) free(_trans);
+     _trans = copy;
+     _size = tl._size;
+   }
+   return *this;
+ }
+
+
+ /**
+ * @brief Less-than operator.
+ *
+ * t1<t2 (or t1.operator<(t2)) is true iff
+ * - t1 has less transitions than t2, or
+ * - t1 and t2 have the same number of transitions, and the
+ * first transition which is different for t1 and t2 (sorted
+ * by symbol) has a lower symbol for t1, or
+ * - t1 and t2 have the same number of transitions, and the
+ * first transition which is different for t1 and t2 (sorted
+ * by symbol) has the same symbol but a lower new state for t1
+ *
+ * @param tl Reference to transition list object.
+ * @return True iff the t1<t2.
+ */
+ bool operator<(const TransitionList& tl) const;
+
+ /**
+ * @brief Greater-than operator.
+ *
+ * t1>t2 (or t1.operator>(t2)) is true iff
+ * - t1 has more transitions than t2, or
+ * - t1 and t2 have the same number of transitions, and the
+ * first transition which is different for t1 and t2 (sorted
+ * by symbol) has a higher symbol for t1, or
+ * - t1 and t2 have the same number of transitions, and the
+ * first transition which is different for t1 and t2 (sorted
+ * by symbol) has the same symbol but a higher new state for t1
+ *
+ * @param tl Reference to transition list object.
+ * @return True iff the t1>t2.
+ */
+ bool operator>(const TransitionList& tl) const;
+
+ /**
+ * @brief Equals operator.
+ *
+ * t1==t2 (or t1.operator==(t2)) is true iff
+ * - t1 and t2 have the same number of transitions, which have
+ * the same set of symbols and for each symbol the new
+ * states are equal
+ *
+ * @param tl Reference to transition list object.
+ * @return True iff the t1==t2.
+ */
+ bool operator==(const TransitionList& tl) const;
+
+ /**
+ * @brief Check for emptyness.
+ *
+ * @return True iff the transition list is empty.
+ */
+ // const: only reads _size, so it must also be callable on const lists.
+ bool empty() const { return (_size==0); }
+
+ /**
+ * @brief Get transition list size.
+ *
+ * @return Size of the transition list (number of transitions, or 0 if empty).
+ */
+ unsigned int size() const { return _size; }
+
+ /**
+ * @brief Index operator.
+ *
+ * Returns a reference to the ith transition on the list. i must
+ * be between 0 and size-1 (0<=i<=size-1).
+ *
+ * @param i Index of transition.
+ * @return Reference to the ith transition.
+ */
+ const Transition& operator[](unsigned int i) const { return _trans[i]; }
+
+ /**
+ * @brief Get the last transition.
+ *
+ * Returns a pointer to the last transition, or NULL pointer if
+ * the list is empty.
+ *
+ * @return Pointer to last transition, or NULL.
+ */
+ Transition* last()
+ {
+   // An empty list has no last element.
+   return (_size>0) ? &_trans[_size-1] : NULL;
+ }
+
+ /**
+ * @brief Get the transition corresponding to a symbol.
+ *
+ * Returns a pointer to the transition corresponding to a given
+ * symbol, or NULL pointer if the symbol is not found on the list
+ * (a transition with that symbol does not exist).
+ *
+ * @param sy Input symbol.
+ * @return Pointer to the matching transition, or NULL.
+ */
+ Transition* find(symbol_t sy)
+ {
+   // Linear scan in list order; the first transition with a matching
+   // symbol wins.
+   Transition* const end = _trans + _size;
+   for(Transition* t = _trans; t != end; ++t){
+     if(t->_symbol == sy) return t;
+   }
+   return NULL;
+ }
+
+ /**
+ * @brief Append a new transition to the list.
+ *
+ * Appends a new transition to the end of the list. The allocated
+ * size is increased if necessary. If a transition with the same
+ * symbol already exists, the behaviour is undefined.
+ *
+ * @param sy Input symbol.
+ * @param st Pointer to new state.
+ */
+ void append(symbol_t sy, State* st)
+ {
+   // Grow the array by exactly one slot. In the code visible here
+   // _trans is NULL whenever the list is empty, and realloc() on a NULL
+   // pointer behaves like malloc(), so one call covers both cases.
+   const unsigned int grown = _size+1;
+   _trans = (Transition*)realloc(_trans,grown*sizeof(Transition));
+   assert(_trans!=NULL);
+
+   Transition& slot = _trans[_size];
+   slot._symbol = sy;
+   slot._state = st;
+   _size = grown;
+ }
+
+ };
+
+ // }}}
+ // {{{ Automaton::State
+ /**
+ * @class State
+ * @brief Class representing a state of the automaton.
+ *
+ * The representation of the automaton states consists of a
+ * transition list for the state, and meta info blob (the latter
+ * only used for special states reached by a final transition). A
+ * final transition is a transition from a final (accepting) state
+ * with the reserved FINAL_SYMBOL (0xff) to a special state, which
+ * stores the meta info corresponding to the final state. For each
+ * unique meta info blob, there is one special state.
+ */
+ class State {
+
+ private:
+
+ TransitionList _tlist; /**< Transition list. */
+ const Blob *_blob; /**< Meta info blob. */
+
+ public:
+
+ /**
+ * @brief Constructor.
+ *
+ * Default constructor, creates a state with an empty transition
+ * list and no (NULL) blob.
+ */
+ State() : _tlist(), _blob(NULL) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Creates a (special) state with an empty transition list and a
+ * given blob.
+ *
+ * @param b Pointer to blob.
+ */
+ State(const Blob* b) : _tlist(), _blob(b) {}
+
+ /**
+ * @brief Destructor.
+ */
+ // The state owns its blob; deleting a NULL pointer is a no-op.
+ ~State() { delete _blob; }
+
+ /**
+ * @brief Check if the state is final (accepting) state.
+ *
+ * @return True if the state is final.
+ */
+ bool isFinal() { return child(FSA::FINAL_SYMBOL)!=NULL; }
+
+ /**
+ * @brief Get the blob assigned to the state.
+ *
+ * @return Pointer to blob.
+ */
+ const Blob* getBlob() const { return _blob; }
+
+ /**
+ * @brief Check if the state has children.
+ *
+ * Returns true if the state has children (the transition list is
+ * not empty), or false if the state is a leaf.
+ *
+ * @return True if the state has children.
+ */
+ bool hasChildren() { return !_tlist.empty(); }
+
+ /**
+ * @brief Get child corresponding to a symbol.
+ *
+ * Get the child of the state which is reached by a transition
+ * with a given symbol. If there is no out-transition with that
+ * symbol, NULL is returned.
+ *
+ * @return Pointer to the child, or NULL.
+ */
+ State* child(symbol_t sy)
+ {
+   // Look the symbol up in the transition list and hand back the
+   // destination state of the match, if any.
+   Transition* const hit = _tlist.find(sy);
+   return (hit!=NULL) ? hit->_state : NULL;
+ }
+
+ /**
+ * @brief Get the last child.
+ *
+ * Get the last child of the state which is reached by a valid
+ * transition (not FINAL_SYMBOL). If no such children exists, NULL
+ * is returned.
+ *
+ * @return Pointer to last child, or NULL.
+ */
+ State* lastChild()
+ {
+   Transition* const tail = _tlist.last();
+   if(tail==NULL){
+     return NULL;                 // no transitions at all
+   }
+   if(tail->_symbol==FSA::FINAL_SYMBOL){
+     return NULL;                 // the final transition is not a child
+   }
+   return tail->_state;
+ }
+
+ /**
+ * @brief Update the last child.
+ *
+ * Updates the last child to point to a new state. This method is
+ * used when merging equivalent subtrees together.
+ *
+ * @param st New state to be used in last child.
+ */
+ void updateLastChild(State* st)
+ {
+   Transition* const tail = _tlist.last();
+   if(tail==NULL){
+     return;                      // nothing to redirect
+   }
+   tail->_state = st;
+ }
+
+ /**
+ * @brief Append a new empty child.
+ *
+ * Append an empty child to the list of transitions using the
+ * given symbol (and optional blob).
+ *
+ * @param sy New transition symbol.
+ * @param b Optional blob to be assigned to the new state, defaults to NULL.
+ * @return Pointer to the new state.
+ */
+ State* addEmptyChild(symbol_t sy, const Blob *b=NULL)
+ {
+   // Create the new state first, then link it from this state's
+   // transition list under the given symbol.
+   State* const fresh = new State(b);
+   assert(fresh!=NULL);
+   _tlist.append(sy,fresh);
+   return fresh;
+ }
+
+ /**
+ * @brief Add a transition to an existing state.
+ *
+ * Append a new transition to the list pointing to an existing
+ * state, using the given symbol.
+ *
+ * @param sy New transition symbol.
+ * @param child Pointer to destination state (already existing).
+ * @return Pointer to the child state.
+ */
+ State* addChild(symbol_t sy, State* child)
+ {
+ _tlist.append(sy,child);
+ return child;
+ }
+
+ /**
+ * @brief Get the transition list.
+ *
+ * Get the transition list of the state.
+ *
+ * @return Reference to the transition list.
+ */
+ const TransitionList& getTransitionList(void) const { return _tlist; }
+
+
+ };
+
+ // }}}
+ // {{{ Automaton::TListPtrLess
+ /**
+ * @class TListPtrLess
+ * @brief Less-than functor for use with ordered STL containers.
+ *
+ * The function compares two TransitionList pointers by comparing
+ * the objects they point to.
+ */
+ struct TListPtrLess {
+ inline bool operator()(const TransitionList * const & x, const TransitionList * const & y) const { return *x < *y; }
+ };
+ // }}}
+ // {{{ Special allocator for Register that will make it possible to completely reclaim its memory when we are done with it
+ /**
+  * @class MMapArenaAllocator
+  * @brief Arena allocator handing out objects from mmap()ed chunks.
+  *
+  * Memory is taken from anonymous private mappings of _CAPACITY bytes.
+  * Objects are carved sequentially from the most recent chunk;
+  * deallocate() does nothing, and all chunks are unmapped together by
+  * release() or by the destructor. Copies of an allocator start out
+  * empty, they do not share chunks with the original.
+  */
+ template <typename _Tp>
+ class MMapArenaAllocator {
+   std::vector<_Tp*> _chunks;  // start of every mapped chunk; the last one is current
+   size_t _size;  // used # of objects in current chunk
+   static const size_t _CAPACITY = 16 * 1024 * 1024; // capacity of chunk in bytes
+ public:
+   typedef size_t size_type;
+   typedef ptrdiff_t difference_type;
+   typedef _Tp* pointer;
+   typedef const _Tp* const_pointer;
+   typedef _Tp& reference;
+   typedef const _Tp& const_reference;
+   typedef _Tp value_type;
+
+   template<typename _Tp1>
+   struct rebind
+   { typedef MMapArenaAllocator<_Tp1> other; };
+
+   MMapArenaAllocator() throw(): _chunks(), _size(0) { }
+
+   MMapArenaAllocator(const MMapArenaAllocator&) throw(): _chunks(), _size(0) { }
+
+   template<typename _Tp1>
+   MMapArenaAllocator(const MMapArenaAllocator<_Tp1>&) throw(): _chunks(), _size(0) { }
+
+   ~MMapArenaAllocator() throw() { release(); }
+
+   pointer
+   address(reference x) const { return &x; }
+
+   const_pointer
+   address(const_reference x) const { return &x; }
+
+   /**
+    * @brief Allocate storage for count objects.
+    *
+    * @param count Number of objects; may be 0.
+    * @return Pointer to uninitialised storage, or NULL if count is 0.
+    * @throws std::bad_alloc if count exceeds max_size() or if mmap()
+    *         fails.
+    */
+   pointer
+   allocate(size_type count, const void* = 0)
+   {
+     // A zero-sized request used to return an uninitialised pointer.
+     if(count == 0)
+       return NULL;
+
+     // A request larger than a whole chunk can never be satisfied; it
+     // used to get a chunk that was too small for it.
+     if(count > max_size())
+       throw std::bad_alloc();
+
+     const size_type bytes = count * sizeof(_Tp);
+     if(_chunks.empty() || _CAPACITY - (_size*sizeof(_Tp)) < bytes) { // need new chunk
+       // Anonymous mappings take fd -1; some systems reject other values.
+       void* mem = ::mmap(0, _CAPACITY, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, /*fd=*/-1, /*offset=*/0);
+       if(mem == MAP_FAILED)
+         throw std::bad_alloc();
+       _chunks.push_back(static_cast<_Tp*>(mem));
+       _size = count;
+       return _chunks.back();
+     }
+
+     // fits in current chunk
+     pointer ret = _chunks.back() + _size;
+     _size += count;
+     return ret;
+   }
+
+   // Individual deallocation is a no-op; memory is reclaimed by release().
+   void
+   deallocate(pointer, size_type)
+   { }
+
+   /**
+    * @brief Unmap all chunks and reset the allocator to its empty state.
+    */
+   void release(void)
+   {
+     for(size_t i = 0; i < _chunks.size(); i++){
+       ::munmap(_chunks[i], _CAPACITY);
+     }
+     _chunks.clear();
+     _size = 0;
+   }
+
+   // Largest request that fits into a single chunk.
+   size_type
+   max_size() const throw()
+   { return _CAPACITY / sizeof(_Tp); }
+
+   void
+   construct(pointer p, const _Tp& val)
+   { ::new(p) value_type(val); }
+
+   void
+   destroy(pointer p) { p->~_Tp(); }
+ };
+ // }}}
+ // {{{ Automaton::Register, BlobRegister, StateArray, StateCellArray, PackMap, SymList and iterators
+
+ /**
+  * Less-than functor ordering State pointers by address.
+  */
+ struct StateArrayLess {
+   // const: the functor has no state, and algorithms or containers may
+   // call the comparator through a const object.
+   bool operator()(State* const &x, State* const &y) const
+   { return x < y; }
+ };
+ /**
+  * Pairs a state pointer with a 32-bit cell value.
+  * NOTE(review): presumably cell is the position assigned to the state
+  * while packing - confirm against the code that fills it in.
+  */
+ struct StateCellArrayItem {
+ State *state; // state pointer; StateCellArrayLess orders items by it
+ uint32_t cell; // cell value, 0 until set
+ // Default: no state, cell 0.
+ StateCellArrayItem(): state(NULL), cell(0) { }
+ // Item for a given state, cell 0. Not explicit, so a State* converts
+ // implicitly to an item.
+ StateCellArrayItem(State *s): state(s), cell(0) { }
+ };
+ /**
+  * Less-than functor ordering StateCellArrayItem objects by the address
+  * of their state pointer; the cell value is ignored.
+  */
+ struct StateCellArrayLess {
+   // const: the functor has no state, and algorithms or containers may
+   // call the comparator through a const object.
+   bool operator()(const StateCellArrayItem &x, const StateCellArrayItem &y) const
+   { return x.state < y.state; }
+ };
+
+ /**
+ * @brief Register of states, maps a transition list to a state object
+ */
+ typedef std::map< const TransitionList*,State*,TListPtrLess,MMapArenaAllocator< std::pair< const TransitionList*, State* > > > Register;
+ /**
+ * @brief State register iterator.
+ */
+ typedef std::map< const TransitionList*,State*,TListPtrLess,MMapArenaAllocator< std::pair< const TransitionList*, State* > > >::iterator RegisterIterator;
+
+ /**
+ * @brief Register of states, maps a blob to a special state.
+ */
+ typedef std::map< Blob,State* > BlobRegister;
+ /**
+ * @brief Blob register iterator.
+ */
+ typedef std::map< Blob,State* >::iterator BlobRegisterIterator;
+
+ /**
+ * @brief Array of state pointers.
+ */
+ typedef std::vector< State* > StateArray;
+ /**
+ * @brief State* array iterator.
+ */
+ typedef std::vector< State* >::iterator StateArrayIterator;
+
+ /**
+ * @brief Array of state/cell pairs.
+ */
+ typedef std::vector< StateCellArrayItem > StateCellArray;
+ /**
+ * @brief StateCell array iterator.
+ */
+ typedef std::vector< StateCellArrayItem >::iterator StateCellArrayIterator;
+
+ /**
+ * @brief Packing map, maps a state pointer to a state ID.
+ */
+ typedef std::map< const void*, unsigned int > PackMap;
+ /**
+ * @brief Packing map iterator.
+ */
+ typedef std::map< const void*, unsigned int >::iterator PackMapIterator;
+
+ /**
+ * @brief symbol_t list.
+ */
+ typedef std::list<symbol_t> SymList;
+ /**
+ * @brief symbol_t list iterator.
+ */
+ typedef std::list<symbol_t>::iterator SymListIterator;
+ /**
+ * @brief symbol_t list const_iterator.
+ */
+ typedef std::list<symbol_t>::const_iterator SymListConstIterator;
+ // }}}
+
+ // {{{ Automaton::PackedAutomaton
+
+ /**
+ * @class PackedAutomaton
+ * @brief Helper class for packing an automaton.
+ *
+ * This class is used for packing an Automaton to a compressed
+ * format which can be saved to file to be used by the FSA class.
+ */
+ class PackedAutomaton {
+
+ private:
+ bool _packable; /**< Packable flag. */
+ PackMap _blob_map; /**< Map blob pointers to indices. */
+ State **_packed_ptr; /**< Array for state pointers. */
+ state_t *_packed_idx; /**< Array for state indices. */
+ symbol_t *_symbol; /**< Array for transition symbols. */
+ bool *_used; /**< Array for cell used flags. */
+ hash_t *_perf_hash; /**< Array for perfect hash deltas. */
+ hash_t *_totals; /**< Array for perfect hash totals. */
+ uint32_t _packed_size; /**< Size of packed arrays (in cells). */
+ uint32_t _last_packed; /**< Index of last packed state. */
+
+ data_t *_blob; /**< Data storage. */
+ uint32_t _blob_size; /**< Data storage size. */
+ uint32_t _blob_used; /**< Used data storage size. */
+ uint32_t _blob_type; /**< Type of data items (fixed/var.) */
+ uint32_t _fixed_blob_size; /**< Data item size if fixed. */
+
+ state_t _start_state; /**< Index of start state. */
+
+ /**
+ * @brief Number of cells to allocate in one expansion.
+ */
+ static const uint32_t _ALLOC_CELLS = 131072; // 128k
+
+ /**
+ * @brief Number of bytes to allocate in one data storage expansion.
+ */
+ static const uint32_t _ALLOC_BLOB = 65536; // 64k
+
+ /**
+ * @brief How long back the search for an empty cell should start.
+ */
+ static const uint32_t _BACKCHECK = 255;
+
+
+ /**
+ * @brief Expand cell arrays.
+ */
+ void expandCells();
+
+ /**
+ * @brief Expand data storage.
+ *
+ * @param minExpand Minimum size to expand; it will be rounded up
+ * to the nearest multiple of _ALLOC_BLOB.
+ */
+ void expandBlob(uint32_t minExpand);
+
+ /**
+ * @brief Get an empty cell.
+ *
+ * Start looking for an empty cell _BACKCHECK cells before the
+ * last packed cell, and return the index of the first empty cell
+ * found. The cell arrays are expanded on demand, that is if no
+ * empty cell is found.
+ *
+ * @return Index of empty cell.
+ */
+ uint32_t getEmptyCell();
+
+ /**
+ * @brief Get an empty cell where a list of transitions can be stored.
+ *
+ * Start looking for an empty cell _BACKCHECK cells before the
+ * last packed cell. In addition to the cell being empty, it
+ * should be possible to store a list of transitions from that
+ * cell. The cell arrays are expanded on demand, that is if no
+ * empty cell is found.
+ *
+ * @param t List of transition symbols.
+ * @return Index of empty cell.
+ */
+ uint32_t getCell(const SymList &t);
+
+ /**
+ * @brief Pack a data item.
+ *
+ * Pack a data item to the data storage. If the same (or
+ * equivalent) data item has been packed before, return the offset
+ * where it was packed. Otherwise, pack the data item at the end
+ * of the storage (expand storage if needed), add the item and
+ * offset to the blob map and return the offset.
+ *
+ * @param b Pointer to data item.
+ * @return Offset to data item in data storage.
+ */
+ uint32_t packBlob(const Blob* b);
+
+ /**
+ * @brief Compute perfect hash deltas for a subtree.
+ *
+ * Recursive function for computing the perfect hash deltas for
+ * all transitions within a subtree. The delta for transition T
+ * from state S is the number of final states reachable from state
+ * S via transitions lower than T (that is, with a lower input
+ * symbol). Also, state S being a final state counts. The hash
+ * deltas are filled into the _perf_hash array.
+ *
+ * @return Number of final states within the subtree.
+ */
+ hash_t computePerfectHash(state_t state);
+
+
+ public:
+
+ /**
+ * @brief Default constructor.
+ */
+ PackedAutomaton() :
+ _packable(false),
+ _blob_map(),
+ _packed_ptr(NULL),
+ _packed_idx(NULL),
+ _symbol(NULL),
+ _used(NULL),
+ _perf_hash(NULL),
+ _totals(NULL),
+ _packed_size(0),
+ _last_packed(0),
+ _blob(NULL),
+ _blob_size(0),
+ _blob_used(0),
+ _blob_type(0),
+ _fixed_blob_size(0),
+ _start_state(0)
+ { }
+
+ /**
+ * @brief Destructor.
+ */
+ ~PackedAutomaton() { reset(); }
+
+ /**
+ * @brief Reset the object.
+ *
+ * Reset the object and free all allocated memory.
+ */
+ void reset();
+
+ /**
+ * @brief Initialize.
+ *
+ * Reset the object, and initialize data structures, also
+ * preallocate memory for cell and data storage.
+ */
+ void init();
+
+ /**
+ * @brief Pack a state.
+ *
+ * Pack a state and its transitions into the compact structure. For
+ * final states, the data item is packed as well.
+ *
+ * @param it Iterator to the state/cell array item to pack.
+ * @return False if the object is not packable (it has been
+ * finalized, or it has not been initialized)
+ */
+ bool packState(Automaton::StateCellArrayIterator &it);
+
+ /**
+ * @brief Set the cell of the start state.
+ *
+ * @param cell Cell of start state.
+ */
+ void setStartState(uint32_t cell) { _start_state = (state_t)cell; }
+
+ /**
+ * @brief Finalize the packed structure.
+ *
+ * Obtain all state indices from the state pointers using the
+ * pack map. Also compact the data storage if all data items have
+ * the same size (only store the size once, and store data items
+ * consecutively, without size attribute).
+ *
+ * @param queue State queue.
+ */
+ void finalize(const StateCellArray &queue);
+
+ /**
+ * @brief Add perfect hash to the automaton.
+ *
+ * Computes the perfect hash for the whole automaton.
+ */
+ void addPerfectHash();
+
+ /**
+ * @brief Write the automaton to a file.
+ *
+ * @param filename Name of file.
+ * @param serial Serial number.
+ * @return True on success.
+ */
+ bool write(const char *filename, uint32_t serial = 0);
+
+ /**
+ * @brief Read an automaton from file.
+ *
+ * @param filename Name of file.
+ * @return True on success.
+ */
+ bool read(const char *filename);
+
+ /**
+ * @brief Perform a lookup in the packed automaton.
+ *
+ * @param input Input string
+ * @return Pointer to data associated with input, or NULL if input is not accepted.
+ */
+ const unsigned char* lookup(const char *input) const;
+
+ /**
+ * @brief Create an FSA object from the automaton.
+ *
+ * Create an FSA object from the automaton. The PackedAutomaton is
+ * implicitly reset if the operation succeeds. PackedAutomaton
+ * cannot access the private constructor of FSA, so we have to pass
+ * the object via a struct, which is ugly :-(.
+ *
+ * @param d Reference to the FSA::Descriptor (struct) to store necessary info for
+ * creating the FSA object.
+ * @return True if the operation was successful.
+ */
+ bool getFSA(FSA::Descriptor &d);
+
+ };
+
+ // }}}
+
+
+ Register *_register; /**< Register of states. */
+ BlobRegister _blob_register; /**< Register of data items. */
+ State* _q0; /**< Start state. */
+ StateArray *_queue; /**< State queue. */
+ bool _finalized; /**< Finalized flag. */
+ PackedAutomaton _packed; /**< Packed automaton. */
+
+ /**
+ * @brief Get last state in common path.
+ *
+ * Get the last state of the common path shared by the current input
+ * string and strings already in the automaton. Also sets a pointer
+ * to the suffix part of \a input which occurs after the last state.
+ *
+ * @param input Input string.
+ * @param suffix Set to point to the part of \a input after the last state.
+ * @return Pointer to last state in common path.
+ */
+ State* getCPLastState(const char *input, const char *&suffix);
+
+ /**
+ * @brief Replace or register a state.
+ *
+ * Replace the state with an already registered equivalent state in
+ * the automaton, or register it if no such state exists yet.
+ *
+ * @param state Pointer to state to be replaced or registered.
+ */
+ void replaceOrRegister(State* state);
+
+ /**
+ * @brief Add new states for a suffix.
+ *
+ * Add the necessary new states for a suffix of an input string. The
+ * suffix is that part of an input string which is not covered by
+ * the common path.
+ *
+ * @param state Pointer to last state in the common path.
+ * @param suffix Suffix.
+ * @param b Data item associated with the input.
+ */
+ void addSuffix(State* state, const char *suffix, const Blob *b=NULL);
+
+ /**
+ * @brief Clean up data structures and release memory.
+ */
+ void cleanUp();
+
+public:
+
+ /**
+ * @brief Default constructor.
+ */
+ Automaton() :
+ _register(NULL),
+ _blob_register(),
+ _q0(NULL),
+ _queue(NULL),
+ _finalized(false),
+ _packed()
+ { }
+
+ /**
+ * @brief Destructor.
+ */
+ ~Automaton();
+
+ /**
+ * @brief Initialize the object.
+ */
+ void init();
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ */
+ void insertSortedString(const std::string &input);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param meta Meta info string to be stored in data item.
+ */
+ void insertSortedString(const std::string &input, const std::string &meta);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param b Reference to data item.
+ */
+ void insertSortedString(const char *input, const Blob &b);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param b Pointer to data item.
+ */
+ void insertSortedString(const char *input, const Blob *b=NULL);
+
+ /**
+ * @brief Finalize the automaton.
+ *
+ * Finalize the automaton. This involves calling replaceOrRegister
+ * for the start state _q0, and building the packed automaton, so no
+ * strings can be added to the automaton after this method is
+ * called.
+ */
+ void finalize();
+
+ /**
+ * @brief Add perfect hash to automaton.
+ *
+ * Compute and add perfect hash structure to the automaton. Only
+ * works on finalized automata.
+ */
+ void addPerfectHash();
+
+ /**
+ * @brief Write the finalized automaton to file.
+ *
+ * @param file Name of the file.
+ * @param serial Serial number.
+ * @return True on success.
+ */
+ bool write(const char *file, uint32_t serial = 0);
+
+ /**
+ * @brief Write the finalized automaton to file.
+ *
+ * @param file Name of the file.
+ * @param serial Serial number.
+ * @return True on success.
+ */
+ bool write(const std::string &file, uint32_t serial = 0)
+ {
+ return write(file.c_str(),serial);
+ }
+
+ /**
+ * @brief Create an FSA object from the automaton.
+ *
+ * Create an FSA object from the automaton. The Automaton and
+ * PackedAutomaton are implicitly reset.
+ *
+ * @return Pointer to a newly created FSA object. The caller is
+ * responsible for freeing it.
+ */
+ FSA* getFSA(void);
+
+};
+// }}}
+
+// Equality for MMapArenaAllocator: any two instances of the same value
+// type compare equal, regardless of which chunks each one owns.
+// NOTE(review): presumably acceptable because deallocate() is a no-op, so
+// storage is never handed back through a different instance - confirm.
+template<typename _Tp>
+ inline bool
+ operator==(const Automaton::MMapArenaAllocator<_Tp>&, const Automaton::MMapArenaAllocator<_Tp>&)
+ { return true; }
+
+// Inequality for MMapArenaAllocator: always false, i.e. any two instances
+// of the same value type are treated as interchangeable.
+template<typename _Tp>
+ inline bool
+ operator!=(const Automaton::MMapArenaAllocator<_Tp>&, const Automaton::MMapArenaAllocator<_Tp>&)
+ { return false; }
+
+} // namespace fsa
+
diff --git a/fsa/src/libfsamanagers/.gitignore b/fsa/src/libfsamanagers/.gitignore
new file mode 100644
index 00000000000..9fb98574200
--- /dev/null
+++ b/fsa/src/libfsamanagers/.gitignore
@@ -0,0 +1,6 @@
+*.la
+*.lo
+.deps
+.libs
+Makefile
+Makefile.in
diff --git a/fsa/src/main/java/com/yahoo/fsa/FSA.java b/fsa/src/main/java/com/yahoo/fsa/FSA.java
new file mode 100644
index 00000000000..6e352f3ddca
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/FSA.java
@@ -0,0 +1,636 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.CharBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.charset.Charset;
+import java.util.NoSuchElementException;
+
+
+/**
+ * Finite-State Automaton.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ */
+public class FSA {
+
+ /**
+ * Thread local state object used to traverse a Finite-State Automaton.
+ */
+ public static class State {
+
+ FSA fsa;
+ int state = 0;
+ int hash = 0;
+
+ private State(FSA fsa) {
+ this.fsa = fsa;
+ start();
+ }
+
+ public void start(){
+ state = fsa.start();
+ hash = 0;
+ }
+
+ public void delta(byte symbol) {
+ hash += fsa.hashDelta(state,symbol);
+ state = fsa.delta(state,symbol);
+ }
+
+ /** Returns whether the given symbol would take us to a valid state, without changing the state */
+ public boolean peekDelta(byte symbol) {
+ return fsa.delta(state,symbol)!=0;
+ }
+
+ public boolean tryDelta(byte symbol) {
+ int lastHash=hash;
+ int lastState=state;
+ delta(symbol);
+ if (isValid()) return true;
+
+ hash=lastHash;
+ state=lastState;
+ return false;
+ }
+
+ public void delta(char chr){
+ CharBuffer chrbuf = CharBuffer.allocate(1);
+ chrbuf.put(0,chr);
+ ByteBuffer buf = fsa.encode(chrbuf);
+ while(state >0 && buf.position()<buf.limit()){
+ delta(buf.get());
+ }
+ }
+
+ /** Jumps ahead by string */
+ public void delta(String string){
+ ByteBuffer buf = fsa.encode(string);
+ while(state >0 && buf.position()<buf.limit()){
+ delta(buf.get());
+ }
+ }
+
+ /**
+ * Jumps ahead by string if that puts us into a valid state, does nothing otherwise
+ *
+ * @return whether we jumped to a valid state (true) or did nothing (false)
+ */
+ public boolean tryDelta(String string){
+ int lastHash=hash;
+ int lastState=state;
+ delta(string);
+ if (isValid()) return true;
+
+ hash=lastHash;
+ state=lastState;
+ return false;
+ }
+
+ /** Jumps ahead by a word - if this is not the first word, it must be preceded by space. */
+ public void deltaWord(String string){
+ if (state!=fsa.start()) {
+ delta((byte)' ');
+ }
+ delta(string);
+ }
+
+ /**
+ * Tries to jump ahead by one word. If the given string is not the next complete valid word, nothing is done.
+ */
+ public boolean tryDeltaWord(String string){
+ int lastHash=hash;
+ int lastState=state;
+ tryDelta((byte)' ');
+ delta(string);
+ if (isValid() && peekDelta((byte)' ')) return true;
+ if (isFinal()) return true;
+
+ hash=lastHash;
+ state=lastState;
+ return false;
+ }
+
+ public boolean isFinal(){
+ return fsa.isFinal(state);
+ }
+
+ public boolean isStartState() {
+ return fsa.start() == state;
+ }
+
+ public boolean isValid(){
+ return state !=0;
+ }
+
+ public ByteBuffer data(){
+ return fsa.data(state);
+ }
+
+ public String dataString(){
+ return fsa.dataString(state);
+ }
+
+ public int hash(){
+ return hash;
+ }
+
+ public ByteBuffer lookup(String str){
+ start();
+ delta(str);
+ return fsa.data(state);
+ }
+
+ public boolean hasPerfectHash(){
+ return fsa.hasPerfectHash();
+ }
+
+ }
+
+ /**
+ * Class used to iterate over all accepted strings in the fsa.
+ */
+ public static class Iterator implements java.util.Iterator<Iterator.Item> {
+ /**
+ * Internally, this class stores the state information for the iterator.
+ * Externally, it is used for accessing the data associated with the iterator position.
+ */
+ public static class Item {
+ private FSA fsa;
+ private java.util.Stack<Byte> string;
+ private int symbol;
+ private int state;
+ private java.util.Stack<Integer> stack;
+
+ /**
+  * Creates the iteration state positioned at the given automaton state,
+  * with an empty accepted-string buffer and an empty backtracking stack.
+  *
+  * @param fsa the FSA object the iterator is associated with.
+  * @param state the state used as start state.
+  */
+ public Item(FSA fsa, int state) {
+     this.fsa = fsa;
+     // typed instead of raw Stack: no unchecked-conversion warnings
+     this.string = new java.util.Stack<Byte>();
+     this.symbol = 0;
+     this.state = state;
+     this.stack = new java.util.Stack<Integer>();
+ }
+
+ /**
+  * Copy constructor. Copies the fsa reference, the bytes of the current
+  * string, the symbol and the state. The state stack is NOT copied and is
+  * left as null in the copy.
+  *
+  * @param item the item to copy.
+  */
+ public Item(Item item) {
+     this.fsa = item.fsa;
+     // typed instead of raw Stack; addAll keeps the bottom-to-top order
+     this.string = new java.util.Stack<Byte>();
+     this.string.addAll(item.string);
+     this.symbol = item.symbol;
+     this.state = item.state;
+     // no need to fill the stack as this constructor is used by Iterator::next()
+     this.stack = null;
+ }
+
+ /** Returns the bytes collected so far, decoded with the charset of the fsa. */
+ public String getString() {
+     ByteBuffer bytes = ByteBuffer.allocate(string.size());
+     for (byte b : string) {
+         bytes.put(b);
+     }
+     bytes.flip();
+     return fsa.decode(bytes);
+ }
+
+ public ByteBuffer getData() {
+ return fsa.data(state);
+ }
+
+ public String getDataString() {
+ return fsa.dataString(state);
+ }
+
+ public String toString() {
+ return "string: " + string + "(" + getString() + "), symbol: " + symbol + ", state: " + state;
+ }
+ }
+
+ private Item item;
+ boolean useInitState = false;
+
+ /**
+ * Constructor.
+ * @param state the state to create the iterator from.
+ */
+ public Iterator(State state) {
+ item = new Item(state.fsa, state.state);
+ if (state.isFinal()) {
+ useInitState = true;
+ } else {
+ findNext();
+ }
+ }
+
+ /**
+  * Advances the item to the next final state in a depth-first walk over
+  * the symbols 1..255, or sets the item state to 0 when the walk is
+  * exhausted.
+  *
+  * @throws NoSuchElementException if the walk was already exhausted
+  *         (symbol is 256) or the item has no fsa.
+  */
+ private void findNext() {
+     if (item.symbol == 256 || item.fsa == null) {
+         throw new NoSuchElementException();
+     }
+
+     // the initial state (if it was final) has been returned by now
+     useInitState = false;
+
+     // try to find the next final state
+     for(;;) {
+         item.symbol++;
+         if (item.symbol < 256) {
+             int nextState = item.fsa.delta(item.state, (byte)item.symbol);
+             if (nextState != 0) {
+                 // descend: remember where we came from so we can backtrack
+                 item.string.push((byte)item.symbol);
+                 item.stack.push(item.state);
+                 item.state = nextState;
+                 item.symbol = 0;
+                 if (item.fsa.isFinal(nextState)) {
+                     break;
+                 }
+             }
+         } else if (!item.string.isEmpty()) { // backtrack
+             // resume after the symbol that led here (bytes are signed, symbols are 0..255)
+             item.symbol = item.string.pop() & 0xFF;
+             item.state = item.stack.pop();
+         } else { // nothing left to backtrack to: iteration is complete
+             item.state = 0;
+             break;
+         }
+     }
+ }
+
+ public boolean hasNext() {
+ return item.state != 0 || useInitState;
+ }
+
+ public Item next() {
+ Item retval = new Item(item);
+ findNext();
+ return retval;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ public State getState(){
+ return new State(this);
+ }
+
+ /**
+ * Returns a new iterator to the start state.
+ */
+ public Iterator iterator() {
+ return new Iterator(getState());
+ }
+
+ /**
+ * Returns a new iterator to the given state.
+ * @param state the state to create the iterator from.
+ */
+ public Iterator iterator(State state) {
+ return new Iterator(state);
+ }
+
+ private boolean _ok = false;
+ private MappedByteBuffer _header;
+ private MappedByteBuffer _symbol_tab;
+ private MappedByteBuffer _state_tab;
+ private MappedByteBuffer _data;
+ private MappedByteBuffer _phash;
+ private Charset _charset;
+
+ /**
+  * Loads an FSA from a resource file name, which is resolved from the class path of the
+  * class loader of the given class.
+  * <p>
+  * This is useful for loading fsa's deployed within OSGi bundles.
+  *
+  * @param resourceFileName the name of the file, relative to any element on the classpath.
+  *        For example, if the classpath contains resources/ and the file is resources/myfsa.fsa
+  *        this argument should be myfsa.fsa
+  * @param loadingClass a class which provides the class loader to use for loading. Any class which is loaded
+  *        from the same class path as the resource will do (e.g with OSGi - any class in the same bundle jar)
+  * @return the loaded FSA
+  * @throws RuntimeException if the resource is not found, or if it is not available through a file url
+  */
+ public static FSA loadFromResource(String resourceFileName,Class<?> loadingClass) {
+     URL fsaUrl=loadingClass.getResource(resourceFileName);
+     // getResource returns null (rather than throwing) when nothing is found
+     if (fsaUrl == null)
+         throw new RuntimeException("Could not find resource '" + resourceFileName +
+                                    "' using the classloader of '" + loadingClass + "'");
+     if ( ! "file".equals(fsaUrl.getProtocol()))
+         throw new RuntimeException("Could not open non-file url '" + fsaUrl + "' as a file input stream: " +
+                                    "The classloader of '" + loadingClass + "' does not return file urls");
+     // NOTE(review): URL.getFile() is not URL-decoded, so paths containing e.g. spaces may not open - confirm
+     return new FSA(fsaUrl.getFile());
+ }
+
+ /**
+ * Loads an FSA from a file using utf-8 encoding
+ *
+ * @throws IllegalArgumentException if the file is not found
+ */
+ public FSA(String filename) {
+ init(filename,"utf-8");
+ }
+
+ /**
+ * Loads an FSA from a file using the specified character encoding.
+ *
+ * @throws IllegalArgumentException if the file is not found
+ */
+ public FSA(String filename, String charsetname) {
+ init(filename,charsetname);
+ }
+
+ /** Loads an FSA from a file input stream using utf-8 encoding */
+ public FSA(FileInputStream filename) {
+ init(filename,"utf-8");
+ }
+
+ /** Loads an FSA from a file input stream using the specified character encoding */
+ public FSA(FileInputStream filename, String charsetname) {
+ init(filename,charsetname);
+ }
+
+ /**
+  * Opens the named file, maps its content through the stream-based init
+  * and closes the stream again. The mappings stay valid after the close.
+  *
+  * @throws IllegalArgumentException if the file is not found, or if closing it fails
+  */
+ private void init(String filename, String charsetname){
+     try {
+         FileInputStream file = new FileInputStream(filename);
+         try {
+             init(file,charsetname);
+         }
+         finally {
+             // this stream was opened here, so it must be released here
+             file.close();
+         }
+     }
+     catch (FileNotFoundException e) {
+         throw new IllegalArgumentException("Could not find FSA file '" + filename + "'",e);
+     }
+     catch (IOException e) {
+         throw new IllegalArgumentException("Could not read FSA file '" + filename + "'",e);
+     }
+ }
+
+ private void init(FileInputStream file, String charsetname) {
+ try {
+ _charset = Charset.forName(charsetname);
+
+ _header = file.getChannel().map(MapMode.READ_ONLY,0,256);
+ _header.order(ByteOrder.LITTLE_ENDIAN);
+ if (h_magic()!=2038637673) {
+ throw new IOException("Stream does not contain an FSA: Wrong file magic number " + h_magic());
+ }
+ _symbol_tab = file.getChannel().map(MapMode.READ_ONLY,
+ 256,h_size());
+ _symbol_tab.order(ByteOrder.LITTLE_ENDIAN);
+ _state_tab = file.getChannel().map(MapMode.READ_ONLY,
+ 256+h_size(),4*h_size());
+ _state_tab.order(ByteOrder.LITTLE_ENDIAN);
+ _data = file.getChannel().map(MapMode.READ_ONLY,
+ 256+5*h_size(),h_data_size());
+ _data.order(ByteOrder.LITTLE_ENDIAN);
+ if(h_has_phash()>0){
+ _phash = file.getChannel().map(MapMode.READ_ONLY,
+ 256+5*h_size()+h_data_size(),
+ 4*h_size());
+ _phash.order(ByteOrder.LITTLE_ENDIAN);
+ }
+ _ok=true;
+ }
+ catch (IOException e) {
+ throw new RuntimeException("IO error while reading FSA file",e);
+ }
+ }
+
+ private int h_magic(){
+ return _header.getInt(0);
+ }
+ private int h_version(){
+ return _header.getInt(4);
+ }
+ private int h_checksum(){
+ return _header.getInt(8);
+ }
+ private int h_size(){
+ return _header.getInt(12);
+ }
+ private int h_start(){
+ return _header.getInt(16);
+ }
+ private int h_data_size(){
+ return _header.getInt(20);
+ }
+ private int h_data_type(){
+ return _header.getInt(24);
+ }
+ private int h_fixed_data_size(){
+ return _header.getInt(28);
+ }
+ private int h_has_phash(){
+ return _header.getInt(32);
+ }
+ private int h_serial(){
+ return _header.getInt(36);
+ }
+ /** Reads the symbol table byte at the given index as an unsigned value (0..255). */
+ private int getSymbol(int index){
+     return _symbol_tab.get(index) & 0xFF;
+ }
+
+ private ByteBuffer encode(String str){
+ return _charset.encode(str);
+ }
+
+ private ByteBuffer encode(CharBuffer chrbuf){
+ return _charset.encode(chrbuf);
+ }
+
+ private String decode(ByteBuffer buf){
+ return _charset.decode(buf).toString();
+ }
+
+ public boolean isOk(){
+ return _ok;
+ }
+
+ public boolean hasPerfectHash(){
+ return _ok && h_has_phash()==1;
+ }
+
+ public int version(){
+ if(_ok){
+ return h_version();
+ }
+ return 0;
+ }
+
+ public int serial(){
+ if(_ok){
+ return h_serial();
+ }
+ return 0;
+ }
+
+ protected int start(){
+ if(_ok){
+ return h_start();
+ }
+
+ return 0;
+ }
+
+ protected int delta(int state, byte symbol){
+ int s=symbol;
+ if(s<0){
+ s+=256;
+ }
+ if(_ok && s>0 && s<255){
+ if(getSymbol(state+s)==s){
+ return _state_tab.getInt(4*(state+s));
+ }
+ }
+ return 0;
+ }
+
+ protected int hashDelta(int state, byte symbol){
+ int s=symbol;
+ if(s<0){
+ s+=256;
+ }
+ if(_ok && h_has_phash()==1 && s>0 && s<255){
+ if(getSymbol(state+s)==s){
+ return _phash.getInt(4*(state+s));
+ }
+ }
+ return 0;
+ }
+
+ /** Returns true if the fsa is ok and cell state+255 holds the symbol 255. */
+ protected boolean isFinal(int state){
+     return _ok && getSymbol(state+255)==255;
+ }
+
+ /**
+ * Retrieves data for the given state using the underlying fsa data buffer.
+ * @param state The fsa state to retrieve data from.
+ * @return A new buffer containing the data for the given state.
+ **/
+ protected ByteBuffer data(int state) {
+ if(_ok && isFinal(state)){
+ int offset = _state_tab.getInt(4*(state+255));
+ int length;
+ if(h_data_type()==1){
+ length = h_fixed_data_size();
+ }
+ else{
+ length = _data.getInt(offset);
+ offset += 4;
+ }
+ ByteBuffer meta = ByteBuffer.allocate(length);
+ meta.order(ByteOrder.LITTLE_ENDIAN);
+ byte[] dst = meta.array();
+ for (int i = 0; i < length; ++i) {
+ dst[i] = _data.get(i + offset);
+ }
+ return meta;
+ }
+ return null;
+ }
+
+ /**
+  * Decodes the data of the given state to a string, using the charset of this fsa.
+  * @param state The fsa state to retrieve data from.
+  * @return The decoded data with one trailing '\0' removed if present,
+  *         or null if there is no data for the state.
+  **/
+ protected String dataString(int state) {
+     ByteBuffer raw = data(state);
+     if(raw==null){
+         return null;
+     }
+     String text = decode(raw);
+     // A trailing '\0' is usually present for automata built with
+     // text format (makefsa -t); it is not part of the value.
+     return text.endsWith("\0") ? text.substring(0, text.length()-1) : text;
+ }
+
+ /**
+ * Convenience method that returns the metadata string in the fsa
+ * for the input lookup String, or null if the input string does
+ * not exist in the fsa.
+ * @param str The string to look up.
+ * @return Metadata string from the fsa. */
+ public String lookup(String str){
+ State s = getState();
+ s.lookup(str);
+ return s.dataString();
+ }
+
+
+ //// test ////
+ public static void main(String[] args) {
+ String test = "sour cherry";
+ if (args.length >= 1) {
+ test = args[0];
+ }
+
+ String fsafile = "/home/gv/fsa/test/__testfsa__.__fsa__";
+ //String fsafile = "/home/p13n/prelude/automata/query2dmozsegments.fsa";
+
+ FSA fsa = new FSA(fsafile);
+
+ System.out.println("Loading FSA file "+fsafile+": "+fsa.isOk());
+ System.out.println(" version: " + fsa.version()/1000000 + "." +
+ (fsa.version()/1000) % 1000 + "." +
+ fsa.version() % 1000);
+ System.out.println(" serial: " + fsa.serial());
+ System.out.println(" phash: " + fsa.hasPerfectHash());
+
+ FSA.State s = fsa.getState();
+
+ s.start();
+ for (int i=0; i < test.length(); i++) {
+ s.delta(test.charAt(i));
+ }
+ System.out.println("\ndelta() char test " + test + ": " +
+ s.isFinal() + ", info: " + s.dataString() +
+ ", hash value: " + s.hash());
+
+ s.start();
+ s.delta(test);
+ System.out.println("\ndelta() test " + test + ": " +
+ s.isFinal() + ", info: " + s.dataString() +
+ ", hash value: " + s.hash());
+
+ s.lookup(test);
+ String data = s.dataString();
+ System.out.println("\nlookup() test \"" + test + "\": " +
+ (s.lookup(test) != null) +
+ ", info: " + data + ", hash value: " + s.hash());
+
+ String data2 = fsa.lookup(test);
+ System.out.println("\nFSA.lookup() test \"" + test + "\": " + data2);
+ }
+}
+
+
diff --git a/fsa/src/main/java/com/yahoo/fsa/MetaData.java b/fsa/src/main/java/com/yahoo/fsa/MetaData.java
new file mode 100644
index 00000000000..fde868464c8
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/MetaData.java
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.charset.Charset;
+
+import com.yahoo.fsa.FSA;
+
+
+/**
+ * Class for accessing meta-data (dat-files) used by FSA applications.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ **/
+public class MetaData {
+
+ private boolean _ok = false;
+ private MappedByteBuffer _header;
+ private MappedByteBuffer _data;
+ private Charset _charset;
+
+
+ public MetaData(String filename){
+ init(filename, "utf-8");
+ }
+
+ public MetaData(String filename, String charsetname){
+ init(filename, charsetname);
+ }
+
+ public boolean isOk(){
+ return _ok;
+ }
+
+ /**
+  * Opens the named file and maps its 256 byte header and its data section
+  * (as many bytes as the header's size field says), both read as little
+  * endian. Problems are reported on standard out and leave the object in
+  * the not-ok state. The file stream is closed before returning; the
+  * mappings stay valid after the close.
+  */
+ private void init(String filename, String charsetname){
+
+     _charset = Charset.forName(charsetname);
+
+     FileInputStream file;
+     try {
+         file = new FileInputStream(filename);
+     }
+     catch (FileNotFoundException e) {
+         System.out.print("MetaData file " + filename + " not found.\n");
+         return;
+     }
+
+     try {
+         FileChannel channel = file.getChannel();
+         _header = channel.map(MapMode.READ_ONLY,0,256);
+         _header.order(ByteOrder.LITTLE_ENDIAN);
+         if(h_magic()!=-2025936501){
+             System.out.print("MetaData bad magic " + h_magic() +"\n");
+             return;
+         }
+         _data = channel.map(MapMode.READ_ONLY,
+                             256,
+                             h_size());
+         _data.order(ByteOrder.LITTLE_ENDIAN);
+         _ok=true;
+     }
+     catch (IOException e) {
+         System.out.print("MetaData IO exception.\n");
+         return;
+     }
+     finally {
+         // release the descriptor on every path, including the early returns above
+         try {
+             file.close();
+         }
+         catch (IOException e) {
+             // best effort: the outcome of init is already decided at this point
+         }
+     }
+ }
+
+ private int h_magic(){
+ return _header.getInt(0);
+ }
+ private int h_version(){
+ return _header.getInt(4);
+ }
+ private int h_checksum(){
+ return _header.getInt(8);
+ }
+ private int h_size(){
+ return _header.getInt(12);
+ }
+ private int h_reserved(int i){
+ if(i<0||i>9){
+ return 0;
+ }
+ return _header.getInt(16+4*i);
+ }
+ private int h_user(int i){
+ if(i<0||i>49){
+ return 0;
+ }
+ return _header.getInt(56+4*i);
+ }
+
+
+ private ByteBuffer encode(CharBuffer chrbuf){
+ return _charset.encode(chrbuf);
+ }
+
+ private String decode(ByteBuffer buf){
+ return _charset.decode(buf).toString();
+ }
+
+
+ public int user(int i){
+ if(!_ok){
+ return 0;
+ }
+ return h_user(i);
+ }
+
+ public int getIntEntry(int idx)
+ {
+ if(_ok){
+ return _data.getInt(idx*4);
+ }
+ else
+ return 0;
+ }
+
+ public ByteBuffer getDirectRecordEntry(int idx, int size)
+ {
+ if(_ok){
+ ByteBuffer meta = ByteBuffer.allocate(size);
+ meta.order(ByteOrder.LITTLE_ENDIAN);
+ _data.position(idx*size);
+ _data.get(meta.array(),0,size);
+ return meta;
+ }
+ else
+ return null;
+ }
+
+ public ByteBuffer getIndirectRecordEntry(int idx, int size)
+ {
+ if(_ok){
+ int offset = _data.getInt(idx*4);
+ ByteBuffer meta = ByteBuffer.allocate(size);
+ meta.order(ByteOrder.LITTLE_ENDIAN);
+ _data.position(offset);
+ _data.get(meta.array(),0,size);
+ return meta;
+ }
+ else
+ return null;
+ }
+
+ public ByteBuffer getIndirectRecordEntry(int idx)
+ {
+ if(_ok){
+ int offset = _data.getInt(idx*4);
+ int size = _data.getInt(offset);
+ ByteBuffer meta = ByteBuffer.allocate(size);
+ meta.order(ByteOrder.LITTLE_ENDIAN);
+ _data.position(offset+4);
+ _data.get(meta.array(),0,size);
+ return meta;
+ }
+ else
+ return null;
+ }
+
+ /**
+  * Returns the zero-terminated string starting at the given offset of the
+  * data section, decoded with the charset of this object, or null if the
+  * object is not ok.
+  */
+ public String getStringEntry(int stringOffset){
+     if(!_ok){
+         return null;
+     }
+     // first pass: count the bytes in front of the terminating zero
+     _data.position(stringOffset);
+     int byteCount = 0;
+     for(byte b = _data.get(); b != 0; b = _data.get()){
+         byteCount++;
+     }
+     // second pass: copy exactly those bytes
+     ByteBuffer copy = ByteBuffer.allocate(byteCount);
+     copy.order(ByteOrder.LITTLE_ENDIAN);
+     _data.position(stringOffset);
+     _data.get(copy.array(),0,byteCount);
+     return decode(copy);
+ }
+
+ public String[] getStringArrayEntry(int stringOffset, int numStrings){
+ if(_ok && numStrings>0){
+ String[] stringArray = new String[numStrings];
+ int pos=stringOffset;
+ for(int i=0;i<numStrings;i++){
+ int length = 0;
+ _data.position(pos);
+ while(_data.get()!=0){
+ length++;
+ }
+ ByteBuffer meta = ByteBuffer.allocate(length);
+ meta.order(ByteOrder.LITTLE_ENDIAN);
+ _data.position(pos);
+ _data.get(meta.array(),0,length);
+ stringArray[i] = decode(meta);
+ pos += length+1;
+ }
+ return stringArray;
+ }
+ return null;
+ }
+
+ //// test ////
+ public static void main(String[] args) {
+ String file = "dmozPred_2.dat";
+
+ MetaData metaData = new MetaData(file);
+
+ System.out.println("Loading MetaData "+file+": "+metaData.isOk());
+ }
+
+
+
+}
diff --git a/fsa/src/main/java/com/yahoo/fsa/conceptnet/ConceptNet.java b/fsa/src/main/java/com/yahoo/fsa/conceptnet/ConceptNet.java
new file mode 100644
index 00000000000..13cb93073d2
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/conceptnet/ConceptNet.java
@@ -0,0 +1,384 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.conceptnet;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.charset.Charset;
+
+import com.yahoo.fsa.FSA;
+
+
+/**
+ * Class for accessing the concept network automata.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ **/
+public class ConceptNet {
+
+ private FSA _fsa;
+ private boolean _ok = false;
+ private MappedByteBuffer _header;
+ private MappedByteBuffer _index;
+ private MappedByteBuffer _info;
+ private MappedByteBuffer _catindex;
+ private MappedByteBuffer _strings;
+ private Charset _charset;
+
+
+ public ConceptNet(String domain){
+ init(domain, "utf-8");
+ }
+
+ public ConceptNet(String domain, String charsetname){
+ init(domain, charsetname);
+ }
+
+ public boolean isOk(){
+ return _ok;
+ }
+
+ private void init(String domain, String charsetname){
+
+ _charset = Charset.forName(charsetname);
+
+ _fsa = new FSA(domain + ".fsa",charsetname);
+
+ if(!_fsa.isOk()){
+ return;
+ }
+
+ FileInputStream file;
+ try {
+ file = new FileInputStream(domain + ".dat");
+ }
+ catch (FileNotFoundException e) {
+ System.out.print("ConceptNet data file " + domain + ".dat" + " not found.\n");
+ return;
+ }
+
+ try {
+ _header = file.getChannel().map(MapMode.READ_ONLY,0,256);
+ _header.order(ByteOrder.LITTLE_ENDIAN);
+ if(h_magic()!=238579428){
+ System.out.print("ConceptNet bad magic " + h_magic() +"\n");
+ return;
+ }
+ _index = file.getChannel().map(MapMode.READ_ONLY,
+ 256,
+ 8*4*h_index_size());
+ _index.order(ByteOrder.LITTLE_ENDIAN);
+ _info = file.getChannel().map(MapMode.READ_ONLY,
+ 256+8*4*h_index_size(),
+ 4*h_info_size());
+ _info.order(ByteOrder.LITTLE_ENDIAN);
+ _catindex = file.getChannel().map(MapMode.READ_ONLY,
+ 256+8*4*h_index_size()+4*h_info_size(),
+ 4*h_catindex_size());
+ _catindex.order(ByteOrder.LITTLE_ENDIAN);
+ _strings = file.getChannel().map(MapMode.READ_ONLY,
+ 256+8*4*h_index_size()+4*h_info_size()+4*h_catindex_size(),
+ h_strings_size());
+ _strings.order(ByteOrder.LITTLE_ENDIAN);
+ _ok=true;
+ }
+ catch (IOException e) {
+ System.out.print("ConceptNet IO exception.\n");
+ return;
+ }
+ }
+
+ private int h_magic(){
+ return _header.getInt(0);
+ }
+ private int h_version(){
+ return _header.getInt(4);
+ }
+ private int h_checksum(){
+ return _header.getInt(8);
+ }
+ private int h_index_size(){
+ return _header.getInt(12);
+ }
+ private int h_info_size(){
+ return _header.getInt(16);
+ }
+ private int h_catindex_size(){
+ return _header.getInt(20);
+ }
+ private int h_strings_size(){
+ return _header.getInt(24);
+ }
+ private int h_max_freq(){
+ return _header.getInt(28);
+ }
+ private int h_max_cfreq(){
+ return _header.getInt(32);
+ }
+ private int h_max_qfreq(){
+ return _header.getInt(36);
+ }
+ private int h_max_sfreq(){
+ return _header.getInt(40);
+ }
+ private int h_max_efreq(){
+ return _header.getInt(44);
+ }
+ private int h_max_afreq(){
+ return _header.getInt(48);
+ }
+
+
+ private ByteBuffer encode(CharBuffer chrbuf){
+ return _charset.encode(chrbuf);
+ }
+
+ private String decode(ByteBuffer buf){
+ return _charset.decode(buf).toString();
+ }
+
+ public int lookup(String unit)
+ {
+ FSA.State state = _fsa.getState();
+ // state.start(); // getState does this for us
+ state.delta(unit);
+ if(state.isFinal()){
+ return state.hash();
+ }
+ return -1;
+ }
+
+ public String lookup(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return null;
+ }
+ int termoffset = _index.getInt(4*8*idx);
+ return getString(termoffset);
+ }
+
+ private String getString(int stringOffset){
+ if(_ok){
+ int length = 0;
+ _strings.position(stringOffset);
+ while(_strings.get()!=0){
+ length++;
+ }
+ ByteBuffer meta = ByteBuffer.allocate(length);
+ _strings.position(stringOffset);
+ _strings.get(meta.array(),0,length);
+ return decode(meta);
+ }
+ return null;
+ }
+
+ public int frq(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ return _index.getInt(4*8*idx+4);
+ }
+
+ public int cFrq(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ return _index.getInt(4*8*idx+8);
+ }
+
+ public int qFrq(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ return _index.getInt(4*8*idx+12);
+ }
+
+ public int sFrq(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ return _index.getInt(4*8*idx+16);
+ }
+
+ public double score(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1.0;
+ }
+ return 100.0*cFrq(idx)/qFrq(idx);
+ }
+
+ public double strength(int idx)
+ {
+ if(!_ok || idx<0 || idx>=h_index_size()){
+ return -1.0;
+ }
+ return 100.0*qFrq(idx)/sFrq(idx);
+ }
+
+ public int numExt(int idx)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+20);
+ if(offset==0){
+ return 0;
+ }
+ return _info.getInt(4*offset);
+ }
+
+ public int ext(int idx, int i)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+20);
+ if(offset==0){
+ return -1;
+ }
+ if(i>=_info.getInt(4*offset)){
+ return -1;
+ }
+ return _info.getInt(4*offset+4+8*i);
+ }
+
+ public int extFrq(int idx, int i)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+20);
+ if(offset==0){
+ return -1;
+ }
+ if(i>=_info.getInt(4*offset)){
+ return -1;
+ }
+ return _info.getInt(4*offset+8+8*i);
+ }
+
+ public int numAssoc(int idx)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+24);
+ if(offset==0){
+ return 0;
+ }
+ return _info.getInt(4*offset);
+ }
+
+ public int assoc(int idx, int i)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+24);
+ if(offset==0){
+ return -1;
+ }
+ if(i>=_info.getInt(4*offset)){
+ return -1;
+ }
+ return _info.getInt(4*offset+4+8*i);
+ }
+
+ public int assocFrq(int idx, int i)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+24);
+ if(offset==0){
+ return -1;
+ }
+ if(i>=_info.getInt(4*offset)){
+ return -1;
+ }
+ return _info.getInt(4*offset+8+8*i);
+ }
+
+ public int numCat(int idx)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+28);
+ if(offset==0){
+ return 0;
+ }
+ return _info.getInt(4*offset);
+ }
+
+ public int cat(int idx, int i)
+ {
+ if(idx<0 || idx>=h_index_size()){
+ return -1;
+ }
+ int offset = _index.getInt(4*8*idx+28);
+ if(offset==0){
+ return -1;
+ }
+ if(i>=_info.getInt(4*offset)){
+ return -1;
+ }
+ return _info.getInt(4*offset+4+8*i);
+ }
+
+ public String catName(int catidx)
+ {
+ if(!_ok || catidx<0 || catidx>=h_catindex_size()){
+ return null;
+ }
+ int catoffset = _catindex.getInt(4*catidx);
+ return getString(catoffset);
+ }
+
+ //// test ////
+ public static void main(String[] args) {
+ String domain = "/home/gv/fsa/automata/us_main_20041002_20041008";
+
+ ConceptNet cn = new ConceptNet(domain);
+
+ System.out.println("Loading ConceptNet domain "+domain+": "+cn.isOk());
+ int idx = cn.lookup("new york");
+ System.out.println(" lookup(\"new york\") -> "+idx);
+ System.out.println(" lookup("+idx+") -> "+cn.lookup(idx)+"("+cn.score(idx)+","+cn.strength(idx)+")");
+ System.out.println(" extensions("+cn.numExt(idx)+"):");
+ for(int i=0;i<5 && i<cn.numExt(idx);i++){
+ System.out.println(" "+cn.lookup(cn.ext(idx,i))+","+cn.extFrq(idx,i));
+ }
+ if(5<cn.numExt(idx)){
+ System.out.println(" ...");
+ }
+ System.out.println(" associations("+cn.numAssoc(idx)+"):");
+ for(int i=0;i<5 && i<cn.numAssoc(idx);i++){
+ System.out.println(" "+cn.lookup(cn.assoc(idx,i))+","+cn.assocFrq(idx,i));
+ }
+ if(5<cn.numAssoc(idx)){
+ System.out.println(" ...");
+ }
+ System.out.println(" categories("+cn.numCat(idx)+"):");
+ for(int i=0;i<5 && i<cn.numCat(idx);i++){
+ System.out.println(" "+cn.catName(cn.cat(idx,i)));
+ }
+ if(5<cn.numCat(idx)){
+ System.out.println(" ...");
+ }
+ }
+
+
+
+}
diff --git a/fsa/src/main/java/com/yahoo/fsa/package-info.java b/fsa/src/main/java/com/yahoo/fsa/package-info.java
new file mode 100644
index 00000000000..94c7fd30603
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/package-info.java
@@ -0,0 +1,7 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+@PublicApi
+package com.yahoo.fsa;
+
+import com.yahoo.api.annotations.PublicApi;
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java
new file mode 100644
index 00000000000..1e424372a66
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segment.java
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.segmenter;
+
+/**
+ * Class encapsulation of a segment.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ */
+ public class Segment {
+
+     /** Index of the first token of the segment. */
+     int _beg;
+     /** Index one past the last token of the segment. */
+     int _end;
+     /** Value stored with the segment, returned by conn(). */
+     int _conn;
+
+     /**
+      * Creates a segment covering tokens b (inclusive) to e (exclusive).
+      *
+      * @param b begin token index
+      * @param e end token index
+      * @param c value returned by conn()
+      */
+     public Segment(int b, int e, int c) {
+         this._beg = b;
+         this._end = e;
+         this._conn = c;
+     }
+
+     /** Returns the begin index. */
+     public int beg() { return this._beg; }
+
+     /** Returns the end index. */
+     public int end() { return this._end; }
+
+     /** Returns end minus begin. */
+     public int len() {
+         final int length = this._end - this._beg;
+         return length;
+     }
+
+     /** Returns the value passed as third constructor argument. */
+     public int conn() { return this._conn; }
+
+ }
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java
new file mode 100644
index 00000000000..80ccd791644
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segmenter.java
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.segmenter;
+
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+import com.yahoo.fsa.FSA;
+
+/**
+ * API for accessing the Segmenter automata.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ */
+ public class Segmenter {
+
+     private FSA _fsa;
+
+     /** Creates a segmenter on top of an already loaded automaton. */
+     public Segmenter(FSA fsa) {
+         _fsa = fsa;
+     }
+
+     /** Loads the automaton from the given file using utf-8. */
+     public Segmenter(String filename) {
+         _fsa = new FSA(filename,"utf-8");
+     }
+
+     /** Loads the automaton from the given file using the given charset. */
+     public Segmenter(String filename, String charsetname) {
+         _fsa = new FSA(filename,charsetname);
+     }
+
+     /** Returns true if the underlying automaton is ok. */
+     public boolean isOk()
+     {
+         return _fsa.isOk();
+     }
+
+     /**
+      * Splits the input on whitespace and returns all segments found.
+      * Leading, trailing and repeated whitespace is ignored, so the token
+      * array never contains empty strings; a blank input yields no tokens.
+      *
+      * @param input whitespace separated query string
+      * @return all segments detected in the input
+      */
+     public Segments segment(String input)
+     {
+         String trimmed = input.trim();
+         if(trimmed.length()==0){
+             return segment(new String[0]);
+         }
+         // "\\s+" (not "\\s"): runs of whitespace must not produce empty tokens
+         return segment(trimmed.split("\\s+"));
+     }
+
+     /** An automaton state together with the token index where it was started. */
+     private static class Detector {
+         FSA.State _state;
+         int _index;
+
+         public Detector(FSA.State s, int i)
+         {
+             _state = s;
+             _index = i;
+         }
+
+         public FSA.State state()
+         {
+             return _state;
+         }
+
+         public int index()
+         {
+             return _index;
+         }
+     }
+
+     /**
+      * Returns all segments found in the token sequence. A new detector is
+      * started at every token; each live detector is advanced with the
+      * current token, reports a segment whenever its state is final, and is
+      * dropped once its state is no longer valid.
+      *
+      * @param tokens the tokens to segment
+      * @return all segments detected in the token sequence
+      */
+     public Segments segment(String[] tokens)
+     {
+         Segments segments = new Segments(tokens);
+         LinkedList<Detector> detectors = new LinkedList<Detector>();
+
+         for(int i=0;i<tokens.length;i++){
+             detectors.add(new Detector(_fsa.getState(),i));
+
+             ListIterator<Detector> det_it = detectors.listIterator();
+             while(det_it.hasNext()){
+                 Detector d = det_it.next();
+                 FSA.State state = d.state();
+                 state.deltaWord(tokens[i]);
+                 if(state.isFinal()){
+                     // the first int of the state's data is stored with the segment
+                     segments.add(new Segment(d.index(),i+1,state.data().getInt(0)));
+                 }
+
+                 if(!state.isValid()){
+                     det_it.remove();
+                 }
+             }
+         }
+
+         return segments;
+     }
+
+     //// test ////
+     public static void main(String[] args) {
+         String fsafile = "/home/gv/fsa/automata/segments.fsa";
+
+         Segmenter segmenter = new Segmenter(fsafile);
+
+         System.out.println("Loading segmenter FSA file "+fsafile+": "+segmenter.isOk());
+
+         // with no arguments, one built-in sample query is processed
+         for(int a=0;a<1||a<args.length;a++){
+
+             String query;
+             if(a==args.length){
+                 query = "times square head";
+             }
+             else {
+                 query = args[a];
+             }
+             System.out.println("processing query \""+query+"\"");
+
+             Segments segments = segmenter.segment(query);
+             System.out.println("all segments:");
+             for(int i=0; i<segments.size();i++){
+                 System.out.println(" "+i+": \""+segments.sgm(i)+"\","+segments.conn(i));
+             }
+
+             Segments best;
+
+             best = segments.segmentation(Segments.SEGMENTATION_WEIGHTED);
+             System.out.print("best segments (weighted): ");
+             for(int i=0; i<best.size();i++){
+                 System.out.print("("+best.sgm(i)+")");
+             }
+             System.out.println();
+
+             best = segments.segmentation(Segments.SEGMENTATION_RIGHTMOST_LONGEST);
+             System.out.print("best segments (rightmost_longest):");
+             for(int i=0; i<best.size();i++){
+                 System.out.print("("+best.sgm(i)+")");
+             }
+             System.out.println();
+
+         }
+
+     }
+
+ }
+
diff --git a/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java
new file mode 100644
index 00000000000..26752046f80
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/segmenter/Segments.java
@@ -0,0 +1,313 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.segmenter;
+
+import java.util.LinkedList;
+
+/**
+ * Contains the segmentation() method.
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ */
+ public class Segments extends LinkedList {
+
+     public final static int SEGMENTATION_WEIGHTED = 0;
+     public final static int SEGMENTATION_WEIGHTED_BIAS10 = 1;
+     public final static int SEGMENTATION_WEIGHTED_BIAS20 = 2;
+     public final static int SEGMENTATION_WEIGHTED_BIAS50 = 3;
+     public final static int SEGMENTATION_WEIGHTED_BIAS100 = 4;
+     public final static int SEGMENTATION_WEIGHTED_LEFTMOST = 5;
+     public final static int SEGMENTATION_WEIGHTED_RIGHTMOST = 6;
+     public final static int SEGMENTATION_WEIGHTED_LONGEST = 7;
+     public final static int SEGMENTATION_LEFTMOST_LONGEST = 8;
+     public final static int SEGMENTATION_LEFTMOST_WEIGHTED = 9;
+     public final static int SEGMENTATION_RIGHTMOST_LONGEST = 10;
+     public final static int SEGMENTATION_RIGHTMOST_WEIGHTED = 11;
+     public final static int SEGMENTATION_LONGEST_WEIGHTED = 12;
+     public final static int SEGMENTATION_LONGEST_LEFTMOST = 13;
+     public final static int SEGMENTATION_LONGEST_RIGHTMOST = 14;
+     public final static int SEGMENTATION_METHODS = 15;
+
+     private String[] _tokens;
+     private int _size;
+     /** _map[b][e] is the list index of the segment spanning [b,e), or -1. */
+     private int[][] _map;
+
+     /**
+      * Creates an empty segment list over the given tokens.
+      *
+      * @param tokens the tokens the segments refer to
+      */
+     public Segments(String[] tokens)
+     {
+         _tokens = tokens;
+         _size = tokens.length;
+         _map = new int[_size+1][_size+1];
+         for(int i=0; i<=_size; i++){
+             for(int j=0; j<=_size; j++){
+                 _map[i][j]=-1;
+             }
+         }
+     }
+
+     /** Appends a segment and records its list index in the span map. */
+     public void add(Segment s)
+     {
+         super.add(s);
+         _map[s.beg()][s.end()]=super.size()-1;
+     }
+
+     /** Appends a one-token segment (conn 0) for every token that has none. */
+     private void addMissingSingles()
+     {
+         for(int i=0; i<_size; i++){
+             if(_map[i][i+1]==-1){
+                 super.add(new Segment(i,i+1,0));
+                 _map[i][i+1]=super.size()-1;
+             }
+         }
+     }
+
+     /** Rebuilds the span map from the current list contents. */
+     private void reMap()
+     {
+         for(int i=0; i<=_size; i++){
+             for(int j=0; j<=_size; j++){
+                 _map[i][j]=-1;
+             }
+         }
+         for(int i=0; i<super.size(); i++){
+             _map[beg(i)][end(i)] = i;
+         }
+     }
+
+     /**
+      * Returns the text of segment idx: its tokens joined by single spaces.
+      *
+      * @return the segment text, or null if idx is out of range
+      */
+     public String sgm(int idx)
+     {
+         if(idx<0 || idx>=super.size()){
+             return null;
+         }
+         // Fetch the segment once: get() on a linked list is a linear scan.
+         Segment seg = (Segment)super.get(idx);
+         StringBuilder s = new StringBuilder(_tokens[seg.beg()]);
+         for(int i=seg.beg()+1;i<seg.end();i++){
+             s.append(" ").append(_tokens[i]);
+         }
+         return s.toString();
+     }
+
+     /** Returns the begin index of segment idx, or -1 if idx is out of range. */
+     public int beg(int idx)
+     {
+         if(idx<0 || idx>=super.size()){
+             return -1;
+         }
+         return ((Segment)(super.get(idx))).beg();
+     }
+
+     /** Returns the end index of segment idx, or -1 if idx is out of range. */
+     public int end(int idx)
+     {
+         if(idx<0 || idx>=super.size()){
+             return -1;
+         }
+         return ((Segment)(super.get(idx))).end();
+     }
+
+     /** Returns the length of segment idx, or -1 if idx is out of range. */
+     public int len(int idx)
+     {
+         if(idx<0 || idx>=super.size()){
+             return -1;
+         }
+         return ((Segment)(super.get(idx))).len();
+     }
+
+     /** Returns the conn value of segment idx, or -1 if idx is out of range. */
+     public int conn(int idx)
+     {
+         if(idx<0 || idx>=super.size()){
+             return -1;
+         }
+         return ((Segment)(super.get(idx))).conn();
+     }
+
+     /**
+      * Selects segments from this list according to the given method and
+      * returns them as a new Segments object over the same tokens.
+      * As a side effect, one-token segments are added to this list for all
+      * tokens that do not have one yet.
+      *
+      * @param method one of the SEGMENTATION_* constants; any other value
+      *               yields an empty result
+      * @return the selected segments
+      */
+     public Segments segmentation(int method)
+     {
+         Segments smnt = new Segments(_tokens);
+
+         addMissingSingles();
+
+         int maxsc, id, bestid=-1, bias=0, c, pos, bestval, temp=0, next=-1;
+         int[] maxScore = new int[super.size()];
+         int[] nextid = new int[super.size()];
+         for(int i=0;i<nextid.length;i++){
+             nextid[i]=-1;
+         }
+
+         switch(method){
+         // The bias cases fall through on purpose and accumulate:
+         // BIAS100 -> 100, BIAS50 -> 50, BIAS20 -> 20, BIAS10 -> 10.
+         case SEGMENTATION_WEIGHTED_BIAS100:
+             bias+=50;
+         case SEGMENTATION_WEIGHTED_BIAS50:
+             bias+=30;
+         case SEGMENTATION_WEIGHTED_BIAS20:
+             bias+=10;
+         case SEGMENTATION_WEIGHTED_BIAS10:
+             bias+=10;
+         case SEGMENTATION_WEIGHTED:
+             // Walk the token boundaries from right to left. For each
+             // boundary i, find the best-scoring segment starting at i, then
+             // give every segment ending at i that score plus its own weight
+             // and remember the best follower in nextid.
+             bestid=-1;
+             for(int i=_tokens.length;i>=0;i--){
+                 bestid=-1;maxsc=0;
+                 for(int j=i+1;j<=_tokens.length;j++){
+                     id=_map[i][j];
+                     // +1 so that a follower with score 0 is still selected
+                     if(id>=0 && maxScore[id]+1>maxsc) {
+                         bestid=id;
+                         maxsc=maxScore[id]+1;
+                     }
+                 }
+                 if(maxsc>0){
+                     maxsc--;
+                 }
+                 for(int j=0;j<i;j++){
+                     id=_map[j][i];
+                     if(id>=0){
+                         nextid[id] = bestid;
+                         c = conn(id);
+                         if(i-j<=1){
+                             // one-token segments add nothing to the score
+                             maxScore[id] = maxsc;
+                         }
+                         else if(bias>0){
+                             maxScore[id] = maxsc + ((100+(i-j-2)*bias)*c)/100;
+                         }
+                         else{
+                             maxScore[id] = maxsc + c;
+                         }
+                     }
+                 }
+             }
+             // bestid now is the best segment starting at token 0
+             id = bestid;
+             while(id!=-1){
+                 smnt.add(((Segment)(super.get(id))));
+                 id=nextid[id];
+             }
+             break;
+         case SEGMENTATION_LEFTMOST_LONGEST:
+         case SEGMENTATION_LEFTMOST_WEIGHTED:
+             pos = 0;
+             while(pos<_tokens.length){
+                 bestid = -1; bestval = -1;
+                 for(int i=pos+1;i<=_tokens.length;i++){
+                     id = _map[pos][i];
+                     // LONGEST: every hit replaces the previous one, so the
+                     // longest wins. WEIGHTED: strictly larger conn wins
+                     // (one-token segments count as 0).
+                     if(id>=0 &&
+                        (method==SEGMENTATION_LEFTMOST_LONGEST ||
+                         (temp=(len(id)>1)? conn(id) :0)>bestval) ){
+                         bestid = id;
+                         bestval = temp;
+                         next = i;
+                     }
+                 }
+                 smnt.add((Segment)(super.get(bestid)));
+                 pos=next;
+             }
+             break;
+         case SEGMENTATION_RIGHTMOST_LONGEST:
+         case SEGMENTATION_RIGHTMOST_WEIGHTED:
+             pos = _tokens.length;
+             while(pos>0){
+                 bestid = -1; bestval = -1;
+                 for(int i=pos-1;i>=0;i--){
+                     id = _map[i][pos];
+                     if(id>=0 &&
+                        (method==SEGMENTATION_RIGHTMOST_LONGEST ||
+                         (temp=(len(id)>1)? conn(id) :0)>bestval) ){
+                         bestid = id;
+                         bestval = temp;
+                         next = i;
+                     }
+                 }
+                 // addFirst bypasses add(Segment), so the map is rebuilt below
+                 smnt.addFirst(super.get(bestid));
+                 pos=next;
+             }
+             smnt.reMap();
+             break;
+         case SEGMENTATION_LONGEST_WEIGHTED:
+         case SEGMENTATION_LONGEST_LEFTMOST:
+         case SEGMENTATION_LONGEST_RIGHTMOST:
+         case SEGMENTATION_WEIGHTED_LONGEST:
+         case SEGMENTATION_WEIGHTED_LEFTMOST:
+         case SEGMENTATION_WEIGHTED_RIGHTMOST:
+             buildSegmentationRecursive(method,smnt,0,_tokens.length);
+             break;
+         }
+
+         return smnt;
+     }
+
+     /**
+      * Picks the best segment lying inside [b,e) according to the method's
+      * primary and secondary criterion, then recurses into the part to its
+      * left and the part to its right, so segments are added to smnt in
+      * left-to-right order.
+      *
+      * @param method one of the recursive SEGMENTATION_* methods
+      * @param smnt   result list the chosen segments are appended to
+      * @param b      first token index of the range
+      * @param e      end token index (exclusive) of the range
+      */
+     private void buildSegmentationRecursive(int method, Segments smnt, int b, int e)
+     {
+         int bestid, bestval1, bestval2, temp;
+
+         bestid=-1;bestval1=-1;bestval2=-1;
+         for(int i=0;i<super.size();i++){
+             if(b<=beg(i) && e>=end(i)){
+                 switch(method){
+                 case SEGMENTATION_LONGEST_WEIGHTED:
+                     if(len(i)>bestval1 ||
+                        (len(i)==bestval1 && conn(i)>bestval2) ){
+                         bestid=i;
+                         bestval1=len(i);
+                         bestval2=conn(i);
+                     }
+                     break;
+                 case SEGMENTATION_LONGEST_LEFTMOST:
+                     if(len(i)>bestval1 ||
+                        (len(i)==bestval1 && beg(i)<bestval2) ){
+                         bestid=i;
+                         bestval1=len(i);
+                         bestval2=beg(i);
+                     }
+                     break;
+                 case SEGMENTATION_LONGEST_RIGHTMOST:
+                     if(len(i)>bestval1 ||
+                        (len(i)==bestval1 && end(i)>bestval2) ){
+                         bestid=i;
+                         bestval1=len(i);
+                         bestval2=end(i);
+                     }
+                     break;
+                 case SEGMENTATION_WEIGHTED_LONGEST:
+                     temp = (len(i)>1)?conn(i):0;
+                     if(temp>bestval1 ||
+                        (temp==bestval1 && len(i)>bestval2) ){
+                         bestid=i;
+                         bestval1=temp;
+                         bestval2=len(i);
+                     }
+                     break;
+                 case SEGMENTATION_WEIGHTED_LEFTMOST:
+                     temp = (len(i)>1)? conn(i) :0;
+                     if(temp>bestval1 ||
+                        (temp==bestval1 && beg(i)<bestval2) ){
+                         bestid=i;
+                         bestval1=temp;
+                         bestval2=beg(i);
+                     }
+                     break;
+                 case SEGMENTATION_WEIGHTED_RIGHTMOST:
+                     temp = len(i)>1?conn(i):0;
+                     if(temp>bestval1 ||
+                        (temp==bestval1 && end(i)>bestval2) ){
+                         bestid=i;
+                         bestval1=temp;
+                         bestval2=end(i);
+                     }
+                     break;
+                 default: // dummy default: pick first possible
+                     if(bestid<0){
+                         bestid=i;
+                     }
+                     break;
+                 }
+             }
+         }
+         if(bestid<0) {
+             return; // this should never happen, as all one-word segments are created
+         }
+
+         // check left side
+         if(b<beg(bestid)){
+             buildSegmentationRecursive(method,smnt,b,beg(bestid));
+         }
+
+         // add segment
+         smnt.add((Segment)(super.get(bestid)));
+
+         // check right side
+         if(e>end(bestid)){
+             buildSegmentationRecursive(method,smnt,end(bestid),e);
+         }
+     }
+
+ }
diff --git a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/PredictedTopic.java b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/PredictedTopic.java
new file mode 100644
index 00000000000..2dd0dcc9bb2
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/PredictedTopic.java
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.topicpredictor;
+
+
+/**
+ * Class encapsulation of a predicted topic. A topic has a weight and
+ * a term vector string of topicSegments.
+ *
+ * @author gjoranv
+ **/
+ public class PredictedTopic {
+
+     private String topic = "";
+     private double weight = 0.0;
+     private String vector = "";
+
+
+     /**
+      * Creates a predicted topic.
+      *
+      * @param topic  the topic
+      * @param weight the weight
+      * @param vector the vector string
+      */
+     public PredictedTopic(String topic, double weight, String vector){
+         this.topic = topic;
+         this.weight = weight;
+         this.vector = vector;
+     }
+
+     /** Creates a predicted topic with an empty vector string. */
+     public PredictedTopic(String topic, double weight){
+         this(topic, weight, "");
+     }
+
+
+     /** Returns the topic */
+     public String getTopic() {
+         return this.topic;
+     }
+
+     /** Returns the weight */
+     public double getWeight() {
+         return this.weight;
+     }
+
+     /** Returns the vector */
+     public String getVector() {
+         return this.vector;
+     }
+
+
+     /** Sets the weight */
+     public void setWeight(double weight) {
+         this.weight = weight;
+     }
+
+     /** Adds to the weight */
+     public void addWeight(double weight) {
+         this.weight = this.weight + weight;
+     }
+
+     /** Sets the vector */
+     public void setVector(String vector) {
+         this.vector = vector;
+     }
+
+     /**
+      * Orders topics by descending weight.
+      *
+      * @param o the other topic; must be a PredictedTopic
+      * @return 1 if this weight is smaller than the other's, -1 if it is
+      *         larger, 0 otherwise
+      */
+     public int compareDescendWeight(Object o) {
+         final PredictedTopic other = (PredictedTopic)o;
+         final double mine = getWeight();
+         final double theirs = other.getWeight();
+         if (mine < theirs) {
+             return 1;
+         }
+         return (mine > theirs) ? -1 : 0;
+     }
+
+ }
diff --git a/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java
new file mode 100644
index 00000000000..177e879c6c8
--- /dev/null
+++ b/fsa/src/main/java/com/yahoo/fsa/topicpredictor/TopicPredictor.java
@@ -0,0 +1,180 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.topicpredictor;
+
+import java.util.logging.Logger;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Iterator;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.charset.Charset;
+
+import com.yahoo.fsa.FSA;
+import com.yahoo.fsa.MetaData;
+
+
+/**
+ * Class for accessing the topic prediction automata. Look up the
+ * predicted topics for a term. Each topic has an attached weight and
+ * a term vector (topicSegments).
+ *
+ * @author <a href="mailto:boros@yahoo-inc.com">Peter Boros</a>
+ **/
+ public class TopicPredictor extends MetaData {
+
+     private static final String packageName = "com.yahoo.fsa.topicpredictor";
+
+     private FSA fsa = null;
+
+     /**
+      * Creates a predictor using utf-8 for both files.
+      *
+      * @param fsafile the automaton file
+      * @param datfile the metadata file
+      */
+     public TopicPredictor(String fsafile, String datfile){
+         this(fsafile, datfile, "utf-8");
+     }
+
+     /**
+      * Creates a predictor. Failures to load either file are logged as
+      * warnings; they do not throw.
+      *
+      * @param fsafile     the automaton file
+      * @param datfile     the metadata file
+      * @param charsetname charset used for both the metadata and the automaton
+      */
+     public TopicPredictor(String fsafile, String datfile,
+                           String charsetname) {
+         super(datfile, charsetname);
+         if (!isOk()) {
+             Logger.getLogger(packageName).
+                 warning("Error initializing predictor with file " + datfile);
+         }
+
+         // Init the segment->'topic index' FSA with the same charset as the
+         // metadata, so the requested charset is not silently ignored.
+         fsa = new FSA(fsafile, charsetname);
+         if (!fsa.isOk()) {
+             Logger.getLogger(packageName).
+                 warning("Error initializing FSA with file " + fsafile);
+         }
+     }
+
+     /**
+      * Returns a list of PredictedTopic objects, one for each topic
+      * the segment maps to. The returned list contains all topics,
+      * as opposed to the two-argument version.
+      * @param segment The segment string to find (all) topics for.
+      * @return (Linked)List of PredictedTopic objects. */
+     public List getPredictedTopics(String segment) {
+         return getPredictedTopics(segment, 0);
+     }
+
+     /**
+      * Returns a list of PredictedTopic objects, one for each topic
+      * the segment maps to. The returned list length is cut off at
+      * 'maxTopics' entries, maxTopics=0 returns all topics. The list is
+      * empty if the segment is unknown or the predictor failed to load.
+      * @param segment The segment string to find topics for.
+      * @param maxTopics The max number of topics to return, 0 for all topics
+      * @return (Linked)List of PredictedTopic objects. */
+     public List getPredictedTopics(String segment, int maxTopics) {
+         List<PredictedTopic> predictedTopics = new LinkedList<PredictedTopic>();
+
+         // Without both files loaded there is nothing to look up.
+         if (!isOk() || !fsa.isOk()) {
+             return predictedTopics;
+         }
+
+         int segIdx = getSegmentIndex(segment);
+         int[][] topicArr = getTopicArray(segIdx, maxTopics);
+         for(int i=0; i < topicArr.length; i++) {
+             int weight = topicArr[i][1];
+             String[] topicInfo = getTopicInfo(topicArr[i][0]);
+             if (topicInfo == null) {
+                 continue;
+             }
+             predictedTopics.add(
+                 new PredictedTopic(topicInfo[0], (double)weight, topicInfo[1]));
+         }
+
+         return predictedTopics;
+     }
+
+     /**
+      * Returns the index (hash value) of the input segment in the FSA.
+      * @param segment The segment string to find index for.
+      * @return Index for this segment in the FSA, or -1 if not found. */
+     private int getSegmentIndex(String segment) {
+         FSA.State s = fsa.getState();
+         s.delta(segment);
+         if (s.isFinal()) {
+             return s.hash();
+         }
+         return -1;
+     }
+
+     /**
+      * Returns the number of topics stored for the segment: the first int
+      * of the segment's record.
+      * @param segIdx The FSA index (hash value) for the segment.
+      * @return Number of topics for the segment, 0 if it cannot be read. */
+     private int getNumTopics(int segIdx) {
+         if (segIdx < 0) {
+             return 0;
+         }
+         ByteBuffer buf = getIndirectRecordEntry(segIdx, 4);
+         if (buf == null) {
+             return 0;
+         }
+         return buf.getInt(0);
+     }
+
+     /**
+      * Reads the topics for a segment from the metadata. The record holds
+      * a 4-byte count followed by one pair of ints per topic.
+      * @param segIdx The FSA index (hash value) for the segment.
+      * @param maxTopics Max number of topics to return, 0 for all topics.
+      * @return One row per topic; column 0 is passed to getTopicInfo and
+      *         column 1 is used as the weight. Empty if nothing is found. */
+     private int[][] getTopicArray(int segIdx, int maxTopics) {
+         if (segIdx < 0) {
+             return new int[0][0];
+         }
+
+         int numTopics = getNumTopics(segIdx);
+         if ((maxTopics > 0) && (numTopics > maxTopics)) {
+             numTopics = maxTopics;
+         }
+         if (numTopics <= 0) {
+             return new int[0][0];
+         }
+
+         ByteBuffer buf = getIndirectRecordEntry(segIdx,4+8*numTopics);
+         if (buf == null) {
+             return new int[0][0];
+         }
+         int[][] topics = new int[numTopics][2];
+         for(int i=0; i<numTopics; i++){
+             topics[i][0] = buf.getInt(4+8*i);
+             topics[i][1] = buf.getInt(8+8*i);
+         }
+         return topics;
+     }
+
+     /**
+      * Returns the topic and vector strings from the internal meta
+      * data structure.
+      * @param topicId Offset of the topic's strings, relative to user(0)
+      * @return topic string at [0] and vector string at [1] */
+     private String[] getTopicInfo(int topicId) {
+         return getStringArrayEntry(user(0) + topicId, 2);
+     }
+
+
+     //// test ////
+     public static void main(String[] args) {
+         String segment = "new york";
+         if (args.length >= 1) {
+             segment = args[0];
+         }
+
+         String fsafile = "/home/gv/fsa/automata/dmozPred_2.fsa";
+         String datfile = "/home/gv/fsa/automata/dmozPred_2.dat";
+
+         TopicPredictor predictor = new TopicPredictor(fsafile, datfile);
+
+         List predictedTopics = predictor.getPredictedTopics(segment, 25);
+         Iterator i = predictedTopics.iterator();
+         while (i.hasNext()) {
+             PredictedTopic topic = (PredictedTopic) i.next();
+             System.out.println("\n topic=" + topic.getTopic());
+             System.out.println(" weight=" + topic.getWeight());
+             System.out.println(" vector=" + topic.getVector());
+         }
+     }
+
+ }
diff --git a/fsa/src/test/fsa/test-data.fsa b/fsa/src/test/fsa/test-data.fsa
new file mode 100644
index 00000000000..92a8a8153ff
--- /dev/null
+++ b/fsa/src/test/fsa/test-data.fsa
Binary files differ
diff --git a/fsa/src/test/fsa/test-fsa.fsa b/fsa/src/test/fsa/test-fsa.fsa
new file mode 100644
index 00000000000..015be3aeea4
--- /dev/null
+++ b/fsa/src/test/fsa/test-fsa.fsa
Binary files differ
diff --git a/fsa/src/test/fsa/test-iterator.fsa b/fsa/src/test/fsa/test-iterator.fsa
new file mode 100644
index 00000000000..a83c6529f06
--- /dev/null
+++ b/fsa/src/test/fsa/test-iterator.fsa
Binary files differ
diff --git a/fsa/src/test/fsa/utf8.fsa b/fsa/src/test/fsa/utf8.fsa
new file mode 100644
index 00000000000..4398ac99d11
--- /dev/null
+++ b/fsa/src/test/fsa/utf8.fsa
Binary files differ
diff --git a/fsa/src/test/input/test-data-input.txt b/fsa/src/test/input/test-data-input.txt
new file mode 100644
index 00000000000..4acbd811537
--- /dev/null
+++ b/fsa/src/test/input/test-data-input.txt
@@ -0,0 +1,4 @@
+aa aa data
+bbbb bbbb data
+c c data
+dddddd dddddd data
diff --git a/fsa/src/test/input/test-fsa-input.txt b/fsa/src/test/input/test-fsa-input.txt
new file mode 100644
index 00000000000..ff56fd30af4
--- /dev/null
+++ b/fsa/src/test/input/test-fsa-input.txt
@@ -0,0 +1,3 @@
+aword
+this is a test
+tudor vidor
diff --git a/fsa/src/test/input/test-iterator-input.txt b/fsa/src/test/input/test-iterator-input.txt
new file mode 100644
index 00000000000..2724764c724
--- /dev/null
+++ b/fsa/src/test/input/test-iterator-input.txt
@@ -0,0 +1,12 @@
+abacus abacus
+abadan abadan
+abaisse abaisse
+abdicate abdicate
+abdomen abdomen
+abdominous abdominous
+dachs dachs
+dacia dacia
+daciaa daciaa
+daciab daciab
+dacite dacite
+dacota dacota
diff --git a/fsa/src/test/input/utf8.txt b/fsa/src/test/input/utf8.txt
new file mode 100644
index 00000000000..15c96002c0b
--- /dev/null
+++ b/fsa/src/test/input/utf8.txt
@@ -0,0 +1 @@
+हिन्दी
diff --git a/fsa/src/test/java/com/yahoo/fsa/test/FSADataTestCase.java b/fsa/src/test/java/com/yahoo/fsa/test/FSADataTestCase.java
new file mode 100644
index 00000000000..ce9854e7c44
--- /dev/null
+++ b/fsa/src/test/java/com/yahoo/fsa/test/FSADataTestCase.java
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.test;
+
+import com.yahoo.fsa.FSA;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.nio.BufferUnderflowException;
+
+/**
+ * @author <a href="geirst@yahoo-inc.com">Geir Storli</a>
+ */
+public class FSADataTestCase extends junit.framework.TestCase {
+
+ private static class Worker extends Thread {
+ FSA.State state;
+ String word;
+ String data;
+ long numRuns;
+ long numExceptions;
+ long numAsserts;
+ public Worker(FSA fsa, String word, String data, long numRuns) {
+ state = fsa.getState();
+ this.word = word;
+ this.data = data;
+ this.numRuns = numRuns;
+ this.numExceptions = 0;
+ this.numAsserts = 0;
+ }
+ public void run() {
+ for (long i = 0; i < numRuns; ++i) {
+ state.start();
+ state.delta(word);
+ try {
+ String data = state.dataString();
+ if (!this.data.equals(data)) {
+ ++numAsserts;
+ }
+ } catch (BufferUnderflowException e) {
+ ++numExceptions;
+ }
+ }
+ System.out.println("Worker(" + word + "): numExceptions(" + numExceptions + "), numAsserts(" + numAsserts + ")");
+ }
+ };
+
+ private FSA fsa;
+
+ public FSADataTestCase(String name) {
+ super(name);
+ }
+
+ protected void setUp() throws IOException {
+ fsa = new FSA(new FileInputStream("src/test/fsa/test-data.fsa"));
+ }
+
+ public void testBasic() {
+ FSA.State state = fsa.getState();
+ state.delta("aa");
+ assertTrue(state.isFinal());
+ assertEquals("aa data", state.dataString());
+
+ state.start();
+ state.delta("bbbb");
+ assertTrue(state.isFinal());
+ assertEquals("bbbb data", state.dataString());
+
+ state.start();
+ state.delta("c");
+ assertTrue(state.isFinal());
+ assertEquals("c data", state.dataString());
+
+ state.start();
+ state.delta("dddddd");
+ assertTrue(state.isFinal());
+ assertEquals("dddddd data", state.dataString());
+ }
+
+ public void testMultipleThreads() {
+ long numRuns = 10000;
+ List<Worker> workers = new ArrayList<Worker>();
+ workers.add(new Worker(fsa, "aa", "aa data", numRuns));
+ workers.add(new Worker(fsa, "bbbb", "bbbb data", numRuns));
+ workers.add(new Worker(fsa, "c", "c data", numRuns));
+ workers.add(new Worker(fsa, "dddddd", "dddddd data", numRuns));
+ for (int i = 0; i < workers.size(); ++i) {
+ workers.get(i).start();
+ }
+ try {
+ for (int i = 0; i < workers.size(); ++i) {
+ workers.get(i).join();
+ }
+ } catch (InterruptedException e) {
+ assertTrue(false);
+ }
+ for (int i = 0; i < workers.size(); ++i) {
+ assertEquals(0, workers.get(i).numExceptions);
+ assertEquals(0, workers.get(i).numAsserts);
+ }
+ }
+
+}
diff --git a/fsa/src/test/java/com/yahoo/fsa/test/FSAIteratorTestCase.java b/fsa/src/test/java/com/yahoo/fsa/test/FSAIteratorTestCase.java
new file mode 100644
index 00000000000..21dc86f4925
--- /dev/null
+++ b/fsa/src/test/java/com/yahoo/fsa/test/FSAIteratorTestCase.java
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.test;
+
+import com.yahoo.fsa.FSA;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * @author <a href="geirst@yahoo-inc.com">Geir Storli</a>
+ */
+ public class FSAIteratorTestCase extends junit.framework.TestCase {
+
+     /** Automaton loaded from the iterator test file. */
+     private FSA fsa;
+
+     /** State the iterators are created from; tests advance it with delta(). */
+     private FSA.State state;
+
+     /** The words the tests expect the iterator to produce, in iteration order. */
+     private List<String> expected;
+
+     public FSAIteratorTestCase(String name) {
+         super(name);
+     }
+
+     /** Loads the automaton, obtains a fresh state and fills the expected word list. */
+     protected void setUp() {
+         fsa = new FSA("src/test/fsa/test-iterator.fsa");
+         state = fsa.getState();
+
+         expected = new ArrayList<String>();
+
+         expected.add("abacus");
+         expected.add("abadan");
+         expected.add("abaisse");
+         expected.add("abdicate");
+         expected.add("abdomen");
+         expected.add("abdominous");
+         expected.add("dachs");
+         expected.add("dacia");
+         expected.add("daciaa");
+         expected.add("daciab");
+         expected.add("dacite");
+         expected.add("dacota");
+     }
+
+     /**
+      * Iterates from the current state and checks that the items produced are
+      * exactly expected[beginIdx..endIdx): prefix + item string must equal the
+      * expected word, and so must the item's data string.
+      *
+      * @param beginIdx index of the first expected word (inclusive)
+      * @param endIdx   index one past the last expected word
+      * @param prefix   the input already consumed by the state
+      */
+     private void checkIterator(int beginIdx, int endIdx, String prefix) {
+         System.out.println("checkIterator(" + beginIdx + ", " + endIdx + ", " + prefix + ")");
+         java.util.Iterator<FSA.Iterator.Item> i = fsa.iterator(state);
+         for (; i.hasNext() && beginIdx < endIdx; ++beginIdx) {
+             FSA.Iterator.Item item = i.next();
+             System.out.println("item: " + item);
+             String str = prefix + item.getString();
+             String data = item.getDataString();
+             System.out.println("str: '" + expected.get(beginIdx) + "'.equals('" + str + "')?");
+             // assertEquals reports both values on failure, unlike assertTrue(a.equals(b)).
+             assertEquals(expected.get(beginIdx), str);
+             System.out.println("data: '" + expected.get(beginIdx) + "'.equals('" + data + "')?");
+             assertEquals(expected.get(beginIdx), data);
+         }
+         assertFalse(i.hasNext());
+         assertEquals(endIdx, beginIdx);
+     }
+
+     /** Checks that the iterator has no items and that next() throws NoSuchElementException. */
+     private void checkExhausted(java.util.Iterator<FSA.Iterator.Item> i) {
+         assertFalse(i.hasNext());
+         try {
+             i.next();
+             fail("next() on an exhausted iterator should throw NoSuchElementException");
+         } catch (NoSuchElementException e) {
+             // expected
+         }
+     }
+
+     public void testIterator() {
+         checkIterator(0, expected.size(), "");
+     }
+
+     public void testIteratorSingle() {
+         state.delta("dach");
+         checkIterator(6, 7, "dach");
+     }
+
+     public void testIteratorSubset() {
+         state.delta("abd");
+         checkIterator(3, 6, "abd");
+     }
+
+     public void testIteratorFinalState() {
+         state.delta("dacia");
+         checkIterator(7, 10, "dacia");
+     }
+
+     public void testIteratorFinalStateOnly() {
+         state.delta("dachs");
+         checkIterator(6, 7, "dachs");
+     }
+
+     public void testIteratorEmpty1() {
+         state.delta("b");
+         checkExhausted(fsa.iterator(state));
+     }
+
+     public void testIteratorEmpty2() {
+         state.delta("daciac");
+         checkExhausted(fsa.iterator(state));
+     }
+
+     /** remove() is expected to be unsupported by the FSA iterator. */
+     public void testIteratorRemove() {
+         java.util.Iterator<FSA.Iterator.Item> i = fsa.iterator(state);
+         try {
+             i.remove();
+             fail("remove() should throw UnsupportedOperationException");
+         } catch (UnsupportedOperationException e) {
+             // expected
+         }
+     }
+ }
diff --git a/fsa/src/test/java/com/yahoo/fsa/test/FSATestCase.java b/fsa/src/test/java/com/yahoo/fsa/test/FSATestCase.java
new file mode 100644
index 00000000000..4300c5938e1
--- /dev/null
+++ b/fsa/src/test/java/com/yahoo/fsa/test/FSATestCase.java
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.test;
+
+import com.yahoo.fsa.FSA;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * @author <a href="bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+ public class FSATestCase extends junit.framework.TestCase {
+
+     private FSA fsa;
+
+     private FSA.State state;
+
+     public FSATestCase(String name) {
+         super(name);
+     }
+
+     /** Loads the test automaton and obtains a fresh state from it. */
+     protected void setUp() throws IOException {
+         fsa = new FSA(new FileInputStream("src/test/fsa/test-fsa.fsa"));
+         state = fsa.getState();
+     }
+
+     /** Asserts that the state is valid and that its finality matches the flag. */
+     private void assertValidWithFinality(boolean expectFinal) {
+         assertTrue(state.isValid());
+         if (expectFinal) {
+             assertTrue(state.isFinal());
+         } else {
+             assertFalse(state.isFinal());
+         }
+     }
+
+     public void testSingleWordDelta() {
+         state.delta("aword");
+         assertValidWithFinality(true);
+     }
+
+     public void testSingleWordDeltaWord() {
+         state.deltaWord("aword");
+         assertValidWithFinality(true);
+     }
+
+     public void testSingleWordDeltaPartialMatch() {
+         state.delta("awo");
+         assertValidWithFinality(false);
+     }
+
+     public void testSingleWordDeltaPartialMatchWord() {
+         state.deltaWord("awo");
+         assertValidWithFinality(false);
+     }
+
+     /** Feeds "this is a test" in arbitrary fragments; only the last one reaches a final state. */
+     public void testMultiWordDelta() {
+         final String[] leading = {"th", "is ", "is ", "a"};
+         for (String fragment : leading) {
+             state.delta(fragment);
+             assertFalse(state.isFinal());
+         }
+         state.delta(" test");
+         assertValidWithFinality(true);
+     }
+
+     /** Feeds "this is a test" word by word; only the last word reaches a final state. */
+     public void testMultiWordDeltaWord() {
+         final String[] leading = {"this", "is", "a"};
+         for (String token : leading) {
+             state.deltaWord(token);
+             assertFalse(state.isFinal());
+         }
+         state.deltaWord("test");
+         assertValidWithFinality(true);
+     }
+
+     /** Word fragments passed to deltaWord() leave the state neither final nor valid. */
+     public void testMultiWordDeltaWordInvalid() {
+         state.deltaWord("th");
+         assertFalse(state.isFinal());
+         state.deltaWord("is ");
+         assertFalse(state.isFinal());
+         assertFalse(state.isValid());
+     }
+
+     /** A rejected tryDeltaWord() must leave the state usable for further input. */
+     public void testMultiWordDeltaTry() {
+         assertFalse(state.tryDeltaWord("thiss"));
+         assertTrue(state.isValid());
+         assertTrue(state.tryDeltaWord("this"));
+         state.deltaWord("is");
+         state.tryDeltaWord("a");
+         final String[] rejected = {"tes", "tesz", "teszzzz"};
+         for (String candidate : rejected) {
+             assertFalse(state.tryDeltaWord(candidate));
+         }
+         assertTrue(state.tryDeltaWord("test"));
+         assertValidWithFinality(true);
+     }
+
+ }
diff --git a/fsa/src/test/java/com/yahoo/fsa/test/UTF8TestCase.java b/fsa/src/test/java/com/yahoo/fsa/test/UTF8TestCase.java
new file mode 100644
index 00000000000..3f07816a914
--- /dev/null
+++ b/fsa/src/test/java/com/yahoo/fsa/test/UTF8TestCase.java
@@ -0,0 +1,97 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.fsa.test;
+
+import com.yahoo.fsa.FSA;
+import java.util.Iterator;
+import java.nio.charset.Charset;
+
+/**
+ * @author <a href="geirst@yahoo-inc.com">Geir Storli</a>
+ */
+ public class UTF8TestCase extends junit.framework.TestCase {
+
+     private Charset charset = Charset.forName("utf-8");
+     private FSA fsa;
+     private FSA.State state;
+     private byte[] prefixBuf;
+     private byte[] suffixBuf;
+     private String prefix;
+     private String suffix;
+     private String word;
+
+     /** Narrows each int to a byte so UTF-8 sequences can be written as hex int literals. */
+     private static byte[] convert(int[] buf) {
+         byte[] narrowed = new byte[buf.length];
+         int pos = 0;
+         for (int value : buf) {
+             narrowed[pos++] = (byte) value;
+         }
+         return narrowed;
+     }
+
+     public UTF8TestCase(String name) {
+         super(name);
+     }
+
+     /** Loads the one-word automaton and builds that word as prefix (3 bytes) + suffix (15 bytes). */
+     protected void setUp() {
+         fsa = new FSA("src/test/fsa/utf8.fsa"); // fsa with one word (6 code points, 18 bytes)
+         state = fsa.getState();
+         prefixBuf = convert(new int[] {0xe0, 0xa4, 0xb9});
+         prefix = new String(prefixBuf, charset);
+         suffixBuf = convert(new int[] {0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xa8, 0xe0, 0xa5, 0x8d,
+                                        0xe0, 0xa4, 0xa6, 0xe0, 0xa5, 0x80});
+         suffix = new String(suffixBuf, charset);
+         word = prefix + suffix;
+     }
+
+     public void testStringDelta() {
+         state.delta(word);
+         assertTrue(state.isFinal());
+     }
+
+     /** Feeds the word one Java char at a time; every intermediate state must stay valid. */
+     public void testCharDelta() {
+         assertEquals(6, word.length());
+         int pos = 0;
+         while (pos < word.length()) {
+             state.delta(word.charAt(pos));
+             assertTrue(state.isValid());
+             ++pos;
+         }
+         assertTrue(state.isFinal());
+     }
+
+     /** Feeds every byte of the buffer to the state, requiring validity after each step. */
+     private static void feedBytes(FSA.State target, byte[] bytes) {
+         for (byte b : bytes) {
+             target.delta(b);
+             assertTrue(target.isValid());
+         }
+     }
+
+     /** Feeds the raw UTF-8 bytes of the word through a state obtained locally. */
+     public void testByteDelta() {
+         FSA.State byteState = fsa.getState();
+         assertEquals(3, prefixBuf.length);
+         feedBytes(byteState, prefixBuf);
+         assertEquals(15, suffixBuf.length);
+         feedBytes(byteState, suffixBuf);
+         assertTrue(byteState.isFinal());
+     }
+
+     /** Iterates from the current state and expects exactly one item with the given string. */
+     private void assertSingleItem(String expectedString) {
+         Iterator<FSA.Iterator.Item> itr = fsa.iterator(state);
+         FSA.Iterator.Item item = itr.next();
+         assertEquals(expectedString, item.getString());
+         assertFalse(itr.hasNext());
+     }
+
+     public void testIteratorAtStart() {
+         assertSingleItem(word);
+     }
+
+     public void testIteratorWithPrefix() {
+         state.delta(prefix);
+         assertSingleItem(suffix);
+     }
+
+     public void testIteratorWithCompleteWord() {
+         state.delta(word);
+         assertSingleItem("");
+     }
+ }
diff --git a/fsa/src/util/.gitignore b/fsa/src/util/.gitignore
new file mode 100644
index 00000000000..282522db034
--- /dev/null
+++ b/fsa/src/util/.gitignore
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/fsa/src/util/cn_txt2xml b/fsa/src/util/cn_txt2xml
new file mode 100755
index 00000000000..9c439879af9
--- /dev/null
+++ b/fsa/src/util/cn_txt2xml
@@ -0,0 +1,625 @@
+#!/usr/bin/perl
+
+use strict;
+
+use FSA;
+use BerkeleyDB;
+use Getopt::Long;
+use Pod::Usage;
+
+#
+# Process command line options.
+#
+
+my $do_qfreq = 1;
+my $do_sqfreq = 1;
+my $do_ext = 1;
+my $do_assoc = 1;
+my $do_cat = 1;
+my $do_fsa = 1;
+my $help = 0;
+my $man = 0;
+my $verbose = 0;
+my $stopwords_file = '';
+my $output_file = '';
+
+my $result = GetOptions('qfreq|q!' => \$do_qfreq,
+ 'sqfreq|s!' => \$do_sqfreq,
+ 'ext|e!' => \$do_ext,
+ 'assoc|a!' => \$do_assoc,
+ 'cat|c!' => \$do_cat,
+ 'fsa|f!' => \$do_fsa,
+ 'help|h' => \$help,
+ 'man|m' => \$man,
+ 'verbose|v' => \$verbose,
+ 'stopwords|w:s' => \$stopwords_file,
+ 'output-file|o=s' => \$output_file,
+ );
+
+pod2usage(1) if $help;
+pod2usage(-verbose => 2) if $man;
+
+#
+# Domain is a required parameter.
+#
+
+my $domain = shift || die "need domain";
+
+#
+# Some constants for setting limits etc.
+#
+
+my $MAX_UNIT_LENGTH = 8;
+my $MAX_QUERY_LENGTH = 10;
+
+#
+# Declare arrays to store concept net data.
+#
+
+my @unit = ();
+my @unit_f = ();
+my @unit_qf = ();
+my @unit_qfc = ();
+my @unit_qfs = ();
+my @ext = ();
+my @assoc = ();
+my @cats = ();
+my @ucats = ();
+my @st_map = ();
+
+#
+# Some other global variables
+#
+
+my %stopwords = ();
+my %stopMap = ();
+
+my ($total,$count);
+
+my ($fsa,$sfsa);
+
+#***********************************************************
+#
+# Functions
+#
+#***********************************************************
+
+
+ # Print a printf-style diagnostic to STDERR; silent unless --verbose was given.
+ sub msg($@){
+     my ($fmt,@args) = @_;
+     printf STDERR $fmt,@args if $verbose;
+ }
+
+ # Report progress through msg(): every 1000th item while running, and once
+ # more with a trailing "done." when $done is set. A percentage is included
+ # only when the total is known (greater than zero).
+ sub progress($$$$){
+     my ($label,$n,$of,$finished) = @_;
+
+     return unless $finished || $n%1000==0;
+
+     my $tail = $finished ? " ... done.\n" : "";
+     if($of>0){
+         msg("\r%s ... %d of %d (%.2f%%)".$tail,$label,$n,$of,100.0*$n/$of);
+     }
+     else {
+         msg("\r%s ... %d".$tail,$label,$n);
+     }
+ }
+
+ # Look a string up in the given FSA.
+ # Returns (hash value, numeric data) when the string ends in a final state,
+ # and (-1,0) otherwise.
+ sub lookup($$){
+     my ($automaton,$word) = @_;
+     my $state = FSA::State->new($automaton);
+
+     $state->start();
+     $state->delta($word);
+     return (-1,0) unless $state->isFinal();
+     return ($state->hash(),$state->nData());
+ }
+
+ # Takes a flat (key,value,key,value,...) array, sums the values per key and
+ # returns a flat (key,sum,...) list ordered by descending sum.
+ sub aggregate(\@){
+     my ($pairs) = @_;
+     my %sum = ();
+     for(my $k=0;$k<@$pairs;$k+=2){
+         $sum{$pairs->[$k]} += $pairs->[$k+1];
+     }
+     return map { ($_,$sum{$_}) } sort { $sum{$b} <=> $sum{$a} } keys %sum;
+ }
+
+ # First combination of $n items out of $m, as a bit mask with the $n lowest
+ # bits set. Returns 0 when either argument is 0 or above 31, or when $n>$m.
+ sub firstComb($$){
+     my ($n,$m) = @_;
+
+     my $usable = $n!=0 && $n<=31 && $m!=0 && $m<=31 && $n<=$m;
+     return $usable ? (1<<$n)-1 : 0;
+ }
+
+ # Given a combination bit mask $c, compute the following mask for a universe
+ # of $m items. Returns 0 when $c or $m is 0, when $m>31, or when the computed
+ # mask is not below 1<<$m (which ends the firstComb/nextComb iteration).
+ # NOTE(review): this looks like the usual "next mask with the same number of
+ # set bits" step (e.g. 0b0011 -> 0b0101 -> 0b0110); confirm before relying
+ # on the exact enumeration order.
+ sub nextComb($$){
+ my $c = shift;
+ my $m = shift;
+
+ if($c==0 || $m==0 || $m>31){
+ return 0;
+ }
+
+ my $x = $c;
+ my $limit = 1<<$m;
+ my ($mask,$mask1,$mask2);
+
+ if($x&1){
+ # Bit 0 is set: find the lowest clear bit above it ...
+ $mask=2;
+ while($x&$mask){
+ $mask<<=1;
+ }
+ # ... then set that bit and clear the one directly below it.
+ $x^=($mask+($mask>>1));
+ }
+ else{
+ # Bit 0 is clear: find the lowest set bit.
+ $mask=2;
+ while(!($x&$mask)){
+ $mask<<=1;
+ }
+ # Walk over the run of consecutive set bits starting there: $mask2
+ # collects the run itself, $mask1 gets one low-order 1 per bit in the run.
+ $mask1=$mask2=0;
+ while($x&$mask){
+ $mask1<<=1;
+ $mask1++;
+ $mask2+=$mask;
+ $mask<<=1;
+ }
+ # Keep one bit fewer than the run length in the low-order positions.
+ $mask1>>=1;
+ # Toggle: sets the bit just above the run, clears the run and sets the
+ # low-order bits; the XOR makes this work when the two masks overlap.
+ $x^=($mask+($mask1^$mask2));
+ }
+
+ return ($x<$limit)?$x:0;
+ }
+
+ # Return the elements of the array whose positions correspond to the set
+ # bits of the mask $c (bit 0 selects element 0, and so on).
+ sub selectComb($\@){
+     my ($bits,$items) = @_;
+
+     my @picked;
+     for(my $pos=0; $bits>0 && $pos<=$#$items; $pos++, $bits>>=1){
+         push(@picked,$items->[$pos]) if $bits&1;
+     }
+     return @picked;
+ }
+
+ # Split a string on whitespace and return its distinct terms, sorted and
+ # joined by single spaces. Input with fewer than two terms is returned
+ # unchanged.
+ sub sortGrams($){
+     my ($text) = @_;
+     my @tokens = split(/\s+/,$text);
+
+     return $text if @tokens<2;
+
+     my %seen = ();
+     return join(" ", grep { !$seen{$_}++ } sort(@tokens));
+ }
+
+ # Strip leading and trailing stopwords from a unit. Results are memoized in
+ # %stopMap. Without a stopwords file the unit is returned untouched.
+ sub cleanStop($){
+     my ($phrase) = @_;
+     return($phrase) if $stopwords_file eq '';
+
+     unless(defined($stopMap{$phrase})){
+         my @tokens = split(/\s+/,$phrase);
+         shift(@tokens) while ((@tokens) && ($stopwords{$tokens[0]}));
+         pop(@tokens) while ((@tokens) && ($stopwords{$tokens[$#tokens]}));
+         $stopMap{$phrase} = join(' ', @tokens);
+     }
+     return $stopMap{$phrase};
+ }
+
+
+#***********************************************************
+#
+# Main program.
+#
+#***********************************************************
+
+
+#
+# Configure stopwords list
+#
+
+ # Load the stopword list (one word per line) into %stopwords.
+ if($stopwords_file ne ''){
+     msg("configuring stopwords ... ");
+     # Three-argument open: the file name comes from the command line and must
+     # not be interpreted as an open mode or a pipe.
+     open(STOPFILE, '<', $stopwords_file) or die "error opening stopwords file '$stopwords_file': $!\n\t";
+     while(<STOPFILE>){
+         chomp;
+         $stopwords{$_}=1;
+     }
+     close(STOPFILE);
+     msg("done.\n");
+ }
+
+#
+# Build plain FSA with perfect hash and frequencies,
+# and compact FSA with perfect hash only.
+#
+ # Sum the frequencies of all units (after stopword cleaning) and feed them,
+ # sorted, to two makefsa processes: one builds ${domain}.plain.fsa from
+ # "unit<TAB>frequency" lines, the other builds ${domain}.fsa from the bare
+ # units.
+ if($do_fsa){
+     msg("building plain fsa ... ");
+     my %units_t = ();
+     my $unit_file = "${domain}_unit.txt";
+     open(U,'<',$unit_file) or die "error opening unit file '$unit_file': $!\n";
+     while(<U>){
+         chomp;
+         my ($f,$u) = split(/\t/);
+         my $uns = cleanStop($u);
+         if($uns ne ""){
+             $units_t{$uns}+=$f;
+         }
+     }
+     close(U);
+     # List-form pipe opens bypass the shell, so a domain name containing
+     # spaces or shell metacharacters reaches makefsa verbatim.
+     open(F1,'|-','makefsa','-vnp',"${domain}.plain.fsa")
+         or die "error starting makefsa for '${domain}.plain.fsa': $!\n";
+     open(F2,'|-','makefsa','-ep',"${domain}.fsa")
+         or die "error starting makefsa for '${domain}.fsa': $!\n";
+     foreach my $u (sort keys %units_t){
+         print F1 "$u\t$units_t{$u}\n";
+         print F2 "$u\n";
+     }
+     # close() on a pipe waits for the child and fails on a non-zero exit.
+     close(F1) or die "makefsa failed for '${domain}.plain.fsa' (status $?)\n";
+     close(F2) or die "makefsa failed for '${domain}.fsa' (status $?)\n";
+     %units_t = ();
+     msg("done.\n");
+ }
+
+#
+# Open plain FSA.
+#
+
+ $fsa = FSA->new("${domain}.plain.fsa");
+
+ #
+ # Read units: every cleaned unit found in the plain FSA is stored at the
+ # index given by its hash value, together with the frequency kept in the FSA.
+ #
+
+ $total = 0 + `wc -l ${domain}_unit.txt`;
+ $count = 0;
+ # Without the unit file there is nothing to convert, so fail loudly instead
+ # of silently producing an empty network.
+ open(U,'<',"${domain}_unit.txt") or die "error opening unit file '${domain}_unit.txt': $!\n";
+ while(<U>){
+     $count++; progress("reading units",$count,$total,0);
+     chomp;
+     my ($f,$u) = split(/\t/);
+     my $uns = cleanStop($u);
+     if($uns ne ""){
+         my ($idx,$frq) = lookup($fsa,$uns);
+         if($idx>=0){
+             $unit[$idx] = $uns;
+             $unit_f[$idx] = $frq;
+         }
+     }
+ }
+ close(U);
+ progress("reading units",$count,$total,1);
+
+
+#
+# Build term-sorted FSA for counting query frequencies.
+#
+
+ # Count, for every unit, how often it occurs in the raw query log
+ # (complete.txt.gz, lines of "frequency<TAB>query"):
+ #   @unit_qfc - the query is exactly the unit,
+ #   @unit_qf  - the unit occurs as a run of consecutive query terms,
+ #   @unit_qfs - all terms of the unit occur in the query, in any order
+ #               (only when $do_sqfreq is set).
+ if($do_qfreq || $do_sqfreq){
+ msg("building fsa for query frequencies ... ");
+ # Map each term-sorted unit to the comma separated list of unit indexes
+ # that share this sorted form.
+ my %units_st = ();
+ for(my $i=0;$i<=$#unit;$i++){
+ my $uns = sortGrams($unit[$i]);
+ if(defined($units_st{$uns})){
+ $units_st{$uns}.=",$i";
+ }
+ else{
+ $units_st{$uns}="$i";
+ }
+ }
+ open(F,"| makefsa -vep ${domain}.sorted.fsa");
+ # $st_map is indexed by the position of the sorted unit in the makefsa
+ # input; NOTE(review): the lookups below assume this equals the hash value
+ # reported by the sorted FSA - confirm.
+ my $i=0;
+ foreach my $u (sort keys %units_st){
+ $st_map[$i]=$units_st{$u};
+ print F "$u\n";
+ $i++;
+ }
+ close(F);
+ %units_st = ();
+ msg("done.\n");
+
+ #
+ # Open term-sorted FSA.
+ #
+
+ $sfsa = FSA->new("${domain}.sorted.fsa");
+
+ #
+ # Read complete query file for query frequencies.
+ #
+
+ $total = 0 + `zcat complete.txt.gz | wc -l`;
+ $count = 0;
+ open(C,"zcat complete.txt.gz|") or die "ERROR opening pipe: \"zcat complete.txt.gz|\"\n";
+ while(<C>){
+ $count++; progress("processing raw query file for query frequencies",$count,$total,0);
+ chomp;
+ my ($frq,$query) = split(/\t/);
+
+ #
+ # Complete query match.
+ #
+ my ($idx,$f) = lookup($fsa,$query);
+ if($idx>=0){
+ $unit_qfc[$idx] += $frq;
+ }
+
+ #
+ # Partial query match.
+ #
+ my @qgrams = split(/\s+/,$query);
+ my $st = FSA::State->new($fsa);
+ # %frq_add collects the matched unit indexes as keys, so that each unit
+ # is credited at most once per query.
+ my %frq_add = ();
+ for(my $i=0;$i<=$#qgrams;$i++){
+ $st->start();
+ $st->delta($qgrams[$i]);
+ if($st->isFinal()){
+ $frq_add{$st->hash()} = 1;
+ }
+ # Extend the match term by term for as long as the state stays valid.
+ for(my $j=$i+1;$st->isValid()&&$j<=$#qgrams;$j++){
+ $st->delta(" ");
+ $st->delta($qgrams[$j]);
+ if($st->isFinal()){
+ $frq_add{$st->hash()} = 1;
+ }
+ }
+ }
+ foreach my $a (keys %frq_add){
+ $unit_qf[$a] += $frq;
+ }
+
+ if($do_sqfreq){
+ #
+ # Partial query match in any order.
+ #
+ my $squery = sortGrams($query);
+ my @sqgrams = split(/\s+/,$squery);
+ my $sst = FSA::State->new($sfsa);
+ %frq_add = ();
+ # Only the first $MAX_QUERY_LENGTH sorted terms are considered.
+ my $qlen=$#sqgrams+1;
+ if($qlen>$MAX_QUERY_LENGTH){
+ $qlen=$MAX_QUERY_LENGTH;
+ }
+ # Try every combination of 1..$MAX_UNIT_LENGTH query terms against the
+ # term-sorted FSA.
+ for(my $i=1;$i<=$qlen && $i<=$MAX_UNIT_LENGTH; $i++){
+ for(my $c=firstComb($i,$qlen);$c>0;$c=nextComb($c,$qlen)){
+ $sst->start();
+ my $tmp=join(" ",selectComb($c,@sqgrams));
+ $sst->delta($tmp);
+ if($sst->isFinal()){
+ my @to_add = split(/,/,$st_map[$sst->hash()]);
+ foreach my $a (@to_add){
+ $frq_add{$a} = 1;
+ }
+ }
+ }
+ }
+ foreach my $a (keys %frq_add){
+ $unit_qfs[$a] += $frq;
+ }
+ }
+ }
+ close(C);
+ progress("processing raw query file for query frequencies",$count,$total,1);
+ }
+
+#
+# Read extensions.
+#
+ # Read "frequency<TAB>unit<TAB>extension" lines and record, for each unit,
+ # a space separated list of "extension index,frequency" entries in @ext.
+ if($do_ext){
+     $total = 0 + `wc -l ${domain}_ext.txt`;
+     $count = 0;
+     open(E,"${domain}_ext.txt");
+     while(<E>){
+         $count++; progress("reading extensions",$count,$total,0);
+         chomp;
+         my ($f,$u1,$u2) = split(/\t/);
+         my $uns1 = cleanStop($u1);
+         my $uns2 = cleanStop($u2);
+         if($uns1 ne "" && $uns2 ne "" && $uns1 ne $uns2){
+             # The FSA is keyed by the stopword-cleaned units, so the cleaned
+             # forms must be used for the lookup as well.
+             my ($idx1,$frq1) = lookup($fsa,$uns1);
+             my ($idx2,$frq2) = lookup($fsa,$uns2);
+             if($idx1>=0 && $idx2>=0){
+                 $ext[$idx1] .= "$idx2,$f ";
+             }
+         }
+     }
+     close(E);
+     progress("reading extensions",$count,$total,1);
+ }
+
+#
+# Read associations.
+#
+ # Read "frequency<TAB>unit<TAB>unit" lines and record the association in
+ # both directions as "other index,frequency" entries in @assoc.
+ if($do_assoc){
+     $total = 0 + `wc -l ${domain}_assoc.txt`;
+     $count = 0;
+     open(A,"${domain}_assoc.txt");
+     while(<A>){
+         $count++; progress("reading associations",$count,$total,0);
+         chomp;
+         my ($f,$u1,$u2) = split(/\t/);
+         my $uns1 = cleanStop($u1);
+         my $uns2 = cleanStop($u2);
+         if($uns1 ne "" && $uns2 ne "" && $uns1 ne $uns2){
+             # The FSA is keyed by the stopword-cleaned units, so the cleaned
+             # forms must be used for the lookup as well.
+             my ($idx1,$frq1) = lookup($fsa,$uns1);
+             my ($idx2,$frq2) = lookup($fsa,$uns2);
+             if($idx1>=0 && $idx2>=0){
+                 $assoc[$idx1] .= "$idx2,$f ";
+                 $assoc[$idx2] .= "$idx1,$f ";
+             }
+         }
+     }
+     close(A);
+     progress("reading associations",$count,$total,1);
+ }
+
+#
+# Read categories.
+#
+
+ # Read the category database (uCat.db: category name => tab separated
+ # "unit,frequency" entries). Categories get consecutive ids in sorted name
+ # order, and each known unit gets a comma separated list of category ids.
+ if($do_cat){
+     tie my %hash, 'BerkeleyDB::Btree', -Filename => "uCat.db";
+
+     $total = scalar(keys %hash);
+     $count = 0;
+     my $cid = 0;
+     foreach my $c (sort keys %hash){
+         $count++; progress("reading categories",$count,$total,0);
+         # The two catch-all categories are skipped and do not consume an id.
+         next if($c eq "Misc" || $c eq "zzz_uncategorized_catchall");
+
+         $cats[$cid] = $c;
+         foreach my $entry (split(/\t/,$hash{$c})){
+             my ($t,$f) = split(/,/,$entry);
+             my ($idx,$frq) = lookup($fsa,cleanStop($t));
+             next unless($idx>=0);
+             if(!defined($ucats[$idx])){
+                 $ucats[$idx] = "$cid";
+             }
+             elsif(!($ucats[$idx]=~/\b$cid\b/)){
+                 # Append the id only if it is not already listed for the unit.
+                 $ucats[$idx] .= ",$cid";
+             }
+         }
+         $cid++;
+     }
+     progress("reading categories",$count,$total,1);
+     untie %hash;
+ }
+
+
+#
+# Write XML output.
+#
+ $count=0;
+ $total=$#unit+1;
+
+ if($output_file eq ""){
+     $output_file = "${domain}.xml";
+ }
+ open(X,'>',$output_file) or die "error opening output file '$output_file': $!\n";
+ print X "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n";
+ print X "<conceptnetwork id=\"$domain\" unitcount=\"" . ($#unit+1). "\">\n";
+ # One <unit> element per unit index: the term with its frequencies, followed
+ # by its extensions, associations and categories.
+ for(my $i=0;$i<=$#unit;$i++){
+     $count++; progress("writing xml",$count,$total,0);
+     print X " <unit id=\"$i\">\n";
+     print X " <term id=\"$i\" freq=\"" . (0+$unit_f[$i]) . "\" cfreq=\"" .
+         (0+$unit_qfc[$i]) . "\" qfreq=\"" . (0+$unit_qf[$i]) . "\" gfreq=\"" .
+         ($do_sqfreq? (0+$unit_qfs[$i]) : 0) . "\">" . $unit[$i] . "</term>\n";
+     print X " <extensions>\n";
+     if(defined($ext[$i]) && $ext[$i] ne ""){
+         chop($ext[$i]);   # drop the trailing separator
+         my @us = split(/[ ,]/,$ext[$i]);
+         for(my $j=0;$j<$#us+1;$j+=2){
+             print X " <term id=\"".$us[$j]."\" freq=\"".$us[$j+1]."\">".$unit[$us[$j]]."</term>\n";
+         }
+     }
+     print X " </extensions>\n";
+     print X " <associations>\n";
+     if(defined($assoc[$i]) && $assoc[$i] ne ""){
+         chop($assoc[$i]);   # drop the trailing separator
+         my @usr = split(/[ ,]/,$assoc[$i]);
+         # The same associated unit may be listed several times; sum it up.
+         my (@us) = aggregate(@usr);
+         for(my $j=0;$j<$#us+1;$j+=2){
+             print X " <term id=\"".$us[$j]."\" freq=\"".$us[$j+1]."\">".$unit[$us[$j]]."</term>\n";
+         }
+     }
+     print X " </associations>\n";
+     print X " <categories>\n";
+     if(defined($ucats[$i]) && $ucats[$i] ne ""){
+         my @ucs = split(/,/,$ucats[$i]);
+         foreach my $c (@ucs){
+             print X " <category id=\"$c\">$cats[$c]</category>\n";
+         }
+     }
+     print X " </categories>\n";
+     # Close the element inside the loop so that every <unit> is terminated;
+     # closing it only once after the loop leaves the document malformed.
+     print X " </unit>\n";
+ }
+ progress("writing xml",$count,$total,1);
+ print X "</conceptnetwork>\n";
+ close(X) or die "error writing output file '$output_file': $!\n";
+
+__END__
+
+=head1 NAME
+
+cn_txt2xml - Convert a concept network to single XML file.
+
+=head1 SYNOPSIS
+
+cn_txt2xml [options] domain
+
+Options:
+
+ --[no]qfreq, -[no]q [do not] retrieve query frequencies
+ --[no]sqfreq, -[no]s [do not] retrieve term-sorted query frequencies
+ --[no]ext, -[no]e [do not] process extensions
+ --[no]assoc, -[no]a [do not] process associations
+ --[no]cat, -[no]c [do not] process categories
+ --[no]fsa, -[no]f [do not] build fsa
+ --stopwords=FILE, -w FILE use the given stopwords file
+ --output-file, -o output file
+ --verbose, -v be verbose
+ --help, -h brief help message
+ --man, -m full documentation
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<-help>
+
+Print a brief help message and exits.
+
+=item B<-man>
+
+Prints the manual page and exits.
+
+=back
+
+=head1 DESCRIPTION
+
+ B<This program> converts the concept network of the given domain into a
+ single XML file (F<domain.xml> unless an output file is specified).
+
+=cut
+
diff --git a/fsa/src/util/cn_xml2dat b/fsa/src/util/cn_xml2dat
new file mode 100755
index 00000000000..4394c201da8
--- /dev/null
+++ b/fsa/src/util/cn_xml2dat
@@ -0,0 +1,218 @@
+#!/usr/bin/perl
+
+use strict;
+
+use FSA;
+use BerkeleyDB;
+use Getopt::Long;
+use Pod::Usage;
+
+#
+# Process command line options.
+#
+
+my $help = 0;
+my $man = 0;
+my $verbose = 0;
+my $input_file = '';
+my $output_file = '';
+
+my $result = GetOptions('help|h' => \$help,
+ 'man|m' => \$man,
+ 'verbose|v' => \$verbose,
+ 'input-file|i=s' => \$input_file,
+ 'output-file|o=s' => \$output_file,
+ );
+
+pod2usage(1) if $help;
+pod2usage(-verbose => 2) if $man;
+
+#
+# Domain is a required parameter.
+#
+
+my $domain = shift || die "need domain";
+
+
+my $MAGIC = 238579428;
+
+#***********************************************************
+#
+# Functions
+#
+#***********************************************************
+
+
+ # Print a printf-style diagnostic to STDERR; silent unless --verbose was given.
+ sub msg($@){
+     my ($fmt,@args) = @_;
+     printf STDERR $fmt,@args if $verbose;
+ }
+
+ # Report progress through msg(): every 1000th item while running, and once
+ # more with a trailing "done." when $done is set. A percentage is included
+ # only when the total is known (greater than zero).
+ sub progress($$$$){
+     my ($label,$n,$of,$finished) = @_;
+
+     return unless $finished || $n%1000==0;
+
+     my $tail = $finished ? " ... done.\n" : "";
+     if($of>0){
+         msg("\r%s ... %d of %d (%.2f%%)".$tail,$label,$n,$of,100.0*$n/$of);
+     }
+     else {
+         msg("\r%s ... %d".$tail,$label,$n);
+     }
+ }
+
+my @cats = ();
+
+my $index = "";
+my $extinfo = pack('L',0); # pack dummy word to make it easy to find empties
+my $unitstr = "";
+my $catindex = "";
+
+my $extptr = 1;
+my $strptr = 0;
+
+my $maxfrq;
+my $maxcfrq;
+my $maxqfrq;
+my $maxsfrq;
+my $maxefrq;
+my $maxafrq;
+$maxfrq = $maxcfrq = $maxqfrq = $maxsfrq = $maxefrq = $maxafrq = 0;
+
+
+my $count=0;
+my @ext;
+
+ if($input_file eq ""){
+     $input_file = "${domain}.xml";
+ }
+ open(X,'<',$input_file) or die "error opening input file '$input_file': $!\n";
+ my $line = <X>;   # first line (the XML declaration) is skipped
+ $line = <X>;      # second line carries the network id and the unit count
+ my ($cnid,$total) = $line=~/<conceptnetwork id=\"([^\"]*)\" unitcount=\"(\d*)\">/;
+ # $total is undefined when the header line does not match at all.
+ die "missing unit count in '$input_file'\n" unless(defined($total) && $total>0);
+ if($cnid ne $domain){
+     msg("Warning! Domain \"%s\" does not match concept network id \"%s\".\n",$domain,$cnid);
+ }
+ # Parse the XML line by line. For every <unit> element an index record of
+ # eight 32-bit words is appended to $index: string offset of the term, the
+ # four frequencies, and the offsets (in words) of the extension, association
+ # and category lists within $extinfo, where 0 means "empty list". Each list
+ # in $extinfo is stored as a count followed by its values.
+ # NOTE(review): the parser relies on the exact line layout of the XML file
+ # (one element per line, sections in a fixed order).
+ while(<X>){
+ if(/^\s*<unit/){
+ $count++; progress("reading xml",$count,$total,0);
+ $line = <X>;
+ my ($id,$frq,$cfrq,$qfrq,$sfrq,$term) = $line=~/^\s*<term id=\"(\d*)\" freq=\"(\d*)\" cfreq=\"(\d*)\" qfreq=\"(\d*)\" gfreq=\"(\d*)\">([^<]*)<\/term>/;
+
+ # Track the maxima; they are written to the file header.
+ if($frq>$maxfrq) { $maxfrq = $frq; }
+ if($cfrq>$maxcfrq) { $maxcfrq = $cfrq; }
+ if($qfrq>$maxqfrq) { $maxqfrq = $qfrq; }
+ if($sfrq>$maxsfrq) { $maxsfrq = $sfrq; }
+
+ $index .= pack('L',$strptr); # pack offset of the term in the string table
+ $unitstr .= pack('Z*',$term); # append the term, NUL terminated
+ $strptr = length($unitstr);
+ $index .= pack('L',$frq); # pack frq (freq attribute)
+ $index .= pack('L',$cfrq); # pack cfrq (cfreq attribute)
+ $index .= pack('L',$qfrq); # pack qfrq (qfreq attribute)
+ $index .= pack('L',$sfrq); # pack sfrq (gfreq attribute)
+
+ # Skip one line (presumably the opening <extensions> tag), then collect
+ # (id,frequency) pairs up to </extensions>.
+ $line = <X>;
+ @ext = ();
+ EXT:
+ while($line = <X>){
+ last EXT if($line=~/<\/extensions>/);
+ my ($id,$efrq) = ($line=~/^\s*<term id=\"(\d*)\" freq=\"(\d*)\">/);
+ push(@ext,$id);
+ push(@ext,$efrq);
+ if($efrq>$maxefrq) { $maxefrq = $efrq; }
+ }
+
+ if($#ext==-1){
+ $index .= pack('L',0); # pack empty ext
+ }
+ else {
+ $index .= pack('L',$extptr); # pack ext
+ $extinfo .= pack('L',($#ext+1)/2); # number of (id,frequency) pairs
+ $extinfo .= pack('L*',@ext);
+ $extptr += $#ext+2; # one count word plus all values
+ }
+
+ # Same procedure for the associations section.
+ $line = <X>;
+ @ext = ();
+ ASSOC:
+ while($line = <X>){
+ last ASSOC if($line=~/<\/associations>/);
+ my ($id,$afrq) = $line=~/^\s*<term id=\"(\d*)\" freq=\"(\d*)\">/;
+ push(@ext,$id);
+ push(@ext,$afrq);
+ if($afrq>$maxafrq) { $maxafrq = $afrq; }
+ }
+
+ if($#ext==-1){
+ $index .= pack('L',0); # pack empty assoc
+ }
+ else {
+ $index .= pack('L',$extptr); # pack assoc
+ $extinfo .= pack('L',($#ext+1)/2); # number of (id,frequency) pairs
+ $extinfo .= pack('L*',@ext);
+ $extptr += $#ext+2; # one count word plus all values
+ }
+
+ # Categories: only the ids are stored per unit; the first name seen for
+ # an id is remembered in @cats.
+ $line = <X>;
+ @ext = ();
+ CAT:
+ while($line = <X>){
+ last CAT if($line=~/<\/categories>/);
+ my ($id,$cat) = $line=~/^\s*<category id=\"(\d*)\">([^<]*)<\/category>/;
+ if(!defined($cats[$id])){
+ $cats[$id] = $cat;
+ }
+ push(@ext,$id);
+ }
+
+ if($#ext==-1){
+ $index .= pack('L',0); # pack empty cat
+ }
+ else {
+ $index .= pack('L',$extptr); # pack cat
+ $extinfo .= pack('L',$#ext+1); # number of category ids
+ $extinfo .= pack('L*',@ext);
+ $extptr += $#ext+2; # one count word plus all ids
+ }
+
+ }
+ }
+ close(X);
+ progress("reading xml",$count,$total,1);
+
+ # Append the category names to the string table and record, per category
+ # id, the offset at which its name starts.
+ foreach my $name (@cats){
+     $catindex .= pack('L',$strptr); # pack category names
+     $unitstr .= pack('Z*',$name);
+     $strptr = length($unitstr);
+ }
+
+
+ msg("writing data file ... ");
+ if($output_file eq ""){
+     $output_file = "$domain.dat";
+ }
+ open(DAT,'>',$output_file) or die "error opening output file '$output_file': $!\n";
+ # The file is binary: make sure no line-ending or encoding layer touches it.
+ binmode(DAT);
+ # Header of 64 32-bit words: magic, two zero words, the section sizes, the
+ # maximum frequencies, and zero padding for the remaining 51 words.
+ my $header = pack('L64',$MAGIC,0,0,
+                   $count,$extptr,$#cats+1,$strptr,
+                   $maxfrq,$maxcfrq,$maxqfrq,$maxsfrq,$maxefrq,$maxafrq,
+                   (0) x 51);
+ print DAT $header;
+ print DAT $index;
+ print DAT $extinfo;
+ print DAT $catindex;
+ print DAT $unitstr;
+ # Buffered write errors (e.g. disk full) only surface on close.
+ close(DAT) or die "error writing output file '$output_file': $!\n";
+ msg("done.\n");
diff --git a/fsa/src/util/fsadump/.gitignore b/fsa/src/util/fsadump/.gitignore
new file mode 100644
index 00000000000..ba07f8761d4
--- /dev/null
+++ b/fsa/src/util/fsadump/.gitignore
@@ -0,0 +1,5 @@
+.deps
+.libs
+Makefile
+Makefile.in
+fsadump
diff --git a/fsa/src/util/fsainfo/.gitignore b/fsa/src/util/fsainfo/.gitignore
new file mode 100644
index 00000000000..bf788157708
--- /dev/null
+++ b/fsa/src/util/fsainfo/.gitignore
@@ -0,0 +1,5 @@
+.deps
+.libs
+Makefile
+Makefile.in
+fsainfo
diff --git a/fsa/src/util/makefsa/.gitignore b/fsa/src/util/makefsa/.gitignore
new file mode 100644
index 00000000000..83748e6a3e6
--- /dev/null
+++ b/fsa/src/util/makefsa/.gitignore
@@ -0,0 +1,5 @@
+.deps
+.libs
+Makefile
+Makefile.in
+makefsa
diff --git a/fsa/src/vespa/.gitignore b/fsa/src/vespa/.gitignore
new file mode 100644
index 00000000000..a728d158730
--- /dev/null
+++ b/fsa/src/vespa/.gitignore
@@ -0,0 +1,3 @@
+Makefile
+.depend
+libfsa*.so.*
diff --git a/fsa/src/vespa/fsa/CMakeLists.txt b/fsa/src/vespa/fsa/CMakeLists.txt
new file mode 100644
index 00000000000..dff59686894
--- /dev/null
+++ b/fsa/src/vespa/fsa/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(fsa
+ SOURCES
+ automaton.cpp
+ base64.cpp
+ blob.cpp
+ conceptnet.cpp
+ detector.cpp
+ fsa.cpp
+ metadata.cpp
+ ngram.cpp
+ permuter.cpp
+ segmenter.cpp
+ selector.cpp
+ unicode.cpp
+ unicode_charprops.cpp
+ unicode_lowercase.cpp
+ unicode_tables.cpp
+ vectorizer.cpp
+ wordchartokenizer.cpp
+ INSTALL lib64
+ DEPENDS
+)
+
+install(FILES
+ automaton.h
+ base64.h
+ blob.h
+ checksum.h
+ conceptnet.h
+ detector.h
+ file.h
+ fsa.h
+ metadata.h
+ ngram.h
+ permuter.h
+ segmenter.h
+ selector.h
+ timestamp.h
+ tokenizer.h
+ unicode.h
+ vectorizer.h
+ wordchartokenizer.h
+ DESTINATION include/vespa/fsa)
diff --git a/fsa/src/vespa/fsa/automaton-alternate.cpp b/fsa/src/vespa/fsa/automaton-alternate.cpp
new file mode 100644
index 00000000000..c753ba9f844
--- /dev/null
+++ b/fsa/src/vespa/fsa/automaton-alternate.cpp
@@ -0,0 +1,846 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h> // for ::read(), ::write(), etc.
+#include <sys/stat.h>
+#include <algorithm> // for std::sort<>(), std::equal_range<>()
+
+#include "fsa.h"
+#include "automaton.h"
+#include "checksum.h"
+
+
+namespace fsa {
+
+// {{{ constants
+
+const uint32_t Automaton::PackedAutomaton::_ALLOC_CELLS; // out-of-class definition; the value must come from the class declaration (not visible here)
+const uint32_t Automaton::PackedAutomaton::_ALLOC_BLOB; // used below as the growth granularity of the blob buffer
+const uint32_t Automaton::PackedAutomaton::_BACKCHECK; // how far behind _last_packed the search for a free cell starts
+
+const Blob Automaton::EMPTY_BLOB(""); // blob substituted when a string is inserted with a NULL blob (see addSuffix())
+
+// }}}
+
+// {{{ Automaton::TransitionList::operator<()
+
+bool Automaton::TransitionList::operator<(const Automaton::TransitionList& tl) const
+{
+  // Strict ordering: a shorter list sorts first; lists of equal length are
+  // compared pairwise on (symbol, target state) and the first difference
+  // decides. An object is never less than itself.
+  if(this==&tl) return false;
+  if(_size!=tl._size) return _size<tl._size;
+  for(unsigned int pos=0; pos<_size; ++pos){
+    if(_trans[pos]._symbol!=tl._trans[pos]._symbol)
+      return _trans[pos]._symbol<tl._trans[pos]._symbol;
+    if(_trans[pos]._state!=tl._trans[pos]._state)
+      return _trans[pos]._state<tl._trans[pos]._state;
+  }
+  return false;
+}
+
+// }}}
+// {{{ Automaton::TransitionList::operator>()
+
+bool Automaton::TransitionList::operator>(const Automaton::TransitionList& tl) const
+{
+  // "a > b" is by construction the same relation as "b < a": larger size
+  // wins, then the first differing (symbol, state) pair decides.
+  return tl<*this;
+}
+
+// }}}
+// {{{ Automaton::TransitionList::operator==()
+
+bool Automaton::TransitionList::operator==(const Automaton::TransitionList& tl) const
+{
+  // Two lists are equal when they have the same length and every position
+  // carries the same symbol and the same target state pointer.
+  if(this==&tl) return true;
+  bool same = (_size==tl._size);
+  for(unsigned int pos=0; same && pos<_size; ++pos){
+    same = _trans[pos]._symbol==tl._trans[pos]._symbol &&
+           _trans[pos]._state==tl._trans[pos]._state;
+  }
+  return same;
+}
+
+// }}}
+
+// {{{ Automaton::PackedAutomaton::reset()
+
+// Releases every buffer owned by the packed automaton and returns all
+// bookkeeping fields to their empty state. Safe to call repeatedly.
+void Automaton::PackedAutomaton::reset()
+{
+  _packable = false;
+  _blob_map.clear();
+
+  // _packed_idx either aliases _packed_ptr (init()/expandCells() when
+  // sizeof(State*)==sizeof(state_t)) or is an allocation of its own (always
+  // after read(), and whenever the two sizes differ). Decide by comparing
+  // the pointers rather than the type sizes; otherwise the buffer allocated
+  // by read() is leaked on platforms where the sizes match.
+  if((void*)_packed_idx!=(void*)_packed_ptr){
+    free(_packed_idx);
+  }
+  _packed_idx=NULL;
+
+  // free(NULL) is a no-op, so no individual NULL checks are needed.
+  free(_packed_ptr);
+  _packed_ptr=NULL;
+  free(_symbol);
+  _symbol=NULL;
+  free(_used);
+  _used=NULL;
+  free(_perf_hash);
+  _perf_hash=NULL;
+  free(_totals);
+  _totals=NULL;
+  _packed_size=0;
+  _last_packed=0;
+
+  free(_blob);
+  _blob=NULL;
+  _blob_size=0;
+  _blob_used=0;
+  _blob_type=FSA::DATA_VARIABLE;
+  _fixed_blob_size=0;
+  _start_state=0;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::init()
+
+// Discards any previous content and allocates the initial cell tables
+// (_ALLOC_CELLS entries) and blob buffer (_ALLOC_BLOB bytes), leaving the
+// object ready for packState().
+void Automaton::PackedAutomaton::init()
+{
+  // The index table gets storage of its own only when a state index and a
+  // State pointer differ in size; otherwise it overlays the pointer table.
+  const bool ownIdx = (sizeof(State*)!=sizeof(state_t));
+
+  reset();
+
+  _packed_ptr = (State**)malloc(_ALLOC_CELLS*sizeof(State*));
+  _packed_idx = ownIdx ? (state_t*)malloc(_ALLOC_CELLS*sizeof(state_t))
+                       : (state_t*)_packed_ptr;
+  _symbol = (symbol_t*)malloc(_ALLOC_CELLS*sizeof(symbol_t));
+  _used = (bool*)malloc(_ALLOC_CELLS*sizeof(bool));
+  _packed_size = _ALLOC_CELLS;
+
+  assert(_packed_ptr!=NULL && _packed_idx!=NULL && _symbol!=NULL && _used!=NULL);
+
+  // Mark every cell as free and unlabeled.
+  for(uint32_t c=0;c<_packed_size;++c){
+    _used[c] = false;
+    _symbol[c] = FSA::EMPTY_SYMBOL;
+    _packed_ptr[c] = NULL;
+    if(ownIdx){
+      _packed_idx[c] = 0;
+    }
+  }
+
+  _blob = (data_t*)malloc(_ALLOC_BLOB);
+  _blob_size = _ALLOC_BLOB;
+
+  assert(_blob!=NULL);
+
+  _packable = true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::expandCells()
+
+// Grows all per-cell tables by _ALLOC_CELLS entries and initializes the new
+// tail as free, unlabeled cells.
+void Automaton::PackedAutomaton::expandCells()
+{
+  const bool ownIdx = (sizeof(State*)!=sizeof(state_t));
+  const uint32_t grown = _packed_size+_ALLOC_CELLS;
+
+  _packed_ptr = (State**)realloc(_packed_ptr,grown*sizeof(State*));
+  // When the index table overlays the pointer table it must follow the
+  // (possibly moved) pointer table.
+  _packed_idx = ownIdx ? (state_t*)realloc(_packed_idx,grown*sizeof(state_t))
+                       : (state_t*)_packed_ptr;
+  _symbol = (symbol_t*)realloc(_symbol,grown*sizeof(symbol_t));
+  _used = (bool*)realloc(_used,grown*sizeof(bool));
+
+  assert(_packed_ptr!=NULL && _packed_idx!=NULL && _symbol!=NULL && _used!=NULL);
+
+  for(uint32_t c=_packed_size;c<grown;++c){
+    _used[c] = false;
+    _symbol[c] = FSA::EMPTY_SYMBOL;
+    _packed_ptr[c] = NULL;
+    if(ownIdx){
+      _packed_idx[c] = 0;
+    }
+  }
+  _packed_size = grown;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::expandBlob()
+
+// Enlarges the blob buffer by the smallest multiple of _ALLOC_BLOB that is
+// strictly greater than minExpand.
+void Automaton::PackedAutomaton::expandBlob(uint32_t minExpand)
+{
+  const uint32_t chunks = minExpand/_ALLOC_BLOB+1;
+  const uint32_t extra = chunks*_ALLOC_BLOB;
+
+  _blob = (data_t*)realloc(_blob,_blob_size+extra);
+
+  assert(_blob!=NULL);
+
+  _blob_size += extra;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getEmptyCell()
+
+// Claims the first unused cell at or after (_last_packed - _BACKCHECK), or
+// at or after cell 1 when fewer than _BACKCHECK cells are packed. Used for
+// states without outgoing transitions. Returns the claimed cell index.
+uint32_t Automaton::PackedAutomaton::getEmptyCell()
+{
+  unsigned int cell = 1;
+  if(_last_packed>_BACKCHECK){
+    cell = _last_packed-_BACKCHECK;
+  }
+
+  for(;;){
+    if(!_used[cell])
+      break;
+    ++cell;
+    // keep a full symbol range available beyond the probed cell
+    if(cell+256>=_packed_size)
+      expandCells();
+  }
+
+  _used[cell] = true;
+
+  return cell;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getCell()
+
+uint32_t Automaton::PackedAutomaton::getCell(const Automaton::SymList &t) // finds and claims a base cell where every symbol in t fits; returns that cell
+{
+ SymListConstIterator tit;
+ uint32_t cell = _last_packed>_BACKCHECK?_last_packed-_BACKCHECK:1; // start a little behind the last packed cell (never at cell 0)
+ bool found = false;
+ while(!found){
+ if(!_used[cell]){ // a cell can be the base of only one state
+ if(cell+256>=_packed_size) // make sure cell+symbol stays inside the tables
+ expandCells();
+ for(tit=t.begin();tit!=t.end();++tit){
+ if(_symbol[cell+*tit]!=FSA::EMPTY_SYMBOL) // slot already taken by another state's transition
+ break;
+ }
+ if(tit==t.end()) // loop ran to completion: no collisions
+ found=true;
+ }
+ if(!found){
+ cell++; // try the next candidate base
+ if(cell>=_packed_size)
+ expandCells();
+ }
+ }
+ _used[cell] = true; // claim the base cell
+ for(tit=t.begin();tit!=t.end();++tit){
+ _symbol[cell+*tit] = *tit; // label each transition slot with its own symbol
+ }
+
+ return cell;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::packState()
+
+// Places the state referenced by `it` into the packed tables and records
+// the chosen cell in it->cell. Final transitions store a blob offset,
+// all other transitions store the (not yet resolved) target State pointer.
+// Returns false when the automaton is not in the packable phase.
+bool Automaton::PackedAutomaton::packState(Automaton::StateCellArrayIterator &it)
+{
+  const TransitionList &tlist = it->state->getTransitionList();
+
+  if(!_packable)
+    return false;
+
+  uint32_t cell;
+  if(tlist.size()==0){
+    cell = getEmptyCell();
+  }
+  else{
+    // collect and sort the outgoing symbols, then find a base that fits
+    SymList symbols;
+    for(size_t k=0; k<tlist.size(); k++){
+      symbols.push_back(tlist[k]._symbol);
+    }
+    symbols.sort();
+    cell = getCell(symbols);
+
+    for(size_t k=0; k<tlist.size(); k++){
+      if(tlist[k]._symbol!=FSA::FINAL_SYMBOL){
+        _packed_ptr[cell+tlist[k]._symbol] = tlist[k]._state;
+      }
+      else{
+        _packed_idx[cell+FSA::FINAL_SYMBOL] = packBlob(tlist[k]._state->getBlob());
+      }
+    }
+  }
+
+  it->cell = cell;
+  if(_last_packed<cell)
+    _last_packed = cell;
+
+  return true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::packBlob()
+
+// Stand-in used when packBlob() is handed a NULL blob pointer.
+static const Blob nullBlob;
+
+// Appends blob b to the blob buffer as <uint32 size><data> and returns the
+// byte offset of the record. A pointer that was packed before yields the
+// offset recorded for it without storing the data again.
+uint32_t Automaton::PackedAutomaton::packBlob(const Blob *b)
+{
+  const PackMapIterator known = _blob_map.find(b);
+  if(known!=_blob_map.end())
+    return known->second;
+
+  const uint32_t offset=_blob_used;
+  _blob_map[b]=offset;   // keyed on the caller's pointer, NULL included
+
+  const Blob *src = (b!=NULL) ? b : &nullBlob;
+  uint32_t size=src->size();
+  const size_t needed = size+sizeof(uint32_t);
+  if(_blob_used+needed>_blob_size)
+    expandBlob(needed);
+
+  memcpy(_blob+_blob_used,&size,sizeof(uint32_t));
+  memcpy(_blob+_blob_used+sizeof(uint32_t),src->data(),size);
+  _blob_used += needed;
+
+  return offset;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::finalize()
+
+// Ends the packing phase.
+//  1. Every non-final transition slot, which so far holds a State pointer,
+//     is rewritten to the cell index recorded for that state in `queue`
+//     (queue must be sorted according to StateCellArrayLess).
+//  2. If all packed blobs have the same size, the per-blob size prefixes
+//     are stripped, the blob area is compacted and the blob type becomes
+//     FSA::DATA_FIXED; final-transition offsets are remapped accordingly.
+// Does nothing when the automaton is not packable (already finalized).
+void Automaton::PackedAutomaton::finalize(const StateCellArray &queue)
+{
+  if(!_packable)
+    return;
+
+  for(uint32_t c=0;c<_last_packed+256;c++){
+    if(c>=_packed_size) // this shouldn't happen anymore, but check anyway
+      expandCells();
+    if(_symbol[c]!=FSA::EMPTY_SYMBOL && _symbol[c]!=FSA::FINAL_SYMBOL){
+      // lower_bound yields the same iterator as equal_range().first with
+      // roughly half the comparisons
+      _packed_idx[c] = std::lower_bound(queue.begin(), queue.end(), StateCellArrayItem(_packed_ptr[c]), StateCellArrayLess())->cell;
+    }
+  }
+
+  // compact blobs if the size is constant; with no blob packed at all there
+  // is no size prefix to read, so the blob area is left empty and variable
+  if(_blob_used>=sizeof(uint32_t)){
+    std::map<uint32_t,uint32_t> bcomp;   // old record offset -> compacted offset
+    std::map<uint32_t,uint32_t>::iterator bcomp_it;
+    bcomp[0]=0;
+    uint32_t lastsize, currsize;
+    // records start at arbitrary byte offsets; memcpy avoids misaligned loads
+    memcpy(&lastsize,_blob,sizeof(uint32_t));
+    uint32_t i=lastsize+sizeof(uint32_t);
+    uint32_t j=lastsize;
+    bool fixedsize = true;
+    while(i<_blob_used){
+      memcpy(&currsize,_blob+i,sizeof(uint32_t));
+      if(currsize!=lastsize){
+        fixedsize = false;
+        break;
+      }
+      bcomp[i]=j;
+      i+=currsize+sizeof(uint32_t);
+      j+=currsize;
+    }
+    if(fixedsize){
+      _blob_type = FSA::DATA_FIXED;
+      _fixed_blob_size = lastsize;
+      _blob_used = j;
+      for(i=0;i<_last_packed+256;i++){
+        if(_symbol[i]==FSA::FINAL_SYMBOL){
+          _packed_idx[i] = bcomp[_packed_idx[i]];
+        }
+      }
+
+      // ascending offsets: every move targets a lower address, so earlier
+      // moves never clobber data that is still to be moved
+      for(bcomp_it = bcomp.begin(); bcomp_it!=bcomp.end(); ++bcomp_it){
+        memmove(_blob+(bcomp_it->second),_blob+(bcomp_it->first+sizeof(uint32_t)),lastsize);
+      }
+    }
+  }
+
+  _packable = false;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::computePerfectHash()
+
+// Recursively counts the accepted strings reachable from `state`, storing
+// in _perf_hash[] for each outgoing transition the count accumulated before
+// following it. Non-zero results are memoized in _totals[].
+hash_t Automaton::PackedAutomaton::computePerfectHash(state_t state)
+{
+  if(_totals[state]!=0){
+    return _totals[state];
+  }
+
+  // a final transition contributes one accepted string
+  hash_t total = 0;
+  if(_symbol[state+FSA::FINAL_SYMBOL]==FSA::FINAL_SYMBOL){
+    total = 1;
+  }
+
+  symbol_t sym = 1;
+  while(sym<=254){
+    if(_symbol[state+sym]==sym){
+      _perf_hash[state+sym] = total;
+      total += computePerfectHash(_packed_idx[state+sym]);
+    }
+    sym++;
+  }
+
+  _totals[state] = total;
+
+  return total;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::addPerfectHash()
+
+// Builds the perfect-hash table for a finalized, non-empty automaton.
+// May be called more than once; a table from an earlier call is released
+// before the new one is allocated.
+void Automaton::PackedAutomaton::addPerfectHash()
+{
+  if(_last_packed==0 || _packable){
+    // do nothing with an empty automaton or one which has not been finalized
+    return;
+  }
+
+  uint32_t size = _last_packed+256;
+
+  // drop leftovers of a previous run so repeated calls do not leak
+  free(_perf_hash);
+  free(_totals);
+
+  _perf_hash = (hash_t*)malloc(size*sizeof(hash_t));
+  _totals = (hash_t*)malloc(size*sizeof(hash_t));
+
+  assert(_perf_hash!=NULL && _totals!=NULL);
+
+  for(unsigned int i=0;i<size;i++){
+    _perf_hash[i] = 0;
+    _totals[i] = 0;
+  }
+
+  computePerfectHash(_start_state);
+
+  // _totals is only scratch space for the recursion
+  free(_totals); _totals=NULL;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::lookup()
+
+// Looks up the NUL-terminated string `input` in the finalized automaton.
+// Returns a pointer into the blob buffer at the offset stored for the
+// string's final transition, or NULL if the string is not accepted, the
+// automaton is not finalized/empty, or input is NULL.
+const data_t* Automaton::PackedAutomaton::lookup(const char *input) const
+{
+  if(_packable || _start_state==0 || input==NULL){
+    return NULL;
+  }
+  state_t state = _start_state;
+  for(const char *p=input; *p; ++p){
+    // Go through unsigned char: with a signed plain char, bytes >= 0x80
+    // would turn into negative offsets and index in front of the state.
+    const symbol_t sym = (symbol_t)(unsigned char)*p;
+    // The FINAL_SYMBOL slot holds a blob offset, not a state index, so it
+    // must never be followed as an ordinary transition.
+    if(sym==FSA::FINAL_SYMBOL || _symbol[state+sym]!=sym){
+      return NULL;
+    }
+    state=_packed_idx[state+sym];
+  }
+  if(_symbol[state+FSA::FINAL_SYMBOL]==FSA::FINAL_SYMBOL){
+    return _blob+_packed_idx[state+FSA::FINAL_SYMBOL];
+  }
+  return NULL;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::write()
+
+// Writes exactly len bytes from buf to fd, continuing after short writes.
+// Returns false as soon as ::write() fails or makes no progress (an
+// interrupted call is treated as a failure, it is not retried).
+static bool writeFully(int fd, const void *buf, size_t len)
+{
+  const char *p = (const char*)buf;
+  while(len>0){
+    const ssize_t w = ::write(fd,p,len);
+    if(w<=0)
+      return false;
+    p += w;
+    len -= (size_t)w;
+  }
+  return true;
+}
+
+// Serializes the finalized automaton to `filename` as: header, symbol
+// table, state table, blob data and, if present, the perfect hash table.
+// The header checksum is the sum of the checksums of these sections.
+// Returns false if the automaton is empty or not finalized, if the file
+// cannot be created, or if any write or the final close fails; in the
+// latter cases a partially written file may be left behind.
+bool Automaton::PackedAutomaton::write(const char *filename, uint32_t serial)
+{
+  if(_packable || _packed_size==0) // must be non-empty and finalized
+    return false;
+
+  FSA::Header header;
+
+  header._magic = FSA::MAGIC;
+  header._version = FSA::VER;
+  header._checksum = 0;
+  header._size = _last_packed+256;
+  header._start = _start_state;
+  header._data_size = _blob_used;
+  header._data_type = _blob_type;
+  header._fixed_data_size = _fixed_blob_size;
+  header._has_perfect_hash = (_perf_hash==NULL) ? 0 : 1;
+  header._serial = serial;
+  memset(&(header._reserved), 0, sizeof(header._reserved));
+
+  header._checksum += Checksum::compute(_symbol,header._size*sizeof(symbol_t));
+  header._checksum += Checksum::compute(_packed_idx,header._size*sizeof(state_t));
+  header._checksum += Checksum::compute(_blob,_blob_used);
+  if(header._has_perfect_hash){
+    header._checksum += Checksum::compute(_perf_hash,header._size*sizeof(hash_t));
+  }
+
+  int fd = ::open(filename,O_CREAT|O_TRUNC|O_RDWR,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
+  if(fd<0) return false;
+
+  bool ok = writeFully(fd,&header,sizeof(header)) &&
+            writeFully(fd,_symbol,header._size*(sizeof(symbol_t))) &&
+            writeFully(fd,_packed_idx,header._size*(sizeof(state_t))) &&
+            writeFully(fd,_blob,_blob_used);
+  if(ok && header._has_perfect_hash){
+    ok = writeFully(fd,_perf_hash,header._size*(sizeof(hash_t)));
+  }
+  // close() can report deferred write errors, so its result counts as well
+  if(::close(fd)!=0){
+    ok = false;
+  }
+
+  return ok;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::read()
+
+// Reads exactly len bytes from fd into buf, continuing after short reads.
+// Returns false on a read error or if end-of-file is hit too early.
+static bool readFully(int fd, void *buf, size_t len)
+{
+  char *p = (char*)buf;
+  while(len>0){
+    const ssize_t r = ::read(fd,p,len);
+    if(r<=0)
+      return false;
+    p += r;
+    len -= (size_t)r;
+  }
+  return true;
+}
+
+// Replaces the current content with the packed automaton stored in
+// `filename` (layout as produced by write()). Returns false, leaving the
+// object reset, if the file cannot be opened, the header is short, has the
+// wrong magic or an impossible size, memory cannot be allocated, or any
+// section is truncated. The stored checksum and version are not verified
+// here.
+bool Automaton::PackedAutomaton::read(const char *filename)
+{
+  FSA::Header header;
+
+  reset();
+  int fd = ::open(filename,O_RDONLY);
+  if(fd<0){
+    return false;
+  }
+  // header._size below 256 would make _last_packed wrap around
+  if(!readFully(fd,&header,sizeof(header)) || header._magic!=FSA::MAGIC || header._size<256){
+    ::close(fd);
+    return false;
+  }
+
+  _packable = false;
+  _packed_size = header._size;
+  _last_packed = _packed_size-256;
+  _blob_size = header._data_size;
+  _blob_used = header._data_size;
+  _blob_type = header._data_type;
+  _fixed_blob_size = header._fixed_data_size;
+  _start_state = header._start;
+
+  const size_t symBytes = _packed_size*sizeof(symbol_t);
+  const size_t idxBytes = _packed_size*sizeof(state_t);
+  const size_t hashBytes = _packed_size*sizeof(hash_t);
+
+  _symbol = (symbol_t*)malloc(symBytes);
+  _packed_idx = (state_t*)malloc(idxBytes);
+  // malloc(0) may legitimately return NULL; ask for at least one byte
+  _blob = (data_t*)malloc(_blob_used>0 ? _blob_used : 1);
+  if(header._has_perfect_hash){
+    _perf_hash = (hash_t*)malloc(hashBytes);
+  }
+
+  bool ok = _symbol!=NULL && _packed_idx!=NULL && _blob!=NULL &&
+            (!header._has_perfect_hash || _perf_hash!=NULL);
+  ok = ok && readFully(fd,_symbol,symBytes)
+          && readFully(fd,_packed_idx,idxBytes)
+          && readFully(fd,_blob,_blob_used);
+  if(ok && header._has_perfect_hash){
+    ok = readFully(fd,_perf_hash,hashBytes);
+  }
+
+  ::close(fd);
+
+  if(!ok){
+    reset(); // do not leave a half-loaded automaton behind
+    return false;
+  }
+
+  return true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getFSA()
+
+// Hands the packed tables over to descriptor d and resets this object.
+// The buffers are first shrunk to the size actually in use; afterwards d
+// holds the only pointers to them. Returns false when the automaton is
+// empty or has not been finalized.
+bool Automaton::PackedAutomaton::getFSA(FSA::Descriptor &d)
+{
+  if(_packable || _packed_size==0) // must be non-empty and finalized
+    return false;
+
+  const uint32_t cells = _last_packed+256;
+
+  // trim the buffers down to what is used
+  _symbol = (symbol_t*)realloc(_symbol,cells*sizeof(symbol_t));
+  _packed_idx = (state_t*)realloc(_packed_idx,cells*sizeof(state_t));
+  _blob = (data_t*)realloc(_blob,_blob_used);
+  if(_perf_hash!=NULL){
+    _perf_hash = (hash_t*)realloc(_perf_hash,cells*sizeof(hash_t));
+  }
+
+  // describe the automaton
+  d._version = FSA::VER;
+  d._serial = 0;
+  d._size = cells;
+  d._start = _start_state;
+  d._symbol = _symbol;
+  d._state = _packed_idx;
+  d._perf_hash = _perf_hash;
+  d._data = _blob;
+  d._data_size = _blob_used;
+  d._data_type = _blob_type;
+  d._fixed_data_size = _fixed_blob_size;
+
+  // forget the transferred buffers so that reset() does not free them
+  _symbol = NULL;
+  _blob = NULL;
+  _perf_hash = NULL;
+  _packed_idx = NULL;
+  if(sizeof(State*)==sizeof(state_t)){ // _packed_idx and _packed_ptr are overlaid
+    _packed_ptr=NULL;
+  }
+  reset();
+
+  return true;
+}
+
+// }}}
+
+// {{{ Automaton::cleanUp()
+
+void Automaton::cleanUp() // releases the builder-side structures; does nothing when no start state exists
+{
+ if(_q0!=NULL){
+ finalize(); // pack any pending states first; in this file finalize() also deletes _register, _queue and all queued states except _q0
+ for(BlobRegisterIterator bi = _blob_register.begin(); bi!=_blob_register.end(); ++bi){
+ delete bi->second; // NOTE(review): confirm these states are not also deleted through the queue in finalize()
+ }
+ _blob_register.clear(); // clear _blob_register
+ // clear _register and remove all states
+#if 0
+ // In the previous 1-pass method (without _queue), the _register owned
+ // the memory for all states so we cleaned up this way:
+ for(RegisterIterator ri = _register.begin(); ri!=_register.end(); ++ri){
+ delete ri->second;
+ }
+#else
+ if(_queue) { // NOTE(review): finalize() above sets _queue to NULL when it runs, so this branch looks unreachable - confirm
+ for(StateArrayIterator qi=_queue->begin(); qi!=_queue->end(); ++qi){
+ if(*qi!=_q0) // _q0 may or may not be in the queue so we don't want to double-free it
+ delete *qi;
+ }
+ delete _queue;
+ _queue = NULL;
+ }
+#endif
+ delete _register; // deleting a NULL pointer is harmless
+ _register = NULL;
+ delete _q0; // the start state is released last
+ _q0 = NULL;
+ }
+}
+
+// }}}
+// {{{ Automaton::~Automaton()
+
+Automaton::~Automaton() // destructor: all teardown is delegated to cleanUp()
+{
+ cleanUp();
+}
+
+// }}}
+// {{{ Automaton::getCPLastState()
+
+// Follows `input` from the start state for as long as matching transitions
+// exist. Returns the last state reached and sets `suffix` to the first
+// character of input that could not be followed (the terminating NUL when
+// the whole string was consumed). Returns NULL, leaving suffix untouched,
+// when there is no start state.
+Automaton::State* Automaton::getCPLastState(const char *input, const char *&suffix)
+{
+  if(_q0==NULL) return NULL;
+
+  State* reached = _q0;
+  const char* rest = input;
+  for(; *rest!=0; ++rest){
+    State* step = reached->child(*rest);
+    if(step==NULL)
+      break;
+    reached = step;
+  }
+  suffix = rest;
+  return reached;
+}
+
+// }}}
+// {{{ Automaton::addSuffix()
+
+// Appends a chain of fresh states spelling `suffix` below `state` and
+// terminates it with a FINAL_SYMBOL transition. The final state is shared
+// between all strings carrying an equal blob: it is looked up in
+// _blob_register and only created (with a private copy of the blob) when
+// missing. A NULL blob is treated as EMPTY_BLOB.
+void Automaton::addSuffix(State* state, const char *suffix, const Blob *b)
+{
+  State* tail = state;
+  for(const char *p=suffix; *p!=0; ++p){
+    tail = tail->addEmptyChild(*p);
+  }
+
+  const Blob &key = (b!=NULL) ? *b : EMPTY_BLOB;
+  BlobRegisterIterator hit = _blob_register.find(key);
+  if(hit!=_blob_register.end()){
+    // reuse the final state registered for this blob
+    tail->addChild(FSA::FINAL_SYMBOL,hit->second);
+    return;
+  }
+
+  const Blob *bcopy = new Blob(key);
+  assert(bcopy!=NULL);
+  State* finalState = tail->addEmptyChild(FSA::FINAL_SYMBOL,bcopy);
+  _blob_register[*bcopy] = finalState;
+}
+
+// }}}
+// {{{ Automaton::init()
+
+// Throws away any automaton under construction and sets up empty builder
+// structures (register, start state, state queue) plus an empty packed
+// automaton.
+void Automaton::init()
+{
+  cleanUp();
+
+  _finalized = false;
+  _register = new Register();
+  _q0 = new State();
+  assert(_q0!=NULL);
+  _queue = new StateArray();
+
+  _packed.init();
+}
+
+// }}}
+// {{{ Automaton::finalize()
+
+void Automaton::finalize() // completes construction: registers remaining states, packs all queued states, then finalizes the packed form
+{
+ if(!_finalized && _q0!=NULL){ // runs at most once per init()
+ replaceOrRegister(_q0); // process the states along the last inserted string
+ //
+ // 2nd-pass begin; clear the _register to free up memory, then pack queued states:
+ //
+ delete _register;
+ _register = NULL;
+ _queue->push_back(_q0); // the start state itself must be packed too
+ std::sort(_queue->begin(), _queue->end(), StateArrayLess()); // sorted order is what lets PackedAutomaton::finalize() binary-search the queue
+ // now that _register memory is freed up, transfer StateArray into StateCellArray for packing:
+ StateCellArray queue(_queue->size());
+ for(size_t i=0; i < queue.size(); i++){
+ queue[i].state = _queue->operator[](i);
+ queue[i].cell = 0; // filled in by packState()
+ }
+ delete _queue;
+ _queue = NULL;
+ for(StateCellArrayIterator it=queue.begin(); it!=queue.end(); ++it){
+ _packed.packState(it);
+ if(it->state == _q0)
+ _packed.setStartState(it->cell); // remember where the start state was placed
+ }
+ // clean up queue
+ for(StateCellArrayIterator it=queue.begin(); it!=queue.end(); ++it){
+ if(it->state!=_q0) // _q0 stays alive; cleanUp() deletes it
+ delete it->state;
+ }
+ //
+ // 2nd-pass end
+ //
+ _packed.finalize(queue); // queue now holds dangling state pointers; presumably only their values and the cells are used - confirm StateCellArrayLess does not dereference them
+ _finalized = true;
+ }
+}
+
+// }}}
+// {{{ Automaton::addPerfectHash()
+
+// Adds a perfect hash to the packed automaton; ignored until the automaton
+// has been finalized.
+void Automaton::addPerfectHash()
+{
+  if(!_finalized)
+    return;
+
+  _packed.addPerfectHash();
+}
+
+// }}}
+// {{{ Automaton::write()
+
+// Finalizes the automaton if necessary and writes its packed form to
+// `file`, stamping it with `serial`. Returns the result of the packed
+// automaton's write().
+bool Automaton::write(const char *file, uint32_t serial)
+{
+  finalize(); // returns immediately when already finalized
+  return _packed.write(file,serial);
+}
+
+// }}}
+// {{{ Automaton::getFSA()
+
+// Finalizes the automaton if necessary and converts it into a newly
+// allocated FSA object built from the packed tables. On success the builder
+// is cleaned up. Returns NULL when the packed automaton cannot provide a
+// descriptor.
+FSA* Automaton::getFSA()
+{
+  finalize(); // returns immediately when already finalized
+
+  FSA::Descriptor d;
+  FSA *result = NULL;
+
+  if(_packed.getFSA(d)){
+    result = new FSA(d);
+    cleanUp();
+  }
+
+  return result;
+}
+
+// }}}
+// {{{ Automaton::insertSortedString()
+
+void Automaton::insertSortedString(const std::string &input) // convenience overload: no blob argument is passed on
+{
+ insertSortedString(input.c_str());
+}
+
+void Automaton::insertSortedString(const std::string &input, const std::string &meta) // convenience overload: meta is wrapped in a temporary Blob
+{
+ Blob b(meta);
+ insertSortedString(input.c_str(),&b);
+}
+
+void Automaton::insertSortedString(const char *input, const Blob& b) // convenience overload: forwards the blob by address
+{
+ insertSortedString(input,&b);
+}
+
+void Automaton::insertSortedString(const char *input, const Blob* b) // core insert; as the name says, strings are expected in sorted order
+{
+ if(_q0==NULL || _finalized) return; // silently ignored before init() or after finalize()
+
+ const char* currentSuffix;
+ State* lastState = getCPLastState(input, currentSuffix); // longest prefix of input already in the automaton
+
+ if(lastState->hasChildren()){
+ replaceOrRegister(lastState); // register/merge the branch below the last child before adding a new one
+ }
+ addSuffix(lastState,currentSuffix,b); // append the part of input that is not yet present
+}
+
+// }}}
+// {{{ Automaton::replaceOrRegister()
+
+void Automaton::replaceOrRegister(Automaton::State* state) // recursively merges the last child of state with an equivalent registered state, or registers it
+{
+ State* child = state->lastChild();
+ if(child!=NULL){
+ if(child->hasChildren()){
+ replaceOrRegister(child); // handle the deepest states first
+ }
+ RegisterIterator ri = _register->find(&(child->getTransitionList())); // states are identified by their transition list
+ if(ri!=_register->end() && ri->second!=child){
+ state->updateLastChild(ri->second); // an equivalent state exists: point to it instead
+ delete child; // and drop the duplicate
+ }
+ else {
+ (*_register)[&(child->getTransitionList())] = child; // first state with this transition list: register it
+#if 0
+ // In the previous 1-pass method (without _queue), we packed states as
+ // we went:
+ _packed.packState(child);
+#else
+ // Now we queue them up to be packed after _register memory is reclaimed:
+ _queue->push_back(child);
+#endif
+ }
+ }
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/automaton.cpp b/fsa/src/vespa/fsa/automaton.cpp
new file mode 100644
index 00000000000..dffca7739ff
--- /dev/null
+++ b/fsa/src/vespa/fsa/automaton.cpp
@@ -0,0 +1,824 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h> // for ::read(), ::write(), etc.
+#include <sys/stat.h>
+
+#include "fsa.h"
+#include "automaton.h"
+#include "checksum.h"
+
+
+namespace fsa {
+
+// {{{ constants
+
+const uint32_t Automaton::PackedAutomaton::_ALLOC_CELLS; // out-of-class definition; the value must come from the class declaration (not visible here)
+const uint32_t Automaton::PackedAutomaton::_ALLOC_BLOB; // used below as the growth granularity of the blob buffer
+const uint32_t Automaton::PackedAutomaton::_BACKCHECK; // how far behind _last_packed the search for a free cell starts
+
+const Blob Automaton::EMPTY_BLOB(""); // empty blob constant; its users are not visible in this part of the file
+
+// }}}
+
+// {{{ Automaton::TransitionList::operator<()
+
+bool Automaton::TransitionList::operator<(const Automaton::TransitionList& tl) const
+{
+  // Strict ordering: a shorter list sorts first; lists of equal length are
+  // compared pairwise on (symbol, target state) and the first difference
+  // decides. An object is never less than itself.
+  if(this==&tl) return false;
+  if(_size!=tl._size) return _size<tl._size;
+  for(unsigned int pos=0; pos<_size; ++pos){
+    if(_trans[pos]._symbol!=tl._trans[pos]._symbol)
+      return _trans[pos]._symbol<tl._trans[pos]._symbol;
+    if(_trans[pos]._state!=tl._trans[pos]._state)
+      return _trans[pos]._state<tl._trans[pos]._state;
+  }
+  return false;
+}
+
+// }}}
+// {{{ Automaton::TransitionList::operator>()
+
+bool Automaton::TransitionList::operator>(const Automaton::TransitionList& tl) const
+{
+  // "a > b" is by construction the same relation as "b < a": larger size
+  // wins, then the first differing (symbol, state) pair decides.
+  return tl<*this;
+}
+
+// }}}
+// {{{ Automaton::TransitionList::operator==()
+
+bool Automaton::TransitionList::operator==(const Automaton::TransitionList& tl) const
+{
+  // Two lists are equal when they have the same length and every position
+  // carries the same symbol and the same target state pointer.
+  if(this==&tl) return true;
+  bool same = (_size==tl._size);
+  for(unsigned int pos=0; same && pos<_size; ++pos){
+    same = _trans[pos]._symbol==tl._trans[pos]._symbol &&
+           _trans[pos]._state==tl._trans[pos]._state;
+  }
+  return same;
+}
+
+// }}}
+
+// {{{ Automaton::PackedAutomaton::reset()
+
+// Releases every buffer owned by the packed automaton, empties the lookup
+// maps and returns all bookkeeping fields to their empty state. Safe to
+// call repeatedly.
+void Automaton::PackedAutomaton::reset()
+{
+  _packable = false;
+  _pack_map.clear();
+  _blob_map.clear();
+
+  // _packed_idx either aliases _packed_ptr (init()/expandCells() when
+  // sizeof(State*)==sizeof(state_t)) or is an allocation of its own (always
+  // after read(), and whenever the two sizes differ). Decide by comparing
+  // the pointers rather than the type sizes; otherwise the buffer allocated
+  // by read() is leaked on platforms where the sizes match.
+  if((void*)_packed_idx!=(void*)_packed_ptr){
+    free(_packed_idx);
+  }
+  _packed_idx=NULL;
+
+  // free(NULL) is a no-op, so no individual NULL checks are needed.
+  free(_packed_ptr);
+  _packed_ptr=NULL;
+  free(_symbol);
+  _symbol=NULL;
+  free(_used);
+  _used=NULL;
+  free(_perf_hash);
+  _perf_hash=NULL;
+  free(_totals);
+  _totals=NULL;
+  _packed_size=0;
+  _last_packed=0;
+
+  free(_blob);
+  _blob=NULL;
+  _blob_size=0;
+  _blob_used=0;
+  _blob_type=FSA::DATA_VARIABLE;
+  _fixed_blob_size=0;
+  _start_state=0;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::init()
+
+// Discards any previous content and allocates the initial cell tables
+// (_ALLOC_CELLS entries) and blob buffer (_ALLOC_BLOB bytes), leaving the
+// object ready for packState().
+void Automaton::PackedAutomaton::init()
+{
+  // The index table gets storage of its own only when a state index and a
+  // State pointer differ in size; otherwise it overlays the pointer table.
+  const bool ownIdx = (sizeof(State*)!=sizeof(state_t));
+
+  reset();
+
+  _packed_ptr = (State**)malloc(_ALLOC_CELLS*sizeof(State*));
+  _packed_idx = ownIdx ? (state_t*)malloc(_ALLOC_CELLS*sizeof(state_t))
+                       : (state_t*)_packed_ptr;
+  _symbol = (symbol_t*)malloc(_ALLOC_CELLS*sizeof(symbol_t));
+  _used = (bool*)malloc(_ALLOC_CELLS*sizeof(bool));
+  _packed_size = _ALLOC_CELLS;
+
+  assert(_packed_ptr!=NULL && _packed_idx!=NULL && _symbol!=NULL && _used!=NULL);
+
+  // Mark every cell as free and unlabeled.
+  uint32_t c=0;
+  while(c<_packed_size){
+    _used[c] = false;
+    _symbol[c] = FSA::EMPTY_SYMBOL;
+    _packed_ptr[c] = NULL;
+    if(ownIdx){
+      _packed_idx[c] = 0;
+    }
+    ++c;
+  }
+
+  _blob = (data_t*)malloc(_ALLOC_BLOB);
+  _blob_size = _ALLOC_BLOB;
+
+  assert(_blob!=NULL);
+
+  _packable = true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::expandCells()
+
+// Grows all per-cell tables by _ALLOC_CELLS entries and initializes the new
+// tail as free, unlabeled cells.
+void Automaton::PackedAutomaton::expandCells()
+{
+  const bool ownIdx = (sizeof(State*)!=sizeof(state_t));
+  const uint32_t grown = _packed_size+_ALLOC_CELLS;
+
+  _packed_ptr = (State**)realloc(_packed_ptr,grown*sizeof(State*));
+  // When the index table overlays the pointer table it must follow the
+  // (possibly moved) pointer table.
+  _packed_idx = ownIdx ? (state_t*)realloc(_packed_idx,grown*sizeof(state_t))
+                       : (state_t*)_packed_ptr;
+  _symbol = (symbol_t*)realloc(_symbol,grown*sizeof(symbol_t));
+  _used = (bool*)realloc(_used,grown*sizeof(bool));
+
+  assert(_packed_ptr!=NULL && _packed_idx!=NULL && _symbol!=NULL && _used!=NULL);
+
+  for(uint32_t c=_packed_size;c<grown;++c){
+    _used[c] = false;
+    _symbol[c] = FSA::EMPTY_SYMBOL;
+    _packed_ptr[c] = NULL;
+    if(ownIdx){
+      _packed_idx[c] = 0;
+    }
+  }
+  _packed_size = grown;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::expandBlob()
+
+// Enlarges the blob buffer by the smallest multiple of _ALLOC_BLOB that is
+// strictly greater than minExpand.
+void Automaton::PackedAutomaton::expandBlob(uint32_t minExpand)
+{
+  const uint32_t chunks = minExpand/_ALLOC_BLOB+1;
+  const uint32_t extra = chunks*_ALLOC_BLOB;
+
+  _blob = (data_t*)realloc(_blob,_blob_size+extra);
+
+  assert(_blob!=NULL);
+
+  _blob_size += extra;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getEmptyCell()
+
+// Claims the first unused cell at or after (_last_packed - _BACKCHECK), or
+// at or after cell 1 when fewer than _BACKCHECK cells are packed. Used for
+// states without outgoing transitions. Returns the claimed cell index.
+uint32_t Automaton::PackedAutomaton::getEmptyCell()
+{
+  unsigned int cell = 1;
+  if(_last_packed>_BACKCHECK){
+    cell = _last_packed-_BACKCHECK;
+  }
+
+  for(;;){
+    if(!_used[cell])
+      break;
+    ++cell;
+    // keep a full symbol range available beyond the probed cell
+    if(cell+256>=_packed_size)
+      expandCells();
+  }
+
+  _used[cell] = true;
+
+  return cell;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getCell()
+
+uint32_t Automaton::PackedAutomaton::getCell(Automaton::SymList t) // finds and claims a base cell where every symbol in t fits; returns that cell (t is taken by value, i.e. copied per call)
+{
+ SymListIterator tit;
+ uint32_t cell = _last_packed>_BACKCHECK?_last_packed-_BACKCHECK:1; // start a little behind the last packed cell (never at cell 0)
+ bool found = false;
+ while(!found){
+ if(!_used[cell]){ // a cell can be the base of only one state
+ if(cell+256>=_packed_size) // make sure cell+symbol stays inside the tables
+ expandCells();
+ for(tit=t.begin();tit!=t.end();++tit){
+ if(_symbol[cell+*tit]!=FSA::EMPTY_SYMBOL) // slot already taken by another state's transition
+ break;
+ }
+ if(tit==t.end()) // loop ran to completion: no collisions
+ found=true;
+ }
+ if(!found){
+ cell++; // try the next candidate base
+ if(cell>=_packed_size)
+ expandCells();
+ }
+ }
+ _used[cell] = true; // claim the base cell
+ for(tit=t.begin();tit!=t.end();++tit){
+ _symbol[cell+*tit] = *tit; // label each transition slot with its own symbol
+ }
+
+ return cell;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::packStartState()
+
+bool Automaton::PackedAutomaton::packStartState(const Automaton::State *s) // packs s and records its cell as the start state
+{
+ return packState(s,true); // start=true makes packState() set _start_state
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::packState()
+
+// Places state s into the packed tables and remembers the chosen cell in
+// _pack_map so finalize() can resolve pointers to cell indices. Final
+// transitions store a blob offset, all other transitions store the target
+// State pointer. When `start` is set the cell also becomes the start state.
+// Returns false when the automaton is not in the packable phase.
+bool Automaton::PackedAutomaton::packState(const Automaton::State *s, bool start)
+{
+  if(!_packable)
+    return false;
+
+  const TransitionList &tlist = s->getTransitionList();
+  uint32_t cell;
+
+  if(tlist.size()==0){
+    cell = getEmptyCell();
+  }
+  else{
+    // collect and sort the outgoing symbols, then find a base that fits
+    SymList symbols;
+    for(size_t k=0; k<tlist.size(); k++){
+      symbols.push_back(tlist[k]._symbol);
+    }
+    symbols.sort();
+    cell = getCell(symbols);
+
+    for(size_t k=0; k<tlist.size(); k++){
+      if(tlist[k]._symbol!=FSA::FINAL_SYMBOL){
+        _packed_ptr[cell+tlist[k]._symbol] = tlist[k]._state;
+      }
+      else{
+        _packed_idx[cell+FSA::FINAL_SYMBOL] = packBlob(tlist[k]._state->getBlob());
+      }
+    }
+  }
+
+  _pack_map[s] = cell;
+  if(_last_packed<cell)
+    _last_packed = cell;
+  if(start)
+    _start_state=(state_t)cell;
+
+  return true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::packBlob()
+
+// Stand-in used when packBlob() is handed a NULL blob pointer.
+static const Blob nullBlob;
+
+// Appends blob b to the blob buffer as <uint32 size><data> and returns the
+// byte offset of the record. A pointer that was packed before yields the
+// offset recorded for it without storing the data again.
+uint32_t Automaton::PackedAutomaton::packBlob(const Blob *b)
+{
+  const PackMapIterator known = _blob_map.find(b);
+  if(known!=_blob_map.end())
+    return known->second;
+
+  const uint32_t offset=_blob_used;
+  _blob_map[b]=offset;   // keyed on the caller's pointer, NULL included
+
+  const Blob *src = (b!=NULL) ? b : &nullBlob;
+  uint32_t size=src->size();
+  const size_t needed = size+sizeof(uint32_t);
+  if(_blob_used+needed>_blob_size)
+    expandBlob(needed);
+
+  memcpy(_blob+_blob_used,&size,sizeof(uint32_t));
+  memcpy(_blob+_blob_used+sizeof(uint32_t),src->data(),size);
+  _blob_used += needed;
+
+  return offset;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::finalize()
+
+// Ends the packing phase.
+//  1. Every non-final transition slot, which so far holds a State pointer,
+//     is rewritten to the cell index recorded for that state in _pack_map.
+//  2. If all packed blobs have the same size, the per-blob size prefixes
+//     are stripped, the blob area is compacted and the blob type becomes
+//     FSA::DATA_FIXED; final-transition offsets are remapped accordingly.
+// Does nothing when the automaton is not packable (already finalized).
+void Automaton::PackedAutomaton::finalize()
+{
+  if(!_packable)
+    return;
+
+  for(uint32_t c=0;c<_last_packed+256;c++){
+    if(c>=_packed_size) // this shouldn't happen anymore, but check anyway
+      expandCells();
+    if(_symbol[c]!=FSA::EMPTY_SYMBOL && _symbol[c]!=FSA::FINAL_SYMBOL){
+      _packed_idx[c] = _pack_map[_packed_ptr[c]];
+    }
+  }
+
+  // compact blobs if the size is constant; with no blob packed at all there
+  // is no size prefix to read, so the blob area is left empty and variable
+  if(_blob_used>=sizeof(uint32_t)){
+    std::map<uint32_t,uint32_t> bcomp;   // old record offset -> compacted offset
+    std::map<uint32_t,uint32_t>::iterator bcomp_it;
+    bcomp[0]=0;
+    uint32_t lastsize, currsize;
+    // records start at arbitrary byte offsets; memcpy avoids misaligned loads
+    memcpy(&lastsize,_blob,sizeof(uint32_t));
+    uint32_t i=lastsize+sizeof(uint32_t);
+    uint32_t j=lastsize;
+    bool fixedsize = true;
+    while(i<_blob_used){
+      memcpy(&currsize,_blob+i,sizeof(uint32_t));
+      if(currsize!=lastsize){
+        fixedsize = false;
+        break;
+      }
+      bcomp[i]=j;
+      i+=currsize+sizeof(uint32_t);
+      j+=currsize;
+    }
+    if(fixedsize){
+      _blob_type = FSA::DATA_FIXED;
+      _fixed_blob_size = lastsize;
+      _blob_used = j;
+      for(i=0;i<_last_packed+256;i++){
+        if(_symbol[i]==FSA::FINAL_SYMBOL){
+          _packed_idx[i] = bcomp[_packed_idx[i]];
+        }
+      }
+
+      // ascending offsets: every move targets a lower address, so earlier
+      // moves never clobber data that is still to be moved
+      for(bcomp_it = bcomp.begin(); bcomp_it!=bcomp.end(); ++bcomp_it){
+        memmove(_blob+(bcomp_it->second),_blob+(bcomp_it->first+sizeof(uint32_t)),lastsize);
+      }
+    }
+  }
+
+  _packable = false;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::computePerfectHash()
+
+// Recursively counts the accepted strings reachable from `state`, storing
+// in _perf_hash[] for each outgoing transition the count accumulated before
+// following it. Non-zero results are memoized in _totals[].
+hash_t Automaton::PackedAutomaton::computePerfectHash(state_t state)
+{
+  if(_totals[state]!=0){
+    return _totals[state];
+  }
+
+  // a final transition contributes one accepted string
+  hash_t total = 0;
+  if(_symbol[state+FSA::FINAL_SYMBOL]==FSA::FINAL_SYMBOL){
+    total = 1;
+  }
+
+  symbol_t sym = 1;
+  while(sym<=254){
+    if(_symbol[state+sym]==sym){
+      _perf_hash[state+sym] = total;
+      total += computePerfectHash(_packed_idx[state+sym]);
+    }
+    sym++;
+  }
+
+  _totals[state] = total;
+
+  return total;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::addPerfectHash()
+
+// Builds the perfect-hash table for a finalized, non-empty automaton.
+// May be called more than once; a table from an earlier call is released
+// before the new one is allocated.
+void Automaton::PackedAutomaton::addPerfectHash()
+{
+  if(_last_packed==0 || _packable){
+    // do nothing with an empty automaton or one which has not been finalized
+    return;
+  }
+
+  uint32_t size = _last_packed+256;
+
+  // drop leftovers of a previous run so repeated calls do not leak
+  free(_perf_hash);
+  free(_totals);
+
+  _perf_hash = (hash_t*)malloc(size*sizeof(hash_t));
+  _totals = (hash_t*)malloc(size*sizeof(hash_t));
+
+  assert(_perf_hash!=NULL && _totals!=NULL);
+
+  for(unsigned int i=0;i<size;i++){
+    _perf_hash[i] = 0;
+    _totals[i] = 0;
+  }
+
+  computePerfectHash(_start_state);
+
+  // _totals is only scratch space for the recursion
+  free(_totals); _totals=NULL;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::lookup()
+
+// Looks up the NUL-terminated string `input` in the finalized automaton.
+// Returns a pointer into the blob buffer at the offset stored for the
+// string's final transition, or NULL if the string is not accepted, the
+// automaton is not finalized/empty, or input is NULL.
+const data_t* Automaton::PackedAutomaton::lookup(const char *input) const
+{
+  if(_packable || _start_state==0 || input==NULL){
+    return NULL;
+  }
+  state_t state = _start_state;
+  for(const char *p=input; *p; ++p){
+    // Go through unsigned char: with a signed plain char, bytes >= 0x80
+    // would turn into negative offsets and index in front of the state.
+    const symbol_t sym = (symbol_t)(unsigned char)*p;
+    // The FINAL_SYMBOL slot holds a blob offset, not a state index, so it
+    // must never be followed as an ordinary transition.
+    if(sym==FSA::FINAL_SYMBOL || _symbol[state+sym]!=sym){
+      return NULL;
+    }
+    state=_packed_idx[state+sym];
+  }
+  if(_symbol[state+FSA::FINAL_SYMBOL]==FSA::FINAL_SYMBOL){
+    return _blob+_packed_idx[state+FSA::FINAL_SYMBOL];
+  }
+  return NULL;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::write()
+
+// Writes exactly len bytes from buf to fd, continuing after short writes.
+// Returns false as soon as ::write() fails or makes no progress (an
+// interrupted call is treated as a failure, it is not retried).
+static bool writeFully(int fd, const void *buf, size_t len)
+{
+  const char *p = (const char*)buf;
+  while(len>0){
+    const ssize_t w = ::write(fd,p,len);
+    if(w<=0)
+      return false;
+    p += w;
+    len -= (size_t)w;
+  }
+  return true;
+}
+
+// Serializes the finalized automaton to `filename` as: header, symbol
+// table, state table, blob data and, if present, the perfect hash table.
+// The header checksum is the sum of the checksums of these sections.
+// Returns false if the automaton is empty or not finalized, if the file
+// cannot be created, or if any write or the final close fails; in the
+// latter cases a partially written file may be left behind.
+bool Automaton::PackedAutomaton::write(const char *filename, uint32_t serial)
+{
+  if(_packable || _packed_size==0) // must be non-empty and finalized
+    return false;
+
+  FSA::Header header;
+
+  header._magic = FSA::MAGIC;
+  header._version = FSA::VER;
+  header._checksum = 0;
+  header._size = _last_packed+256;
+  header._start = _start_state;
+  header._data_size = _blob_used;
+  header._data_type = _blob_type;
+  header._fixed_data_size = _fixed_blob_size;
+  header._has_perfect_hash = (_perf_hash==NULL) ? 0 : 1;
+  header._serial = serial;
+  memset(&(header._reserved), 0, sizeof(header._reserved));
+
+  header._checksum += Checksum::compute(_symbol,header._size*sizeof(symbol_t));
+  header._checksum += Checksum::compute(_packed_idx,header._size*sizeof(state_t));
+  header._checksum += Checksum::compute(_blob,_blob_used);
+  if(header._has_perfect_hash){
+    header._checksum += Checksum::compute(_perf_hash,header._size*sizeof(hash_t));
+  }
+
+  int fd = ::open(filename,O_CREAT|O_TRUNC|O_RDWR,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
+  if(fd<0) return false;
+
+  bool ok = writeFully(fd,&header,sizeof(header)) &&
+            writeFully(fd,_symbol,header._size*(sizeof(symbol_t))) &&
+            writeFully(fd,_packed_idx,header._size*(sizeof(state_t))) &&
+            writeFully(fd,_blob,_blob_used);
+  if(ok && header._has_perfect_hash){
+    ok = writeFully(fd,_perf_hash,header._size*(sizeof(hash_t)));
+  }
+  // close() can report deferred write errors, so its result counts as well
+  if(::close(fd)!=0){
+    ok = false;
+  }
+
+  return ok;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::read()
+
+// Reads exactly len bytes from fd into buf, continuing after short reads.
+// Returns false on a read error or if end-of-file is hit too early.
+static bool readFully(int fd, void *buf, size_t len)
+{
+  char *p = (char*)buf;
+  while(len>0){
+    const ssize_t r = ::read(fd,p,len);
+    if(r<=0)
+      return false;
+    p += r;
+    len -= (size_t)r;
+  }
+  return true;
+}
+
+// Replaces the current content with the packed automaton stored in
+// `filename` (layout as produced by write()). Returns false, leaving the
+// object reset, if the file cannot be opened, the header is short, has the
+// wrong magic or an impossible size, memory cannot be allocated, or any
+// section is truncated. The stored checksum and version are not verified
+// here.
+bool Automaton::PackedAutomaton::read(const char *filename)
+{
+  FSA::Header header;
+
+  reset();
+  int fd = ::open(filename,O_RDONLY);
+  if(fd<0){
+    return false;
+  }
+  // header._size below 256 would make _last_packed wrap around
+  if(!readFully(fd,&header,sizeof(header)) || header._magic!=FSA::MAGIC || header._size<256){
+    ::close(fd);
+    return false;
+  }
+
+  _packable = false;
+  _packed_size = header._size;
+  _last_packed = _packed_size-256;
+  _blob_size = header._data_size;
+  _blob_used = header._data_size;
+  _blob_type = header._data_type;
+  _fixed_blob_size = header._fixed_data_size;
+  _start_state = header._start;
+
+  const size_t symBytes = _packed_size*sizeof(symbol_t);
+  const size_t idxBytes = _packed_size*sizeof(state_t);
+  const size_t hashBytes = _packed_size*sizeof(hash_t);
+
+  _symbol = (symbol_t*)malloc(symBytes);
+  _packed_idx = (state_t*)malloc(idxBytes);
+  // malloc(0) may legitimately return NULL; ask for at least one byte
+  _blob = (data_t*)malloc(_blob_used>0 ? _blob_used : 1);
+  if(header._has_perfect_hash){
+    _perf_hash = (hash_t*)malloc(hashBytes);
+  }
+
+  bool ok = _symbol!=NULL && _packed_idx!=NULL && _blob!=NULL &&
+            (!header._has_perfect_hash || _perf_hash!=NULL);
+  ok = ok && readFully(fd,_symbol,symBytes)
+          && readFully(fd,_packed_idx,idxBytes)
+          && readFully(fd,_blob,_blob_used);
+  if(ok && header._has_perfect_hash){
+    ok = readFully(fd,_perf_hash,hashBytes);
+  }
+
+  ::close(fd);
+
+  if(!ok){
+    reset(); // do not leave a half-loaded automaton behind
+    return false;
+  }
+
+  return true;
+}
+
+// }}}
+// {{{ Automaton::PackedAutomaton::getFSA()
+
+/**
+ * Hand the packed arrays over to an FSA::Descriptor.
+ *
+ * The arrays are trimmed to their used size, their pointers are
+ * stored in the descriptor, and this object is reset without freeing
+ * them (the local pointers are cleared first).
+ *
+ * @param d Descriptor to fill in.
+ * @return False if the automaton is empty or not finalized, true otherwise.
+ */
+bool Automaton::PackedAutomaton::getFSA(FSA::Descriptor &d)
+{
+  if(_packable || _packed_size==0) // must be non-empty and finalized
+    return false;
+
+  uint32_t size = _last_packed+256;
+
+  // Trim the arrays to the used size. When realloc() fails the original
+  // block is still valid, so keep it instead of overwriting the only
+  // pointer to it with NULL.
+  // NOTE(review): this assumes size never exceeds the current allocation
+  // (write() relies on the same bound) - confirm.
+  void *trimmed;
+
+  trimmed = realloc(_symbol,size*sizeof(symbol_t));
+  if(trimmed!=NULL) _symbol = (symbol_t*)trimmed;
+
+  trimmed = realloc(_packed_idx,size*sizeof(state_t));
+  if(trimmed!=NULL) _packed_idx = (state_t*)trimmed;
+
+  if(_blob_used>0){ // realloc(ptr,0) is implementation-defined, skip it
+    trimmed = realloc(_blob,_blob_used);
+    if(trimmed!=NULL) _blob = (data_t*)trimmed;
+  }
+
+  if(_perf_hash!=NULL){
+    trimmed = realloc(_perf_hash,size*sizeof(hash_t));
+    if(trimmed!=NULL) _perf_hash = (hash_t*)trimmed;
+  }
+
+  d._version = FSA::VER;
+  d._serial = 0;
+  d._state = _packed_idx;
+  d._symbol = _symbol;
+  d._size = size;
+  d._data = _blob;
+  d._data_size = _blob_used;
+  d._data_type = _blob_type;
+  d._fixed_data_size = _fixed_blob_size;
+  d._perf_hash = _perf_hash;
+  d._start = _start_state;
+
+  // The descriptor now refers to the arrays: forget them here so that
+  // reset() does not free them.
+  _symbol = NULL;
+  _packed_idx = NULL;
+  if(sizeof(State*)==sizeof(state_t)){ // _packed_idx and _packed_ptr are overlayed
+    _packed_ptr=NULL;
+  }
+  _blob = NULL;
+  _perf_hash = NULL;
+  reset();
+
+  return true;
+}
+
+// }}}
+
+// {{{ Automaton::cleanUp()
+
+/**
+ * Release all states and empty both registers.
+ *
+ * Does nothing when there is no start state. Otherwise the automaton
+ * is finalized first, then every state held by the blob register and
+ * the state register is deleted, followed by the start state itself.
+ */
+void Automaton::cleanUp()
+{
+  if(_q0==NULL){
+    return;
+  }
+
+  finalize(); // make sure all states are in _register
+
+  // special states (one per data item)
+  BlobRegisterIterator blobIt = _blob_register.begin();
+  while(blobIt!=_blob_register.end()){
+    delete blobIt->second;
+    ++blobIt;
+  }
+  _blob_register.clear();
+
+  // ordinary registered states
+  RegisterIterator stateIt = _register.begin();
+  while(stateIt!=_register.end()){
+    delete stateIt->second;
+    ++stateIt;
+  }
+  _register.clear();
+
+  delete _q0;
+  _q0 = NULL;
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+  _previous_input.clear();
+#else
+  _previous_input = "";
+#endif
+}
+
+// }}}
+// {{{ Automaton::~Automaton()
+
+/**
+ * Destructor; all owned states are released through cleanUp().
+ */
+Automaton::~Automaton()
+{
+  this->cleanUp();
+}
+
+// }}}
+// {{{ Automaton::getCPLength()
+
+/**
+ * Length of the common path: the number of leading symbols of input
+ * that can be followed from the start state.
+ *
+ * @param input Zero terminated input string.
+ * @return Number of matched symbols, 0 if there is no start state.
+ */
+unsigned int Automaton::getCPLength(const char *input)
+{
+  unsigned int matched = 0;
+
+  if(_q0!=NULL){
+    for(State* current = _q0; input[matched]!=0; ++matched){
+      State* following = current->child(input[matched]);
+      if(following==NULL){
+        break;
+      }
+      current = following;
+    }
+  }
+  return matched;
+}
+
+// }}}
+// {{{ Automaton::getCPLastState()
+
+/**
+ * Last state of the common path: follow input from the start state
+ * for as long as matching transitions exist.
+ *
+ * @param input Zero terminated input string.
+ * @return State where the walk stopped, NULL if there is no start state.
+ */
+Automaton::State* Automaton::getCPLastState(const char *input)
+{
+  State* current = _q0;
+
+  if(current!=NULL){
+    for(const char *p = input; *p!=0; ++p){
+      State* following = current->child(*p);
+      if(following==NULL){
+        break;
+      }
+      current = following;
+    }
+  }
+  return current;
+}
+
+// }}}
+// {{{ Automaton::addSuffix()
+
+/**
+ * Append a chain of new states for suffix below state and terminate
+ * it with a final transition.
+ *
+ * The final transition leads to the special state registered for the
+ * data item (EMPTY_BLOB when b is NULL). If no such state exists yet,
+ * one is created with its own copy of the blob and registered.
+ *
+ * @param state  State to extend.
+ * @param suffix Zero terminated suffix.
+ * @param b      Data item, or NULL.
+ */
+void Automaton::addSuffix(State* state, const char *suffix, const Blob *b)
+{
+  // one new state per suffix symbol
+  State* tail = state;
+  for(const char *p = suffix; *p!=0; ++p){
+    tail = tail->addEmptyChild(*p);
+  }
+
+  const Blob &item = (b!=NULL) ? *b : EMPTY_BLOB;
+
+  BlobRegisterIterator known = _blob_register.find(item);
+  if(known==_blob_register.end()){
+    // first occurrence of this data item: create and register its state
+    const Blob *owned = new Blob(item);
+    assert(owned!=NULL);
+    State* special = tail->addEmptyChild(FSA::FINAL_SYMBOL,owned);
+    _blob_register[*owned] = special;
+  }
+  else {
+    // share the already existing special state
+    tail->addChild(FSA::FINAL_SYMBOL,known->second);
+  }
+}
+
+// }}}
+// {{{ Automaton::init()
+
+/**
+ * Prepare the object for building a new automaton: clean up, create
+ * a fresh start state and initialize the packed automaton.
+ */
+void Automaton::init()
+{
+  cleanUp();
+
+  State* start = new State();
+  assert(start!=NULL);
+  _q0 = start;
+  _finalized = false;
+
+  _packed.init();
+}
+
+// }}}
+// {{{ Automaton::finalize()
+
+/**
+ * Finalize the automaton (at most once, and only if a start state
+ * exists): register the remaining states, pack the start state and
+ * finalize the packed automaton.
+ */
+void Automaton::finalize()
+{
+  if(_finalized || _q0==NULL){
+    return;
+  }
+
+  replaceOrRegister(_q0);
+  _packed.packStartState(_q0);
+  _packed.finalize();
+  _finalized = true;
+}
+
+// }}}
+// {{{ Automaton::addPerfectHash()
+
+/**
+ * Add the perfect hash to the packed automaton; ignored unless the
+ * automaton has been finalized.
+ */
+void Automaton::addPerfectHash()
+{
+  if(!_finalized){
+    return;
+  }
+  _packed.addPerfectHash();
+}
+
+// }}}
+// {{{ Automaton::write()
+
+/**
+ * Finalize the automaton if necessary and write it to a file.
+ *
+ * @param file   Name of the file.
+ * @param serial Serial number.
+ * @return Result of PackedAutomaton::write().
+ */
+bool Automaton::write(const char *file, uint32_t serial)
+{
+  finalize(); // no-op when already finalized
+  return _packed.write(file,serial);
+}
+
+// }}}
+// {{{ Automaton::getFSA()
+
+/**
+ * Finalize the automaton if necessary and turn it into an FSA object.
+ *
+ * On success the builder is cleaned up afterwards.
+ *
+ * @return Newly allocated FSA, or NULL if the packed automaton could
+ *         not provide a descriptor.
+ */
+FSA* Automaton::getFSA()
+{
+  finalize(); // no-op when already finalized
+
+  FSA::Descriptor d;
+  if(_packed.getFSA(d)){
+    FSA *result = new FSA(d);
+    cleanUp();
+    return result;
+  }
+  return NULL;
+}
+
+// }}}
+// {{{ Automaton::insertSortedString()
+
+/**
+ * Insert a string without a data item.
+ *
+ * @param input Input string.
+ */
+void Automaton::insertSortedString(const std::string &input)
+{
+  insertSortedString(input.c_str(),(const Blob*)NULL);
+}
+
+/**
+ * Insert a string with a meta info string as data item.
+ *
+ * @param input Input string.
+ * @param meta  Meta info string, wrapped in a temporary Blob.
+ */
+void Automaton::insertSortedString(const std::string &input, const std::string &meta)
+{
+  const Blob metaBlob(meta);
+  insertSortedString(input.c_str(),&metaBlob);
+}
+
+/**
+ * Insert a string with a data item passed by reference.
+ *
+ * @param input Input string.
+ * @param b     Data item.
+ */
+void Automaton::insertSortedString(const char *input, const Blob& b)
+{
+  const Blob *item = &b;
+  insertSortedString(input,item);
+}
+
+/**
+ * Insert a string with an optional data item.
+ *
+ * Ignored if the automaton has no start state or is already
+ * finalized. Otherwise the common path is followed, the branch that
+ * hangs off its last state (if any) is registered, and the remaining
+ * suffix is added.
+ *
+ * @param input Zero terminated input string.
+ * @param b     Data item, or NULL.
+ */
+void Automaton::insertSortedString(const char *input, const Blob* b)
+{
+  if(_finalized || _q0==NULL){
+    return;
+  }
+
+  const unsigned int prefixLength = getCPLength(input);
+  State* branchPoint = getCPLastState(input);
+
+  if(branchPoint->hasChildren()){
+    replaceOrRegister(branchPoint);
+  }
+  addSuffix(branchPoint,input+prefixLength,b);
+}
+
+// }}}
+// {{{ Automaton::replaceOrRegister()
+
+/**
+ * Replace the last child of a state with an equivalent registered
+ * state, or register (and pack) it if there is none.
+ *
+ * The subtree below the last child is processed first, recursively.
+ *
+ * @param state State whose last child is examined.
+ */
+void Automaton::replaceOrRegister(Automaton::State* state)
+{
+  State* last = state->lastChild();
+  if(last==NULL){
+    return;
+  }
+
+  if(last->hasChildren()){
+    replaceOrRegister(last);
+  }
+
+  const TListPtr key(&(last->getTransitionList()));
+  RegisterIterator match = _register.find(key);
+
+  if(match==_register.end() || match->second==last){
+    // no other equivalent state known: this one becomes the representative
+    _register[key] = last;
+    _packed.packState(last);
+  }
+  else {
+    // an equivalent state exists: share it and drop the duplicate
+    state->updateLastChild(match->second);
+    delete last;
+  }
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/automaton.h b/fsa/src/vespa/fsa/automaton.h
new file mode 100644
index 00000000000..089c7784a0d
--- /dev/null
+++ b/fsa/src/vespa/fsa/automaton.h
@@ -0,0 +1,970 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file automaton.h
+ * @brief Definition of the classes used for %FSA (%Finite %State %Automaton) construction
+ *
+ */
+
+#pragma once
+
+#include <map>
+#include <list>
+#include <string>
+#include <assert.h>
+
+#include "blob.h"
+#include "fsa.h"
+
+namespace fsa {
+
+// {{{ Automaton
+/**
+ * @class Automaton
+ * @brief %FSA (%Finite %State %Automaton) construction class.
+ *
+ * The Automaton class provides the methods and data structures needed
+ * for constructing a %Finite %State %Automaton from input strings. (The
+ * current implementation requires the input to be sorted; this
+ * requirement may be relaxed in future releases.)
+ *
+ * The constructed %FSA, when stored in a compact representation, can
+ * be used for lookups, etc. via the FSA class. The compact %FSA cannot
+ * be modified anymore.
+ */
+class Automaton {
+
+public:
+ /**
+ * Empty data item for final states without assigned data. Contains
+ * a zero terminated empty string.
+ */
+ static const Blob EMPTY_BLOB;
+
+private:
+
+ class State;
+
+ // {{{ Automaton::Transition
+ /**
+ * @struct Transition
+ * @brief Struct for storing a single transition.
+ *
+ * A transition consists of an input symbol and a new state.
+ */
+ struct Transition {
+ symbol_t _symbol; /**< Input symbol labelling the transition. */
+ State *_state; /**< New state, reached when _symbol is read. */
+ };
+ // }}}
+
+ // {{{ Automaton::TransitionList
+ /**
+ * @class TransitionList
+ * @brief Class representing all transitions from a state.
+ *
+ * This class is used for the internal representation of the
+ * automaton. A state can be represented by the list of all
+ * possible transitions from that state. Two states are
+ * equivalent if both are final (with the same meta info) or both
+ * are not final, and their transition lists match, that is they
+ * have the same number of out-transitions, these correspond to the
+ * same set of input symbols, and for each of these symbols the new
+ * states are equal. In the internal representation, final states
+ * are implemented by means of a special transition, so transition
+ * list equivalence implies state equivalence.
+ */
+ class TransitionList {
+
+   friend class State;
+
+ private:
+   unsigned int _alloc;   /**< Allocated size (number of transitions). */
+   unsigned int _size;    /**< Used size. */
+   Transition* _trans;    /**< Transition array (malloc'ed, freed by the destructor). */
+
+ public:
+   /**
+    * @brief Constructor.
+    *
+    * Default constructor, creates an empty transition list.
+    */
+   TransitionList() : _alloc(0), _size(0), _trans(NULL) {}
+
+   /**
+    * @brief Constructor.
+    *
+    * Constructor, creates an empty transition list, but preallocates
+    * space for a given number of transitions.
+    *
+    * @param prealloc Number of transitions to preallocate space for.
+    */
+   TransitionList(unsigned int prealloc) : _alloc(prealloc), _size(0), _trans(NULL)
+   {
+     if(prealloc>0){
+       _trans = (Transition*)malloc(prealloc*sizeof(Transition));
+       assert(_trans!=NULL);
+     }
+   }
+
+   /**
+    * @brief Destructor.
+    *
+    * Frees the transition array. The states the transitions point to
+    * are not touched.
+    */
+   ~TransitionList()
+   {
+     if(_trans!=NULL) free(_trans);
+   }
+
+   /**
+    * @brief Copy constructor.
+    *
+    * The copy gets its own array, sized to the used size of the
+    * source.
+    *
+    * @param tl Reference to transition list object.
+    */
+   TransitionList(const TransitionList& tl) : _alloc(tl._size), _size(tl._size), _trans(NULL)
+   {
+     if(_alloc>0){
+       _trans = (Transition*)malloc(_alloc*sizeof(Transition));
+       assert(_trans!=NULL);
+     }
+     for(unsigned int i=0; i<_size; i++)
+       _trans[i] = tl._trans[i];
+   }
+
+   /**
+    * @brief Assignment operator.
+    *
+    * Needed because the class owns its array: the implicitly
+    * generated operator would copy the pointer only, and both objects
+    * would later free the same array.
+    *
+    * @param tl Reference to transition list object.
+    * @return Reference to this object.
+    */
+   TransitionList& operator=(const TransitionList& tl)
+   {
+     if(this!=&tl){
+       // build the copy first, so the source is fully read before
+       // anything of this object is released
+       Transition* copy = NULL;
+       if(tl._size>0){
+         copy = (Transition*)malloc(tl._size*sizeof(Transition));
+         assert(copy!=NULL);
+         for(unsigned int i=0; i<tl._size; i++)
+           copy[i] = tl._trans[i];
+       }
+       if(_trans!=NULL) free(_trans);
+       _trans = copy;
+       _alloc = tl._size;
+       _size = tl._size;
+     }
+     return *this;
+   }
+
+   /**
+    * @brief Less-than operator.
+    *
+    * t1<t2 (or t1.operator<(t2)) is true iff
+    *   - t1 has less transitions than t2, or
+    *   - t1 and t2 have the same number of transitions, and the
+    *     first transition which is different for t1 and t2 (sorted
+    *     by symbol) has a lower symbol for t1, or
+    *   - t1 and t2 have the same number of transitions, and the
+    *     first transition which is different for t1 and t2 (sorted
+    *     by symbol) has the same symbol but a lower new state for t1
+    *
+    * @param tl Reference to transition list object.
+    * @return True iff t1<t2.
+    */
+   bool operator<(const TransitionList& tl) const;
+
+   /**
+    * @brief Greater-than operator.
+    *
+    * t1>t2 (or t1.operator>(t2)) is true iff
+    *   - t1 has more transitions than t2, or
+    *   - t1 and t2 have the same number of transitions, and the
+    *     first transition which is different for t1 and t2 (sorted
+    *     by symbol) has a higher symbol for t1, or
+    *   - t1 and t2 have the same number of transitions, and the
+    *     first transition which is different for t1 and t2 (sorted
+    *     by symbol) has the same symbol but a higher new state for t1
+    *
+    * @param tl Reference to transition list object.
+    * @return True iff t1>t2.
+    */
+   bool operator>(const TransitionList& tl) const;
+
+   /**
+    * @brief Equals operator.
+    *
+    * t1==t2 (or t1.operator==(t2)) is true iff
+    *   - t1 and t2 have the same number of transitions, which have
+    *     the same set of symbols and for each symbol the new
+    *     states are equal
+    *
+    * @param tl Reference to transition list object.
+    * @return True iff t1==t2.
+    */
+   bool operator==(const TransitionList& tl) const;
+
+   /**
+    * @brief Check for emptiness.
+    *
+    * @return True iff the transition list is empty.
+    */
+   bool empty() const { return (_size==0); }
+
+   /**
+    * @brief Get transition list size.
+    *
+    * @return Size of the transition list (number of transitions, or 0 if empty).
+    */
+   unsigned int size() const { return _size; }
+
+   /**
+    * @brief Index operator.
+    *
+    * Returns a reference to the ith transition on the list. i must
+    * be between 0 and size-1 (0<=i<=size-1); the index is not checked.
+    *
+    * @param i Index of transition.
+    * @return Reference to the ith transition.
+    */
+   const Transition& operator[](unsigned int i) const { return _trans[i]; }
+
+   /**
+    * @brief Get the last transition.
+    *
+    * Returns a pointer to the last transition, or NULL pointer if
+    * the list is empty.
+    *
+    * @return Pointer to last transition, or NULL.
+    */
+   Transition* last()
+   {
+     if(_size>0) return &_trans[_size-1];
+     return NULL;
+   }
+
+   /**
+    * @brief Get the transition corresponding to a symbol.
+    *
+    * Returns a pointer to the transition corresponding to a given
+    * symbol, or NULL pointer if the symbol is not found on the list
+    * (a transition with that symbol does not exist). The list is
+    * searched linearly from the front.
+    *
+    * @param sy Input symbol.
+    * @return Pointer to the matching transition, or NULL.
+    */
+   Transition* find(symbol_t sy)
+   {
+     for(unsigned int i=0; i<_size; i++){
+       if(_trans[i]._symbol == sy) return &_trans[i];
+     }
+     return NULL;
+   }
+
+   /**
+    * @brief Append a new transition to the list.
+    *
+    * Appends a new transition to the end of the list. The allocated
+    * size is increased if necessary (to 1 for an empty list, by 2
+    * otherwise). If a transition with the same symbol already
+    * exists, the behaviour is undefined.
+    *
+    * @param sy Input symbol.
+    * @param st Pointer to new state.
+    */
+   void append(symbol_t sy, State* st)
+   {
+     if(_size==_alloc){
+       const unsigned int grown = (_alloc==0) ? 1 : _alloc+2;
+       // realloc(NULL,n) behaves like malloc(n); keep the old pointer
+       // until the new block is known to be valid
+       Transition* bigger = (Transition*)realloc(_trans,grown*sizeof(Transition));
+       assert(bigger!=NULL);
+       _trans = bigger;
+       _alloc = grown;
+     }
+     _trans[_size]._symbol=sy;
+     _trans[_size]._state=st;
+     _size++;
+   }
+
+ };
+
+ // }}}
+
+ // {{{ Automaton::TListPtr
+ /**
+ * @class TListPtr
+ * @brief Helper class, pointer to a transition list (TransitionList).
+ *
+ * The purpose of this class is to override the comparison operators
+ * for a pointer: instead of comparing the value of the pointer
+ * itself, they compare the objects the pointers are pointing to.
+ * This is what allows a TListPtr to be used as the key type of the
+ * state Register map.
+ *
+ * Only the pointer is stored (the transition list is not copied),
+ * and the comparison operators dereference it without a NULL check.
+ */
+ class TListPtr {
+
+ private:
+ const TransitionList *_ptr; /**< Pointer to a TransitionList */
+
+ public:
+
+ /**
+ * @brief Constructor.
+ *
+ * Initialize object to point to the specified transition list.
+ *
+ * @param tl pointer to a transition list.
+ */
+ TListPtr(const TransitionList *tl) : _ptr(tl) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Initialize object from another TListPtr; both objects then point
+ * to the same transition list.
+ *
+ * @param tp Reference to TListPtr.
+ */
+ TListPtr(const TListPtr& tp) : _ptr(tp._ptr) {}
+
+ /**
+ * @brief Get the pointer to the transition list.
+ *
+ * @return Pointer to the TransitionList.
+ */
+ const TransitionList* getPtr() const { return _ptr; }
+
+ /**
+ * @brief Less-than operator.
+ *
+ * Compares the pointed objects instead of the value of the
+ * pointer itself (delegates to TransitionList::operator<).
+ *
+ * @param tp Reference to TListPtr object.
+ * @return Comparison result.
+ */
+ bool operator<(const TListPtr& tp) const
+ { return(*_ptr<*tp._ptr); }
+
+ /**
+ * @brief Greater-than operator.
+ *
+ * Compares the pointed objects instead of the value of the
+ * pointer itself (delegates to TransitionList::operator>).
+ *
+ * @param tp Reference to TListPtr object.
+ * @return Comparison result.
+ */
+ bool operator>(const TListPtr& tp) const
+ { return(*_ptr>*tp._ptr); }
+
+ /**
+ * @brief Equals operator.
+ *
+ * Compares the pointed objects instead of the value of the
+ * pointer itself (delegates to TransitionList::operator==).
+ *
+ * @param tp Reference to TListPtr object.
+ * @return Comparison result.
+ */
+ bool operator==(const TListPtr& tp) const
+ { return(*_ptr==*tp._ptr); }
+ };
+ // }}}
+
+ // {{{ Automaton::State
+ /**
+ * @class State
+ * @brief Class representing a state of the automaton.
+ *
+ * The representation of the automaton states consists of a
+ * transition list for the state, and a meta info blob (the latter is
+ * only used for special states reached by a final transition). A
+ * final transition is a transition from a final (accepting) state
+ * with the reserved FINAL_SYMBOL (0xff) to a special state, which
+ * stores the meta info corresponding to the final state. For each
+ * unique meta info blob, there is one special state.
+ */
+ class State {
+
+ private:
+
+   TransitionList _tlist;   /**< Out-transitions of the state. */
+   const Blob *_blob;       /**< Meta info blob (may be NULL), deleted with the state. */
+
+ public:
+
+   /**
+    * @brief Create a state without transitions and without blob.
+    */
+   State() : _tlist(), _blob(NULL) {}
+
+   /**
+    * @brief Create a (special) state without transitions, carrying a blob.
+    *
+    * @param b Pointer to blob; it is deleted by the destructor.
+    */
+   State(const Blob* b) : _tlist(), _blob(b) {}
+
+   /**
+    * @brief Destructor, deletes the blob (deleting NULL is a no-op).
+    */
+   ~State() { delete _blob; }
+
+   /**
+    * @brief Tell whether the state is a final (accepting) state.
+    *
+    * @return True if there is a child reached by FINAL_SYMBOL.
+    */
+   bool isFinal()
+   {
+     State* special = child(FSA::FINAL_SYMBOL);
+     return special!=NULL;
+   }
+
+   /**
+    * @brief Access the blob assigned to the state.
+    *
+    * @return Pointer to blob (NULL if none).
+    */
+   const Blob* getBlob() const { return _blob; }
+
+   /**
+    * @brief Tell whether the state has any out-transitions.
+    *
+    * @return True if the transition list is not empty, false for a leaf.
+    */
+   bool hasChildren()
+   {
+     if(_tlist.empty()) return false;
+     return true;
+   }
+
+   /**
+    * @brief Look up the child reached by a symbol.
+    *
+    * @param sy Input symbol.
+    * @return Pointer to the child, or NULL if there is no transition
+    *         with that symbol.
+    */
+   State* child(symbol_t sy)
+   {
+     Transition* hit = _tlist.find(sy);
+     return (hit!=NULL) ? hit->_state : NULL;
+   }
+
+   /**
+    * @brief Get the child of the most recently appended transition.
+    *
+    * @return Pointer to that child, or NULL if the list is empty or
+    *         its last transition is the FINAL_SYMBOL transition.
+    */
+   State* lastChild()
+   {
+     Transition* tail = _tlist.last();
+     if(tail==NULL || tail->_symbol==FSA::FINAL_SYMBOL){
+       return NULL;
+     }
+     return tail->_state;
+   }
+
+   /**
+    * @brief Redirect the last transition to another state.
+    *
+    * Used when an equivalent, already registered state replaces the
+    * last child. Does nothing if there are no transitions.
+    *
+    * @param st State the last transition shall point to.
+    */
+   void updateLastChild(State* st)
+   {
+     Transition* tail = _tlist.last();
+     if(tail==NULL){
+       return;
+     }
+     tail->_state = st;
+   }
+
+   /**
+    * @brief Create a new state and append a transition to it.
+    *
+    * @param sy Symbol of the new transition.
+    * @param b  Optional blob for the new state, defaults to NULL.
+    * @return Pointer to the new state.
+    */
+   State* addEmptyChild(symbol_t sy, const Blob *b=NULL)
+   {
+     State* fresh = new State(b);
+     assert(fresh!=NULL);
+     _tlist.append(sy,fresh);
+     return fresh;
+   }
+
+   /**
+    * @brief Append a transition to an already existing state.
+    *
+    * @param sy    Symbol of the new transition.
+    * @param child Destination state.
+    * @return The destination state, as passed in.
+    */
+   State* addChild(symbol_t sy, State* child)
+   {
+     _tlist.append(sy,child);
+     return child;
+   }
+
+   /**
+    * @brief Access the transition list of the state.
+    *
+    * @return Const reference to the transition list.
+    */
+   const TransitionList& getTransitionList(void) const { return _tlist; }
+
+
+ };
+
+ // }}}
+
+ // {{{ Automaton::Register, BlobRegister, PackMap, SymList and iterators
+ /**
+ * @brief Register of states, maps a transition list to a state object
+ */
+ typedef std::map< TListPtr,State* > Register;
+ /**
+ * @brief State register iterator.
+ */
+ typedef std::map< TListPtr,State* >::iterator RegisterIterator;
+
+ /**
+ * @brief Register of states, maps a blob to a special state.
+ */
+ typedef std::map< Blob,State* > BlobRegister;
+ /**
+ * @brief Blob register iterator.
+ */
+ typedef std::map< Blob,State* >::iterator BlobRegisterIterator;
+
+ /**
+ * @brief Packing map, maps a state pointer to a state ID.
+ */
+ typedef std::map< const void*, unsigned int > PackMap;
+ /**
+ * @brief Packing map iterator.
+ */
+ typedef std::map< const void*, unsigned int >::iterator PackMapIterator;
+
+ /**
+ * @brief symbol_t list.
+ */
+ typedef std::list<symbol_t> SymList;
+ /**
+ * @brief symbol_t list iterator.
+ */
+ typedef std::list<symbol_t>::iterator SymListIterator;
+ // }}}
+
+ // {{{ Automaton::PackedAutomaton
+
+ /**
+ * @class PackedAutomaton
+ * @brief Helper class for packing an automaton.
+ *
+ * This class is used for packing an Automaton to a compressed
+ * format which can be saved to file to be used by the FSA class.
+ *
+ * NOTE(review): the class holds raw pointers and its destructor calls
+ * reset(), but no copy constructor or assignment operator is
+ * declared here, so copying an object looks unsafe - confirm that
+ * it is never copied.
+ */
+ class PackedAutomaton {
+
+ private:
+ bool _packable; /**< Packable flag. */
+ PackMap _pack_map; /**< Map state pointers to indices. */
+ PackMap _blob_map; /**< Map blob pointers to indices. */
+ State **_packed_ptr; /**< Array for state pointers. */
+ state_t *_packed_idx; /**< Array for state indices. */
+ symbol_t *_symbol; /**< Array for transition symbols. */
+ bool *_used; /**< Array for cell used flags. */
+ hash_t *_perf_hash; /**< Array for perfect hash deltas. */
+ hash_t *_totals; /**< Array for perfect hash totals. */
+ uint32_t _packed_size; /**< Size of packed arrays (in cells). */
+ uint32_t _last_packed; /**< Index of last packed state. */
+
+ data_t *_blob; /**< Data storage. */
+ uint32_t _blob_size; /**< Data storage size. */
+ uint32_t _blob_used; /**< Used data storage size. */
+ uint32_t _blob_type; /**< Type of data items (fixed/var.) */
+ uint32_t _fixed_blob_size; /**< Data item size if fixed. */
+
+ state_t _start_state; /**< Index of start state. */
+
+ /**
+ * @brief Number of cells to allocate in one expansion.
+ */
+ static const uint32_t _ALLOC_CELLS = 131072; // 128k
+
+ /**
+ * @brief Number of bytes to allocate in one data storage expansion.
+ */
+ static const uint32_t _ALLOC_BLOB = 65536; // 64k
+
+ /**
+ * @brief How far back the search for an empty cell should start.
+ */
+ static const uint32_t _BACKCHECK = 255;
+
+
+ /**
+ * @brief Expand cell arrays.
+ */
+ void expandCells();
+
+ /**
+ * @brief Expand data storage.
+ *
+ * @param minExpand Minimum size to expand, it will be rounded up
+ * to the nearest multiple of _ALLOC_BLOB.
+ */
+ void expandBlob(uint32_t minExpand);
+
+ /**
+ * @brief Get an empty cell.
+ *
+ * Start looking for an empty cell _BACKCHECK cells before the
+ * last packed cell, and return the index of the first empty cell
+ * found. The cell arrays are expanded on demand, that is if no
+ * empty cell is found.
+ *
+ * @return Index of empty cell.
+ */
+ uint32_t getEmptyCell();
+
+ /**
+ * @brief Get an empty cell where a list of transitions can be stored.
+ *
+ * Start looking for an empty cell _BACKCHECK cells before the
+ * last packed cell. In addition to the cell being empty, it
+ * should be possible to store a list of transitions from that
+ * cell. The cell arrays are expanded on demand, that is if no
+ * empty cell is found.
+ *
+ * @param t List of transition symbols (passed by value).
+ * @return Index of empty cell.
+ */
+ uint32_t getCell(SymList t);
+
+ /**
+ * @brief Pack a data item.
+ *
+ * Pack a data item to the data storage. If the same (or
+ * equivalent) data item has been packed before, return the offset
+ * where it was packed. Otherwise, pack the data item at the end
+ * of the storage (expand storage if needed), add the item and
+ * offset to the blob map and return the offset.
+ *
+ * @param b Pointer to data item.
+ * @return Offset to data item in data storage.
+ */
+ uint32_t packBlob(const Blob* b);
+
+ /**
+ * @brief Compute perfect hash deltas for a subtree.
+ *
+ * Recursive function for computing the perfect hash deltas for
+ * all transitions within a subtree. The delta for transition T
+ * from state S is the number of final states reachable from state
+ * S via transitions lower than T (that is, with a lower input
+ * symbol). Also, state S being a final state counts. The hash
+ * deltas are filled into the _perf_hash array.
+ *
+ * @param state Index of the root state of the subtree.
+ * @return Number of final states within the subtree.
+ */
+ hash_t computePerfectHash(state_t state);
+
+
+ public:
+
+ /**
+ * @brief Default constructor.
+ *
+ * Creates an empty, non-packable object: all pointers are NULL and
+ * all sizes are 0.
+ */
+ PackedAutomaton() :
+ _packable(false),
+ _pack_map(),
+ _blob_map(),
+ _packed_ptr(NULL),
+ _packed_idx(NULL),
+ _symbol(NULL),
+ _used(NULL),
+ _perf_hash(NULL),
+ _totals(NULL),
+ _packed_size(0),
+ _last_packed(0),
+ _blob(NULL),
+ _blob_size(0),
+ _blob_used(0),
+ _blob_type(0),
+ _fixed_blob_size(0),
+ _start_state(0)
+ { }
+
+ /**
+ * @brief Destructor.
+ *
+ * Calls reset().
+ */
+ ~PackedAutomaton() { reset(); }
+
+ /**
+ * @brief Reset the object.
+ *
+ * Reset the object and free all allocated memory.
+ */
+ void reset();
+
+ /**
+ * @brief Initialize.
+ *
+ * Reset the object, and initialize data structures, also
+ * preallocate memory for cell and data storage.
+ */
+ void init();
+
+ /**
+ * @brief Pack a state.
+ *
+ * Pack a state and its transitions into the compact structure. For
+ * final states, the data item is packed as well.
+ *
+ * @param s Pointer to state to pack.
+ * @param start True if the state is the start state.
+ * @return False if the object is not packable (it has been
+ * finalized, or it has not been initialized)
+ */
+ bool packState(const State* s, bool start=false);
+
+ /**
+ * @brief Pack the start state.
+ *
+ * Pack the state and mark it as the start state. (Equivalent to
+ * packState(s,true)).
+ *
+ * @param s Pointer to state to pack.
+ * @return False if the object is not packable (it has been
+ * finalized, or it has not been initialized)
+ */
+ bool packStartState(const State* s);
+
+ /**
+ * @brief Finalize the packed structure.
+ *
+ * Obtain all state indices from the state pointers using the
+ * pack map. Also compact the data storage if all data items have
+ * the same size (only store the size once, and store data items
+ * consecutively, without size attribute).
+ */
+ void finalize();
+
+ /**
+ * @brief Add perfect hash to the automaton.
+ *
+ * Computes the perfect hash for the whole automaton.
+ */
+ void addPerfectHash();
+
+ /**
+ * @brief Write the automaton to a file.
+ *
+ * @param filename Name of file.
+ * @param serial Serial number.
+ * @return True on success, false otherwise (for instance if the
+ * automaton is empty or not finalized, or the file
+ * cannot be opened).
+ */
+ bool write(const char *filename, uint32_t serial = 0);
+
+ /**
+ * @brief Read an automaton from file.
+ *
+ * @param filename Name of file.
+ * @return True on success.
+ */
+ bool read(const char *filename);
+
+ /**
+ * @brief Perform a lookup in the packed automaton.
+ *
+ * @param input Input string
+ * @return Pointer to data associated with input, or NULL if input is not accepted.
+ */
+ const unsigned char* lookup(const char *input) const;
+
+ /**
+ * @brief Create an FSA object from the automaton.
+ *
+ * Create an FSA object from the automaton. The PackedAutomaton is
+ * implicitly reset if the operation succeeds. PackedAutomaton
+ * cannot access the private constructor of FSA, so we have to pass
+ * the object via a struct, which is ugly :-(.
+ *
+ * @param d Reference to the FSA::Descriptor (struct) to store necessary info for
+ * creating the FSA object.
+ * @return True if the operation was successful.
+ */
+ bool getFSA(FSA::Descriptor &d);
+
+ };
+
+ // }}}
+
+
+ Register _register; /**< Register of states. */
+ BlobRegister _blob_register; /**< Register of data items. */
+ State* _q0; /**< Start state. */
+ std::string _previous_input; /**< Previous input string. */
+ bool _finalized; /**< Finalized flag. */
+ PackedAutomaton _packed; /**< Packed automaton. */
+
+ /**
+ * @brief Get common path length.
+ *
+ * Get the length of the common path shared by the current input
+ * string and strings already in the automaton.
+ *
+ * @param input Input string.
+ * @return Length of common path.
+ */
+ unsigned int getCPLength(const char *input);
+
+ /**
+ * @brief Get last state in common path.
+ *
+ * Get the last state of the common path shared by the current input
+ * string and strings already in the automaton.
+ *
+ * @param input Input string.
+ * @return Pointer to last state in common path.
+ */
+ State* getCPLastState(const char *input);
+
+ /**
+ * @brief Replace or register a state.
+ *
+ * Replace the state with an already registered equivalent state in
+ * the automaton, or register it if no such state exists yet.
+ *
+ * @param state Pointer to state to be replaced or registered.
+ */
+ void replaceOrRegister(State* state);
+
+ /**
+ * @brief Add new states for a suffix.
+ *
+ * Add the necessary new states for a suffix of an input string. The
+ * suffix is that part of an input string which is not covered by
+ * the common path.
+ *
+ * @param state Pointer to last state in the common path.
+ * @param suffix Suffix.
+ * @param b Data item associated with the input.
+ */
+ void addSuffix(State* state, const char *suffix, const Blob *b=NULL);
+
+ /**
+ * @brief Clean up data structures and release memory.
+ */
+ void cleanUp();
+
+public:
+
+ /**
+ * @brief Default constructor.
+ */
+ Automaton() :
+ _register(),
+ _blob_register(),
+ _q0(NULL),
+ _previous_input(),
+ _finalized(false),
+ _packed()
+ {}
+
+ /**
+ * @brief Destructor.
+ */
+ ~Automaton();
+
+ /**
+ * @brief Initialize the object.
+ */
+ void init();
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ */
+ void insertSortedString(const std::string &input);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param meta Meta info string (to be stored in the data item).
+ */
+ void insertSortedString(const std::string &input, const std::string &meta);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param b Reference to data item.
+ */
+ void insertSortedString(const char *input, const Blob &b);
+
+ /**
+ * @brief Insert a string to the automaton.
+ *
+ * Insert a string to the automaton. Input strings must be inserted
+ * in sorted order, otherwise the behaviour is undefined.
+ *
+ * @param input Input string.
+ * @param b Pointer to data item.
+ */
+ void insertSortedString(const char *input, const Blob *b=NULL);
+
+ /**
+ * @brief Finalize the automaton.
+ *
+ * Finalize the automaton. This involves calling replaceOrRegister
+ * for the start state _q0, and building the packed automaton, so no
+ * strings can be added to the automaton after this method is
+ * called.
+ */
+ void finalize();
+
+ /**
+ * @brief Add perfect hash to automaton.
+ *
+ * Compute and add perfect hash structure to the automaton. Only
+ * works on finalized automata.
+ */
+ void addPerfectHash();
+
+ /**
+ * @brief Write the finalized automaton to file.
+ *
+ * @param file Name of the file.
+ * @param serial Serial number.
+ * @return True on success.
+ */
+ bool write(const char *file, uint32_t serial = 0);
+
+ /**
+ * @brief Write the finalized automaton to file.
+ *
+ * @param file Name of the file.
+ * @param serial Serial number.
+ * @return True on success.
+ */
+ bool write(const std::string &file, uint32_t serial = 0)
+ {
+ return write(file.c_str(),serial);
+ }
+
+ /**
+ * @brief Create an FSA object from the automaton.
+ *
+ * Create an FSA object from the automaton. The Automaton and
+ * PackedAutomaton are implicitly reset.
+ *
+ * @return Pointer to a newly created FSA object. The caller is
+ * responsible for freeing it.
+ */
+ FSA* getFSA(void);
+
+};
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/base64.cpp b/fsa/src/vespa/fsa/base64.cpp
new file mode 100644
index 00000000000..f06fc445cc7
--- /dev/null
+++ b/fsa/src/vespa/fsa/base64.cpp
@@ -0,0 +1,142 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file base64.cpp
+ * @brief Implementation of Base64 class methods
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include <string>
+
+#include "base64.h"
+
+
+namespace fsa {
+
+// {{{ Base64::_table, Base64::_padding
+
+// Encoding alphabet: the symbol for the 6-bit value n is _table[n].
+const unsigned char Base64::_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+// Character appended by encode() to pad the output to a multiple of four symbols.
+const unsigned char Base64::_padding = '=';
+
+// }}}
+
+// {{{ Base64::b2n()
+
+/**
+ * Map one Base64 symbol to its 6-bit value.
+ *
+ * @param b Character code of the symbol.
+ * @return Value in the range 0..63, or -1 if b is not part of the
+ *         alphabet (this includes the padding character '=').
+ */
+inline int Base64::b2n(int b)
+{
+  // The three alphanumeric runs occupy consecutive value ranges.
+  if ('A'<=b && b<='Z') return b-'A';
+  if ('a'<=b && b<='z') return b-'a'+26;
+  if ('0'<=b && b<='9') return b-'0'+52;
+
+  switch (b) {
+  case '+': return 62;
+  case '/': return 63;
+  default:  return -1;
+  }
+}
+
+// }}}
+// {{{ Base64::n2b()
+
+/**
+ * Map a 6-bit value to its Base64 symbol.
+ *
+ * @param n Value to encode.
+ * @return The symbol from the encoding table, or -1 if n is outside 0..63.
+ */
+inline int Base64::n2b(int n)
+{
+  const bool inRange = (0<=n && n<=63);
+  if (!inRange)
+    return -1;
+  return _table[n];
+}
+
+// }}}
+
+// {{{ Base64::decode()
+
+/**
+ * Decode a Base64 string.
+ *
+ * The input is processed in groups of four symbols. The first two symbols
+ * of every group must belong to the alphabet. Any symbol outside the
+ * alphabet in the third or fourth position is treated like padding, and a
+ * padded third symbol must be followed by a padded fourth one.
+ *
+ * @param src  Encoded input; its length must be a multiple of four.
+ * @param dest Receives the decoded bytes. On error it holds whatever was
+ *             decoded before the error was detected (empty if the length
+ *             of src is not a multiple of four).
+ * @return Number of decoded bytes, or -1 on error.
+ */
+int Base64::decode(const std::string &src, std::string &dest)
+{
+  if ((src.length()&0x03) != 0) {   // incomplete trailing group
+    dest.resize(0);
+    return -1;
+  }
+
+  // Upper bound on the output size: three bytes per group of four symbols.
+  dest.resize(3*(src.length()>>2),'\0');
+
+  std::string::size_type out = 0;
+
+  for (std::string::size_type pos = 0; pos<src.length(); pos += 4) {
+    const int a = b2n(src[pos]);
+    const int b = b2n(src[pos+1]);
+    const int c = b2n(src[pos+2]);
+    const int d = b2n(src[pos+3]);
+
+    if (a<0 || b<0) {               // the first two symbols are mandatory
+      dest.resize(out);
+      return -1;
+    }
+
+    // Every valid group yields at least one byte.
+    dest[out++] = a<<2 | b>>4;
+
+    if (c<0) {                      // "xx==": one byte only
+      if (d>=0) {                   // padding must not be followed by data
+        dest.resize(out);
+        return -1;
+      }
+      continue;
+    }
+
+    dest[out++] = (b&0x0f)<<4 | c>>2;
+
+    if (d>=0)                       // full group: third byte present
+      dest[out++] = (c&0x03)<<6 | d;
+  }
+
+  dest.resize(out);
+  return out;
+}
+
+// }}}
+// {{{ Base64::encode()
+
+/**
+ * Encode an arbitrary byte string to Base64.
+ *
+ * Every three input bytes become four output symbols. A trailing group of
+ * one or two bytes is encoded to two or three symbols and padded with '='
+ * so that the output length is always a multiple of four.
+ *
+ * @param src  Bytes to encode (may contain any byte value).
+ * @param dest Receives the encoded string; previous contents are replaced.
+ * @return Length of the encoded string.
+ */
+int Base64::encode(const std::string &src, std::string &dest)
+{
+  const std::string::size_type len = src.length();
+
+  dest.resize(4*((len+2)/3),'\0');
+
+  std::string::size_type i = 0, index = 0;
+
+  // Bytes are widened through unsigned char: with a signed plain char a
+  // byte >= 0x80 would be negative, and shifting it would hand n2b() a
+  // negative value (which it maps to -1 instead of a symbol).
+  for (; i+2<len; i+=3) {
+    const int b0 = static_cast<unsigned char>(src[i]);
+    const int b1 = static_cast<unsigned char>(src[i+1]);
+    const int b2 = static_cast<unsigned char>(src[i+2]);
+
+    dest[index++] = n2b(b0>>2);
+    dest[index++] = n2b((b0&0x03)<<4 | b1>>4);
+    dest[index++] = n2b((b1&0x0f)<<2 | b2>>6);
+    dest[index++] = n2b(b2&0x3f);
+  }
+
+  if (i<len) {  // one or two bytes left: emit a padded group
+    const int b0 = static_cast<unsigned char>(src[i]);
+
+    dest[index++] = n2b(b0>>2);
+    if (i+1<len) {  // 2 bytes left
+      const int b1 = static_cast<unsigned char>(src[i+1]);
+
+      dest[index++] = n2b((b0&0x03)<<4 | b1>>4);
+      dest[index++] = n2b((b1&0x0f)<<2);
+      dest[index++] = _padding;
+    } else {        // 1 byte left
+      // The low two bits of the last byte go into the second symbol;
+      // there is no src[i+1] to read from here.
+      dest[index++] = n2b((b0&0x03)<<4);
+      dest[index++] = _padding;
+      dest[index++] = _padding;
+    }
+  }
+
+  return index;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/base64.h b/fsa/src/vespa/fsa/base64.h
new file mode 100644
index 00000000000..b0ada3b1bff
--- /dev/null
+++ b/fsa/src/vespa/fsa/base64.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file base64.h
+ * @brief Definition of Base64 class
+ *
+ */
+
+#pragma once
+
+#include <string>
+
+namespace fsa {
+
+/**
+ * @class Base64
+ * @brief Base64 encoding and decoding.
+ *
+ * Encode and decode arbitrary binary strings to %Base64.
+ */
+class Base64 {
+private:
+ /** Encoding table */
+ static const unsigned char _table[];
+ /** Padding character */
+ static const unsigned char _padding;
+
+ /** Decode one symbol; returns -1 for characters outside the alphabet. */
+ static inline int b2n(int b);
+ /** Encode one symbol; returns -1 for values outside 0..63. */
+ static inline int n2b(int n);
+
+public:
+
+ /**
+ * @brief Decode a %Base64 encoded string.
+ *
+ * @param src Source %Base64 encoded string.
+ * @param dest Destination to hold the decoded string.
+ * @return Size of destination string, or -1 if the source is not
+ * valid %Base64.
+ */
+ static int decode(const std::string &src, std::string &dest);
+
+ /**
+ * @brief Encode a string to %Base64.
+ *
+ * @param src Source string.
+ * @param dest Destination to hold %Base64 encoded string.
+ * @return Size of destination string.
+ */
+ static int encode(const std::string &src, std::string &dest);
+
+};
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/blob.cpp b/fsa/src/vespa/fsa/blob.cpp
new file mode 100644
index 00000000000..3fd381b33fd
--- /dev/null
+++ b/fsa/src/vespa/fsa/blob.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file blob.cpp
+ * @brief Implementation of Blob class methods
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "blob.h"
+
+
+namespace fsa {
+
+// {{{ Blob::operator<()
+
+/**
+ * Less-than comparison.
+ *
+ * Blobs are ordered by size first; blobs of equal size are ordered by
+ * memcmp() of their contents. Two empty blobs are not less than each other.
+ */
+bool Blob::operator<(const Blob& b) const
+{
+  if(_size!=b._size)
+    return _size<b._size;
+  // Same size: only look at the bytes when there are any.
+  return _size!=0 && memcmp(_data,b._data,_size)<0;
+}
+
+// }}}
+// {{{ Blob::operator>()
+
+/**
+ * Greater-than comparison.
+ *
+ * Mirror image of operator<: the larger blob wins, and blobs of equal
+ * size are compared with memcmp(). Two empty blobs are not greater than
+ * each other.
+ */
+bool Blob::operator>(const Blob& b) const
+{
+  if(_size!=b._size)
+    return _size>b._size;
+  // Same size: only look at the bytes when there are any.
+  return _size!=0 && memcmp(_data,b._data,_size)>0;
+}
+
+// }}}
+// {{{ Blob::operator==()
+
+/**
+ * Equality comparison.
+ *
+ * Two blobs are equal when they have the same size and, unless they are
+ * both empty, identical contents.
+ */
+bool Blob::operator==(const Blob& b) const
+{
+  if(_size!=b._size)
+    return false;
+  if(_size==0)
+    return true;
+  return memcmp(_data,b._data,_size)==0;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/blob.h b/fsa/src/vespa/fsa/blob.h
new file mode 100644
index 00000000000..362b37eb48e
--- /dev/null
+++ b/fsa/src/vespa/fsa/blob.h
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file blob.h
+ * @brief Definition of Blob class
+ *
+ */
+
+#pragma once
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <string>
+
+namespace fsa {
+
+// {{{ class Blob
+
+/**
+ * @class Blob
+ * @brief %Blob (binary large object) class.
+ *
+ * Representation of a blob (binary large object). Supports assign
+ * method, access to size and data, and comparison operators.
+ */
+class Blob {
+private:
+  /** Size of data. */
+  unsigned int _size;
+  /** Pointer to the data; allocated with malloc()/strdup() and owned by the blob. */
+  void* _data;
+public:
+
+  /**
+   * @brief Default constructor
+   *
+   * Creates an empty blob.
+   */
+  Blob() : _size(0), _data(NULL) {}
+
+  /**
+   * @brief Constructor
+   *
+   * Creates a blob from a character string. The string must be zero
+   * terminated. The terminating zero is part of the blob, so size()
+   * is strlen(str)+1.
+   *
+   * @param str Pointer to input string.
+   */
+  Blob(const char *str) : _size(strlen(str)+1), _data((void*)strdup(str)) {}
+
+  /**
+   * @brief Constructor
+   *
+   * Creates a blob from arbitrary data.
+   *
+   * @param data Pointer to data.
+   * @param size Size of the data.
+   */
+  Blob(const void *data, unsigned int size) : _size(size), _data(malloc(size))
+  { if(_size>0) memcpy(_data,data,_size); }
+
+  /**
+   * @brief Copy constructor
+   *
+   * Makes a deep copy of the data.
+   *
+   * @param b Blob to copy.
+   */
+  Blob(const Blob& b) : _size(b._size), _data(malloc(_size))
+  { if(_size>0) memcpy(_data,b._data,_size); }  // b._data is NULL for a default-constructed blob
+
+  /**
+   * @brief Constructor
+   *
+   * Creates a blob from std::string. No terminating zero is stored,
+   * so size() equals s.size().
+   *
+   * @param s Reference to input string.
+   */
+  Blob(const std::string &s) : _size(s.size()), _data(malloc(_size))
+  { s.copy((char*)_data,_size); }
+
+  /**
+   * @brief Copy assignment operator
+   *
+   * Makes a deep copy of the data. Without it the compiler-generated
+   * assignment would copy the pointer only, and both blobs would free
+   * the same buffer in their destructors.
+   *
+   * @param b Blob to copy.
+   * @return Reference to this blob.
+   */
+  Blob& operator=(const Blob& b)
+  {
+    if(this!=&b){
+      // Build the copy first, then release the old buffer.
+      void *copy=malloc(b._size);
+      if(b._size>0) memcpy(copy,b._data,b._size);
+      if(_data!=NULL) free(_data);
+      _size=b._size;
+      _data=copy;
+    }
+    return *this;
+  }
+
+  /** Destructor */
+  ~Blob() { if(_data!=NULL) free(_data); }
+
+  /**
+   * @brief Get data size.
+   *
+   * @return Data size.
+   */
+  unsigned int size() const { return _size; }
+
+  /**
+   * @brief Get data.
+   *
+   * @return Pointer to data. Valid as long as the blob object exists
+   *         and is not modified.
+   */
+  const void* data() const { return _data; }
+
+  /**
+   * @brief Reassign the blob.
+   *
+   * Releases the current data and stores a copy of the string contents
+   * (without a terminating zero).
+   *
+   * @param s Input string
+   */
+  void assign(const std::string &s)
+  {
+    if(_data!=NULL) free(_data);
+    _size=s.size();
+    _data=malloc(s.size());
+    s.copy((char*)_data,_size);
+  }
+
+  /**
+   * @brief Less-than operator.
+   *
+   * @param b Blob to compare.
+   */
+  bool operator<(const Blob& b) const;
+
+  /**
+   * @brief Greater-than operator.
+   *
+   * @param b Blob to compare.
+   */
+  bool operator>(const Blob& b) const;
+
+  /**
+   * @brief Equals operator.
+   *
+   * @param b Blob to compare.
+   */
+  bool operator==(const Blob& b) const;
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/checksum.h b/fsa/src/vespa/fsa/checksum.h
new file mode 100644
index 00000000000..0c685b27e0a
--- /dev/null
+++ b/fsa/src/vespa/fsa/checksum.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/20
+ * @version $Id$
+ * @file checksum.h
+ * @brief Definition of Checksum class
+ *
+ */
+
+#pragma once
+
+#include <inttypes.h>
+#include <string.h>
+
+
+namespace fsa {
+
+// {{{ class Checksum
+
+/**
+ * @class Checksum
+ * @brief Simple checksum class
+ */
+class Checksum {
+public:
+
+  /**
+   * @brief Compute 32-bit checksum value of an arbitrary buffer.
+   *
+   * The buffer is summed up as a sequence of native-endian 32-bit words
+   * (with wrap-around). Trailing bytes that do not fill a whole word are
+   * added as one zero-padded word, but only when the size is odd; see
+   * the note in the body.
+   *
+   * @param buffer Pointer to the buffer.
+   * @param size Size of the buffer.
+   * @return 32-bit checksum value.
+   */
+  static uint32_t compute(void *buffer, uint32_t size)
+  {
+    const uint8_t *cursor = (const uint8_t *)buffer;
+    const uint32_t words = size>>2;
+    uint32_t sum = 0;
+
+    // memcpy is used instead of a cast so that unaligned buffers are fine.
+    for(uint32_t w=0; w<words; ++w, cursor+=sizeof(uint32_t)){
+      uint32_t value;
+      memcpy(&value, cursor, sizeof(uint32_t));
+      sum += value;
+    }
+
+    //@@@@@@BUG! should be if((size&3)>0) but that will break checksumming; postpone to next major .fsa format change
+    // The historical test "size&(3>0)" is the same as "size&1", so a
+    // two-byte remainder is ignored. This is kept as is on purpose.
+    if((size&1)!=0){
+      uint32_t tail = 0;
+      memcpy(&tail, cursor, size&3);
+      sum += tail;
+    }
+    return sum;
+  }
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/conceptnet.cpp b/fsa/src/vespa/fsa/conceptnet.cpp
new file mode 100644
index 00000000000..da73003dee6
--- /dev/null
+++ b/fsa/src/vespa/fsa/conceptnet.cpp
@@ -0,0 +1,512 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file conceptnet.cpp
+ * @brief Concept network class implementation.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "conceptnet.h"
+#include "fstream"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h> // for ::mmap()
+#include <sys/time.h>
+#include <sys/resource.h> // for getrlimit(), setrlimit(), etc.
+
+// define this at your own risk...
+#undef NO_RANGE_CHECK
+
+namespace fsa {
+
+// {{{ constants
+
+// Out-of-class definition of the constant; its value is given by the
+// in-class initializer in conceptnet.h.
+const uint32_t ConceptNet::MAGIC;
+
+// }}}
+
+// {{{ ConceptNet::ConceptNet()
+
+/**
+ * Construct from C-string file names.
+ *
+ * The unit FSA is loaded by the member initializer. If that succeeds and
+ * a data file name is given, the data file is read as well; without a
+ * data file name the object is ok as soon as the FSA is.
+ */
+ConceptNet::ConceptNet(const char *fsafile, const char *datafile, FileAccessMethod fam) :
+  _mmap_addr(NULL), _mmap_length(0),
+  _unit_fsa(fsafile,fam),
+  _index_size(0), _index(NULL),
+  _info_size(0), _info(NULL),
+  _catindex_size(0), _catindex(NULL),
+  _strings_size(0), _strings(NULL),
+  _ok(false)
+{
+  if(!_unit_fsa.isOk())
+    return;                       // _ok stays false
+
+  if(datafile==NULL)
+    _ok = true;                   // FSA only, nothing else to load
+  else
+    _ok = read(datafile,fam);
+}
+
+/**
+ * Construct from std::string file names.
+ *
+ * The unit FSA is loaded by the member initializer; if that succeeds the
+ * data file is always read.
+ */
+ConceptNet::ConceptNet(const std::string &fsafile, const std::string &datafile, FileAccessMethod fam) :
+  _mmap_addr(NULL), _mmap_length(0),
+  _unit_fsa(fsafile,fam),
+  _index_size(0), _index(NULL),
+  _info_size(0), _info(NULL),
+  _catindex_size(0), _catindex(NULL),
+  _strings_size(0), _strings(NULL),
+  _ok(false)
+{
+  if(_unit_fsa.isOk())
+    _ok = read(datafile.c_str(),fam);
+}
+
+// }}}
+// {{{ ConceptNet::~ConceptNet()
+
+ConceptNet::~ConceptNet()
+{
+ // reset() unmaps the data file or frees the arrays allocated by read().
+ reset();
+}
+
+// }}}
+
+// {{{ ConceptNet::reset()
+
+/**
+ * Release the concept net data and return to the empty state.
+ *
+ * If the data file is mapped, the mapping is removed; otherwise the
+ * arrays are released with delete[] (a no-op for NULL pointers). The
+ * unit FSA is not touched.
+ */
+void ConceptNet::reset()
+{
+  const bool mapped = (_mmap_addr!=NULL && _mmap_addr!=MAP_FAILED);
+
+  if(mapped){
+    // All section pointers point into the mapping, nothing else to free.
+    munmap(_mmap_addr,_mmap_length);
+  }
+  else{
+    delete[] _index;
+    delete[] _info;
+    delete[] _catindex;
+    delete[] _strings;
+  }
+
+  _mmap_addr = NULL;
+  _mmap_length = 0;
+
+  // leave _unit_fsa alone
+
+  _index = NULL;
+  _index_size = 0;
+  _info = NULL;
+  _info_size = 0;
+  _catindex = NULL;
+  _catindex_size = 0;
+  _strings = NULL;
+  _strings_size = 0;
+
+  _ok = false;
+}
+
+// }}}
+// {{{ ConceptNet::read()
+
+/**
+ * Read exactly the requested number of bytes from a file descriptor.
+ *
+ * ::read() may return fewer bytes than requested, so it is called in a
+ * loop until everything has arrived.
+ *
+ * @param fd     Open file descriptor.
+ * @param buffer Destination buffer, at least `bytes` bytes long.
+ * @param bytes  Number of bytes to read.
+ * @return True if all bytes were read, false on error or premature end
+ *         of file.
+ */
+static bool readFully(int fd, void *buffer, size_t bytes)
+{
+  char *dst = (char*)buffer;
+  while(bytes>0){
+    ssize_t got = ::read(fd,dst,bytes);
+    if(got<=0)
+      return false;
+    dst += got;
+    bytes -= (size_t)got;
+  }
+  return true;
+}
+
+/**
+ * Read the concept net data file.
+ *
+ * The file consists of a Header followed by the index, info, category
+ * index and string sections, in this order. Depending on the access
+ * method the sections are either read into newly allocated arrays, or
+ * the whole file is mapped and the section pointers point into the
+ * mapping. In both cases a file shorter than what the header announces
+ * is rejected.
+ *
+ * @param datafile Name of the concept net data file.
+ * @param fam File access method; FILE_ACCESS_UNDEF selects the default.
+ * @return True on success. On failure the object is left in the reset
+ *         (empty) state.
+ */
+bool ConceptNet::read(const char *datafile, FileAccessMethod fam)
+{
+  Header header;
+
+  reset(); //WATCHOUT: if reset() ever changes to unref _unit_fsa, we can't use it since the FSA is read in the constructor before we get here
+
+  if(fam==FILE_ACCESS_UNDEF)
+    fam=_default_file_access_method;
+
+  if(datafile==NULL)
+    return false;
+
+  int fd = ::open(datafile,O_RDONLY);
+  if(fd<0)
+    return false;
+
+  if(!readFully(fd,&header,sizeof(header)) || header._magic!=ConceptNet::MAGIC){
+    ::close(fd);
+    return false;
+  }
+
+  _index_size = header._index_size;
+  _info_size = header._info_size;
+  _catindex_size = header._catindex_size;
+  _strings_size = header._strings_size;
+
+  // Section sizes in bytes (computed in size_t, as sizeof is size_t).
+  const size_t indexBytes    = _index_size*sizeof(UnitData);
+  const size_t infoBytes     = _info_size*sizeof(uint32_t);
+  const size_t catindexBytes = _catindex_size*sizeof(uint32_t);
+  const size_t stringsBytes  = _strings_size*sizeof(char);
+
+  if(fam==FILE_ACCESS_MMAP || fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+    _mmap_length = sizeof(header) + indexBytes + infoBytes + catindexBytes + stringsBytes;
+
+    // Mapping a file that is shorter than the header claims would succeed,
+    // but touching the pages behind the end of the file later would fault.
+    // Check the size now. Moving the file offset is harmless, since
+    // nothing is read from fd on the mmap path after this point.
+    const off_t fileSize = ::lseek(fd,0,SEEK_END);
+    if(fileSize<0 || (uint64_t)fileSize<(uint64_t)_mmap_length){
+      ::close(fd);
+      reset();
+      return false;
+    }
+
+    _mmap_addr = ::mmap((void*)0, _mmap_length, PROT_READ, MAP_SHARED, fd, 0);
+    if(_mmap_addr==MAP_FAILED){
+      ::close(fd);
+      reset();
+      return false;
+    }
+    if(fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+      // Locking is best effort: a failure is not treated as an error.
+      if(mlock(_mmap_addr, _mmap_length)<0) {
+        /* try to increase RLIMIT_MEMLOCK then mlock() again */
+        struct rlimit rl;
+        if(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0) {
+          rl.rlim_cur += _mmap_length + getpagesize();
+          rl.rlim_max += _mmap_length + getpagesize();
+          if(setrlimit(RLIMIT_MEMLOCK, &rl) >= 0)
+            mlock(_mmap_addr, _mmap_length);
+        }
+      }
+    }
+  }
+
+  if(_mmap_addr==NULL){
+    // Read mode: allocate and fill the sections one by one, so that
+    // nothing more is allocated once a section turns out to be incomplete.
+    bool complete;
+
+    _index = new UnitData[_index_size];
+    complete = readFully(fd,_index,indexBytes);
+    if(complete){
+      _info = new uint32_t[_info_size];
+      complete = readFully(fd,_info,infoBytes);
+    }
+    if(complete){
+      _catindex = new uint32_t[_catindex_size];
+      complete = readFully(fd,_catindex,catindexBytes);
+    }
+    if(complete){
+      _strings = new char[_strings_size];
+      complete = readFully(fd,_strings,stringsBytes);
+    }
+    if(!complete){
+      ::close(fd);
+      reset();  // frees whatever has been allocated so far
+      return false;
+    }
+  }
+  else {
+    // Mmap mode: the sections follow the header back to back.
+    uint8_t *section = (uint8_t*)_mmap_addr + sizeof(header);
+
+    _index = (UnitData*)section;
+    section += indexBytes;
+    _info = (uint32_t*)section;
+    section += infoBytes;
+    _catindex = (uint32_t*)section;
+    section += catindexBytes;
+    _strings = (char*)section;
+  }
+
+  ::close(fd);
+
+  return true;
+}
+
+// }}}
+
+// {{{ ConceptNet::lookup()
+
+/**
+ * Look up a unit string.
+ *
+ * @return Hash value of the unit in the unit FSA (used as its index),
+ *         or -1 if the FSA does not accept the string.
+ */
+int ConceptNet::lookup(const char *unit) const
+{
+  FSA::HashedState state(_unit_fsa);
+  state.start(unit);
+  if(!state.isFinal())
+    return -1;
+  return (int)state.hash();
+}
+
+/**
+ * Look up a unit index.
+ *
+ * @return Pointer to the unit string in the string storage, or NULL if
+ *         the index is out of range.
+ */
+const char * ConceptNet::lookup(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return NULL;
+#endif
+  const uint32_t offset = _index[idx]._term;
+  return _strings+offset;
+}
+
+// }}}
+// {{{ ConceptNet::frq()
+
+/** Unit frequency by index; -1 if the index is out of range. */
+int ConceptNet::frq(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const UnitData &unit = _index[idx];
+  return unit._frq;
+}
+
+/** Unit frequency by unit string; -1 if the unit is not found. */
+int ConceptNet::frq(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return frq(idx);
+}
+
+// }}}
+// {{{ ConceptNet::cFrq()
+
+/** Frequency of the unit as a complete query, by index; -1 if out of range. */
+int ConceptNet::cFrq(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const UnitData &unit = _index[idx];
+  return unit._cfrq;
+}
+
+/** Frequency of the unit as a complete query, by string; -1 if not found. */
+int ConceptNet::cFrq(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return cFrq(idx);
+}
+
+// }}}
+// {{{ ConceptNet::qFrq()
+
+/** Frequency of the unit as part of a query, by index; -1 if out of range. */
+int ConceptNet::qFrq(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const UnitData &unit = _index[idx];
+  return unit._qfrq;
+}
+
+/** Frequency of the unit as part of a query, by string; -1 if not found. */
+int ConceptNet::qFrq(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return qFrq(idx);
+}
+
+// }}}
+// {{{ ConceptNet::sFrq()
+
+/** Number of queries containing all unit terms, by index; -1 if out of range. */
+int ConceptNet::sFrq(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const UnitData &unit = _index[idx];
+  return unit._sfrq;
+}
+
+/** Number of queries containing all unit terms, by string; -1 if not found. */
+int ConceptNet::sFrq(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return sFrq(idx);
+}
+
+// }}}
+// {{{ ConceptNet::score()
+
+/**
+ * Unit score, 100.0*cFrq/qFrq, by index.
+ *
+ * @return The score, or -1.0 if the index is out of range. The division
+ *         is done in floating point without checking qFrq for zero.
+ */
+double ConceptNet::score(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1.0;
+#endif
+  const UnitData &unit = _index[idx];
+  return 100.0*(double)unit._cfrq/(double)unit._qfrq;
+}
+
+/** Unit score by unit string; -1.0 if the unit is not found. */
+double ConceptNet::score(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return score(idx);
+}
+
+// }}}
+// {{{ ConceptNet::strength()
+
+/**
+ * Unit strength, 100.0*qFrq/sFrq, by index.
+ *
+ * @return The strength, or -1.0 if the index is out of range. The
+ *         division is done in floating point without checking sFrq for
+ *         zero.
+ */
+double ConceptNet::strength(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1.0;
+#endif
+  const UnitData &unit = _index[idx];
+  return 100.0*(double)unit._qfrq/(double)unit._sfrq;
+}
+
+/** Unit strength by unit string; -1.0 if the unit is not found. */
+double ConceptNet::strength(const char *unit) const
+{
+  const int idx = lookup(unit);  // -1 if unknown, rejected by the range check
+  return strength(idx);
+}
+
+// }}}
+// {{{ ConceptNet::numExt()
+
+/**
+ * Number of extensions of a unit.
+ *
+ * A zero offset in the index means that the unit has no extension list;
+ * otherwise the count is the first word of the list in the info array.
+ *
+ * @return Number of extensions, or -1 if the index is out of range.
+ */
+int ConceptNet::numExt(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const uint32_t offset = _index[idx]._exts;
+  return (offset==0) ? 0 : (int)_info[offset];
+}
+
+// }}}
+// {{{ ConceptNet::numAssoc()
+
+/**
+ * Number of associations of a unit.
+ *
+ * A zero offset in the index means that the unit has no association
+ * list; otherwise the count is the first word of the list in the info
+ * array.
+ *
+ * @return Number of associations, or -1 if the index is out of range.
+ */
+int ConceptNet::numAssoc(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const uint32_t offset = _index[idx]._assocs;
+  return (offset==0) ? 0 : (int)_info[offset];
+}
+
+// }}}
+// {{{ ConceptNet::numCat()
+
+/**
+ * Number of categories of a unit.
+ *
+ * A zero offset in the index means that the unit has no category list;
+ * otherwise the count is the first word of the list in the info array.
+ *
+ * @return Number of categories, or -1 if the index is out of range.
+ */
+int ConceptNet::numCat(int idx) const
+{
+#ifndef NO_RANGE_CHECK
+  const bool inRange = (idx>=0 && (uint32_t)idx<_index_size);
+  if(!inRange)
+    return -1;
+#endif
+  const uint32_t offset = _index[idx]._cats;
+  return (offset==0) ? 0 : (int)_info[offset];
+}
+
+// }}}
+// {{{ ConceptNet::ext()
+
+/**
+ * Unit index of the j-th extension of a unit.
+ *
+ * The extension list starts with its length, followed by
+ * (index, frequency) pairs; this returns the first word of pair j.
+ *
+ * @return Extension (unit) index, or -1 if idx or j is out of range.
+ */
+int ConceptNet::ext(int idx, int j) const
+{
+  assert(j>=0);
+#ifndef NO_RANGE_CHECK
+  if(idx<0 || (uint32_t)idx>=_index_size)
+    return -1;
+  // No list at all, or j beyond the number of entries.
+  if(_index[idx]._exts==0 || (uint32_t)j>=_info[_index[idx]._exts])
+    return -1;
+#endif
+  const uint32_t pos = _index[idx]._exts+1+2*j;
+  return (int)_info[pos];
+}
+
+// }}}
+// {{{ ConceptNet::extFrq()
+
+/**
+ * Frequency of the j-th extension of a unit.
+ *
+ * The extension list starts with its length, followed by
+ * (index, frequency) pairs; this returns the second word of pair j.
+ *
+ * @return Extension frequency, or -1 if idx or j is out of range.
+ */
+int ConceptNet::extFrq(int idx, int j) const
+{
+  assert(j>=0);
+#ifndef NO_RANGE_CHECK
+  if(idx<0 || (uint32_t)idx>=_index_size)
+    return -1;
+  // No list at all, or j beyond the number of entries.
+  if(_index[idx]._exts==0 || (uint32_t)j>=_info[_index[idx]._exts])
+    return -1;
+#endif
+  const uint32_t pos = _index[idx]._exts+1+2*j+1;
+  return (int)_info[pos];
+}
+
+// }}}
+// {{{ ConceptNet::assoc()
+
+/**
+ * Unit index of the j-th association of a unit.
+ *
+ * The association list starts with its length, followed by
+ * (index, frequency) pairs; this returns the first word of pair j.
+ *
+ * @return Association (unit) index, or -1 if idx or j is out of range.
+ */
+int ConceptNet::assoc(int idx, int j) const
+{
+  assert(j>=0);
+#ifndef NO_RANGE_CHECK
+  if(idx<0 || (uint32_t)idx>=_index_size)
+    return -1;
+  // No list at all, or j beyond the number of entries.
+  if(_index[idx]._assocs==0 || (uint32_t)j>=_info[_index[idx]._assocs])
+    return -1;
+#endif
+  const uint32_t pos = _index[idx]._assocs+1+2*j;
+  return (int)_info[pos];
+}
+
+// }}}
+// {{{ ConceptNet::assocFrq()
+
+/**
+ * Frequency of the j-th association of a unit.
+ *
+ * The association list starts with its length, followed by
+ * (index, frequency) pairs; this returns the second word of pair j.
+ *
+ * @return Association frequency, or -1 if idx or j is out of range.
+ */
+int ConceptNet::assocFrq(int idx, int j) const
+{
+  assert(j>=0);
+#ifndef NO_RANGE_CHECK
+  if(idx<0 || (uint32_t)idx>=_index_size)
+    return -1;
+  // No list at all, or j beyond the number of entries.
+  if(_index[idx]._assocs==0 || (uint32_t)j>=_info[_index[idx]._assocs])
+    return -1;
+#endif
+  const uint32_t pos = _index[idx]._assocs+1+2*j+1;
+  return (int)_info[pos];
+}
+
+// }}}
+// {{{ ConceptNet::cat()
+
+/**
+ * Index of the j-th category of a unit.
+ *
+ * The category list starts with its length, followed by one word per
+ * category.
+ *
+ * @return Category index, or -1 if idx or j is out of range.
+ */
+int ConceptNet::cat(int idx, int j) const
+{
+  assert(j>=0);
+#ifndef NO_RANGE_CHECK
+  if(idx<0 || (uint32_t)idx>=_index_size)
+    return -1;
+  // No list at all, or j beyond the number of entries.
+  if(_index[idx]._cats==0 || (uint32_t)j>=_info[_index[idx]._cats])
+    return -1;
+#endif
+  const uint32_t pos = _index[idx]._cats+1+j;
+  return (int)_info[pos];
+}
+
+// }}}
+// {{{ ConceptNet::catName()
+
+/**
+ * Name of a category.
+ *
+ * @return Pointer to the category name in the string storage, or NULL
+ *         if catIdx is out of range.
+ */
+const char *ConceptNet::catName(int catIdx) const
+{
+  if(catIdx>=0 && (uint32_t)catIdx<_catindex_size){
+    const uint32_t offset = _catindex[catIdx];
+    return _strings+offset;
+  }
+  return NULL;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/conceptnet.h b/fsa/src/vespa/fsa/conceptnet.h
new file mode 100644
index 00000000000..77c7a8b9e03
--- /dev/null
+++ b/fsa/src/vespa/fsa/conceptnet.h
@@ -0,0 +1,371 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file conceptnet.h
+ * @brief Concept network class definition.
+ *
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdlib.h>
+#include "file.h" // for FileAccessMethod
+#include "fsa.h"
+
+
+namespace fsa {
+
+// {{{ class ConceptNet
+
+/**
+ * @class ConceptNet
+ * @brief Class for compact representation of a concept network.
+ */
+class ConceptNet {
+
+public:
+
+ class Handle; // defined in conceptnethandle.h
+
+private:
+ static const uint32_t MAGIC = 238579428; /**< Magic number identifying concept net files. */
+
+ static const FileAccessMethod _default_file_access_method = FILE_ACCESS_MMAP; /**< Default file access method (read/mmap). */
+
+ /**
+ * @struct Header
+ * @brief Concept net data file header.
+ */
+ struct Header {
+ uint32_t _magic; /**< Magic number. */
+ uint32_t _version; /**< Version number. (currently not used) */
+ uint32_t _checksum; /**< Checksum. (currently not used) */
+ uint32_t _index_size; /**< Size of index structure. */
+ uint32_t _info_size; /**< Size of info structure. */
+ uint32_t _catindex_size; /**< Size of category index. */
+ uint32_t _strings_size; /**< Size of string storage. */
+ uint32_t _max_freq; /**< Reserved for normalization purposes. */
+ uint32_t _max_cfreq; /**< Reserved for normalization purposes. */
+ uint32_t _max_qfreq; /**< Reserved for normalization purposes. */
+ uint32_t _max_sfreq; /**< Reserved for normalization purposes. */
+ uint32_t _max_efreq; /**< Reserved for normalization purposes. */
+ uint32_t _max_afreq; /**< Reserved for normalization purposes. */
+ uint32_t _dummy[51]; /**< Reserved. */
+ };
+
+ /**
+ * @struct UnitData
+ * @brief Unit data structure.
+ */
+ struct UnitData {
+ uint32_t _term; /**< Offset of unit string in string storage. */
+ uint32_t _frq; /**< Unit frequency. */
+ uint32_t _cfrq; /**< Frequency of the unit as complete query. */
+ uint32_t _qfrq; /**< Frequency of the unit as part of a query. */
+ uint32_t _sfrq; /**< Number of queries containing all unit terms. */
+ uint32_t _exts; /**< If non-zero: offset of extension info in info structure. */
+ uint32_t _assocs; /**< If non-zero: offset of association info in info structure. */
+ uint32_t _cats; /**< If non-zero: offset of category info in info structure. */
+ };
+
+ void *_mmap_addr; /**< mmap address, NULL if the file has not been mmapped. */
+ size_t _mmap_length; /**< mmap length. */
+
+ FSA _unit_fsa; /**< %FSA containing the units (with hash). */
+ uint32_t _index_size; /**< Size of the index structure. */
+ UnitData *_index; /**< Pointer to the index structure in memory. */
+ uint32_t _info_size; /**< Size of the info structure. */
+ uint32_t *_info; /**< Pointer to the info structure in memory. */
+ uint32_t _catindex_size; /**< Size of the category index. */
+ uint32_t *_catindex; /**< Pointer to the category index in memory. */
+ uint32_t _strings_size; /**< Size of the string storage. */
+ char *_strings; /**< Pointer to the string storage in memory. */
+
+ bool _ok; /**< Flag indicating successful initialization. */
+
+ /**
+ * @brief Reset the object.
+ *
+ * Resets the object to an empty %ConceptNet, and releases allocated memory.
+ */
+ void reset();
+
+ /**
+ * @brief Read the concept net data file from disk.
+ *
+ * @param datafile Name of the concept net data file.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global default access mode will be used.
+ * @return True on success.
+ */
+ bool read(const char *datafile, fsa::FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ ConceptNet();
+ /**
+ * @brief Unimplemented private copy constructor.
+ */
+ ConceptNet(const ConceptNet&);
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ const ConceptNet& operator=(const ConceptNet&);
+
+public:
+
+ /**
+ * @brief Constructor.
+ *
+ * @param fsafile %FSA file containing the units, with a perfect hash
+ * (used for indexing the data file).
+ * @param datafile Concept net data file.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global default access mode will be used.
+ */
+ ConceptNet(const char *fsafile, const char *datafile=NULL, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+ /** Same as above, with the file names given as std::string. */
+ ConceptNet(const std::string &fsafile, const std::string &datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~ConceptNet();
+
+ /**
+ * @brief Check if initialization was successful.
+ *
+ * @return True if the initialization of the object succeeded.
+ */
+ bool isOk() const
+ {
+ return _ok;
+ }
+
+ /**
+ * @brief Get the concept net %FSA.
+ *
+ * Get the concept net %FSA. The object continues to be owned by the
+ * concept net.
+ *
+ * @return The concept net %FSA.
+ */
+ const FSA& getFSA() const
+ {
+ assert(_ok);
+ return _unit_fsa;
+ }
+
+ /**
+ * @brief Look up a unit.
+ *
+ * Look up a unit in the concept net, and get its index.
+ *
+ * @param unit Unit string.
+ * @return Index of the unit, or -1 if not found.
+ */
+ int lookup(const char *unit) const;
+
+ /**
+ * @brief Look up a unit index.
+ *
+ * Look up a unit index in the concept net, and get the unit string.
+ *
+ * @param idx Unit index.
+ * @return Pointer to the unit string, or NULL if index is out of range.
+ */
+ const char * lookup(int idx) const;
+
+ /**
+ * @brief Get the unit frequency of the unit.
+ *
+ * @param idx Unit index.
+ * @return Unit frequency, or -1 if the index is out of range.
+ */
+ int frq(int idx) const;
+
+ /**
+ * @brief Get the unit frequency of the unit.
+ *
+ * @param unit Unit string.
+ * @return Unit frequency, or -1 if the unit is not found.
+ */
+ int frq(const char *unit) const;
+
+ /**
+ * @brief Get the frequency of the unit as a complete query.
+ *
+ * @param idx Unit index.
+ * @return Unit-C frequency, or -1 if the index is out of range.
+ */
+ int cFrq(int idx) const;
+
+ /**
+ * @brief Get the frequency of the unit as a complete query.
+ *
+ * @param unit Unit string.
+ * @return Unit-C frequency, or -1 if the unit is not found.
+ */
+ int cFrq(const char *unit) const;
+
+ /**
+ * @brief Get the frequency of the unit as part of a query.
+ *
+ * @param idx Unit index.
+ * @return Unit-Q frequency, or -1 if the index is out of range.
+ */
+ int qFrq(int idx) const;
+
+ /**
+ * @brief Get the frequency of the unit as part of a query.
+ *
+ * @param unit Unit string.
+ * @return Unit-Q frequency, or -1 if the unit is not found.
+ */
+ int qFrq(const char *unit) const;
+
+ /**
+ * @brief Get the frequency of queries containing all terms of the unit.
+ *
+ * @param idx Unit index.
+ * @return Unit-S frequency, or -1 if the index is out of range.
+ */
+ int sFrq(int idx) const;
+
+ /**
+ * @brief Get the frequency of queries containing all terms of the unit.
+ *
+ * @param unit Unit string.
+ * @return Unit-S frequency, or -1 if the unit is not found.
+ */
+ int sFrq(const char *unit) const;
+
+ /**
+ * @brief Get the unit score (100.0*cFrq/qFrq).
+ *
+ * @param idx Unit index.
+ * @return Unit score, or -1.0 if the index is out of range.
+ */
+ double score(int idx) const;
+
+ /**
+ * @brief Get the unit score (100.0*cFrq/qFrq).
+ *
+ * @param unit Unit string.
+ * @return Unit score, or -1.0 if the unit is not found.
+ */
+ double score(const char *unit) const;
+
+ /**
+ * @brief Get the unit strength (100.0*qFrq/sFrq).
+ *
+ * @param idx Unit index.
+ * @return Unit strength, or -1.0 if the index is out of range.
+ */
+ double strength(int idx) const;
+
+ /**
+ * @brief Get the unit strength (100.0*qFrq/sFrq).
+ *
+ * @param unit Unit string.
+ * @return Unit strength, or -1.0 if the unit is not found.
+ */
+ double strength(const char *unit) const;
+
+ /**
+ * @brief Get the number of extensions for the unit.
+ *
+ * @param idx Unit index.
+ * @return Number of extensions for the unit, -1 if the index is out
+ * of range.
+ */
+ int numExt(int idx) const;
+
+ /**
+ * @brief Get the number of associations for the unit.
+ *
+ * @param idx Unit index.
+ * @return Number of associations for the unit, -1 if the index is out
+ * of range.
+ */
+ int numAssoc(int idx) const;
+
+ /**
+ * @brief Get the number of categories for the unit.
+ *
+ * @param idx Unit index.
+ * @return Number of categories for the unit, -1 if the index is out
+ * of range.
+ */
+ int numCat(int idx) const;
+
+ /**
+ * @brief Get the index of an extension.
+ *
+ * @param idx Unit index.
+ * @param j Number of the extension (extensions of each unit are
+ * sorted by decreasing weight).
+ * @return Extension (unit) index, -1 if idx or j is out
+ * of range.
+ */
+ int ext(int idx, int j) const;
+
+ /**
+ * @brief Get the frequency of an extension.
+ *
+ * @param idx Unit index.
+ * @param j Number of the extension (extensions of each unit are
+ * sorted by decreasing weight).
+ * @return Extension frequency, -1 if idx or j is out
+ * of range.
+ */
+ int extFrq(int idx, int j) const;
+
+ /**
+ * @brief Get the index of an association.
+ *
+ * @param idx Unit index.
+ * @param j Number of the association (associations of each unit are
+ * sorted by decreasing weight).
+ * @return Association (unit) index, -1 if idx or j is out
+ * of range.
+ */
+ int assoc(int idx, int j) const;
+
+ /**
+ * @brief Get the frequency of an association.
+ *
+ * @param idx Unit index.
+ * @param j Number of the association (associations of each unit are
+ * sorted by decreasing weight).
+ * @return Association frequency, -1 if idx or j is out
+ * of range.
+ */
+ int assocFrq(int idx, int j) const;
+
+ /**
+ * @brief Get the index of a category.
+ *
+ * @param idx Unit index.
+ * @param j Number of the category.
+ * @return Category index, -1 if idx or j is out of range.
+ */
+ int cat(int idx, int j) const;
+
+ /**
+ * @brief Get the name of a category.
+ *
+ * @param catIdx Category index.
+ * @return Category name, or NULL if catIdx is out of range.
+ */
+ const char *catName(int catIdx) const;
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/detector.cpp b/fsa/src/vespa/fsa/detector.cpp
new file mode 100644
index 00000000000..f9e92c994d5
--- /dev/null
+++ b/fsa/src/vespa/fsa/detector.cpp
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file detector.cpp
+ * @brief %FSA (%Finite %State %Automaton) based detector (implementation)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <list>
+#include <algorithm>
+
+#include <math.h>
+
+#include "detector.h"
+#include "fsa.h"
+#include "ngram.h"
+
+
+namespace fsa {
+
+// {{{ Detector::detect
+
+/**
+ * Detect dictionary terms and phrases in a tokenized text.
+ *
+ * For every token position a fresh automaton state is started, and all
+ * states that are still alive are advanced by the current token. Each
+ * state that becomes final reports a hit; states that become invalid are
+ * dropped.
+ *
+ * @param text   Tokenized input.
+ * @param hits   Receiver of the detected phrases.
+ * @param from   Index of the first token to look at.
+ * @param length Number of tokens to look at, or -1 for "up to the end".
+ */
+void Detector::detect(const NGram &text, Detector::Hits &hits,
+                      unsigned int from, int length) const
+{
+  typedef std::list<FSA::WordCounterState> StateList;
+
+  // End of the token window (exclusive), clipped to the text length.
+  unsigned int end = text.length();
+  if(length!=-1 && from+length<end)
+    end = from+length;
+
+  StateList active;
+
+  for(unsigned int pos=from; pos<end; ++pos){
+    // A phrase may start at every token.
+    active.push_back(FSA::WordCounterState(_dictionary));
+
+    StateList::iterator it = active.begin();
+    while(it!=active.end()){
+      it->deltaWord(text[pos]);
+
+      if(it->isFinal()){
+        // The counter is the number of words consumed by this state, so
+        // the phrase ends at pos and starts counter-1 tokens earlier.
+        hits.add(text, pos-it->getCounter()+1, it->getCounter(), *it);
+      }
+
+      if(!it->isValid())
+        it = active.erase(it);   // dead end, stop following it
+      else
+        ++it;
+    }
+  }
+}
+
+// }}}
+// {{{ Detector::detectWithHash
+
+/**
+ * Detect dictionary terms and phrases in a tokenized text, using hashed
+ * automaton states.
+ *
+ * Works like detect(), but the states handed to the hit receiver are
+ * FSA::HashedWordCounterState objects.
+ *
+ * @param text   Tokenized input.
+ * @param hits   Receiver of the detected phrases.
+ * @param from   Index of the first token to look at.
+ * @param length Number of tokens to look at, or -1 for "up to the end".
+ */
+void Detector::detectWithHash(const NGram &text, Detector::Hits &hits,
+                              unsigned int from, int length) const
+{
+  typedef std::list<FSA::HashedWordCounterState> StateList;
+
+  // End of the token window (exclusive), clipped to the text length.
+  unsigned int end = text.length();
+  if(length!=-1 && from+length<end)
+    end = from+length;
+
+  StateList active;
+
+  for(unsigned int pos=from; pos<end; ++pos){
+    // A phrase may start at every token.
+    active.push_back(FSA::HashedWordCounterState(_dictionary));
+
+    StateList::iterator it = active.begin();
+    while(it!=active.end()){
+      it->deltaWord(text[pos]);
+
+      if(it->isFinal()){
+        // The counter is the number of words consumed by this state, so
+        // the phrase ends at pos and starts counter-1 tokens earlier.
+        hits.add(text, pos-it->getCounter()+1, it->getCounter(), *it);
+      }
+
+      if(!it->isValid())
+        it = active.erase(it);   // dead end, stop following it
+      else
+        ++it;
+    }
+  }
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/detector.h b/fsa/src/vespa/fsa/detector.h
new file mode 100644
index 00000000000..62e54077519
--- /dev/null
+++ b/fsa/src/vespa/fsa/detector.h
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file detector.h
+ * @brief %FSA (%Finite %State %Automaton) based detector.
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include "fsa.h"
+#include "ngram.h"
+
+namespace fsa {
+
+// {{{ Detector
+
+/**
+ * @class Detector
+ * @brief Simple %FSA based detector.
+ *
+ * Class for processing a tokenized text and detecting occurrences of
+ * terms and phrases in a given dictionary.
+ */
+class Detector {
+
+public:
+
+  // {{{ class Detector::Hits
+
+  /**
+   * @class Hits
+   * @brief Callback interface through which detection results are delivered.
+   *
+   * Abstract base class: every application of the detector derives its
+   * own collector from it. The detector invokes add() once for each
+   * term/phrase it finds.
+   */
+  class Hits {
+  public:
+    /** Default constructor. */
+    Hits() {}
+    /** Virtual destructor (the class is meant to be subclassed). */
+    virtual ~Hits() {}
+
+    /**
+     * @brief Receive one detection result.
+     *
+     * @param text   Tokenized detector input text.
+     * @param from   Index of the first term of the detected phrase.
+     * @param length Number of terms in the detected phrase.
+     * @param state  Final state reached when the phrase was detected.
+     */
+    virtual void add(const NGram &text,
+                     unsigned int from, int length,
+                     const FSA::State &state) = 0;
+  };
+
+  // }}}
+
+  /**
+   * @brief Constructor.
+   *
+   * Creates a detector bound to the given dictionary. Only a reference
+   * is stored, so the dictionary must outlive the detector.
+   *
+   * @param dict Dictionary automaton.
+   */
+  Detector(const FSA& dict) : _dictionary(dict) {}
+
+  /**
+   * @brief Constructor.
+   *
+   * Creates a detector bound to the dictionary the pointer refers to.
+   * The pointer is dereferenced immediately, so it must not be NULL.
+   *
+   * @param dict Pointer to the dictionary automaton.
+   */
+  Detector(const FSA* dict) : _dictionary(*dict) {}
+
+  /**
+   * @brief Destructor.
+   */
+  ~Detector() {}
+
+  /**
+   * @brief Detect terms and phrases in a text.
+   *
+   * @param text   Tokenized text.
+   * @param hits   Object that collects the results.
+   * @param from   Index of the first term in text where detection starts.
+   * @param length Number of terms to consider (-1 means to end of text).
+   */
+  void detect(const NGram &text, Hits &hits,
+              unsigned int from=0, int length=-1) const;
+
+  /**
+   * @brief Detect terms and phrases in a text, using hashed states.
+   *
+   * Same as detect(), but the states passed to Hits::add() are hashed
+   * states.
+   *
+   * @param text   Tokenized text.
+   * @param hits   Object that collects the results.
+   * @param from   Index of the first term in text where detection starts.
+   * @param length Number of terms to consider (-1 means to end of text).
+   */
+  void detectWithHash(const NGram &text, Hits &hits,
+                      unsigned int from=0, int length=-1) const;
+
+private:
+
+  /** Dictionary the detector matches against. */
+  const FSA& _dictionary;
+
+  /** Unimplemented private default constructor. */
+  Detector();
+  /** Unimplemented private copy constructor. */
+  Detector(const Detector&);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/file.h b/fsa/src/vespa/fsa/file.h
new file mode 100644
index 00000000000..414751e4849
--- /dev/null
+++ b/fsa/src/vespa/fsa/file.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2008/05/30
+ * @version $Id$
+ * @file file.h
+ * @brief Currently just %FileAccessMethod
+ */
+
+#pragma once
+
+namespace fsa {
+
+// {{{ FileAccessMethod
+
+/**
+ * @brief File access method enum.
+ */
+enum FileAccessMethod {
+ FILE_ACCESS_UNDEF, /**< Not specified; FSA::read() replaces it with its default access method. */
+ FILE_ACCESS_READ, /**< FSA::read() copies the file into malloc()ed buffers with ::read(). */
+ FILE_ACCESS_MMAP, /**< FSA::read() maps the file read-only with ::mmap(). */
+ FILE_ACCESS_MMAP_WITH_MLOCK /**< As FILE_ACCESS_MMAP, and FSA::read() additionally tries to mlock() the mapping. */
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/fsa.cpp b/fsa/src/vespa/fsa/fsa.cpp
new file mode 100644
index 00000000000..63ff979e411
--- /dev/null
+++ b/fsa/src/vespa/fsa/fsa.cpp
@@ -0,0 +1,413 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file fsa.cpp
+ * @brief Implementation of FSA methods (not inlined)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fsa.h"
+#include "checksum.h"
+
+#include <map>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h> // for ::read(), ::close()
+#include <sys/types.h>
+#include <sys/mman.h> // for ::mmap()
+#include <sys/time.h>
+#include <sys/resource.h> // for getrlimit(), setrlimit(), etc.
+
+
+
+namespace fsa {
+
+// {{{ constants
+// Out-of-class definitions of the FSA static constants. No initializers
+// are given here, so the values presumably come from the declarations
+// in fsa.h -- TODO confirm.
+const uint32_t FSA::MAGIC;
+const uint32_t FSA::VER;
+const symbol_t FSA::EMPTY_SYMBOL;
+const symbol_t FSA::FINAL_SYMBOL;
+// }}}
+
+
+// {{{ FSA::iterator::operator++()
+
+/**
+ * Advance the iterator to the next accepted string.
+ *
+ * Performs a depth-first walk over the automaton: from the current state
+ * the symbols 1..0xfe are tried in increasing order, every existing
+ * transition is followed (the symbol is appended to the current string
+ * and the previous state is pushed on the stack), and the walk stops at
+ * the first final state reached. When a state has no more symbols to
+ * try, the walk backtracks one level. Once backtracking empties the
+ * string, the state is set to 0 and the symbol is left at 0xff, which is
+ * the end position.
+ *
+ * Has no effect on an iterator without an FSA or one already at the end.
+ *
+ * @return Reference to this iterator.
+ */
+FSA::iterator& FSA::iterator::operator++()
+{
+  iteratorItem &it = _item;
+
+  // Nothing to do for an unbound iterator or one already at the end.
+  if(it._fsa==NULL || it._symbol==0xff)
+    return *this;
+
+  // Pristine iterator: begin the walk at the start state.
+  if(it._state==0 && it._symbol==0)
+    it._state = it._fsa->start();
+
+  for(;;){
+    ++it._symbol;
+
+    if(it._symbol>=0xff){ // all symbols tried here: backtrack
+      const unsigned int len = it._string.size();
+      if(len==0){
+        // Backtracked past the root: iteration is finished.
+        it._state = 0;
+        return *this;
+      }
+      // Resume in the parent state, right after the symbol that led here.
+      it._symbol = it._string[len-1];
+      it._string.resize(len-1);
+      it._state = it._stack.back();
+      it._stack.pop_back();
+      continue;
+    }
+
+    const state_t target = it._fsa->delta(it._state,it._symbol);
+    if(!target)
+      continue; // no such transition, try the next symbol
+
+    // Descend along the transition.
+    it._string += it._symbol;
+    it._stack.push_back(it._state);
+    it._state = target;
+    it._symbol = 0;
+    if(it._fsa->isFinal(target))
+      return *this;
+  }
+}
+
+// }}}
+// {{{ FSA::libVER()
+
+/**
+ * Return the FSA::VER constant this translation unit was compiled with.
+ * Presumably used to compare the library version against the headers --
+ * TODO confirm with callers.
+ */
+uint32_t FSA::libVER()
+{
+ return VER;
+}
+
+// }}}
+// {{{ FSA::FSA()
+
+/**
+ * Construct an FSA and load it from a file.
+ *
+ * Every member is first set to its empty value; the file is then loaded
+ * with read() and the outcome is stored in _ok.
+ *
+ * @param file Name of the fsa file (C string).
+ * @param fam  File access method, passed on to read().
+ */
+FSA::FSA(const char *file, FileAccessMethod fam) :
+ _mmap_addr(NULL), _mmap_length(0),
+ _version(0), _serial(0),
+ _state(NULL), _symbol(NULL), _size(0),
+ _data(NULL), _data_size(0), _data_type(DATA_VARIABLE), _fixed_data_size(0),
+ _has_perfect_hash(false),_perf_hash(NULL),
+ _start(0), _ok(false)
+{
+ _ok = read(file, fam);
+}
+
+/**
+ * Construct an FSA and load it from a file.
+ *
+ * Identical to the C string overload; the file name is passed to read()
+ * via c_str().
+ *
+ * @param file Name of the fsa file.
+ * @param fam  File access method, passed on to read().
+ */
+FSA::FSA(const std::string &file, FileAccessMethod fam) :
+ _mmap_addr(NULL), _mmap_length(0),
+ _version(0), _serial(0),
+ _state(NULL), _symbol(NULL), _size(0),
+ _data(NULL), _data_size(0), _data_type(DATA_VARIABLE), _fixed_data_size(0),
+ _has_perfect_hash(false),_perf_hash(NULL),
+ _start(0), _ok(false)
+{
+ _ok = read(file.c_str(), fam);
+}
+
+// }}}
+// {{{ FSA::~FSA()
+
+/**
+ * Destructor: release the automaton storage.
+ *
+ * If the file was memory mapped, the mapping is removed (the table
+ * pointers point into it and must not be freed). Otherwise the tables
+ * are separately allocated buffers and are freed.
+ */
+FSA::~FSA()
+{
+  const bool mapped = (_mmap_addr!=NULL && _mmap_addr!=MAP_FAILED);
+
+  if(mapped){
+    munmap(_mmap_addr,_mmap_length);
+    return;
+  }
+
+  // free(NULL) is a no-op, so tables that were never allocated are fine.
+  free(_state);
+  free(_symbol);
+  free(_data);
+  free(_perf_hash);
+}
+
+// }}}
+// {{{ FSA::reset()
+
+/**
+ * Release the automaton storage and return all members to their empty
+ * values. (_ok is not touched here.)
+ *
+ * A memory mapping, if present, is removed; otherwise the separately
+ * allocated tables are freed.
+ */
+void FSA::reset()
+{
+  const bool mapped = (_mmap_addr!=NULL && _mmap_addr!=MAP_FAILED);
+
+  if(mapped){
+    munmap(_mmap_addr,_mmap_length);
+  }
+  else{
+    // free(NULL) is a no-op, so tables that were never allocated are fine.
+    free(_state);
+    free(_symbol);
+    free(_data);
+    free(_perf_hash);
+  }
+
+  // Header information.
+  _version = 0;
+  _serial = 0;
+  _start = 0;
+
+  // Mapping.
+  _mmap_addr = NULL;
+  _mmap_length = 0;
+
+  // Transition tables.
+  _state = NULL;
+  _symbol = NULL;
+  _size = 0;
+
+  // Data storage.
+  _data = NULL;
+  _data_size = 0;
+  _data_type = DATA_VARIABLE;
+  _fixed_data_size = 0;
+
+  // Perfect hash.
+  _has_perfect_hash = false;
+  _perf_hash = NULL;
+}
+
+// }}}
+// {{{ FSA::read()
+
+/**
+ * @brief Read exactly @a len bytes from a file descriptor.
+ *
+ * Keeps calling ::read() until the requested amount has arrived, so a
+ * short read is not mistaken for a truncated file, and an error return
+ * (-1) is never interpreted as a byte count.
+ *
+ * @param fd  Open file descriptor.
+ * @param buf Destination buffer of at least len bytes.
+ * @param len Number of bytes to read.
+ * @return True if all len bytes were read, false on error or early EOF.
+ */
+static bool readFully(int fd, void *buf, size_t len)
+{
+  uint8_t *p = (uint8_t*)buf;
+
+  while(len>0){
+    ssize_t r = ::read(fd,p,len);
+    if(r<=0)            // error, or end of file before len bytes arrived
+      return false;
+    p += r;
+    len -= (size_t)r;
+  }
+  return true;
+}
+
+/**
+ * @brief Load an automaton from file, replacing the current contents.
+ *
+ * The file consists of a header followed by the symbol table (_size
+ * entries of symbol_t), the state table (_size entries of state_t), the
+ * data block (_data_size bytes) and, if the header says so, the perfect
+ * hash table (_size entries of hash_t). Depending on the access method
+ * the tables are either copied into allocated buffers or accessed
+ * through a read-only memory mapping of the whole file. For files of
+ * version 2000 or newer the checksum of all tables is verified against
+ * the header.
+ *
+ * On any failure the object is reset() to the empty state.
+ *
+ * @param file Name of the fsa file; NULL yields failure.
+ * @param fam  File access method; FILE_ACCESS_UNDEF selects the default.
+ * @return True if the file was loaded and verified successfully.
+ */
+bool FSA::read(const char *file, FileAccessMethod fam)
+{
+  Header header;
+  uint32_t checksum=0;
+
+  reset();
+
+  if(fam==FILE_ACCESS_UNDEF)
+    fam=_default_file_access_method;
+
+  if(file==NULL)
+    return false;
+
+  int fd = ::open(file,O_RDONLY);
+  if(fd<0)
+    return false;
+
+  // The header must be complete before any of its fields is looked at.
+  if(!readFully(fd,&header,sizeof(header)) ||
+     header._magic!=MAGIC || header._version<1000){
+    ::close(fd);  // no fsa had version number below 0.1.0
+    return false;
+  }
+
+  _version = header._version;
+  _serial = header._serial;
+  _size = header._size;
+  _data_size = header._data_size;
+  _data_type = header._data_type;
+  _fixed_data_size = header._fixed_data_size;
+  _start = header._start;
+
+  const size_t symbol_bytes = _size*sizeof(symbol_t);
+  const size_t state_bytes = _size*sizeof(state_t);
+  const size_t hash_bytes = _size*sizeof(hash_t);
+
+  if(fam==FILE_ACCESS_MMAP || fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+    _mmap_length =
+      sizeof(header) +
+      symbol_bytes +
+      state_bytes +
+      _data_size +
+      (header._has_perfect_hash?hash_bytes:0);
+
+    // A file shorter than the header claims must be rejected here:
+    // touching mapped pages beyond the end of the file raises SIGBUS.
+    // (In read mode the same condition shows up as a short read.)
+    off_t file_size = ::lseek(fd,0,SEEK_END);
+    if(file_size<0 || (uint64_t)file_size<(uint64_t)_mmap_length){
+      ::close(fd);
+      reset();
+      return false;
+    }
+
+    _mmap_addr = ::mmap((void*)0, _mmap_length, PROT_READ, MAP_SHARED, fd, 0);
+    if(_mmap_addr==MAP_FAILED){
+      ::close(fd);
+      reset();
+      return false;
+    }
+    if(fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+      // Locking is best effort: a failure leaves the mapping usable.
+      if(mlock(_mmap_addr, _mmap_length)<0) {
+        /* try to increase RLIMIT_MEMLOCK then mlock() again */
+        struct rlimit rl;
+        if(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0) {
+          rl.rlim_cur += _mmap_length + getpagesize();
+          rl.rlim_max += _mmap_length + getpagesize();
+          if(setrlimit(RLIMIT_MEMLOCK, &rl) >= 0)
+            mlock(_mmap_addr, _mmap_length);
+        }
+      }
+    }
+  }
+
+  // From here on _mmap_addr is either NULL (read mode) or a valid mapping.
+  const bool mapped = (_mmap_addr!=NULL);
+
+  // Symbol table.
+  if(!mapped){
+    _symbol = (symbol_t*)malloc(symbol_bytes);
+    if((_symbol==NULL && symbol_bytes>0) ||
+       !readFully(fd,_symbol,symbol_bytes)){
+      ::close(fd);
+      reset();
+      return false;
+    }
+  }
+  else {
+    _symbol = (symbol_t*)((uint8_t*)_mmap_addr + sizeof(header));
+  }
+  checksum += Checksum::compute(_symbol,symbol_bytes);
+
+  // State table.
+  if(!mapped){
+    _state = (state_t*)malloc(state_bytes);
+    if((_state==NULL && state_bytes>0) ||
+       !readFully(fd,_state,state_bytes)){
+      ::close(fd);
+      reset();
+      return false;
+    }
+  }
+  else {
+    _state = (state_t*)((uint8_t*)_mmap_addr + sizeof(header) +
+                        symbol_bytes);
+  }
+  checksum += Checksum::compute(_state,state_bytes);
+
+  // Data block.
+  if(!mapped){
+    _data = (data_t*)malloc(_data_size);
+    if((_data==NULL && _data_size>0) ||
+       !readFully(fd,_data,_data_size)){
+      ::close(fd);
+      reset();
+      return false;
+    }
+  }
+  else {
+    _data = (data_t*)((uint8_t*)_mmap_addr + sizeof(header) +
+                      symbol_bytes +
+                      state_bytes);
+  }
+  checksum += Checksum::compute(_data,_data_size);
+
+  // Optional perfect hash table.
+  if(header._has_perfect_hash){
+    if(!mapped){
+      _perf_hash = (hash_t*)malloc(hash_bytes);
+      if((_perf_hash==NULL && hash_bytes>0) ||
+         !readFully(fd,_perf_hash,hash_bytes)){
+        ::close(fd);
+        reset();
+        return false;
+      }
+    }
+    else {
+      _perf_hash = (hash_t*)((uint8_t*)_mmap_addr + sizeof(header) +
+                             symbol_bytes +
+                             state_bytes +
+                             _data_size);
+    }
+    checksum += Checksum::compute(_perf_hash,hash_bytes);
+    _has_perfect_hash = true;
+  }
+
+  // The mapping (if any) stays valid after the descriptor is closed.
+  ::close(fd);
+
+  if(_version>=2000 && checksum!=header._checksum){
+    reset();  // use checksum since version 0.2.0
+    return false;
+  }
+
+  return true;
+}
+// }}}
+// {{{ FSA::revLookup()
+
+/**
+ * Reverse lookup: build the string that belongs to a perfect hash value.
+ *
+ * Phase 1 walks from the start state and accumulates hashDelta() values
+ * in `current` until it is no longer smaller than `hash`. Phase 2 then
+ * follows the lowest-numbered existing transition of each state until a
+ * final state is reached.
+ *
+ * @param hash Hash value to look up.
+ * @return The string built by the walk, or an empty string if the
+ *         automaton has no perfect hash or the walk hits a state without
+ *         outgoing transitions.
+ */
+std::string FSA::revLookup(hash_t hash) const
+{
+ state_t state = start();
+ state_t next,last_next, current_next;
+ hash_t current = 0,d,last_d;
+ std::string current_string;
+ symbol_t symbol,last_symbol,current_symbol;
+
+ if(!hasPerfectHash())
+ return std::string();
+ last_symbol=current_symbol=0;
+
+ // Phase 1: choose one transition per iteration until the accumulated
+ // hash reaches the requested value.
+ while(current<hash){
+ last_symbol=current_symbol=0;
+ last_next=current_next=0;
+ d=last_d=0;
+ // Scan the existing transitions in increasing symbol order, keeping
+ // the most recent one (current_*) and the one before it (last_*).
+ for(symbol=1;symbol<=254;symbol++){
+ next=delta(state,symbol);
+ if(next){
+ last_symbol=current_symbol;
+ current_symbol=symbol;
+ last_next=current_next;
+ current_next=next;
+ last_d=d;
+ d=hashDelta(state,symbol);
+ if(current+d>=hash) // reached or passed the target: stop scanning
+ break;
+ }
+ }
+ if(current_symbol==0) // state has no outgoing transition at all
+ return std::string();
+ if(current+d<=hash){
+ // Exact hit, or every transition stays below the target: take the
+ // most recently seen transition.
+ current_string+=(char)current_symbol;
+ state=current_next;
+ current+=d;
+ }
+ else{
+ // The most recent transition overshoots: step back to the previous one.
+ // NOTE(review): if the very first transition already overshoots,
+ // last_symbol and last_next are still 0 here, so a NUL character is
+ // appended and state becomes 0 -- confirm this cannot happen.
+ current_string+=(char)last_symbol;
+ state=last_next;
+ current+=last_d;
+ }
+ }
+
+ // Phase 2: extend the string along the first existing transition of
+ // each state until a final state is reached.
+ while(!isFinal(state)){
+ for(symbol=1;symbol<=254;symbol++){
+ next=delta(state,symbol);
+ if(next){
+ current_string+=(char)symbol;
+ state=next;
+ break;
+ }
+ }
+ if(symbol==255) // loop ran to completion: no transition found
+ return std::string();
+ }
+
+ return current_string;
+}
+
+// }}}
+
+// {{{ FSA::printDot()
+
+/**
+ * Write the automaton as a graph in the dot language.
+ *
+ * The automaton is traversed depth first from the start state, trying
+ * the symbols 1..0xfe in increasing order. Every transition found is
+ * written as an edge labelled with its symbol; a target state is entered
+ * (and, if final, declared with a double circle) only the first time it
+ * is seen. The start state is written as the node "start", every other
+ * state as "n<state>".
+ *
+ * Because the label is a double-quoted dot string, the symbols '"' and
+ * '\' are written with a preceding backslash; unescaped they would
+ * terminate the string early or swallow the closing quote.
+ *
+ * @param out Output stream to write to.
+ */
+void FSA::printDot(std::ostream &out) const
+{
+  state_t start,state,next;
+  symbol_t symbol;
+  std::list<state_t> state_stack;
+  std::list<symbol_t> symbol_stack;
+  std::map<state_t,bool> visited;
+  bool v;
+
+
+  symbol=0;
+  start=state=this->start();
+
+  out << "digraph fsa {\n";
+  out << " node [label=\"\",shape=circle]\n";
+  out << " start [label=start]\n";
+
+  while(1){
+    symbol++;
+    if(symbol<0xff){
+      next=delta(state,symbol);
+      if(next){
+        v=visited[next];
+        if(!v && isFinal(next))
+          out << " n" << next << " [shape=doublecircle]\n";
+        out << " ";
+        if(state==start)
+          out << "start";
+        else
+          out << "n" << state;
+        out << " -> n" << next << " [label=\"";
+        if(symbol=='"' || symbol=='\\')
+          out << '\\';  // keep the quoted label well formed
+        out << char(symbol) << "\"]\n";
+        if(!v){
+          // First visit: descend, remembering where to resume.
+          visited[next]=true;
+          symbol_stack.push_back(symbol);
+          state_stack.push_back(state);
+          state = next;
+          symbol = 0;
+        }
+      }
+    }
+    else { // backtrack
+      if(state_stack.size()>0){
+        symbol = symbol_stack.back();
+        symbol_stack.pop_back();
+        state = state_stack.back();
+        state_stack.pop_back();
+      }
+      else{
+        break;
+      }
+    }
+  }
+
+  out << "}\n";
+
+}
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/fsa.h b/fsa/src/vespa/fsa/fsa.h
new file mode 100644
index 00000000000..a508b1eb0f4
--- /dev/null
+++ b/fsa/src/vespa/fsa/fsa.h
@@ -0,0 +1,2312 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file fsa.h
+ * @brief Class definition of the %FSA (%Finite %State %Automaton) matcher
+ */
+
+#pragma once
+
+#include <string>
+#include <list>
+#include <iostream>
+#include <inttypes.h>
+#include <string.h> // for memcpy()
+
+#include "file.h" // for FileAccessMethod
+
+namespace fsa {
+
+// {{{ symbol_t, state_t, hash_t, data_t
+/**
+ * @brief Symbol type used by the automaton. sizeof() should be 1.
+ */
+typedef uint8_t symbol_t;
+
+/**
+ * @brief State type used by the automaton.
+ */
+typedef uint32_t state_t;
+
+/**
+ * @brief Hash type used by the automaton.
+ */
+typedef uint32_t hash_t;
+
+/**
+ * @brief Data type used by the automaton. sizeof() should be 1.
+ */
+typedef uint8_t data_t;
+
+// }}}
+
+
+// {{{ FSA
+
+/**
+ * Forward declaration of friend.
+ */
+class Automaton;
+
+/**
+ * @class FSA
+ * @brief %FSA (%Finite %State %Automaton) matcher.
+ *
+ * The FSA class provides very fast string lookup and perfect hashing
+ * using the Finite State Automaton technology. The automata are built
+ * off-line using the Automaton class.
+ */
+class FSA {
+
+public:
+
+ class Handle; // defined in fsahandle.h
+ class State;
+
+ // {{{ FSA::iterator
+ /**
+ * @class iterator
+ * @brief Iterate through all accepted strings in the fsa.
+ */
+ class iterator {
+
+ friend class State;
+
+ public:
+
+ /**
+ * @class iteratorItem
+ * @brief Helper class for storing iterator state and accessing data.
+ *
+ * Internally, this class stores the state information for the
+ * iterator. Externally, it is used for accessing the data
+ * associated with the iterator position.
+ */
+ class iteratorItem {
+
+ friend class iterator;
+
+ private:
+ std::string _string; /**< The current string. */
+ std::list<state_t> _stack; /**< The stack of visited states. */
+ symbol_t _symbol; /**< Currently examined symbol. */
+ state_t _state; /**< Currently examined state. */
+ const FSA* _fsa; /**< Pointer to the FSA. */
+
+ /**
+ * @brief Default constructor; unimplemented.
+ */
+ iteratorItem();
+
+ /**
+ * @brief Constructor.
+ *
+ * @param fsa Pointer to the %FSA object the iterator is associated with.
+ */
+ iteratorItem(const FSA *fsa) : _string(), _stack(), _symbol(0), _state(0), _fsa(fsa) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * @param fsa Pointer to the %FSA object the iterator is associated with.
+ * @param s State to use as start state.
+ */
+ iteratorItem(const FSA *fsa, state_t s) :
+ _string(), _stack(), _symbol(0), _state(s), _fsa(fsa) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * @param it Pointer to iterator item to copy.
+ */
+ iteratorItem(const iteratorItem& it) : _string(it._string), _stack(it._stack),
+ _symbol(it._symbol), _state(it._state),
+ _fsa(it._fsa) {}
+
+ /**
+ * @brief Destructor.
+ */
+ ~iteratorItem() {}
+
+ public:
+
+ /**
+ * @brief Access the string associated with the iterator position.
+ *
+ * @return Current string.
+ */
+ const std::string& str() const { return _string; }
+
+ /**
+ * @brief Get the size of meta data which belongs to the current string.
+ *
+ * @return The size of meta data.
+ */
+ int dataSize() const { return _fsa->dataSize(_state); }
+
+ /**
+ * @brief Get the meta data which belongs to the current string.
+ *
+ * @return Pointer to the meta data.
+ */
+ const data_t* data() const { return _fsa->data(_state); }
+ };
+
+ private:
+
+ iteratorItem _item; /**< Internal state. */
+
+ /**
+ * @brief Constructor.
+ *
+ * Private constructor, reserved for FSA::State::begin() and end().
+ *
+ * @param fsa Pointer to the FSA object to associate with.
+ * @param s State to use as initial state.
+ */
+ iterator(const FSA *fsa, state_t s) : _item(fsa,s)
+ {
+ if(!fsa->isFinal(s))
+ operator++();
+ }
+
+ public:
+
+ /**
+ * @brief Default constructor.
+ *
+ * Creates an uninitialized iterator. The effect of using any of
+ * the access methods on uninitialized iterators is undefined.
+ */
+ iterator() : _item(NULL) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * @param it iterator object to copy.
+ */
+ iterator(const iterator &it) : _item(it._item) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create an iterator for a given state s. The iterator will
+ * only iterate through possible endings from this state.
+ *
+ * @param s State to create the iterator from.
+ */
+ iterator(const State &s) : _item(s._fsa,s._state)
+ {
+ if(!s.isFinal())
+ operator++();
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Constructor intended for FSA::begin() and end() (it is declared public).
+ *
+ * @param fsa Pointer to the FSA object to associate with.
+ * @param atEnd True for end(), false for begin(). (Default is false.)
+ */
+ iterator(const FSA *fsa, bool atEnd=false) : _item(fsa)
+ {
+ if(atEnd)
+ _item._symbol = 0xff;
+ else
+ operator++();
+ }
+
+ /**
+ * @brief Assignment operator.
+ *
+ * @param it iterator object to set values from.
+ * @return Reference to this iterator object.
+ */
+ iterator& operator=(const iterator &it) { _item=it._item; return *this; }
+
+ /**
+ * @brief Not equal operator.
+ *
+ * @return True if the two iterators do not point to the same position.
+ */
+    bool operator!=(const iterator &it) const
+    {
+      const iteratorItem &lhs = _item;
+      const iteratorItem &rhs = it._item;
+
+      // Two iterators are at the same position only if every part of
+      // their internal state agrees.
+      const bool same =
+        lhs._fsa==rhs._fsa && lhs._symbol==rhs._symbol &&
+        lhs._state==rhs._state && lhs._string==rhs._string &&
+        lhs._stack==rhs._stack;
+      return !same;
+    }
+
+ /**
+ * @brief Prefix increment operator.
+ *
+ * Prefix increment operator. Calling on an uninitialized iterator
+ * (or one which has reached end()) has no effect.
+ *
+ * @return Reference to this.
+ */
+ iterator& operator++();
+
+ /**
+ * @brief Dereference operator.
+ *
+ * @return Const reference to state object for data access.
+ */
+ const iteratorItem& operator*() const { return _item; }
+
+ /**
+ * @brief Dereference operator.
+ *
+ * @return Const pointer to state object for data access.
+ */
+ const iteratorItem* operator->() const { return &_item; }
+
+ };
+
+ // }}}
+
+ // {{{ FSA::State
+ /**
+ * @class State
+ * @brief Class for FSA lookups.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides methods
+ * for transitions and lookups.
+ */
+ class State {
+
+ friend FSA::iterator::iterator(const State &);
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ State();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ State& operator=(const State&);
+
+ protected:
+ const FSA *_fsa; /**< Pointer to the FSA. */
+ state_t _state; /**< Current state. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton.
+ *
+ * @param f Reference to FSA.
+ */
+ State(const FSA& f) : _fsa(&f), _state(_fsa->start()) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton.
+ *
+ * @param f Pointer to FSA.
+ */
+ State(const FSA* f) : _fsa(f), _state(_fsa->start()) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing state. The new state will refer to the
+ * same state of the automaton, but it can be used independently
+ * (e.g. continue with different transitions).
+ *
+ * @param s Reference to state to be duplicated.
+ */
+ State(const State& s) : _fsa(s._fsa), _state(s._state) {}
+
+ /**
+ * @brief Destructor.
+ *
+ * Destructor, does nothing special.
+ */
+ virtual ~State() {}
+
+ /**
+ * @brief Check if the automaton has perfect hash built in.
+ *
+ * Returns true if the automaton was built with a perfect hash included.
+ *
+ * @return True if the automaton has perfect hash.
+ */
+ virtual bool hasPerfectHash() const
+ {
+ return _fsa->hasPerfectHash();
+ }
+
+ /**
+ * @brief Check if the state is valid.
+ *
+ * Returns true if the state is valid, that is the sequence of
+ * transitions leading to this state exists in the automaton.
+ *
+ * @return True if the state is valid.
+ */
+ virtual bool isValid() const
+ {
+ return _state>0;
+ }
+
+ /**
+ * @brief Set the state to the start state of the automaton.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _state = _fsa->start();
+ return _state!=0;
+ }
+
+ /**
+ * @brief Delta transition.
+ *
+ * Perform a delta transition using a single input symbol.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+ _state = _fsa->delta(_state,in);
+ return _state!=0;
+ }
+
+ /**
+ * @brief Try a delta transition.
+ *
+ * Try if a delta transition would succeed, without performing the
+ * transition.
+ *
+ * @param in Input symbol.
+ * @return True if the delta transition would succeed.
+ */
+ virtual bool tryDelta(symbol_t in)
+ {
+ return _fsa->delta(_state,in)!=0;
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using a single input symbol.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start(symbol_t in)
+ {
+ start();
+ return delta(in);
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using a sequence of input symbols.
+ *
+ * @param in Input symbols, zero terminated.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start(const symbol_t *in)
+ {
+ start();
+ return delta(in);
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using a sequence of input symbols.
+ *
+ * @param in Input symbols, zero terminated.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start(const char *in)
+ {
+ start();
+ return delta(in);
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using a sequence of input symbols.
+ *
+ * @param in Input symbols.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start(const std::string &in)
+ {
+ start();
+ return delta(in);
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using an input word.
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool startWord(const std::string &in)
+ {
+ start();
+ return delta(in);
+ }
+
+ /**
+ * @brief Delta transition.
+ *
+ * Perform a delta transition using a sequence of input symbols.
+ *
+ * @param in Input symbols, zero terminated.
+ * @return True if the resulting state is valid.
+ */
+    virtual bool delta(const symbol_t *in)
+    {
+      // Consume symbols up to the zero terminator, giving up as soon as
+      // the state has become invalid.
+      for(const symbol_t *cur=in; *cur && _state>0; ++cur)
+        delta(*cur);
+      return _state!=0;
+    }
+
+ /**
+ * @brief Delta transition.
+ *
+ * Perform a delta transition using a sequence of input symbols.
+ *
+ * @param in Input symbols, zero terminated.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(const char *in)
+ {
+ return delta((const symbol_t *)in);
+ }
+
+ /**
+ * @brief Delta transition.
+ *
+ * Perform a delta transition using a sequence of input symbols.
+ *
+ * @param in Input symbols.
+ * @return True if the resulting state is valid.
+ */
+    virtual bool delta(const std::string &in)
+    {
+      // Consume the string symbol by symbol, giving up as soon as the
+      // state has become invalid.
+      for(unsigned int pos=0; pos<in.length() && _state>0; ++pos)
+        delta(in[pos]);
+      return _state!=0;
+    }
+
+ /**
+ * @brief Delta transition.
+ *
+ * Perform a delta transition using an input word. A word
+ * separator symbol ` ` is inserted before the word if it is not
+ * the first word (the current state is not the start state).
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool deltaWord(const std::string &in)
+ {
+ if(_state!=_fsa->start())
+ delta(' ');
+ return delta(in);
+ }
+
+ /**
+ * @brief Check if the current state is final (accepting) state.
+ *
+ * @return True if the state is final.
+ */
+ virtual bool isFinal(void) const
+ {
+ return _fsa->isFinal(_state);
+ }
+
+ /**
+ * @brief Get the size of a data item.
+ *
+ * Get the size of the data item associated with a final
+ * state. The return value -1 indicates that the current state is
+ * not a final state.
+ *
+ * @return Size of data item, or -1 if the state is not final.
+ */
+ virtual int dataSize(void) const
+ {
+ return _fsa->dataSize(_state);
+ }
+
+ /**
+ * @brief Get the data item.
+ *
+ * Get the data item associated with a final state. The return
+ * value NULL indicates that the current state is not a final
+ * state.
+ *
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ virtual const data_t *data() const
+ {
+ return _fsa->data(_state);
+ }
+
+ /**
+ * @brief Get the data item as a character string.
+ *
+ * Get the data item associated with a final state. The return
+ * value NULL indicates that the current state is not a final
+ * state.
+ *
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ virtual const char *cData() const
+ {
+ return (const char*)(_fsa->data(_state));
+ }
+
+ /**
+ * @brief Get the data item as an unsigned 32-bit integer.
+ *
+ * Get the data item associated with a final state as an unsigned
+ * 32-bit integer. If the data field size is 0 or the state is not
+ * final, zero returned, otherwise 1, 2 or 4 byte integer is
+ * retrieved according to the size and converted to uint32_t.
+ *
+ * @return Numerical data.
+ */
+    virtual uint32_t nData() const
+    {
+      const data_t *da = _fsa->data(_state);
+      int si = _fsa->dataSize(_state);
+
+      // No data (not a final state, or an empty data item): report zero.
+      if(si<=0 || da==NULL)
+        return 0;
+
+      // The data item is a byte sequence with no alignment guarantee, so
+      // wider integers are copied out with memcpy() rather than read
+      // through a cast pointer. Values are in host byte order.
+      switch(si){
+      case 1:
+        return (uint32_t)da[0];
+      case 2:
+      case 3: {
+        // Sizes 2 and 3 are both read as a 16-bit value.
+        uint16_t v16;
+        memcpy(&v16,da,sizeof(v16));
+        return (uint32_t)v16;
+      }
+      case 4:
+      default: {
+        // Sizes of 4 and above are read as a 32-bit value.
+        uint32_t v32;
+        memcpy(&v32,da,sizeof(v32));
+        return v32;
+      }
+      }
+    }
+
+ /**
+ * @brief Dummy hash() method; for simple states returns only
+ * zero. Will be overridden by HashedState etc.
+ *
+ * @return 0
+ */
+ virtual hash_t hash() const
+ {
+ return 0;
+ }
+
+
+ /**
+ * @brief Perform a lookup.
+ *
+ * Perform a string lookup in the automaton (sequence of
+ * transitions, starting from the start state. Returns a pointer
+ * to the data item associated with the final state if the string
+ * is accepted, NULL otherwise.
+ *
+ * @param in Input string.
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ virtual const data_t *lookup(const symbol_t *in)
+ {
+ start(in);
+ return data();
+ }
+
+ /**
+ * @brief Perform a lookup.
+ *
+ * Perform a string lookup in the automaton (sequence of
+ * transitions, starting from the start state. Returns a pointer
+ * to the data item associated with the final state if the string
+ * is accepted, NULL otherwise.
+ *
+ * @param in Input string.
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ virtual const data_t *lookup(const char *in)
+ {
+ return lookup((const symbol_t*)in);
+ }
+
+ /**
+ * @brief Perform a lookup.
+ *
+ * Perform a string lookup in the automaton (sequence of
+ * transitions, starting from the start state. Returns a pointer
+ * to the data item associated with the final state if the string
+ * is accepted, NULL otherwise.
+ *
+ * @param in Input string.
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ virtual const data_t *lookup(const std::string &in)
+ {
+ start(in);
+ return data();
+ }
+
+ /**
+ * @brief Reverse lookup.
+ *
+ * For a given hash value, return the corresponding string.
+ *
+ * @param hash Hash value.
+ * @return String corresponding to hash value, or empty string if
+ * the fsa has no perfect hash or the hash value is out of
+ * range.
+ */
+ virtual std::string revLookup(hash_t hash) const
+ {
+ return _fsa->revLookup(hash);
+ }
+
+ /**
+ * @brief Get iterator pointing to the beginning of the fsa.
+ *
+ * @return iterator pointing to the first string in the fsa.
+ */
+ virtual FSA::iterator begin() const { return FSA::iterator(_fsa,_state); }
+
+ /**
+ * @brief Get iterator pointing past the end of the fsa.
+ *
+ * @return iterator pointing past the last string in the fsa.
+ */
+ virtual FSA::iterator end() const { return FSA::iterator(_fsa,true); }
+
+ };
+
+ // }}}
+
+ // {{{ FSA::HashedState
+ /**
+ * @class HashedState
+ * @brief Class for FSA lookups with perfect hash functionality.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides all
+ * methods of the FSA::State plus perfect hashing functionality.
+ */
+ class HashedState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ HashedState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ HashedState& operator=(const HashedState&);
+
+ protected:
+ hash_t _hash; /**< Hash value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton.
+ *
+ * @param f Reference to FSA.
+ */
+ HashedState(const FSA& f) : State(f), _hash(0) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton.
+ *
+ * @param f Pointer to FSA.
+ */
+ HashedState(const FSA* f) : State(f), _hash(0) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing hashed state.
+ *
+ * @param s Reference to hashed state to copy.
+ */
+ HashedState(const HashedState& s) : State(s), _hash(s._hash) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~HashedState() {}
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::start;
+ using State::delta;
+#else
+ virtual bool start(symbol_t in) { start(); return delta(in); }
+ virtual bool start(const symbol_t *in) { start(); return delta(in); }
+ virtual bool start(const char *in) { start(); return delta(in); }
+ virtual bool start(const std::string &in) { start(); return delta(in); }
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * hash value in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _hash = 0;
+ return State::start();
+ }
+
+ /**
+ * @brief Single-symbol transition which also accumulates the hash.
+ *
+ * Adds the hash delta of the transition taken from the current
+ * state to the running hash value, then delegates the transition
+ * itself to State::delta().
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   const hash_t increment = _fsa->hashDelta(_state,in);
+   _hash += increment;
+   return State::delta(in);
+ }
+
+ /**
+ * @brief Get current hash value.
+ *
+ * For final states, returns the perfect hash value for the input
+ * string which led to the state. For any state (including
+ * final states) the value equals the number of strings accepted
+ * by the automaton which (in an alphabetical ordering) precede
+ * the string leading to the state.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t hash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Obsolete alias for hash(), for backwards compatibility.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t getHash() const
+ {
+ return _hash;
+ }
+
+ };
+
+ // }}}
+
+ // {{{ FSA::CounterState
+ /**
+ * @class CounterState
+ * @brief Class for FSA lookups with counter.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides all
+ * methods of the FSA::State and counts the number of transitions.
+ */
+ class CounterState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ CounterState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ CounterState& operator=(const CounterState&);
+
+ protected:
+ uint32_t _counter; /**< Counter value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Reference to FSA.
+ */
+ CounterState(const FSA& f) : State(f), _counter(0) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Pointer to FSA.
+ */
+ CounterState(const FSA* f) : State(f), _counter(0) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing counter state.
+ *
+ * @param s Reference to counter state to copy.
+ */
+ CounterState(const CounterState& s) : State(s), _counter(s._counter) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~CounterState() {}
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::start;
+ using State::delta;
+#else
+ virtual bool start(symbol_t in) { start(); return delta(in); }
+ virtual bool start(const symbol_t *in) { start(); return delta(in); }
+ virtual bool start(const char *in) { start(); return delta(in); }
+ virtual bool start(const std::string &in) { start(); return delta(in); }
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * counter in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _counter = 0;
+ return State::start();
+ }
+
+ /**
+ * @brief Single-symbol transition which also maintains the counter.
+ *
+ * Delegates to State::delta() and increments the counter only when
+ * that call reports a valid resulting state.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   if(!State::delta(in))
+     return false; // failed transitions are not counted
+   ++_counter;
+   return true;
+ }
+
+ /**
+ * @brief Get current counter value.
+ *
+ * Return the current counter. The counter is the number of
+ * transitions from the start state to the current state.
+ * If the state is not valid anymore, the counter is the number of
+ * transitions to the last valid state.
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t counter() const
+ {
+ return _counter;
+ }
+
+ /**
+ * @brief An alias for counter()
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t getCounter() const
+ {
+ return _counter;
+ }
+
+ };
+ // }}}
+
+ // {{{ FSA::WordCounterState
+ /**
+ * @class WordCounterState
+ * @brief Class for FSA lookups with word counter.
+ *
+ * This class is similar to CounterState, but it counts whole word
+ * transitions. Operations other than start(void), startWord(const std::string&)
+ * or deltaWord(const std::string&) will not modify the counter.
+ */
+ class WordCounterState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ WordCounterState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ WordCounterState& operator=(const WordCounterState&);
+
+ protected:
+ uint32_t _counter; /**< Counter value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Reference to FSA.
+ */
+ WordCounterState(const FSA& f) : State(f), _counter(0) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Pointer to FSA.
+ */
+ WordCounterState(const FSA* f) : State(f), _counter(0) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing word counter state.
+ *
+ * @param s Reference to word counter state to copy.
+ */
+ WordCounterState(const WordCounterState& s) : State(s), _counter(s._counter) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~WordCounterState() {}
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * counter in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _counter = 0;
+ return State::start();
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using an input word.
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool startWord(const std::string &in)
+ {
+ start();
+ return deltaWord(in);
+ }
+
+ /**
+ * @brief Word-level delta transition.
+ *
+ * Feeds a whole word to the automaton. Unless the current state is
+ * the start state (i.e. this is the first word), a word separator
+ * symbol ` ` is fed first. An empty word leaves the state and the
+ * counter untouched. The counter is incremented only if the
+ * transition on the word succeeds.
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool deltaWord(const std::string &in)
+ {
+   if(in.empty())
+     return _state!=0;
+   if(_state!=_fsa->start())
+     delta(' ');
+   if(!delta(in))
+     return false; // failed word transitions are not counted
+   ++_counter;
+   return true;
+ }
+
+ /**
+ * @brief Get current counter value.
+ *
+ * Return the current counter. The counter is the number of
+ * word transitions from the start state to the current state.
+ * If the state is not valid anymore, the counter is the number of
+ * word transitions to the last valid state.
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t counter() const
+ {
+ return _counter;
+ }
+
+ /**
+ * @brief An alias for counter()
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t getCounter() const
+ {
+ return _counter;
+ }
+
+ };
+ // }}}
+
+ // {{{ FSA::MemoryState
+ /**
+ * @class MemoryState
+ * @brief Class for FSA lookups with memory functionality.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides all
+ * methods of the FSA::State and in addition it remembers the
+ * sequence of symbols which led to this state.
+ */
+ class MemoryState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ MemoryState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ MemoryState& operator=(const MemoryState&);
+
+ protected:
+ std::string _memory; /**< Memory value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ *
+ * @param f Reference to FSA.
+ */
+ MemoryState(const FSA& f) : State(f), _memory() {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ *
+ * @param f Pointer to FSA.
+ */
+ MemoryState(const FSA* f) : State(f), _memory() {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ * Reserves space for the memory string.
+ *
+ * @param f Reference to FSA.
+ * @param res Size to pre-reserve.
+ */
+ MemoryState(const FSA& f, unsigned int res) : State(f), _memory()
+ {
+ _memory.reserve(res);
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ * Reserves space for the memory string.
+ *
+ * @param f Pointer to FSA.
+ * @param res Size to pre-reserve.
+ */
+ MemoryState(const FSA* f, unsigned int res) : State(f), _memory()
+ {
+ _memory.reserve(res);
+ }
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing memory state.
+ *
+ * @param s Reference to memory state to copy.
+ */
+ MemoryState(const MemoryState& s) : State(s), _memory(s._memory) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~MemoryState() {}
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::start;
+ using State::delta;
+#else
+ virtual bool start(symbol_t in) { start(); return delta(in); }
+ virtual bool start(const symbol_t *in) { start(); return delta(in); }
+ virtual bool start(const char *in) { start(); return delta(in); }
+ virtual bool start(const std::string &in) { start(); return delta(in); }
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * memory in addition. The remembered symbol sequence is emptied
+ * before the state itself is reset.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ // std::string::clear() is only used with g++ 3.1 or newer; any other
+ // compiler takes the fallback branch and assigns an empty string.
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ _memory.clear();
+#else
+ _memory = "";
+#endif
+ return State::start();
+ }
+
+ /**
+ * @brief Single-symbol transition which also records the symbol.
+ *
+ * Delegates to State::delta() and appends the input symbol to the
+ * memory string only when the resulting state is valid.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   if(!State::delta(in))
+     return false; // invalid transitions are not remembered
+   _memory += static_cast<char>(in);
+   return true;
+ }
+
+ /**
+ * @brief Get current memory value.
+ *
+ * The memory for a state stores the sequence of the
+ * transitions which lead to the current state (or the last valid
+ * state).
+ *
+ * @return Memory value.
+ */
+ virtual std::string memory() const
+ {
+ return _memory;
+ }
+
+ /**
+ * @brief Alias for memory().
+ *
+ * @return Memory value.
+ */
+ virtual std::string getMemory() const
+ {
+ return _memory;
+ }
+
+ };
+
+ // }}}
+
+ // {{{ FSA::HashedMemoryState
+ /**
+ * @class HashedMemoryState
+ * @brief Class for FSA lookups with perfect hash and memory functionality.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides all
+ * methods of the FSA::State plus perfect hashing functionality and
+ * in addition it remembers the sequence of symbols which led to this
+ * state.
+ */
+ class HashedMemoryState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ HashedMemoryState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ HashedMemoryState& operator=(const HashedMemoryState&);
+
+ protected:
+ hash_t _hash; /**< Hash value. */
+ std::string _memory; /**< Memory value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ *
+ * @param f Reference to FSA.
+ */
+ HashedMemoryState(const FSA& f) : State(f), _hash(0), _memory() {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ *
+ * @param f Pointer to FSA.
+ */
+ HashedMemoryState(const FSA* f) : State(f), _hash(0), _memory() {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ * Reserves space for the memory string.
+ *
+ * @param f Reference to FSA.
+ * @param res Size to pre-reserve.
+ */
+ HashedMemoryState(const FSA& f, unsigned int res) : State(f), _hash(0), _memory()
+ {
+ _memory.reserve(res);
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the memory value.
+ * Reserves space for the memory string.
+ *
+ * @param f Pointer to FSA.
+ * @param res Size to pre-reserve.
+ */
+ HashedMemoryState(const FSA* f, unsigned int res) : State(f), _hash(0), _memory()
+ {
+ _memory.reserve(res);
+ }
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing hashed memory state.
+ *
+ * @param s Reference to hashed memory state to copy.
+ */
+ HashedMemoryState(const HashedMemoryState& s) : State(s),
+ _hash(s._hash),
+ _memory(s._memory) {}
+ /**
+ * @brief Destructor.
+ */
+ virtual ~HashedMemoryState() {}
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::start;
+ using State::delta;
+#else
+ virtual bool start(symbol_t in) { start(); return delta(in); }
+ virtual bool start(const symbol_t *in) { start(); return delta(in); }
+ virtual bool start(const char *in) { start(); return delta(in); }
+ virtual bool start(const std::string &in) { start(); return delta(in); }
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * hash and memory in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _hash = 0;
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ _memory.clear();
+#else
+ _memory = "";
+#endif
+ return State::start();
+ }
+
+ /**
+ * @brief Delta transition for hashed memory states.
+ *
+ * Adds the hash delta of the transition to the hash value, then
+ * delegates to State::delta(). The input symbol is appended to the
+ * memory string only when the resulting state is valid.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   _hash += _fsa->hashDelta(_state,in);
+   if(!State::delta(in))
+     return false; // invalid transitions are not remembered
+   _memory += static_cast<char>(in);
+   return true;
+ }
+
+ /**
+ * @brief Get current hash value.
+ *
+ * For final states, returns the perfect hash value for the input
+ * string which led to the state. For any state (including
+ * final states) the value equals the number of strings accepted
+ * by the automaton which (in an alphabetical ordering) precede
+ * the string leading to the state.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t hash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Obsolete alias for hash(), for backwards compatibility.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t getHash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Get current memory value.
+ *
+ * The memory for a state stores the sequence of the
+ * transitions which lead to the current state (or the last valid
+ * state).
+ *
+ * @return Memory value.
+ */
+ virtual std::string memory() const
+ {
+ return _memory;
+ }
+
+ /**
+ * @brief Alias for memory().
+ *
+ * @return Memory value.
+ */
+ virtual std::string getMemory() const
+ {
+ return _memory;
+ }
+
+ };
+
+ // }}}
+
+ // {{{ FSA::HashedCounterState
+ /**
+ * @class HashedCounterState
+ * @brief Class for FSA lookups with counter and hash.
+ *
+ * This class represents the state of a finite state automaton. It
+ * is connected to one FSA for its whole lifetime. Provides all
+ * methods of the FSA::State and counts the number of transitions,
+ * and computes hash value.
+ */
+ class HashedCounterState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ HashedCounterState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ *
+ * Declared with the class's own type so that it really is the copy
+ * assignment operator and therefore blocks assignment of hashed
+ * counter states, like the equivalent declarations in the sibling
+ * state classes.
+ */
+ HashedCounterState& operator=(const HashedCounterState&);
+
+ protected:
+ hash_t _hash; /**< Hash value. */
+ uint32_t _counter; /**< Counter value. */
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Reference to FSA.
+ */
+ HashedCounterState(const FSA& f) : State(f), _hash(0), _counter(0) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Pointer to FSA.
+ */
+ HashedCounterState(const FSA* f) : State(f), _hash(0), _counter(0) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing hashed counter state.
+ *
+ * @param s Reference to hashed counter state to copy.
+ */
+ HashedCounterState(const HashedCounterState& s) : State(s), _hash(s._hash), _counter(s._counter) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~HashedCounterState() {}
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::start;
+ using State::delta;
+#else
+ virtual bool start(symbol_t in) { start(); return delta(in); }
+ virtual bool start(const symbol_t *in) { start(); return delta(in); }
+ virtual bool start(const char *in) { start(); return delta(in); }
+ virtual bool start(const std::string &in) { start(); return delta(in); }
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * hash value and the counter in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _hash = 0;
+ _counter = 0;
+ return State::start();
+ }
+
+ /**
+ * @brief Delta transition for hashed counter states.
+ *
+ * Adds the hash delta of the transition to the hash value, then
+ * delegates to State::delta(). The counter is incremented only
+ * when the resulting state is valid.
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   _hash += _fsa->hashDelta(_state,in);
+   if(!State::delta(in))
+     return false; // failed transitions are not counted
+   ++_counter;
+   return true;
+ }
+
+ /**
+ * @brief Get current hash value.
+ *
+ * For final states, returns the perfect hash value for the input
+ * string which led to the state. For any state (including
+ * final states) the value equals the number of strings accepted
+ * by the automaton which (in an alphabetical ordering) precede
+ * the string leading to the state.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t hash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Obsolete alias for hash(), for backwards compatibility.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t getHash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Get current counter value.
+ *
+ * Return the current counter. The counter is the number of
+ * transitions from the start state to the current state.
+ * If the state is not valid anymore, the counter is the number of
+ * transitions to the last valid state.
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t counter() const
+ {
+ return _counter;
+ }
+
+ /**
+ * @brief An alias for counter()
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t getCounter() const
+ {
+ return _counter;
+ }
+
+ };
+ // }}}
+
+ // {{{ FSA::HashedWordCounterState
+ /**
+ * @class HashedWordCounterState
+ * @brief Class for FSA lookups with word counter and hash.
+ *
+ * This class is similar to HashedCounterState, but it counts whole word
+ * transitions. Operations other than start(void), startWord(const std::string&)
+ * or deltaWord(const std::string&) will not modify the counter.
+ */
+ class HashedWordCounterState : public State {
+
+ private:
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ HashedWordCounterState();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ HashedWordCounterState& operator=(const HashedWordCounterState&);
+
+ protected:
+ hash_t _hash; /**< Hash value. */
+ uint32_t _counter; /**< Counter value. */
+
+
+#if ((__GNUG__ == 3 && __GNUC_MINOR__ >= 1) || __GNUG__ > 3)
+ using State::delta;
+#else
+ virtual bool delta(const symbol_t *in)
+ {
+ const symbol_t *p=in;
+ while(*p && _state>0){
+ delta(*p);
+ p++;
+ }
+ return _state!=0;
+ }
+ virtual bool delta(const char *in) { return delta((const symbol_t *)in); }
+ virtual bool delta(const std::string &in)
+ {
+ unsigned int idx=0;
+
+ while(idx<in.length() && _state>0){
+ delta(in[idx]);
+ idx++;
+ }
+ return _state!=0;
+ }
+#endif
+
+ /**
+ * @brief Delta transition for hashed word counter states.
+ *
+ * Adds the hash delta of the transition to the hash value and then
+ * delegates to State::delta(). The counter is not touched here. The
+ * method is protected so it is not accessible outside (only
+ * deltaWord is).
+ *
+ * @param in Input symbol.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool delta(symbol_t in)
+ {
+   const hash_t increment = _fsa->hashDelta(_state,in);
+   _hash += increment;
+   return State::delta(in);
+ }
+
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Reference to FSA.
+ */
+ HashedWordCounterState(const FSA& f) : State(f), _hash(0), _counter(0) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new state from an FSA, and set it to the starting
+ * state of the automaton. Also reset the counter.
+ *
+ * @param f Pointer to FSA.
+ */
+ HashedWordCounterState(const FSA* f) : State(f), _hash(0), _counter(0) {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate an existing hashed word counter state.
+ *
+ * @param s Reference to hashed word counter state to copy.
+ */
+ HashedWordCounterState(const HashedWordCounterState& s) : State(s), _hash(s._hash), _counter(s._counter) {}
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~HashedWordCounterState() {}
+
+ /**
+ * @brief Set the state to the starting state of the automaton.
+ *
+ * This method overrides the State::start() method, and resets the
+ * hash value and the counter in addition.
+ *
+ * @return True if the resulting state is valid.
+ */
+ virtual bool start()
+ {
+ _hash = 0;
+ _counter = 0;
+ return State::start();
+ }
+
+ /**
+ * @brief Start and transition.
+ *
+ * Sets the state to the starting state of the automaton, and
+ * performs a transition using an input word.
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool startWord(const std::string &in)
+ {
+ start();
+ return deltaWord(in);
+ }
+
+ /**
+ * @brief Word-level delta transition.
+ *
+ * Feeds a whole word to the automaton. Unless the current state is
+ * the start state (i.e. this is the first word), a word separator
+ * symbol ` ` is fed first. An empty word leaves the state, the hash
+ * and the counter untouched. The counter is incremented only if the
+ * transition on the word succeeds.
+ *
+ * @param in Input word.
+ * @return True if the resulting state is valid.
+ */
+ virtual bool deltaWord(const std::string &in)
+ {
+   if(in.empty())
+     return _state!=0;
+   if(_state!=_fsa->start())
+     delta(' ');
+   if(!delta(in))
+     return false; // failed word transitions are not counted
+   ++_counter;
+   return true;
+ }
+
+ /**
+ * @brief Get current hash value.
+ *
+ * For final states, returns the perfect hash value for the input
+ * string which led to the state. For any state (including
+ * final states) the value equals the number of strings accepted
+ * by the automaton which (in an alphabetical ordering) precede
+ * the string leading to the state.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t hash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Obsolete alias for hash(), for backwards compatibility.
+ *
+ * @return Hash value.
+ */
+ virtual hash_t getHash() const
+ {
+ return _hash;
+ }
+
+ /**
+ * @brief Get current counter value.
+ *
+ * Return the current counter. The counter is the number of
+ * word transitions from the start state to the current state.
+ * If the state is not valid anymore, the counter is the number of
+ * word transitions to the last valid state.
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t counter() const
+ {
+ return _counter;
+ }
+
+ /**
+ * @brief An alias for counter()
+ *
+ * @return Counter value.
+ */
+ virtual uint32_t getCounter() const
+ {
+ return _counter;
+ }
+
+ };
+
+ // }}}
+
+#if (__GNUG__ < 3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+ friend class State;
+ friend class HashedState;
+ friend class MemoryState;
+ friend class HashedMemoryState;
+ friend class CounterState;
+ friend class HashedCounterState;
+ friend class WordCounterState;
+ friend class HashedWordCounterState;
+#endif
+
+public:
+ /**
+ * @brief Magic number for identifying fsa files.
+ */
+ static const uint32_t MAGIC = 0x79832469;
+
+ /**
+ * @brief Version number.
+ *
+ * Version number for identifying the fsa library and files. The
+ * format is MMMmmmrrr, M=major, m=minor, r=revision. 1000 equals
+ * 0.1.0.
+ */
+ static const uint32_t VER = 2000001;
+
+ /**
+ * @brief Library version number.
+ *
+ * Static method which returns the library version.
+ */
+ static uint32_t libVER();
+
+ /**
+ * @brief Reserved symbol used for empty cells in internal tables.
+ */
+ static const symbol_t EMPTY_SYMBOL = 0x00;
+
+ /**
+ * @brief Reserved symbol used for final states in internal tables.
+ */
+ static const symbol_t FINAL_SYMBOL = 0xff;
+
+ /**
+ * @brief Type of data items for final states.
+ *
+ * Type of data items for final states. The possible values are:
+ * - DATA_VARIABLE (0) - variable size data items, the size is
+ * stored with each item
+ * - DATA_FIXED (1) - fixed size data items. The size is only
+ * stored once in the header.
+ */
+ enum Data_Type {
+ DATA_VARIABLE = 0,
+ DATA_FIXED
+ };
+
+ /**
+ * @struct Header
+ * @brief %FSA header.
+ *
+ * Header structure of the %FSA files. It consists of ten named
+ * 32-bit fields followed by 54 reserved 32-bit words, i.e. 64
+ * words (256 bytes) in total.
+ */
+ struct Header {
+ uint32_t _magic; /**< Magic number. */
+ uint32_t _version; /**< Version number. */
+ uint32_t _checksum; /**< Checksum. */
+ uint32_t _size; /**< Size of fsa (cells). */
+ uint32_t _start; /**< Start state. */
+ uint32_t _data_size; /**< Size of data. */
+ uint32_t _data_type; /**< Type of data items. */
+ uint32_t _fixed_data_size; /**< Data item size if fixed. */
+ uint32_t _has_perfect_hash; /**< Indicator for perfect hash. */
+ uint32_t _serial; /**< Serial number. */
+ uint32_t _reserved[54]; /**< Reserved (pads size to 256 bytes). */
+ };
+
+ /**
+ * @struct Descriptor
+ * @brief %FSA descriptor.
+ *
+ * %FSA descriptor for creating FSA objects directly from Automaton
+ * objects (used by Automaton::getFSA()). Each field is copied into
+ * the FSA member of the same name by the private
+ * FSA(Descriptor&) constructor.
+ */
+ struct Descriptor {
+ uint32_t _version; /**< Version of fsalib used to build the fsa. */
+ uint32_t _serial; /**< Serial number of the fsa. */
+ state_t *_state; /**< State table for transitions. */
+ symbol_t *_symbol; /**< Symbol table for transitions. */
+ uint32_t _size; /**< Size (number of cells). */
+ data_t *_data; /**< Data storage. */
+ uint32_t _data_size; /**< Size of data storage. */
+ uint32_t _data_type; /**< Type of data items (fixed or variable). */
+ uint32_t _fixed_data_size; /**< Size of data items if fixed. */
+ hash_t *_perf_hash; /**< Perfect hash table, NULL if the fsa has none. */
+ uint32_t _start; /**< Index of start state. */
+ };
+
+private:
+
+ static const FileAccessMethod _default_file_access_method = FILE_ACCESS_MMAP; /**< Default file access method (read/mmap). */
+
+ void *_mmap_addr; /**< mmap address, NULL if the file has not been mmapped. */
+ size_t _mmap_length; /**< mmap length. */
+
+ uint32_t _version; /**< Version of fsalib used to build this fsa. */
+ uint32_t _serial; /**< Serial number of this fsa. */
+
+ state_t *_state; /**< State table for transitions. */
+ symbol_t *_symbol; /**< Symbol table for transitions. */
+ uint32_t _size; /**< Size (number of cells). */
+
+ data_t *_data; /**< Data storage. */
+ uint32_t _data_size; /**< Size of data storage. */
+ uint32_t _data_type; /**< Type of data items (fixed or var.) */
+ uint32_t _fixed_data_size; /**< Size of data items if fixed. */
+
+ bool _has_perfect_hash; /**< Indicator of perfect hash present. */
+ hash_t *_perf_hash; /**< Perfect hash table, if present. */
+
+ state_t _start; /**< Index of start state. */
+
+ bool _ok; /**< Flag set if object initialization succeeded. */
+
+public:
+
+ /**
+ * @brief Constructor.
+ *
+ * Initializes the object from an fsa file.
+ *
+ * @param file Name of fsa file.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global default access mode will be used.
+ */
+ FSA(const char *file, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+ FSA(const std::string &file, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~FSA();
+
+ /**
+ * @brief Check if initialization was successful.
+ *
+ * @return True if the initialization of the object succeeded.
+ */
+ bool isOk() const
+ {
+ return _ok;
+ }
+
+ /**
+ * @brief Get the fsa library version used for building this %FSA.
+ *
+ * @return fsa library version.
+ */
+ uint32_t version(void) const
+ {
+ return _version;
+ }
+
+ /**
+ * @brief Get the serial number of the %FSA.
+ *
+ * @return Serial number.
+ */
+ uint32_t serial(void) const
+ {
+ return _serial;
+ }
+
+ /**
+ * @brief Check if the %FSA has a perfect hash.
+ *
+ * @return True if the %FSA was built with perfect hash.
+ */
+ bool hasPerfectHash(void) const
+ {
+ return _has_perfect_hash;
+ }
+
+ /**
+ * @brief Get the start state of the %FSA.
+ *
+ * @return Index of the start state (0 if the %FSA is empty).
+ */
+ state_t start() const
+ {
+ return _start;
+ }
+
+ /**
+ * @brief Perform a delta transition.
+ *
+ * Performs a delta transition in the automaton. The cell at index
+ * (current state + input symbol) is inspected: if it is labelled
+ * with the input symbol, the state stored in that cell is the new
+ * state.
+ *
+ * @param fs Index of current state.
+ * @param in Input symbol.
+ * @return Index of new state, or 0 if the cell is not labelled
+ *         with the input symbol.
+ */
+ state_t delta(state_t fs, symbol_t in) const
+ {
+   // No explicit fs==0 guard is needed: state 0 is never packed, so
+   // _symbol[in]!=in always holds and the lookup below yields 0.
+   const state_t target=fs+in;
+   return (_symbol[target]==in) ? _state[target] : 0;
+ }
+
+ /**
+ * @brief Get hash delta for a transition.
+ *
+ * The perfect hash value for a final state is obtained from the sum
+ * of hash deltas for the transitions leading to that state. The
+ * delta is 0 when the automaton has no perfect hash, when the
+ * current state is 0, or when the cell for the transition is not
+ * labelled with the input symbol.
+ *
+ * @param fs Index of current state.
+ * @param in Input symbol.
+ * @return Hash delta for the transition.
+ */
+ hash_t hashDelta(state_t fs, symbol_t in) const
+ {
+   if(!_has_perfect_hash || fs==0 || _symbol[fs+in]!=in)
+     return 0;
+   return _perf_hash[fs+in];
+ }
+
+ /**
+ * @brief Check if the state is a final (accepting) state.
+ *
+ * State 0 is never final; any other state is final when its
+ * FINAL_SYMBOL cell is labelled with FINAL_SYMBOL.
+ *
+ * @param fs State.
+ * @return True if the state is final.
+ */
+ bool isFinal(state_t fs) const
+ {
+   return fs!=0 && _symbol[fs+FINAL_SYMBOL]==FINAL_SYMBOL;
+ }
+
+ /**
+ * @brief Reverse lookup.
+ *
+ * For a given hash value, return the corresponding string.
+ *
+ * @param hash Hash value.
+ * @return String corresponding to hash value, or empty string if
+ * the fsa has no perfect hash or the hash value is out of
+ * range.
+ */
+ std::string revLookup(hash_t hash) const;
+
+ /**
+ * @brief Get the size of data item associated with a final state.
+ *
+ * For fixed size data the common item size is returned; otherwise
+ * the size is read as a 32-bit value from the data storage at the
+ * offset recorded for the state.
+ *
+ * @param fs State.
+ * @return Size of data item, or -1 if the state is not final.
+ */
+ int dataSize(state_t fs) const
+ {
+   if(fs==0 || _symbol[fs+FINAL_SYMBOL]!=FINAL_SYMBOL)
+     return -1;
+   if(_data_type==DATA_FIXED)
+     return _fixed_data_size;
+   return (int)(*((uint32_t*)(_data+_state[fs+FINAL_SYMBOL])));
+ }
+
+ /**
+ * @brief Get a pointer to the data item associated with a final state.
+ *
+ * For fixed size data the pointer is the recorded offset into the
+ * data storage; for variable size data it is advanced by
+ * sizeof(uint32_t) beyond that offset.
+ *
+ * @param fs State.
+ * @return Pointer to data item, or NULL if the state is not final.
+ */
+ const data_t *data(unsigned int fs) const
+ {
+   if(fs==0 || _symbol[fs+FINAL_SYMBOL]!=FINAL_SYMBOL)
+     return NULL;
+   const data_t *item = _data+_state[fs+FINAL_SYMBOL];
+   return (_data_type==DATA_FIXED) ? item : item+sizeof(uint32_t);
+ }
+
+ /**
+ * @brief Print the fsa in dot (graphviz) format.
+ *
+ * @param out Output stream (std::cout if omitted).
+ */
+ void printDot(std::ostream &out=std::cout) const;
+
+ /**
+ * @brief Get iterator pointing to the beginning of the fsa.
+ *
+ * @return iterator pointing to the first string in the fsa.
+ */
+ FSA::iterator begin() const { return FSA::iterator(this); }
+
+ /**
+ * @brief Get iterator pointing past the end of the fsa.
+ *
+ * @return iterator pointing past the last string in the fsa.
+ */
+ FSA::iterator end() const { return FSA::iterator(this,true); }
+
+private:
+
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ FSA();
+ /**
+ * @brief Unimplemented private copy constructor.
+ */
+ FSA(const FSA&);
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ const FSA& operator=(const FSA&);
+
+ /**
+ * Automaton needs access to a private constructor.
+ */
+ friend class Automaton;
+
+ /**
+ * @brief Constructor.
+ *
+ * Initializes the object from ready memory buffers.
+ * (Used by Automaton::PackedAutomaton::getFSA.)
+ * No file is mapped in this case, so the mmap address and length
+ * are cleared. The object is marked as successfully initialized so
+ * that isOk() returns a defined value.
+ *
+ * @param d Descriptor containing all FSA parameters.
+ */
+ FSA(Descriptor &d) :
+   _mmap_addr(NULL), _mmap_length(0),
+   _version(d._version), _serial(d._serial),
+   _state(d._state), _symbol(d._symbol), _size(d._size),
+   _data(d._data), _data_size(d._data_size), _data_type(d._data_type),
+   _fixed_data_size(d._fixed_data_size),
+   _has_perfect_hash(d._perf_hash!=NULL),_perf_hash(d._perf_hash),
+   _start(d._start), _ok(true)
+ {
+ }
+
+ /**
+ * @brief Reset the object.
+ *
+ * Resets the object to an empty %FSA, and releases allocated memory.
+ */
+ void reset();
+
+ /**
+ * @brief Read the %FSA from file.
+ *
+ * Reads the %FSA from a file. Returns true on success.
+ *
+ * @param filename Name of fsa file.
+ * @return True on success.
+ */
+ bool read(const char *filename, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/metadata.cpp b/fsa/src/vespa/fsa/metadata.cpp
new file mode 100644
index 00000000000..2a9d511cc91
--- /dev/null
+++ b/fsa/src/vespa/fsa/metadata.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file metadata.cpp
+ * @brief Generic meta data class implementation.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "metadata.h"
+#include "fstream"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h> // for ::mmap()
+#include <sys/time.h>
+#include <sys/resource.h> // for getrlimit(), setrlimit(), etc.
+
+namespace fsa {
+
+// {{{ constants
+
+const uint32_t MetaData::MAGIC; // out-of-class definition; the value is given by the in-class initializer in metadata.h
+
+// }}}
+
+// {{{ MetaData::MetaData()
+
+/**
+ * Construct from a C string file name. All members start out empty,
+ * then read() is attempted and its result is stored as the ok flag.
+ */
+MetaData::MetaData(const char *datafile, FileAccessMethod fam)
+  : _mmap_addr(NULL),
+    _mmap_length(0),
+    _ok(false),
+    _header(),
+    _data(NULL)
+{
+  _ok = read(datafile, fam);
+}
+
+/**
+ * Construct from a std::string file name; otherwise identical to the
+ * C string variant.
+ */
+MetaData::MetaData(const std::string &datafile, FileAccessMethod fam)
+  : _mmap_addr(NULL),
+    _mmap_length(0),
+    _ok(false),
+    _header(),
+    _data(NULL)
+{
+  _ok = read(datafile.c_str(), fam);
+}
+
+// }}}
+// {{{ MetaData::~MetaData()
+
+/**
+ * Destructor. Releases the mapping or the allocated buffer by
+ * delegating to reset().
+ */
+MetaData::~MetaData()
+{
+  reset();
+}
+
+// }}}
+
+// {{{ MetaData::reset()
+
+/**
+ * Return the object to the empty state.
+ *
+ * If a valid mapping exists it is unmapped (the data pointer points
+ * into the mapping in that case and must not be freed). Otherwise a
+ * non-NULL data pointer is a malloc'ed buffer and is freed.
+ */
+void MetaData::reset()
+{
+  const bool mapped = (_mmap_addr!=NULL && _mmap_addr!=MAP_FAILED);
+
+  if(mapped){
+    munmap(_mmap_addr,_mmap_length);
+  }
+  else if(_data!=NULL){
+    free(_data);
+  }
+
+  _mmap_addr   = NULL;
+  _mmap_length = 0;
+  _ok          = false;
+  _data        = NULL;
+}
+
+// }}}
+// {{{ MetaData::read()
+
+/**
+ * Read a metadata file, either by mapping it or by reading it into a
+ * malloc'ed buffer.
+ *
+ * Any previously loaded data is released first. The header is always
+ * read with ::read() and validated against the magic number. With
+ * one of the mmap access methods the whole file (header + data) is
+ * mapped and the data pointer is set just past the header; otherwise
+ * the data is read into a newly allocated buffer.
+ *
+ * @param datafile Name of the metadata file (NULL fails).
+ * @param fam      File access method; FILE_ACCESS_UNDEF selects the
+ *                 class default.
+ * @return True on success. On failure the object is left empty.
+ */
+bool MetaData::read(const char *datafile, FileAccessMethod fam)
+{
+  reset();
+
+  if(fam==FILE_ACCESS_UNDEF)
+    fam=_default_file_access_method;
+
+  if(datafile==NULL)
+    return false;
+
+  int fd = ::open(datafile,O_RDONLY);
+  if(fd<0)
+    return false;
+
+  ssize_t r=::read(fd,&_header,sizeof(_header));
+  if(r!=(ssize_t)sizeof(_header) || _header._magic!=MetaData::MAGIC){
+    ::close(fd);
+    return false;
+  }
+
+  if(fam==FILE_ACCESS_MMAP || fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+    _mmap_length = sizeof(_header) + _header._size;
+
+    // A mapping may extend past the end of the file, but touching the
+    // pages beyond it faults. Reject truncated files up front, the
+    // same way the plain read path below does.
+    off_t file_size = ::lseek(fd,0,SEEK_END);
+    if(file_size<0 ||
+       (unsigned long long)file_size<(unsigned long long)_mmap_length){
+      ::close(fd);
+      reset();
+      return false;
+    }
+
+    _mmap_addr = ::mmap((void*)0, _mmap_length, PROT_READ, MAP_SHARED, fd, 0);
+    if(_mmap_addr==MAP_FAILED){
+      ::close(fd);
+      reset();
+      return false;
+    }
+    if(fam==FILE_ACCESS_MMAP_WITH_MLOCK){
+      if(mlock(_mmap_addr, _mmap_length)<0) {
+        /* try to increase RLIMIT_MEMLOCK then mlock() again */
+        struct rlimit rl;
+        if(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0) {
+          const rlim_t extra = _mmap_length + getpagesize();
+          // Adding to RLIM_INFINITY would wrap around and *lower* the limit.
+          if(rl.rlim_cur!=RLIM_INFINITY) rl.rlim_cur += extra;
+          if(rl.rlim_max!=RLIM_INFINITY) rl.rlim_max += extra;
+          if(setrlimit(RLIMIT_MEMLOCK, &rl) >= 0)
+            mlock(_mmap_addr, _mmap_length); // best effort, failure is ignored
+        }
+      }
+    }
+  }
+
+  if(_mmap_addr==NULL){
+    _data = malloc(_header._size);
+    if(_data==NULL && _header._size>0){
+      ::close(fd);
+      reset();
+      return false;
+    }
+    // ::read() may legitimately return fewer bytes than requested, so
+    // keep reading until the payload is complete, or EOF/error.
+    size_t done=0;
+    while(done<_header._size){
+      r=::read(fd,(uint8_t*)_data+done,_header._size-done);
+      if(r<=0)
+        break;
+      done+=(size_t)r;
+    }
+    if(done!=_header._size){
+      ::close(fd);
+      reset();
+      return false;
+    }
+  }
+  else {
+    _data = (void*)((uint8_t*)_mmap_addr + sizeof(_header));
+  }
+
+  ::close(fd);
+
+  return true;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/metadata.h b/fsa/src/vespa/fsa/metadata.h
new file mode 100644
index 00000000000..132ecb1d157
--- /dev/null
+++ b/fsa/src/vespa/fsa/metadata.h
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/12/17
+ * @version $Id$
+ * @file metadata.h
+ * @brief Generic metadata associated with perfect hash values. The
+ * data structure is completely up to the user, but it is
+ * usually an array of fixed size records indexed by the
+ * perfect hash value, or it contains an index which maps the
+ * perfect hash values to variable size records.
+ *
+ */
+
+#pragma once
+
+#include <stdlib.h>
+#include "fsa.h"
+
+
+namespace fsa {
+
+// {{{ class MetaData
+
+/**
+ * @class MetaData
+ * @brief Class for representing generic metadata.
+ *
+ * Generic metadata associated with perfect hash values. The data
+ * structure is completely up to the user, but it is usually an array
+ * of fixed size records indexed by the perfect hash value, or it
+ * contains an index which maps the perfect hash values to variable
+ * size records.
+ */
+class MetaData {
+
+public:
+
+ class Handle; // defined in metadatahandle.h
+
+private:
+ static const uint32_t MAGIC = 0x873EA98B; /**< Magic number identifying metadata files. */
+
+ static const FileAccessMethod _default_file_access_method = FILE_ACCESS_MMAP; /**< Default file access method (read/mmap). */
+
+ /**
+ * @struct Header
+ * @brief Metadata file header.
+ */
+ struct Header {
+ uint32_t _magic; /**< Magic number. */
+ uint32_t _version; /**< Version number. (currently not used) */
+ uint32_t _checksum; /**< Checksum. (currently not used) */
+ uint32_t _size; /**< Size of the data in bytes (header not included). */
+ uint32_t _reserved[10]; /**< Reserved for later use. */
+ uint32_t _user[50]; /**< User defined fields. */
+ };
+
+ void *_mmap_addr; /**< mmap address, NULL if file has not been mmapped. */
+ size_t _mmap_length; /**< mmap length. */
+
+ bool _ok; /**< Flag indicating successful initialization. */
+ Header _header; /**< File header, filled in by read(). */
+ void *_data; /**< Pointer to the data that follows the header. */
+
+ /**
+ * @brief Reset the object.
+ *
+ * Resets the object to an empty %MetaData, and releases allocated memory.
+ */
+ void reset();
+
+ /**
+ * @brief Read the metadata file from disk.
+ *
+ * @param datafile Name of the metadata file.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global default access mode will be used.
+ * @return True on success.
+ */
+ bool read(const char *datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ MetaData();
+ /**
+ * @brief Unimplemented private copy constructor.
+ */
+ MetaData(const MetaData&);
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ const MetaData& operator=(const MetaData&);
+
+public:
+
+ /**
+ * @brief Constructor.
+ *
+ * The file is read immediately; use isOk() to check whether that
+ * succeeded.
+ *
+ * @param datafile Metadata file.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global default access mode will be used.
+ */
+ MetaData(const char *datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+ MetaData(const std::string &datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF);
+
+ /**
+ * @brief Destructor.
+ */
+ virtual ~MetaData();
+
+ /**
+ * @brief Check if initialization was successful.
+ *
+ * @return True if the initialization of the object succeeded.
+ */
+ bool isOk() const
+ {
+ return _ok;
+ }
+
+ /**
+ * @brief Get user defined header field
+ *
+ * @param idx Field index (0..49).
+ * @return Header field value, or 0 if the object is not initialized
+ * or the index is out of range.
+ */
+ uint32_t user(unsigned int idx) const
+ {
+ if(_ok && idx<50)
+ return _header._user[idx];
+ else
+ return 0;
+ }
+ /**
+ * @brief Get an entry of the data viewed as an array of uint32_t.
+ *
+ * The index is not checked against the size of the data.
+ *
+ * @param idx Index of the 32-bit entry.
+ * @return The entry, or 0 if the object is not initialized.
+ */
+ uint32_t getUIntEntry(uint32_t idx) const
+ {
+ if(_ok){
+ return ((const uint32_t*)_data)[idx];
+ }
+ else
+ return 0;
+ }
+ /**
+ * @brief Get a pointer to a fixed size record.
+ *
+ * The record is located at byte offset idx*size from the start of
+ * the data. The offset is not checked against the size of the data.
+ *
+ * @param idx Record index.
+ * @param size Record size in bytes.
+ * @return Pointer to the record, or NULL if the object is not
+ * initialized.
+ */
+ const void *getDirectRecordEntry(uint32_t idx, uint32_t size) const
+ {
+ if(_ok)
+ return (const void*)((const uint8_t*)_data+idx*size);
+ else
+ return NULL;
+ }
+ /**
+ * @brief Get a pointer to a record through the offset table.
+ *
+ * Reads the idx-th uint32_t of the data and uses it as a byte offset
+ * from the start of the data. Neither the index nor the offset is
+ * checked against the size of the data.
+ *
+ * @param idx Index into the offset table.
+ * @return Pointer to the record, or NULL if the object is not
+ * initialized.
+ */
+ const void *getIndirectRecordEntry(uint32_t idx) const
+ {
+ if(_ok){
+ uint32_t offset=((const uint32_t*)_data)[idx];
+ return (const void*)((const uint8_t*)_data+offset);
+ }
+ else
+ return NULL;
+ }
+ /**
+ * @brief Get a character pointer into the data.
+ *
+ * The offset is not checked against the size of the data.
+ *
+ * @param offset Byte offset from the start of the data.
+ * @return Pointer to the data at the given offset, or NULL if the
+ * object is not initialized.
+ */
+ const char *getCharPtrEntry(uint32_t offset) const
+ {
+ if(_ok)
+ return ((const char*)_data)+offset;
+ else
+ return NULL;
+ }
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/ngram.cpp b/fsa/src/vespa/fsa/ngram.cpp
new file mode 100644
index 00000000000..050b9eff035
--- /dev/null
+++ b/fsa/src/vespa/fsa/ngram.cpp
@@ -0,0 +1,285 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file ngram.cpp
+ * @brief n-gram class for tokenized text.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "ngram.h"
+#include "wordchartokenizer.h"
+
+#include <ctype.h>
+#include <iostream>
+
+namespace fsa {
+
+// {{{ NGram::NGram()
+
+/*
+ * Constructors. Each one starts from an empty token vector and
+ * forwards its arguments unchanged to the matching append() overload.
+ */
+
+/** Tokenize a zero terminated string with the default tokenizer. */
+NGram::NGram(const char *text, unsigned int from, int length)
+  : _tokens()
+{
+  append(text, from, length);
+}
+
+/** Tokenize a zero terminated string with the supplied tokenizer. */
+NGram::NGram(const char *text, Tokenizer &tokenizer, unsigned int from, int length)
+  : _tokens()
+{
+  append(text, tokenizer, from, length);
+}
+
+/** Copy a range of tokens from another n-gram. */
+NGram::NGram(const NGram &g, unsigned int from, int length)
+  : _tokens()
+{
+  append(g, from, length);
+}
+
+/** Copy the tokens picked by a selector from another n-gram. */
+NGram::NGram(const NGram &g, const Selector &select)
+  : _tokens()
+{
+  append(g, select);
+}
+
+/** Copy another n-gram with its tokens permuted. */
+NGram::NGram(const NGram &g, const Permuter &p, unsigned int id)
+  : _tokens()
+{
+  append(g, p, id);
+}
+
+/** Tokenize a std::string with the default tokenizer. */
+NGram::NGram(const std::string &s, unsigned int from, int length)
+  : _tokens()
+{
+  append(s, from, length);
+}
+
+/** Tokenize a std::string with the supplied tokenizer. */
+NGram::NGram(const std::string &s, Tokenizer &tokenizer, unsigned int from, int length)
+  : _tokens()
+{
+  append(s, tokenizer, from, length);
+}
+
+// }}}
+// {{{ NGram::set()
+
+/*
+ * set() overloads. Each one empties the object and then behaves like
+ * the matching append() overload. The overloads taking an NGram
+ * first make a copy when the source is this very object, because
+ * clearing would otherwise destroy the source as well.
+ */
+
+/** Replace the content with the tokens of a zero terminated string. */
+void NGram::set(const char *text, unsigned int from, int length)
+{
+  clear();
+  append(text, from, length);
+}
+
+/** As above, using the supplied tokenizer. */
+void NGram::set(const char *text, Tokenizer &tokenizer, unsigned int from, int length)
+{
+  clear();
+  append(text, tokenizer, from, length);
+}
+
+/** Replace the content with a range of tokens from another n-gram. */
+void NGram::set(const NGram &g, unsigned int from, int length)
+{
+  if(this==&g){
+    set(NGram(g), from, length);
+    return;
+  }
+  clear();
+  append(g, from, length);
+}
+
+/** Replace the content with the selected tokens of another n-gram. */
+void NGram::set(const NGram &g, const Selector &select)
+{
+  if(this==&g){
+    set(NGram(g), select);
+    return;
+  }
+  clear();
+  append(g, select);
+}
+
+/** Replace the content with a permutation of another n-gram. */
+void NGram::set(const NGram &g, const Permuter &p, unsigned int id)
+{
+  if(this==&g){
+    set(NGram(g), p, id);
+    return;
+  }
+  clear();
+  append(g, p, id);
+}
+
+/** Replace the content with the tokens of a std::string. */
+void NGram::set(const std::string &s, unsigned int from, int length)
+{
+  clear();
+  append(s, from, length);
+}
+
+/** As above, using the supplied tokenizer. */
+void NGram::set(const std::string &s, Tokenizer &tokenizer, unsigned int from, int length)
+{
+  clear();
+  append(s, tokenizer, from, length);
+}
+
+// }}}
+// {{{ NGram::setOne()
+
+/**
+ * Replace the content with a single token; the string is stored as
+ * is, without tokenization.
+ */
+void NGram::setOne(const std::string &s)
+{
+  clear();
+  appendOne(s);
+}
+
+// }}}
+// {{{ NGram::append()
+
+/**
+ * Append the tokens of a zero terminated string, tokenized with a
+ * locally constructed WordCharTokenizer.
+ */
+void NGram::append(const char *text, unsigned int from, int length)
+{
+  WordCharTokenizer default_tokenizer;
+  append(text, default_tokenizer, from, length);
+}
+
+/**
+ * Append the tokens of a zero terminated string, tokenized with the
+ * supplied tokenizer. The text is converted to a std::string and
+ * handed to the std::string overload.
+ *
+ * @param text      Input text; a NULL pointer appends nothing.
+ * @param tokenizer Tokenizer.
+ * @param from      Starting token to keep.
+ * @param length    Number of tokens to keep (negative means all).
+ */
+void NGram::append(const char *text, Tokenizer &tokenizer, unsigned int from, int length)
+{
+  // Constructing a std::string from a NULL pointer is undefined behaviour.
+  if(text==NULL)
+    return;
+
+  append(std::string(text),tokenizer,from,length);
+}
+
+
+/**
+ * Append a range of tokens from another n-gram.
+ *
+ * @param g      Source n-gram; may be this object, in which case a
+ *               copy is taken first because appending to the vector
+ *               being read would invalidate the source.
+ * @param from   Index of the first token to copy. Nothing is
+ *               appended if it is past the end of the source.
+ * @param length Number of tokens to copy; a negative value or a
+ *               value reaching past the end means "up to the end".
+ */
+void NGram::append(const NGram &g, unsigned int from, int length)
+{
+  if(this==&g){
+    append(NGram(g),from,length);
+    return;
+  }
+
+  const unsigned int size = g._tokens.size();
+  if(from>=size)
+    return;
+
+  // Clamp explicitly instead of relying on unsigned wraparound.
+  unsigned int count = size-from;
+  if(length>=0 && (unsigned int)length<count)
+    count = (unsigned int)length;
+
+  _tokens.insert(_tokens.end(),
+                 g._tokens.begin()+from,
+                 g._tokens.begin()+from+count);
+}
+
+/**
+ * Append the tokens of another n-gram whose position is switched on
+ * in the selector. Positions beyond the shorter of the two (n-gram
+ * or selector) are ignored. A copy is taken first if the source is
+ * this object.
+ */
+void NGram::append(const NGram &g, const Selector &select)
+{
+  if(this==&g){
+    append(NGram(g),select);
+    return;
+  }
+
+  unsigned int pos=0;
+  while(pos<g._tokens.size() && pos<select.size()){
+    if(select[pos]){
+      _tokens.push_back(g._tokens[pos]);
+    }
+    pos++;
+  }
+}
+
+/**
+ * Append the tokens of another n-gram in permuted order.
+ *
+ * The permutation is obtained from the permuter as a string whose
+ * characters are 1-based token positions. Positions that do not
+ * refer to an existing token of the source are skipped. A copy is
+ * taken first if the source is this object.
+ */
+void NGram::append(const NGram &g, const Permuter &p, unsigned int id)
+{
+  if(this==&g){
+    append(NGram(g),p,id);
+    return;
+  }
+
+  const std::string order=p.getPerm(id);
+  const int count=(int)g._tokens.size();
+
+  for(unsigned int k=0;k<order.length();k++){
+    const int pos=order[k];
+    if(pos<=0 || pos>count)
+      continue;
+    _tokens.push_back(g._tokens[pos-1]);
+  }
+}
+
+/**
+ * Append the tokens of a std::string, tokenized with a locally
+ * constructed WordCharTokenizer.
+ */
+void NGram::append(const std::string &s, unsigned int from, int length)
+{
+  WordCharTokenizer default_tokenizer;
+  append(s, default_tokenizer, from, length);
+}
+
+/**
+ * Append the tokens of a std::string, tokenized with the supplied
+ * tokenizer.
+ *
+ * The tokenizer is (re)initialized with the string, the first `from`
+ * tokens are discarded, and then up to `length` tokens are appended
+ * (all remaining tokens if `length` is negative).
+ */
+void NGram::append(const std::string &s, Tokenizer &tokenizer, unsigned int from, int length)
+{
+  tokenizer.init(s);
+
+  // Skip the leading tokens.
+  for(unsigned int skipped=0; skipped<from && tokenizer.hasMore(); skipped++){
+    tokenizer.getNext();
+  }
+
+  // Take the requested number of tokens.
+  for(unsigned int taken=0;
+      tokenizer.hasMore() && (length<0 || (int)taken<length);
+      taken++){
+    appendOne(tokenizer.getNext());
+  }
+}
+
+// }}}
+// {{{ NGram::appendOne()
+
+/**
+ * Append the string as one token, without tokenization.
+ */
+void NGram::appendOne(const std::string &s)
+{
+  _tokens.push_back(s);
+}
+
+// }}}
+// {{{ NGram::uniq()
+
+/**
+ * Collapse runs of equal neighbouring tokens into one token.
+ *
+ * @return Number of tokens left.
+ */
+unsigned int NGram::uniq()
+{
+  _tokens.erase(std::unique(_tokens.begin(),_tokens.end()),
+                _tokens.end());
+  return _tokens.size();
+}
+
+// }}}
+// {{{ NGram::join()
+
+/**
+ * Join a range of tokens into one string.
+ *
+ * @param separator String inserted between neighbouring tokens.
+ * @param from      Index of the first token. If it is past the end,
+ *                  the result is empty.
+ * @param length    Number of tokens; a negative value or a value
+ *                  reaching past the end means "up to the end".
+ * @return The joined tokens.
+ */
+std::string NGram::join(const std::string &separator, unsigned int from, int length) const
+{
+  std::string dest;
+
+  const unsigned int size = _tokens.size();
+  // Explicit guard: from+1 below would wrap around for the maximum
+  // unsigned value and start joining at token 0.
+  if(from>=size)
+    return dest;
+
+  unsigned int to = size;
+  if(length>=0 && (unsigned int)length<size-from)
+    to = from+(unsigned int)length;
+
+  for(unsigned int i=from;i<to;i++){
+    if(i>from)
+      dest+=separator;
+    dest+=_tokens[i];
+  }
+
+  return dest;
+}
+
+// }}}
+// {{{ NGram::getPermIdTo()
+
+/**
+ * Find the ID of the permutation which turns this n-gram into `g`.
+ *
+ * For every position of `g`, the 1-based position of the equal token
+ * in this n-gram is recorded (the last one if there are several);
+ * positions without a match stay 0. The resulting string is looked
+ * up in the permuter.
+ *
+ * @return Permutation ID as reported by the permuter, or -1 if the
+ *         two n-grams differ in size.
+ */
+int NGram::getPermIdTo(const NGram &g, const Permuter &p) const
+{
+  const unsigned int count=_tokens.size();
+  if(count!=g._tokens.size())
+    return -1;
+
+  std::string perm(count,'\0');
+  for(unsigned int dst=0;dst<count;dst++){
+    for(unsigned int src=0;src<count;src++){
+      if(_tokens[src]==g._tokens[dst])
+        perm[dst]=src+1;
+    }
+  }
+
+  return p.getPermId(perm);
+}
+
+// }}}
+
+// {{{ operator<<
+
+/**
+ * Write the tokens of the n-gram to the stream, separated by single
+ * spaces.
+ */
+std::ostream& operator<<(std::ostream &out, const NGram &g)
+{
+  std::vector<std::string>::const_iterator it=g._tokens.begin();
+  if(it!=g._tokens.end()){
+    out<<*it;
+    for(++it;it!=g._tokens.end();++it){
+      out<<" "<<*it;
+    }
+  }
+  return out;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/ngram.h b/fsa/src/vespa/fsa/ngram.h
new file mode 100644
index 00000000000..32f739e3533
--- /dev/null
+++ b/fsa/src/vespa/fsa/ngram.h
@@ -0,0 +1,433 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file ngram.h
+ * @brief n-gram class for tokenized text.
+ */
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+#include "unicode.h"
+#include "selector.h"
+#include "permuter.h"
+#include "tokenizer.h"
+
+namespace fsa {
+
+// {{{ class NGram
+
+/**
+ * @class NGram
+ * @brief Class for representing n-grams.
+ *
+ * Supports tokenization and various manipulation methods, such as
+ * join, sort, uniq, etc.
+ */
+class NGram {
+
+public:
+
+private:
+ std::vector<std::string> _tokens; /**< Vector holding the tokens. */
+
+public:
+ /**
+ * @brief Default constructor, creates empty NGram.
+ */
+ NGram() : _tokens() {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Creates an NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using unicode wordchar property. For certain punctuation
+ * strategies, a special punctuation token is inserted if a punctuation
+ * character is found.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ NGram(const char *text,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Constructor.
+ *
+ * Creates an NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using the supplied tokenizer.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ NGram(const char *text,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief (Sort of) Copy constructor.
+ *
+ * @param g NGram object to copy.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ NGram(const NGram &g, unsigned int from=0, int length=-1);
+
+ /**
+ * @brief (Sort of) Copy constructor.
+ *
+ * Copy selected tokens from an NGram object.
+ *
+ * @param g NGram object to copy.
+ * @param select Selector indicating which tokens to copy.
+ */
+ NGram(const NGram &g, const Selector &select);
+
+ /**
+ * @brief (Sort of) Copy constructor.
+ *
+ * Create a new NGram and permute the tokens.
+ *
+ * @param g NGram object to copy.
+ * @param p Permuter object.
+ * @param id Permutation ID.
+ */
+ NGram(const NGram &g, const Permuter &p, unsigned int id);
+
+ /**
+ * @brief Constructor.
+ *
+ * Creates an NGram object from a utf-8 encoded std::string. The
+ * string is tokenized using unicode wordchar property. For certain
+ * punctuation strategies, a special punctuation token is inserted if
+ * a punctuation character is found.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ NGram(const std::string &s,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Constructor.
+ *
+ * Creates an NGram object from a utf-8 encoded std::string. The
+ * string is tokenized using the supplied tokenizer.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ NGram(const std::string &s,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * Reinitializes the NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using unicode wordchar property. For certain punctuation
+ * strategies, a special punctuation token is inserted if a punctuation
+ * character is found.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void set(const char *text,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * Reinitializes the NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using the supplied tokenizer.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ void set(const char *text,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * @param g NGram object to copy.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void set(const NGram &g, unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * Copy selected tokens from an NGram object.
+ *
+ * @param g NGram object to copy.
+ * @param select Selector indicating which tokens to copy.
+ */
+ void set(const NGram &g, const Selector &select);
+
+ /**
+ * @brief Set the object.
+ *
+ * Set the object from another NGram, permuting the tokens.
+ *
+ * @param g NGram object to copy.
+ * @param p Permuter object.
+ * @param id Permutation ID.
+ */
+ void set(const NGram &g, const Permuter &p, unsigned int id);
+
+ /**
+ * @brief Set the object.
+ *
+ * Reinitializes the NGram object from a utf-8 encoded
+ * std::string. The string is tokenized using unicode wordchar
+ * property. For certain punctuation strategies, a special punctuation
+ * token is inserted if a punctuation character is found.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void set(const std::string &s,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * Reinitializes the NGram object from a utf-8 encoded
+ * std::string. The string is tokenized using the supplied tokenizer.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ void set(const std::string &s,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Set the object.
+ *
+ * Reinitializes the object from an std::string, as a single token.
+ *
+ * @param s Input string.
+ */
+ void setOne(const std::string &s);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Appends tokens to the NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using unicode wordchar property. For certain punctuation
+ * strategies, a special punctuation token is inserted if a
+ * punctuation character is found.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void append(const char *text,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Appends tokens to the NGram object from a utf-8 encoded character
+ * string. The string must be zero terminated. The string is
+ * tokenized using the supplied tokenizer.
+ *
+ * @param text Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ void append(const char *text,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * @param g NGram object to append.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void append(const NGram &g, unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Append selected tokens from an NGram object.
+ *
+ * @param g NGram object to append.
+ * @param select Selector indicating which tokens to copy.
+ */
+ void append(const NGram &g, const Selector &select);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Append a permuted NGram.
+ *
+ * @param g NGram object to append.
+ * @param p Permuter object.
+ * @param id Permutation ID.
+ */
+ void append(const NGram &g, const Permuter &p, unsigned int id);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Appends tokens to the NGram object from a utf-8 encoded
+ * std::string. The string is tokenized using unicode wordchar
+ * property. For certain punctuation strategies, a special punctuation
+ * token is inserted if a punctuation character is found.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ */
+ void append(const std::string &s,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Append tokens to the object.
+ *
+ * Appends tokens to the NGram object from a utf-8 encoded
+ * std::string. The string is tokenized using the supplied tokenizer.
+ *
+ * @param s Input text.
+ * @param from Starting token to keep (preceding tokens are ignored).
+ * @param length Number of tokens to keep.
+ * @param tokenizer Tokenizer.
+ */
+ void append(const std::string &s,
+ Tokenizer &tokenizer,
+ unsigned int from=0, int length=-1);
+
+ /**
+ * @brief Append a single token to the object.
+ *
+ * Appends a single token from an std::string.
+ *
+ * @param s Input string.
+ */
+ void appendOne(const std::string &s);
+
+
+ /**
+ * @brief Reset the object.
+ */
+ void clear() { _tokens.clear(); }
+
+ /**
+ * @brief Get the size of the n-gram (number of tokens).
+ *
+ * @return Number of tokens in n-gram.
+ */
+ unsigned int size() const { return _tokens.size(); }
+
+ /**
+ * @brief Get the length (size) of the n-gram (number of tokens).
+ *
+ * @return Number of tokens in n-gram.
+ */
+ unsigned int length() const { return _tokens.size(); }
+
+ /**
+ * @brief Sort the tokens lexicographically.
+ */
+ void sort() { std::sort(_tokens.begin(),_tokens.end()); }
+
+ /**
+ * @brief Remove duplicate tokens from a sorted n-gram.
+ *
+ * Only neighbouring duplicates are removed, so the n-gram should be
+ * sorted first if all duplicates are to be removed.
+ *
+ * @return Number of tokens left in the n-gram.
+ */
+ unsigned int uniq();
+
+ /**
+ * @brief Reverse the order of the tokens.
+ */
+ void reverse() { std::reverse(_tokens.begin(),_tokens.end()); }
+
+ /**
+ * @brief Join the whole or parts of the n-gram to a single string.
+ *
+ * @param separator Separator string.
+ * @param from Starting token (default 0).
+ * @param length Number of tokens (default -1 which means all).
+ * @return Joined tokens.
+ */
+ std::string join(const std::string &separator = " ",
+ unsigned int from=0, int length=-1) const;
+
+ /**
+ * @brief Index operator.
+ *
+ * Provides direct access to a token. The index must be in the range
+ * of 0..length()-1, this is not checked.
+ *
+ * @param i Index.
+ * @return Reference to token.
+ */
+ std::string& operator[](unsigned int i) { return _tokens[i]; }
+
+ /**
+ * @brief Index operator.
+ *
+ * Provides direct const access to a token. The index must be in the
+ * range of 0..length()-1, this is not checked.
+ *
+ * @param i Index.
+ * @return Const reference to token.
+ */
+ const std::string& operator[](unsigned int i) const { return _tokens[i]; }
+
+ /**
+ * @brief Get permutation ID to another n-gram.
+ *
+ * Get permutation ID to another n-gram. The other n-gram should
+ * consist of the same tokens in different order.
+ *
+ * @param g The other n-gram.
+ * @param p Permuter object.
+ * @return Permutation ID.
+ */
+ int getPermIdTo(const NGram &g, const Permuter &p) const;
+
+ /**
+ * @brief Output operator.
+ *
+ * @param out Reference to output stream.
+ * @param g n-gram.
+ * @return Reference to output stream.
+ */
+ friend std::ostream& operator<<(std::ostream &out, const NGram &g);
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/permuter.cpp b/fsa/src/vespa/fsa/permuter.cpp
new file mode 100644
index 00000000000..a0d472e59fd
--- /dev/null
+++ b/fsa/src/vespa/fsa/permuter.cpp
@@ -0,0 +1,135 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file permuter.cpp
+ * @brief Permuter class.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "permuter.h"
+
+namespace fsa {
+
+// {{{ Permuter::MAX_UNIT_LENGTH
+
+const unsigned int Permuter::MAX_UNIT_LENGTH; // out-of-class definition; the value is given by the in-class initializer in permuter.h
+
+// }}}
+
+// {{{ Permuter::initRec()
+
+/**
+ * Recursively generate all permutations.
+ *
+ * Each call moves one character of `input` to the front of `tail`,
+ * trying the characters from the last to the first. When `input` is
+ * used up, `tail` is a complete permutation: it is appended to the
+ * table and its table index is recorded in the map.
+ *
+ * @param input Characters not placed yet.
+ * @param tail  Already built end of the permutation.
+ */
+void Permuter::initRec(const std::string &input, std::string tail)
+{
+  if(input.empty()){
+    _permtab.push_back(tail);
+    _permmap[tail] = _permtab.size()-1;
+    return;
+  }
+
+  for(std::string::size_type pos=input.length(); pos-->0; ){
+    std::string rest(input);
+    rest.erase(pos,1);
+    initRec(rest,input.substr(pos,1)+tail);
+  }
+}
+
+// }}}
+// {{{ Permuter::Permuter()
+
+/**
+ * Build the permutation table.
+ *
+ * The seed is the identity permutation, a string holding the
+ * character values 1..MAX_UNIT_LENGTH. The table size is the
+ * factorial of MAX_UNIT_LENGTH; room for it is reserved before the
+ * table and the reverse map are filled by initRec().
+ */
+Permuter::Permuter()
+  : _permtab(),
+    _permmap(),
+    _size(1),
+    _seed(MAX_UNIT_LENGTH,0)
+{
+  for(unsigned int k=0;k<MAX_UNIT_LENGTH;k++){
+    _seed[k]=k+1;
+    _size*=(k+1);
+  }
+
+  _permtab.reserve(_size);
+  initRec(_seed,std::string());
+}
+
+// }}}
+// {{{ Permuter::~Permuter()
+
+/**
+ * Destructor; nothing to release beyond what the members clean up
+ * themselves.
+ */
+Permuter::~Permuter()
+{}
+
+// }}}
+// {{{ Permuter::getPermId()
+
+/**
+ * Look up the ID of a permutation.
+ *
+ * A permutation shorter than MAX_UNIT_LENGTH is first completed with
+ * the corresponding tail of the seed (the identity permutation).
+ *
+ * @param perm Permutation string.
+ * @return Table index of the permutation, or -1 if the string is too
+ *         long or is not in the table.
+ */
+int Permuter::getPermId(const std::string &perm) const
+{
+  const std::string::size_type len=perm.length();
+  if(len>MAX_UNIT_LENGTH)
+    return -1;
+
+  std::string key(perm);
+  if(len<MAX_UNIT_LENGTH)
+    key.append(_seed,len,MAX_UNIT_LENGTH-len);
+
+  const PermMapConstIterator hit=_permmap.find(key);
+  return (hit==_permmap.end()) ? -1 : (int)hit->second;
+}
+
+// }}}
+// {{{ Permuter::firstComb()
+
+/**
+ * Get the bit pattern with the lowest n bits set.
+ *
+ * @param n Number of bits to set (1..31, not greater than m).
+ * @param m Number of bit positions available (1..31).
+ * @return The pattern, or 0 if the arguments are out of range.
+ */
+unsigned int Permuter::firstComb(unsigned int n, unsigned int m)
+{
+  if(n==0 || n>31 || m==0 || m>31 || n>m)
+    return 0;
+
+  // Unsigned arithmetic: with a signed 1, n==31 overflows int.
+  return (1u<<n)-1u;
+}
+
+// }}}
+// {{{ Permuter::nextComb()
+
+/**
+ * Step from one bit pattern to the next one with the same number of
+ * set bits.
+ *
+ * @param c Current pattern.
+ * @param m Number of bit positions available (1..31).
+ * @return The next pattern, or 0 if the arguments are out of range
+ *         or the next pattern does not fit into m bits.
+ */
+unsigned int Permuter::nextComb(unsigned int c, unsigned int m)
+
+{
+  if(c==0 || m==0 || m>31)
+    return 0;
+
+  unsigned int x=c;
+  // Unsigned shift: with a signed 1, m==31 shifts into the sign bit.
+  const unsigned int limit=1u<<m;
+  unsigned int mask=2;
+
+  if(x&1){
+    // Bit 0 is set: find the first clear bit above the lowest run of
+    // ones, set it and clear the bit just below it.
+    while(x&mask) mask<<=1;
+    x^=(mask+(mask>>1));
+  }
+  else{
+    // Bit 0 is clear: find the lowest run of ones ...
+    while(!(x&mask)) mask<<=1;
+    unsigned int mask1=0;   // as many low bits as the run is long
+    unsigned int mask2=0;   // the bits of the run itself
+    while(x&mask){
+      mask1<<=1;mask1++;
+      mask2+=mask;
+      mask<<=1;
+    }
+    // ... set the bit above the run, and move the rest of the run
+    // (one bit less) down to the lowest positions.
+    mask1>>=1;
+    x^=(mask+(mask1^mask2));
+  }
+
+  return (x<limit)?x:0;
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/permuter.h b/fsa/src/vespa/fsa/permuter.h
new file mode 100644
index 00000000000..15b016ab733
--- /dev/null
+++ b/fsa/src/vespa/fsa/permuter.h
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file permuter.h
+ * @brief Permuter class.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <string>
+
+
+namespace fsa {
+
+// {{{ class Permuter
+
+/**
+ * @class Permuter
+ * @brief Permuter class.
+ *
+ * Holds a table of all permutations of MAX_UNIT_LENGTH elements and
+ * a reverse map from permutation to table index (the permutation
+ * ID). A permutation is a string whose characters are 1-based
+ * element positions. Also provides static helpers for stepping
+ * through bit patterns.
+ */
+class Permuter {
+private:
+
+  static const unsigned int MAX_UNIT_LENGTH = 6; /**< Number of elements permuted. */
+
+  typedef std::vector<std::string> PermTab;
+  typedef std::vector<std::string>::iterator PermTabIterator;
+  typedef std::map<std::string,unsigned int> PermMap;
+  typedef std::map<std::string,unsigned int>::iterator PermMapIterator;
+  typedef std::map<std::string,unsigned int>::const_iterator PermMapConstIterator;
+
+  PermTab _permtab;      /**< Permutation table, indexed by permutation ID. */
+  PermMap _permmap;      /**< Map from permutation to permutation ID. */
+  unsigned int _size;    /**< Number of permutations. */
+  std::string _seed;     /**< Identity permutation. */
+
+  /**
+   * @brief Recursive helper filling the table and the map.
+   *
+   * @param input Elements not placed yet.
+   * @param tail Already built end of the permutation.
+   */
+  void initRec(const std::string &input, std::string tail);
+
+public:
+  /**
+   * @brief Default constructor.
+   */
+  Permuter();
+
+  /**
+   * @brief Destructor.
+   */
+  ~Permuter();
+
+  /**
+   * @brief Get a permutation by ID.
+   *
+   * @param id Permutation ID.
+   * @return The permutation, or an empty string if the ID is not a
+   *         valid table index.
+   */
+  std::string getPerm(unsigned int id) const
+  {
+    // Range check: an unchecked _permtab[id] reads out of bounds.
+    return (id<_permtab.size()) ? _permtab[id] : std::string();
+  }
+
+  /**
+   * @brief Get the ID of a permutation.
+   *
+   * @param perm Permutation.
+   * @return Permutation ID, or -1 if the permutation is not found.
+   */
+  int getPermId(const std::string &perm) const;
+
+  /**
+   * @brief Get the first bit pattern with n of m bits set.
+   *
+   * @param n Number of bits set.
+   * @param m Number of bit positions.
+   * @return Bit pattern, or 0 for invalid arguments.
+   */
+  static unsigned int firstComb(unsigned int n, unsigned int m);
+
+  /**
+   * @brief Get the bit pattern following a given one.
+   *
+   * @param c Current bit pattern.
+   * @param m Number of bit positions.
+   * @return Next bit pattern, or 0 if there is none.
+   */
+  static unsigned int nextComb(unsigned int c, unsigned int m);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/segmenter.cpp b/fsa/src/vespa/fsa/segmenter.cpp
new file mode 100644
index 00000000000..91f5a611f13
--- /dev/null
+++ b/fsa/src/vespa/fsa/segmenter.cpp
@@ -0,0 +1,279 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file segmenter.cpp
+ * @brief Query segmenter based on %FSA (%Finite %State %Automaton) (implementation)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "segmenter.h"
+
+
+namespace fsa {
+
+// {{{ Segmenter::Segments::initSingles
+
+void Segmenter::Segments::initSingles()
+{
+  // Every single-term span [pos,pos+1) must have a segment. Spans that
+  // are already registered in the map are left untouched; the others
+  // get a new segment with zero connexity.
+  unsigned int pos=0;
+  while(pos<_text.size()){
+    const unsigned int stop=pos+1;
+    if(!_map.isValid(pos,stop)){
+      _map.set(pos,stop,_segments.size());
+      _segments.push_back(Segment(pos,stop,0));
+    }
+    pos=stop;
+  }
+}
+
+
+// }}}
+// {{{ Segmenter::Segments::buildSegmentation
+
+/**
+ * Build (or rebuild) the segmentation for the given method and store
+ * it in _segmentation[method] as a list of segment indices.
+ *
+ * Three families of methods are handled here:
+ *  - WEIGHTED / WEIGHTED_BIASxx: dynamic programming over text
+ *    positions, maximising the summed (optionally length-biased)
+ *    connexity of multi-term segments;
+ *  - LEFTMOST_x / RIGHTMOST_x: greedy scan from one end of the text;
+ *  - LONGEST_x / WEIGHTED_{LONGEST,LEFTMOST,RIGHTMOST}: delegated to
+ *    buildSegmentationRecursive().
+ * Any other method value leaves the (emptied) list untouched.
+ */
+void Segmenter::Segments::buildSegmentation(Segmenter::SegmentationMethod method)
+{
+  int i,j;
+  int n_txt=(int)_text.size(), n_sgm=_segments.size();
+  int id,bestid;
+  int pos, next=n_txt;
+  unsigned int maxsc,conn;
+  int bestval,temp=0,bias;
+  std::vector<int> nextid(n_sgm,-1);            // segment following each segment on its best path
+  std::vector<unsigned int> maxScore(n_sgm,0);  // best score of a path that starts with each segment
+
+  // Reuse the result list of this method if it exists already.
+  if(_segmentation[method]==NULL){
+    _segmentation[method] = new Segmenter::Segmentation;
+  }
+  else {
+    _segmentation[method]->clear();
+  }
+
+  bias=0;
+  switch(method){
+  // The BIASxx cases fall through on purpose: the increments add up to
+  // 100, 50, 20 and 10 respectively before the shared code is reached.
+  case SEGMENTATION_WEIGHTED_BIAS100:
+    bias+=50;
+    // fall through
+  case SEGMENTATION_WEIGHTED_BIAS50:
+    bias+=30;
+    // fall through
+  case SEGMENTATION_WEIGHTED_BIAS20:
+    bias+=10;
+    // fall through
+  case SEGMENTATION_WEIGHTED_BIAS10:
+    bias+=10;
+    // fall through
+  case SEGMENTATION_WEIGHTED:
+    bestid=-1;
+    // Walk the positions from the end of the text to the beginning.
+    for(i=n_txt;i>=0;i--){
+      // Best continuation among the segments starting at position i.
+      // Scores are compared with a +1 offset so that a segment with
+      // score 0 still beats "no segment at all".
+      bestid=-1;maxsc=0;
+      for(j=i+1;j<=n_txt;j++){
+        id=_map.get(i,j);
+        if(id>=0 && maxScore[id]+1>maxsc) {
+          bestid=id;
+          maxsc=maxScore[id]+1;
+        }
+      }
+      if(maxsc>0) maxsc--;
+      // Propagate to every segment ending at position i.
+      for(j=0;j<i;j++){
+        id=_map.get(j,i);
+        if(id>=0){
+          nextid[id] = bestid;
+          conn = _segments[id].conn();
+          if(i-j<=1){
+            // single-term segments contribute nothing
+            maxScore[id] = maxsc;
+          }
+          else if(bias>0){
+            // bias% extra for each term over 2
+            maxScore[id] = maxsc + ((100+(i-j-2)*bias)*conn)/100;
+          }
+          else{
+            maxScore[id] = maxsc + conn;
+          }
+        }
+      }
+    }
+    // bestid is now the best segment starting at position 0; follow the chain.
+    id = bestid;
+    while(id!=-1){
+      _segmentation[method]->push_back(id);
+      id=nextid[id];
+    }
+    break;
+  case SEGMENTATION_LEFTMOST_LONGEST:
+  case SEGMENTATION_LEFTMOST_WEIGHTED:
+    pos = 0;
+    while(pos<n_txt){
+      bestid = -1; bestval = -1;
+      for(i=pos+1;i<=n_txt;i++){
+        id = _map.get(pos,i);
+        // LONGEST: every hit replaces the previous one, so the last
+        // (longest) wins. WEIGHTED: only a strictly better score wins.
+        if(id>=0 &&
+           (method==SEGMENTATION_LEFTMOST_LONGEST ||
+            (temp=(_segments[id].len()>1)?(int)_segments[id].conn():0)>bestval) ){
+          bestid = id;
+          bestval = temp;
+          next = i;
+        }
+      }
+      if(bestid<0){
+        // No segment starts here (should not happen, as single-term
+        // segments always exist). Stop instead of storing -1 and
+        // re-using a stale 'next', which could loop forever.
+        break;
+      }
+      _segmentation[method]->push_back(bestid);
+      pos=next;
+    }
+    break;
+  case SEGMENTATION_RIGHTMOST_LONGEST:
+  case SEGMENTATION_RIGHTMOST_WEIGHTED:
+    pos = n_txt;
+    while(pos>0){
+      bestid = -1; bestval = -1;
+      for(i=pos-1;i>=0;i--){
+        id = _map.get(i,pos);
+        if(id>=0 &&
+           (method==SEGMENTATION_RIGHTMOST_LONGEST ||
+            (temp=(_segments[id].len()>1)?(int)_segments[id].conn():0)>bestval) ){
+          bestid = id;
+          bestval = temp;
+          next = i;
+        }
+      }
+      if(bestid<0){
+        // Same safeguard as in the leftmost scan.
+        break;
+      }
+      _segmentation[method]->push_front(bestid);
+      pos=next;
+    }
+    break;
+  case SEGMENTATION_LONGEST_WEIGHTED:
+  case SEGMENTATION_LONGEST_LEFTMOST:
+  case SEGMENTATION_LONGEST_RIGHTMOST:
+  case SEGMENTATION_WEIGHTED_LONGEST:
+  case SEGMENTATION_WEIGHTED_LEFTMOST:
+  case SEGMENTATION_WEIGHTED_RIGHTMOST:
+    buildSegmentationRecursive(method,*_segmentation[method],0,n_txt);
+    break;
+  default:
+    break;
+  }
+}
+
+// }}}
+// {{{ Segmenter::Segments::buildSegmentationRecursive
+
+/**
+ * Pick the best segment lying completely inside [beg,end) according
+ * to the method, then recurse into what is left on its left and on
+ * its right, appending segment indices in text order.
+ *
+ * Each method ranks candidates by a primary key and breaks ties with
+ * a secondary key; for the LEFTMOST variants the smaller secondary
+ * key (segment beginning) wins, otherwise the larger one.
+ */
+void Segmenter::Segments::buildSegmentationRecursive(Segmenter::SegmentationMethod method,
+                                                     Segmenter::Segmentation& segmentation,
+                                                     unsigned int beg,
+                                                     unsigned int end)
+{
+  int winner=-1, topPrimary=-1, topSecondary=-1;
+
+  for(int idx=0; idx<(int)_segments.size(); idx++){
+    const Segment &cand=_segments[idx];
+    if(cand.beg()<beg || cand.end()>end){
+      continue;                       // not fully inside the range
+    }
+
+    int primary=0, secondary=0;
+    bool lowerWinsTie=false;
+    switch(method){
+    case SEGMENTATION_LONGEST_WEIGHTED:
+      primary=(int)cand.len();
+      secondary=(int)cand.conn();
+      break;
+    case SEGMENTATION_LONGEST_LEFTMOST:
+      primary=(int)cand.len();
+      secondary=(int)cand.beg();
+      lowerWinsTie=true;
+      break;
+    case SEGMENTATION_LONGEST_RIGHTMOST:
+      primary=(int)cand.len();
+      secondary=(int)cand.end();
+      break;
+    case SEGMENTATION_WEIGHTED_LONGEST:
+      primary=(cand.len()>1)?(int)cand.conn():0;
+      secondary=(int)cand.len();
+      break;
+    case SEGMENTATION_WEIGHTED_LEFTMOST:
+      primary=(cand.len()>1)?(int)cand.conn():0;
+      secondary=(int)cand.beg();
+      lowerWinsTie=true;
+      break;
+    case SEGMENTATION_WEIGHTED_RIGHTMOST:
+      primary=(int)cand.len()>1?(int)cand.conn():0;
+      secondary=(int)cand.end();
+      break;
+    default: // dummy default, pick first possible
+      if(winner<0){
+        winner=idx;
+      }
+      continue;
+    }
+
+    const bool tieWon = lowerWinsTie ? (secondary<topSecondary)
+                                     : (secondary>topSecondary);
+    if(primary>topPrimary || (primary==topPrimary && tieWon)){
+      winner=idx;
+      topPrimary=primary;
+      topSecondary=secondary;
+    }
+  }
+
+  if(winner<0) {
+    return; // this should never happen, as all one-word segments are created
+  }
+
+  const unsigned int winBeg=_segments[winner].beg();
+  const unsigned int winEnd=_segments[winner].end();
+
+  // whatever is left before the chosen segment
+  if(beg<winBeg){
+    buildSegmentationRecursive(method,segmentation,beg,winBeg);
+  }
+
+  // the chosen segment itself
+  segmentation.push_back(winner);
+
+  // whatever is left after the chosen segment
+  if(end>winEnd){
+    buildSegmentationRecursive(method,segmentation,winEnd,end);
+  }
+}
+
+// }}}
+
+// {{{ Segmenter::segment
+
+// Re-run detection on the text already stored in 'segments'; all
+// previous results are cleared first.
+void Segmenter::segment(Segmenter::Segments &segments) const
+{
+  segments.clear();
+  const NGram &tokens = segments.getText();
+  _detector.detect(tokens,segments);
+}
+
+// Store the tokenized text in 'segments', then run detection on it.
+void Segmenter::segment(const NGram &text, Segmenter::Segments &segments) const
+{
+  segments.setText(text);
+  const NGram &tokens = segments.getText();
+  _detector.detect(tokens,segments);
+}
+
+// Store the text (std::string) in 'segments', then run detection on it.
+void Segmenter::segment(const std::string &text, Segmenter::Segments &segments) const
+{
+  segments.setText(text);
+  const NGram &tokens = segments.getText();
+  _detector.detect(tokens,segments);
+}
+
+// Store the text (C string) in 'segments', then run detection on it.
+void Segmenter::segment(const char *text, Segmenter::Segments &segments) const
+{
+  segments.setText(text);
+  const NGram &tokens = segments.getText();
+  _detector.detect(tokens,segments);
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/segmenter.h b/fsa/src/vespa/fsa/segmenter.h
new file mode 100644
index 00000000000..243629bbaa8
--- /dev/null
+++ b/fsa/src/vespa/fsa/segmenter.h
@@ -0,0 +1,636 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/13
+ * @version $Id$
+ * @file segmenter.h
+ * @brief Query segmenter based on %FSA (%Finite %State %Automaton)
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+#include <list>
+
+#include <stdio.h>
+
+#include "fsa.h"
+#include "ngram.h"
+#include "detector.h"
+
+
+namespace fsa {
+
+// {{{ class Segmenter
+
+/**
+ * @class Segmenter
+ * @brief Query segmenter based on %FSA.
+ */
+class Segmenter {
+
+public:
+
+ // {{{ enum Segmenter::SegmentationMethod
+
+ /**
+ * @brief Enumerated type of supported segmentation method IDs
+ *
+ * The segmentation methods currently supported are the following:
+ * - SEGMENTATION_WEIGHTED - gives the segmentation where the sum
+ * of the scores of nontrivial (more than one word) segments is
+ * the highest
+ * - SEGMENTATION_WEIGHTED_BIASxx - (xx can be 10,20,50 or 100)
+ * gives the segmentation where the sum of the scores of
+ * nontrivial (more than one word) segments is the highest. The
+ * scores are biased based on segment length, xx% extra for each
+ * term over 2
+ * - SEGMENTATION_WEIGHTED_LEFTMOST - picks the segment with
+ * highest score first, if there are several possibilities, picks
+ * the leftmost, then repeats for the rest of the query
+ * - SEGMENTATION_WEIGHTED_RIGHTMOST - picks the segment with
+ * highest score first, if there are several possibilities, picks
+ * the rightmost, then repeats for the rest of the query
+ * - SEGMENTATION_WEIGHTED_LONGEST - picks the segment with
+ * highest score first, if there are several possibilities, picks
+ * the longest, then repeats for the rest of the query
+ * - SEGMENTATION_LEFTMOST_LONGEST - picks the leftmost segment
+ * first, if there are several possibilities, picks the longest,
+ * then repeats for the rest of the query
+ * - SEGMENTATION_LEFTMOST_WEIGHTED - picks the leftmost segment
+ * first, if there are several possibilities, picks the one with
+ * highest score, then repeats for the rest of the query
+ * - SEGMENTATION_RIGHTMOST_LONGEST - picks the rightmost segment
+ * first, if there are several possibilities, picks the longest,
+ * then repeats for the rest of the query
+ * - SEGMENTATION_RIGHTMOST_WEIGHTED - picks the rightmost segment
+ * first, if there are several possibilities, picks the one with
+ * highest score, then repeats for the rest of the query
+ * - SEGMENTATION_LONGEST_WEIGHTED - picks the longest segment
+ * first, if there are several possibilities, picks the one with
+ * highest score, then repeats for the rest of the query
+ * - SEGMENTATION_LONGEST_LEFTMOST - picks the longest segment
+ * first, if there are several possibilities, picks leftmost,
+ * then repeats for the rest of the query
+ * - SEGMENTATION_LONGEST_RIGHTMOST - picks the longest segment
+ * first, if there are several possibilities, picks the rightmost,
+ * then repeats for the rest of the query
+ */
+ enum SegmentationMethod {
+ SEGMENTATION_WEIGHTED,
+ SEGMENTATION_WEIGHTED_BIAS10,
+ SEGMENTATION_WEIGHTED_BIAS20,
+ SEGMENTATION_WEIGHTED_BIAS50,
+ SEGMENTATION_WEIGHTED_BIAS100,
+ SEGMENTATION_WEIGHTED_LEFTMOST,
+ SEGMENTATION_WEIGHTED_RIGHTMOST,
+ SEGMENTATION_WEIGHTED_LONGEST,
+ SEGMENTATION_LEFTMOST_LONGEST,
+ SEGMENTATION_LEFTMOST_WEIGHTED,
+ SEGMENTATION_RIGHTMOST_LONGEST,
+ SEGMENTATION_RIGHTMOST_WEIGHTED,
+ SEGMENTATION_LONGEST_WEIGHTED,
+ SEGMENTATION_LONGEST_LEFTMOST,
+ SEGMENTATION_LONGEST_RIGHTMOST,
+ SEGMENTATION_METHODS }; // not a method itself: the number of methods, used to size and bound the per-method segmentation table
+
+ // }}}
+
+ // {{{ typedef Segmenter::Segmentation
+
+ /** %Segmentation type */
+ typedef std::list<int> Segmentation;
+ /** Iterator for %segmentation type */
+ typedef std::list<int>::iterator SegmentationIterator;
+ /** Const iterator for %segmentation type */
+ typedef std::list<int>::const_iterator SegmentationConstIterator;
+
+ // }}}
+
+ // {{{ class Segmenter::Segments
+
+ /**
+ * @class Segments
+ * @brief Class for storing segmentation results.
+ *
+ * Class for storing segmentation results. It is a subclass of
+ * Detector::Hits, so it can be used directly by a Detector.
+ */
+ class Segments : public Detector::Hits {
+
+ private:
+
+ // {{{ class Segmenter::Segments::Segment
+
+ /**
+  * @class Segment
+  * @brief Simple segment class.
+  *
+  * A segment is the half-open term range [beg,end) of the original
+  * text together with a connexity value. It is a plain value type:
+  * copying, assignment and destruction are the compiler-generated
+  * member-wise operations.
+  */
+ class Segment {
+
+ private:
+   unsigned int _beg;   /**< Beginning of the segment. */
+   unsigned int _end;   /**< End of the segment (position after the last term). */
+   unsigned int _conn;  /**< Connexity of the segment. */
+
+ public:
+
+   /**
+    * @brief Default constructor.
+    *
+    * Null segment at position zero.
+    */
+   Segment() : _beg(0), _end(0), _conn(0) {}
+
+   /**
+    * @brief Constructor.
+    *
+    * @param b Beginning of the segment.
+    * @param e End of the segment (the position after the last term).
+    * @param c Connexity of the segment.
+    */
+   Segment(unsigned int b, unsigned int e, unsigned int c) :
+     _beg(b), _end(e), _conn(c) {}
+
+   /**
+    * @brief Set the segment parameters.
+    *
+    * @param b Beginning of the segment.
+    * @param e End of the segment (the position after the last term).
+    * @param c Connexity of the segment.
+    */
+   void set(unsigned int b, unsigned int e, unsigned int c)
+   {
+     _beg=b;
+     _end=e;
+     _conn=c;
+   }
+
+   /**
+    * @brief Get the beginning of the segment.
+    *
+    * @return Beginning of the segment.
+    */
+   unsigned int beg() const { return _beg; }
+
+   /**
+    * @brief Get the end of the segment.
+    *
+    * @return End of the segment. (Position after last term.)
+    */
+   unsigned int end() const { return _end; }
+
+   /**
+    * @brief Get the length of the segment.
+    *
+    * @return Length of the segment (number of terms), computed as
+    *         end minus beginning.
+    */
+   unsigned int len() const { return _end-_beg; }
+
+   /**
+    * @brief Get the connexity of the segment.
+    *
+    * @return Connexity of the segment.
+    */
+   unsigned int conn() const { return _conn; }
+ };
+
+ // }}}
+
+ // {{{ class Segmenter::Segments::SegmentMap
+
+ /**
+  * @class SegmentMap
+  * @brief Class for mapping (beg,end) pairs to segment idx.
+  *
+  * The map is a square table of side size(), stored row by row in a
+  * flat vector. An entry of -1 means that no segment is registered
+  * for that (beg,end) pair.
+  */
+ class SegmentMap {
+
+ private:
+   unsigned int     _size;  /**< Side of the table (map size given by the user plus one). */
+   std::vector<int> _map;   /**< Table entries, _size*_size of them. */
+
+   /** True if both coordinates fall inside the table. */
+   bool covers(unsigned int i, unsigned int j) const
+   {
+     return i<_size && j<_size;
+   }
+
+ public:
+   /** Default constructor, creates empty map of zero size. */
+   SegmentMap() : _size(0), _map() {}
+
+   /**
+    * @brief Constructor, creates an empty map of the given size.
+    *
+    * @param n Map size; positions 0..n are addressable.
+    */
+   SegmentMap(unsigned int n) : _size(n+1), _map(_size*_size,-1) {}
+
+   /** Destructor */
+   ~SegmentMap() {}
+
+   /**
+    * @brief Reset the map to an empty map of the given size.
+    *
+    * @param n Map size; positions 0..n are addressable.
+    */
+   void init(unsigned int n)
+   {
+     _size = n+1;
+     _map.assign(_size*_size,-1);
+   }
+
+   /**
+    * @brief Reset the map to an empty map of zero size.
+    */
+   void clear()
+   {
+     _size = 0;
+     _map.clear();
+   }
+
+   /**
+    * @brief Get current map size.
+    *
+    * @return Side of the table.
+    */
+   unsigned int size() const { return _size; }
+
+   /**
+    * @brief Register a segment index; silently ignored if either
+    *        coordinate is outside the table.
+    *
+    * @param i Beginning of the segment.
+    * @param j End of the segment.
+    * @param idx %Segment index.
+    */
+   void set(unsigned int i, unsigned int j, int idx)
+   {
+     if(!covers(i,j)){
+       return;
+     }
+     _map[i*_size+j] = idx;
+   }
+
+   /**
+    * @brief Look up a segment index.
+    *
+    * @param i Beginning of the segment.
+    * @param j End of the segment.
+    * @return %Segment index (-1 if the segment does not exist or the
+    *         coordinates are outside the table).
+    */
+   int get(unsigned int i, unsigned int j) const
+   {
+     return covers(i,j) ? _map[i*_size+j] : -1;
+   }
+
+   /**
+    * @brief Check if a segment exists.
+    *
+    * @param i Beginning of the segment.
+    * @param j End of the segment.
+    * @return True if segment exists.
+    */
+   bool isValid(unsigned int i, unsigned int j) const
+   {
+     return get(i,j)!=-1;
+   }
+ };
+
+ // }}}
+
+ private:
+ NGram _text; /**< Tokenized text (e.g. query). */
+ std::vector<Segment> _segments; /**< Detected segments. */
+ SegmentMap _map; /**< Map of segments. */
+ std::vector<Segmentation*> _segmentation; /**< Pre-built segmentations. */
+
+
+ /**
+ * @brief Insert all single term segments.
+ *
+ * Insert all single term segments as detected with zero
+ * connexity. This is important for some of the segmentation
+ * algorithms.
+ */
+ void initSingles();
+
+ /**
+ * @brief Build a segmentation.
+ *
+ * @param method %Segmentation method.
+ */
+ void buildSegmentation(Segmenter::SegmentationMethod method);
+
+ /**
+ * @brief Build a segmentation recursively.
+ *
+ * Some of the segmentation methods are implemented
+ * recursively.
+ *
+ * @param method %Segmentation method.
+ * @param segmentation Segmentation object which holds results.
+ * @param beg Beginning of the subquery to process.
+ * @param end End of the subquery to process.
+ */
+ void buildSegmentationRecursive(Segmenter::SegmentationMethod method,
+ Segmentation& segmentation,
+ unsigned int beg,
+ unsigned int end);
+
+ public:
+ /** Default constructor */
+ Segments() : _text(), _segments(), _map(),
+   _segmentation(Segmenter::SEGMENTATION_METHODS,NULL) {}
+
+ /**
+  * @brief Copy constructor.
+  *
+  * Copies the text, the detected segments and the segment map. The
+  * pre-built segmentations are owned per object and are only a
+  * cache, so they are not copied: the new object starts with none
+  * and rebuilds them on demand in segmentation().
+  *
+  * @param s Segments object to copy.
+  */
+ Segments(const Segments &s) : Detector::Hits(s),
+   _text(), _segments(s._segments), _map(s._map),
+   _segmentation(Segmenter::SEGMENTATION_METHODS,NULL)
+ {
+   _text.set(s._text);
+ }
+
+ /**
+  * @brief Assignment operator.
+  *
+  * Same semantics as the copy constructor; segmentations built
+  * earlier for this object are dropped because they no longer
+  * match the assigned content.
+  *
+  * @param s Segments object to copy.
+  * @return Reference to this object.
+  */
+ Segments& operator=(const Segments &s)
+ {
+   if(this!=&s){
+     Detector::Hits::operator=(s);
+     _text.set(s._text);
+     _segments=s._segments;
+     _map=s._map;
+     for(unsigned int i=0;i<_segmentation.size();i++){
+       delete _segmentation[i];
+       _segmentation[i]=NULL;
+     }
+   }
+   return *this;
+ }
+
+ /**
+  * @brief Destructor.
+  *
+  * Releases the segmentation lists allocated by buildSegmentation();
+  * without this they were leaked whenever a Segments object with
+  * built segmentations went out of scope.
+  */
+ ~Segments()
+ {
+   for(unsigned int i=0;i<_segmentation.size();i++){
+     delete _segmentation[i];
+   }
+ }
+
+ /**
+ * @brief Set input text, and clear all results.
+ *
+ * The text is stored first and clear() is called afterwards, so the
+ * segment map and the single term segments are re-initialized for
+ * the new text.
+ *
+ * @param text Input text (already tokenized).
+ */
+ void setText(const NGram &text)
+ {
+ _text.set(text);
+ clear();
+ }
+
+ /**
+ * @brief Set input text, and clear all results.
+ *
+ * Same as the NGram variant; the string is handed to NGram::set().
+ *
+ * @param text Input text.
+ */
+ void setText(const std::string &text)
+ {
+ _text.set(text);
+ clear();
+ }
+
+ /**
+ * @brief Set input text, and clear all results.
+ *
+ * Same as the NGram variant; the C string is handed to NGram::set().
+ *
+ * @param text Input text.
+ */
+ void setText(const char *text)
+ {
+ _text.set(text);
+ clear();
+ }
+
+ /**
+ * @brief Get a reference to the input text.
+ *
+ * Get a reference to the input text. Valid as long as the
+ * Segments object is valid and not modified.
+ *
+ * @return Reference to input text.
+ */
+ const NGram& getText() const { return _text; }
+
+ /**
+  * @brief Clear all detected segments and built segmentations.
+  *
+  * The segment map is re-sized for the current text and the single
+  * term segments are inserted again, so the object is ready for a
+  * new detection run on the same text.
+  */
+ void clear()
+ {
+   _segments.clear();
+   _map.init(_text.size());
+   initSingles();
+
+   // drop every cached segmentation
+   unsigned int method=0;
+   while(method<SEGMENTATION_METHODS){
+     Segmentation *&cached=_segmentation[method];
+     delete cached;
+     cached=NULL;
+     ++method;
+   }
+ }
+
+ /**
+  * @brief Insert a detected segment.
+  *
+  * Called by the detector for each detected segment. If the
+  * (from,to) span is already registered in the map, the existing
+  * segment is overwritten; otherwise a new segment is appended and
+  * registered. The connexity is taken from state.nData().
+  *
+  * @param text Input text (unused).
+  * @param from Index of first token.
+  * @param length Number of tokens.
+  * @param state Final state after detected phrase.
+  */
+ void add(const NGram &text,
+          unsigned int from, int length,
+          const FSA::State &state)
+ {
+   (void)text;
+   const unsigned int to=from+length;
+   const int known=_map.get(from,to);
+   if(known!=-1){
+     _segments[known].set(from,to,state.nData());
+     return;
+   }
+   _map.set(from,to,_segments.size());
+   _segments.push_back(Segment(from,to,state.nData()));
+ }
+
+ /**
+  * @brief Get the size (number of segments).
+  *
+  * @return Number of segments.
+  */
+ unsigned int size() const
+ {
+   return _segments.size();
+ }
+
+ /**
+  * @brief Get a segment as a string (same as sgm()).
+  *
+  * @param i %Segment index.
+  * @return %Segment string.
+  */
+ const std::string operator[](unsigned int i) const
+ {
+   return sgm(i);
+ }
+
+ /**
+  * @brief Get a segment as a string.
+  *
+  * @param i %Segment index.
+  * @return The terms of the segment joined with single spaces, or
+  *         an empty string if the index is out of range.
+  */
+ const std::string sgm(unsigned int i) const
+ {
+   if(i>=_segments.size()){
+     return std::string();
+   }
+   return _text.join(" ",_segments[i].beg(),_segments[i].len());
+ }
+
+ /**
+  * @brief Get the beginning of a segment.
+  *
+  * @param i %Segment index.
+  * @return Beginning of the segment (0 if the index is out of range).
+  */
+ unsigned beg(unsigned int i) const
+ {
+   return (i<_segments.size()) ? _segments[i].beg() : 0;
+ }
+
+ /**
+  * @brief Get the end of a segment.
+  *
+  * @param i %Segment index.
+  * @return End of the segment (0 if the index is out of range).
+  */
+ unsigned end(unsigned int i) const
+ {
+   return (i<_segments.size()) ? _segments[i].end() : 0;
+ }
+
+ /**
+  * @brief Get the length of a segment.
+  *
+  * @param i %Segment index.
+  * @return Length of the segment (0 if the index is out of range).
+  */
+ unsigned len(unsigned int i) const
+ {
+   return (i<_segments.size()) ? _segments[i].len() : 0;
+ }
+
+ /**
+  * @brief Get the connexity of a segment.
+  *
+  * @param i %Segment index.
+  * @return Connexity of the segment (0 if the index is out of range).
+  */
+ unsigned conn(unsigned int i) const
+ {
+   return (i<_segments.size()) ? _segments[i].conn() : 0;
+ }
+
+ /**
+  * @brief Get a segmentation of the query using the given method.
+  *
+  * The segmentation is built on first use and cached afterwards. A
+  * method value outside the valid range is treated as
+  * SEGMENTATION_WEIGHTED.
+  *
+  * @param method %Segmentation method
+  * @return Pointer to the Segmentation object, valid as long as the
+  *         Segments object is valid and not modified.
+  */
+ const Segmenter::Segmentation* segmentation(Segmenter::SegmentationMethod method)
+ {
+   const bool known = (method>=SEGMENTATION_WEIGHTED && method<SEGMENTATION_METHODS);
+   const Segmenter::SegmentationMethod effective = known ? method : SEGMENTATION_WEIGHTED;
+
+   if(!_segmentation[effective]){
+     buildSegmentation(effective);
+   }
+   return _segmentation[effective];
+ }
+
+ };
+
+ // }}}
+
+
+private:
+
+ const FSA& _dictionary; /**< Dictionary. */
+ Detector _detector; /**< Detector. */
+
+ /** Unimplemented private default constructor */
+ Segmenter();
+ /** Unimplemented private copy constructor */
+ Segmenter(const Segmenter&);
+
+public:
+
+ /**
+ * @brief Constructor.
+ *
+ * Create Segmenter object and initialize dictionary and detector.
+ *
+ * @param dict Dictionary to use.
+ */
+ Segmenter(const FSA& dict) : _dictionary(dict), _detector(_dictionary) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Create Segmenter object and initialize dictionary and detector.
+ *
+ * @param dict Dictionary to use.
+ */
+ Segmenter(const FSA* dict) : _dictionary(*dict), _detector(_dictionary) {}
+
+ /** Destructor */
+ ~Segmenter() {}
+
+ /**
+ * @brief %Segment a query.
+ *
+ * @param segments %Segments object, input text already initialized.
+ */
+ void segment(Segmenter::Segments &segments) const;
+
+ /**
+ * @brief %Segment a query.
+ *
+ * @param text Input text.
+ * @param segments %Segments object to hold the results.
+ */
+ void segment(const NGram &text, Segmenter::Segments &segments) const;
+
+ /**
+ * @brief %Segment a query.
+ *
+ * @param text Input text.
+ * @param segments %Segments object to hold the results.
+ */
+ void segment(const std::string &text, Segmenter::Segments &segments) const;
+
+ /**
+ * @brief %Segment a query.
+ *
+ * @param text Input text.
+ * @param segments %Segments object to hold the results.
+ */
+ void segment(const char *text, Segmenter::Segments &segments) const;
+
+ /**
+  * @brief %Segment a query.
+  *
+  * Pointer flavour of segment(const char*, Segments&). A NULL
+  * segments pointer is ignored (it used to be dereferenced).
+  *
+  * @param text Input text.
+  * @param segments %Segments object to hold the results.
+  */
+ void segment(const char *text, Segmenter::Segments *segments) const
+ {
+   if(segments!=NULL){
+     segment(text,*segments);
+   }
+ }
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/selector.cpp b/fsa/src/vespa/fsa/selector.cpp
new file mode 100644
index 00000000000..b139a8ebaed
--- /dev/null
+++ b/fsa/src/vespa/fsa/selector.cpp
@@ -0,0 +1,77 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file selector.cpp
+ * @brief Selector class.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "selector.h"
+
+namespace fsa {
+
+// {{{ Selector::clear()
+
+// Remove every entry from the selector; the underlying vector becomes
+// empty, so no index is selected any more.
+void Selector::clear()
+{
+ _selector.clear();
+}
+
+// }}}
+// {{{ Selector::set()
+
+// Merge a bitmask into the selector: bit k of 'c' selects index k.
+// Entries are only ever switched on here, never off, and the selector
+// grows as far as the highest set bit of 'c'.
+void Selector::set(unsigned int c)
+{
+  for(unsigned int bit=0; c!=0; ++bit, c>>=1){
+    if(_selector.size()<=bit){
+      _selector.resize(bit+1,false);
+    }
+    if((c&1)!=0){
+      _selector[bit]=true;
+    }
+  }
+}
+
+// }}}
+// {{{ Selector::select()
+
+// Switch entry 'i' on, growing the selector with unselected entries
+// first if 'i' is beyond its current size.
+void Selector::select(unsigned int i)
+{
+  const unsigned int needed=i+1;
+  if(_selector.size()<=i){
+    _selector.resize(needed,false);
+  }
+  _selector[i]=true;
+}
+
+// }}}
+// {{{ Selector::unselect()
+
+// Switch entry 'i' off. The selector is still grown to cover 'i' if it
+// was shorter, so size() may increase.
+void Selector::unselect(unsigned int i)
+{
+  const unsigned int needed=i+1;
+  if(_selector.size()<=i){
+    _selector.resize(needed,false);
+  }
+  _selector[i]=false;
+}
+
+// }}}
+// {{{ Selector::operator[]()
+
+// Read entry 'i'; anything beyond the current size counts as not selected.
+bool Selector::operator[](unsigned int i) const
+{
+  return i<_selector.size() && _selector[i];
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/selector.h b/fsa/src/vespa/fsa/selector.h
new file mode 100644
index 00000000000..00e87bcb3f5
--- /dev/null
+++ b/fsa/src/vespa/fsa/selector.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file selector.h
+ * @brief Selector class.
+ */
+
+#pragma once
+
+#include <vector>
+
+namespace fsa {
+
+// {{{ class Selector
+
+
+/**
+ * @class Selector
+ * @brief Simple (bitmap-like) selector class.
+ *
+ * A growable sequence of on/off flags addressed by index.
+ */
+class Selector {
+
+ private:
+
+  std::vector<bool> _selector;   /**< The flags, one per index. */
+
+ public:
+
+  /** @brief Create an empty selector. */
+  Selector() {}
+
+  /**
+   * @brief Create a copy of another selector.
+   *
+   * @param s Selector to copy.
+   */
+  Selector(const Selector &s) : _selector(s._selector) {}
+
+  /**
+   * @brief Create a selector from a bitmask (see set()).
+   *
+   * @param c Bitmask.
+   */
+  Selector(unsigned int c)
+  {
+    set(c);
+  }
+
+  /** @brief Destructor. */
+  ~Selector() {}
+
+
+  /** @brief Clear the selector. */
+  void clear();
+
+  /**
+   * @brief Set selector from bitmask.
+   *
+   * @param c Bitmask.
+   */
+  void set(unsigned int c);
+
+  /**
+   * @brief Get size of selector.
+   *
+   * @return Number of flags currently stored.
+   */
+  unsigned int size() const
+  {
+    return _selector.size();
+  }
+
+  /**
+   * @brief Set an item in the selector.
+   *
+   * @param i Index.
+   */
+  void select(unsigned int i);
+
+  /**
+   * @brief Unset an item in the selector.
+   *
+   * @param i Index.
+   */
+  void unselect(unsigned int i);
+
+  /**
+   * @brief Get an item.
+   *
+   * @param i Index.
+   * @return Item.
+   */
+  bool operator[](unsigned int i) const;
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/timestamp.h b/fsa/src/vespa/fsa/timestamp.h
new file mode 100644
index 00000000000..0455fc4c144
--- /dev/null
+++ b/fsa/src/vespa/fsa/timestamp.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file timestamp.h
+ * @brief Simple timestamp class.
+ */
+
+#pragma once
+
+#include <sys/time.h>
+#include <time.h>
+
+namespace fsa {
+
+// {{{ class TimeStamp
+
+/**
+ * @class TimeStamp
+ * @brief Simple timestamp class.
+ *
+ * Stores one gettimeofday() sample and reports it, or differences
+ * against it, as seconds in double precision.
+ */
+class TimeStamp {
+private:
+  struct timeval _ts;   /**< The stored time sample. */
+public:
+  /**
+   * @brief Constructor, registers current time.
+   */
+  TimeStamp()
+  {
+    reset();
+  }
+
+  /**
+   * @brief Destructor.
+   */
+  ~TimeStamp() {}
+
+  /**
+   * @brief Reset timestamp.
+   *
+   * Set timestamp value to current time.
+   */
+  void reset()
+  {
+    gettimeofday(&_ts,NULL);
+  }
+
+  /**
+   * @brief Get timestamp value (= object creation or last reset time).
+   *
+   * @return Timestamp value in seconds.
+   */
+  double getVal() const
+  {
+    const double seconds = double(_ts.tv_sec);
+    const double micros  = double(_ts.tv_usec);
+    return seconds+micros/1000000.0;
+  }
+
+  /**
+   * @brief Get elapsed time (since object creation or last reset).
+   *
+   * @return Elapsed time in seconds.
+   */
+  double elapsed() const
+  {
+    // a fresh TimeStamp holds "now"
+    return TimeStamp()-*this;
+  }
+
+  /**
+   * @brief Calculate difference between timestamps.
+   *
+   * @param other Timestamp to subtract.
+   * @return Difference between timestamps in seconds.
+   */
+  double operator-(const TimeStamp &other) const
+  {
+    const double whole = double(_ts.tv_sec)-double(other._ts.tv_sec);
+    const double frac  = (double(_ts.tv_usec)-double(other._ts.tv_usec))/1000000.0;
+    return whole+frac;
+  }
+};
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/tokenizer.h b/fsa/src/vespa/fsa/tokenizer.h
new file mode 100644
index 00000000000..2dceacca60b
--- /dev/null
+++ b/fsa/src/vespa/fsa/tokenizer.h
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file tokenizer.h
+ * @brief Generic tokenizer class.
+ */
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+
+namespace fsa {
+
+// {{{ class Tokenizer
+
+/**
+ * @class Tokenizer
+ * @brief Generic tokenizer class.
+ *
+ * Generic interface to various tokenizer implementations. The class
+ * holds no data and all three operations are pure virtual, so it can
+ * only be used through a concrete subclass.
+ */
+class Tokenizer {
+
+public:
+
+ /**
+ * @brief Constructor.
+ */
+ Tokenizer() {}
+
+ /**
+ * @brief Destructor (virtual, so subclasses can be destroyed through
+ * a Tokenizer pointer).
+ */
+ virtual ~Tokenizer() {}
+
+ /**
+ * @brief Initialize the tokenizer.
+ *
+ * @param text Input text.
+ * @return True on success.
+ */
+ virtual bool init(const std::string &text) = 0;
+
+ /**
+ * @brief Check if there are more tokens available.
+ *
+ * @return True if there are more tokens.
+ */
+ virtual bool hasMore() = 0;
+
+ /**
+ * @brief Get next token.
+ *
+ * @return Next token, or empty string if there are no more tokens left.
+ */
+ virtual std::string getNext() = 0;
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/unicode.cpp b/fsa/src/vespa/fsa/unicode.cpp
new file mode 100644
index 00000000000..4a35e79ff12
--- /dev/null
+++ b/fsa/src/vespa/fsa/unicode.cpp
@@ -0,0 +1,532 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "unicode.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+namespace fsa {
+
+// Out-of-class definitions of the constants that are initialized inside
+// the class in unicode.h. The type is spelled ucs4_t so the definitions
+// match the declarations exactly, also where uint32_t is not unsigned int.
+const ucs4_t Unicode::_BadUTF8Char;
+const ucs4_t Unicode::_EOF;
+
+
+char* Unicode::utf8copy(char *dst, const ucs4_t *src)
+{
+  // Encode every UCS4 character up to (not including) the terminating 0.
+  // utf8cput() writes the 1..6 byte sequence for one character and hands
+  // back the advanced write position.
+  char *out = dst;
+  for (const ucs4_t *in = src; *in != 0; ++in) {
+    out = utf8cput(out, *in);
+  }
+  *out = 0;
+  // The result addresses the terminating NUL, not the start of dst.
+  return out;
+}
+
+char* Unicode::utf8ncopy(char *dst, const ucs4_t *src,
+                         int maxdst, int maxsrc)
+{
+  char *out = dst;
+  char * const dstEnd = dst + maxdst;
+  const ucs4_t * const srcEnd = src + maxsrc;
+
+  for (;;) {
+    // Source limit is tested before the source is read.
+    if (src >= srcEnd)
+      break;
+    const ucs4_t ch = *src++;
+    if (ch == 0 || out >= dstEnd)
+      break;
+    // Never emit a partial sequence: stop as soon as the complete
+    // encoding of ch no longer fits in front of dstEnd.
+    if (static_cast<size_t>(dstEnd - out) < utf8clen(ch))
+      break;
+    out = utf8cput(out, ch);
+  }
+
+  // The terminator is stored only when there is still room for it.
+  if (out < dstEnd)
+    *out = 0;
+  return out;
+}
+
+
+int Unicode::utf8cmp(const char *s1, const ucs4_t *s2)
+{
+  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(s1);
+
+  // Walk both strings in lock-step: one decoded character of s1 against
+  // one UCS4 character of s2, until they differ or s1 ends.
+  for (;;) {
+    const ucs4_t lhs = getUTF8Char(cursor);
+    const ucs4_t rhs = *s2++;
+    if (lhs != rhs)
+      return (lhs > rhs) ? 1 : -1;
+    if (lhs == 0)
+      return 0;
+  }
+}
+
+
+int Unicode::utf8casecmp(const char *s1, const ucs4_t *s2)
+{
+  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(s1);
+
+  // Same lock-step walk as a plain comparison, but both characters are
+  // folded with toLower() before they are compared.
+  for (;;) {
+    const ucs4_t lhs = toLower(getUTF8Char(cursor));
+    const ucs4_t rhs = toLower(*s2++);
+    if (lhs != rhs)
+      return (lhs > rhs) ? 1 : -1;
+    if (lhs == 0)
+      return 0;
+  }
+}
+
+size_t Unicode::utf8len(const ucs4_t *str)
+{
+  // Sum the per-character encoded lengths (1..6 bytes each, as reported
+  // by utf8clen()) up to the terminating 0, which is not counted.
+  size_t total = 0;
+  for (const ucs4_t *in = str; *in != 0; ++in) {
+    total += utf8clen(*in);
+  }
+  return total;
+}
+
+size_t Unicode::utf8nlen(const ucs4_t *str, int maxsrc)
+{
+  // Number of bytes needed to encode at most maxsrc leading characters
+  // of str as UTF-8; counting stops early at a terminating 0.
+  //
+  // The count limit is tested BEFORE the element is read, so no element
+  // at index >= maxsrc is ever touched (a buffer of exactly maxsrc
+  // characters without terminator is safe, and maxsrc <= 0 reads nothing).
+  size_t res = 0;
+  for (int n = 0; n < maxsrc && str[n] != 0; n++) {
+    res += utf8clen(str[n]);
+  }
+  return res;
+}
+
+size_t Unicode::ucs4strlen(const ucs4_t *str)
+{
+  // Index of the first 0 element == number of characters before it.
+  size_t count = 0;
+  while (str[count] != 0)
+    ++count;
+  return count;
+}
+
+size_t Unicode::ucs4len(const char *str)
+{
+  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(str);
+  size_t count = 0;
+
+  // Decode until the terminating 0; invalid sequences are skipped and
+  // do not contribute to the count.
+  for (ucs4_t ch = getUTF8Char(cursor); ch != 0; ch = getUTF8Char(cursor)) {
+    if (ch == _BadUTF8Char)
+      continue;
+    ++count;
+  }
+  return count;
+}
+
+size_t Unicode::ucs4nlen(const char *str, size_t n)
+{
+  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(str);
+  const unsigned char * const limit = cursor + n;
+  size_t count = 0;
+
+  while (cursor < limit) {
+    // The decoder is told how many bytes remain in the window.
+    const ucs4_t ch = getUTF8Char(cursor, limit - cursor);
+    if (ch == 0)
+      break;
+    // Count only valid characters that ended inside the window.
+    if (ch != _BadUTF8Char && cursor <= limit)
+      ++count;
+  }
+  return count;
+}
+
+ucs4_t* Unicode::ucs4copy(ucs4_t *dst, const char *src)
+{
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src);
+  ucs4_t *out = dst;
+
+  // Decode until the terminating 0; invalid sequences are dropped.
+  for (ucs4_t ch = getUTF8Char(in); ch != 0; ch = getUTF8Char(in)) {
+    if (ch == _BadUTF8Char)
+      continue;
+    *out++ = ch;
+  }
+  *out = 0;
+  // The result addresses the terminating 0, not the start of dst.
+  return out;
+}
+
+ucs4_t* Unicode::ucs4ncopy(ucs4_t *dst, const char *src, int byteLength)
+{
+  const unsigned char *in = reinterpret_cast<const unsigned char *>(src);
+  const unsigned char * const inEnd = in + byteLength;
+  ucs4_t *out = dst;
+
+  while (in < inEnd) {
+    // The decoder is told how many source bytes remain.
+    const ucs4_t ch = getUTF8Char(in, inEnd - in);
+    if (ch == 0)
+      break;
+    // Invalid sequences are dropped.
+    if (ch != _BadUTF8Char)
+      *out++ = ch;
+  }
+  *out = 0;
+  // The result addresses the terminating 0, not the start of dst.
+  return out;
+}
+
+
+char* Unicode::strdupUTF8(const char *src)
+{
+  // Duplicate src into a malloc()ed buffer, keeping only the valid UTF-8
+  // characters. Returns NULL if the allocation fails; otherwise the
+  // caller owns the buffer and releases it with free().
+  char *res;
+  size_t reslen;
+  ucs4_t i;
+  const unsigned char *p;
+  char *q;
+
+  // Pass 1: size of the re-encoded valid characters.
+  reslen = 0;
+  p = reinterpret_cast<const unsigned char *>(src);
+  while ((i = getUTF8Char(p)) != 0) {
+    if (i != _BadUTF8Char)
+      reslen += utf8clen(i);
+  }
+  res = static_cast<char *>(malloc(reslen + 1));
+  if (res == NULL)   // out of memory: report it instead of writing through NULL
+    return NULL;
+
+  // Pass 2: re-encode the same characters into the buffer.
+  p = reinterpret_cast<const unsigned char *>(src);
+  q = res;
+  while ((i = getUTF8Char(p)) != 0) {
+    if (i != _BadUTF8Char)
+      q = utf8cput(q, i);
+  }
+  assert(q == res + reslen);
+  *q = 0;
+  return res;
+}
+
+
+char* Unicode::strlowdupUTF8(const char *src)
+{
+  // Duplicate src into a malloc()ed buffer, lowercasing every valid
+  // UTF-8 character with toLower() and dropping invalid sequences.
+  // Returns NULL if the allocation fails; otherwise the caller owns the
+  // buffer and releases it with free().
+  char *res;
+  size_t reslen;
+  ucs4_t i;
+  const unsigned char *p;
+  char *q;
+
+  // Pass 1: size of the lowercased, re-encoded characters.
+  reslen = 0;
+  p = reinterpret_cast<const unsigned char *>(src);
+  while ((i = getUTF8Char(p)) != 0) {
+    if (i != _BadUTF8Char) {
+      i = toLower(i);
+      if (i != _BadUTF8Char)
+        reslen += utf8clen(i);
+    }
+  }
+  res = static_cast<char *>(malloc(reslen + 1));
+  if (res == NULL)   // out of memory: report it instead of writing through NULL
+    return NULL;
+
+  // Pass 2: must apply exactly the same filtering as pass 1 so the
+  // computed size matches what is written.
+  p = reinterpret_cast<const unsigned char *>(src);
+  q = res;
+  while ((i = getUTF8Char(p)) != 0) {
+    if (i != _BadUTF8Char) {
+      i = toLower(i);
+      if (i != _BadUTF8Char)
+        q = utf8cput(q, i);
+    }
+  }
+  assert(q == res + reslen);
+  *q = 0;
+  return res;
+}
+
+char* Unicode::strdupLAT1(const char *src)
+{
+  // Convert a NUL-terminated string of single-byte characters to UTF-8
+  // in a malloc()ed buffer: every source byte is taken as the character
+  // with that numeric value. Returns NULL if the allocation fails;
+  // otherwise the caller owns the buffer and releases it with free().
+  char *res;
+  size_t reslen;
+  ucs4_t i;
+  const unsigned char *p;
+  char *q;
+
+  // Pass 1: size of the UTF-8 encoding.
+  reslen = 0;
+  p = reinterpret_cast<const unsigned char *>(src);
+  while ((i = *p++) != 0) {
+    reslen += utf8clen(i);
+  }
+  res = static_cast<char *>(malloc(reslen + 1));
+  if (res == NULL)   // out of memory: report it instead of writing through NULL
+    return NULL;
+
+  // Pass 2: encode.
+  p = reinterpret_cast<const unsigned char *>(src);
+  q = res;
+  while ((i = *p++) != 0) {
+    q = utf8cput(q, i);
+  }
+  assert(q == res + reslen);
+  *q = 0;
+  return res;
+}
+
+ucs4_t Unicode::getUTF8Char(unsigned const char *&src,
+                            int length /* = -1 */ )
+{
+  if (length != -1) {
+    // Bounded mode: a sequence whose declared byte length exceeds the
+    // available bytes is rejected; src is still advanced by the
+    // declared length.
+    const int declared = getUTF8ByteLength(*src);
+    if (declared > length) {
+      src += declared;
+      return _BadUTF8Char;
+    }
+  }
+
+  const unsigned char lead = *src;
+
+  if (lead < 0x80)                    /* 0x00..0x7f: 1 byte */
+    return *src++;
+
+  if (lead < 0xc0) {                  /* 0x80..0xbf: INVALID as first byte */
+    src += 1;
+    return _BadUTF8Char;
+  }
+
+  // From here on lead >= 0xc0. Each following byte must look like
+  // 10xxxxxx; it is inspected only once the lead byte demands it. On a
+  // bad follow-up byte, src is left pointing at that byte.
+  ucs4_t decoded;
+
+  if ((src[1] & 0xc0) != 0x80) {
+    src += 1;
+    return _BadUTF8Char;
+  }
+  if (lead < 0xe0) {                  /* 0xc0..0xdf: 2 bytes */
+    decoded = ((src[0] & 31) << 6) |
+              (src[1] & 63);
+    if (decoded < 0x80)               /* 2 bytes: >= 0x80 */
+      decoded = _BadUTF8Char;
+    src += 2;
+    return decoded;
+  }
+
+  if ((src[2] & 0xc0) != 0x80) {
+    src += 2;
+    return _BadUTF8Char;
+  }
+  if (lead < 0xf0) {                  /* 0xe0..0xef: 3 bytes */
+    decoded = ((src[0] & 15) << 12) |
+              ((src[1] & 63) << 6) |
+              (src[2] & 63);
+    if (decoded < 0x800)              /* 3 bytes: >= 0x800 */
+      decoded = _BadUTF8Char;
+    src += 3;
+    return decoded;
+  }
+
+  if ((src[3] & 0xc0) != 0x80) {
+    src += 3;
+    return _BadUTF8Char;
+  }
+  if (lead < 0xf8) {                  /* 0xf0..0xf7: 4 bytes */
+    decoded = ((src[0] & 7) << 18) |
+              ((src[1] & 63) << 12) |
+              ((src[2] & 63) << 6) |
+              (src[3] & 63);
+    if (decoded < 0x10000)            /* 4 bytes: >= 0x10000 */
+      decoded = _BadUTF8Char;
+    src += 4;
+    return decoded;
+  }
+
+  if ((src[4] & 0xc0) != 0x80) {
+    src += 4;
+    return _BadUTF8Char;
+  }
+  if (lead < 0xfc) {                  /* 0xf8..0xfb: 5 bytes */
+    decoded = ((src[0] & 3) << 24) |
+              ((src[1] & 63) << 18) |
+              ((src[2] & 63) << 12) |
+              ((src[3] & 63) << 6) |
+              (src[4] & 63);
+    if (decoded < 0x200000u)          /* 5 bytes: >= 0x200000 */
+      decoded = _BadUTF8Char;
+    src += 5;
+    return decoded;
+  }
+
+  if ((src[5] & 0xc0) != 0x80) {
+    src += 5;
+    return _BadUTF8Char;
+  }
+  if (lead >= 0xfe) {                 /* 0xfe..0xff: INVALID */
+    src += 5;
+    return _BadUTF8Char;
+  }
+
+  /* 0xfc..0xfd: 6 bytes */
+  decoded = ((src[0] & 1) << 30) |
+            ((src[1] & 63) << 24) |
+            ((src[2] & 63) << 18) |
+            ((src[3] & 63) << 12) |
+            ((src[4] & 63) << 6) |
+            (src[5] & 63);
+  if (decoded < 0x4000000u)           /* 6 bytes: >= 0x4000000 */
+    decoded = _BadUTF8Char;
+  src += 6;
+  return decoded;
+}
+
+
+
+
+#define UTF8_STARTCHAR(c) (!((c) & 0x80) || ((c) & 0x40)) // false only for bytes of the form 10xxxxxx
+
+ /** Move forwards or backwards a number of characters within an UTF8 buffer
+ * Modify pos to yield new position if possible
+ * @param start A pointer to the start of the UTF8 buffer
+ * @param length The length of the UTF8 buffer
+ * @param pos A pointer to the current position within the UTF8 buffer,
+ * updated to reflect new position upon return. @param pos will
+ * point to the start of the offset'th character before or after the character
+ * currently pointed to.
+ * @param offset An offset (+/-) in number of UTF8 characters.
+ * Offset 0 consequently yields a move to the start of the current character.
+ * Offset 0 is processed by the backward-scanning branch, since only a
+ * strictly positive offset selects the forward direction.
+ * @return Number of bytes moved, or -1 if out of range.
+ * If -1 is returned, pos is unchanged.
+ */
+int Unicode::utf8move(unsigned const char* start, size_t length,
+ unsigned const char*& pos, off_t offset)
+{
+ int increment = offset > 0 ? 1 : -1; // scan direction in bytes; offset == 0 scans backward
+ unsigned const char* p = pos; // working cursor; pos is only written on success
+
+ /* If running backward we first need to get to the start of
+ * the current character, that's an extra step.
+ * Similarly, if we are running forward and are at the start of a character,
+ * we count that character as a step.
+ */
+
+ if (increment < 0)
+ {
+ // Already at start?
+ if (p < start) return -1;
+ if (!offset)
+ {
+ if (p > start + length) return -1;
+ }
+ else if (p == start) return -1;
+
+ // Initially pointing to the first invalid char?
+ if (p == start + length)
+ p += increment;
+ else
+ offset += increment;
+ }
+ else if (p >= start + length)
+ return -1;
+ else if (UTF8_STARTCHAR(*p))
+ offset += increment;
+
+
+ for (; p >= start && p < start+length; p += increment)
+ {
+ /** Are we at start of a character? (both highest bits or none of them set) */
+ if (UTF8_STARTCHAR(*p))
+ offset -= increment; // We have "eaten" another character (independent of dir)
+ if (offset == 0) break;
+ }
+
+ if (offset != 0) // the scan left the buffer before the count reached zero
+ {
+ offset -= increment;
+ if (increment < 0)
+ p -= increment;
+ }
+
+ if (offset == 0) // Enough room to make it..
+ {
+ int moved = abs(p - pos); // distance in bytes, regardless of direction
+ pos = p;
+ return moved;
+ }
+ else
+ return -1;
+}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/unicode.h b/fsa/src/vespa/fsa/unicode.h
new file mode 100644
index 00000000000..3b14299193d
--- /dev/null
+++ b/fsa/src/vespa/fsa/unicode.h
@@ -0,0 +1,483 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+namespace fsa {
+
+/** utf8_t is the type of the multi-byte UTF-8 character components (one 8-bit unsigned byte of a sequence) */
+typedef uint8_t utf8_t;
+/** ucs4_t is the type of the 4-byte UCS4 characters (one 32-bit unsigned value per character) */
+typedef uint32_t ucs4_t;
+
+
+/**
+ * @class Unicode
+ * @brief Unicode character manipulation class.
+ *
+ * Utility class for unicode character handling.
+ * Used to examine properties of unicode characters, and
+ * provide fast conversion methods between often used encodings.
+ *
+ * All data members and member functions are static; the class is used
+ * as a collection of helpers.
+ */
+class Unicode {
+private:
+ /** ISO 8859-1 digits. _isdigit[i] == 1 if i is a digit.
+ */
+ static const unsigned char _isdigit[256];
+ /** ISO 8859-1 operators in integer index expressions.
+ * _isintegerindexop[i] == 1 if i is a valid char in integer
+ * range expressions, which is ';<>[]'.
+ * This is maybe a bit specialized for the fastsearch application?
+ */
+ static const unsigned char _isintegerindexop[256];
+ /** ISO 8859-1 wordchar identification.
+ * _iswordchar[i] == 1 if i is a word character.
+ * Wordchars are A-Z, a-z, 0-9, 0xC0-0xFF except 0xD7 and 0xF7.
+ */
+ static const unsigned char _iswordchar[256];
+ /** ISO 8859-1 identifier start char.
+ * _isidstartchar[i] == 1 if i is an id start character.
+ * Is A-z, a-z.
+ */
+ static const unsigned char _isidstartchar[256];
+ /** ISO 8859-1 identifier char.
+ * _isidchar[i] == 1 if i is an id character.
+ * Is A-z, a-z, 0-9, and '-', '_', ':', '.'.
+ */
+ static const unsigned char _isidchar[256];
+ /** ISO 8859-1 space chars. _isspacechar[i] == 1 if i is a space char.
+ * Space chars are ' ', '\\r', '\\t', '\\n'.
+ */
+ static const unsigned char _isspacechar[256];
+ /**
+ * ISO 8859-1 uppercase to lowercase mapping table.
+ * _tolower[i] == j if j is the lowercase of i, else it is i (identity).
+ * It is useful in the range A-Z, 0xC0-0xE0 except 0xD7.
+ */
+ static const unsigned char _tolower[256];
+ /**
+ * Table for easy lookup of UTF8 character length in bytes
+ * (indexed by the first byte of the sequence).
+ */
+ static const unsigned char _utf8header[256];
+
+ /** Two-level lowercase table. 256 pages, 256 elements each.
+ * This table is defined in unicode-lowercase.cpp, which is
+ * autogenerated by the extcase application.
+ * An entry of 0 means "no mapping" to the lookup functions. */
+ static const unsigned short *_compLowerCase[256];
+
+ /** Two-level character property table. 256 pages with 256 elements each.
+ * This table is defined in unicode-charprops.cpp, which is
+ * autogenerated by the extprop application. */
+ static const unsigned char *_compCharProps[256];
+
+public:
+
+ /** The property bit identifiers */
+ enum {
+ _spaceProp = 1,
+ _wordcharProp = 2,
+ _ideographicProp = 4,
+ _decimalDigitCharProp = 8,
+ _ignorableControlCharProp = 16
+ };
+
+ /** Indicates an invalid UTF-8 character sequence. */
+ static const ucs4_t _BadUTF8Char = 0xfffffffeu;
+ /** EndOfFile */
+ static const ucs4_t _EOF = 0xffffffffu;
+
+ /**
+ * Return the 'raw' property bitmap.
+ * Characters >= 65536 are not covered by the table and yield 0.
+ * @param testchar the UCS4 character to test.
+ * @return unsigned char with the property bitmap.
+ */
+ static unsigned char getProperty(ucs4_t testchar) {
+ if (testchar < 65536)
+ return _compCharProps[testchar >> 8][testchar & 255];
+ else
+ return 0;
+ }
+
+ /**
+ * Test for a specified property.
+ * @param testchar the UCS4 character to test.
+ * @param testprops the set of properties to test for.
+ * @return true if testchar has at least one of the specified
+ * properties; always false for characters >= 65536.
+ */
+ static bool hasProperty(ucs4_t testchar, unsigned char testprops) {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] & testprops) != 0);
+ }
+
+ /**
+ * Test for word character. Characters with certain unicode properties
+ * are recognized as word characters. In addition to this, all
+ * characters with the custom _FASTWordProp is regarded as a word
+ * character. The previous range in _privateUseProp is included
+ * in the _FASTWordProp set of ranges.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is a word character, i.e. if it has
+ * one or more of the properties alphabetic, ideographic,
+ * combining char, decimal digit char, private use, extender.
+ * Always false for characters >= 65536.
+ */
+ static bool isWordChar(ucs4_t testchar) {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] &
+ _wordcharProp) != 0);
+ }
+
+ /**
+ * Test for ideographic character.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an ideographic character,
+ * i.e. if it has the ideographic property.
+ * Always false for characters >= 65536.
+ */
+ static bool isIdeographicChar(ucs4_t testchar) {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] &
+ _ideographicProp) != 0);
+ }
+
+ /**
+ * Test for private use character. Implemented to
+ * return true if character is in the range E000-F8FF,
+ * since this is the only range of characters with
+ * this property.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is a private use character,
+ * i.e. if it has the private use property.
+ */
+ static bool isPrivateUseChar(ucs4_t testchar) {
+ return (testchar >= 0xE000 && testchar <= 0xF8FF);
+ //return (testchar < 65536 &&
+ //(_compCharProps[testchar >> 8][testchar & 255] &
+ //(_privateUseProp)) != 0);
+ }
+
+ /**
+ * Test for ignorable character.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an ignorable character,
+ * i.e. if it has the ignorable control char property.
+ * Always false for characters >= 65536.
+ */
+ static bool isIgnorableChar(ucs4_t testchar) {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] &
+ _ignorableControlCharProp) != 0);
+ }
+
+ /**
+ * Test for identifier start character.
+ * InitTables should be called before using this test.
+ * Only characters below 256 can match.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an identifier start character.
+ */
+ static bool isIDStartChar(ucs4_t testchar)
+ {
+ return (testchar < 256 && _isidstartchar[testchar] != 0);
+ }
+
+ /**
+ * Test for identifier character.
+ * InitTables should be called before using this test.
+ * Only characters below 256 can match.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an identifier character.
+ */
+ static bool isIDChar(ucs4_t testchar)
+ {
+ return (testchar < 256 && _isidchar[testchar] != 0);
+ }
+
+ /**
+ * Test for digit character.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is a digit character,
+ * i.e. if it has the decimal digit char property.
+ * Always false for characters >= 65536.
+ */
+ static bool isDigit(ucs4_t testchar)
+ {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] &
+ _decimalDigitCharProp) != 0);
+ }
+
+ /**
+ * Test for integer range expression character.
+ * InitTables should be called before using this test.
+ * Only characters below 256 can match.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an integer range expression character.
+ */
+ static bool isIntegerIndexOp(ucs4_t testchar)
+ {
+ return (testchar < 256 && _isintegerindexop[testchar] != 0);
+ }
+
+ /**
+ * Test for space character.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is a space character,
+ * i.e. if it has the space char property.
+ * Always false for characters >= 65536.
+ */
+ static bool isSpaceChar(ucs4_t testchar)
+ {
+ return (testchar < 65536 &&
+ (_compCharProps[testchar >> 8][testchar & 255] &
+ _spaceProp) != 0);
+ }
+
+ /**
+ * Test for uppercase character.
+ * A character counts as uppercase when the lowercase table holds a
+ * non-zero entry for it that differs from the character itself.
+ * @param testchar the UCS4 character to test.
+ * @return true if testchar is an uppercase character;
+ * always false for characters >= 65536.
+ */
+ static bool isUpper(ucs4_t testchar)
+ {
+ if (testchar >= 65536)
+ return false;
+ ucs4_t ret = _compLowerCase[testchar >> 8][testchar & 255];
+ return (ret != 0 && ret != testchar);
+ }
+
+ /**
+ * Lowercase an UCS4 character.
+ * Characters >= 65536, and characters whose table entry is 0, are
+ * returned unchanged.
+ * @param testchar The character to lowercase.
+ * @return The lowercase of the input, if defined. Else the input character.
+ */
+ static ucs4_t toLower(ucs4_t testchar)
+ {
+ ucs4_t ret;
+ if (testchar < 65536) {
+ ret = _compLowerCase[testchar >> 8][testchar & 255];
+ if (ret == 0)
+ return testchar;
+ return ret;
+ } else
+ return testchar;
+ }
+
+ /**
+ * Get the length of the UTF-8 representation of an UCS4 character.
+ * @param i The UCS4 character.
+ * @return The number of bytes required for the UTF-8 representation
+ * (1 to 6).
+ */
+ static size_t utf8clen(ucs4_t i) {
+ if (i < 128)
+ return 1;
+ else if (i < 0x800)
+ return 2;
+ else if (i < 0x10000)
+ return 3;
+ else if (i < 0x200000)
+ return 4;
+ else if (i < 0x4000000)
+ return 5;
+ else
+ return 6;
+ }
+
+ /**
+ * Get the length of the UTF8 character in number of bytes
+ * @param utf8char the first byte in a UTF8 character
+ * @return the number of bytes in the UTF8 character
+ */
+ static unsigned char getUTF8ByteLength(unsigned char utf8char) {
+ return _utf8header[utf8char];
+ }
+
+ /**
+ * Put an UCS4 character into a buffer as an UTF-8 representation.
+ * No terminator is written and no bound is checked; the number of
+ * bytes written equals utf8clen(i).
+ * @param dst The destination buffer.
+ * @param i The UCS4 character.
+ * @return Pointer to the next position in dst after the written byte(s).
+ */
+ static char *utf8cput(char *dst, ucs4_t i) {
+ if (i < 128)
+ *dst++ = i;
+ else if (i < 0x800) {
+ *dst++ = (i >> 6) | 0xc0;
+ *dst++ = (i & 63) | 0x80;
+ } else if (i < 0x10000) {
+ *dst++ = (i >> 12) | 0xe0;
+ *dst++ = ((i >> 6) & 63) | 0x80;
+ *dst++ = (i & 63) | 0x80;
+ } else if (i < 0x200000) {
+ *dst++ = (i >> 18) | 0xf0;
+ *dst++ = ((i >> 12) & 63) | 0x80;
+ *dst++ = ((i >> 6) & 63) | 0x80;
+ *dst++ = (i & 63) | 0x80;
+ } else if (i < 0x4000000) {
+ *dst++ = (i >> 24) | 0xf8;
+ *dst++ = ((i >> 18) & 63) | 0x80;
+ *dst++ = ((i >> 12) & 63) | 0x80;
+ *dst++ = ((i >> 6) & 63) | 0x80;
+ *dst++ = (i & 63) | 0x80;
+ } else {
+ *dst++ = (i >> 30) | 0xfc;
+ *dst++ = ((i >> 24) & 63) | 0x80;
+ *dst++ = ((i >> 18) & 63) | 0x80;
+ *dst++ = ((i >> 12) & 63) | 0x80;
+ *dst++ = ((i >> 6) & 63) | 0x80;
+ *dst++ = (i & 63) | 0x80;
+ }
+ return dst;
+ }
+
+
+ /**
+ * Convert UCS4 to UTF-8.
+ * The destination is NUL-terminated; its size is not checked.
+ * @param dst The destination buffer for the UTF-8 string.
+ * @param src The source UCS4 string.
+ * @return A pointer to the terminating NUL written into dst
+ * (i.e. the end of the converted string, not its start).
+ */
+ static char *utf8copy(char *dst, const ucs4_t *src);
+
+ /**
+ * Convert UCS4 to UTF-8, bounded by max lengths.
+ * A character whose complete encoding does not fit is not written.
+ * The terminating NUL is stored only if it fits within maxdst.
+ * @param dst The destination buffer for the UTF-8 string.
+ * @param src The source UCS4 string.
+ * @param maxdst The maximum number of bytes to put into dst.
+ * @param maxsrc The maximum number of characters to convert from src.
+ * @return A pointer just past the last byte written to dst
+ * (i.e. the end of the converted string, not its start).
+ */
+ static char *utf8ncopy(char *dst, const ucs4_t *src, int maxdst, int maxsrc);
+
+ /**
+ * Compare an UTF-8 string to a UCS4 string, analogous to strcmp(3).
+ * @param s1 The UTF-8 string.
+ * @param s2 The UCS4 string.
+ * @return An integer less than, equal to, or greater than zero,
+ * if s1 is, respectively, less than, matching, or greater than s2.
+ * The implementation returns exactly -1, 0 or 1.
+ */
+ static int utf8cmp(const char *s1, const ucs4_t *s2);
+
+ /**
+ * Compare an UTF-8 string to a UCS4 string, ignoring case.
+ * This is comparable to strcasecmp(3).
+ * @param s1 The UTF-8 string.
+ * @param s2 The UCS4 string.
+ * @return An integer less than, equal to, or greater than zero,
+ * if s1 is, respectively, less than, matching, or greater than s2.
+ * The implementation returns exactly -1, 0 or 1.
+ */
+ static int utf8casecmp(const char *s1, const ucs4_t *s2);
+
+ /**
+ * Find the length, in bytes, of the UTF-8 representation of an UCS4 string.
+ * The terminator is not counted.
+ * @param str The UCS4 string.
+ * @return The length, in bytes, of the equivalent UTF-8 representation.
+ */
+ static size_t utf8len(const ucs4_t *str);
+
+ /**
+ * Find the length, in bytes, of the UTF-8 representation of the first
+ * maxsrc characters of an UCS4 string.
+ * @param str The UCS4 string.
+ * @param maxsrc The maximum number of UCS4 characters to consider.
+ * @return The length, in bytes, of the equivalent UTF-8 representation.
+ */
+ static size_t utf8nlen(const ucs4_t *str, int maxsrc);
+
+ /**
+ * Find the number of characters in an UCS4 string.
+ * @param str The UCS4 string.
+ * @return The number of characters.
+ */
+ static size_t ucs4strlen(const ucs4_t *str);
+
+ /**
+ * Find the number of UCS4 characters in an UTF-8 string. I.e.
+ * how many UCS4 characters would be needed for the string.
+ * Invalid UTF-8 sequences are not counted.
+ * @param str The UTF-8 string.
+ * @return The number of characters needed.
+ */
+ static size_t ucs4len(const char *str);
+
+ /**
+ * Find the number of characters in an UTF-8 string, up to
+ * a maximum of bytes.
+ * Invalid UTF-8 sequences are not counted.
+ * @param str The UTF-8 string.
+ * @param n The max number of bytes to consider.
+ * @return The number of characters needed.
+ */
+ static size_t ucs4nlen(const char *str, size_t n);
+
+ /**
+ * Copy an UTF-8 string into an UCS4 string.
+ * Invalid UTF-8 sequences are skipped. The destination is
+ * 0-terminated; its size is not checked.
+ * @param dst The UCS4 destination buffer.
+ * @param src The UTF-8 source buffer.
+ * @return A pointer to the terminating 0 written into dst
+ * (i.e. the end of the converted string, not its start).
+ */
+ static ucs4_t *ucs4copy(ucs4_t *dst, const char *src);
+
+ /**
+ * Copy an UTF-8 string into an UCS4 string, up to a maximum
+ * number of bytes from the UTF-8 string.
+ * Invalid UTF-8 sequences are skipped. The destination is
+ * 0-terminated; its size is not checked.
+ * @param dst Destination UCS4 string buffer.
+ * @param src Source UTF-8 string.
+ * @param maxsrc Max number of bytes to copy.
+ * @return A pointer to the terminating 0 written into dst
+ * (i.e. the end of the converted string, not its start).
+ */
+ static ucs4_t *ucs4ncopy(ucs4_t *dst, const char *src, int maxsrc);
+
+ /**
+ * Copy an UTF-8 string to an UTF-8 string.
+ * This only copies the valid UTF-8 characters.
+ * @param src The source UTF-8 string.
+ * @return Pointer to a newly allocated buffer with the result.
+ * The buffer is obtained with malloc().
+ */
+ static char *strdupUTF8(const char *src);
+
+ /**
+ * Copy an UTF-8 string to an UTF-8 string, converting
+ * to lowercase as we go.
+ * @param src The source UTF-8 string.
+ * @return Pointer to a newly allocated buffer with the result.
+ * The buffer is obtained with malloc().
+ */
+ static char *strlowdupUTF8(const char *src);
+
+ /**
+ * Copy an ISO-8859-1 string to an UTF-8 string.
+ * @param src The source ISO-8859-1 string.
+ * @return Pointer to a newly allocated buffer with the UTF-8 result.
+ * The buffer is obtained with malloc().
+ */
+ static char *strdupLAT1(const char *src);
+
+ /**
+ * Get the next UCS4 character from an UTF-8 string buffer.
+ * Modify the src pointer to allow future calls.
+ * The overload taking a char pointer simply forwards to the
+ * unsigned char variant and writes the advanced position back.
+ * @param src The address of a pointer to the current position
+ * in the UTF-8 string.
+ * @param length The maximum allowed length of the byte sequence.
+ * -1 means no check.
+ * @return The next UCS4 character, or _BadUTF8Char if the
+ * next character is invalid.
+ */
+ static ucs4_t getUTF8Char(unsigned const char *&src,
+ int length = -1);
+ static ucs4_t getUTF8Char(char *&src,
+ int length = -1)
+ {
+ unsigned const char *temp = reinterpret_cast<unsigned char*>(src);
+ ucs4_t res=getUTF8Char(temp,length);
+ src=reinterpret_cast<char*>(const_cast<unsigned char*>(temp));
+ return res;
+ }
+
+
+ /** Move forwards or backwards a number of characters within an UTF8 buffer
+ * Modify pos to yield new position if possible
+ * @param start A pointer to the start of the UTF8 buffer
+ * @param length The length of the UTF8 buffer
+ * @param pos A pointer to the current position within the UTF8 buffer,
+ * updated to reflect new position upon return
+ * @param offset An offset (+/-) in number of UTF8 characters.
+ * Offset 0 means move to the start of the current character.
+ * @return Number of bytes moved, or -1 if out of range
+ * (in which case pos is left unchanged)
+ */
+ static int utf8move(unsigned const char* start, size_t length,
+ unsigned const char*& pos, off_t offset);
+};
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/unicode_charprops.cpp b/fsa/src/vespa/fsa/unicode_charprops.cpp
new file mode 100644
index 00000000000..3bcc45d4a2c
--- /dev/null
+++ b/fsa/src/vespa/fsa/unicode_charprops.cpp
@@ -0,0 +1,1688 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "unicode.h"
+
+namespace fsa {
+
+/*
+ * Bit 0 indicates white space character
+ * Bit 1 indicates word character
+ * Bit 2 indicates ideographic character
+ * Bit 3 indicates decimal digit character
+ * Bit 4 indicates ignorable control character
+ */
+
+static unsigned char _intCompCharProps[11264]={
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x01, 0x01, 0x01, 0x01, 0x01, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x01, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x02, 0x00, 0x00, 0x10, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x10,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x00, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x00, 0x02, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x10, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x00, 0x02, 0x00, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x00, 0x02, 0x00, 0x00, 0x02,
+ 0x02, 0x00, 0x02, 0x00, 0x00, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
+ 0x12, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x10, 0x10, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x01, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x10, 0x10, 0x10, 0x10, 0x10, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x06,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x06, 0x06, 0x06, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x12, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x00,
+ 0x02, 0x02, 0x00, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x10,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
+ 0x0A, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x22, 0x22,
+ 0x12, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x00, 0x00, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,
+};
+
+const unsigned char *Unicode::_compCharProps[256]={ /*
+ * Page table for the character property data: 256 entries, one per page
+ * number 0x00..0xFF (the page number is given in the comment on each entry).
+ * Every entry is a pointer into _intCompCharProps at an offset that is a
+ * multiple of 0x100.
+ *
+ * Many pages point at the same offset -- e.g. pages 0x34..0x4C and
+ * 0x4E..0x9E all use offset 0x2100 -- so the offsets are NOT simply
+ * page * 0x100; the highest offset used is 0x2B00 (page 0xFF).
+ *
+ * NOTE(review): presumably a page is selected by the high byte of a 16-bit
+ * code point and the low byte indexes the 0x100 entries within the page,
+ * with identical pages stored only once -- confirm against the lookup
+ * code declared in unicode.h.
+ */
+ _intCompCharProps+0x0000, /* Page 0x00 */
+ _intCompCharProps+0x0100, /* Page 0x01 */
+ _intCompCharProps+0x0200, /* Page 0x02 */
+ _intCompCharProps+0x0300, /* Page 0x03 */
+ _intCompCharProps+0x0400, /* Page 0x04 */
+ _intCompCharProps+0x0500, /* Page 0x05 */
+ _intCompCharProps+0x0600, /* Page 0x06 */
+ _intCompCharProps+0x0700, /* Page 0x07 */
+ _intCompCharProps+0x0800, /* Page 0x08 */
+ _intCompCharProps+0x0900, /* Page 0x09 */
+ _intCompCharProps+0x0A00, /* Page 0x0A */
+ _intCompCharProps+0x0B00, /* Page 0x0B */
+ _intCompCharProps+0x0C00, /* Page 0x0C */
+ _intCompCharProps+0x0D00, /* Page 0x0D */
+ _intCompCharProps+0x0E00, /* Page 0x0E */
+ _intCompCharProps+0x0F00, /* Page 0x0F */
+ _intCompCharProps+0x1000, /* Page 0x10 */
+ _intCompCharProps+0x1100, /* Page 0x11 */
+ _intCompCharProps+0x1200, /* Page 0x12 */
+ _intCompCharProps+0x1300, /* Page 0x13 */
+ _intCompCharProps+0x1400, /* Page 0x14 */
+ _intCompCharProps+0x0100, /* Page 0x15 */
+ _intCompCharProps+0x1500, /* Page 0x16 */
+ _intCompCharProps+0x1600, /* Page 0x17 */
+ _intCompCharProps+0x1700, /* Page 0x18 */
+ _intCompCharProps+0x1800, /* Page 0x19 */
+ _intCompCharProps+0x0800, /* Page 0x1A */
+ _intCompCharProps+0x0800, /* Page 0x1B */
+ _intCompCharProps+0x0800, /* Page 0x1C */
+ _intCompCharProps+0x1900, /* Page 0x1D */
+ _intCompCharProps+0x1A00, /* Page 0x1E */
+ _intCompCharProps+0x1B00, /* Page 0x1F */
+ _intCompCharProps+0x1C00, /* Page 0x20 */
+ _intCompCharProps+0x1D00, /* Page 0x21 */
+ _intCompCharProps+0x0800, /* Page 0x22 */
+ _intCompCharProps+0x0800, /* Page 0x23 */
+ _intCompCharProps+0x0800, /* Page 0x24 */
+ _intCompCharProps+0x0800, /* Page 0x25 */
+ _intCompCharProps+0x0800, /* Page 0x26 */
+ _intCompCharProps+0x0800, /* Page 0x27 */
+ _intCompCharProps+0x0800, /* Page 0x28 */
+ _intCompCharProps+0x0800, /* Page 0x29 */
+ _intCompCharProps+0x0800, /* Page 0x2A */
+ _intCompCharProps+0x0800, /* Page 0x2B */
+ _intCompCharProps+0x0800, /* Page 0x2C */
+ _intCompCharProps+0x0800, /* Page 0x2D */
+ _intCompCharProps+0x0800, /* Page 0x2E */
+ _intCompCharProps+0x0800, /* Page 0x2F */
+ _intCompCharProps+0x1E00, /* Page 0x30 */
+ _intCompCharProps+0x1F00, /* Page 0x31 */
+ _intCompCharProps+0x2000, /* Page 0x32 */
+ _intCompCharProps+0x0800, /* Page 0x33 */
+ _intCompCharProps+0x2100, /* Page 0x34 */
+ _intCompCharProps+0x2100, /* Page 0x35 */
+ _intCompCharProps+0x2100, /* Page 0x36 */
+ _intCompCharProps+0x2100, /* Page 0x37 */
+ _intCompCharProps+0x2100, /* Page 0x38 */
+ _intCompCharProps+0x2100, /* Page 0x39 */
+ _intCompCharProps+0x2100, /* Page 0x3A */
+ _intCompCharProps+0x2100, /* Page 0x3B */
+ _intCompCharProps+0x2100, /* Page 0x3C */
+ _intCompCharProps+0x2100, /* Page 0x3D */
+ _intCompCharProps+0x2100, /* Page 0x3E */
+ _intCompCharProps+0x2100, /* Page 0x3F */
+ _intCompCharProps+0x2100, /* Page 0x40 */
+ _intCompCharProps+0x2100, /* Page 0x41 */
+ _intCompCharProps+0x2100, /* Page 0x42 */
+ _intCompCharProps+0x2100, /* Page 0x43 */
+ _intCompCharProps+0x2100, /* Page 0x44 */
+ _intCompCharProps+0x2100, /* Page 0x45 */
+ _intCompCharProps+0x2100, /* Page 0x46 */
+ _intCompCharProps+0x2100, /* Page 0x47 */
+ _intCompCharProps+0x2100, /* Page 0x48 */
+ _intCompCharProps+0x2100, /* Page 0x49 */
+ _intCompCharProps+0x2100, /* Page 0x4A */
+ _intCompCharProps+0x2100, /* Page 0x4B */
+ _intCompCharProps+0x2100, /* Page 0x4C */
+ _intCompCharProps+0x2200, /* Page 0x4D */
+ _intCompCharProps+0x2100, /* Page 0x4E */
+ _intCompCharProps+0x2100, /* Page 0x4F */
+ _intCompCharProps+0x2100, /* Page 0x50 */
+ _intCompCharProps+0x2100, /* Page 0x51 */
+ _intCompCharProps+0x2100, /* Page 0x52 */
+ _intCompCharProps+0x2100, /* Page 0x53 */
+ _intCompCharProps+0x2100, /* Page 0x54 */
+ _intCompCharProps+0x2100, /* Page 0x55 */
+ _intCompCharProps+0x2100, /* Page 0x56 */
+ _intCompCharProps+0x2100, /* Page 0x57 */
+ _intCompCharProps+0x2100, /* Page 0x58 */
+ _intCompCharProps+0x2100, /* Page 0x59 */
+ _intCompCharProps+0x2100, /* Page 0x5A */
+ _intCompCharProps+0x2100, /* Page 0x5B */
+ _intCompCharProps+0x2100, /* Page 0x5C */
+ _intCompCharProps+0x2100, /* Page 0x5D */
+ _intCompCharProps+0x2100, /* Page 0x5E */
+ _intCompCharProps+0x2100, /* Page 0x5F */
+ _intCompCharProps+0x2100, /* Page 0x60 */
+ _intCompCharProps+0x2100, /* Page 0x61 */
+ _intCompCharProps+0x2100, /* Page 0x62 */
+ _intCompCharProps+0x2100, /* Page 0x63 */
+ _intCompCharProps+0x2100, /* Page 0x64 */
+ _intCompCharProps+0x2100, /* Page 0x65 */
+ _intCompCharProps+0x2100, /* Page 0x66 */
+ _intCompCharProps+0x2100, /* Page 0x67 */
+ _intCompCharProps+0x2100, /* Page 0x68 */
+ _intCompCharProps+0x2100, /* Page 0x69 */
+ _intCompCharProps+0x2100, /* Page 0x6A */
+ _intCompCharProps+0x2100, /* Page 0x6B */
+ _intCompCharProps+0x2100, /* Page 0x6C */
+ _intCompCharProps+0x2100, /* Page 0x6D */
+ _intCompCharProps+0x2100, /* Page 0x6E */
+ _intCompCharProps+0x2100, /* Page 0x6F */
+ _intCompCharProps+0x2100, /* Page 0x70 */
+ _intCompCharProps+0x2100, /* Page 0x71 */
+ _intCompCharProps+0x2100, /* Page 0x72 */
+ _intCompCharProps+0x2100, /* Page 0x73 */
+ _intCompCharProps+0x2100, /* Page 0x74 */
+ _intCompCharProps+0x2100, /* Page 0x75 */
+ _intCompCharProps+0x2100, /* Page 0x76 */
+ _intCompCharProps+0x2100, /* Page 0x77 */
+ _intCompCharProps+0x2100, /* Page 0x78 */
+ _intCompCharProps+0x2100, /* Page 0x79 */
+ _intCompCharProps+0x2100, /* Page 0x7A */
+ _intCompCharProps+0x2100, /* Page 0x7B */
+ _intCompCharProps+0x2100, /* Page 0x7C */
+ _intCompCharProps+0x2100, /* Page 0x7D */
+ _intCompCharProps+0x2100, /* Page 0x7E */
+ _intCompCharProps+0x2100, /* Page 0x7F */
+ _intCompCharProps+0x2100, /* Page 0x80 */
+ _intCompCharProps+0x2100, /* Page 0x81 */
+ _intCompCharProps+0x2100, /* Page 0x82 */
+ _intCompCharProps+0x2100, /* Page 0x83 */
+ _intCompCharProps+0x2100, /* Page 0x84 */
+ _intCompCharProps+0x2100, /* Page 0x85 */
+ _intCompCharProps+0x2100, /* Page 0x86 */
+ _intCompCharProps+0x2100, /* Page 0x87 */
+ _intCompCharProps+0x2100, /* Page 0x88 */
+ _intCompCharProps+0x2100, /* Page 0x89 */
+ _intCompCharProps+0x2100, /* Page 0x8A */
+ _intCompCharProps+0x2100, /* Page 0x8B */
+ _intCompCharProps+0x2100, /* Page 0x8C */
+ _intCompCharProps+0x2100, /* Page 0x8D */
+ _intCompCharProps+0x2100, /* Page 0x8E */
+ _intCompCharProps+0x2100, /* Page 0x8F */
+ _intCompCharProps+0x2100, /* Page 0x90 */
+ _intCompCharProps+0x2100, /* Page 0x91 */
+ _intCompCharProps+0x2100, /* Page 0x92 */
+ _intCompCharProps+0x2100, /* Page 0x93 */
+ _intCompCharProps+0x2100, /* Page 0x94 */
+ _intCompCharProps+0x2100, /* Page 0x95 */
+ _intCompCharProps+0x2100, /* Page 0x96 */
+ _intCompCharProps+0x2100, /* Page 0x97 */
+ _intCompCharProps+0x2100, /* Page 0x98 */
+ _intCompCharProps+0x2100, /* Page 0x99 */
+ _intCompCharProps+0x2100, /* Page 0x9A */
+ _intCompCharProps+0x2100, /* Page 0x9B */
+ _intCompCharProps+0x2100, /* Page 0x9C */
+ _intCompCharProps+0x2100, /* Page 0x9D */
+ _intCompCharProps+0x2100, /* Page 0x9E */
+ _intCompCharProps+0x2300, /* Page 0x9F */
+ _intCompCharProps+0x0100, /* Page 0xA0 */
+ _intCompCharProps+0x0100, /* Page 0xA1 */
+ _intCompCharProps+0x0100, /* Page 0xA2 */
+ _intCompCharProps+0x0100, /* Page 0xA3 */
+ _intCompCharProps+0x2400, /* Page 0xA4 */
+ _intCompCharProps+0x0800, /* Page 0xA5 */
+ _intCompCharProps+0x0800, /* Page 0xA6 */
+ _intCompCharProps+0x0800, /* Page 0xA7 */
+ _intCompCharProps+0x0800, /* Page 0xA8 */
+ _intCompCharProps+0x0800, /* Page 0xA9 */
+ _intCompCharProps+0x0800, /* Page 0xAA */
+ _intCompCharProps+0x0800, /* Page 0xAB */
+ _intCompCharProps+0x0100, /* Page 0xAC */
+ _intCompCharProps+0x0100, /* Page 0xAD */
+ _intCompCharProps+0x0100, /* Page 0xAE */
+ _intCompCharProps+0x0100, /* Page 0xAF */
+ _intCompCharProps+0x0100, /* Page 0xB0 */
+ _intCompCharProps+0x0100, /* Page 0xB1 */
+ _intCompCharProps+0x0100, /* Page 0xB2 */
+ _intCompCharProps+0x0100, /* Page 0xB3 */
+ _intCompCharProps+0x0100, /* Page 0xB4 */
+ _intCompCharProps+0x0100, /* Page 0xB5 */
+ _intCompCharProps+0x0100, /* Page 0xB6 */
+ _intCompCharProps+0x0100, /* Page 0xB7 */
+ _intCompCharProps+0x0100, /* Page 0xB8 */
+ _intCompCharProps+0x0100, /* Page 0xB9 */
+ _intCompCharProps+0x0100, /* Page 0xBA */
+ _intCompCharProps+0x0100, /* Page 0xBB */
+ _intCompCharProps+0x0100, /* Page 0xBC */
+ _intCompCharProps+0x0100, /* Page 0xBD */
+ _intCompCharProps+0x0100, /* Page 0xBE */
+ _intCompCharProps+0x0100, /* Page 0xBF */
+ _intCompCharProps+0x0100, /* Page 0xC0 */
+ _intCompCharProps+0x0100, /* Page 0xC1 */
+ _intCompCharProps+0x0100, /* Page 0xC2 */
+ _intCompCharProps+0x0100, /* Page 0xC3 */
+ _intCompCharProps+0x0100, /* Page 0xC4 */
+ _intCompCharProps+0x0100, /* Page 0xC5 */
+ _intCompCharProps+0x0100, /* Page 0xC6 */
+ _intCompCharProps+0x0100, /* Page 0xC7 */
+ _intCompCharProps+0x0100, /* Page 0xC8 */
+ _intCompCharProps+0x0100, /* Page 0xC9 */
+ _intCompCharProps+0x0100, /* Page 0xCA */
+ _intCompCharProps+0x0100, /* Page 0xCB */
+ _intCompCharProps+0x0100, /* Page 0xCC */
+ _intCompCharProps+0x0100, /* Page 0xCD */
+ _intCompCharProps+0x0100, /* Page 0xCE */
+ _intCompCharProps+0x0100, /* Page 0xCF */
+ _intCompCharProps+0x0100, /* Page 0xD0 */
+ _intCompCharProps+0x0100, /* Page 0xD1 */
+ _intCompCharProps+0x0100, /* Page 0xD2 */
+ _intCompCharProps+0x0100, /* Page 0xD3 */
+ _intCompCharProps+0x0100, /* Page 0xD4 */
+ _intCompCharProps+0x0100, /* Page 0xD5 */
+ _intCompCharProps+0x0100, /* Page 0xD6 */
+ _intCompCharProps+0x2500, /* Page 0xD7 */
+ _intCompCharProps+0x2600, /* Page 0xD8 */
+ _intCompCharProps+0x2600, /* Page 0xD9 */
+ _intCompCharProps+0x2600, /* Page 0xDA */
+ _intCompCharProps+0x2600, /* Page 0xDB */
+ _intCompCharProps+0x2600, /* Page 0xDC */
+ _intCompCharProps+0x2600, /* Page 0xDD */
+ _intCompCharProps+0x2600, /* Page 0xDE */
+ _intCompCharProps+0x2600, /* Page 0xDF */
+ _intCompCharProps+0x0100, /* Page 0xE0 */
+ _intCompCharProps+0x0100, /* Page 0xE1 */
+ _intCompCharProps+0x0100, /* Page 0xE2 */
+ _intCompCharProps+0x0100, /* Page 0xE3 */
+ _intCompCharProps+0x0100, /* Page 0xE4 */
+ _intCompCharProps+0x0100, /* Page 0xE5 */
+ _intCompCharProps+0x0100, /* Page 0xE6 */
+ _intCompCharProps+0x0100, /* Page 0xE7 */
+ _intCompCharProps+0x0100, /* Page 0xE8 */
+ _intCompCharProps+0x0100, /* Page 0xE9 */
+ _intCompCharProps+0x0100, /* Page 0xEA */
+ _intCompCharProps+0x0100, /* Page 0xEB */
+ _intCompCharProps+0x0100, /* Page 0xEC */
+ _intCompCharProps+0x0100, /* Page 0xED */
+ _intCompCharProps+0x0100, /* Page 0xEE */
+ _intCompCharProps+0x0100, /* Page 0xEF */
+ _intCompCharProps+0x0100, /* Page 0xF0 */
+ _intCompCharProps+0x0100, /* Page 0xF1 */
+ _intCompCharProps+0x0100, /* Page 0xF2 */
+ _intCompCharProps+0x0100, /* Page 0xF3 */
+ _intCompCharProps+0x0100, /* Page 0xF4 */
+ _intCompCharProps+0x0100, /* Page 0xF5 */
+ _intCompCharProps+0x0100, /* Page 0xF6 */
+ _intCompCharProps+0x0100, /* Page 0xF7 */
+ _intCompCharProps+0x0100, /* Page 0xF8 */
+ _intCompCharProps+0x2100, /* Page 0xF9 */
+ _intCompCharProps+0x2700, /* Page 0xFA */
+ _intCompCharProps+0x2800, /* Page 0xFB */
+ _intCompCharProps+0x0100, /* Page 0xFC */
+ _intCompCharProps+0x2900, /* Page 0xFD */
+ _intCompCharProps+0x2A00, /* Page 0xFE */
+ _intCompCharProps+0x2B00 /* Page 0xFF */
+};
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/unicode_lowercase.cpp b/fsa/src/vespa/fsa/unicode_lowercase.cpp
new file mode 100644
index 00000000000..e69368c6ef3
--- /dev/null
+++ b/fsa/src/vespa/fsa/unicode_lowercase.cpp
@@ -0,0 +1,656 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "unicode.h"
+
+namespace fsa {
+
+static unsigned short _intCompLowerCase[3072]={
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0061u, 0x0062u, 0x0063u, 0x0064u, 0x0065u, 0x0066u, 0x0067u,
+ 0x0068u, 0x0069u, 0x006Au, 0x006Bu, 0x006Cu, 0x006Du, 0x006Eu, 0x006Fu,
+ 0x0070u, 0x0071u, 0x0072u, 0x0073u, 0x0074u, 0x0075u, 0x0076u, 0x0077u,
+ 0x0078u, 0x0079u, 0x007Au, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x00E0u, 0x00E1u, 0x00E2u, 0x00E3u, 0x00E4u, 0x00E5u, 0x00E6u, 0x00E7u,
+ 0x00E8u, 0x00E9u, 0x00EAu, 0x00EBu, 0x00ECu, 0x00EDu, 0x00EEu, 0x00EFu,
+ 0x00F0u, 0x00F1u, 0x00F2u, 0x00F3u, 0x00F4u, 0x00F5u, 0x00F6u, 0x0000u,
+ 0x00F8u, 0x00F9u, 0x00FAu, 0x00FBu, 0x00FCu, 0x00FDu, 0x00FEu, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0101u, 0x0000u, 0x0103u, 0x0000u, 0x0105u, 0x0000u, 0x0107u, 0x0000u,
+ 0x0109u, 0x0000u, 0x010Bu, 0x0000u, 0x010Du, 0x0000u, 0x010Fu, 0x0000u,
+ 0x0111u, 0x0000u, 0x0113u, 0x0000u, 0x0115u, 0x0000u, 0x0117u, 0x0000u,
+ 0x0119u, 0x0000u, 0x011Bu, 0x0000u, 0x011Du, 0x0000u, 0x011Fu, 0x0000u,
+ 0x0121u, 0x0000u, 0x0123u, 0x0000u, 0x0125u, 0x0000u, 0x0127u, 0x0000u,
+ 0x0129u, 0x0000u, 0x012Bu, 0x0000u, 0x012Du, 0x0000u, 0x012Fu, 0x0000u,
+ 0x0069u, 0x0000u, 0x0133u, 0x0000u, 0x0135u, 0x0000u, 0x0137u, 0x0000u,
+ 0x0000u, 0x013Au, 0x0000u, 0x013Cu, 0x0000u, 0x013Eu, 0x0000u, 0x0140u,
+ 0x0000u, 0x0142u, 0x0000u, 0x0144u, 0x0000u, 0x0146u, 0x0000u, 0x0148u,
+ 0x0000u, 0x0000u, 0x014Bu, 0x0000u, 0x014Du, 0x0000u, 0x014Fu, 0x0000u,
+ 0x0151u, 0x0000u, 0x0153u, 0x0000u, 0x0155u, 0x0000u, 0x0157u, 0x0000u,
+ 0x0159u, 0x0000u, 0x015Bu, 0x0000u, 0x015Du, 0x0000u, 0x015Fu, 0x0000u,
+ 0x0161u, 0x0000u, 0x0163u, 0x0000u, 0x0165u, 0x0000u, 0x0167u, 0x0000u,
+ 0x0169u, 0x0000u, 0x016Bu, 0x0000u, 0x016Du, 0x0000u, 0x016Fu, 0x0000u,
+ 0x0171u, 0x0000u, 0x0173u, 0x0000u, 0x0175u, 0x0000u, 0x0177u, 0x0000u,
+ 0x00FFu, 0x017Au, 0x0000u, 0x017Cu, 0x0000u, 0x017Eu, 0x0000u, 0x0000u,
+ 0x0000u, 0x0253u, 0x0183u, 0x0000u, 0x0185u, 0x0000u, 0x0254u, 0x0188u,
+ 0x0000u, 0x0256u, 0x0257u, 0x018Cu, 0x0000u, 0x0000u, 0x01DDu, 0x0259u,
+ 0x025Bu, 0x0192u, 0x0000u, 0x0260u, 0x0263u, 0x0000u, 0x0269u, 0x0268u,
+ 0x0199u, 0x0000u, 0x0000u, 0x0000u, 0x026Fu, 0x0272u, 0x0000u, 0x0275u,
+ 0x01A1u, 0x0000u, 0x01A3u, 0x0000u, 0x01A5u, 0x0000u, 0x0280u, 0x01A8u,
+ 0x0000u, 0x0283u, 0x0000u, 0x0000u, 0x01ADu, 0x0000u, 0x0288u, 0x01B0u,
+ 0x0000u, 0x028Au, 0x028Bu, 0x01B4u, 0x0000u, 0x01B6u, 0x0000u, 0x0292u,
+ 0x01B9u, 0x0000u, 0x0000u, 0x0000u, 0x01BDu, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x01C6u, 0x01C6u, 0x0000u, 0x01C9u,
+ 0x01C9u, 0x0000u, 0x01CCu, 0x01CCu, 0x0000u, 0x01CEu, 0x0000u, 0x01D0u,
+ 0x0000u, 0x01D2u, 0x0000u, 0x01D4u, 0x0000u, 0x01D6u, 0x0000u, 0x01D8u,
+ 0x0000u, 0x01DAu, 0x0000u, 0x01DCu, 0x0000u, 0x0000u, 0x01DFu, 0x0000u,
+ 0x01E1u, 0x0000u, 0x01E3u, 0x0000u, 0x01E5u, 0x0000u, 0x01E7u, 0x0000u,
+ 0x01E9u, 0x0000u, 0x01EBu, 0x0000u, 0x01EDu, 0x0000u, 0x01EFu, 0x0000u,
+ 0x0000u, 0x01F3u, 0x01F3u, 0x0000u, 0x01F5u, 0x0000u, 0x0195u, 0x01BFu,
+ 0x01F9u, 0x0000u, 0x01FBu, 0x0000u, 0x01FDu, 0x0000u, 0x01FFu, 0x0000u,
+ 0x0201u, 0x0000u, 0x0203u, 0x0000u, 0x0205u, 0x0000u, 0x0207u, 0x0000u,
+ 0x0209u, 0x0000u, 0x020Bu, 0x0000u, 0x020Du, 0x0000u, 0x020Fu, 0x0000u,
+ 0x0211u, 0x0000u, 0x0213u, 0x0000u, 0x0215u, 0x0000u, 0x0217u, 0x0000u,
+ 0x0219u, 0x0000u, 0x021Bu, 0x0000u, 0x021Du, 0x0000u, 0x021Fu, 0x0000u,
+ 0x019Eu, 0x0000u, 0x0223u, 0x0000u, 0x0225u, 0x0000u, 0x0227u, 0x0000u,
+ 0x0229u, 0x0000u, 0x022Bu, 0x0000u, 0x022Du, 0x0000u, 0x022Fu, 0x0000u,
+ 0x0231u, 0x0000u, 0x0233u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x03ACu, 0x0000u,
+ 0x03ADu, 0x03AEu, 0x03AFu, 0x0000u, 0x03CCu, 0x0000u, 0x03CDu, 0x03CEu,
+ 0x0000u, 0x03B1u, 0x03B2u, 0x03B3u, 0x03B4u, 0x03B5u, 0x03B6u, 0x03B7u,
+ 0x03B8u, 0x03B9u, 0x03BAu, 0x03BBu, 0x03BCu, 0x03BDu, 0x03BEu, 0x03BFu,
+ 0x03C0u, 0x03C1u, 0x0000u, 0x03C3u, 0x03C4u, 0x03C5u, 0x03C6u, 0x03C7u,
+ 0x03C8u, 0x03C9u, 0x03CAu, 0x03CBu, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x03D9u, 0x0000u, 0x03DBu, 0x0000u, 0x03DDu, 0x0000u, 0x03DFu, 0x0000u,
+ 0x03E1u, 0x0000u, 0x03E3u, 0x0000u, 0x03E5u, 0x0000u, 0x03E7u, 0x0000u,
+ 0x03E9u, 0x0000u, 0x03EBu, 0x0000u, 0x03EDu, 0x0000u, 0x03EFu, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x03B8u, 0x0000u, 0x0000u, 0x03F8u,
+ 0x0000u, 0x03F2u, 0x03FBu, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0450u, 0x0451u, 0x0452u, 0x0453u, 0x0454u, 0x0455u, 0x0456u, 0x0457u,
+ 0x0458u, 0x0459u, 0x045Au, 0x045Bu, 0x045Cu, 0x045Du, 0x045Eu, 0x045Fu,
+ 0x0430u, 0x0431u, 0x0432u, 0x0433u, 0x0434u, 0x0435u, 0x0436u, 0x0437u,
+ 0x0438u, 0x0439u, 0x043Au, 0x043Bu, 0x043Cu, 0x043Du, 0x043Eu, 0x043Fu,
+ 0x0440u, 0x0441u, 0x0442u, 0x0443u, 0x0444u, 0x0445u, 0x0446u, 0x0447u,
+ 0x0448u, 0x0449u, 0x044Au, 0x044Bu, 0x044Cu, 0x044Du, 0x044Eu, 0x044Fu,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0461u, 0x0000u, 0x0463u, 0x0000u, 0x0465u, 0x0000u, 0x0467u, 0x0000u,
+ 0x0469u, 0x0000u, 0x046Bu, 0x0000u, 0x046Du, 0x0000u, 0x046Fu, 0x0000u,
+ 0x0471u, 0x0000u, 0x0473u, 0x0000u, 0x0475u, 0x0000u, 0x0477u, 0x0000u,
+ 0x0479u, 0x0000u, 0x047Bu, 0x0000u, 0x047Du, 0x0000u, 0x047Fu, 0x0000u,
+ 0x0481u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x048Bu, 0x0000u, 0x048Du, 0x0000u, 0x048Fu, 0x0000u,
+ 0x0491u, 0x0000u, 0x0493u, 0x0000u, 0x0495u, 0x0000u, 0x0497u, 0x0000u,
+ 0x0499u, 0x0000u, 0x049Bu, 0x0000u, 0x049Du, 0x0000u, 0x049Fu, 0x0000u,
+ 0x04A1u, 0x0000u, 0x04A3u, 0x0000u, 0x04A5u, 0x0000u, 0x04A7u, 0x0000u,
+ 0x04A9u, 0x0000u, 0x04ABu, 0x0000u, 0x04ADu, 0x0000u, 0x04AFu, 0x0000u,
+ 0x04B1u, 0x0000u, 0x04B3u, 0x0000u, 0x04B5u, 0x0000u, 0x04B7u, 0x0000u,
+ 0x04B9u, 0x0000u, 0x04BBu, 0x0000u, 0x04BDu, 0x0000u, 0x04BFu, 0x0000u,
+ 0x0000u, 0x04C2u, 0x0000u, 0x04C4u, 0x0000u, 0x04C6u, 0x0000u, 0x04C8u,
+ 0x0000u, 0x04CAu, 0x0000u, 0x04CCu, 0x0000u, 0x04CEu, 0x0000u, 0x0000u,
+ 0x04D1u, 0x0000u, 0x04D3u, 0x0000u, 0x04D5u, 0x0000u, 0x04D7u, 0x0000u,
+ 0x04D9u, 0x0000u, 0x04DBu, 0x0000u, 0x04DDu, 0x0000u, 0x04DFu, 0x0000u,
+ 0x04E1u, 0x0000u, 0x04E3u, 0x0000u, 0x04E5u, 0x0000u, 0x04E7u, 0x0000u,
+ 0x04E9u, 0x0000u, 0x04EBu, 0x0000u, 0x04EDu, 0x0000u, 0x04EFu, 0x0000u,
+ 0x04F1u, 0x0000u, 0x04F3u, 0x0000u, 0x04F5u, 0x0000u, 0x0000u, 0x0000u,
+ 0x04F9u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0501u, 0x0000u, 0x0503u, 0x0000u, 0x0505u, 0x0000u, 0x0507u, 0x0000u,
+ 0x0509u, 0x0000u, 0x050Bu, 0x0000u, 0x050Du, 0x0000u, 0x050Fu, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0561u, 0x0562u, 0x0563u, 0x0564u, 0x0565u, 0x0566u, 0x0567u,
+ 0x0568u, 0x0569u, 0x056Au, 0x056Bu, 0x056Cu, 0x056Du, 0x056Eu, 0x056Fu,
+ 0x0570u, 0x0571u, 0x0572u, 0x0573u, 0x0574u, 0x0575u, 0x0576u, 0x0577u,
+ 0x0578u, 0x0579u, 0x057Au, 0x057Bu, 0x057Cu, 0x057Du, 0x057Eu, 0x057Fu,
+ 0x0580u, 0x0581u, 0x0582u, 0x0583u, 0x0584u, 0x0585u, 0x0586u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1E01u, 0x0000u, 0x1E03u, 0x0000u, 0x1E05u, 0x0000u, 0x1E07u, 0x0000u,
+ 0x1E09u, 0x0000u, 0x1E0Bu, 0x0000u, 0x1E0Du, 0x0000u, 0x1E0Fu, 0x0000u,
+ 0x1E11u, 0x0000u, 0x1E13u, 0x0000u, 0x1E15u, 0x0000u, 0x1E17u, 0x0000u,
+ 0x1E19u, 0x0000u, 0x1E1Bu, 0x0000u, 0x1E1Du, 0x0000u, 0x1E1Fu, 0x0000u,
+ 0x1E21u, 0x0000u, 0x1E23u, 0x0000u, 0x1E25u, 0x0000u, 0x1E27u, 0x0000u,
+ 0x1E29u, 0x0000u, 0x1E2Bu, 0x0000u, 0x1E2Du, 0x0000u, 0x1E2Fu, 0x0000u,
+ 0x1E31u, 0x0000u, 0x1E33u, 0x0000u, 0x1E35u, 0x0000u, 0x1E37u, 0x0000u,
+ 0x1E39u, 0x0000u, 0x1E3Bu, 0x0000u, 0x1E3Du, 0x0000u, 0x1E3Fu, 0x0000u,
+ 0x1E41u, 0x0000u, 0x1E43u, 0x0000u, 0x1E45u, 0x0000u, 0x1E47u, 0x0000u,
+ 0x1E49u, 0x0000u, 0x1E4Bu, 0x0000u, 0x1E4Du, 0x0000u, 0x1E4Fu, 0x0000u,
+ 0x1E51u, 0x0000u, 0x1E53u, 0x0000u, 0x1E55u, 0x0000u, 0x1E57u, 0x0000u,
+ 0x1E59u, 0x0000u, 0x1E5Bu, 0x0000u, 0x1E5Du, 0x0000u, 0x1E5Fu, 0x0000u,
+ 0x1E61u, 0x0000u, 0x1E63u, 0x0000u, 0x1E65u, 0x0000u, 0x1E67u, 0x0000u,
+ 0x1E69u, 0x0000u, 0x1E6Bu, 0x0000u, 0x1E6Du, 0x0000u, 0x1E6Fu, 0x0000u,
+ 0x1E71u, 0x0000u, 0x1E73u, 0x0000u, 0x1E75u, 0x0000u, 0x1E77u, 0x0000u,
+ 0x1E79u, 0x0000u, 0x1E7Bu, 0x0000u, 0x1E7Du, 0x0000u, 0x1E7Fu, 0x0000u,
+ 0x1E81u, 0x0000u, 0x1E83u, 0x0000u, 0x1E85u, 0x0000u, 0x1E87u, 0x0000u,
+ 0x1E89u, 0x0000u, 0x1E8Bu, 0x0000u, 0x1E8Du, 0x0000u, 0x1E8Fu, 0x0000u,
+ 0x1E91u, 0x0000u, 0x1E93u, 0x0000u, 0x1E95u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1EA1u, 0x0000u, 0x1EA3u, 0x0000u, 0x1EA5u, 0x0000u, 0x1EA7u, 0x0000u,
+ 0x1EA9u, 0x0000u, 0x1EABu, 0x0000u, 0x1EADu, 0x0000u, 0x1EAFu, 0x0000u,
+ 0x1EB1u, 0x0000u, 0x1EB3u, 0x0000u, 0x1EB5u, 0x0000u, 0x1EB7u, 0x0000u,
+ 0x1EB9u, 0x0000u, 0x1EBBu, 0x0000u, 0x1EBDu, 0x0000u, 0x1EBFu, 0x0000u,
+ 0x1EC1u, 0x0000u, 0x1EC3u, 0x0000u, 0x1EC5u, 0x0000u, 0x1EC7u, 0x0000u,
+ 0x1EC9u, 0x0000u, 0x1ECBu, 0x0000u, 0x1ECDu, 0x0000u, 0x1ECFu, 0x0000u,
+ 0x1ED1u, 0x0000u, 0x1ED3u, 0x0000u, 0x1ED5u, 0x0000u, 0x1ED7u, 0x0000u,
+ 0x1ED9u, 0x0000u, 0x1EDBu, 0x0000u, 0x1EDDu, 0x0000u, 0x1EDFu, 0x0000u,
+ 0x1EE1u, 0x0000u, 0x1EE3u, 0x0000u, 0x1EE5u, 0x0000u, 0x1EE7u, 0x0000u,
+ 0x1EE9u, 0x0000u, 0x1EEBu, 0x0000u, 0x1EEDu, 0x0000u, 0x1EEFu, 0x0000u,
+ 0x1EF1u, 0x0000u, 0x1EF3u, 0x0000u, 0x1EF5u, 0x0000u, 0x1EF7u, 0x0000u,
+ 0x1EF9u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F00u, 0x1F01u, 0x1F02u, 0x1F03u, 0x1F04u, 0x1F05u, 0x1F06u, 0x1F07u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F10u, 0x1F11u, 0x1F12u, 0x1F13u, 0x1F14u, 0x1F15u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F20u, 0x1F21u, 0x1F22u, 0x1F23u, 0x1F24u, 0x1F25u, 0x1F26u, 0x1F27u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F30u, 0x1F31u, 0x1F32u, 0x1F33u, 0x1F34u, 0x1F35u, 0x1F36u, 0x1F37u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F40u, 0x1F41u, 0x1F42u, 0x1F43u, 0x1F44u, 0x1F45u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x1F51u, 0x0000u, 0x1F53u, 0x0000u, 0x1F55u, 0x0000u, 0x1F57u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F60u, 0x1F61u, 0x1F62u, 0x1F63u, 0x1F64u, 0x1F65u, 0x1F66u, 0x1F67u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F80u, 0x1F81u, 0x1F82u, 0x1F83u, 0x1F84u, 0x1F85u, 0x1F86u, 0x1F87u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F90u, 0x1F91u, 0x1F92u, 0x1F93u, 0x1F94u, 0x1F95u, 0x1F96u, 0x1F97u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1FA0u, 0x1FA1u, 0x1FA2u, 0x1FA3u, 0x1FA4u, 0x1FA5u, 0x1FA6u, 0x1FA7u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1FB0u, 0x1FB1u, 0x1F70u, 0x1F71u, 0x1FB3u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F72u, 0x1F73u, 0x1F74u, 0x1F75u, 0x1FC3u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1FD0u, 0x1FD1u, 0x1F76u, 0x1F77u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1FE0u, 0x1FE1u, 0x1F7Au, 0x1F7Bu, 0x1FE5u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x1F78u, 0x1F79u, 0x1F7Cu, 0x1F7Du, 0x1FF3u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x03C9u, 0x0000u,
+ 0x0000u, 0x0000u, 0x006Bu, 0x00E5u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x2170u, 0x2171u, 0x2172u, 0x2173u, 0x2174u, 0x2175u, 0x2176u, 0x2177u,
+ 0x2178u, 0x2179u, 0x217Au, 0x217Bu, 0x217Cu, 0x217Du, 0x217Eu, 0x217Fu,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x24D0u, 0x24D1u,
+ 0x24D2u, 0x24D3u, 0x24D4u, 0x24D5u, 0x24D6u, 0x24D7u, 0x24D8u, 0x24D9u,
+ 0x24DAu, 0x24DBu, 0x24DCu, 0x24DDu, 0x24DEu, 0x24DFu, 0x24E0u, 0x24E1u,
+ 0x24E2u, 0x24E3u, 0x24E4u, 0x24E5u, 0x24E6u, 0x24E7u, 0x24E8u, 0x24E9u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0xFF41u, 0xFF42u, 0xFF43u, 0xFF44u, 0xFF45u, 0xFF46u, 0xFF47u,
+ 0xFF48u, 0xFF49u, 0xFF4Au, 0xFF4Bu, 0xFF4Cu, 0xFF4Du, 0xFF4Eu, 0xFF4Fu,
+ 0xFF50u, 0xFF51u, 0xFF52u, 0xFF53u, 0xFF54u, 0xFF55u, 0xFF56u, 0xFF57u,
+ 0xFF58u, 0xFF59u, 0xFF5Au, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+ 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u, 0x0000u,
+};
+
+const unsigned short *Unicode::_compLowerCase[256]={
+ _intCompLowerCase+0x0000, /* Page 0x00 */
+ _intCompLowerCase+0x0100, /* Page 0x01 */
+ _intCompLowerCase+0x0200, /* Page 0x02 */
+ _intCompLowerCase+0x0300, /* Page 0x03 */
+ _intCompLowerCase+0x0400, /* Page 0x04 */
+ _intCompLowerCase+0x0500, /* Page 0x05 */
+ _intCompLowerCase+0x0600, /* Page 0x06 */
+ _intCompLowerCase+0x0600, /* Page 0x07 */
+ _intCompLowerCase+0x0600, /* Page 0x08 */
+ _intCompLowerCase+0x0600, /* Page 0x09 */
+ _intCompLowerCase+0x0600, /* Page 0x0A */
+ _intCompLowerCase+0x0600, /* Page 0x0B */
+ _intCompLowerCase+0x0600, /* Page 0x0C */
+ _intCompLowerCase+0x0600, /* Page 0x0D */
+ _intCompLowerCase+0x0600, /* Page 0x0E */
+ _intCompLowerCase+0x0600, /* Page 0x0F */
+ _intCompLowerCase+0x0600, /* Page 0x10 */
+ _intCompLowerCase+0x0600, /* Page 0x11 */
+ _intCompLowerCase+0x0600, /* Page 0x12 */
+ _intCompLowerCase+0x0600, /* Page 0x13 */
+ _intCompLowerCase+0x0600, /* Page 0x14 */
+ _intCompLowerCase+0x0600, /* Page 0x15 */
+ _intCompLowerCase+0x0600, /* Page 0x16 */
+ _intCompLowerCase+0x0600, /* Page 0x17 */
+ _intCompLowerCase+0x0600, /* Page 0x18 */
+ _intCompLowerCase+0x0600, /* Page 0x19 */
+ _intCompLowerCase+0x0600, /* Page 0x1A */
+ _intCompLowerCase+0x0600, /* Page 0x1B */
+ _intCompLowerCase+0x0600, /* Page 0x1C */
+ _intCompLowerCase+0x0600, /* Page 0x1D */
+ _intCompLowerCase+0x0700, /* Page 0x1E */
+ _intCompLowerCase+0x0800, /* Page 0x1F */
+ _intCompLowerCase+0x0600, /* Page 0x20 */
+ _intCompLowerCase+0x0900, /* Page 0x21 */
+ _intCompLowerCase+0x0600, /* Page 0x22 */
+ _intCompLowerCase+0x0600, /* Page 0x23 */
+ _intCompLowerCase+0x0A00, /* Page 0x24 */
+ _intCompLowerCase+0x0600, /* Page 0x25 */
+ _intCompLowerCase+0x0600, /* Page 0x26 */
+ _intCompLowerCase+0x0600, /* Page 0x27 */
+ _intCompLowerCase+0x0600, /* Page 0x28 */
+ _intCompLowerCase+0x0600, /* Page 0x29 */
+ _intCompLowerCase+0x0600, /* Page 0x2A */
+ _intCompLowerCase+0x0600, /* Page 0x2B */
+ _intCompLowerCase+0x0600, /* Page 0x2C */
+ _intCompLowerCase+0x0600, /* Page 0x2D */
+ _intCompLowerCase+0x0600, /* Page 0x2E */
+ _intCompLowerCase+0x0600, /* Page 0x2F */
+ _intCompLowerCase+0x0600, /* Page 0x30 */
+ _intCompLowerCase+0x0600, /* Page 0x31 */
+ _intCompLowerCase+0x0600, /* Page 0x32 */
+ _intCompLowerCase+0x0600, /* Page 0x33 */
+ _intCompLowerCase+0x0600, /* Page 0x34 */
+ _intCompLowerCase+0x0600, /* Page 0x35 */
+ _intCompLowerCase+0x0600, /* Page 0x36 */
+ _intCompLowerCase+0x0600, /* Page 0x37 */
+ _intCompLowerCase+0x0600, /* Page 0x38 */
+ _intCompLowerCase+0x0600, /* Page 0x39 */
+ _intCompLowerCase+0x0600, /* Page 0x3A */
+ _intCompLowerCase+0x0600, /* Page 0x3B */
+ _intCompLowerCase+0x0600, /* Page 0x3C */
+ _intCompLowerCase+0x0600, /* Page 0x3D */
+ _intCompLowerCase+0x0600, /* Page 0x3E */
+ _intCompLowerCase+0x0600, /* Page 0x3F */
+ _intCompLowerCase+0x0600, /* Page 0x40 */
+ _intCompLowerCase+0x0600, /* Page 0x41 */
+ _intCompLowerCase+0x0600, /* Page 0x42 */
+ _intCompLowerCase+0x0600, /* Page 0x43 */
+ _intCompLowerCase+0x0600, /* Page 0x44 */
+ _intCompLowerCase+0x0600, /* Page 0x45 */
+ _intCompLowerCase+0x0600, /* Page 0x46 */
+ _intCompLowerCase+0x0600, /* Page 0x47 */
+ _intCompLowerCase+0x0600, /* Page 0x48 */
+ _intCompLowerCase+0x0600, /* Page 0x49 */
+ _intCompLowerCase+0x0600, /* Page 0x4A */
+ _intCompLowerCase+0x0600, /* Page 0x4B */
+ _intCompLowerCase+0x0600, /* Page 0x4C */
+ _intCompLowerCase+0x0600, /* Page 0x4D */
+ _intCompLowerCase+0x0600, /* Page 0x4E */
+ _intCompLowerCase+0x0600, /* Page 0x4F */
+ _intCompLowerCase+0x0600, /* Page 0x50 */
+ _intCompLowerCase+0x0600, /* Page 0x51 */
+ _intCompLowerCase+0x0600, /* Page 0x52 */
+ _intCompLowerCase+0x0600, /* Page 0x53 */
+ _intCompLowerCase+0x0600, /* Page 0x54 */
+ _intCompLowerCase+0x0600, /* Page 0x55 */
+ _intCompLowerCase+0x0600, /* Page 0x56 */
+ _intCompLowerCase+0x0600, /* Page 0x57 */
+ _intCompLowerCase+0x0600, /* Page 0x58 */
+ _intCompLowerCase+0x0600, /* Page 0x59 */
+ _intCompLowerCase+0x0600, /* Page 0x5A */
+ _intCompLowerCase+0x0600, /* Page 0x5B */
+ _intCompLowerCase+0x0600, /* Page 0x5C */
+ _intCompLowerCase+0x0600, /* Page 0x5D */
+ _intCompLowerCase+0x0600, /* Page 0x5E */
+ _intCompLowerCase+0x0600, /* Page 0x5F */
+ _intCompLowerCase+0x0600, /* Page 0x60 */
+ _intCompLowerCase+0x0600, /* Page 0x61 */
+ _intCompLowerCase+0x0600, /* Page 0x62 */
+ _intCompLowerCase+0x0600, /* Page 0x63 */
+ _intCompLowerCase+0x0600, /* Page 0x64 */
+ _intCompLowerCase+0x0600, /* Page 0x65 */
+ _intCompLowerCase+0x0600, /* Page 0x66 */
+ _intCompLowerCase+0x0600, /* Page 0x67 */
+ _intCompLowerCase+0x0600, /* Page 0x68 */
+ _intCompLowerCase+0x0600, /* Page 0x69 */
+ _intCompLowerCase+0x0600, /* Page 0x6A */
+ _intCompLowerCase+0x0600, /* Page 0x6B */
+ _intCompLowerCase+0x0600, /* Page 0x6C */
+ _intCompLowerCase+0x0600, /* Page 0x6D */
+ _intCompLowerCase+0x0600, /* Page 0x6E */
+ _intCompLowerCase+0x0600, /* Page 0x6F */
+ _intCompLowerCase+0x0600, /* Page 0x70 */
+ _intCompLowerCase+0x0600, /* Page 0x71 */
+ _intCompLowerCase+0x0600, /* Page 0x72 */
+ _intCompLowerCase+0x0600, /* Page 0x73 */
+ _intCompLowerCase+0x0600, /* Page 0x74 */
+ _intCompLowerCase+0x0600, /* Page 0x75 */
+ _intCompLowerCase+0x0600, /* Page 0x76 */
+ _intCompLowerCase+0x0600, /* Page 0x77 */
+ _intCompLowerCase+0x0600, /* Page 0x78 */
+ _intCompLowerCase+0x0600, /* Page 0x79 */
+ _intCompLowerCase+0x0600, /* Page 0x7A */
+ _intCompLowerCase+0x0600, /* Page 0x7B */
+ _intCompLowerCase+0x0600, /* Page 0x7C */
+ _intCompLowerCase+0x0600, /* Page 0x7D */
+ _intCompLowerCase+0x0600, /* Page 0x7E */
+ _intCompLowerCase+0x0600, /* Page 0x7F */
+ _intCompLowerCase+0x0600, /* Page 0x80 */
+ _intCompLowerCase+0x0600, /* Page 0x81 */
+ _intCompLowerCase+0x0600, /* Page 0x82 */
+ _intCompLowerCase+0x0600, /* Page 0x83 */
+ _intCompLowerCase+0x0600, /* Page 0x84 */
+ _intCompLowerCase+0x0600, /* Page 0x85 */
+ _intCompLowerCase+0x0600, /* Page 0x86 */
+ _intCompLowerCase+0x0600, /* Page 0x87 */
+ _intCompLowerCase+0x0600, /* Page 0x88 */
+ _intCompLowerCase+0x0600, /* Page 0x89 */
+ _intCompLowerCase+0x0600, /* Page 0x8A */
+ _intCompLowerCase+0x0600, /* Page 0x8B */
+ _intCompLowerCase+0x0600, /* Page 0x8C */
+ _intCompLowerCase+0x0600, /* Page 0x8D */
+ _intCompLowerCase+0x0600, /* Page 0x8E */
+ _intCompLowerCase+0x0600, /* Page 0x8F */
+ _intCompLowerCase+0x0600, /* Page 0x90 */
+ _intCompLowerCase+0x0600, /* Page 0x91 */
+ _intCompLowerCase+0x0600, /* Page 0x92 */
+ _intCompLowerCase+0x0600, /* Page 0x93 */
+ _intCompLowerCase+0x0600, /* Page 0x94 */
+ _intCompLowerCase+0x0600, /* Page 0x95 */
+ _intCompLowerCase+0x0600, /* Page 0x96 */
+ _intCompLowerCase+0x0600, /* Page 0x97 */
+ _intCompLowerCase+0x0600, /* Page 0x98 */
+ _intCompLowerCase+0x0600, /* Page 0x99 */
+ _intCompLowerCase+0x0600, /* Page 0x9A */
+ _intCompLowerCase+0x0600, /* Page 0x9B */
+ _intCompLowerCase+0x0600, /* Page 0x9C */
+ _intCompLowerCase+0x0600, /* Page 0x9D */
+ _intCompLowerCase+0x0600, /* Page 0x9E */
+ _intCompLowerCase+0x0600, /* Page 0x9F */
+ _intCompLowerCase+0x0600, /* Page 0xA0 */
+ _intCompLowerCase+0x0600, /* Page 0xA1 */
+ _intCompLowerCase+0x0600, /* Page 0xA2 */
+ _intCompLowerCase+0x0600, /* Page 0xA3 */
+ _intCompLowerCase+0x0600, /* Page 0xA4 */
+ _intCompLowerCase+0x0600, /* Page 0xA5 */
+ _intCompLowerCase+0x0600, /* Page 0xA6 */
+ _intCompLowerCase+0x0600, /* Page 0xA7 */
+ _intCompLowerCase+0x0600, /* Page 0xA8 */
+ _intCompLowerCase+0x0600, /* Page 0xA9 */
+ _intCompLowerCase+0x0600, /* Page 0xAA */
+ _intCompLowerCase+0x0600, /* Page 0xAB */
+ _intCompLowerCase+0x0600, /* Page 0xAC */
+ _intCompLowerCase+0x0600, /* Page 0xAD */
+ _intCompLowerCase+0x0600, /* Page 0xAE */
+ _intCompLowerCase+0x0600, /* Page 0xAF */
+ _intCompLowerCase+0x0600, /* Page 0xB0 */
+ _intCompLowerCase+0x0600, /* Page 0xB1 */
+ _intCompLowerCase+0x0600, /* Page 0xB2 */
+ _intCompLowerCase+0x0600, /* Page 0xB3 */
+ _intCompLowerCase+0x0600, /* Page 0xB4 */
+ _intCompLowerCase+0x0600, /* Page 0xB5 */
+ _intCompLowerCase+0x0600, /* Page 0xB6 */
+ _intCompLowerCase+0x0600, /* Page 0xB7 */
+ _intCompLowerCase+0x0600, /* Page 0xB8 */
+ _intCompLowerCase+0x0600, /* Page 0xB9 */
+ _intCompLowerCase+0x0600, /* Page 0xBA */
+ _intCompLowerCase+0x0600, /* Page 0xBB */
+ _intCompLowerCase+0x0600, /* Page 0xBC */
+ _intCompLowerCase+0x0600, /* Page 0xBD */
+ _intCompLowerCase+0x0600, /* Page 0xBE */
+ _intCompLowerCase+0x0600, /* Page 0xBF */
+ _intCompLowerCase+0x0600, /* Page 0xC0 */
+ _intCompLowerCase+0x0600, /* Page 0xC1 */
+ _intCompLowerCase+0x0600, /* Page 0xC2 */
+ _intCompLowerCase+0x0600, /* Page 0xC3 */
+ _intCompLowerCase+0x0600, /* Page 0xC4 */
+ _intCompLowerCase+0x0600, /* Page 0xC5 */
+ _intCompLowerCase+0x0600, /* Page 0xC6 */
+ _intCompLowerCase+0x0600, /* Page 0xC7 */
+ _intCompLowerCase+0x0600, /* Page 0xC8 */
+ _intCompLowerCase+0x0600, /* Page 0xC9 */
+ _intCompLowerCase+0x0600, /* Page 0xCA */
+ _intCompLowerCase+0x0600, /* Page 0xCB */
+ _intCompLowerCase+0x0600, /* Page 0xCC */
+ _intCompLowerCase+0x0600, /* Page 0xCD */
+ _intCompLowerCase+0x0600, /* Page 0xCE */
+ _intCompLowerCase+0x0600, /* Page 0xCF */
+ _intCompLowerCase+0x0600, /* Page 0xD0 */
+ _intCompLowerCase+0x0600, /* Page 0xD1 */
+ _intCompLowerCase+0x0600, /* Page 0xD2 */
+ _intCompLowerCase+0x0600, /* Page 0xD3 */
+ _intCompLowerCase+0x0600, /* Page 0xD4 */
+ _intCompLowerCase+0x0600, /* Page 0xD5 */
+ _intCompLowerCase+0x0600, /* Page 0xD6 */
+ _intCompLowerCase+0x0600, /* Page 0xD7 */
+ _intCompLowerCase+0x0600, /* Page 0xD8 */
+ _intCompLowerCase+0x0600, /* Page 0xD9 */
+ _intCompLowerCase+0x0600, /* Page 0xDA */
+ _intCompLowerCase+0x0600, /* Page 0xDB */
+ _intCompLowerCase+0x0600, /* Page 0xDC */
+ _intCompLowerCase+0x0600, /* Page 0xDD */
+ _intCompLowerCase+0x0600, /* Page 0xDE */
+ _intCompLowerCase+0x0600, /* Page 0xDF */
+ _intCompLowerCase+0x0600, /* Page 0xE0 */
+ _intCompLowerCase+0x0600, /* Page 0xE1 */
+ _intCompLowerCase+0x0600, /* Page 0xE2 */
+ _intCompLowerCase+0x0600, /* Page 0xE3 */
+ _intCompLowerCase+0x0600, /* Page 0xE4 */
+ _intCompLowerCase+0x0600, /* Page 0xE5 */
+ _intCompLowerCase+0x0600, /* Page 0xE6 */
+ _intCompLowerCase+0x0600, /* Page 0xE7 */
+ _intCompLowerCase+0x0600, /* Page 0xE8 */
+ _intCompLowerCase+0x0600, /* Page 0xE9 */
+ _intCompLowerCase+0x0600, /* Page 0xEA */
+ _intCompLowerCase+0x0600, /* Page 0xEB */
+ _intCompLowerCase+0x0600, /* Page 0xEC */
+ _intCompLowerCase+0x0600, /* Page 0xED */
+ _intCompLowerCase+0x0600, /* Page 0xEE */
+ _intCompLowerCase+0x0600, /* Page 0xEF */
+ _intCompLowerCase+0x0600, /* Page 0xF0 */
+ _intCompLowerCase+0x0600, /* Page 0xF1 */
+ _intCompLowerCase+0x0600, /* Page 0xF2 */
+ _intCompLowerCase+0x0600, /* Page 0xF3 */
+ _intCompLowerCase+0x0600, /* Page 0xF4 */
+ _intCompLowerCase+0x0600, /* Page 0xF5 */
+ _intCompLowerCase+0x0600, /* Page 0xF6 */
+ _intCompLowerCase+0x0600, /* Page 0xF7 */
+ _intCompLowerCase+0x0600, /* Page 0xF8 */
+ _intCompLowerCase+0x0600, /* Page 0xF9 */
+ _intCompLowerCase+0x0600, /* Page 0xFA */
+ _intCompLowerCase+0x0600, /* Page 0xFB */
+ _intCompLowerCase+0x0600, /* Page 0xFC */
+ _intCompLowerCase+0x0600, /* Page 0xFD */
+ _intCompLowerCase+0x0600, /* Page 0xFE */
+ _intCompLowerCase+0x0B00 /* Page 0xFF */
+};
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/unicode_tables.cpp b/fsa/src/vespa/fsa/unicode_tables.cpp
new file mode 100644
index 00000000000..d20255f29c5
--- /dev/null
+++ b/fsa/src/vespa/fsa/unicode_tables.cpp
@@ -0,0 +1,162 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "unicode.h"
+
+namespace fsa {
+
+const unsigned char Unicode::_isdigit[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Only 0x30-0x39 ('0'-'9') are set.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x30-0x3F: '0'-'9' set
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+const unsigned char Unicode::_isintegerindexop[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Set for ';' '<' '>' '[' ']' only.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, // 0x30-0x3F: 0x3B ';', 0x3C '<', 0x3E '>' set ('=' is not)
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, // 0x50-0x5F: 0x5B '[', 0x5D ']' set
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+const unsigned char Unicode::_iswordchar[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Set for '0'-'9', 'A'-'Z', 'a'-'z' and 0xC0-0xFF except 0xD7 and 0xF7.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x30-0x3F: '0'-'9'
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40-0x4F: 'A'-'O'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x50-0x5F: 'P'-'Z' ('_' is not a word character)
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x60-0x6F: 'a'-'o'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x70-0x7F: 'p'-'z'
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xC0-0xCF
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xD0-0xDF: 0xD7 (multiplication sign in Latin-1) cleared
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xE0-0xEF
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF0-0xFF: 0xF7 (division sign in Latin-1) cleared
+ };
+
+const unsigned char Unicode::_isidstartchar[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Set for ASCII letters 'A'-'Z' and 'a'-'z' only.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x30-0x3F: digits are not set here
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40-0x4F: 'A'-'O'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x50-0x5F: 'P'-'Z'
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x60-0x6F: 'a'-'o'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x70-0x7F: 'p'-'z'
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+const unsigned char Unicode::_isidchar[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Set for ASCII letters, digits and '-' '.' ':' '_'.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, // 0x20-0x2F: 0x2D '-' and 0x2E '.' set
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x30-0x3F: '0'-'9' and 0x3A ':' set
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40-0x4F: 'A'-'O'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, // 0x50-0x5F: 'P'-'Z' and 0x5F '_' set
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x60-0x6F: 'a'-'o'
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x70-0x7F: 'p'-'z'
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+const unsigned char Unicode::_isspacechar[256] = { // 256 flags, 16 per row; entry i presumably describes character code i. Set for 0x09, 0x0A, 0x0D, 0x20 and 0xA0.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, // 0x00-0x0F: TAB (0x09), LF (0x0A), CR (0x0D); VT (0x0B) and FF (0x0C) are not set
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x20-0x2F: space (0x20)
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xA0-0xAF: 0xA0 (no-break space in Latin-1)
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+const unsigned char Unicode::_tolower[256] = { // Lowercase mapping, 16 entries per row: entry i is the lowercase form of code i (identity unless noted on the row).
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 'A'-'O' -> 'a'-'o'
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, // 'P'-'Z' -> 'p'-'z'
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, // 0xC0-0xCF -> 0xE0-0xEF
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, // 0xD0-0xDE -> 0xF0-0xFE; 0xD7 (multiplication sign) and 0xDF (sharp s, already lowercase) map to themselves
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ };
+
+const unsigned char Unicode::_utf8header[256] = { // One value per byte 0x00-0xFF, 16 per row; presumably the length of the UTF-8 sequence that byte starts, 0 = not a lead byte. NOTE(review): confirm against the users in unicode.h.
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x00-0x7F: 1
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x80-0xBF: 0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xC0-0xDF: 2 (0xC0 and 0xC1 included)
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // 0xE0-0xEF: 3
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x00, 0x00, // 0xF0-0xF7: 4, 0xF8-0xFB: 5, 0xFC-0xFD: 6, 0xFE-0xFF: 0
+ };
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/vectorizer.cpp b/fsa/src/vespa/fsa/vectorizer.cpp
new file mode 100644
index 00000000000..54c67fdc800
--- /dev/null
+++ b/fsa/src/vespa/fsa/vectorizer.cpp
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer.cpp
+ * @brief Simple document vectorizer based on %FSA (%Finite %State %Automaton) (implementation)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <list>
+#include <algorithm>
+
+#include <math.h>
+
+#include "vectorizer.h"
+#include "fsa.h"
+
+
+namespace fsa {
+
+// {{{ Vectorizer::TfIdf::weight
+
+double Vectorizer::TfIdf::weight(unsigned int tfnorm, unsigned int idfnorm,
+                                 double tfexp, double idfexp) const
+{
+  // A factor keeps the neutral value 1.0 when its norm or its exponent is zero.
+  double tf_factor = 1.0;
+  double idf_factor = 1.0;
+
+  // Tf factor: (tf/tfnorm)^tfexp.
+  if(tfnorm!=0 && tfexp!=0.0){
+    tf_factor = static_cast<double>(_tf)/tfnorm;
+    // The power is taken as exp(e*log(x)); skipped when e is 1 or x is 0.
+    if(tfexp!=1.0 && tf_factor!=0.0){
+      tf_factor = exp(tfexp*log(tf_factor));
+    }
+  }
+
+  // Idf factor: (1-idf/idfnorm)^idfexp, with the base clamped at zero.
+  if(idfnorm!=0 && idfexp!=0.0){
+    idf_factor = 1.0-static_cast<double>(_idf)/idfnorm;
+    if(idf_factor<0.0){
+      idf_factor = 0.0;
+    }
+    if(idfexp!=1.0 && idf_factor!=0.0){
+      idf_factor = exp(idfexp*log(idf_factor));
+    }
+  }
+
+  return tf_factor * idf_factor;
+}
+
+// }}}
+
+// {{{ Vectorizer::vectorize
+
+void Vectorizer::vectorize(const NGram &text, TermVector &vector, unsigned int limit,
+                           bool keephits, double tfexp, double idfexp) const
+{
+  // Run the detector over the text; the raw vector collects the detected terms.
+  RawVector detected(keephits);
+  _detector.detect(text,detected);
+
+  // Largest term frequency among the detected terms (never below 1); used as the Tf norm.
+  unsigned int max_tf=1;
+  for(RawVector::iterator it=detected.begin(); it!=detected.end(); ++it){
+    max_tf=std::max(max_tf,it->second.first.tf());
+  }
+
+  vector.clear();
+  vector.reserve(detected.size());
+  for(RawVector::iterator it=detected.begin(); it!=detected.end(); ++it){
+    const double w=it->second.first.weight(max_tf,_idf_docs,tfexp,idfexp);
+    vector.push_back(VectorItem(it->first,w,it->second.second));
+  }
+  std::sort(vector.begin(),vector.end());   // VectorItem order: highest weight first, then by term
+  if(vector.size()>limit) vector.resize(limit);   // keep at most `limit` items
+}
+
+void Vectorizer::vectorize(const NGram &text, TermVector &vector, unsigned int limit,
+                           double tfexp, double idfexp) const
+{
+  this->vectorize(text, vector, limit, /*keephits=*/false, tfexp, idfexp);  // same as the full overload, hits not kept
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/vectorizer.h b/fsa/src/vespa/fsa/vectorizer.h
new file mode 100644
index 00000000000..9e8856191da
--- /dev/null
+++ b/fsa/src/vespa/fsa/vectorizer.h
@@ -0,0 +1,642 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file vectorizer.h
+ * @brief Simple document vectorizer based on %FSA (%Finite %State %Automaton)
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include "fsa.h"
+#include "detector.h"
+
+namespace fsa {
+
+// {{{ Vectorizer
+
+/**
+ * @class Vectorizer
+ * @brief Simple document vectorizer based on %FSA.
+ */
+class Vectorizer {
+
+public:
+
+ // {{{ Vectorizer::VectorItem
+
+ /**
+ * @class VectorItem
+ * @brief Document vector item.
+ *
+ * Document vector item. Contains a term/phrase and an assigned
+ * weight, and provides comparison operators for sorting.
+ */
+ class VectorItem {
+ public:
+   typedef std::pair<unsigned int /*position*/, int /*length*/> Hit;
+   typedef std::vector<Hit> Hits;
+ private:
+   std::string _term;   /**< Term/phrase. */
+   double      _weight; /**< Term weight. */
+   Hits        _hits;   /**< The token positions at which the term was found. */
+ public:
+   /**
+    * @brief Default constructor, creates empty item with zero weight.
+    */
+   VectorItem() : _term(), _weight(0.0), _hits() {}
+
+   /**
+    * @brief Copy constructor.
+    *
+    * @param v VectorItem to copy.
+    */
+   VectorItem(const VectorItem &v) : _term(v._term), _weight(v._weight), _hits(v._hits) {}
+
+   /**
+    * @brief Constructor.
+    *
+    * Creates a vector item from a string and a weight; the hit list is empty.
+    *
+    * @param t Term/phrase (passed by reference, copied once into the item).
+    * @param w Weight.
+    */
+   VectorItem(const std::string &t, double w) : _term(t), _weight(w), _hits() {}
+
+   /**
+    * @brief Constructor.
+    *
+    * Creates a vector item from a string, a weight and a list of hits.
+    * @param t Term/phrase (passed by reference, copied once into the item).
+    * @param w Weight.
+    * @param h Hits (position/length pairs) to copy into the item.
+    */
+   VectorItem(const std::string &t, double w, const Hits &h) : _term(t), _weight(w), _hits(h) {}
+
+   /**
+    * @brief Destructor.
+    */
+   ~VectorItem() {}
+
+   /**
+    * @brief Assignment operator.
+    *
+    * @param v VectorItem.
+    * @return Reference to (this) VectorItem.
+    */
+   const VectorItem& operator=(const VectorItem& v)
+   {
+     _term = v._term;
+     _weight = v._weight;
+     _hits = v._hits;
+     return *this;
+   }
+
+   /**
+    * @brief Less-than operator.
+    *
+    * The order is highest weight first, then sorted alphabetically.
+    *
+    * @param v Other vector item.
+    * @return True if this item<other item.
+    */
+   bool operator<(const VectorItem & v) const
+   {
+     // A larger weight sorts first; equal weights fall back to the term order.
+     if(_weight>v._weight) return true;
+     if(_weight<v._weight) return false;
+     return _term<v._term;
+   }
+
+   /**
+    * @brief Greater-than operator.
+    *
+    * The order is highest weight first, then sorted alphabetically.
+    *
+    * @param v Other vector item.
+    * @return True if this item>other item.
+    */
+   bool operator>(const VectorItem & v) const
+   {
+     // Mirror image of operator<: a smaller weight sorts later.
+     if(_weight<v._weight) return true;
+     if(_weight>v._weight) return false;
+     return _term>v._term;
+   }
+
+   /**
+    * @brief Equals operator.
+    *
+    * Two VectorItems are equal if both the terms and the weights are equal.
+    *
+    * @param v Other vector item.
+    * @return True if this item==other item.
+    */
+   bool operator==(const VectorItem & v) const
+   {
+     // _hits does not take part in the comparison.
+     return _weight==v._weight && _term==v._term;
+   }
+
+   /**
+    * @brief Get the term/phrase.
+    *
+    * @return (Copy of) term/phrase.
+    */
+   std::string term() const { return _term; }
+
+   /**
+    * @brief An obsolete alias for term().
+    *
+    * @return (Copy of) term/phrase.
+    */
+   std::string getTerm() const { return _term; }
+
+   /**
+    * @brief Get the weight.
+    *
+    * @return Weight.
+    */
+   double weight() const { return _weight; }
+
+   /**
+    * @brief An obsolete alias for weight().
+    *
+    * @return Weight.
+    */
+   double getWeight() const { return _weight; }
+
+   /**
+    * @brief Get the hits.
+    *
+    * @return A reference to the hits vector.
+    */
+   const Hits &hits() const { return _hits; }
+
+ };
+
+ // }}}
+
+ // {{{ Vectorizer::TfIdf
+
+ /**
+ * @class TfIdf
+ * @brief Class for computing TfIdf weights.
+ *
+ * Class for computing TfIdf (term frequency/inverse document
+ * frequency) weights.
+ */
+ class TfIdf {
+ private:
+   unsigned int _tf;  /**< Term frequency count. */
+   unsigned int _idf; /**< (Inverse) document frequency count. */
+ public:
+   /**
+    * @brief Default constructor; both counts start at zero.
+    */
+   TfIdf() : _tf(0), _idf(0) {}
+
+   /**
+    * @brief Copy constructor.
+    *
+    * @param ti TfIdf object whose counts are copied.
+    */
+   TfIdf(const TfIdf &ti) : _tf(ti._tf), _idf(ti._idf) {}
+
+   /**
+    * @brief Constructor from explicit counts.
+    *
+    * @param t Term frequency.
+    * @param i (Inverse) document frequency.
+    */
+   TfIdf(unsigned int t, unsigned int i) : _tf(t), _idf(i) {}
+
+   /**
+    * @brief Destructor.
+    */
+   ~TfIdf() {}
+
+   /**
+    * @brief Assignment operator, copies both Tf and Idf.
+    *
+    * @param ti Reference to the TfIdf object to copy from.
+    * @return Reference to (this) TfIdf object.
+    */
+   const TfIdf& operator=(const TfIdf& ti)
+   {
+     _idf = ti._idf;
+     _tf = ti._tf;
+     return *this;
+   }
+
+   /**
+    * @brief Assignment from a plain count: sets Tf, leaves Idf untouched.
+    *
+    * @param t New term frequency.
+    * @return Reference to (this) TfIdf object.
+    */
+   const TfIdf& operator=(unsigned int t)
+   {
+     _tf = t;
+     return *this;
+   }
+
+   /**
+    * @brief Prefix increment operator.
+    *
+    * Adds one to Tf; Idf is not changed.
+    *
+    * @return Reference to (this) TfIdf object.
+    */
+   TfIdf& operator++()
+   {
+     _tf = _tf + 1;
+     return *this;
+   }
+
+   /**
+    * @brief += operator.
+    *
+    * Adds the parameter to Tf; Idf is not changed.
+    *
+    * @return Reference to (this) TfIdf object.
+    */
+   const TfIdf& operator+=(unsigned int t)
+   {
+     _tf = _tf + t;
+     return *this;
+   }
+
+   /**
+    * @brief Get Tf value.
+    *
+    * @return Tf (term frequency) value.
+    */
+   unsigned int tf() const { return _tf; }
+
+   /**
+    * @brief An obsolete alias for tf().
+    *
+    * @return Tf (term frequency) value.
+    */
+   unsigned int getTf() const { return tf(); }
+
+   /**
+    * @brief Get Idf value.
+    *
+    * @return Idf ((inverse) document frequency) value.
+    */
+   unsigned int idf() const { return _idf; }
+
+   /**
+    * @brief An obsolete alias for idf().
+    *
+    * @return Idf ((inverse) document frequency) value.
+    */
+   unsigned int getIdf() const { return idf(); }
+
+   /**
+    * @brief Compute the weight: (tf/tfnorm)^tfexp * max(0, 1-idf/idfnorm)^idfexp.
+    *
+    * @param tfnorm Tf is divided by this; 0 makes the Tf factor 1.
+    * @param idfnorm Idf is divided by this; 0 makes the Idf factor 1.
+    * @param tfexp Tf exponent; 0 makes the Tf factor 1.
+    * @param idfexp Idf exponent; 0 makes the Idf factor 1.
+    * @return Product of the Tf factor and the Idf factor.
+    */
+   double weight(unsigned int tfnorm=1, unsigned int idfnorm=1,
+                 double tfexp=1.0, double idfexp=1.0) const;
+
+   /**
+    * @brief An obsolete alias for weight().
+    *
+    * @param tfnorm Forwarded to weight().
+    * @param idfnorm Forwarded to weight().
+    * @param tfexp Forwarded to weight().
+    * @param idfexp Forwarded to weight().
+    * @return Whatever weight() returns for the same arguments.
+    */
+   double getWeight(unsigned int tfnorm=1, unsigned int idfnorm=1,
+                    double tfexp=1.0, double idfexp=1.0) const
+   {
+     return this->weight(tfnorm,idfnorm,tfexp,idfexp);
+   }
+
+ };
+
+ // }}}
+
+ /**
+ * @brief Term vector type: a std::vector of VectorItem (term, weight, hits).
+ */
+ typedef std::vector<VectorItem> TermVector;
+
+
+private:
+
+ // {{{ Vectorizer::RawVector
+
+ /**
+ * @class RawVector
+ * @brief Class for building a raw document vector.
+ *
+ * The RawVector class is a subclass of Detector::Hits, so it can be
+ * used directly with a Detector. The recognized terms and phrases
+ * will be collected and counted (->term frequency). Idf counts are
+ * obtained from the automaton the first time the term is
+ * encountered.
+ */
+ class RawVector : public Detector::Hits {
+
+ public:
+
+ typedef std::map<std::string, std::pair<TfIdf, VectorItem::Hits> > ItemMap;
+
+ // {{{ Vectorizer::RawVector::iterator
+
+ /**
+ * @class iterator
+ * @brief Iterator for the RawVector class.
+ *
+ * This class is actually a wrapper around an ItemMap::iterator, i.e. a
+ * std::map<std::string, std::pair<TfIdf, VectorItem::Hits> >::iterator.
+ */
+ class iterator {
+ friend class RawVector;
+ private:
+
+ /**
+ * @brief The real (std::map<>) iterator.
+ */
+ ItemMap::iterator _mi;
+
+ /**
+ * @brief Constructor.
+ *
+ * @param mi A real (std::map<>) iterator.
+ */
+ iterator(ItemMap::iterator mi) : _mi(mi) {}
+
+ public:
+
+ /**
+ * @brief Default constructor.
+ */
+ iterator() : _mi() {}
+
+ /**
+ * @brief Copy constructor.
+ *
+ * @param it Reference to a Vectorizer::RawVector::iterator
+ * object.
+ */
+ iterator(const iterator &it) : _mi(it._mi) {}
+
+ /**
+ * @brief Constructor.
+ *
+ * Initialize the iterator to the beginning of a RawVector
+ * object.
+ *
+ * @param rv Reference to a Vectorizer::RawVector object, the
+ * iterator will be initialized to rv.begin().
+ */
+ iterator(RawVector &rv) : _mi(rv._item_map.begin()) { }
+
+ /**
+ * @brief Assignment operator.
+ *
+ * @param it Reference to another iterator.
+ * @return Reference to this iterator.
+ */
+ iterator& operator=(const iterator &it) { _mi=it._mi; return *this; }
+
+ /**
+ * @brief Not equals operator.
+ *
+ * @param it Reference to another iterator.
+ * @return True if the two iterators point to different elements.
+ */
+ bool operator!=(const iterator &it) const { return _mi!=it._mi; }
+
+ /**
+ * @brief Prefix increment operator.
+ *
+ * @return Reference to the (incremented) iterator.
+ */
+ iterator& operator++() { ++_mi; return *this; }
+
+ /**
+ * @brief Dereference operator
+ *
+ * @return Reference to the actual pair the iterator refers to.
+ */
+ ItemMap::value_type& operator*() { return _mi.operator*(); }
+
+ /**
+ * @brief Dereference operator
+ *
+ * @return Pointer to the actual pair the iterator refers to.
+ */
+ ItemMap::value_type* operator->() { return _mi.operator->(); }
+ };
+
+ // }}}
+
+#if (__GNUG__<3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+ friend RawVector::iterator;
+#endif
+
+ private:
+
+ /**
+ * @brief Flag for controlling whether or not the detector will
+ * save hit position information.
+ */
+ bool _save_positions;
+
+ /**
+ * @brief The map holding the detected terms/phrases.
+ */
+ ItemMap _item_map;
+
+ public:
+
+ /**
+ * @brief Default constructor.
+ */
+ RawVector(bool save_positions = false) : _save_positions(save_positions), _item_map() {}
+
+ /**
+ * @brief Destructor.
+ */
+ ~RawVector() {}
+
+ /**
+ * @brief Clear all data structures.
+ */
+ void clear() { _item_map.clear(); }
+
+ /**
+ * @brief Register a term or phrase.
+ *
+ * This method will be called by the detector for each term or
+ * phrase recognized.
+ *
+ * @param text Input document (tokenized).
+ * @param from Index of first token of the phrase.
+ * @param length Length of the phrase.
+ * @param state Reference to the final state of the automaton
+ * after recognition of the phrase.
+ */
+    void add(const NGram &text,
+             unsigned int from, int length,
+             const FSA::State &state)
+    {
+      // The map key is the phrase itself: its tokens joined by single spaces.
+      const std::string phrase = text.join(" ",from,length);
+
+      // lower_bound() yields either the existing entry for the phrase or
+      // the position in front of which a new entry belongs.
+      ItemMap::iterator entry = _item_map.lower_bound(phrase);
+      const bool known = (entry!=_item_map.end() && !(phrase<entry->first));
+
+      if(known){
+        // Seen before: bump the term frequency only.
+        ++(entry->second.first);
+      }
+      else{
+        // First occurrence: tf starts at 1, the idf count is read from
+        // the final automaton state, and the hit list starts out empty.
+        entry = _item_map.insert(
+                  entry,
+                  ItemMap::value_type(
+                    phrase,
+                    std::pair<TfIdf,VectorItem::Hits>(
+                      TfIdf(1,state.nData()),
+                      VectorItem::Hits())));
+      }
+
+      // Hit positions are recorded only when requested at construction.
+      if(_save_positions){
+        entry->second.second.push_back(VectorItem::Hit(from,length));
+      }
+    }
+
+ /**
+ * @brief Get the size of the vector.
+ *
+ * @return Size of the vector (number of items).
+ */
+ unsigned int size() const { return _item_map.size(); }
+
+ /**
+ * @brief Get an iterator to the beginning of the vector.
+ *
+ * @return Iterator pointing to the first item of the vector.
+ */
+ iterator begin() { return iterator(_item_map.begin()); }
+
+ /**
+ * @brief Get an iterator to the end of the vector.
+ *
+ * @return Iterator pointing beyond the last item of the vector.
+ */
+ iterator end() { return iterator(_item_map.end()); }
+
+ };
+
+ // }}}
+
+ const FSA& _dictionary; /**< The dictionary. */
+ Detector _detector; /**< The detector. */
+ unsigned int _idf_docs; /**< Total number of documents (for Idf calculations) */
+
+ /**
+ * @brief Retrieve total number of documents from the automaton.
+ *
+ * Retrieve total number of documents from the automaton. For the
+ * Idf calculations to work properly, the total number of documents
+ * needs to be stored in the automaton. This is done via a special
+ * term, '#IDFDOCS', with a numerical meta info which equals the
+ * total number of documents.
+ */
+  void initIdfCount()
+  {
+    // Look up the reserved '#IDFDOCS' entry; its numerical data is
+    // taken as the total number of documents.
+    FSA::State probe(_dictionary);
+    unsigned int docs = 0;
+    if(probe.start("#IDFDOCS")){
+      docs = probe.nData();
+    }
+
+    // A missing or zero entry falls back to 1, so the count is never 0.
+    _idf_docs = (docs==0) ? 1 : docs;
+  }
+
+public:
+
+ /**
+ * @brief Constructor.
+ *
+ * Initialize the dictionary and the detector from an FSA, then read
+ * the total document count via initIdfCount().
+ *
+ * @param dict FSA. Only a reference is stored (the _dictionary member
+ * is a const FSA&), so the object must outlive the Vectorizer.
+ */
+ Vectorizer(const FSA& dict) :
+ _dictionary(dict),
+ _detector(_dictionary),
+ _idf_docs(0)
+ {
+ initIdfCount();
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Initialize the dictionary and the detector from an FSA, then read
+ * the total document count via initIdfCount().
+ *
+ * @param dict Pointer to the FSA. It is dereferenced unconditionally,
+ * so it must not be NULL; only a reference to the pointee is
+ * stored, so the object must outlive the Vectorizer.
+ */
+ Vectorizer(const FSA* dict) :
+ _dictionary(*dict),
+ _detector(_dictionary),
+ _idf_docs(0)
+ {
+ initIdfCount();
+ }
+
+ /**
+ * @brief Destructor.
+ */
+ ~Vectorizer() {}
+
+
+ /**
+ * @brief Vectorize a document.
+ *
+ * @param text Input document.
+ * @param vector TermVector object to hold the document vector.
+ * @param limit Limit the number of vector items.
+ * @param keephits Include in the vector items the hit positions of terms.
+ * @param tfexp Exponent for tf (term frequency).
+ * @param idfexp Exponent for idf (inverse document frequency).
+ */
+ void vectorize(const NGram &text, TermVector &vector, unsigned int limit,
+ bool keephits, double tfexp = 1.0, double idfexp = 1.0) const;
+
+ /**
+ * @brief Vectorize a document.
+ *
+ * In this version of the call, hit positions are not kept.
+ *
+ * @param text Input document.
+ * @param vector TermVector object to hold the document vector.
+ * @param limit Limit the number of vector items (default=15).
+ * @param tfexp Exponent for tf (term frequency).
+ * @param idfexp Exponent for idf (inverse document frequency).
+ */
+ void vectorize(const NGram &text, TermVector &vector, unsigned int limit=15,
+ double tfexp = 1.0, double idfexp = 1.0) const;
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsa/wordchartokenizer.cpp b/fsa/src/vespa/fsa/wordchartokenizer.cpp
new file mode 100644
index 00000000000..e6ea7ec918a
--- /dev/null
+++ b/fsa/src/vespa/fsa/wordchartokenizer.cpp
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "wordchartokenizer.h"
+#include "unicode.h"
+
+#include <string.h>
+
+
+namespace fsa {
+
+const bool WordCharTokenizer::_punctuation_table[] = { // 128 entries indexed by character code (< 128); 1 = init() may emit a punctuation token for it
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0F: none
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F: none
+ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, // 0x20-0x2F: ! # $ % ( ) * , .
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // 0x30-0x3F: : ; < = > ?
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4F: @
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 0x50-0x5F: [ backslash ] ^
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6F: none
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 0x70-0x7F: { | } ~
+};
+
+
+// Split text into tokens (and, depending on the punctuation strategy,
+// punctuation markers), replacing any previously held tokens.
+// Returns false only if the working copy of the input could not be made.
+bool WordCharTokenizer::init(const std::string &text)
+{
+  _tokens.clear();
+  _current = 0;
+
+  // Tokens are cut out of a private, writable copy of the input
+  // (lowercased on request) by overwriting separators with 0.
+  char *dup;
+  if(_lowercase)
+    dup = Unicode::strlowdupUTF8(text.c_str());
+  else
+    dup = Unicode::strdupUTF8(text.c_str());
+
+  // The copy is dereferenced right below and free()d at the end;
+  // report failure instead of crashing if it could not be created.
+  if(dup==NULL)
+    return false;
+
+  char *tmp = dup;   // read cursor; the loops rely on getUTF8Char() advancing it
+  char *tok,*end;    // start of the current token / first byte after it
+  ucs4_t ch=0;       // most recently decoded character
+  // need_punct:  a real token has been emitted, so punctuation may be reported.
+  // added_punct: the entry pushed last is a punctuation token (avoids duplicates).
+  bool need_punct=false, added_punct=false;
+
+  while(*tmp) {
+    tok=NULL;
+    // Skip separators: white space only in WHITESPACEONLY mode, any
+    // non-word character otherwise. On exit tok points at the first
+    // character of the next token, or at the terminator.
+    while((tok=tmp,*tmp) &&
+          (ch=Unicode::getUTF8Char(tmp),
+           _punctuation==PUNCTUATION_WHITESPACEONLY?Unicode::isSpaceChar(ch):!Unicode::isWordChar(ch))){
+      if(_punctuation!=PUNCTUATION_DISCARD && _punctuation!=PUNCTUATION_WHITESPACEONLY){
+        if(ch<128 && _punctuation_table[ch] && need_punct && !added_punct){
+          _tokens.push_back(_punctuation_token);
+          added_punct=true;
+        }
+      }
+    }
+
+    // Consume the token itself. end is left on the separator that
+    // stopped the scan (tmp is already past it) or on the terminator.
+    while((end=tmp,*tmp) &&
+          (ch=Unicode::getUTF8Char(tmp),
+           _punctuation==PUNCTUATION_WHITESPACEONLY?!Unicode::isSpaceChar(ch):Unicode::isWordChar(ch)));
+
+    // Terminate the token in place.
+    if(*end) {
+      *end=0;
+    }
+    if(*tok){
+      _tokens.push_back(std::string((char *)tok));
+      added_punct = false;
+      need_punct = true;
+      // ch is the separator that ended the token (or still a token
+      // character if the text ended here).
+      if(_punctuation!=PUNCTUATION_DISCARD && _punctuation!=PUNCTUATION_WHITESPACEONLY){
+        if(ch<128 && _punctuation_table[ch]){
+          // Outside PUNCTUATION_FULL, a '.' right after a one-byte
+          // token (e.g. an initial) is not reported as punctuation.
+          if(_punctuation==PUNCTUATION_FULL || ch!='.' || strlen(tok)>1){
+            _tokens.push_back(_punctuation_token);
+            added_punct=true;
+          }
+        }
+      }
+    }
+  }
+
+  if(added_punct) {  // The last token is a punctuation token, drop it
+    _tokens.pop_back();
+  }
+
+  free(dup);
+  return true;
+}
+
+
+// True while the read position has not reached the end of the token list.
+bool WordCharTokenizer::hasMore()
+{
+  return _current<_tokens.size();
+}
+
+// Return the token at the read position and advance past it; an empty
+// string is returned (and nothing advances) once all tokens are consumed.
+std::string WordCharTokenizer::getNext()
+{
+  if(_current>=_tokens.size()){
+    return std::string();
+  }
+  return _tokens[_current++];
+}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsa/wordchartokenizer.h b/fsa/src/vespa/fsa/wordchartokenizer.h
new file mode 100644
index 00000000000..c66c727207f
--- /dev/null
+++ b/fsa/src/vespa/fsa/wordchartokenizer.h
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file wordchartokenizer.h
+ * @brief Tokenizer based on the unicode WORDCHAR property.
+ */
+
+#pragma once
+
+#include "tokenizer.h"
+
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+
+namespace fsa {
+
+// {{{ class WordCharTokenizer
+
+/**
+ * @class WordCharTokenizer
+ * @brief Tokenizer based on the Unicode WORDCHAR property.
+ */
+class WordCharTokenizer : public Tokenizer {
+
+public:
+ /**
+ * @brief Enumerated type for specifying punctuation removal strategy.
+ *
+ * Enumerated type for specifying punctuation removal strategy. The
+ * following strategies are currently supported:
+ * - PUNCTUATION_DISCARD: discard all punctuation.
+ * - PUNCTUATION_FULL: honour all punctuation and insert
+ * punctuation token.
+ * - PUNCTUATION_SMART: same as PUNCTUATION_FULL, with some
+ * heuristics to not break acronyms and names.
+ * - PUNCTUATION_WHITESPACEONLY: treat everything (including
+ * punctuation) as word characters, except white space.
+ */
+ enum Punctuation {
+ PUNCTUATION_DISCARD = 0,
+ PUNCTUATION_FULL,
+ PUNCTUATION_SMART,
+ PUNCTUATION_WHITESPACEONLY
+ };
+
+private:
+
+ static const bool _punctuation_table[]; /**< Table used for punctuation tests. */
+
+ std::vector<std::string> _tokens; /**< Vector holding the tokens. */
+ unsigned int _current; /**< Index of current token. */
+ Punctuation _punctuation; /**< Punctuation strategy. */
+ std::string _punctuation_token; /**< Special token for marking punctuation. */
+ bool _lowercase; /**< Indicator whether tokens should be lowercased. */
+
+public:
+
+ WordCharTokenizer(Punctuation punct = PUNCTUATION_DISCARD, const std::string &punct_token = ".") : // starts with no tokens and lowercasing switched on
+ _tokens(),
+ _current(0),
+ _punctuation(punct),
+ _punctuation_token(punct_token),
+ _lowercase(true)
+ {}
+
+ virtual ~WordCharTokenizer() {}
+
+ Punctuation getPunctuation() const { return _punctuation; } /**< Get the punctuation strategy. */
+ void setPunctuation(Punctuation punct) { _punctuation=punct; } /**< Set the punctuation strategy. */
+ std::string getPunctuationToken() const { return _punctuation_token; } /**< Get (a copy of) the punctuation token. */
+ void setPunctuationToken(const std::string &punct_token) { _punctuation_token=punct_token; } /**< Set the punctuation token. */
+ void rewind() { _current=0; } /**< Move the read position back to the first token; the tokens are kept. */
+ void setLowerCase(bool lc) { _lowercase = lc; } /**< Switch lowercasing of tokens on or off. */
+ bool getLowerCase() const { return _lowercase; } /**< Check whether tokens are lowercased. */
+
+ /**
+ * @brief Initialize the tokenizer.
+ *
+ * @param text Input text.
+ * @return True on success.
+ */
+ virtual bool init(const std::string &text);
+
+
+ /**
+ * @brief Check if there are more tokens available.
+ *
+ * @return True if there are more tokens.
+ */
+ virtual bool hasMore();
+
+ /**
+ * @brief Get next token.
+ *
+ * @return Next token, or empty string if there are no more tokens left.
+ */
+ virtual std::string getNext();
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/CMakeLists.txt b/fsa/src/vespa/fsamanagers/CMakeLists.txt
new file mode 100644
index 00000000000..3e02946c59b
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(fsamanagers
+ SOURCES
+ conceptnetmanager.cpp
+ fsamanager.cpp
+ metadatamanager.cpp
+ mutex.cpp
+ rwlock.cpp
+ singleton.cpp
+ INSTALL lib64
+ DEPENDS
+)
+
+install(FILES
+ conceptnethandle.h
+ conceptnetmanager.h
+ fsahandle.h
+ fsamanager.h
+ metadatahandle.h
+ metadatamanager.h
+ mutex.h
+ refcountable.h
+ rwlock.h
+ singleton.h
+ DESTINATION include/vespa/fsamanagers)
diff --git a/fsa/src/vespa/fsamanagers/conceptnethandle.h b/fsa/src/vespa/fsamanagers/conceptnethandle.h
new file mode 100644
index 00000000000..a574343714f
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/conceptnethandle.h
@@ -0,0 +1,123 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file conceptnethandle.h
+ * @brief Concept network handle class definition.
+ *
+ */
+
+#pragma once
+
+#include <string>
+
+#include "refcountable.h"
+#include <vespa/fsa/conceptnet.h>
+
+namespace fsa {
+
+// {{{ class ConceptNet::Handle
+
+/**
+ * @class Handle
+ * @brief Concept net handle.
+ *
+ * A Handle looks like a ConceptNet, but copies are cheap; the actual
+ * ConceptNet objects are refcounted and Handle copies merely copy the
+ * ConceptNet pointer and increment the refcount.
+ */
+class ConceptNet::Handle {
+
+private:
+
+  /**
+   * @brief Unimplemented private default constructor.
+   */
+  Handle();
+  /**
+   * @brief Unimplemented private assignment operator.
+   */
+  Handle& operator=(const Handle&);
+
+  class RefCountableConceptNet: public ConceptNet, public RefCountable<ConceptNet> {
+  public:
+    RefCountableConceptNet(const char *fsafile, const char *datafile=NULL, FileAccessMethod fam = FILE_ACCESS_UNDEF) : ConceptNet(fsafile,datafile,fam) {}
+  };
+
+  RefCountableConceptNet *_conceptNet; /**< The ConceptNet object itself. */
+
+public:
+
+  /**
+   * @brief Copy constructor.
+   *
+   * Duplicate a handle (and add a new reference to the ConceptNet object).
+   *
+   * @param h Reference to existing ConceptNet::Handle.
+   */
+  Handle(const Handle& h) : _conceptNet(h._conceptNet)
+  {
+    _conceptNet->addReference();
+  }
+
+  /**
+   * @brief Constructor.
+   *
+   * Create a new ConceptNet object and add the first reference to it.
+   *
+   * @param fsafile %FSA file containing the units, with a perfect hash
+   *                (used for indexing the data file).
+   * @param datafile Concept net data file (may be NULL).
+   * @param fam File access mode (read or mmap). If not set, the
+   *            global preferred access mode will be used.
+   */
+  Handle(const char *fsafile, const char *datafile=NULL, FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+    _conceptNet(new RefCountableConceptNet(fsafile,datafile,fam))
+  {
+    _conceptNet->addReference();
+  }
+
+  /**
+   * @brief Constructor.
+   *
+   * Create a new ConceptNet object and add the first reference to it.
+   *
+   * @param fsafile %FSA file containing the units, with a perfect hash
+   *                (used for indexing the data file).
+   * @param datafile Concept net data file. An empty string (the
+   *                 default) is passed on as NULL, exactly like the
+   *                 default of the const char* overload. The default
+   *                 must not be spelled NULL here: that would construct
+   *                 a std::string from a null pointer.
+   * @param fam File access mode (read or mmap). If not set, the
+   *            global preferred access mode will be used.
+   */
+  Handle(const std::string &fsafile, const std::string &datafile=std::string(), FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+    _conceptNet(new RefCountableConceptNet(fsafile.c_str(),
+                                           datafile.empty()?NULL:datafile.c_str(),
+                                           fam))
+  {
+    _conceptNet->addReference();
+  }
+
+  /**
+   * @brief Destructor.
+   *
+   * Remove this handle's reference to the ConceptNet object.
+   */
+  ~Handle(void)
+  {
+    _conceptNet->removeReference();
+  }
+
+  /**
+   * @brief Dereference operator, provides access to ConceptNet
+   *        methods.
+   *
+   * @return Reference to the ConceptNet object.
+   */
+  const ConceptNet& operator*() const { return *_conceptNet; }
+
+  /**
+   * @brief Dereference operator, provides access to ConceptNet
+   *        methods.
+   *
+   * @return Pointer to the ConceptNet object.
+   */
+  const ConceptNet* operator->() const { return _conceptNet; }
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/conceptnetmanager.cpp b/fsa/src/vespa/fsamanagers/conceptnetmanager.cpp
new file mode 100644
index 00000000000..459d7c81239
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/conceptnetmanager.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file conceptnetmanager.cpp
+ * @brief Concept network manager class implementation.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "conceptnetmanager.h"
+
+namespace fsa {
+
+// {{{ ConceptNetManager::~ConceptNetManager()
+
+ConceptNetManager::~ConceptNetManager()
+{
+  // Delete every handle that is still registered in the library.
+  LibraryIterator entry=_library.begin();
+  while(entry!=_library.end()){
+    delete entry->second;
+    ++entry;
+  }
+}
+
+// }}}
+
+// {{{ ConceptNetManager::load()
+
+bool ConceptNetManager::load(const std::string &id, const std::string &fsafile, const std::string &datafile)
+{
+  // An empty data file name is handed on as NULL.
+  const char *data = datafile.empty() ? NULL : datafile.c_str();
+  ConceptNet::Handle *fresh = new ConceptNet::Handle(fsafile.c_str(), data);
+
+  // Nothing is registered unless the concept net loaded properly.
+  if(fresh==NULL || !(*fresh)->isOk()){
+    delete fresh;
+    return false;
+  }
+
+  _lock.wrLock();
+  // insert() leaves an already registered id untouched and reports it;
+  // in that case the old handle is deleted and replaced by the new one.
+  std::pair<LibraryIterator,bool> slot = _library.insert(Library::value_type(id,fresh));
+  if(!slot.second){
+    delete slot.first->second;
+    slot.first->second = fresh;
+  }
+  _lock.unlock();
+
+  return true;
+}
+
+// }}}
+// {{{ ConceptNetManager::get()
+
+ConceptNet::Handle* ConceptNetManager::get(const std::string &id) const
+{
+  ConceptNet::Handle *copy=NULL;
+
+  _lock.rdLock();
+  const LibraryConstIterator found = _library.find(id);
+  if(found!=_library.end()){
+    // Hand out a freshly allocated copy of the stored handle.
+    copy = new ConceptNet::Handle(*(found->second));
+  }
+  _lock.unlock();
+
+  // Still NULL when the id is not registered.
+  return copy;
+}
+
+// }}}
+// {{{ ConceptNetManager::drop()
+
+void ConceptNetManager::drop(const std::string &id)
+{
+  _lock.wrLock();
+  const LibraryIterator victim = _library.find(id);
+  const bool registered = (victim!=_library.end());
+  if(registered){
+    // Delete the library's own handle, then forget the id.
+    delete victim->second;
+    _library.erase(victim);
+  }
+  _lock.unlock();
+}
+
+// }}}
+// {{{ ConceptNetManager::clear()
+
+void ConceptNetManager::clear()
+{
+  _lock.wrLock();
+  // Delete every stored handle first, then empty the map itself.
+  LibraryIterator entry = _library.begin();
+  while(entry!=_library.end()){
+    delete entry->second;
+    ++entry;
+  }
+  _library.clear();
+  _lock.unlock();
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/conceptnetmanager.h b/fsa/src/vespa/fsamanagers/conceptnetmanager.h
new file mode 100644
index 00000000000..d4e55bc68a6
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/conceptnetmanager.h
@@ -0,0 +1,104 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file conceptnetmanager.h
+ * @brief Concept network manager class definition.
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "singleton.h"
+#include "rwlock.h"
+#include "conceptnethandle.h"
+
+namespace fsa {
+
+// {{{ class ConceptNetManager
+
+/**
+ * @class ConceptNetManager
+ * @brief Class for managing concept networks.
+ *
+ * This class provides a single point of access to all concept networks
+ * used by the applications.
+ */
+class ConceptNetManager : public Singleton<ConceptNetManager> {
+
+protected:
+ friend class Singleton<ConceptNetManager>;
+
+ /** Default constructor. Protected to avoid accidental creation */
+ ConceptNetManager() : _library(), _lock() {}
+
+private:
+
+ /** Private unimplemented copy constructor */
+ ConceptNetManager(const ConceptNetManager&);
+ /** Private unimplemented assignment operator */
+ ConceptNetManager& operator=(const ConceptNetManager&);
+
+ /** %ConceptNet library type */
+ typedef std::map<std::string,ConceptNet::Handle*> Library;
+ /** %ConceptNet library iterator type */
+ typedef std::map<std::string,ConceptNet::Handle*>::iterator LibraryIterator;
+ /** %ConceptNet library const iterator type */
+ typedef std::map<std::string,ConceptNet::Handle*>::const_iterator LibraryConstIterator;
+
+ Library _library; /**< Library of concept networks. */
+ mutable RWLock _lock; /**< Read-write lock for library synchronization. */
+
+public:
+
+ /** Destructor */
+ ~ConceptNetManager();
+
+ /**
+ * @brief Load a concept network into memory.
+ *
+ * If a concept network is already registered under the given id,
+ * it is replaced by the newly loaded one.
+ *
+ * @param id Concept network id (to be used in later get() or drop() calls).
+ * @param fsafile Concept net %FSA file name
+ * @param datafile Concept net data file name (defaults to empty
+ * string which means use the fsa file name but
+ * replace .fsa extension with .dat).
+ * @return True on success, false if the concept network could not
+ * be loaded (the library is left unchanged in that case).
+ */
+ bool load(const std::string &id,
+ const std::string &fsafile,
+ const std::string &datafile=std::string(""));
+
+ /**
+ * @brief Get a handle to a concept net.
+ *
+ * @param id Concept net id.
+ * @return Newly allocated handle, must be deleted by the
+ * caller. (NULL if no concept net with the given id was found.)
+ */
+ ConceptNet::Handle* get(const std::string &id) const;
+
+ /**
+ * @brief Drop a concept net from the library.
+ *
+ * Drop a concept net from the library. The concept net object will
+ * be deleted automagically when there are no more handles referring
+ * to it.
+ *
+ * @param id Concept net id.
+ */
+ void drop(const std::string &id);
+
+ /**
+ * @brief Drop all concept nets from the library.
+ */
+ void clear();
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/fsahandle.h b/fsa/src/vespa/fsamanagers/fsahandle.h
new file mode 100644
index 00000000000..9504c416c79
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/fsahandle.h
@@ -0,0 +1,191 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file fsahandle.h
+ * @brief FSA handle class definition.
+ *
+ */
+
+#pragma once
+
+#include <string>
+
+#include "refcountable.h"
+#include <vespa/fsa/fsa.h>
+
+namespace fsa {
+
+// {{{ FSA::Handle
+
+/**
+ * @class Handle
+ * @brief FSA accessor.
+ *
+ * A Handle looks like an FSA, but copies are cheap; the actual FSA
+ * objects are refcounted and Handle copies merely copy the FSA pointer
+ * and increment the refcount.
+ */
+class FSA::Handle {
+
+private:
+
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ Handle();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ Handle& operator=(const Handle&);
+
+ class RefCountableFSA: public FSA, public RefCountable<FSA> {
+ public:
+ RefCountableFSA(const char *file, FileAccessMethod fam = FILE_ACCESS_UNDEF) : FSA(file,fam) {}
+ };
+
+ RefCountableFSA *_fsa; /**< The FSA object itself. */
+
+ /**
+ * @brief Get a pointer to the referred FSA object.
+ *
+ * @return pointer to the referred FSA object.
+ */
+ const FSA* getFSA() const
+ {
+ return _fsa;
+ }
+
+public:
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate a handle (and add a new reference to the FSA object).
+ *
+ * @param h Reference to handle to duplicate.
+ */
+ Handle(const Handle& h) : _fsa(h._fsa)
+ {
+ _fsa->addReference();
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new FSA object (loaded from file) and add reference.
+ *
+ * @param file Name of the file containing the automaton.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global preferred access mode will be used.
+ */
+ Handle(const char *file, FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+ _fsa(new RefCountableFSA(file,fam))
+ {
+ _fsa->addReference();
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new FSA object (loaded from file) and add reference.
+ *
+ * @param file Name of the file containing the automaton.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global preferred access mode will be used.
+ */
+ Handle(const std::string &file, FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+ _fsa(new RefCountableFSA(file.c_str(),fam))
+ {
+ _fsa->addReference();
+ }
+
+ /**
+ * @brief Destructor.
+ *
+ * Remove reference to the FSA object.
+ */
+ ~Handle(void)
+ {
+ _fsa->removeReference();
+ }
+
+ /**
+ * @brief Dereference operator, provides access to FSA
+ * methods.
+ *
+ * @return Reference to the FSA object.
+ */
+ const FSA& operator*() const { return *_fsa; }
+
+ /**
+ * @brief Dereference operator, provides access to FSA
+ * methods.
+ *
+ * @return Pointer to the FSA object.
+ */
+ const FSA* operator->() const { return _fsa; }
+
+ /**
+ * @brief Check if %FSA was properly constructed.
+ *
+ * @return true iff underlying %FSA was properly constructed.
+ */
+ bool isOk(void) const
+ {
+ return _fsa->isOk();
+ }
+
+ /**
+ * @brief Get the fsa library version used for building this %FSA.
+ *
+ * @return fsa library version.
+ */
+ uint32_t version(void) const
+ {
+ return _fsa->version();
+ }
+
+ /**
+ * @brief Get the serial number of the %FSA.
+ *
+ * @return Serial number.
+ */
+ uint32_t serial(void) const
+ {
+ return _fsa->serial();
+ }
+
+ /**
+ * @brief Check if the automaton has perfect hash built in.
+ *
+ * Returns true if the automaton was built with a perfect hash included.
+ *
+ * @return True if the automaton has perfect hash.
+ */
+ bool hasPerfectHash() const
+ {
+ return _fsa->hasPerfectHash();
+ }
+
+ /**
+ * @brief Get iterator pointing to the beginning of the fsa.
+ *
+ * @return iterator pointing to the first string in the fsa.
+ */
+ FSA::iterator begin() const { return FSA::iterator(_fsa); }
+
+ /**
+ * @brief Get iterator pointing past the end of the fsa.
+ *
+ * @return iterator pointing past the last string in the fsa.
+ */
+ FSA::iterator end() const { return FSA::iterator(_fsa,true); }
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/fsamanager.cpp b/fsa/src/vespa/fsamanagers/fsamanager.cpp
new file mode 100644
index 00000000000..8816ea2a4b8
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/fsamanager.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file fsamanager.cpp
+ * @brief
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "fsamanager.h"
+
+#ifdef HAVE_CURL
+#include <stdio.h>
+#include <unistd.h>
+#include <curl/curl.h>
+#include <curl/types.h>
+#include <curl/easy.h>
+#endif
+
+
+
+namespace fsa {
+
+// {{{ FSAManager::~FSAManager()
+
+FSAManager::~FSAManager()
+{
+  // Delete every handle that is still registered in the library.
+  LibraryIterator entry=_library.begin();
+  while(entry!=_library.end()){
+    delete entry->second;
+    ++entry;
+  }
+}
+
+// }}}
+// {{{ FSAManager::load()
+
+// Load the automaton named by url (a plain file name, or an http:// URL
+// that is first fetched into the cache directory) and register it under
+// id, replacing any automaton previously registered with that id.
+// Returns false if the download or the load fails; the library is then
+// left unchanged.
+bool FSAManager::load(const std::string &id, const std::string &url)
+{
+  std::string file=url;
+
+  // Only GCC older than 3.1 needs the pre-standard compare() argument
+  // order; every other compiler (GNU or not) gets the standard overload.
+#if defined(__GNUG__) && (__GNUG__ < 3 || (__GNUG__ == 3 && __GNUC_MINOR__ < 1))
+  if(!url.compare("http://",0,7))
+#else
+  if(!url.compare(0,7,"http://"))
+#endif
+  {
+    // The local file name is whatever follows the last '/' of the URL;
+    // a URL ending in '/' has no file name and is rejected.
+    const std::string::size_type pos=url.find_last_of('/');
+    if(pos==url.size()-1) return false;
+    _cacheLock.lock();
+    file=_cacheDir;
+    _cacheLock.unlock();
+    if(file.size()>0 && file[file.size()-1]!='/') file+='/';
+    file+=url.substr(pos+1);
+    if(!getUrl(url,file)) return false;
+  }
+
+  // Load before taking the library lock, so the write lock is held
+  // only for the swap itself.
+  FSA::Handle *newdict = new FSA::Handle(file);
+  if(!newdict->isOk()){
+    delete newdict;
+    return false;
+  }
+
+  _lock.wrLock();
+  {
+    LibraryIterator it = _library.find(id);
+    if(it!=_library.end()){
+      // Replace: delete the library's handle to the old automaton.
+      delete it->second;
+      it->second = newdict;
+    }
+    else
+      _library.insert(Library::value_type(id,newdict));
+  }
+  _lock.unlock();
+
+  return true;
+}
+
+// }}}
+// {{{ FSAManager::getUrl()
+
+// Make sure a local copy of url exists as file. An already readable
+// file is used as is; otherwise the URL is downloaded with libcurl.
+// Returns true if the file is available afterwards. Without libcurl
+// support (HAVE_CURL undefined) this always returns false.
+bool FSAManager::getUrl(const std::string &url, const std::string &file)
+{
+#ifdef HAVE_CURL
+  // Cache hit: anything that can be opened for reading is accepted.
+  FILE *filehandle = fopen(file.c_str(),"r");
+  if(filehandle!=NULL){
+    fclose(filehandle);
+    return true;
+  }
+
+  filehandle = fopen(file.c_str(),"w");
+  if(filehandle==NULL)
+    return false;
+
+  // Both stay at their "failed" values unless libcurl reports
+  // otherwise, so they are never read uninitialized.
+  long response_code = 0;
+  CURLcode result = CURLE_FAILED_INIT;
+
+  CURL *curl_handle = curl_easy_init();
+  if(curl_handle!=NULL){
+    curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)filehandle);
+    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libfsa-url-agent/0.1");
+
+    result = curl_easy_perform(curl_handle);
+
+    curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &response_code);
+
+    curl_easy_cleanup(curl_handle);
+  }
+
+  // A failing fclose() means buffered data could not be written out.
+  const bool written = (fclose(filehandle)==0);
+
+  // Keep the file only if the whole transfer succeeded. A partial or
+  // failed download must not stay behind, because an existing file is
+  // treated as a valid cache entry on the next call.
+  if(result!=CURLE_OK || response_code!=200 || !written){
+    unlink(file.c_str());
+    return false;
+  }
+
+  return true;
+#else // HAVE_CURL
+  (void)url;(void)file;
+  return false;
+#endif // HAVE_CURL
+}
+
+// }}}
+// {{{ FSAManager::get()
+
+FSA::Handle* FSAManager::get(const std::string &id) const
+{
+  FSA::Handle *copy=NULL;
+
+  _lock.rdLock();
+  const LibraryConstIterator found = _library.find(id);
+  if(found!=_library.end()){
+    // Hand out a freshly allocated copy of the stored handle.
+    copy = new FSA::Handle(*(found->second));
+  }
+  _lock.unlock();
+
+  // Still NULL when the id is not registered.
+  return copy;
+}
+
+// }}}
+// {{{ FSAManager::drop()
+
+void FSAManager::drop(const std::string &id)
+{
+  _lock.wrLock();
+  const LibraryIterator victim = _library.find(id);
+  const bool registered = (victim!=_library.end());
+  if(registered){
+    // Delete the library's own handle, then forget the id.
+    delete victim->second;
+    _library.erase(victim);
+  }
+  _lock.unlock();
+}
+
+// }}}
+// {{{ FSAManager::clear()
+
+void FSAManager::clear()
+{
+  _lock.wrLock();
+  // Delete every stored handle first, then empty the map itself.
+  LibraryIterator entry = _library.begin();
+  while(entry!=_library.end()){
+    delete entry->second;
+    ++entry;
+  }
+  _library.clear();
+  _lock.unlock();
+}
+
+// }}}
+// {{{ FSAManager::setCacheDir()
+
+void FSAManager::setCacheDir(const std::string &dir)
+{
+ _cacheLock.lock(); // load() copies _cacheDir under this same mutex
+ _cacheDir = dir; // stored as given; load() appends a '/' itself when needed
+ _cacheLock.unlock();
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/fsamanager.h b/fsa/src/vespa/fsamanagers/fsamanager.h
new file mode 100644
index 00000000000..6de1b95a085
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/fsamanager.h
@@ -0,0 +1,140 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file fsamanager.h
+ * @brief Class definition of the %FSA manager
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "singleton.h"
+#include "rwlock.h"
+#include "fsahandle.h"
+
+namespace fsa {
+
+// {{{ class FSAManager
+
+/**
+ * @class FSAManager
+ * @brief Class for managing finite state automata.
+ *
+ * This class provides a single point of access to all finite state
+ * automata used by the applications. Supports loading fsa files and
+ * downloading from the net if libcurl support is built in, in which
+ * case the files are cached in a local cache directory. FSAManager is
+ * implemented as a singleton.
+ */
+class FSAManager : public Singleton<FSAManager> {
+
+protected:
+ friend class Singleton<FSAManager>;
+
+ /** Default constructor. Protected to avoid accidental creation */
+ FSAManager() : _library(), _lock(), _cacheDir(), _cacheLock() {}
+
+private:
+
+ /** Private unimplemented copy constructor */
+ FSAManager(const FSAManager&);
+ /** Private unimplemented assignment operator */
+ FSAManager& operator=(const FSAManager&);
+
+ /** %FSA library type */
+ typedef std::map<std::string,FSA::Handle*> Library;
+ /** %FSA library iterator type */
+ typedef std::map<std::string,FSA::Handle*>::iterator LibraryIterator;
+ /** %FSA library const iterator type */
+ typedef std::map<std::string,FSA::Handle*>::const_iterator LibraryConstIterator;
+
+ Library _library; /**< Library of automata. */
+ mutable RWLock _lock; /**< Read-write lock for library synchronization. */
+ std::string _cacheDir; /**< Cache directory. */
+ mutable Mutex _cacheLock; /**< Mutex for cache synchronization. */
+
+ /**
+ * @brief Fetch an automaton from the net.
+ *
+ * @param url URL to automaton.
+ * @param file Name of local file to store automaton.
+ * @return True on success.
+ */
+ bool getUrl(const std::string &url, const std::string &file);
+
+public:
+
+ /** Destructor */
+ ~FSAManager();
+
+ /**
+ * @brief Load automaton from file or fetch from the net.
+ *
+ * Load automaton from file or fetch from the net. If the url begins
+ * with "http://", and libcurl support is compiled in, the automaton
+ * is downloaded from the net and stored in the local cache, unless
+ * an automaton with that filename already exists in the cache, in which
+ * case the local copy is used. This behaviour is expected to change
+ * in the future, and it will use the serial number from the fsa
+ * header to decide whether an update is needed.
+ *
+ * If an automaton is already registered with the given ID, the old
+ * one is dropped as soon as the new one is loaded. This does not
+ * affect handles to the old automaton which were acquired
+ * previously, as the old automaton will stay in memory until all
+ * handles are deleted.
+ *
+ * @param id Automaton ID (name) used by the application.
+ * @param url File name or URL (the latter if it begins with "http://").
+ * @return True on success.
+ */
+ bool load(const std::string &id, const std::string &url);
+
+ /**
+ * @brief Get a handle to an automaton.
+ *
+ * @param id Automaton ID (name).
+ * @return Pointer to a new handle to the automaton, or NULL if not found.
+ * The handle must be deleted when it is not needed
+ * anymore. (In fact it should be deleted and re-requested
+ * on a regular basis if automaton updates may be performed.)
+ */
+ FSA::Handle* get(const std::string &id) const;
+
+ /**
+ * @brief Drop an automaton from the library.
+ *
+ * Drop the automaton from the library. All new requests for the
+ * given ID will receive a NULL handle after this operation (unless
+ * an automaton with the same ID is later loaded again).
+ *
+ * @param id Automaton ID
+ */
+ void drop(const std::string &id);
+
+ /**
+ * @brief Drop all automatons from the library.
+ */
+ void clear();
+
+ /**
+ * @brief Set the local cache directory.
+ *
+ * Set the local cache directory (default is empty, which
+ * corresponds to the CWD, the current working directory).
+ *
+ * @param dir Cache directory.
+ */
+ void setCacheDir(const std::string &dir);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/metadatahandle.h b/fsa/src/vespa/fsamanagers/metadatahandle.h
new file mode 100644
index 00000000000..8603caedfb7
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/metadatahandle.h
@@ -0,0 +1,130 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file metadatahandle.h
+ * @brief Metadata handle class definition.
+ *
+ */
+
+#pragma once
+
+#include <string>
+
+#include "refcountable.h"
+#include <vespa/fsa/metadata.h>
+
+namespace fsa {
+
+// {{{ class MetaData::Handle
+
+/**
+ * @class Handle
+ * @brief MetaData handle.
+ *
+ * A Handle looks like a MetaData, but copies are cheap; the actual
+ * MetaData objects are refcounted and Handle copies merely copy the
+ * MetaData pointer and increment the refcount. Every constructor adds
+ * one reference and the destructor removes one.
+ */
+class MetaData::Handle {
+
+private:
+
+ /**
+ * @brief Unimplemented private default constructor.
+ */
+ Handle();
+ /**
+ * @brief Unimplemented private assignment operator.
+ */
+ Handle& operator=(const Handle&);
+
+ /** MetaData combined with the RefCountable mix-in, so that the object can count its own references. */
+ class RefCountableMetaData: public MetaData, public RefCountable<MetaData> {
+ public:
+ RefCountableMetaData(const char *datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF) : MetaData(datafile,fam) {}
+ };
+
+ RefCountableMetaData *_metaData; /**< The MetaData object itself. */
+
+public:
+
+ /**
+ * @brief Copy constructor.
+ *
+ * Duplicate a handle (and add a new reference to the MetaData object).
+ *
+ * @param h Reference to existing Metadata::Handle.
+ */
+ Handle(const Handle& h) : _metaData(h._metaData)
+ {
+ _metaData->addReference();
+ }
+
+ /**
+ * @brief Constructor.
+ *
+ * Create a new MetaData object (loaded from file) and add reference.
+ *
+ * @param datafile Name of the file containing the metadata.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global preferred access mode will be used.
+ */
+ Handle(const char *datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+ _metaData(new RefCountableMetaData(datafile,fam))
+ {
+ _metaData->addReference();
+ }
+
+ /**
+ * @brief Constructor (std::string file name overload).
+ *
+ * Create a new MetaData object (loaded from file) and add reference.
+ *
+ * @param datafile Name of the file containing the metadata.
+ * @param fam File access mode (read or mmap). If not set, the
+ * global preferred access mode will be used.
+ */
+ Handle(const std::string &datafile, FileAccessMethod fam = FILE_ACCESS_UNDEF) :
+ _metaData(new RefCountableMetaData(datafile.c_str(),fam))
+ {
+ _metaData->addReference();
+ }
+
+ /**
+ * @brief Destructor.
+ *
+ * Removes this handle's reference from the MetaData object.
+ */
+ ~Handle(void)
+ {
+ _metaData->removeReference();
+ }
+
+ /**
+ * @brief Dereference operator, provides access to Metadata
+ * methods.
+ *
+ * @return Reference to the Metadata object.
+ */
+ const MetaData& operator*() const { return *_metaData; }
+
+ /**
+ * @brief Dereference operator, provides access to Metadata
+ * methods.
+ *
+ * @return Pointer to the Metadata object.
+ */
+ const MetaData* operator->() const { return _metaData; }
+
+ /**
+ * @brief Proxy method, forwards to MetaData::user().
+ *
+ * @param idx Index passed through unchanged to MetaData::user().
+ * @return Whatever MetaData::user(idx) returns.
+ */
+ uint32_t user(unsigned int idx) const
+ {
+ return _metaData->user(idx);
+ }
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/metadatamanager.cpp b/fsa/src/vespa/fsamanagers/metadatamanager.cpp
new file mode 100644
index 00000000000..9721d632c52
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/metadatamanager.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file metadatamanager.cpp
+ * @brief Metadata manager class implementation.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "metadatamanager.h"
+
+namespace fsa {
+
+// {{{ MetaDataManager::~MetaDataManager()
+
+/**
+ * Destructor: deletes every handle still stored in the library. The
+ * library lock is not taken here.
+ */
+MetaDataManager::~MetaDataManager()
+{
+  LibraryIterator entry = _library.begin();
+  while(entry != _library.end()){
+    delete entry->second;
+    ++entry;
+  }
+}
+
+// }}}
+
+// {{{ MetaDataManager::load()
+
+/**
+ * Load a metadata file and register it in the library under @p id.
+ *
+ * The file is loaded before the write lock is taken, so readers are
+ * only blocked while the library entry is swapped. A handle already
+ * registered under the same id is deleted and replaced.
+ *
+ * @param id       Library key for the metadata.
+ * @param datafile Name of the metadata file.
+ * @return True if the metadata loaded correctly and was registered,
+ *         false otherwise (the library is left untouched).
+ */
+bool MetaDataManager::load(const std::string &id, const std::string &datafile)
+{
+  MetaData::Handle *fresh = new MetaData::Handle(datafile.c_str());
+
+  const bool usable = (fresh!=NULL) && (*fresh)->isOk();
+  if(!usable){
+    delete fresh;
+    return false;
+  }
+
+  _lock.wrLock();
+  LibraryIterator slot = _library.find(id);
+  if(slot==_library.end()){
+    _library.insert(Library::value_type(id,fresh));
+  }
+  else{
+    // Replace the previous entry; this only drops the library's handle.
+    delete slot->second;
+    slot->second = fresh;
+  }
+  _lock.unlock();
+
+  return true;
+}
+
+// }}}
+// {{{ MetaDataManager::get()
+
+/**
+ * Look up @p id under the read lock and hand out a copy of the stored
+ * handle.
+ *
+ * @param id Library key of the metadata.
+ * @return Newly allocated handle (to be deleted by the caller), or
+ *         NULL if nothing is registered under @p id.
+ */
+MetaData::Handle* MetaDataManager::get(const std::string &id) const
+{
+  _lock.rdLock();
+  LibraryConstIterator found = _library.find(id);
+  MetaData::Handle *copy = NULL;
+  if(found!=_library.end())
+    copy = new MetaData::Handle(*(found->second));
+  _lock.unlock();
+  return copy;
+}
+
+// }}}
+// {{{ MetaDataManager::drop()
+
+/**
+ * Remove the entry registered under @p id, if any, deleting the
+ * library's own handle. Does nothing when the id is unknown.
+ *
+ * @param id Library key of the metadata to drop.
+ */
+void MetaDataManager::drop(const std::string &id)
+{
+  _lock.wrLock();
+  LibraryIterator victim = _library.find(id);
+  if(victim!=_library.end()){
+    delete victim->second;
+    _library.erase(victim);
+  }
+  _lock.unlock();
+}
+
+// }}}
+// {{{ MetaDataManager::clear()
+
+/**
+ * Delete every handle held by the library and empty the library, all
+ * under the write lock.
+ */
+void MetaDataManager::clear()
+{
+  _lock.wrLock();
+  LibraryIterator entry = _library.begin();
+  while(entry != _library.end()){
+    delete entry->second;
+    ++entry;
+  }
+  _library.clear();
+  _lock.unlock();
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/metadatamanager.h b/fsa/src/vespa/fsamanagers/metadatamanager.h
new file mode 100644
index 00000000000..d87ca59626c
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/metadatamanager.h
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/10/01
+ * @version $Id$
+ * @file metadatamanager.h
+ * @brief Metadata manager class definition.
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <map>
+
+#include "singleton.h"
+#include "rwlock.h"
+#include "metadatahandle.h"
+
+namespace fsa {
+
+// {{{ class MetaDataManager
+
+/**
+ * @class MetaDataManager
+ * @brief Class for managing generic metadata.
+ *
+ * This class provides a single point of access to all metadata
+ * used by the applications. It stores one MetaData::Handle per id in
+ * a map guarded by a read-write lock.
+ */
+class MetaDataManager : public Singleton<MetaDataManager> {
+
+protected:
+ friend class Singleton<MetaDataManager>;
+
+ /** Default constructor. Protected to avoid accidental creation */
+ MetaDataManager() : _library(), _lock() {}
+
+private:
+
+ /** Private unimplemented copy constructor */
+ MetaDataManager(const MetaDataManager&);
+ /** Private unimplemented assignment operator */
+ MetaDataManager& operator=(const MetaDataManager&);
+
+ /** %MetaData library type */
+ typedef std::map<std::string,MetaData::Handle*> Library;
+ /** %MetaData library iterator type */
+ typedef std::map<std::string,MetaData::Handle*>::iterator LibraryIterator;
+ /** %MetaData library const iterator type */
+ typedef std::map<std::string,MetaData::Handle*>::const_iterator LibraryConstIterator;
+
+ Library _library; /**< Library of MetaData objects. */
+ mutable RWLock _lock; /**< Read-write lock for library synchronization. */
+
+public:
+
+ /** Destructor */
+ ~MetaDataManager();
+
+ /**
+ * @brief Load a metadata file into memory.
+ *
+ * @param id MetaData id (to be used in later get() or drop() calls).
+ * @param datafile Metadata file name
+ * @return True on success, false if the metadata could not be loaded.
+ */
+ bool load(const std::string &id, const std::string &datafile);
+
+ /**
+ * @brief Get a handle to metadata.
+ *
+ * @param id Metadata id.
+ * @return Newly allocated handle, must be deleted by the
+ * caller. (NULL if no metadata with the given id was found.)
+ */
+ MetaData::Handle* get(const std::string &id) const;
+
+ /**
+ * @brief Drop a metadata from the library.
+ *
+ * Drop a metadata from the library. The metadata object will
+ * be deleted automatically when there are no more handles referring
+ * to it.
+ *
+ * @param id MetaData id.
+ */
+ void drop(const std::string &id);
+
+ /**
+ * @brief Drop all metadata from the library.
+ */
+ void clear();
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/mutex.cpp b/fsa/src/vespa/fsamanagers/mutex.cpp
new file mode 100644
index 00000000000..1c62744291d
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/mutex.cpp
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file mutex.cpp
+ * @brief Mutex.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef DISABLE_THREADS
+#include <pthread.h>
+#include <sched.h>
+#include <assert.h>
+#endif
+
+#include "mutex.h"
+
+namespace fsa {
+
+// {{{ class Mutex::Impl
+
+/** Private implementation of Mutex; keeps pthread types out of mutex.h. */
+struct Mutex::Impl
+{
+#ifndef DISABLE_THREADS
+ pthread_mutex_t _mutex; /**< lock */
+#else
+ int _mutex; /**< Placeholder member used when DISABLE_THREADS is defined. */
+#endif
+};
+
+// }}}
+
+/**
+ * Constructor: allocates the implementation object and, unless
+ * DISABLE_THREADS is defined, initializes the pthread mutex with
+ * default attributes (asserting that initialization succeeded).
+ */
+Mutex::Mutex(void) : _impl(new Impl)
+{
+#ifndef DISABLE_THREADS
+  int rc = pthread_mutex_init(&(_impl->_mutex),NULL);
+  assert(rc == 0);
+#endif
+}
+
+/**
+ * Destructor: destroys the pthread mutex (unless DISABLE_THREADS is
+ * defined) and frees the implementation object.
+ */
+Mutex::~Mutex(void)
+{
+#ifndef DISABLE_THREADS
+ pthread_mutex_destroy(&(_impl->_mutex));
+#endif
+ delete _impl;
+}
+
+/**
+ * Non-blocking lock attempt.
+ *
+ * @return True if pthread_mutex_trylock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool Mutex::tryLock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_mutex_trylock(&(_impl->_mutex)) == 0;
+#endif
+}
+
+/**
+ * Lock the mutex via pthread_mutex_lock().
+ *
+ * @return True if pthread_mutex_lock() reported success; always true
+ *         when DISABLE_THREADS is defined.
+ */
+bool Mutex::lock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_mutex_lock(&(_impl->_mutex)) == 0;
+#endif
+}
+
+/**
+ * Release the mutex.
+ *
+ * @return True if pthread_mutex_unlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool Mutex::unlock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_mutex_unlock(&(_impl->_mutex)) == 0;
+#endif
+}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/mutex.h b/fsa/src/vespa/fsamanagers/mutex.h
new file mode 100644
index 00000000000..87deb081b08
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/mutex.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file mutex.h
+ * @brief Mutex.
+ *
+ */
+
+#pragma once
+
+// {{{ class Mutex
+
+namespace fsa {
+
+/**
+ * @class Mutex
+ * @brief Mutex.
+ *
+ * Simple mutex class based on POSIX pthread_mutex_t. The object owns
+ * its implementation through a raw pointer, so it must not be copied:
+ * a copy would share and later destroy the same implementation twice.
+ * Copying is therefore disabled.
+ */
+class Mutex
+{
+ protected:
+  class Impl;
+  Impl *_impl;   /**< Owned implementation object. */
+
+ private:
+  /** Unimplemented private copy constructor (mutexes are not copyable). */
+  Mutex(const Mutex&);
+  /** Unimplemented private assignment operator (mutexes are not copyable). */
+  Mutex& operator=(const Mutex&);
+
+ public:
+  /**
+   * @brief Constructor
+   */
+  Mutex(void);
+
+  /**
+   * @brief Destructor
+   */
+  ~Mutex(void);
+
+  /**
+   * @brief Try to get a lock.
+   *
+   * Try to get a lock. This method is non-blocking, and
+   * returns true if locking was successful.
+   *
+   * @return True if locking was successful.
+   */
+  bool tryLock (void);
+
+  /**
+   * @brief Get a lock.
+   *
+   * Get the lock. This method blocks until the lock is
+   * available (that is, no other thread holds a lock on
+   * the object).
+   *
+   * @return True if locking was successful.
+   */
+  bool lock (void);
+
+  /**
+   * @brief Release a lock.
+   *
+   * @return True if unlocking was successful.
+   */
+  bool unlock (void);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/refcountable.h b/fsa/src/vespa/fsamanagers/refcountable.h
new file mode 100644
index 00000000000..77f00bc3450
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/refcountable.h
@@ -0,0 +1,111 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/08/20
+ * @version $Id$
+ * @file refcountable.h
+ * @brief Reference countable template
+ */
+
+#pragma once
+
+#include "mutex.h"
+
+namespace fsa {
+
+// {{{ class RefCountable
+
+/**
+ * @class RefCountable
+ * @brief Reference countable template
+ *
+ * Mix-in for objects that manage their own lifetime through a
+ * reference count. Derive from it, call addReference() for every new
+ * owner and removeReference() when an owner goes away; once the count
+ * drops below one the object destroys itself.
+ */
+template <typename T>
+class RefCountable
+{
+protected:
+
+  /** Number of outstanding references. */
+  int _refCount;
+
+  /** Mutex serializing updates of the reference count. */
+  Mutex _sequencerLock;
+
+  /**
+   * @brief Destroy the object (deletes this).
+   *
+   * @return True.
+   */
+  virtual bool destroy(void)
+  {
+    delete this;
+    return true;
+  }
+
+private:
+
+  /** Unimplemented private copy constructor. */
+  RefCountable(const RefCountable &original);
+  /** Unimplemented private assignment operator. */
+  const RefCountable& operator=(const RefCountable &original);
+
+public:
+
+  /**
+   * @brief Constructor, starts with a reference count of zero.
+   */
+  RefCountable(void) : _refCount(0), _sequencerLock() {}
+
+  /**
+   * @brief Destructor
+   */
+  virtual ~RefCountable(void) {}
+
+  /**
+   * @brief Increase reference count.
+   */
+  virtual void addReference(void)
+  {
+    _sequencerLock.lock();
+    ++_refCount;
+    _sequencerLock.unlock();
+  }
+
+  /**
+   * @brief Decrease reference count, and destroy object if no
+   *        references are left.
+   *
+   * @return True if the object was destroyed.
+   */
+  virtual bool removeReference(void)
+  {
+    _sequencerLock.lock();
+    const bool lastReferenceGone = (--_refCount < 1);
+    // Release the lock first: destroy() deletes this object,
+    // including the mutex itself.
+    _sequencerLock.unlock();
+
+    return lastReferenceGone ? destroy() : false;
+  }
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/rwlock.cpp b/fsa/src/vespa/fsamanagers/rwlock.cpp
new file mode 100644
index 00000000000..9c296dfe980
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/rwlock.cpp
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file rwlock.cpp
+ * @brief Read-write lock.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef DISABLE_THREADS
+#include <pthread.h>
+#include <sched.h>
+#include <assert.h>
+#endif
+
+#include "rwlock.h"
+
+namespace fsa {
+
+// {{{ class RWLock::Impl
+
+/** Private implementation of RWLock; keeps pthread types out of rwlock.h. */
+struct RWLock::Impl
+{
+#ifndef DISABLE_THREADS
+ pthread_rwlock_t _rwlock; /**< Lock. */
+#else
+ int _rwlock; /**< Placeholder member used when DISABLE_THREADS is defined. */
+#endif
+};
+
+// }}}
+
+/**
+ * Constructor: allocates the implementation object and, unless
+ * DISABLE_THREADS is defined, initializes the pthread read-write lock
+ * with default attributes (asserting that initialization succeeded).
+ */
+RWLock::RWLock(void) : _impl(new Impl)
+{
+#ifndef DISABLE_THREADS
+  int rc = pthread_rwlock_init(&(_impl->_rwlock),NULL);
+  assert(rc == 0);
+#endif
+}
+
+/**
+ * Destructor: destroys the pthread read-write lock (unless
+ * DISABLE_THREADS is defined) and frees the implementation object.
+ */
+RWLock::~RWLock(void)
+{
+#ifndef DISABLE_THREADS
+  pthread_rwlock_destroy(&(_impl->_rwlock));
+#endif
+  // The constructor allocates _impl with new; without this delete
+  // every RWLock leaked its implementation object.
+  delete _impl;
+}
+
+/**
+ * Non-blocking attempt to take the lock for reading.
+ *
+ * @return True if pthread_rwlock_tryrdlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool RWLock::tryRdLock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_rwlock_tryrdlock(&(_impl->_rwlock)) == 0;
+#endif
+}
+
+/**
+ * Non-blocking attempt to take the lock for writing.
+ *
+ * @return True if pthread_rwlock_trywrlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool RWLock::tryWrLock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_rwlock_trywrlock(&(_impl->_rwlock)) == 0;
+#endif
+}
+
+/**
+ * Take the lock for reading via pthread_rwlock_rdlock().
+ *
+ * @return True if pthread_rwlock_rdlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool RWLock::rdLock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_rwlock_rdlock(&(_impl->_rwlock)) == 0;
+#endif
+}
+
+/**
+ * Take the lock for writing via pthread_rwlock_wrlock().
+ *
+ * @return True if pthread_rwlock_wrlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool RWLock::wrLock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_rwlock_wrlock(&(_impl->_rwlock)) == 0;
+#endif
+}
+
+/**
+ * Release a previously acquired read or write lock.
+ *
+ * @return True if pthread_rwlock_unlock() reported success; always
+ *         true when DISABLE_THREADS is defined.
+ */
+bool RWLock::unlock (void)
+{
+#ifdef DISABLE_THREADS
+  return true;
+#else
+  return pthread_rwlock_unlock(&(_impl->_rwlock)) == 0;
+#endif
+}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/rwlock.h b/fsa/src/vespa/fsamanagers/rwlock.h
new file mode 100644
index 00000000000..4c85d1cac8a
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/rwlock.h
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/07
+ * @version $Id$
+ * @file rwlock.h
+ * @brief Read-write lock.
+ *
+ */
+
+#pragma once
+
+namespace fsa {
+
+// {{{ class RWLock
+
+/**
+ * @class RWLock
+ * @brief Read-write lock.
+ *
+ * Simple read-write lock class based on POSIX pthread_rwlock_t. The
+ * object holds its implementation through a raw pointer, so copies
+ * would share a single underlying lock; copying is therefore
+ * disabled.
+ */
+class RWLock
+{
+ protected:
+  class Impl;
+  Impl *_impl;   /**< Implementation object allocated by the constructor. */
+
+ private:
+  /** Unimplemented private copy constructor (locks are not copyable). */
+  RWLock(const RWLock&);
+  /** Unimplemented private assignment operator (locks are not copyable). */
+  RWLock& operator=(const RWLock&);
+
+ public:
+
+  /**
+   * @brief Constructor.
+   */
+  RWLock(void);
+
+  /**
+   * @brief Destructor.
+   */
+  ~RWLock(void);
+
+  /**
+   * @brief Try to get a read (shared) lock.
+   *
+   * Try to get a read (shared) lock. This method is non-blocking, and
+   * returns true if locking was successful.
+   *
+   * @return True if locking was successful.
+   */
+  bool tryRdLock (void);
+
+  /**
+   * @brief Try to get a write (exclusive) lock.
+   *
+   * Try to get a write (exclusive) lock. This method is non-blocking, and
+   * returns true if locking was successful.
+   *
+   * @return True if locking was successful.
+   */
+  bool tryWrLock (void);
+
+  /**
+   * @brief Get a read (shared) lock.
+   *
+   * Get a read (shared) lock. This method blocks until a shared
+   * lock is available (that is, no other thread holds an exclusive
+   * lock on the object).
+   *
+   * @return True if locking was successful.
+   */
+  bool rdLock (void);
+
+  /**
+   * @brief Get a write (exclusive) lock.
+   *
+   * Get a write (exclusive) lock. This method blocks until an
+   * exclusive lock is available (that is, no other thread holds a
+   * shared or an exclusive lock on the object).
+   *
+   * @return True if locking was successful.
+   */
+  bool wrLock (void);
+
+  /**
+   * @brief Release a (shared or exclusive) lock.
+   *
+   * @return True if unlocking was successful.
+   */
+  bool unlock (void);
+
+};
+
+// }}}
+
+} // namespace fsa
+
diff --git a/fsa/src/vespa/fsamanagers/singleton.cpp b/fsa/src/vespa/fsamanagers/singleton.cpp
new file mode 100644
index 00000000000..76e9535b450
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/singleton.cpp
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/05
+ * @version $Id$
+ * @file singleton.cpp
+ * @brief Singleton pattern.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "singleton.h"
+
+
+namespace fsa {
+
+// {{{ SingletonExitHandler::_instance
+
+/** The one handler instance; NULL until instance() is first called. */
+SingletonExitHandler* SingletonExitHandler::_instance = NULL;
+
+// }}}
+
+// {{{ SingletonExitHandler::SingletonExitHandler()
+
+/**
+ * Constructor: starts with an empty destroyer list and registers
+ * atExit() with the C runtime through atexit().
+ */
+SingletonExitHandler::SingletonExitHandler()
+ : _functionList()
+{
+ /*
+ * This won't work as part of plugins. When the library is unloaded, the
+ * registration remains, and the program will crash when trying to
+ * exit.
+ */
+ atexit(&atExit);
+}
+
+// }}}
+// {{{ SingletonExitHandler::~SingletonExitHandler()
+
+/** Destructor: nothing to do beyond destroying the function list member. */
+SingletonExitHandler::~SingletonExitHandler()
+{
+}
+
+// }}}
+// {{{ SingletonExitHandler::instance()
+
+/**
+ * Return the handler instance, creating it on first use. No locking
+ * is done here.
+ *
+ * @return Pointer to the handler instance.
+ */
+SingletonExitHandler* SingletonExitHandler::instance()
+{
+  if (_instance != NULL) {
+    return _instance;
+  }
+  _instance = new SingletonExitHandler();
+  return _instance;
+}
+
+// }}}
+// {{{ SingletonExitHandler::registerSingletonDestroyer()
+
+/**
+ * Register a destroy function. It is inserted at the front of the
+ * list, so the most recently registered function is the first one
+ * destroy() will call.
+ *
+ * @param p Destroy function to register.
+ */
+void SingletonExitHandler::registerSingletonDestroyer(void (*p)())
+{
+ _functionList.push_front(p);
+}
+
+// }}}
+// {{{ SingletonExitHandler::atExit()
+
+/**
+ * Exit hook registered with atexit(): runs all registered singleton
+ * destroyers and then deletes the handler itself.
+ *
+ * The instance pointer is reset before the handler is deleted, so a
+ * later call to instance() creates a fresh handler instead of
+ * returning a pointer to freed memory.
+ */
+void SingletonExitHandler::atExit()
+{
+  SingletonExitHandler *handler = SingletonExitHandler::instance();
+  handler->destroy();
+  _instance = NULL;
+  delete handler;
+}
+
+// }}}
+// {{{ SingletonExitHandler::destroy()
+
+/**
+ * Call every registered destroy function, walking the list from
+ * front to back.
+ */
+void SingletonExitHandler::destroy()
+{
+  FunctionListIterator current = _functionList.begin();
+  while (current != _functionList.end()) {
+    (*current)();
+    ++current;
+  }
+}
+
+// }}}
+
+} // namespace fsa
diff --git a/fsa/src/vespa/fsamanagers/singleton.h b/fsa/src/vespa/fsamanagers/singleton.h
new file mode 100644
index 00000000000..db11a9bf444
--- /dev/null
+++ b/fsa/src/vespa/fsamanagers/singleton.h
@@ -0,0 +1,172 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author Peter Boros
+ * @date 2004/09/05
+ * @version $Id$
+ * @file singleton.h
+ * @brief Singleton pattern.
+ */
+
+
+#pragma once
+
+#include <list>
+
+#include "mutex.h"
+
+
+namespace fsa {
+
+// {{{ class SingletonExitHandler
+
+/**
+ * @class SingletonExitHandler
+ * @brief %Singleton exit handler.
+ *
+ * %Singleton exit handler. Uses the atexit() library call to
+ * destroy all Singleton objects in the reverse of the order in which
+ * they were registered. It is also a singleton itself.
+ */
+class SingletonExitHandler
+{
+private:
+
+ /** Default constructor */
+ SingletonExitHandler();
+
+ /** Method to call at exit, destroys all Singletons. */
+ static void atExit();
+
+ /** Instance pointer */
+ static SingletonExitHandler* _instance;
+
+ /** Destroy method - does the dirty work */
+ void destroy();
+
+
+ typedef std::list<void(*)()> FunctionList;
+ typedef std::list<void(*)()>::iterator FunctionListIterator;
+
+ /** List of Singleton destroy functions */
+ FunctionList _functionList;
+
+public:
+
+ /** Destructor */
+ virtual ~SingletonExitHandler();
+
+ /**
+ * @brief Get instance pointer.
+ *
+ * @return pointer to instance.
+ */
+ static SingletonExitHandler* instance();
+
+ /**
+ * @brief Register a singleton.
+ *
+ * @param p Pointer to destroy function of the singleton.
+ */
+ void registerSingletonDestroyer(void (*p)());
+
+};
+
+// }}}
+
+// {{{ class Singleton
+
+/**
+ * @class Singleton
+ * @brief %Singleton template.
+ *
+ * %Singleton template (from Design Patterns by Gamma et al.). To use
+ * it, subclass as follows, and make constructors private:
+ *
+ * class MyClass : public Singleton<MyClass> {
+ * friend class Singleton<MyClass>;
+ * private:
+ * MyClass();
+ * public:
+ * void MyMethod();
+ * ...
+ * };
+ *
+ * and then call MyMethod as:
+ *
+ * MyClass::instance().MyMethod();
+ *
+ * The friend declaration is needed because instance() constructs the
+ * object with new T().
+ */
+template<typename T>
+class Singleton
+{
+ /** SingletonExitHandler handles destruction. */
+ friend class SingletonExitHandler;
+
+public:
+ /** Destructor */
+ virtual ~Singleton();
+
+ /**
+ * @brief Get reference to the instance.
+ *
+ * Get reference to the instance. The first call of this method will
+ * create the instance, and register the destroy function with the
+ * exit handler.
+ *
+ * @return Reference to the instance.
+ */
+ static T& instance();
+
+protected:
+
+ /** Explicit constructor (to avoid implicit conversion). */
+ explicit Singleton();
+
+private:
+
+ /** Copy constructor (unimplemented) */
+ Singleton(const Singleton&);
+ /** Assignment operator (unimplemented) */
+ Singleton& operator=(const Singleton&);
+
+ /** Destroy function - this will be registered with the exit handler. */
+ static void destroy();
+
+ static Mutex _lock; /**< Mutex for synchronization. */
+
+ static T* _instance; /**< Instance pointer. */
+};
+
+
+/** Constructor: nothing to initialize, the state lives in static members. */
+template<typename T> Singleton<T>::Singleton() {}
+
+/** Destructor: empty; the instance pointer is reset by destroy(). */
+template<typename T> Singleton<T>::~Singleton() {}
+
+/**
+ * Delete the instance and reset the instance pointer, so that a later
+ * instance() call creates a new object. The lock is not taken here.
+ */
+template<typename T> void Singleton<T>::destroy()
+{
+ delete _instance;
+ _instance = NULL;
+}
+
+/**
+ * Get a reference to the instance, creating it on first use.
+ *
+ * The pointer is checked once without the lock and once more with the
+ * lock held, so only one thread constructs the object.
+ *
+ * The destroy function is registered only after T has been fully
+ * constructed. The exit handler calls the most recently registered
+ * destroyer first, so a singleton that T's constructor creates is
+ * registered before T and destroyed after it. A T whose constructor
+ * throws is never registered.
+ *
+ * If construction or registration throws, the lock is released before
+ * the exception propagates, so later calls do not block forever.
+ *
+ * @return Reference to the instance.
+ */
+template<typename T> T& Singleton<T>::instance()
+{
+  if (_instance == NULL) {
+    _lock.lock();
+    try {
+      if (_instance == NULL) {
+        T *created = new T();
+        SingletonExitHandler::instance()->registerSingletonDestroyer(&destroy);
+        _instance = created;
+      }
+    }
+    catch (...) {
+      _lock.unlock();
+      throw;
+    }
+    _lock.unlock();
+  }
+
+  return *_instance;
+}
+
+/** Per-type instance pointer; NULL until instance() creates the object. */
+template<typename T> T* Singleton<T>::_instance = NULL;
+
+/** Per-type mutex used by instance() to serialize creation. */
+template<typename T> Mutex Singleton<T>::_lock;
+
+// }}}
+
+} // namespace fsa
+