diff options
119 files changed, 739 insertions, 5520 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ec0fc4a4dda..c0f68a1c0b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,6 @@ add_subdirectory(docprocs) add_subdirectory(document) add_subdirectory(documentapi) add_subdirectory(eval) -add_subdirectory(fastlib) add_subdirectory(fastos) add_subdirectory(fbench) add_subdirectory(fileacquirer) diff --git a/config-model-fat/pom.xml b/config-model-fat/pom.xml index 5f36a26ccef..96cafd4ec0f 100644 --- a/config-model-fat/pom.xml +++ b/config-model-fat/pom.xml @@ -72,7 +72,7 @@ <_fixupmessages>"Classes found in the wrong directory"</_fixupmessages> <!-- Hide warnings for multi-release jars --> <Bundle-SymbolicName>${project.artifactId}</Bundle-SymbolicName> <Bundle-Version>${parsedVersion.majorVersion}.${parsedVersion.minorVersion}.${parsedVersion.incrementalVersion}</Bundle-Version> - <Embed-Dependency>*;scope=compile|runtime</Embed-Dependency> + <Embed-Dependency>*;scope=compile|runtime;type=!pom</Embed-Dependency> <Embed-Transitive>true</Embed-Transitive> <Import-Package>!*</Import-Package> <!-- Should only import packages that meet one or more of the below criteria: diff --git a/fastlib/.gitignore b/fastlib/.gitignore deleted file mode 100644 index d52d93b8dda..00000000000 --- a/fastlib/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -.Build_completed -.Dist_completed -.EscrowCopy_completed -.Install_completed -.PostBuild_completed -.PreBuild_completed -.Test_completed -bin -etc -include -lib -update.log -Makefile diff --git a/fastlib/CMakeLists.txt b/fastlib/CMakeLists.txt deleted file mode 100644 index 8d7208e956c..00000000000 --- a/fastlib/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_define_module( - DEPENDS - fastos - vespalib - - LIBS - src/vespa/fastlib/io - src/vespa/fastlib/io/tests - src/vespa/fastlib/testsuite - src/vespa/fastlib/text - src/vespa/fastlib/text/apps - src/vespa/fastlib/text/tests - src/vespa/packages -) diff --git a/fastlib/INSTALL b/fastlib/INSTALL deleted file mode 100644 index 501c4adf550..00000000000 --- a/fastlib/INSTALL +++ /dev/null @@ -1,62 +0,0 @@ -********************************************************************** -** FastLib Build and Installation Guide ** -********************************************************************** - - ----------------------------------------------------------------------- - -How do I build and install the C++ library? - - cd src/cpp - ./configure --fastos-dir <full path to fastos> [--help | <options>] - make -s bootstrap - make -s install - -The default install directory is '../..' (relative to the src/cpp -directory). This means that the library file will be installed in -../../lib/ and the include files in ../../include/fastlib/. -To override the default install directory, use the configure option -'--install-dir'. Try './configure --fastos-dir <fastosdir> --help' -for all available options. - -Support for additional functionality is available when configuring -with --libxml2-dir <full path to libxml2>. -The XML include files must have been installed in -<libxml2-dir>/includes/libxml2/libxml -The XML library must have been installed as -<libxml2-dir>/lib/libxml2.a (or xml2.a on Windows) - - ----------------------------------------------------------------------- - -What kind of make targets are available, and what do they do? - -make depend - generate make-dependancy information -make makefiles - generate makefiles for all modules -make clean - delete all intermediate and output files -make cleandir - does 'make clean' + deletes depend info -make install - install the library and include files - to location $(INSTALLDIR) -make kdoc - generate kdoc documentation -make doxygen - generate doxygen documentation -make bootstrap - (recommended build target) does: - make cleandir - make depend - make makefiles - make -make tests - Compiles test programs, and runs them. - If a test program fails to compile or run, - the make process is stopped. - Please run 'make tests' after changing fastlib code - to ensure you have not broken anything. - ----------------------------------------------------------------------- - - - - - - - - - diff --git a/fastlib/OWNERS b/fastlib/OWNERS deleted file mode 100644 index 885ab949d74..00000000000 --- a/fastlib/OWNERS +++ /dev/null @@ -1,3 +0,0 @@ -havardpe -toregge -baldersheim diff --git a/fastlib/README b/fastlib/README deleted file mode 100644 index 98913493a5b..00000000000 --- a/fastlib/README +++ /dev/null @@ -1 +0,0 @@ -old legacy code that should be removed as soon as we stop using it diff --git a/fastlib/common_config/install.sh b/fastlib/common_config/install.sh deleted file mode 100644 index 0e6395146b0..00000000000 --- a/fastlib/common_config/install.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -cd src/cpp -make FASTOS_DIR=${autobuild_installroot} INSTALL_DIR=${autobuild_installroot} install diff --git a/fastlib/common_config/make.sh b/fastlib/common_config/make.sh deleted file mode 100644 index 66d0cea0b76..00000000000 --- a/fastlib/common_config/make.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -cd src/cpp -make FASTOS_DIR=${autobuild_installroot} INSTALL_DIR=${autobuild_installroot} diff --git a/fastlib/current/release/buildspec b/fastlib/current/release/buildspec deleted file mode 100644 index a546c902603..00000000000 --- a/fastlib/current/release/buildspec +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -# Config file added by mkspec -# Added by oholsen -$fastlib_about="FAST standard library classes"; -$fastlib_cvs_module="fastlib"; -$fastlib_cvs_tag="current"; -$fastlib_maintainer='mike@fast.no'; -@fastlib_dependencies = ( - "fastos:4.1.1:release", -); -1; diff --git a/fastlib/src/.gitignore b/fastlib/src/.gitignore deleted file mode 100644 index 9d5b23b7747..00000000000 --- a/fastlib/src/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -*.dsp -*.dsw -*.ncb -*.opt -*.plg -Makefile.ini -config_command.bat -config_command.sh -output diff --git a/fastlib/src/Doxyfile b/fastlib/src/Doxyfile deleted file mode 100644 index da1209eda82..00000000000 --- a/fastlib/src/Doxyfile +++ /dev/null @@ -1,1113 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -# Doxyfile 1.3.3 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# General configuration options -#--------------------------------------------------------------------------- - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = FastLib - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = head - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = ../../doc/doxygen - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, -# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en -# (Japanese with English messages), Korean, Norwegian, Polish, Portuguese, -# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. - -OUTPUT_LANGUAGE = English - -# This tag can be used to specify the encoding used in the generated output. -# The encoding is not always determined by the language that is chosen, -# but also whether or not the output is meant for Windows or non-Windows users. -# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES -# forces the Windows encoding (this is the default for the Windows binary), -# whereas setting the tag to NO uses a Unix-style encoding (the default for -# all platforms other than Windows). - -USE_WINDOWS_ENCODING = NO - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited -# members of a class in the documentation of that class as if those members were -# ordinary class members. Constructors, destructors and assignment operators of -# the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = YES - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. It is allowed to use relative paths in the argument list. - -STRIP_FROM_PATH = /ld/home/mike/autodoxygen/fastlib/src/cpp - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like the Qt-style comments (thus requiring an -# explict @brief command for a brief description. - -JAVADOC_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the DETAILS_AT_TOP tag is set to YES then Doxygen -# will output the detailed description near the top, like JavaDoc. -# If set to NO, the detailed description appears after the member -# documentation. - -DETAILS_AT_TOP = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# reimplements. - -INHERIT_DOCS = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 8 - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. -# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources -# only. Doxygen will then generate output that is more tailored for Java. -# For instance, namespaces will be presented as packages, qualified scopes -# will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = . - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp -# *.h++ *.idl *.odl *.cs - -FILE_PATTERNS = - -# The RECURSIVE tag can be used to turn specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or directories -# that are symbolic links (a Unix filesystem feature) are excluded from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. - -EXCLUDE_PATTERNS = */tests/* - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command <filter> <input-file>, where <filter> -# is the value of the INPUT_FILTER tag, and <input-file> is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. - -INPUT_FILTER = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. - -SOURCE_BROWSER = YES - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES (the default) -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = YES - -# If the REFERENCES_RELATION tag is set to YES (the default) -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = YES - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = YES - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = Fast_ -IGNORE_PREFIX += FastOS_ -IGNORE_PREFIX += FastS_ - - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output dir. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. - -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be -# generated containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, -# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are -# probably better off using the HTML help feature. - -GENERATE_TREEVIEW = NO - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = NO - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! - -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = NO - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = NO - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimised for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assigments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. - -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. This is useful -# if you want to understand what is going on. On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_PREDEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse the -# parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::addtions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). - -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base or -# super classes. Setting the tag to NO turns the diagrams off. Note that this -# option is superceded by the HAVE_DOT option below. This is only a fallback. It is -# recommended to install and use dot, since it yields more powerful graphs. - -CLASS_DIAGRAMS = YES - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = YES - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similiar to the OMG's Unified Modeling -# Language. - -UML_LOOK = YES - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. - -TEMPLATE_RELATIONS = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will -# generate a call dependency graph for every global function or class method. -# Note that enabling this option will significantly increase the time of a run. -# So in most cases it will be better to enable call graphs for selected -# functions only using the \callgraph command. - -CALL_GRAPH = YES - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found on the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width -# (in pixels) of the graphs generated by dot. If a graph becomes larger than -# this value, doxygen will try to truncate the graph, so that it fits within -# the specified constraint. Beware that most browsers cannot cope with very -# large images. - -MAX_DOT_GRAPH_WIDTH = 1024 - -# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height -# (in pixels) of the graphs generated by dot. If a graph becomes larger than -# this value, doxygen will try to truncate the graph, so that it fits within -# the specified constraint. Beware that most browsers cannot cope with very -# large images. - -MAX_DOT_GRAPH_HEIGHT = 1024 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes that -# lay further from the root node will be omitted. Note that setting this option to -# 1 or 2 may greatly reduce the computation time needed for large code bases. Also -# note that a graph may be further truncated if the graph's image dimensions are -# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT). -# If 0 is used for the depth value (the default), the graph is not depth-constrained. - -MAX_DOT_GRAPH_DEPTH = 0 - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Configuration::addtions related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO - -# The CGI_NAME tag should be the name of the CGI script that -# starts the search engine (doxysearch) with the correct parameters. -# A script with this name will be generated by doxygen. - -CGI_NAME = search.cgi - -# The CGI_URL tag should be the absolute URL to the directory where the -# cgi binaries are located. See the documentation of your http daemon for -# details. - -CGI_URL = - -# The DOC_URL tag should be the absolute URL to the directory where the -# documentation is located. If left blank the absolute path to the -# documentation, with file:// prepended to it, will be used. - -DOC_URL = - -# The DOC_ABSPATH tag should be the absolute path to the directory where the -# documentation is located. If left blank the directory on the local machine -# will be used. - -DOC_ABSPATH = - -# The BIN_ABSPATH tag must point to the directory where the doxysearch binary -# is installed. - -BIN_ABSPATH = /usr/local/bin/ - -# The EXT_DOC_PATHS tag can be used to specify one or more paths to -# documentation generated for other projects. This allows doxysearch to search -# the documentation for these projects as well. - -EXT_DOC_PATHS = diff --git a/fastlib/src/vespa/fastlib/.gitignore b/fastlib/src/vespa/fastlib/.gitignore deleted file mode 100644 index 0e56cf2f8c1..00000000000 --- a/fastlib/src/vespa/fastlib/.gitignore +++ /dev/null @@ -1 +0,0 @@ -config.h diff --git a/fastlib/src/vespa/fastlib/testsuite/.gitignore b/fastlib/src/vespa/fastlib/testsuite/.gitignore deleted file mode 100644 index 3c74a910ba1..00000000000 --- a/fastlib/src/vespa/fastlib/testsuite/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -*.So -*.a -*.elc -*.ilk -*.lib -*.o -*.obj -*.pdb -.cvsignore -.depend -.pure -Debug -Makefile -SunWS_cache diff --git a/fastlib/src/vespa/fastlib/testsuite/CMakeLists.txt b/fastlib/src/vespa/fastlib/testsuite/CMakeLists.txt deleted file mode 100644 index 5fe13b21fb9..00000000000 --- a/fastlib/src/vespa/fastlib/testsuite/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(fastlib_fast_testsuite - SOURCES - suite.cpp - test.cpp - INSTALL lib64 - DEPENDS -) diff --git a/fastlib/src/vespa/fastlib/testsuite/cpptest.el b/fastlib/src/vespa/fastlib/testsuite/cpptest.el deleted file mode 100644 index 31c0906d576..00000000000 --- a/fastlib/src/vespa/fastlib/testsuite/cpptest.el +++ /dev/null @@ -1,2526 +0,0 @@ -;; Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -;; cpptest.el --- C++ unit test support for c++-mode - -;; $Revision: 1.179 $ $Date: 2004-02-17 17:01:15 $ - -;; Author: Nils Sandøy <nils.sandoy@fast.no> - -;; Keywords: C++, tools -;; -;; This file is not part of Emacs -;; -;; Utility functions for unit testing of C++ classes as an addition to the -;; CC mode. - -;; To use this functionality, put the following code in your .emacs file -;; -;; (setq load-path (nconc '( "<TESTSUITE path>" )load-path)) -;; (require 'cpptest) -;; -;; If you desire a different function/frame-key binding (default -;; f4-f8) you should setq the cppt-xxxx-key variables in your .emacs -;; file as well. - -;; This code will look for a 'testproject.el' file in the source code -;; directory, which will be loaded after all variables are -;; defined. This file should be used to set module/project specific -;; variables, like templates and cppt-test-dir. See the sample -;; testproject.el file in this directory. - -;; Notes & issues: -;; -;; The cppt-fund-public-methods function still has some problems with -;; inline methods, most often resulting in the missinterpretation of -;; parts of the inlined method body as method declarations. -;; -;; If you have more than one class declared in the same header file, -;; you might run into problems. Caveat emptor! -;; -;; This code employs some features that are only supported by emacs -;; 21.x and later. Most notably hashes and the push pop methods. -;; -;; I also recommend that you byte-compile this file, to make it run -;; faster. - - -(require 'cppttemplates) - -;; ------------------------------------------------------------------------ -;; User configurable variables -;; ------------------------------------------------------------------------ - -(defvar cppt-use-fastlib-debug-p nil - "*Flag indicating whether to use the fastlib/util/debug.h utility. -This requires fastlib 1.6.2.2+.") - -(defvar cppt-source-location nil - "This variable will be set by cppt-find-builddir to the current source root. -This is to allow cppt-find-srcdir to replace a build path with the original -source path") - -;;(defvar cppt-relative-compile '("~/fbuild/official" -;; "~/fbuild/build/redhat7.3-i686/statusserver-0.0-with-stlport-000") -(defvar cppt-relative-compile nil - "*Substitution to make to convert source directory to build directory. -argument should be nil or a list with two elements, the first element being the -directory prefix to match and the second element the value to replace with. -A value of nil means no conversion") - -(defvar cppt-relative-compile-versions nil - "*Set this to t if the source directory has a versionnumber path element - e.g. -it is on the form <packagename>/nn.nn[.nn]/ (typically 0.0 for development) but -there is no such corresponding element in the build tree (fbuild typical situation). -This path element will then be removed when constructing the build path.") - -(defvar cppt-pretest-target nil - "*Make target that is invoked before each test run") - -(defvar cppt-extra-make-targets nil - "*Extra make targets inserted in the CUSTOMMAKE part of the -fastos.project file in the test directory. This is often used in -conjunction with cppt-pretest-target") - -(defvar cppt-doc-author-p t - "*Should the author name be used in documentation? 'nil' if not.") - -(defvar cppt-use-underscore-p nil - "*If non-nil, use an underscore based naming scheme for methods and -classes. When nil, use uppercase to separate words.") - -(defvar cppt-pretest "$(ECHO_CMD)" - "*Commands to be inserted for PRE_TEST in fastos.project. These -commands will be run before the test executable, and in the same -shell. Typically you'll set your LD_LIBRARY_PATH here. If you use more -than one command, enclose them in a set of curly brackets {} so that -they all execute as a single command.") - -(defvar cppt-posttest "$(ECHO_CMD)" - "*Commands to be inserted for POST_TEST in fastos.project. These -commands will be run before the test executable, and in the same -shell. If you use more than one command, enclose them in a set of -curly brackets {} so that they all execute as a single command.") - -(defvar cppt-test-dir "test" - "*Name of the test directory. This is a subdirectory of the -sourcecode directory.") - -(defvar cppt-LD_LIBRARY_PATH nil - "*This will be prepended to LD_LIBRARY_PATH when executing test binaries") - -(defvar cppt-use-function-keys-flag t - "*Should cppt set function keys? This only affects C++ mode. -If you want to turn this off you must set this variable, before the -(require \'cpptest) statement.") - -(defvar cppt-toggle-header-key [f4] - "*Shortcut key to execute the \\[cppt-toggle-header-src] method. -This only affects C++ mode.") - -(defvar cppt-toggle-interface-key [S-f4] - "*Shortcut key to execute the \\[cppt-toggle-interface-headers] method. -This only affects C++ mode.") - -(defvar cppt-toggle-header-method-key [C-f4] - "*Shortcut key to execute the \\[cppt-toggle-header-src-method] method. -This only affects C++ mode.") - -(defvar cppt-find-other-file-key [C-S-f4] - "*Shortcut key to execute the \\[ff-find-other-file] method. -This only affects C++ mode.") - -(defvar cppt-switch-code-test-key [f5] - "*Shortcut key to execute the \\[cppt-switch-code-test] method. -This only affects C++ mode.") - -(defvar cppt-switch-code-test-method-key [S-C-f5] - "*Shortcut key to execute the \\[cppt-switch-code-test-method] method. -This only affects C++ mode.") - -(defvar cppt-verify-test-methods-key [S-f5] - "*Shortcut key to execute the \\[cppt-verify-test-methods] method. -This only affects C++ mode.") - -(defvar cppt-new-test-method-key [C-f5] - "*Shortcut key to execute the \\[cppt-new-test-method] method. -This only affects C++ mode.") - -(defvar cppt-run-test-key [f6] - "*Shortcut key to execute the \\[cppt-run-test] method. -This only affects C++ mode.") - -(defvar cppt-run-test-debug-key [C-f6] - "*Shortcut key to execute the \\[cppt-run-test-debug] method. -This only affects C++ mode.") - -(defvar cppt-run-single-test-key [S-f2] - "*Shortcut key to execute the \\[cppt-run-single-test] method. -This only affects C++ mode.") - -(defvar cppt-run-single-test-debug-key [C-S-f2] - "*Shortcut key to execute the \\[cppt-run-single-test-debug] method. -This only affects C++ mode.") - -(defvar cppt-run-test-purify-key [S-C-f6] - "*Shortcut key to execute the \\[cppt-run-test-purify] method. -This only affects C++ mode.") - -(defvar cppt-run-test-purify-debug-key [S-M-f6] - "*Shortcut key to execute the \\[cppt-run-test-purify-debug] method. -This only affects C++ mode.") - -(defvar cppt-test-suite-key [f7] - "*Shortcut key to execute the \\[cppt-test-suite] method. -This only affects C++ mode.") - -(defvar cppt-make-test-key [S-f7] - "*Shortcut key to execute the \\[cppt-make-test] method. -This only affects C++ mode.") - -(defvar cppt-suite-debug-key [C-f7] - "*Shortcut key to execute the \\[cppt-suite-debug] method. -This only affects C++ mode.") - -(defvar cppt-suite-purify-key [S-C-f7] - "*Shortcut key to execute the \\[cppt-suite-purify] method. -This only affects C++ mode.") - -(defvar cppt-suite-purify-debug-key [S-M-f7] - "*Shortcut key to execute the \\[cppt-suite-purify-debug] method. -This only affects C++ mode.") - -(defvar cppt-make-plain-key [f8] - "*Shortcut key to execute the \\[cppt-make-plain] method. -This only affects C++ mode.") - -(defvar cppt-make-build-key [S-f8] - "*Shortcut key to execute the \\[cppt-make-build] method. -This only affects C++ mode.") - -(defvar cppt-compile-key [C-f8] - "*Shortcut key to execute the \\[cppt-compile] method. -This only affects C++ mode.") - -(defvar cppt-insert-file-doc-key [S-f9] - "*Shortcut key to execute the \\[cppt-insert-file-doc] method. -This only affects C++ mode.") - -(defvar cppt-insert-class-doc-key [C-f9] - "*Shortcut key to execute the \\[cppt-insert-class-doc] method. -This only affects C++ mode.") - -(defvar cppt-insert-method-doc-key [f9] - "*Shortcut key to execute the \\[cppt-insert-method-doc] method. -This only affects C++ mode.") - -(defvar cppt-insert-copy-disallowed-key [C-f10] - "*Shortcut key to execute the \\[cppt-insert-copy-disallowed] method. -This only affects C++ mode.") - -(defvar cppt-indent-buffer-key [S-iso-lefttab] - "*Shortcut key to execute the \\[cppt-indent-buffer] method. -This only affects C++ mode.") - -(defvar cppt-make-plain-args "\-k " - "* Arguments for make when calling cppt-make-plain.") - -(defvar cppt-use-callback-p nil - "*With newer (3.*) versions of gcc the old callback did not - compile. Introduced a callback in fastlib/util/callback.h and used - this instead. To use it set cppt-use-callback to non nil.") - -;; ------------------------------------------------------------------------ -;; Initialization and include extentions -;; ------------------------------------------------------------------------ - -(defvar cppt-include-source-p "t" - "Should we include the original file being tested in the test -executable? Set this to nil if you are including this as part of a library") - -(defvar cppt-extra-source-includes - "#include \"../Log.h\"" - "String with extra include statements that will be inserted in all -source files. These will also be inserted into the Application and -Suite files. These include statements will be inserted for the -EXTRA_INCLUDES string in the cppt-test-class-app-template and -cppt-suite-template variables") - -(defvar cppt-extra-object-files - '("../Log") - "List of extra object files to be inlcuded in fastos.project for the -test and the suite applications.") - -(defvar cppt-extra-libraries - nil - "List of extra libraries to be inlcuded in fastos.project as -LIBS.") - -(defvar cppt-extra-external-libraries - '("fast") - "List of extra libraries to be inlcuded in fastos.project as -EXTERNALLIBS.") - -(defvar cppt-application-init-code - "FastOS_File::Delete(\"CLASS_NAMETest.log\"); - Fast_FileLogger filelogger(\"CLASS_NAMETest.log\"); - Fast_FileLogger stderrlogger(stderr); - LogDistributor().RegisterDestination(&filelogger, FLOG_ALL); - for (int i=0; i < _argc; ++i) { - if (strcmp(_argv[i], \"-d\") == 0) { - // Turn on debug mode (log to stderr) - LogDistributor().RegisterDestination(&stderrlogger, FLOG_ALL); - LOG_DBG(\"Running in debug mode\"); - } - }" - "Initialization code that is inserted for the INIT_CODE string in the -cppt-test-class-app-template variable") - -(defvar cppt-suite-init-code - "FastOS_File::Delete(\"SUITE.log\"); - Fast_FileLogger filelogger(\"SUITE.log\"); - Fast_FileLogger stderrlogger(stderr); - LogDistributor().RegisterDestination(&filelogger, FLOG_ALL); - for (int i=0; i < _argc; ++i) { - if (strcmp(_argv[i], \"-d\") == 0) { - // Turn on debug mode (log to stderr) - LogDistributor().RegisterDestination(&stderrlogger, FLOG_ALL); - LOG_DBG(\"Running in debug mode\"); - } - }" - "Initialization code that is inserted for the INIT_CODE string in the -cppt-suite-template variable") - -(defvar cppt-test-parameters - "" - "Parameter string to pass the the test executable when running it. -This is inserted where the 'CPPT_TEST_PARAMETERS' string is found in the -cppt-project-template.") - -(defvar cppt-test-dbflags - "-d" - "Parameter string to pass the the test executable when running it -in debug mode. This is inserted where the 'CPPT_TEST_DBFLAGS' string -is found in the cppt-project-template.") - - -;; ---------------------------------------------- -;; Internal utility methods -;; ---------------------------------------------- - -(defun cppt-is-header-file-p (file-name) - "Is file-name a header file ?" - (let* ((idx (string-match "[^.]+$" file-name)) - (ext (substring file-name idx))) - (string-match "h\\(pp\\)?$" ext))) - -(defun cppt-get-parent-dir (dir) - "Retrieve the parent directory of the given directory" -;; (message "Retrieveing parent directory of '%s'" dir) - (let ((parent (if (string-match "\\(.*/\\)[^/]" dir) - (match-string 1 dir) - "/"))) -;; (message "Got parent '%s'" parent) - parent)) - -(defun cppt-buffer-dir-name () - "Return the directory part of the file in the current buffer" - (let* ((cur-file (buffer-file-name)) - (cur-dir (if (string-match "\\(.*/\\)[^/]+$" cur-file) - (match-string 1 cur-file) - "/"))) -;; (message "Current file: '%s'" cur-file) -;; (message "Current directory: '%s'" cur-dir) - cur-dir)) - -(defun cppt-test-method-name (method-name) - "Prefix the named method with test according to the chose naming convention" - (if cppt-use-underscore-p - (concat "test_" method-name) - (concat "test" (cppt-upcase-first-letter method-name)))) - -(defun cppt-replace-token (token replacement) - "Do a buffer wide search replace with a fill-paragraph after each -match" - (let ((old-case-fold case-fold-search)) - (goto-char (point-min)) - (setq case-fold-search nil) - (while (search-forward-regexp token nil t) - (replace-match replacement t t) - (if (string= mode-name "C++") - (c-fill-paragraph))) - (setq case-fold-search old-case-fold))) - -(defun cppt-default-method-name (&optional include-test-prefix-p) - "Make a guess at a default method-name entry. This guess is based -on the text surrounding the cursor." - (let* ((is-header-p (cppt-is-header-file-p buffer-file-name)) - (cw (current-word)) - (word (if (or include-test-prefix-p - (string-match "^test_?\\(.*\\)" cw)) - (match-string 1 cw) cw)) - (regexp-stub "^\\s-*\\(?:[^ (]+\\s-+\\)*%s%s[*&]?\\([^ ()]+\\)\\s-*(\\s-*\\([^(){};]*\\)\\s-*\\(?:throw\\s-*([^)]*)\\)?\\(?:)[^(){};]*[;{]\\|,\\s-*$\\|)\\s-*$\\)") - (regexp-header - (format regexp-stub "\\s-*" - ;; Ignore or include test prefix in method name? - (if include-test-prefix-p "" "\\(?:test_?\\)?"))) - (regexp-src - (format regexp-stub "\\s-*[^ (]+\\s-*::\\s-*" - ;; Ignore or include test prefix in method name? - (if include-test-prefix-p "" "\\(?:test_?\\)?"))) - (regexp (if is-header-p regexp-header regexp-src))) - (save-excursion - (save-restriction - (end-of-line) - (if (not is-header-p) - (c-end-of-statement)) - (end-of-line) - (if (or (search-backward-regexp regexp nil t) - (and (not is-header-p) - ;; Try searching for namespace methods (without ::) - (search-backward-regexp regexp-header nil t))) - (let ((method-name (match-string 1)) - (parameters (match-string 2))) - (message - "Found method declaration for method '%s' with parameters '%s'" - method-name parameters) - (if (string-match "^operator\\([^_].*\\)$" method-name) - (format - "operator%s%s" - (if cppt-use-underscore-p "_" "") - (cppt-get-operator-name - (match-string 1 method-name) parameters)) - method-name)) - word))))) - -(defun cppt-print-default-method-name () - "Print default method name." - (interactive) - (message "Default method name '%s'" (cppt-default-method-name))) - -(defun cppt-upcase-first-letter (str) - "Upcase the first letter of the string argument." - (concat (upcase (substring str 0 1)) (substring str 1))) - -(defun cppt-list-contains-p (lst elem) - "Return t if the list contains element elem." - (or (equal elem (car lst)) - (and lst (cppt-list-contains-p (cdr lst) elem)))) - -(defun cppt-get-src-dir () - "Return the full path of the source directory" - (let* ((reg-ex (format "\\(.*\\)%s/?$" cppt-test-dir)) - (cur-dir (cppt-buffer-dir-name)) - (src-dir (if (string-match reg-ex cur-dir) - (match-string 1 cur-dir) - cur-dir))) - src-dir)) - -(defun cppt-get-test-dir () - "Return the full path of the test directory" - (let* ((reg-ex (format "%s/?$" cppt-test-dir)) - (cur-dir (cppt-buffer-dir-name)) - (test-dir - (if (string-match reg-ex cur-dir) - cur-dir - (format "%s%s/" cur-dir cppt-test-dir)))) - test-dir)) - -(defun cppt-replace-user-name () - "Got point-min and search and replace the USER_NAME string with the -full name of the current user. If the cppt-doc-author-p is nil, the -whole line containing the USER_NAME tag is removed" - (goto-char (point-min)) - (if cppt-doc-author-p - (replace-string "USER_NAME" (user-full-name)) - ;; Remove line with ' * @author USER_NAME' - (while (re-search-forward - "^.*USER_NAME[ \n\r*]*\n\\(\\s-*\\*?\\s-*[^ \n\r]\\)" nil t) - (message "Removing USER_NAME") - (replace-match "\\1" nil nil)))) - -(defun cppt-insert-template (template) - "Insert the given template at current, point and replace common -key words foun in the text. Point at sompletion is at the end of the -inserted text." - (let ((start-point (point)) - (end-point) - (file-name (cppt-strip-path buffer-file-name))) - ;; Insert the template boilerplate - (insert template) - (setq end-point (point)) - (save-restriction - ;; Substitute tags - (narrow-to-region start-point end-point) - (cppt-replace-user-name) - (goto-char (point-min)) - (let ((creation-date nil)) - (while (search-forward "CREATION_DATE" nil t) - (goto-char (match-beginning 0)) - (if (not creation-date) - (setq creation-date (cppt-get-creation-date))) - (message "Replacing CREATION_DATE with '%s'" creation-date) - (replace-string "CREATION_DATE" creation-date))) - (goto-char (point-min)) - (replace-string "YEAR" (format-time-string "%Y")) - (goto-char (point-min)) - (replace-string "FILE_NAME" file-name) - (goto-char (point-min)) - ;; Clear CVS tags - (goto-char (point-min)) - (replace-string "CVS_TAG" "\$\Id\: \$") - (goto-char end-point)))) - -(defun cppt-build-test-method-name (public-method test-methods) - "Build a name for the test method for the named public method. -Append a number to the name if it already exists in the test-method list." - (let ((test-method - (if cppt-use-underscore-p - (concat "test_" public-method) - (concat "test" (cppt-upcase-first-letter public-method)))) - (x)) - (setq x 0) - (while (cppt-list-contains-p test-methods test-method) - (progn - (setq x (+ x 1)) - (setq test-method - (if cppt-use-underscore-p - (format "test_%s%d" public-method x) - (format "test%s%d" - (cppt-upcase-first-letter public-method) x))))) - test-method)) - -(defun cppt-build-suite-name (module-name) - "Build the test suite name for the named module" - (if cppt-use-underscore-p - (concat - (replace-regexp-in-string "_" "" module-name) - "testsuite") - ;; (concat module-name "_test_suite") - (concat (cppt-upcase-first-letter module-name) "TestSuite"))) - -(defun cppt-edit-test (src-file-name dir-name) - "Open the test file for the named file. -Creates a new template for the class if no test file exists." - (let* ((test-file-name - (if (string-match (format "%s/?$" cppt-test-dir) dir-name) - (format "%s/%s%s.cpp" - dir-name src-file-name - (if cppt-use-underscore-p "test" "Test")) - (format "%s%s/%s%s.cpp" - dir-name cppt-test-dir src-file-name - (if cppt-use-underscore-p "test" "Test"))))) -;; (message "Looking for test source file '%s'" test-file-name) - (if (file-exists-p test-file-name) - (find-file test-file-name) - (if (y-or-n-p (format "Add tests for file '%s.h'? " src-file-name)) - (progn - ;; Open the header file for the current buffer before building - ;; a new test file for it - (find-file (cppt-find-header-file-name buffer-file-name)) - (cppt-create-test-file dir-name - src-file-name - test-file-name - (cppt-find-class-name) - (cppt-find-module-name) - (cppt-find-public-method-names)) - ;; Switch back to the new buffer - (find-file test-file-name)))))) - -(defun cppt-create-test-file (dir-name - src-file-name - test-file-name - class-name - module-name - public-methods) - "Create a new test file for the class. -Also add the file to the module test file, alternatively create the module -test file if it does not already exist." - ;; Check whether the target directory exists, create it if not - (let* ((test-dir-name (format "%s/%s" dir-name cppt-test-dir))) - (unless (file-directory-p test-dir-name) - (if (file-exists-p test-dir-name) - (error "Cannot create test directory '%s'. -A file with the same name exists already." test-dir-name) - (progn - (message "Creating test dir '%s'." test-dir-name) - (make-directory test-dir-name) - ;; Create a default .cvsignore file in the new directory - (find-file (format "%s/.cvsignore" test-dir-name)) - (insert cppt-cvsignore-template) - (save-buffer))))) - ;; First build the header file for the new test class - (let ((header-file-name (cppt-find-header-file-name test-file-name))) -;; (message "Creating new test header file %s" header-file-name) - (find-file header-file-name) - ;; (message "Header file created") - (goto-char (point-min)) - ;; (message "Inserting template header body") - (cppt-insert-template cppt-test-header-template) - - (goto-char (point-min)) - (cppt-replace-token "INCLUDE_CALLBACK" - (if cppt-use-callback-p - "#include <fastlib/util/callback.h>" "")) - (goto-char (point-min)) - (message "cpp use callback %s" cppt-use-callback-p) - (cppt-replace-token "CALLBACK_TYPEDEF" - (if cppt-use-callback-p - (concat "typedef fast::util::callback<" - class-name - "_test> tst_method_ptr") - "typedef void(CLASS_NAMETest::* tst_method_ptr) ()")) - - ;; Convert to underscore based naming - (goto-char (point-min)) - ;; (replace-string "CLASS_NAMETest" "CLASS_NAME_test") - (if cppt-use-underscore-p - (cppt-replace-token "CLASS_NAMETest" "CLASS_NAME_test")) - - - (goto-char (point-min)) - (replace-string - "CLASS_NAME.h" - (format "%s.h" (replace-regexp-in-string "_" "" class-name))) - ;; Insert class name - (cppt-replace-token "CLASS_NAME" class-name) - (cppt-insert-test-method-declarations public-methods) - ;; Fill the first paragraph in file header - (goto-char (point-min)) - (search-forward-regexp "[^\n\r /*]") - (c-fill-paragraph) - (save-buffer)) - ;; Build .cpp file for the new application for running the test class - (let ((app-file-name (cppt-find-app-file-name test-file-name))) -;; (message "Creating new test file application %s" app-file-name) - (find-file app-file-name) - (goto-char (point-min)) - (cppt-insert-template cppt-test-class-app-template) - ;; Convert to underscore based naming - (if cppt-use-underscore-p - (progn - (goto-char (point-min)) - (replace-string "CLASS_NAMETestApp" "CLASS_NAME_test_app") - (goto-char (point-min)) - (replace-string "CLASS_NAMETest" "CLASS_NAME_test"))) - (goto-char (point-min)) - (replace-string "TEST_HEADER" - (format "%s%s" - (replace-regexp-in-string "_" "" class-name) - (if cppt-use-underscore-p "test" "Test"))) - ;; DEBUG replace - (goto-char (point-min)) - (cppt-replace-token "DEBUG_INCLUDES" - (if cppt-use-fastlib-debug-p - "#include <fastlib/util/debug.h>" - "")) - (cppt-replace-token "DEBUG_INIT" - (if cppt-use-fastlib-debug-p - "INIT_DEBUG(_argc, _argv);" - "")) - ;; Insert extra include files - (goto-char (point-min)) - (replace-string "EXTRA_INCLUDES" cppt-extra-source-includes) - ;; Insert initialization code - (goto-char (point-min)) - (replace-string "INIT_CODE" - ;; Format argv & argc to _argv and _argc - (replace-regexp-in-string - "\\([^_]\\)argc" "\\1_argc" - (replace-regexp-in-string - "\\([^_]\\)argv" "\\1_argv" - cppt-application-init-code))) - ;; Set the suite name - (cppt-replace-token "MODULE" module-name) - (cppt-replace-token "CLASS_NAME" class-name) - ;; Fill the first paragraph in file header - (goto-char (point-min)) - (search-forward-regexp "[^\n\r /*]") - (c-fill-paragraph) - (save-buffer)) - ;; Build the .cpp file for the new test class -;; (message "Creating new test file %s" test-file-name) - (find-file test-file-name) - (goto-char 1) - (cppt-insert-template cppt-test-body-template) - ;; DEBUG replace - (goto-char (point-min)) - (cppt-replace-token "DEBUG_INCLUDES" - (if cppt-use-fastlib-debug-p - "#include <fastlib/util/debug.h>" - "")) - (if (not cppt-use-fastlib-debug-p) - (cppt-replace-token "DEBUG(.*);" "")) - - ;; callback call replace - (goto-char (point-min)) - (cppt-replace-token "CALL_CALLBACK" - (if cppt-use-callback-p - "itr->second()" - "(this->*itr->second)()")) - ;; Convert to underscore based naming - (goto-char (point-min)) - (and cppt-use-underscore-p - (goto-char (point-min)) - (replace-string "CLASS_NAMETest" "CLASS_NAME_test")) - (goto-char (point-min)) - (replace-string "TEST_HEADER" - (format "%s%s" - (replace-regexp-in-string "_" "" class-name) - (if cppt-use-underscore-p "test" "Test"))) - ;; Insert extra include files - (goto-char (point-min)) - (replace-string "EXTRA_INCLUDES" cppt-extra-source-includes) - (cppt-replace-token "MODULE" module-name) - (cppt-replace-token "CLASS_NAME" class-name) - ;; Fill the first paragraph in file header - (goto-char (point-min)) - (search-forward-regexp "[^\n\r /*]") - (c-fill-paragraph) - (cppt-insert-test-methods public-methods class-name) - (cppt-insert-run-method public-methods class-name) - (save-buffer) - (if (< (count-windows) 2) (split-window-vertically)) - (other-window 1) - (cppt-add-class-test dir-name - test-file-name - src-file-name - class-name - module-name) - (other-window -1)) - -(defun cppt-find-header-file-name (file-name) - "Find the name of the header file for the named file." - (let ((header-file-name - (concat - (substring file-name 0 - (string-match "[^.]+$" file-name)) "h"))) - ;; (message "Header file name: %s" header-file-name) - header-file-name)) - -(defun cppt-find-app-file-name (file-name) - "Build a name for an application file from the given file name" - (let ((app-file-name - (concat - (substring file-name 0 - (string-match "\\.[^.]+$" file-name)) - (if cppt-use-underscore-p "app.cpp" "App.cpp")))) - ;; (message "App file name: %s" app-file-name) - app-file-name)) - -(defun cppt-create-test-suite (file-name suite-name module-name) - "Create a new test suite for the named module." -;; (message "Creating test suite file %s" file-name) - (find-file file-name) - (goto-char (point-min)) - (cppt-insert-template cppt-suite-template) - ;; DEBUG replace - (goto-char (point-min)) - (cppt-replace-token "DEBUG_INCLUDES" - (if cppt-use-fastlib-debug-p - "#include <fastlib/util/debug.h>" - "")) - (cppt-replace-token "DEBUG_INIT" - (if cppt-use-fastlib-debug-p - "INIT_DEBUG(_argc, _argv);" - "")) - ;; Insert extra include files - (goto-char (point-min)) - (replace-string "EXTRA_INCLUDES" cppt-extra-source-includes) - ;; Insert initialization code - (goto-char (point-min)) - (replace-string "INIT_CODE" - ;; Format argv & argc to _argv and _argc - (replace-regexp-in-string - "\\([^_]\\)argc" "\\1_argc" - (replace-regexp-in-string - "\\([^_]\\)argv" "\\1_argv" - cppt-suite-init-code))) - ;; Convert to underscare naming - (and cppt-use-underscore-p - (goto-char (point-min)) - (replace-string "SUITEApp" "SUITE_app")) - (cppt-replace-token "SUITE" suite-name) - (cppt-replace-token "MODULE" module-name) - (save-buffer)) - -(defun cppt-insert-libs (libs) - "Insert one line for each of the items in the libs list" - ;; (message "cppt-insert-libs: %s, num %d" (car libs) (length libs)) - (if libs - (progn - (insert (format "%s\n" (car libs))) - (cppt-insert-libs (cdr libs))) - ;; Insert extra libs, if any - (if cppt-extra-libraries - (let ((lib (car cppt-extra-libraries)) - (rest (cdr cppt-extra-libraries))) - (insert "LIBS ") - (while lib - (progn - (insert lib " ") - (setq lib (car rest)) - (setq rest (cdr rest)))) - (insert "\n"))) - ;; Insert extra external libs, if any - (if cppt-extra-external-libraries - (let ((lib (car cppt-extra-external-libraries)) - (rest (cdr cppt-extra-external-libraries))) - (insert "EXTERNALLIBS ") - (while lib - (progn - (insert lib " ") - (setq lib (car rest)) - (setq rest (cdr rest)))) - (insert "\n"))))) - -(defun cppt-find-project-libs (file-name) - "Build a list with all LIBS or EXTERNALLIBS found in the named project." - ;; (message "Searching for library dependencies within %s" file-name) - (find-file file-name) - (goto-char (point-min)) - (let ((libs)) - (while (search-forward-regexp - "^\\(EXTERNALLIBS\\|LIBS\\).*$" (point-max) t) - (let ((lib-str (match-string 0))) - (unless (cppt-list-contains-p libs lib-str) - (setq libs (cons lib-str libs)) - ;; (message "Found '%s', num libs: %d" (car libs) (length libs)) - ))) - (kill-buffer nil) - libs)) - -(defun cppt-create-project (file-name suite-name) - "Create a new fastos.project file" -;; (message "Creating project file '%s'" file-name) - (find-file file-name) - (goto-char (point-min)) - (cppt-insert-template cppt-project-template) - (if cppt-use-underscore-p - (progn - (goto-char (point-min)) - (replace-string "%Test" "%test") - (goto-char (point-min)) - (replace-string "%testSuite" "%testsuite"))) - (goto-char (point-min)) - (replace-string "EXTRA_MAKE_TARGETS" (or cppt-extra-make-targets "")) - (goto-char (point-min)) - (replace-string "_PRETEST_TARGET_" - (if cppt-pretest-target cppt-pretest-target "")) - (goto-char (point-min)) - (replace-string "_PRE_TEST_" cppt-pretest) - (goto-char (point-min)) - (replace-string "_POST_TEST_" cppt-posttest) - (goto-char (point-min)) - (replace-string "SUITE" suite-name) - (goto-char (point-min)) - (replace-string "CPPT_TEST_PARAMETERS" cppt-test-parameters) - (goto-char (point-min)) - (replace-string "CPPT_TEST_DBFLAGS" cppt-test-dbflags)) - -(defun cppt-insert-test-method-declarations (public-methods &optional comment) - "Insert the declarations of the test methods for all methods -in the given list" - (let ((test-methods) - (test-method)) - (goto-char 1) - (re-search-forward "/[* \n]+Test methods[^/]+/\n" nil t) - (if public-methods - (while public-methods -;; (message "Creating test method declaration for '%s'" -;; (car public-methods)) - (setq test-method - (cppt-build-test-method-name - (car public-methods) test-methods)) - (setq test-methods (cons test-method test-methods)) -;; (message "Inserting declaration of test method '%s'" test-method) - (insert - (format - "\n /**\n * %s\n */\n void %s();\n\n" - (if comment - comment - (format "Test of the '%s' method." (car public-methods))) - test-method)) - (setq public-methods (cdr public-methods))) - ;; No public methods for the class beeing tested - (insert (format " - /** - * This is just a dummy test method to indicate that there are no tests - * for this class - */ - void testTest();\n\n"))))) - -(defun cppt-insert-test-method (test-method class-name public-method - &optional comment) - ;; Verify that the NOTEST macro exists in the file - (save-excursion - (save-restriction - (goto-char (point-min)) - (if (not (search-forward "NOTEST" nil t)) - (progn - (message "Inserting NOTETST macro definition") - (search-forward-regexp "\n/\\*+\n\\s-*\\*\\s-*Test methods" nil t) - (goto-char (match-beginning 0)) - (insert cppt-notest-template))))) - (message "Creating test method '%s' for class '%s'" test-method class-name) - (insert (format " -/** - * %s - */ -void %s%s::%s() { - NOTEST(\"%s\"\); -}\n\n" - (if comment - comment - (format "Test of the '%s' method." public-method)) - class-name - (if (string-match "_?[tT]est$" class-name) - "" - (if cppt-use-underscore-p "_test" "Test")) - test-method - public-method))) - -(defun cppt-insert-test-methods (public-methods class-name &optional comment) - "Insert test methods for all methods in the given list" - (message "Inserting test methods for class %s" class-name) - (let ((test-methods)) - (goto-char 1) - (re-search-forward "/[* \n]+Test methods[^/]+/\n" nil t) - (if public-methods - (while public-methods - (let* ((public-method - (if (listp public-methods) - (car public-methods) - public-methods)) - (test-method - (cppt-build-test-method-name public-method test-methods))) - (setq test-methods (cons test-method test-methods)) - (cppt-insert-test-method test-method - class-name - public-method - comment) - (setq public-methods (cdr public-methods)))) - (progn - (message "No public methods in class '%s'. %s" - "Inserting default test method" class-name) - (insert (format " -/** - * This is just a dummy test method to indicate that there are no tests - * for this class - */ -void %sTest::testTest() { - _fail(\"No tests implemented for class %s\"); -}\n" class-name class-name)))))) - -(defun cppt-insert-run-method (public-methods class-name) - "Insert a run method to execute all test methods in the given list" - (let ((test-methods)) - (goto-char 1) - ;; Try to add to init method first - (if (re-search-forward "::init() {[ \t]*\n" nil t) - (progn - (if public-methods - (while public-methods - (let* ((public-method (if (listp public-methods) - (car public-methods) - public-methods)) - (test-method (cppt-build-test-method-name - public-method test-methods))) - (setq test-methods (cons test-method test-methods)) - (message - "Adding test method '%s' to method container for class '%s'" - test-method class-name) - (insert (format - (concat " test_methods_.\n insert(MethodContainer::value_type\n (std::string(\"%s\"), \n " - (if cppt-use-callback-p - "fast::util::make_callback(*this, &%s%s::%s)));\n" - "&%s%s::%s));\n")) - test-method - class-name - (if (string-match "_?[tT]est$" class-name) - "" - (if cppt-use-underscore-p "_test" "Test")) - test-method)) - (setq public-methods (cdr public-methods)))) - (insert (format - (concat "\n test_methods_[\"test\"] = " - (if cppt-use-callback-p - "fast::util::make_callback(*this, &%s%s::test);\n" - "&%s%s::test;\n")) - class-name - (if (string-match "_?[tT]est$" class-name) - "" - (if cppt-use-underscore-p "_test" "Test")))))) - ;; Use the old way of putting everything in Run instead - (re-search-forward "::Run() {[ \t]*\n" nil t) - (if public-methods - (while public-methods - (let* ((public-method (if (listp public-methods) - (car public-methods) - public-methods)) - (test-method (cppt-build-test-method-name public-method - test-methods))) - (setq test-methods (cons test-method test-methods)) - (message - "Creating run statement for test method %s" test-method) - (insert (format " - if (setUp()) { - %s(); - tearDown(); - }\n" test-method)) - (setq public-methods (cdr public-methods)))) - (insert " - if (setUp()) { - testTest(); - tearDown(); - }\n"))))) - -(defun cppt-insert-extra-objs (extra-objs) - "Add extra OBJS ... descriptions to the current buffer" - (if (car extra-objs) - (progn - (insert (format "OBJS %s\n" (car extra-objs))) - (cppt-insert-extra-objs (cdr extra-objs))))) - -(defun cppt-add-class-test (dir-name - test-file-name - src-file-name - class-name - module-name) - "Add class test to the module test suite and the fastos.project file." - (let* ((src-dir (cppt-get-src-dir)) - (test-name (cppt-strip-file-name test-file-name)) - (src-name (cppt-strip-file-name src-file-name)) - (suite-name (cppt-build-suite-name module-name)) - (file-name (format "%s%s/%s.cpp" - dir-name cppt-test-dir suite-name)) - (project-file-name - (format "%s%s/fastos.project" dir-name cppt-test-dir)) - (test-class-name (concat class-name (if cppt-use-underscore-p - "_test" "Test")))) - ;; Create or open the test suite source file - (if (not (file-exists-p file-name)) - (cppt-create-test-suite file-name suite-name module-name) - (find-file file-name)) - (goto-char (point-min)) - (if (not (search-forward "All tests for this module" nil t)) - (error "Malformed suite template") - (progn -;; (message "Adding %s to suite %s" test-class-name suite-name) - (let ((start (point))) - (insert (format "\n AddTest(new %s());" test-class-name )) - ;; Sort the order of the tests - (sort-lines nil start (search-forward "}"))) - (goto-char (point-min)) - ;; Go to the end of the initial file comment section - (search-forward "*/\n") - (insert (format "#include \"%s.h\"\n" test-name)) - (save-buffer))) - (let ((libs (cppt-find-project-libs - (format "%s/fastos.project" dir-name)))) - (setq suite-name (replace-regexp-in-string "_" "" suite-name)) - ;; Create or open the fastos.project file - (if (not (file-exists-p project-file-name)) - (cppt-create-project project-file-name suite-name) - (find-file project-file-name)) - (goto-char (point-min)) - (if (search-forward (concat "APPLICATION " suite-name) - (point-max) t) - ;; Add class to suite dependencies - (insert (format "\nOBJS %s%s" test-name - (if cppt-include-source-p - (format "\nOBJS ../%s" src-name) - ""))) - (progn - ;; Add suite application, since not present - (goto-char (point-min)) -;; (message "Creating APPLICATION for %s" suite-name) - (insert (format "APPLICATION %s\nOBJS %s\nOBJS %s%s\n" - suite-name suite-name test-name - (if cppt-include-source-p - (format "\nOBJS ../%s" src-name) - ""))) - (cppt-insert-extra-objs cppt-extra-object-files) - (cppt-insert-libs libs))) - ;; Create separate application for the class - (goto-char (point-min)) -;; (message "Creating APPLICATION %s" test-name) - (insert (format "APPLICATION %s\nOBJS %s\nOBJS %s%s\n" - test-name test-name - (format "%s%s" test-name - (if cppt-use-underscore-p "app" "App")) - (if cppt-include-source-p - (format "\nOBJS ../%s" src-name) - ""))) - (cppt-insert-extra-objs cppt-extra-object-files) - (cppt-insert-libs libs) - (insert "\n\n") - (save-buffer) - ;; Generate the makefile by switching back to the source code, - ;; and running make from that directory - (find-file (format "%s/%s.h" src-dir src-file-name)) -;; (message "Generating makefile") - (compile "make makefiles")) - ;; Switch back to the suite file - (find-file file-name))) - -(defun cppt-set-lib-path (lib-path envir) - "Prepend lib-path to the LD_LIBRARY_PATH environent variable." - (let ((new-envir)) - (while (and (car envir) - (not (equal t (compare-strings - (car envir) 0 15 "LD_LIBRARY_PATH" 0 15)))) - (push (pop envir) new-envir)) - (if (car envir) - (progn - ;; Modify existing LD_LIBRARY_PATH entry - (push (format "LD_LIBRARY_PATH=%s:%s" - lib-path (substring (car envir) 16)) - new-envir) - (pop envir) - ;; Add the rest of the environment - (while (car envir) - (push (pop envir) new-envir))) - ;; Add new LD_LIBRARY_PATH entry - (push (format "LD_LIBRARY_PATH=%s" lib-path) new-envir)) - new-envir)) - -(defun cppt-verify-method (method-name - class-name - test-header-file - test-src-file) - "Verify that the test class has a test method for the named method" - (let ((test-method - (if cppt-use-underscore-p - (concat "test_" method-name) - (concat "test" (cppt-upcase-first-letter method-name))))) - ;; Search through the header file to see if the test method exists - (find-file test-header-file) - (goto-char (point-min)) - (if (search-forward (concat test-method "(") nil t) - (message "Test method '%s' exists for method '%s'" - test-method method-name) - (if (not - (y-or-n-p (format "Add test for method '%s'? " method-name))) - (message "Skipping test method '%s'" test-method) - (message "Adding test method '%s'" test-method) - (cppt-insert-test-method-declarations (list method-name)) - (recenter nil) - (find-file test-src-file) - (cppt-insert-test-methods (list method-name) class-name) - (cppt-insert-run-method (list method-name) class-name) - ;; Search to the implementation of the latest method added - (goto-char (point-min)) - (search-forward (format "NOTEST(\"%s\")" method-name)) - (beginning-of-line) - (recenter nil))))) - -(defun cppt-find-class-name () - "Find the first class name of the current buffer. Will use the -name of the file instead if no class can be found" - (interactive) - (save-excursion - (save-restriction - (let ((class-name)) - (goto-char (point-min)) - (if (re-search-forward - "^\\s-*class\\s-+\\([^ \n\r\t;{]+\\)[^;]*{" nil t) - (setq class-name (match-string 1))) - (if class-name - (message "Found class '%s' in '%s'" class-name buffer-file-name) - (progn - (setq class-name (cppt-find-file-name-root)) - (message "No class in '%s', using '%s'" - buffer-file-name class-name))) - class-name)))) - -(defun cppt-get-path (file-name) - "Return just the path of the file-name up to, and including, the last /." - (if (string-match "\\(.*/\\)" file-name) - (match-string 1 file-name) - file-name)) - -(defun cppt-strip-path (file-name) - "Return the last part of the file-name without the leading path." - (let ((regexp ".*/\\(.*\\)")) - (if (string-match regexp file-name) - (match-string 1 file-name) - file-name))) - -(defun cppt-strip-file-name (file-name) - "Strip off leading path and any .h .cpp extension from the given file name" - (let* ((reg-ex ".*/\\(.*\\)") - (stripped-name file-name)) - ;; First strip off any extension - (if (string-match "\\(.*\\)\\..*$" file-name) - (setq stripped-name (match-string 1 file-name))) - (cppt-strip-path stripped-name))) - -(defun cppt-find-file-name-root () - "Find the file name of the current buffer without extension (.h | .cpp)." - (interactive) - (let* ((regexp "[^\\/]+$") - (file-name - (substring buffer-file-name - (string-match regexp buffer-file-name) - (- (string-match "[^.]+$" buffer-file-name) 1)))) - ;; (message "File name: %s" file-name) - file-name)) - -(defun cppt-find-module-name () - "Find the module name of the current buffer." - (interactive) - (save-excursion - (save-restriction - (let* ((dir-name (cppt-buffer-dir-name)) - (re (format "/%s/$" cppt-test-dir)) - (regexp1 "[^\\/]+\\/$") - (regexp2 "\\/$") - (idx (string-match re dir-name)) - (module-name)) - (if idx (setq dir-name (substring dir-name 0 (+ idx 1)))) - (setq module-name (substring dir-name - (string-match regexp1 dir-name) - (string-match regexp2 dir-name))) -;; (message "Module name: %s" module-name) - module-name)))) - -(defun cppt-get-operator-name (operator parameters) - "Find the textual name of the given operator" - (let ((opr-hash (make-hash-table :test 'equal))) - (puthash "+" "plus" opr-hash) - (puthash "-" "minus" opr-hash) - (puthash "*" "star" opr-hash) - (puthash "/" "divide" opr-hash) - (puthash "%" "mod" opr-hash) - (puthash "^" "hat" opr-hash) - (puthash "&" "bitand" opr-hash) - (puthash "|" "bitor" opr-hash) - (puthash "~" "tilde" opr-hash) - (puthash "!" "not" opr-hash) - (puthash "=" "assign" opr-hash) - (puthash "<" "less" opr-hash) - (puthash ">" "greater" opr-hash) - (puthash "++" "increment" opr-hash) - (puthash "--" "decrement" opr-hash) - (puthash "==" "equality" opr-hash) - (puthash "!=" "inequality" opr-hash) - (puthash "<=" "lessequal" opr-hash) - (puthash ">=" "greaterequal" opr-hash) - (puthash "+=" "plusassign" opr-hash) - (puthash "-=" "minusassign" opr-hash) - (puthash "*=" "starassign" opr-hash) - (puthash "/=" "slashassign" opr-hash) - (puthash "%=" "modassign" opr-hash) - (puthash "^=" "hatassign" opr-hash) - (puthash "&=" "andassign" opr-hash) - (puthash "|=" "orassign" opr-hash) - (puthash "<<" "leftshift" opr-hash) - (puthash ">>" "rightsift" opr-hash) - (puthash ">>=" "rightsiftassign" opr-hash) - (puthash "<<=" "leftshiftassign" opr-hash) - (puthash "&&" "and" opr-hash) - (puthash "||" "or" opr-hash) - (puthash "->*" "pointertomember" opr-hash) - (puthash "," "comma" opr-hash) - (puthash "->" "pointer" opr-hash) - (puthash "[]" "squarebracket" opr-hash) - (puthash "()" "parenthesis" opr-hash) - (puthash "new" "new" opr-hash) - (puthash "new[]" "newarray" opr-hash) - (puthash "delete" "delete" opr-hash) - (puthash "delete[]" "deletearray" opr-hash) - (let ((name (gethash operator opr-hash "unknown"))) - (if (or (string= name "increment") (string= name "decrement")) - (if (string= parameters "") - (concat "pre" name) - (concat "post" name)) - name)))) - -(defun cppt-get-operator (operator-name) - "Return a reg-exp for the named operator" - (let ((opr-hash (make-hash-table :test 'equal))) - (puthash "plus" "\\+\\s-*([^}{]+{" opr-hash) - (puthash "minus" "-\\s-*([^}{]+{" opr-hash) - (puthash "star" "\\*\\s-*([^}{]+{" opr-hash) - (puthash "divide" "/\\s-*([^}{]+{" opr-hash) - (puthash "mod" "%\\s-*([^}{]+{" opr-hash) - (puthash "hat" "\\^\\s-*([^}{]+{" opr-hash) - (puthash "bitand" "&\\s-*([^}{]+{" opr-hash) - (puthash "bitor" "|\\s-*([^}{]+{" opr-hash) - (puthash "tilde" "~\\s-*([^}{]+{" opr-hash) - (puthash "not" "!\\s-*([^}{]+{" opr-hash) - (puthash "assign" "=\\s-*([^}{]+{" opr-hash) - (puthash "less" "<\\s-*([^}{]+{" opr-hash) - (puthash "greater" ">\\s-*([^}{]+{" opr-hash) - (puthash "postincrement" "\\+\\+\\s-*(\\s-*int\\s-*)[^}{]*{" opr-hash) - (puthash "preincrement" "\\+\\+\\s-*(\\s-*)[^}{]*{" opr-hash) - (puthash "postdecrement" "--\\s-*(\\s-*int\\s-*)[^}{]*{" opr-hash) - (puthash "predecrement" "--\\s-*(\\s-*)[^}{]*{" opr-hash) - (puthash "equality" "==\\s-*([^}{]+{" opr-hash) - (puthash "inequality" "!=\\s-*([^}{]+{" opr-hash) - (puthash "lessequal" "<=\\s-*([^}{]+{" opr-hash) - (puthash "greaterequal" ">=\\s-*([^}{]+{" opr-hash) - (puthash "plusassign" "\\+=\\s-*([^}{]+{" opr-hash) - (puthash "minusassign" "-=\\s-*([^}{]+{" opr-hash) - (puthash "starassign" "\\*=\\s-*([^}{]+{" opr-hash) - (puthash "slashassign" "/=\\s-*([^}{]+{" opr-hash) - (puthash "modassign" "%=\\s-*([^}{]+{" opr-hash) - (puthash "hatassign" "\\^=\\s-*([^}{]+{" opr-hash) - (puthash "andassign" "&=\\s-*([^}{]+{" opr-hash) - (puthash "orassign" "|=\\s-*([^}{]+{" opr-hash) - (puthash "leftshift" "<<\\s-*([^}{]+{" opr-hash) - (puthash "rightsift" ">>\\s-*([^}{]+{" opr-hash) - (puthash "rightsiftassign" ">>=\\s-*([^}{]+{" opr-hash) - (puthash "leftshiftassign" "<<=\\s-*([^}{]+{" opr-hash) - (puthash "and" "&&\\s-*([^}{]+{" opr-hash) - (puthash "or" "||\\s-*([^}{]+{" opr-hash) - (puthash "pointertomember" "->*\\s-*([^}{]+{" opr-hash) - (puthash "comma" ",\\s-*([^}{]+{" opr-hash) - (puthash "pointer" "->\\s-*([^}{]+{" opr-hash) - (puthash "squarebracket" "\\[\\]\\s-*([^}{]+{" opr-hash) - (puthash "parenthesis" "()\\s-*([^}{]+{" opr-hash) - (puthash "new" "new\\s-*([^}{]+{" opr-hash) - (puthash "newarray" "new\\[\\]\\s-*([^}{]+{" opr-hash) - (puthash "delete" "delete\\s-*([^}{]+{" opr-hash) - (puthash "deletearray" "delete\\[\\]\\s-*([^}{]+{" opr-hash) - (concat "operator\\s-*" (gethash operator-name opr-hash "unknown")))) - -(defun cppt-skip-comments (end) - "Find the first line of non-comments" - ;; Skip // and /* comments - (goto-char - (if (re-search-forward "^\\s-*[^ \t\r\n/][^ \t\r\n/*].*" end t) - (match-beginning 0) - end)) - ;; Skip lines starting with * - (if (< (point) end) - (goto-char - (if (re-search-forward "^\\s-*[^ \t\n\r*].*" end t) - (match-beginning 0) - end)))) - -(defun cppt-find-methods (end-of-region &optional class-name) - "Find all method declarations btw current point and end-of-region" - (let ((methods) - (method) - (end (if end-of-region end-of-region (point-max)))) - (while (< (point) end) - (cppt-skip-comments end) - (if (< (point) end) - ;; Extract method name - (if (not (re-search-forward - "^\\s-+\\(\\([^ /:,{}();\t\n*]\\|[^:]::[^:]\\)+\\)\\(\\([^({},:;]\\|[^:]::[^:]\\)*[ *&\n]+\\)\\([^ *_~][^ :\n;(]*\\)\\s-*(\\s-*\\([^{};]*\\)\\s-*)[^;{/)]*[;{]" end t)) - (goto-char end) - ;; TODO, store the complete match, for documentation - ;; (message "Match #3 %s" (match-string 0)) - (goto-char (match-end 0)) - (let ((leadtok (match-string-no-properties 1)) - (operator-str (match-string-no-properties 3)) - (params (match-string-no-properties 6))) - (setq method (match-string-no-properties 5)) -;; (message "DEBUG: leadtok '%s'" leadtok) -;; (message "DEBUG: operator-str '%s'" operator-str) -;; (message "DEBUG: method '%s'" method) -;; (message "DEBUG: params '%s'" params) - (if (string-match "operator" operator-str) - (setq method (concat "operator" method))) - (if (and class-name (string-equal class-name method)) - (message "Skipping constructor %s" method) - (if (or (string-match "return" leadtok) - (string-match "return" operator-str)) - (message "Skipping return statement %s" method) - (if (string-equal "friend" leadtok) - (message "Skipping friendship declaration %s" - method) - ;; Handle overloaded operators - (if (string-match "operator\\(.*\\)$" method) - (setq method (format "operator_%s" - (cppt-get-operator-name - (match-string 1 method) - params)))) - (setq methods (cons method methods)) - (message "Found %s C++ method %s" - (if class-name "public" "free") method)))))))) - methods)) - -(defun cppt-find-public-method-names () - "Find all public method declarations in the current buffer." - (interactive) - (let ((public-methods) - (method) - (start-of-class) - (end-of-class) - (start-public) - (end-public) - (cur-point) - (class-name) - (indentation) - (type)) - (save-excursion - (save-restriction - (goto-char (point-min)) - (while (< (point) (point-max)) - (setq cur-point (point)) - ;; Find the start of the next class - ;; If class is not found, just move to the end of the file - (if (setq start-of-class - (re-search-forward - "^\\(\\s-*\\)\\(class\\|struct\\)\\s-+\\([^ \n;{]+\\)[^;{]+{" nil 1)) - (progn - (setq indentation (match-string 1)) - (setq type (match-string 2)) - (setq class-name (match-string 3)) - (message "Found class name '%s'" class-name))) - ;; Find all free method declarations before the class declaration - (message "Locating non-class (free) methods") - (goto-char cur-point) - (setq public-methods - (append public-methods (cppt-find-methods start-of-class))) - (if (not start-of-class) - (goto-char (point-max)) - (message "Locating public methods within class '%s'" class-name) - (goto-char start-of-class) - ;; Find }; indented the same way as the class declaration - (setq end-of-class - (or (re-search-forward (format"^%s};" indentation) nil t) - (point-max))) - (goto-char start-of-class) - ;; Loop across all public method declarations in the class - (while (and (setq start-public - (if (string-match "struct" type) - (progn - (setq type "") - start-of-class) - (re-search-forward "^\\s-*public\\s-*:" nil t))) - (< start-public end-of-class)) - (goto-char start-public) - (setq end-public - (or (re-search-forward - "^\\s-*\\(protected\\|private\\)\\s-*:" nil t) - end-of-class)) - (goto-char start-public) - (setq public-methods - (append public-methods - (cppt-find-methods end-public class-name)))) - (goto-char end-of-class))) - public-methods)))) - -(defun cppt-find-builddir (srcdir &optional test-file) - "Return the corresponding build directory for SRCDIR according to -cppt-relative-compile. If cppt-relative-compile is nil, just return SRCDIR" -;; (message "Resolving build directory for directory '%s'" srcdir) - (setq cppt-source-location srcdir) - (let ((build-dir (if cppt-relative-compile - (let* ((src-path (car cppt-relative-compile)) - (build-path (cadr cppt-relative-compile)) - (abbr-src (abbreviate-file-name srcdir))) - (if (string-match src-path abbr-src) - (let ((tmp-build (replace-match build-path t t abbr-src))) - (if (and cppt-relative-compile-versions - (string-match "/[0-9][0-9.]+[0-9]/" - tmp-build)) - (setq tmp-build (replace-match "/" t t tmp-build)) - tmp-build) - tmp-build))) - srcdir))) - (if (not build-dir) (setq build-dir srcdir)) -;; (message "Resolved build dir: '%s'" build-dir) - (if (not test-file) - (setq test-file "Makefile")) - (while (and (file-exists-p build-dir) - (not (file-exists-p (concat build-dir test-file))) - (not (string-equal build-dir "/"))) -;; (message "No '%s' in directory '%s'... trying parent directory" -;; test-file build-dir) - (setq build-dir (cppt-get-parent-dir build-dir))) - build-dir)) - - -(defun cppt-find-containsdir (srcdir &optional test-file) - "Return the first directory above or at this directory that contains -a Makefile or the file test-file if present" - (if (not test-file) - (setq test-file "Makefile")) - (while (and (file-exists-p srcdir) - (not (file-exists-p (concat srcdir test-file))) - (not (string-equal srcdir "/"))) - (message "No '%s' in directory '%s'... trying parent directory" - test-file srcdir) - (setq srcdir (cppt-get-parent-dir srcdir))) - srcdir) - -;; ---------------------------------------------- -;; Interactive test code manipulation methods -;; ---------------------------------------------- - -(defun cppt-new-test-method () - "Interactively insert a new test method" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let* ((default-name (cppt-default-method-name)) - (default-test-name - (if (stringp default-name) - (concat (if cppt-use-underscore-p "test_" "test") - default-name))) - (input (read-string - (format "Insert new test method%s: " - (if (string= default-test-name "") - "" - (format " (default %s)" default-test-name))))) - (method-name (if (string= input "") - (if (string= default-test-name "") - (error "No method name given") - default-test-name) - input))) - (if (not (string= method-name "")) - ;; Open the test header file - (let* ((default-comment - (format "Test of the '%s' method." method-name)) - (input (read-string - (format "Comment (default \"%s\"): " default-comment))) - (comment (if (string= input "") default-comment input)) - (file-name (cppt-find-file-name-root)) - (dir-name (cppt-buffer-dir-name)) - (class-name)) - (if (string-match "Suite$" file-name) - (error "No corresponding code for test suite %s" file-name) - (progn - (if (string-match "\\(.*\\)\\([Aa]pp\\|[Tt]est\\)$" file-name) - (setq file-name (concat (match-string 1 file-name) - (if cppt-use-underscore-p - "test" "Test"))) - (setq file-name - (concat file-name - (if cppt-use-underscore-p "test" "Test")))) - (if (not (string-match cppt-test-dir dir-name)) - (setq dir-name (concat dir-name cppt-test-dir))) - (setq file-name - (format "%s/%s" dir-name file-name)) - (if (and (file-exists-p (concat file-name ".h")) - (file-exists-p (concat file-name ".cpp"))) - (progn - ;; Insert test declaration in header file - (find-file (concat file-name ".h")) - (setq class-name (cppt-find-class-name)) - (goto-char (point-min)) - (if (re-search-forward - (concat method-name "\\s-*\\s(") nil t) - (error "Method '%s' already exists" method-name) - (progn - (cppt-insert-test-method-declarations - (list method-name) comment) - (recenter nil) - ;; Insert default test implementation in src file - (if (< (count-windows) 2) (split-window-vertically)) - (other-window 1) - (find-file (concat file-name ".cpp")) - (cppt-insert-test-methods - (list method-name) class-name comment) - (cppt-insert-run-method - (list method-name) class-name) - ;; Search to the implementation of the latest method - ;; added - (goto-char (point-min)) - (search-forward - (format "NOTEST(\"%s\")" method-name)) - (beginning-of-line) - (recenter nil)))) - (error "No test code for file '%s'" - buffer-file-name)))))))) - -(defun cppt-verify-test-methods () - "Verify that all public methods in the current class has test methods, -and if not, then interactively ask whether to add tests for each of them." - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let* ((start-file buffer-file-name) - (src-file-name (cppt-find-file-name-root)) - (src-header-file) - (src-dir (cppt-get-src-dir)) - (test-dir (cppt-get-test-dir)) - (test-header-file) - (test-src-file) - (class-name) - (methods) - (idx)) - (if (string-match "Suite$" src-file-name) - (error "No corresponding code for test suite %s" src-file-name) - (if (setq idx (string-match "\\(App\\|Test\\)$" src-file-name)) - (setq src-file-name (substring src-file-name 0 idx))) - ;; Open the header file for the current class - (delete-other-windows) - (setq src-header-file (format "%s/%s.h" src-dir src-file-name)) - (find-file src-header-file) - (setq class-name (cppt-find-class-name)) - (setq methods (cppt-find-public-method-names)) - ;; Open the header and src file for the test code - (setq test-header-file (format "%s/%s%s.h" - test-dir src-file-name - (if cppt-use-underscore-p "test" "Test"))) - (setq test-src-file (format "%s/%s%s.cpp" - test-dir src-file-name - (if cppt-use-underscore-p "test" "Test"))) - (if (not (file-exists-p test-header-file)) - (cppt-switch-code-test) - ;; Now find missing test methods - (let ((missing)) - (while methods - (unless (cppt-verify-method - (car methods) class-name test-header-file test-src-file) - (setq missing t)) - (setq methods (cdr methods))) - (if missing - (find-file test-src-file) - (message "Test methods found for all methods") - (find-file start-file))))))) - -(defun cppt-switch-code-test () - "Switch buffers between class code and class test-code. -If the current buffer holds class code, then the test code is opened. -If the current buffer holds the test code for a class, the code for the class -is opened" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let* ((idx) - (file-name (cppt-find-file-name-root)) - (directory-name (cppt-buffer-dir-name)) - (src-file) - (hdr-file (concat file-name ".h"))) - (if (string-match "Suite$" file-name) - (error "No corresponding code for test suite %s" file-name) - (progn - (if (setq idx (string-match "[aA]pp$" file-name)) - (setq file-name (substring file-name 0 idx))) - (if (setq idx (string-match "[Tt]est$" file-name)) - (progn - (setq src-file (concat (cppt-get-parent-dir directory-name) - (substring file-name 0 idx) ".cpp")) - (setq hdr-file (concat (cppt-get-parent-dir directory-name) - (substring file-name 0 idx) ".h")) - (find-file (if (file-exists-p src-file) src-file hdr-file))) - (cppt-edit-test file-name directory-name)))))) - -(defun cppt-switch-code-test-method () - "Switch buffers between code- and and test-method. -If the current buffer holds class code, then the test code is opened. -If the current buffer holds the test code for a class, the code for the class -is opened" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let* ((file-name (cppt-find-file-name-root)) - (is-test-p (or (string-match "[aA]pp$" file-name) - (string-match "[Tt]est$" file-name))) - (default-name (cppt-default-method-name)) - (default-test-name (if (stringp default-name) - (if is-test-p - default-name - (cppt-test-method-name default-name)))) - (method-name (if (string= default-test-name "") - (error "No method name found") - default-test-name))) - (cppt-switch-code-test) - (unless (string= method-name "") - (let* ((is-header-p (string-match "\\.h$" buffer-file-name)) - (reg-exp (if (and is-test-p - (string-match "^operator_?\\(.*\\)" - method-name)) - (cppt-get-operator (match-string 1 method-name)) - (concat method-name "\\s-*([^}{]+{")))) - ;; Go to the named method in the buffer -;; (message "Looking for implementation of method '%s' using reg-exp '%s'" -;; method-name reg-exp) - (goto-char (point-min)) - (if (or (search-forward-regexp (concat "::" reg-exp) nil t) - (and (not is-header-p) - (if is-test-p - ;; Try the header file if no implementation is - ;; found in the source file - (progn - (cppt-toggle-header-src) - (goto-char (point-min)) - (search-forward-regexp reg-exp nil t))))) - (goto-char (match-beginning 0)) - (error "Could not find %smethod '%s'" - (if is-test-p "" "test for ") - method-name)))))) - -(defun cppt-toggle-header-src-method () - "Toggle btw the src and the header file for the current buffer and -go to the current method declaration or implementation. This assumes -that the two files differ only in their extention (.h(pp) .cpp)." - (interactive) - (let* ((is-header-p (cppt-is-header-file-p buffer-file-name)) - (default-name (cppt-default-method-name t)) - (input (read-string - (format "Switch to %s of method%s: " - (if is-header-p "implementation" "declaration") - (if (string= default-name "") - "" - (format " (default %s)" default-name))))) - (method-name (if (string= input "") - (if (string= default-name "") - (error "No method name given") - default-name) - input)) - (regexp (format (if is-header-p "::%s\\s-*(" "\\s-+%s\\s-*(") - method-name))) - (cppt-toggle-header-src) - (goto-char (point-min)) - (if (not (search-forward-regexp regexp nil t)) - (if is-header-p - ;; Try searching for method without :: prefix in case it is - ;; a namespace method - (search-forward-regexp - (format "\\s-+%s\\s-*([^)]*[),]" method-name) nil t))))) - -(defun cppt-toggle-header-src () - "Toggle btw the src and the header file for the current buffer. -This assumes that the two files differ only in their extention (.h(pp) -.cpp)." - (interactive) - ;; There shouldn't be a need for reloading settings here - (let* ((file-name buffer-file-name) - (idx (string-match "[^.]+$" file-name)) - (ext (substring file-name idx)) - (trunk (substring file-name 0 idx)) - (h (concat trunk "h")) - (hpp (concat trunk "hpp")) - (cpp (concat trunk "cpp")) - (target-file-name (if (string-match "cpp" ext) - (if (and (not (file-exists-p h)) - (file-exists-p hpp)) - hpp - h) - cpp))) - (if (or (file-exists-p target-file-name) - (y-or-n-p (format "File '%s' does not exist. Create it? " - target-file-name))) - (find-file target-file-name)))) - -(defun cppt-toggle-interface-headers () - "Toggle btw the header files for the interface and its implementation. -This assumes that the two files differ only in their i prefix" - (interactive) - ;; There should'nt be a need for reloading settings here - (let* ((just-path (cppt-get-path buffer-file-name)) - (just-file-name (cppt-strip-path buffer-file-name)) - (idx (string-match "[^.]+$" just-file-name)) - (prefix (substring just-file-name 0 1)) - (target-file-name - (format "%s%sh" - just-path - (if (string-match "i" prefix) - (substring just-file-name 1 idx) - (concat "i" (substring just-file-name 0 idx)))))) - (if (or (file-exists-p target-file-name) - (y-or-n-p (format "File '%s' does not exist. Create it? " - target-file-name))) - (find-file target-file-name)))) - - -;; ---------------------------------------------- -;; Make and run methods -;; ---------------------------------------------- - -(defun cppt-make-cmd (cmd &optional dir) - "Execute make with cmd as argument" - (let ((old-dir (cppt-buffer-dir-name))) - (if (not dir) (setq dir (cppt-find-builddir old-dir))) - (cd dir) - (message "Executing compile command with args: '%s' within dir: '%s'" - cmd dir) - (if cmd - (compile (concat "make " cmd)) - (call-interactively 'compile)) - (cd old-dir) - (end-of-buffer-other-window nil))) - -(defun cppt-compile () - "Find the lowermost directory with a makefile and interactively run compile" - (interactive) - (let ((compilation-read-command "t")) - (cppt-make-cmd nil))) - -(defun cppt-make (directory-name - module-name - test-file-name - &optional debug - &optional environment) - "Run make within the given directory, then run the given class, -within the the named module." - (let ((dir-name (cppt-find-builddir directory-name)) - (cmd (format "%s %s" - (if environment environment "") - test-file-name)) - (old-process-environment process-environment)) - (if debug (setq cmd (concat cmd debug))) - ;; Temporarily Set LD_LIBRARY_PATH - (if cppt-LD_LIBRARY_PATH - (setq process-environment - (cppt-set-lib-path cppt-LD_LIBRARY_PATH process-environment))) - (cppt-make-cmd cmd dir-name) - (end-of-buffer-other-window nil) - ;; Reset LD_LIBRARY_PATH - (if cppt-LD_LIBRARY_PATH - (setq process-environment old-process-environment)))) - -(defun cppt-make-build () - "Execute make build" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (cppt-make-cmd "clean all")) - -(defun cppt-make-test () - "Execute make test" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let* ((old-dir (cppt-buffer-dir-name)) - (directory-name (cppt-find-builddir old-dir))) - (if (or (string-match (format "%s/?$" cppt-test-dir) directory-name) - ;; Also support the old fastlib naming convention - (string-match "tests/$" directory-name)) - (progn - (setq directory-name (cppt-get-parent-dir directory-name)) - (cd directory-name)))) - (cppt-make-cmd "test") - (cd old-dir)) - -(defun cppt-make-plain () - "Execute make without arguments" - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (cppt-make-cmd (if cppt-make-plain-args cppt-make-plain-args ""))) - -(defun cppt-run-test (&optional debug &optional prefix &optional environment) - "Make and execute the test for the current buffer. -The buffer may contain a class to be tested, a class-test or a test suite." - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let ((file-name (cppt-find-file-name-root)) - (module-name (cppt-find-module-name)) - (test-dir (cppt-get-test-dir)) - (idx)) - (unless (or (string-match "[Tt]est$" file-name) - (string-match "[Ss]uite$" file-name)) - (if (setq idx (string-match "[Tt]est[Aa]pp$" file-name)) - (setq file-name (format "%s%s" - (substring file-name 0 idx) - (if cppt-use-underscore-p "test" "Test"))) - (setq file-name - (concat file-name - (if cppt-use-underscore-p "test" "Test"))))) - (cppt-make test-dir module-name - (concat (if cppt-pretest-target - (concat cppt-pretest-target " ")) - (if prefix prefix "run") file-name) - debug - environment))) - -(defun cppt-run-test-debug () - "Execute the test with debug logging" - (interactive) - (cppt-run-test "Debug")) - -(defun cppt-run-single-test (&optional debug &optional prefix) - "Make and execute a single test method for the current buffer. -The buffer may contain a class to be tested, a class-test or a test suite." - (interactive) - (let* ((default-name (cppt-default-method-name)) - (input (read-string - (format "Run test for method%s: " - (if (string= default-name "") - "" - (format " (default %s)" default-name))))) - (method-name (if (string= input "") - (if (string= default-name "") - (error "No method name given") - default-name) - input))) - (if (not (string= method-name "")) - (cppt-run-test - debug prefix - (format "METHOD=%s" - (if cppt-use-underscore-p - (concat "test_" method-name) - (concat "test" (cppt-upcase-first-letter - method-name)))))))) - -(defun cppt-run-single-test-debug () - "Execute a single test method with debug logging" - (interactive) - (cppt-run-single-test "Debug")) - -(defun cppt-run-single-test-purify () - "Execute a single test method with debug logging" - (interactive) - (cppt-run-single-test "" "purify")) - -(defun cppt-run-test-purify () - "Execute the test with purify" - (interactive) - (cppt-run-test "" "purify")) - -(defun cppt-run-test-purify-debug () - "Execute the test with purify in debug mode" - (interactive) - (cppt-run-test "Debug" "purify")) - -(defun cppt-test-suite (&optional debug &optional prefix) - "Make and execute the test suite for the module of the current buffer. -This also works if the current buffer is one of the test suites or classes." - (interactive) - ;; Re-Load any local user configurations - (cppt-load-test-project "testproject.el") - (let ((module-name (cppt-find-module-name)) - (directory-name (cppt-buffer-dir-name))) - ;; Check whether we are in the test directory or the regular - ;; module directory - (unless (string-match (format "/%s/?$" cppt-test-dir) directory-name) - (setq directory-name (concat directory-name "/" cppt-test-dir "/"))) - (cppt-make directory-name module-name - (concat (if cppt-pretest-target - (concat cppt-pretest-target " ")) - (if prefix prefix "run") - (cppt-build-suite-name module-name)) - debug))) - -(defun cppt-suite-debug () - "Make and run the test suite with debug logging" - (interactive) - (cppt-test-suite "Debug")) - -(defun cppt-suite-purify () - "Make and run the test suite with purify" - (interactive) - (cppt-test-suite "" "purify")) - -(defun cppt-suite-purify-debug () - "Make and run the test suite with purify in debug mode" - (interactive) - (cppt-test-suite "Debug" "purify")) - - -;; ---------------------------------------------- -;; Formatting methods -;; ---------------------------------------------- - -(defun cppt-indent (beg end) - (message "Indenting code") - (save-excursion - (save-restriction - (narrow-to-region beg end) - (indent-region (point-min) (point-max) nil) - (untabify (point-min) (point-max)) - (message "Indentation complete")))) - -(defun cppt-indent-buffer () - "Indents and untabifies the current buffer." - (interactive) - (cppt-indent (point-min) (point-max))) - -;; ---------------------------------------------- -;; Automatic code generation methods -;; ---------------------------------------------- - -(defun cppt-insert-copy-disallowed () - "Insert private and unimplemented declarations of the copy CTOR and -assignment operator for the current class" - (interactive) - (end-of-line) - (c-end-of-statement) - ;; Find the class declaration and its name - (if (not - (search-backward-regexp - "\\(?:class\\|struct\\)\\s-+\\(?:<[^>]+>\\s-+\\)?\\(\\w[A-Za-z0-9_]*\\)\\s-*\\(:[^:]\\|{\\|$\\)" - nil t)) - (message "Could not find class declaration") - (let* ((class-name (match-string 1)) - (start-of-class (point)) - (end-of-class (search-forward "};" nil t))) - ;; find first private declaration - (goto-char start-of-class) - (if (not (search-forward-regexp "private\\s-*:" end-of-class t)) - ;; No private declaration, insert one - (progn - (goto-char end-of-class) - (forward-line -1) - (end-of-line) - (insert " -private:"))) - ;; Insert empty and private copy CTOR and assignment operator - (insert (format " - - // Assignment and copy of %s is disallowed, so the following are - // private and unimplemented -%s(const %s&); -%s &operator = (const %s&); -" - class-name class-name class-name class-name class-name)) - (indent-region start-of-class (point) nil) - (search-backward "// Assignment and copy of") - (c-fill-paragraph)))) - -;; ---------------------------------------------- -;; Documentation methods -;; ---------------------------------------------- - -(defun cppt-get-creation-date () - "If the file is registered in CVS use the first registration date, -otherwise use current date" - (save-excursion - (save-restriction - (let ((file buffer-file-name) - (time (current-time))) - (if (vc-registered file) - (progn - (vc-call print-log file) - (set-buffer "*vc*") - (while (not (or (string-match "revision\\s-*1\\.1\\s-*$" - (buffer-string)) - (string-match "added, but not committed" - (buffer-string)))) - (sleep-for 1)) - (if (string-match "added, but not committed" (buffer-string)) - (format-time-string "%d %b %Y" time) - (goto-char (point-min)) - (search-forward-regexp - "revision\\s-*1\\.1[^0-9d]+date:\\s-*\\([0-9]+\\)/0?\\([0-9]+\\)/0?\\([0-9]+\\) 0?\\([0-9]+\\):0?\\([0-9]+\\):0?\\([0-9]+\\)") - (require 'parse-time) - (let* ((year (parse-integer (match-string 1))) - (month (parse-integer (match-string 2))) - (day (parse-integer (match-string 3))) - (hour (parse-integer (match-string 4))) - (min (parse-integer (match-string 5))) - (sec (parse-integer (match-string 6)))) - (setq time (encode-time sec min hour day month year)))))) - (format-time-string "%d %b %Y" time))))) - -(defun cppt-insert-doc-template (template &optional class-name type indent) - "Substitute standard documentation tags found" - (let ((start-point (point)) - (end-point)) - ;; Insert the documentation template - (if indent (insert indent)) - (cppt-insert-template template) - (setq end-point (point)) - (save-restriction - ;; Substitute tags - (narrow-to-region start-point end-point) - (goto-char (point-min)) - (replace-string "TYPE" type) - (goto-char (point-min)) - (replace-string "CLASS_NAME" class-name) - (goto-char (point-min)) - (if indent - (progn - (end-of-line) - (indent-region (point) (point-max) nil) - (goto-char (point-min))))))) - -(defun cppt-insert-file-doc () - "This will insert, a file documentation template at the very top -of the current file." - (interactive) - (let ((class-name nil) (namespace nil) (type)) - (save-excursion - (goto-char (point-min)) - (while (re-search-forward - "^\\s-*namespace\\s-+\\([^ \n\r\t;{]+\\)[^{;]*{" nil t) - (progn - (if namespace - (setq namespace (format "%s::%s" namespace (match-string 1))) - (setq namespace (match-string 1)) -;; (message "Found namespace %s" namespace) - (setq type "namespace") - (goto-char (match-end 0))))) - (goto-char (point-min)) - (if (re-search-forward - "^\\s-*\\(class\\|struct\\)\\s-+\\([^ \n\r\t:;{]+\\)[^;]*{" nil t) - (progn - (setq class-name - (if namespace - (format "%s::%s" namespace (match-string 2)) - (match-string 2))) - (setq type (match-string 1))) -;; (message "Found %s %s" type class-name) - (setq class-name namespace))) - (if (y-or-n-p "Insert file documentation template? ") - (progn - (goto-char (point-min)) - (cppt-insert-doc-template cppt-file-doc-template class-name type) - (re-search-forward "^ \\*\\s-*$") - (if (and (string-match "\\.h$" buffer-file-name) class-name) - (progn - (insert - (format "Header file for the %s %s.\n * " class-name type)) - (search-backward type) - (c-fill-paragraph))))))) - -(defun cppt-insert-class-doc () - "This will insert, a class documentation template in front of the -first class or namespace declaration found from the current point" - (interactive) - (save-excursion - (save-restriction - (beginning-of-line) - (search-forward "};") ;Goto the end of the class declaration - (re-search-backward - "^\\(\\s-*\\)\\(class\\|struct\\|namespace\\)\\s-+\\([^ \n\r\t;{]+\\)[^;]*{" - nil t))) - (let ((class-name (match-string 3)) - (type (match-string 2)) - (indent (match-string 1))) - (if class-name - (progn - (goto-char (match-beginning 0)) - (if (y-or-n-p - (format "Insert documentation template for %s '%s'? " - type class-name)) - (progn - (cppt-insert-doc-template - cppt-class-doc-template class-name type indent) - (re-search-forward class-name) - (end-of-line)))) - (error "No class, struct or namespace found in '%s'" - buffer-file-name)))) - -(defun cppt-build-param-docs (declarations) - "Build a string with documentation of all parameters in the -declarations string" - (if (string-match "\\([^)]+\\))" declarations) - (setq declarations (match-string 1 declarations))) - (let ((documentation "") - (type) (name)) - (while (string-match "\\s-*\\([^=,\n\r]+\\s-+[&*]?\\)\\([^ \t\n\r,=]+\\)[^,)]*,?\\([^{]*\\)" - declarations) - (progn - (setq type (match-string 1 declarations)) - (setq name (match-string 2 declarations)) - (setq declarations (match-string 3 declarations)) - (setq type (replace-regexp-in-string "[\n\r]+" "" type)) - (setq name (replace-regexp-in-string "[\n\r]+" "" name)) - ;; Collapse multipple whitespace - (setq type (replace-regexp-in-string "\\s-\\s-+" " " type)) - ;; Remove trailing whitespace - (setq type (replace-regexp-in-string "\\s-+$" "" type)) - (setq documentation (format "%s * @param %s a '%s' value\n" - documentation name type)))) - documentation)) - -(defun cppt-build-exception-docs (declarations) - "Build a string with documentation of all exceptions in the -declarations string" - (let ((documentation "") - (name)) - (while (and (stringp declarations) - (string-match "\\s-*\\([^=,\n\r]+\\),?\\(.*\\)" - declarations)) - (progn - (setq name (match-string 1 declarations)) - (setq declarations (match-string 2 declarations)) - (setq documentation (format "%s * @exception %s \n" - documentation name)))) - documentation)) - -(defun cppt-insert-method-doc () - "This will insert, a method documentation template in front of the -first method declaration found from the current point" - (interactive) - ;; Search backward to the beginning of the declaration - (end-of-line) - (c-beginning-of-statement) - (beginning-of-line) - (let ((indentation) - (dtor-flag) - (method-name) - (ret-type) - (parameter-declarations) - (start-point (point)) - (method-type) - (exception-declarations)) - (cppt-skip-comments (point-max)) - (if (or - ;; For method declarations - (re-search-forward -;; "^\\(\\s-*\\)\\(\\([^ /:,{}();\t\n*]\\|[^:]::[^:]\\)+\\)\\(\\([^({},:;]\\|[^:]::[^:]\\)*[ *&\n]+\\)\\([^ *_~][^ :\n;(]*\\)\\s-*([^)]*)[^;{/]*[;{]" -;; "^\\(\\s-*\\)\\([^ {};\t\n]*\\s-+[^({};]*\\)\\s-+[*&]?\\s-*\\(~?\\)\\([^ *_~][^ :\n;(]*\\)\\s-*(\\([^;{]*\\))[^);{]*[;{]" - "^\\(\\s-*\\)\\(\\([^/:,{}();\t\n*]\\|[^:]::[^:]\\)+\\)\\s-+[*&]?\\s-*\\(~?\\)\\([^ *_~][^ :\n;(]*\\)\\s-*(\\([^;{)]*\\))[^;{]*[;{]" - (point-max) t) - ;; For method implementations - (and (goto-char start-point) - (re-search-forward - "^\\(\\s-*\\)\\([^({}\n\r\t;]*\\)\\s-+[*&]?[^: \t]+::\\(~?\\)\\([^ *_~][^ :\n;(]*\\)\\s-*(\\([^;{]*\\))[^);{]*[;{]" - (point-max) t))) - (progn - (setq indentation (match-string 1)) - (setq ret-type (match-string 2)) - (setq dtor-flag (match-string 4)) - (setq method-name (match-string 5)) - (setq parameter-declarations (match-string 6)) - ;; Search for exception declarations - (goto-char (match-beginning 0)) - (save-restriction - (save-excursion - (let ((end) - (start (point))) - (search-forward ";") - (setq end (match-beginning 0)) - (goto-char start) - (if (re-search-forward - "throw\\s-*(\\(.*\\))" - end t) - (setq exception-declarations (match-string 1)))))) - ;; Remove method qualifiers from the return type - (setq ret-type (replace-regexp-in-string - "\\(explicit\\|virtual\\|inline\\|static\\)\\s-*" - "" - ret-type)) - ;; Remove trailing whitespace from the return type - (setq ret-type (replace-regexp-in-string "\\s-+$" "" ret-type)) - ;; Remove leading whitespace from the return type - (setq ret-type (replace-regexp-in-string "^\\s-+" "" ret-type)) - (setq method-type - (if (string-equal ret-type "") - (if (string-equal dtor-flag "") - "constructor" - "destructor") - "method")) - ;; Add extra spaces to the indentation that were lost in the regexp - (setq indentation (concat - (if (string-equal ret-type "") " " " ") - indentation)) -;; (message "cppt-insert-method-doc: indentation '%s'" indentation) -;; (message "cppt-insert-method-doc: ret-type '%s'" ret-type) -;; (message "cppt-insert-method-doc: dtor-flag '%s'" dtor-flag) -;; (message "cppt-insert-method-doc: method-name '%s'" method-name) -;; (message "cppt-insert-method-doc: method-type '%s'" method-type) -;; (message "cppt-insert-method-doc: parameter-declarations '%s'" -;; parameter-declarations) -;; (message "cppt-insert-method-doc: exception-declarations '%s'" -;; exception-declarations) - (save-restriction - (save-excursion - ;; Convert method name for overloaded operators - (if (string-match "operator\\(.*\\)" method-name) - (progn - (setq method-name (format "overloaded %s" - (match-string 1 method-name))) - (setq method-type "operator"))))) - (if (y-or-n-p - (format "Insert documentation template for the '%s' %s? " - method-name method-type)) - (let ((end-point)) - (setq start-point (point)) - (save-restriction - ;; Insert the class documentation template - (insert - (format "%s%s" indentation cppt-method-doc-template)) - (setq end-point (point)) - (narrow-to-region start-point end-point) - ;; Substitute the different tags - (goto-char (point-min)) - (replace-string "METHOD_NAME" method-name) - (cppt-replace-user-name) - (goto-char (point-min)) - ;; Trim whitespace - (setq ret-type (replace-regexp-in-string - "[\n\r]+" " " ret-type)) - (setq ret-type (replace-regexp-in-string - "\\s-\\s-+" " " ret-type)) - (setq ret-type (replace-regexp-in-string - "\\s-+$" "" ret-type)) - (replace-string "RETURN_TYPE\n" - (if (or (string-equal ret-type "") - (string-equal ret-type "void")) - "" - (format " * @return a '%s' value\n" - ret-type))) - (goto-char (point-min)) - (replace-string - "PARAMETERS\n" - (cppt-build-param-docs parameter-declarations)) - (goto-char (point-min)) - (replace-string - "EXCEPTIONS\n" - (cppt-build-exception-docs exception-declarations)) - ;; Indent comments properly - (goto-char (point-min)) - (replace-regexp "^\\s-*\\*" (format "%s *" indentation)) - (goto-char (point-max))) - (indent-region (- start-point 1) (point) nil) - (goto-char start-point) - (search-forward method-name) - (end-of-line) - (insert (format "%s " method-type))))) - (error "No method declaration found in '%s'" buffer-file-name)))) - -(defun cppt-cleanup-doc () - "Perform cleanup of documentation strings" - (interactive) - (save-excursion - (save-restriction - (goto-char (point-min)) - (query-replace-regexp "^\\(/\\*\\*\\)\\(\\*+\\|\\s-*\n\\s-*\\*\\*+\\)" - "\\1") - (goto-char (point-min)) - (query-replace-regexp "\\*\\(\\*+\\)\\s-*\n\\s-*\\(\\*/\\)" "\\1\\2") - (goto-char (point-min)) - (query-replace-regexp "\\(@\\(file\\|date\\) \\)\\s-+" "\\1") - (goto-char (point-min)) - (while (search-forward-regexp - "/\\*\\*\\([^/]\\|/[0-9A-Za-z]\\)+\\*/" nil t) - (indent-region (match-beginning 0) (match-end 0) nil))))) - -;; ---------------------------------------------- -;; Load local project definitions -;; ---------------------------------------------- - -(defun cppt-load-test-project (&optional project-file) - "Load the local test project file" - (interactive) - (if (not project-file) - (setq project-file "testproject.el")) - (let* ((directory-name - (cppt-find-containsdir (cppt-buffer-dir-name) project-file)) - (project-file-name)) - (if (string-match (format "%s/$" cppt-test-dir) directory-name) - (setq directory-name (cppt-get-parent-dir directory-name))) - (setq project-file-name (format "%s/%s" directory-name project-file)) - (setq project-file-name - (replace-regexp-in-string "//+" "/" project-file-name)) - ;; Look for the test project file - (if (file-exists-p project-file-name) - (progn -;; (message "Loading local module configuration") - (load-file project-file-name))))) - - -;; --------------------------------------------------------------------- -;; --------------------------------------------------------------------- -;; --------------------------------------------------------------------- -;; Create a minor mode -;; --------------------------------------------------------------------- -;; --------------------------------------------------------------------- -;; --------------------------------------------------------------------- -(defvar c++-test-minor-mode nil - "Mode variable for Fast c++ unit test minor mode.") -(make-variable-buffer-local 'c++-test-minor-mode) - - -(if (not (assq 'c++-test-minor-mode minor-mode-alist)) - (setq minor-mode-alist - (cons '(c++-test-minor-mode " Test") - minor-mode-alist))) - -;; Menu bar -(defvar c++-test-minor-mode-menu-map - (let ((map (make-sparse-keymap "Test"))) - (define-key map [indent-buffer] - '("Indent and untabify buffer" . cppt-indent-buffer)) - - (define-key map [lambda1] '("----")) - - (define-key map [insert-file-doc] - '("Insert doc template for file..." . cppt-insert-file-doc)) - (define-key map [insert-class-doc] - '("Insert doc template for class/struct/namespace..." . - cppt-insert-class-doc)) - (define-key map [insert-method-doc] - '("Insert doc template for method..." . cppt-insert-method-doc)) - - (define-key map [lambda2] '("----")) - - (define-key map [insert-copy-disallowed] - '("Insert empty copy CTOR and assignment operator..." . - cppt-insert-copy-disallowed)) - - (define-key map [lambda3] '("----")) - - (define-key map [cppt-compile] - '("Run make with args..." . cppt-compile)) - (define-key map [make-build] - '("Run 'make clean all'" . cppt-make-build)) - (define-key map [make-plain] - '("Run 'make without arguments'" . cppt-make-plain)) - (define-key map [make-test] - '("Run 'make test'" . cppt-make-test)) - - (define-key map [lambda4] '("----")) - - (define-key map [suite-purify-debug] - '("Run test suite in Purify in debug mode" . cppt-suite-purify-debug)) - (define-key map [suite-purify] - '("Run test suite in Purify" . cppt-suite-purify)) - (define-key map [suite-debug] - '("Run test suite in debug mode" . cppt-suite-debug)) - (define-key map [test-suite] - '("Run test suite" . cppt-test-suite)) - - (define-key map [lambda5] '("----")) - - (define-key map [run-test-purify-debug] - '("Run tests for class in Purify in debug mode" . - cppt-run-test-purify-debug)) - (define-key map [run-test-purify] - '("Run tests for class in Purify" . cppt-run-test-purify)) - (define-key map [run-test-debug] - '("Run tests for class in debug mode" . cppt-run-test-debug)) - (define-key map [run-test] - '("Run tests for class" . cppt-run-test)) - - (define-key map [lambda6] '("----")) - - (define-key map [run-single-test-debug] - '("Run a single test in debug mode..." . cppt-run-single-test-debug)) - (define-key map [run-single-test] - '("Run a single test for class..." . cppt-run-single-test)) - - (define-key map [lambda7] '("----")) - - (define-key map [new-test-method] - '("Create a new test method..." . cppt-new-test-method)) - (define-key map [verify-test-methods] - '("Verify tests for all public methods" . cppt-verify-test-methods)) - - (define-key map [lambda8] '("----")) - - (define-key map [switch-code-test] - '("Toggle test- and source-file" . cppt-switch-code-test)) - (define-key map [switch-code-test-method] - '("Toggle test- and source-method" . cppt-switch-code-test-method)) - (define-key map [toggle-header-src] - '("Toggle header and source" . cppt-toggle-header-src)) - (define-key map [toggle-header-src-method] - '("Toggle header and source with regards to the current method" . - cppt-toggle-header-src-method)) - (define-key map [ff-find-other-file] - '("Find related file, taking includes into consideration" . - ff-find-other-file)) - (define-key map [toggle-interface-headers] - '("Toggle header files for interface and implementation" . - cppt-toggle-interface-headers)) - map) - "Menu for C++ test minor mode") - -;; ---------------------------------------------------------------------- -;; Key bindings, the user should set the cppt-xxx-key variables -;; ---------------------------------------------------------------------- -(defvar c++-test-minor-keymap - (let ((map (make-sparse-keymap))) - (define-key map [menu-bar test] - (cons "Test" c++-test-minor-mode-menu-map)) - (if cppt-use-function-keys-flag - (progn - (define-key map cppt-indent-buffer-key 'cppt-indent-buffer) - (define-key map cppt-insert-file-doc-key 'cppt-insert-file-doc) - (define-key map cppt-insert-class-doc-key 'cppt-insert-class-doc) - (define-key map cppt-insert-method-doc-key 'cppt-insert-method-doc) - (define-key map cppt-insert-copy-disallowed-key - 'cppt-insert-copy-disallowed) - (define-key map cppt-compile-key 'cppt-compile) - (define-key map cppt-make-build-key 'cppt-make-build) - (define-key map cppt-make-plain-key 'cppt-make-plain) - (define-key map cppt-make-test-key 'cppt-make-test) - (define-key map cppt-suite-purify-debug-key 'cppt-suite-purify-debug) - (define-key map cppt-suite-purify-key 'cppt-suite-purify) - (define-key map cppt-suite-debug-key 'cppt-suite-debug) - (define-key map cppt-test-suite-key 'cppt-test-suite) - (define-key map cppt-run-test-purify-debug-key - 'cppt-run-test-purify-debug) - (define-key map cppt-run-test-purify-key 'cppt-run-test-purify) - (define-key map cppt-run-test-debug-key 'cppt-run-test-debug) - (define-key map cppt-run-test-key 'cppt-run-test) - (define-key map cppt-run-single-test-debug-key - 'cppt-run-single-test-debug) - (define-key map cppt-run-single-test-key 'cppt-run-single-test) - (define-key map cppt-new-test-method-key 'cppt-new-test-method) - (define-key map cppt-verify-test-methods-key - 'cppt-verify-test-methods) - (define-key map cppt-switch-code-test-key 'cppt-switch-code-test) - (define-key map cppt-switch-code-test-method-key - 'cppt-switch-code-test-method) - (define-key map cppt-toggle-header-method-key - 'cppt-toggle-header-src-method) - (define-key map cppt-find-other-file-key 'ff-find-other-file) - (define-key map cppt-toggle-interface-key - 'cppt-toggle-interface-headers) - (define-key map cppt-toggle-header-key 'cppt-toggle-header-src) - (define-key map [S-right] 'forward-sexp) - (define-key map [S-left] 'backward-sexp) - (define-key map [S-up] 'beginning-of-defun) - (define-key map [S-down] 'end-of-defun))) - map) - "Keymap used for the c++ test minor mode") - -(or (not (boundp 'minor-mode-map-alist)) - (assoc 'c++-test-minor-mode minor-mode-map-alist) - (setq minor-mode-map-alist - (cons (cons 'c++-test-minor-mode c++-test-minor-keymap) - minor-mode-map-alist))) - -(defun c++-test-minor-mode (&optional arg) - "C++ unit test minor mode. This minor mode is invoked automatically -as an extension of c++-mode. It has extensive functionality for -writing, extending and running automated unit tests for C++ code. - -The functionality may be roughly separated into three different areas: - * Writing and extending unit tests - * Executing the tests - * Documenting code - -The test commands all have the cppt- prefix, and are by default tied -to the different function keys: - -\\{c++-test-minor-keymap} -" - (interactive "P") - (setq c++-test-minor-mode - (if (null arg) - (not c++-test-minor-mode) - (> (prefix-numeric-value arg) 0))) - (if c++-test-minor-mode - (progn - (message "Enabling c++ unit test minor mode") - ;; Load any local user configurations - ;; This is done every time c++-mode is invoked on a file. - (cppt-load-test-project "testproject.el")))) - - -;; Always use this minor mode for c++-mode -(add-hook 'c++-mode-hook 'c++-test-minor-mode) - -(defun cppt-makefile-make-interactive () - "Execute compile with argument taken from current word" - (interactive) - (let* ((regexp "^\\(\\w+\\)\\s-*:") - (compile-command (concat "make " - (progn - (end-of-line) - (if (or (search-backward-regexp regexp nil t) - (search-forward-regexp regexp nil t)) - (match-string 1) - "")))) - (compilation-read-command "t")) - (call-interactively 'compile) - (end-of-buffer-other-window nil))) - -(defun cppt-makefile-mode-hook () - (local-set-key cppt-compile-key 'cppt-makefile-make-interactive) - (local-set-key cppt-make-build-key 'cppt-make-build) - (local-set-key cppt-make-plain-key 'cppt-make-plain) - (local-set-key cppt-make-test-key 'cppt-make-test) - (local-set-key cppt-suite-purify-debug-key 'cppt-suite-purify-debug) - (local-set-key cppt-suite-purify-key 'cppt-suite-purify) - (local-set-key cppt-suite-debug-key 'cppt-suite-debug) - (local-set-key cppt-test-suite-key 'cppt-test-suite)) -(add-hook 'makefile-mode-hook 'cppt-makefile-mode-hook) - - -(provide 'cpptest) diff --git a/fastlib/src/vespa/fastlib/testsuite/suite.cpp b/fastlib/src/vespa/fastlib/testsuite/suite.cpp deleted file mode 100644 index e69de29bb2d..00000000000 --- a/fastlib/src/vespa/fastlib/testsuite/suite.cpp +++ /dev/null diff --git a/fastlib/src/vespa/fastlib/testsuite/testproject.el b/fastlib/src/vespa/fastlib/testsuite/testproject.el deleted file mode 100644 index 983bd582a8a..00000000000 --- a/fastlib/src/vespa/fastlib/testsuite/testproject.el +++ /dev/null @@ -1,108 +0,0 @@ -;; Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -;; testproject.el - -;; Local configurations for the cpptest Emacs unit-test -;; framework. This is just an example of typical variables that one -;; usually uses This file should be located in the same directory as -;; the class(es) you want to test. - -;; $Revision: 1.6 $ $Date: 2003-09-11 09:14:01 $ -;; Author: Nils Sandøy <nils.sandoy@fast.no> - -;; Just a message to show that this file is beeing read. Look for this -;; in the *Messages* buffer. -(message "Setting local test configuration for the module") - -;; Use fastlib's new debugging features. -;; This requires fastlib 1.6.2.2+ -(setq cppt-use-fastlib-debug-p t) - -;; Use an underscore based naming scheme classes and method names will -;; Upcase each word instead -(setq cppt-use-underscore-p t) - -;; Use author in documentation. Set this value to nil if not -(setq cppt-doc-author-p t) - -;; Use the new fastlib callback method. -;; Use this for newer versions of gcc -(setq cppt-use-callback-p t) - -;; This is a subdirectory of the directory in which this file, along with -;; the source code to test, resides -(setq cppt-test-dir "test") - -;; Use this variable to include extra file in your test source, and -;; application files. Typically this will hold headers for log -;; functionality etc. -;; Example: (setq cppt-extra-source-includes "#include \"../Log.h\"") -(setq cppt-extra-source-includes "") - -;; If the above source files are not part of a library, you will -;; probably have to include them in the fastos.project file. -;; Example: (setq cppt-extra-object-files '("../Log")) -(setq cppt-extra-object-files nil) - -;; If the source code does not have a fastos.project file with all -;; required libraries for linking an executable (typically the case -;; when the source is part of a library itself), then you should use -;; this variable to provide a list of libraries which will be appended -;; to the EXTERNALLIBS section for all applications in the -;; fastos.project file. -;; Example: (setq cppt-extra-libraries '("fast")) -(setq cppt-extra-libraries nil) -(setq cppt-extra-external-libraries '("fast")) - -;; Include source file in test executables. -;; Set this to nil if you are testing part of a library -(setq cppt-include-source-p "t") - - -;; If your initialisation code below requires special parameters for -;; running the test executables, add them here -;; Example: (setq cppt-test-parameters "--test-mode") -(setq cppt-test-parameters "") - -;; If you support a special debug mode, which is executed through the -;; cppt-suite-debug or cppt-run-test-debug methods, then you should -;; add the parameter for identifying this here. -;; The parameters given here assume that the fastlib debug features are -;; turned on -;; Example: (setq cppt-test-dbflags "-d") -(setq cppt-test-dbflags "-d all -d emacs") - -;; If you support logging etc, you should include code here for -;; insitializing this as part of the Main body of the test application -;; Example: -;; Add intialization code that turns on logging, and logs to stderr in debug -;; mode -;; (setq cppt-application-init-code -;; "RTLogDistributor::GetInstance().RegisterDestination( -;; new Fast_FileLogger(\"CLASSTest.log\"), FLOG_ALL); -;; for (int i=0; i < argc; ++i) { -;; if (strcmp(argv[i], \"-d\") == 0) { -;; // Turn on debug mode (log to stderr) -;; RTLogDistributor::GetInstance().RegisterDestination( -;; new Fast_FileLogger(stderr), FLOG_ALL); -;; LOG_DBG(\"Running in debug mode\"); -;; } -;; }") -(setq cppt-application-init-code "") - -;; Pretty much the same as the application init code, but this is used -;; for the Main method of the test suite. -;; Example: -;; Add intialization code that turns on logging, and logs to stderr in debug -;; mode -;; (setq cppt-suite-init-code -;; "RTLogDistributor::GetInstance().RegisterDestination( -;; new Fast_FileLogger(\"SUITETest.log\"), FLOG_ALL); -;; for (int i=0; i < argc; ++i) { -;; if (strcmp(argv[i], \"-d\") == 0) { -;; // Turn on debug mode (log to stderr) -;; RTLogDistributor::GetInstance().RegisterDestination( -;; new Fast_FileLogger(stderr), FLOG_ALL); -;; LOG_DBG(\"Running in debug mode\"); -;; } -;; }") -(setq cppt-suite-init-code "") diff --git a/fastlib/src/vespa/fastlib/text/normwordfolder.cpp b/fastlib/src/vespa/fastlib/text/normwordfolder.cpp deleted file mode 100644 index a063332e3d1..00000000000 --- a/fastlib/src/vespa/fastlib/text/normwordfolder.cpp +++ /dev/null @@ -1,1195 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/fastlib/text/unicodeutil.h> -#include <vespa/fastlib/text/normwordfolder.h> -#include <mutex> -#include <cstring> - -bool Fast_NormalizeWordFolder::_isInitialized = false; -std::mutex _initMutex; -bool Fast_NormalizeWordFolder::_doAccentRemoval = false; -bool Fast_NormalizeWordFolder::_doSmallToNormalKana = false; -bool Fast_NormalizeWordFolder::_doKatakanaToHiragana = false; -bool Fast_NormalizeWordFolder::_doKanaAccentCollapsing = false; -bool Fast_NormalizeWordFolder::_doFullwidthToBasicLatin = false; -bool Fast_NormalizeWordFolder::_doSharpSSubstitution = false; -bool Fast_NormalizeWordFolder::_doLigatureSubstitution = false; -bool Fast_NormalizeWordFolder::_doMulticharExpansion = false; -bool Fast_NormalizeWordFolder::_isWord[128]; - -ucs4_t Fast_NormalizeWordFolder::_foldCase[767]; // Up to Latin Extended B (0x0250) -ucs4_t Fast_NormalizeWordFolder::_keepCase[767]; // -ucs4_t Fast_NormalizeWordFolder::_foldCaseHighAscii[256]; // Latin Extended Additional (0x1E00 - 0x1F00) -ucs4_t Fast_NormalizeWordFolder::_keepCaseHighAscii[256]; // (incl. vietnamese) -ucs4_t Fast_NormalizeWordFolder::_kanaMap[192]; -ucs4_t Fast_NormalizeWordFolder::_halfwidth_fullwidthMap[240]; - -void -Fast_NormalizeWordFolder::Setup(uint32_t flags) -{ - // Only allow setting these when not initialized or initializing... - { - std::lock_guard<std::mutex> initGuard(_initMutex); - _doAccentRemoval = (DO_ACCENT_REMOVAL & flags) != 0; -// _doSmallToNormalKana = (DO_SMALL_TO_NORMAL_KANA & flags) != 0; -// _doKatakanaToHiragana = (DO_KATAKANA_TO_HIRAGANA & flags) != 0; -// _doKanaAccentCollapsing = (DO_KANA_ACCENT_COLLAPSING & flags) != 0; // Not implemented - _doFullwidthToBasicLatin = (DO_FULLWIDTH_TO_BASIC_LATIN & flags) != 0; // Not implemented - _doSharpSSubstitution = (DO_SHARP_S_SUBSTITUTION & flags) != 0; - _doLigatureSubstitution = (DO_LIGATURE_SUBSTITUTION & flags) != 0; - _doMulticharExpansion = (DO_MULTICHAR_EXPANSION & flags) != 0; - _isInitialized = false; - } - Initialize(); -} - -void -Fast_NormalizeWordFolder::Initialize() -{ - unsigned int i; - if (!_isInitialized) { - std::lock_guard<std::mutex> initGuard(_initMutex); - if (!_isInitialized) { - - for (i = 0; i < 128; i++) - _isWord[i] = Fast_UnicodeUtil::IsWordChar(i); - for (i = 0; i < 767; i++) { - _foldCase[i] = Fast_UnicodeUtil::ToLower(i); - _keepCase[i] = i; - } - - for (i = 0x1E00; i < 0x1F00; i++) { - _foldCaseHighAscii[i - 0x1E00] = Fast_UnicodeUtil::ToLower(i); - _keepCaseHighAscii[i - 0x1E00] = i; - } - - if (_doAccentRemoval) { - _foldCase[0xc0] = 'a'; - _foldCase[0xc1] = 'a'; - _foldCase[0xc2] = 'a'; - _foldCase[0xc3] = 'a'; // A tilde - _foldCase[0xc7] = 'c'; - _foldCase[0xc8] = 'e'; - _foldCase[0xc9] = 'e'; - _foldCase[0xca] = 'e'; - _foldCase[0xcb] = 'e'; - _foldCase[0xcc] = 'i'; // I grave - _foldCase[0xcd] = 'i'; - _foldCase[0xce] = 'i'; - _foldCase[0xcf] = 'i'; - _foldCase[0xd1] = 'n'; - _foldCase[0xd2] = 'o'; - _foldCase[0xd3] = 'o'; - _foldCase[0xd4] = 'o'; - _foldCase[0xd5] = 'o'; - _foldCase[0xd9] = 'u'; - _foldCase[0xda] = 'u'; - _foldCase[0xdb] = 'u'; - _foldCase[0xdd] = 'y'; - - _foldCase[0xe0] = 'a'; - _foldCase[0xe1] = 'a'; - _foldCase[0xe2] = 'a'; - _foldCase[0xe3] = 'a'; // a tilde - _foldCase[0xe7] = 'c'; - _foldCase[0xe8] = 'e'; - _foldCase[0xe9] = 'e'; - _foldCase[0xea] = 'e'; - _foldCase[0xeb] = 'e'; - _foldCase[0xec] = 'i'; // i grave - _foldCase[0xed] = 'i'; - _foldCase[0xee] = 'i'; - _foldCase[0xef] = 'i'; - _foldCase[0xf1] = 'n'; - _foldCase[0xf2] = 'o'; - _foldCase[0xf3] = 'o'; - _foldCase[0xf4] = 'o'; - _foldCase[0xf5] = 'o'; - _foldCase[0xf9] = 'u'; - _foldCase[0xfa] = 'u'; - _foldCase[0xfb] = 'u'; - _foldCase[0xfd] = 'y'; - _foldCase[0xff] = 'y'; - _foldCase[0x102] = 'a'; - _foldCase[0x103] = 'a'; - _foldCase[0x110] = 'd'; - _foldCase[0x111] = 'd'; - _foldCase[0x128] = 'i'; - _foldCase[0x129] = 'i'; - _foldCase[0x178] = 'y'; - _foldCase[0x1a0] = 'o'; - _foldCase[0x1a1] = 'o'; - _foldCase[0x1af] = 'u'; - _foldCase[0x1b0] = 'u'; - - // Superscript spacing modifiers - _foldCase[0x2b0] = 'h'; - _foldCase[0x2b1] = 0x266; - _foldCase[0x2b2] = 'j'; - _foldCase[0x2b3] = 'r'; - _foldCase[0x2b4] = 0x279; - _foldCase[0x2b5] = 0x27b; - _foldCase[0x2b6] = 0x281; - _foldCase[0x2b7] = 'w'; - _foldCase[0x2b8] = 'y'; - _foldCase[0x2e0] = 0x263; - _foldCase[0x2e1] = 'l'; - _foldCase[0x2e2] = 's'; - _foldCase[0x2e3] = 'x'; - _foldCase[0x2e4] = 0x295; - - _keepCase[0xc0] = 'A'; - _keepCase[0xc1] = 'A'; - _keepCase[0xc2] = 'A'; - _keepCase[0xc3] = 'A'; // A tilde - _keepCase[0xc7] = 'C'; - _keepCase[0xc8] = 'E'; - _keepCase[0xc9] = 'E'; - _keepCase[0xca] = 'E'; - _keepCase[0xcb] = 'E'; - _keepCase[0xcc] = 'I'; // I grave - _keepCase[0xcd] = 'I'; - _keepCase[0xce] = 'I'; - _keepCase[0xcf] = 'I'; - _keepCase[0xd1] = 'N'; - _keepCase[0xd2] = 'O'; - _keepCase[0xd3] = 'O'; - _keepCase[0xd4] = 'O'; - _keepCase[0xd5] = 'O'; - _keepCase[0xd9] = 'U'; - _keepCase[0xda] = 'U'; - _keepCase[0xdb] = 'U'; - _keepCase[0xdd] = 'Y'; - - _keepCase[0xe0] = 'a'; - _keepCase[0xe1] = 'a'; - _keepCase[0xe2] = 'a'; - _keepCase[0xe3] = 'a'; // a tilde - _keepCase[0xe7] = 'c'; - _keepCase[0xe8] = 'e'; - _keepCase[0xe9] = 'e'; - _keepCase[0xea] = 'e'; - _keepCase[0xeb] = 'e'; - _keepCase[0xec] = 'i'; // i grave - _keepCase[0xed] = 'i'; - _keepCase[0xee] = 'i'; - _keepCase[0xef] = 'i'; - _keepCase[0xf1] = 'n'; - _keepCase[0xf2] = 'o'; - _keepCase[0xf3] = 'o'; - _keepCase[0xf4] = 'o'; - _keepCase[0xf5] = 'o'; - _keepCase[0xf9] = 'u'; - _keepCase[0xfa] = 'u'; - _keepCase[0xfb] = 'u'; - _keepCase[0xfd] = 'y'; - _keepCase[0xff] = 'y'; - - _keepCase[0x102] = 'A'; - _keepCase[0x103] = 'a'; - _keepCase[0x110] = 'D'; - _keepCase[0x111] = 'd'; - _keepCase[0x128] = 'I'; - _keepCase[0x129] = 'i'; - _keepCase[0x178] = 'Y'; - _keepCase[0x1a0] = 'O'; - _keepCase[0x1a1] = 'o'; - _keepCase[0x1af] = 'U'; - _keepCase[0x1b0] = 'u'; - - // Superscript spacing modifiers - _foldCase[0x2b0] = 'h'; - _foldCase[0x2b1] = 0x266; - _foldCase[0x2b2] = 'j'; - _foldCase[0x2b3] = 'r'; - _foldCase[0x2b4] = 0x279; - _foldCase[0x2b5] = 0x27b; - _foldCase[0x2b6] = 0x281; - _foldCase[0x2b7] = 'w'; - _foldCase[0x2b8] = 'y'; - _foldCase[0x2e0] = 0x263; - _foldCase[0x2e1] = 'l'; - _foldCase[0x2e2] = 's'; - _foldCase[0x2e3] = 'x'; - _foldCase[0x2e4] = 0x295; - - // Deaccenting-table for Ascii Extended Additional - _foldCaseHighAscii[0x1ea0 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea1 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea2 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea3 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea4 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea5 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea6 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea7 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea8 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ea9 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eaa - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eab - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eac - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1ead - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eae - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eaf - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb0 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb1 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb2 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb3 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb4 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb5 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb6 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb7 - 0x1e00] = 'a'; - _foldCaseHighAscii[0x1eb8 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1eb9 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1eba - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ebb - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ebc - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ebd - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ebe - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ebf - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec0 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec1 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec2 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec3 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec4 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec5 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec6 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec7 - 0x1e00] = 'e'; - _foldCaseHighAscii[0x1ec8 - 0x1e00] = 'i'; - _foldCaseHighAscii[0x1ec9 - 0x1e00] = 'i'; - _foldCaseHighAscii[0x1eca - 0x1e00] = 'i'; - _foldCaseHighAscii[0x1ecb - 0x1e00] = 'i'; - _foldCaseHighAscii[0x1ecc - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ecd - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ece - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ecf - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed0 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed1 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed2 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed3 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed4 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed5 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed6 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed7 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed8 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ed9 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1eda - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1edb - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1edc - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1edd - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ede - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1edf - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ee0 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ee1 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ee2 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ee3 - 0x1e00] = 'o'; - _foldCaseHighAscii[0x1ee4 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ee5 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ee6 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ee7 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ee8 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ee9 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eea - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eeb - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eec - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eed - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eee - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1eef - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ef0 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ef1 - 0x1e00] = 'u'; - _foldCaseHighAscii[0x1ef2 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef3 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef4 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef5 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef6 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef7 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef8 - 0x1e00] = 'y'; - _foldCaseHighAscii[0x1ef9 - 0x1e00] = 'y'; - - _keepCaseHighAscii[0x1ea0 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ea1 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1ea2 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ea3 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1ea4 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ea5 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1ea6 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ea7 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1ea8 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ea9 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eaa - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eab - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eac - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1ead - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eae - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eaf - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eb0 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eb1 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eb2 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eb3 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eb4 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eb5 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eb6 - 0x1e00] = 'A'; - _keepCaseHighAscii[0x1eb7 - 0x1e00] = 'a'; - _keepCaseHighAscii[0x1eb8 - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1eb9 - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1eba - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ebb - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ebc - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ebd - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ebe - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ebf - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ec0 - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ec1 - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ec2 - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ec3 - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ec4 - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ec5 - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ec6 - 0x1e00] = 'E'; - _keepCaseHighAscii[0x1ec7 - 0x1e00] = 'e'; - _keepCaseHighAscii[0x1ec8 - 0x1e00] = 'I'; - _keepCaseHighAscii[0x1ec9 - 0x1e00] = 'i'; - _keepCaseHighAscii[0x1eca - 0x1e00] = 'I'; - _keepCaseHighAscii[0x1ecb - 0x1e00] = 'i'; - _keepCaseHighAscii[0x1ecc - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ecd - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ece - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ecf - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ed0 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ed1 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ed2 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ed3 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ed4 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ed5 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ed6 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ed7 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ed8 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ed9 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1eda - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1edb - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1edc - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1edd - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ede - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1edf - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ee0 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ee1 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ee2 - 0x1e00] = 'O'; - _keepCaseHighAscii[0x1ee3 - 0x1e00] = 'o'; - _keepCaseHighAscii[0x1ee4 - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1ee5 - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1ee6 - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1ee7 - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1ee8 - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1ee9 - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1eea - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1eeb - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1eec - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1eed - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1eee - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1eef - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1ef0 - 0x1e00] = 'U'; - _keepCaseHighAscii[0x1ef1 - 0x1e00] = 'u'; - _keepCaseHighAscii[0x1ef2 - 0x1e00] = 'Y'; - _keepCaseHighAscii[0x1ef3 - 0x1e00] = 'y'; - _keepCaseHighAscii[0x1ef4 - 0x1e00] = 'Y'; - _keepCaseHighAscii[0x1ef5 - 0x1e00] = 'y'; - _keepCaseHighAscii[0x1ef6 - 0x1e00] = 'Y'; - _keepCaseHighAscii[0x1ef7 - 0x1e00] = 'y'; - _keepCaseHighAscii[0x1ef8 - 0x1e00] = 'Y'; - _keepCaseHighAscii[0x1ef9 - 0x1e00] = 'y'; - - } - - // Base case hiragana - hiragana ID - for (i = 0; i < 96; i++) { - _kanaMap[i] = 0x3040 + i; - } - - // Modify some hiragana - hiragana - if (_doSmallToNormalKana) { - // A I U E O YA YU YO WA, and TSU (previously we did not convert TSU) - _kanaMap[0x3041 - 0x3040] = 0x3042; - _kanaMap[0x3043 - 0x3040] = 0x3044; - _kanaMap[0x3045 - 0x3040] = 0x3046; - _kanaMap[0x3047 - 0x3040] = 0x3048; - _kanaMap[0x3049 - 0x3040] = 0x304A; - _kanaMap[0x3063 - 0x3040] = 0x3064; - _kanaMap[0x3083 - 0x3040] = 0x3084; - _kanaMap[0x3085 - 0x3040] = 0x3086; - _kanaMap[0x3087 - 0x3040] = 0x3088; - _kanaMap[0x308E - 0x3040] = 0x308F; - } - - if (_doKatakanaToHiragana) { - // base katakana to hiragana - for (i = 96; i < 192; i++) { - _kanaMap[i] = 0x3040 + i - 0x60; - } - - // modify some katakana - hiragana - - // 0x30A0 -> id - _kanaMap[0x30A0 - 0x3040] = 0x30A0; - // 0x30F7 to 0x30FC -> id - _kanaMap[0x30F7 - 0x3040] = 0x30F7; - _kanaMap[0x30F8 - 0x3040] = 0x30F8; - _kanaMap[0x30F9 - 0x3040] = 0x30F9; - _kanaMap[0x30FA - 0x3040] = 0x30FA; - _kanaMap[0x30FB - 0x3040] = 0x30FB; - _kanaMap[0x30FC - 0x3040] = 0x30FC; - // 0x30FF -> id - _kanaMap[0x30FF - 0x3040] = 0x30FF; - - if (_doSmallToNormalKana) { - // A I U E O YA YU YO WA, and TSU (previously we did not convert TSU) - _kanaMap[0x30A1 - 0x3040] = 0x3042; - _kanaMap[0x30A3 - 0x3040] = 0x3044; - _kanaMap[0x30A5 - 0x3040] = 0x3046; - _kanaMap[0x30A7 - 0x3040] = 0x3048; - _kanaMap[0x30A9 - 0x3040] = 0x304A; - _kanaMap[0x30C3 - 0x3040] = 0x30C4; - _kanaMap[0x30E3 - 0x3040] = 0x3084; - _kanaMap[0x30E5 - 0x3040] = 0x3086; - _kanaMap[0x30E7 - 0x3040] = 0x3088; - _kanaMap[0x30EE - 0x3040] = 0x308F; - // KA KE - _kanaMap[0x30F5 - 0x3040] = 0x304B; - _kanaMap[0x30F6 - 0x3040] = 0x3051; - } else { // !_doSmallToNormalKana - // A I U E O YA YU YO WA, not TSU is normal katakana - hiragana - // KA KE; No small hiragana exists, so id - _kanaMap[0x30F5 - 0x3040] = 0x30F5; - _kanaMap[0x30F6 - 0x3040] = 0x30F6; - } - } else { // !_doKatakanaToHiragana - // katakana - katakana ID - for (i = 96; i < 192; i++) { - _kanaMap[i] = 0x3040 + i; - } - - // modify some katakana - katakana - if (_doSmallToNormalKana) { - // A I U E O YA YU YO WA, not TSU - _kanaMap[0x30A1 - 0x3040] = 0x30A2; - _kanaMap[0x30A3 - 0x3040] = 0x30A4; - _kanaMap[0x30A5 - 0x3040] = 0x30A6; - _kanaMap[0x30A7 - 0x3040] = 0x30A8; - _kanaMap[0x30A9 - 0x3040] = 0x30AA; - _kanaMap[0x30E3 - 0x3040] = 0x30E4; - _kanaMap[0x30E5 - 0x3040] = 0x30E6; - _kanaMap[0x30E7 - 0x3040] = 0x30E8; - _kanaMap[0x30EE - 0x3040] = 0x30EF; - // KA KE - _kanaMap[0x30F5 - 0x3040] = 0x30AB; - _kanaMap[0x30F6 - 0x3040] = 0x30B1; - } - } - - - - // Fullwidth ASCII - for (i = 0; i < 0x21; i++) - _halfwidth_fullwidthMap[i] = 0x20 + i; - for (i = 0x21; i < 0x3B; i++) // full uppercase to half lowercase - _halfwidth_fullwidthMap[i] = 0x40 + i; - for (i = 0x3B; i < 0x5F; i++) - _halfwidth_fullwidthMap[i] = 0x20 + i; - // 0xFF00, 0xFF5F -> id - _halfwidth_fullwidthMap[0x00] = 0xFF00; - _halfwidth_fullwidthMap[0x5F] = 0xFF5F; - - // Halfwidth CJK Punctuation - // 0xFF60 -> id - _halfwidth_fullwidthMap[0x60] = 0xFF60; - _halfwidth_fullwidthMap[0x61] = 0x3002; - _halfwidth_fullwidthMap[0x62] = 0x300C; - _halfwidth_fullwidthMap[0x63] = 0x300D; - _halfwidth_fullwidthMap[0x64] = 0x3001; - - // Halfwidth katakana (maps directly to hiragana) - - // Common cases for halfwidth katakana - _halfwidth_fullwidthMap[0x65] = 0x30FB; - - if (_doKatakanaToHiragana) { - _halfwidth_fullwidthMap[0x66] = 0x3092; - _halfwidth_fullwidthMap[0x6F] = 0x3063; - _halfwidth_fullwidthMap[0x70] = 0x30FC; - _halfwidth_fullwidthMap[0x71] = 0x3042; - _halfwidth_fullwidthMap[0x72] = 0x3044; - _halfwidth_fullwidthMap[0x73] = 0x3046; - _halfwidth_fullwidthMap[0x74] = 0x3048; - _halfwidth_fullwidthMap[0x75] = 0x304A; - _halfwidth_fullwidthMap[0x76] = 0x304B; - _halfwidth_fullwidthMap[0x77] = 0x304D; - _halfwidth_fullwidthMap[0x78] = 0x304F; - _halfwidth_fullwidthMap[0x79] = 0x3051; - _halfwidth_fullwidthMap[0x7A] = 0x3053; - _halfwidth_fullwidthMap[0x7B] = 0x3055; - _halfwidth_fullwidthMap[0x7C] = 0x3057; - _halfwidth_fullwidthMap[0x7D] = 0x3059; - _halfwidth_fullwidthMap[0x7E] = 0x305B; - _halfwidth_fullwidthMap[0x7F] = 0x305D; - _halfwidth_fullwidthMap[0x80] = 0x305F; - _halfwidth_fullwidthMap[0x81] = 0x3061; - _halfwidth_fullwidthMap[0x82] = 0x3064; - _halfwidth_fullwidthMap[0x83] = 0x3066; - _halfwidth_fullwidthMap[0x84] = 0x3068; - _halfwidth_fullwidthMap[0x85] = 0x306A; - _halfwidth_fullwidthMap[0x86] = 0x306B; - _halfwidth_fullwidthMap[0x87] = 0x306C; - _halfwidth_fullwidthMap[0x88] = 0x306D; - _halfwidth_fullwidthMap[0x89] = 0x306E; - _halfwidth_fullwidthMap[0x8A] = 0x306F; - _halfwidth_fullwidthMap[0x8B] = 0x3072; - _halfwidth_fullwidthMap[0x8C] = 0x3075; - _halfwidth_fullwidthMap[0x8D] = 0x3078; - _halfwidth_fullwidthMap[0x8E] = 0x307B; - _halfwidth_fullwidthMap[0x8F] = 0x307E; - _halfwidth_fullwidthMap[0x90] = 0x307F; - _halfwidth_fullwidthMap[0x91] = 0x3080; - _halfwidth_fullwidthMap[0x92] = 0x3081; - _halfwidth_fullwidthMap[0x93] = 0x3082; - _halfwidth_fullwidthMap[0x94] = 0x3084; - _halfwidth_fullwidthMap[0x95] = 0x3086; - _halfwidth_fullwidthMap[0x96] = 0x3088; - _halfwidth_fullwidthMap[0x97] = 0x3089; - _halfwidth_fullwidthMap[0x98] = 0x308A; - _halfwidth_fullwidthMap[0x99] = 0x308B; - _halfwidth_fullwidthMap[0x9A] = 0x308C; - _halfwidth_fullwidthMap[0x9B] = 0x308D; - _halfwidth_fullwidthMap[0x9C] = 0x308F; - _halfwidth_fullwidthMap[0x9D] = 0x3093; - _halfwidth_fullwidthMap[0x9E] = 0x3099; - _halfwidth_fullwidthMap[0x9F] = 0x309A; - if (_doSmallToNormalKana) { - _halfwidth_fullwidthMap[0x67] = 0x3042; - _halfwidth_fullwidthMap[0x68] = 0x3044; - _halfwidth_fullwidthMap[0x69] = 0x3046; - _halfwidth_fullwidthMap[0x6A] = 0x3048; - _halfwidth_fullwidthMap[0x6B] = 0x304A; - _halfwidth_fullwidthMap[0x6C] = 0x3084; - _halfwidth_fullwidthMap[0x6D] = 0x3086; - _halfwidth_fullwidthMap[0x6E] = 0x3088; - } else { // !_doSmallToNormalKana - _halfwidth_fullwidthMap[0x67] = 0x3041; - _halfwidth_fullwidthMap[0x68] = 0x3043; - _halfwidth_fullwidthMap[0x69] = 0x3045; - _halfwidth_fullwidthMap[0x6A] = 0x3047; - _halfwidth_fullwidthMap[0x6B] = 0x3049; - _halfwidth_fullwidthMap[0x6C] = 0x3083; - _halfwidth_fullwidthMap[0x6D] = 0x3085; - _halfwidth_fullwidthMap[0x6E] = 0x3087; - } - } else { // !_doKatakanaToHiragana - _halfwidth_fullwidthMap[0x66] = 0x30F2; - _halfwidth_fullwidthMap[0x6F] = 0x30C3; - _halfwidth_fullwidthMap[0x70] = 0x30FC; - _halfwidth_fullwidthMap[0x71] = 0x30A2; - _halfwidth_fullwidthMap[0x72] = 0x30A4; - _halfwidth_fullwidthMap[0x73] = 0x30A6; - _halfwidth_fullwidthMap[0x74] = 0x30A8; - _halfwidth_fullwidthMap[0x75] = 0x30AA; - _halfwidth_fullwidthMap[0x76] = 0x30AB; - _halfwidth_fullwidthMap[0x77] = 0x30AD; - _halfwidth_fullwidthMap[0x78] = 0x30AF; - _halfwidth_fullwidthMap[0x79] = 0x30B1; - _halfwidth_fullwidthMap[0x7A] = 0x30B3; - _halfwidth_fullwidthMap[0x7B] = 0x30B5; - _halfwidth_fullwidthMap[0x7C] = 0x30B7; - _halfwidth_fullwidthMap[0x7D] = 0x30B9; - _halfwidth_fullwidthMap[0x7E] = 0x30BB; - _halfwidth_fullwidthMap[0x7F] = 0x30BD; - _halfwidth_fullwidthMap[0x80] = 0x30BF; - _halfwidth_fullwidthMap[0x81] = 0x30C1; - _halfwidth_fullwidthMap[0x82] = 0x30C4; - _halfwidth_fullwidthMap[0x83] = 0x30C6; - _halfwidth_fullwidthMap[0x84] = 0x30C8; - _halfwidth_fullwidthMap[0x85] = 0x30CA; - _halfwidth_fullwidthMap[0x86] = 0x30CB; - _halfwidth_fullwidthMap[0x87] = 0x30CC; - _halfwidth_fullwidthMap[0x88] = 0x30CD; - _halfwidth_fullwidthMap[0x89] = 0x30CE; - _halfwidth_fullwidthMap[0x8A] = 0x30CF; - _halfwidth_fullwidthMap[0x8B] = 0x30D2; - _halfwidth_fullwidthMap[0x8C] = 0x30D5; - _halfwidth_fullwidthMap[0x8D] = 0x30D8; - _halfwidth_fullwidthMap[0x8E] = 0x30DB; - _halfwidth_fullwidthMap[0x8F] = 0x30DE; - _halfwidth_fullwidthMap[0x90] = 0x30DF; - _halfwidth_fullwidthMap[0x91] = 0x30E0; - _halfwidth_fullwidthMap[0x92] = 0x30E1; - _halfwidth_fullwidthMap[0x93] = 0x30E2; - _halfwidth_fullwidthMap[0x94] = 0x30E4; - _halfwidth_fullwidthMap[0x95] = 0x30E6; - _halfwidth_fullwidthMap[0x96] = 0x30E8; - _halfwidth_fullwidthMap[0x97] = 0x30E9; - _halfwidth_fullwidthMap[0x98] = 0x30EA; - _halfwidth_fullwidthMap[0x99] = 0x30EB; - _halfwidth_fullwidthMap[0x9A] = 0x30EC; - _halfwidth_fullwidthMap[0x9B] = 0x30ED; - _halfwidth_fullwidthMap[0x9C] = 0x30EF; - _halfwidth_fullwidthMap[0x9D] = 0x30F3; - _halfwidth_fullwidthMap[0x9E] = 0x3099; - _halfwidth_fullwidthMap[0x9F] = 0x309A; - if (_doSmallToNormalKana) { - _halfwidth_fullwidthMap[0x67] = 0x30a2; - _halfwidth_fullwidthMap[0x68] = 0x30a4; - _halfwidth_fullwidthMap[0x69] = 0x30a6; - _halfwidth_fullwidthMap[0x6A] = 0x30a8; - _halfwidth_fullwidthMap[0x6B] = 0x30aA; - _halfwidth_fullwidthMap[0x6C] = 0x30e4; - _halfwidth_fullwidthMap[0x6D] = 0x30e6; - _halfwidth_fullwidthMap[0x6E] = 0x30e8; - } else { // !_doSmallToNormalKana - _halfwidth_fullwidthMap[0x67] = 0x30a1; - _halfwidth_fullwidthMap[0x68] = 0x30a3; - _halfwidth_fullwidthMap[0x69] = 0x30a5; - _halfwidth_fullwidthMap[0x6A] = 0x30a7; - _halfwidth_fullwidthMap[0x6B] = 0x30a9; - _halfwidth_fullwidthMap[0x6C] = 0x30e3; - _halfwidth_fullwidthMap[0x6D] = 0x30e5; - _halfwidth_fullwidthMap[0x6E] = 0x30e7; - } - } - - // Halfwidth Hangul - _halfwidth_fullwidthMap[0xA0] = 0x3164; - // fill in 0xFFA1 - 0xFFBE => 0x3131 - 0x314E - for (i = 0xA1; i < 0xBF; i++) - _halfwidth_fullwidthMap[i] = 0x3090 + i; - _halfwidth_fullwidthMap[0xBF] = 0xFFBF; - _halfwidth_fullwidthMap[0xC0] = 0xFFC0; - _halfwidth_fullwidthMap[0xC1] = 0xFFC1; - // fill in 0xFFC2 - 0xFFC7 => 0x314F - 0x3154 - for (i = 0xC2; i < 0xC8; i++) - _halfwidth_fullwidthMap[i] = 0x308D + i; - _halfwidth_fullwidthMap[0xC8] = 0xFFC8; - _halfwidth_fullwidthMap[0xC9] = 0xFFC9; - // fill in 0xFFCA - 0xFFCF => 0x3155 - 0x315A - for (i = 0xCA; i < 0xD0; i++) - _halfwidth_fullwidthMap[i] = 0x308B + i; - _halfwidth_fullwidthMap[0xD0] = 0xFFD0; - _halfwidth_fullwidthMap[0xD1] = 0xFFD1; - // fill in 0xFFD2 - 0xFFD7 => 0x315B - 0x3160 - for (i = 0xD2; i < 0xD8; i++) - _halfwidth_fullwidthMap[i] = 0x3089 + i; - _halfwidth_fullwidthMap[0xD8] = 0xFFD8; - _halfwidth_fullwidthMap[0xD9] = 0xFFD9; - // fill in 0xFFDA - 0xFFDC => 0x3161 - 0x3163 - for (i = 0xDA; i < 0xDD; i++) - _halfwidth_fullwidthMap[i] = 0x3087 + i; - - // Fullwidth symbols - _halfwidth_fullwidthMap[0xE0] = 0x00A2; - _halfwidth_fullwidthMap[0xE1] = 0x00A3; - _halfwidth_fullwidthMap[0xE2] = 0x00AC; - _halfwidth_fullwidthMap[0xE3] = 0x00AF; - _halfwidth_fullwidthMap[0xE4] = 0x00A6; - _halfwidth_fullwidthMap[0xE5] = 0x00A5; - _halfwidth_fullwidthMap[0xE6] = 0x20A9; - - // 0xFFE7 -> id - _halfwidth_fullwidthMap[0xE7] = 0xFFE7; - - // Halfwidth symbols - _halfwidth_fullwidthMap[0xE8] = 0x2502; - _halfwidth_fullwidthMap[0xE9] = 0x2190; - _halfwidth_fullwidthMap[0xEA] = 0x2191; - _halfwidth_fullwidthMap[0xEB] = 0x2192; - _halfwidth_fullwidthMap[0xEC] = 0x2193; - _halfwidth_fullwidthMap[0xED] = 0x25A0; - _halfwidth_fullwidthMap[0xEE] = 0x25CB; - - // 0xFFEF -> id - _halfwidth_fullwidthMap[0xEF] = 0xFFEF; - - - // - // DONE - // - _isInitialized = true; - } - } -} - -Fast_NormalizeWordFolder::Fast_NormalizeWordFolder() -{ - Initialize(); -} - - -Fast_NormalizeWordFolder::~Fast_NormalizeWordFolder(void) -{ -} - -size_t -Fast_NormalizeWordFolder::FoldedSizeAsUTF8(const char *word) const -{ - ucs4_t c; - size_t res; - const unsigned char *uword; - - res = 0; - uword = reinterpret_cast<const unsigned char *>(word); - c = Fast_UnicodeUtil::GetUTF8Char(uword); - while (c != 0) { - if (c != Fast_UnicodeUtil::_BadUTF8Char) { - const char *repl = ReplacementString(c); - if (repl != NULL) { - res += strlen(repl); - } else { - c = ToFold(c); - res += Fast_UnicodeUtil::utf8clen(c); - } - } - c = Fast_UnicodeUtil::GetUTF8Char(uword); - } - return res; -} - - -char * -Fast_NormalizeWordFolder::FoldUTF8WordToUTF8Quick(char *wordbufpos, - const char *word) - const -{ - ucs4_t c; - const unsigned char *uword; - - uword = reinterpret_cast<const unsigned char *>(word); - c = Fast_UnicodeUtil::GetUTF8Char(uword); - while (c != 0) { - if (c != Fast_UnicodeUtil::_BadUTF8Char) { - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) - memcpy(wordbufpos, repl, repllen); - wordbufpos += repllen; - } else { - c = ToFold(c); - wordbufpos = Fast_UnicodeUtil::utf8cput(wordbufpos, c); - } - } - c = Fast_UnicodeUtil::GetUTF8Char(uword); - } - return wordbufpos; -} - -const char* -Fast_NormalizeWordFolder::Tokenize(const char *buf, - const char *bufend, - char *dstbuf, - char *dstbufend, - const char*& origstart, - size_t& tokenlen) const -{ - - ucs4_t c = 0; - const unsigned char *p; - char *q = NULL; - char *eq = NULL; - const unsigned char *ep; - p = reinterpret_cast<const unsigned char *>(buf); - ep = reinterpret_cast<const unsigned char *>(bufend); - - // Skip characters between words - for (;;) { - if (p >= ep) { // End of input buffer, no more words - *dstbuf = 0; - return reinterpret_cast<const char *>(p); - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (_isWord[c]) - { - origstart = reinterpret_cast<const char *>(p) - 1; - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (IsWordCharOrIA(c)) - { - origstart = reinterpret_cast<const char *>(prev_p); - break; - } - } - } - - // Start saving word. - q = dstbuf; - eq = dstbufend - 6; // Make room for long UTF8 char and NUL - // Doesn't check for space for the first char, assumes that - // word buffer is at least 13 characters - if (c < 128) { // Common case, ASCII - *q++ = _foldCase[c]; - } else { - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) - memcpy(q, repl, repllen); - q += repllen; - } else { - c = ToFold(c); - q = Fast_UnicodeUtil::utf8cput(q, c); - } - } - - // Special case for interlinear annotation - if (c == 0xFFF9) { // ANCHOR - // Collect up to and including terminator - for(;;) { - if (p >= ep) { - c = 0; - break; - } - if (*p < 128) { // Note, no exit on plain ASCII - c = *p++; - *q++ = c; - if (q >= eq) { // Junk rest of annotation block - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (c == 0xFFFB) { - break; // out of junking loop - } - } - } - break; // out of annotation block processing - } - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - q = Fast_UnicodeUtil::utf8cput(q, c); - if (c == 0xFFFB) { // TERMINATOR => Exit condition - break; - } - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (c == 0xFFFB) { - break; - } - } - } - break; - } - } - } - } else - - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - *q++ = _foldCase[c]; - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = prev_p; - break; - } - } - } - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = prev_p; - break; - } - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) - memcpy(q, repl, repllen); - q += repllen; - } else { - c = ToFold(c); - q = Fast_UnicodeUtil::utf8cput(q, c); - } - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - } else { - const unsigned char* xprev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = xprev_p; - break; - } - } - } - break; - } - } - } - *q = 0; - tokenlen = q - dstbuf; - return reinterpret_cast<const char *>(p); -} - - - -const char* -Fast_NormalizeWordFolder::UCS4Tokenize(const char *buf, - const char *bufend, - ucs4_t *dstbuf, - ucs4_t *dstbufend, - const char*& origstart, - size_t& tokenlen) const -{ - return Tokenize(buf, bufend, dstbuf, dstbufend, origstart, tokenlen); -} - -const char* -Fast_NormalizeWordFolder::Tokenize(const char *buf, - const char *bufend, - ucs4_t *dstbuf, - ucs4_t *dstbufend, - const char*& origstart, - size_t& tokenlen) const -{ - - ucs4_t c = 0; - const unsigned char *p; - ucs4_t *q = NULL; - ucs4_t *eq = NULL; - const unsigned char *ep; - p = reinterpret_cast<const unsigned char *>(buf); - ep = reinterpret_cast<const unsigned char *>(bufend); - - // Skip characters between words - for (;;) { - if (p >= ep) { // End of input buffer, no more words - *dstbuf = 0; - return reinterpret_cast<const char *>(p); - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (_isWord[c]) - { - origstart = reinterpret_cast<const char *>(p) - 1; - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (IsWordCharOrIA(c)) - { - origstart = reinterpret_cast<const char *>(prev_p); - break; - } - } - } - - // Start saving word. - q = dstbuf; - eq = dstbufend - 3; // Make room for UCS4 char replacement string and NUL - // Doesn't check for space for the first char, assumes that - // word buffer is at least 13 characters - if (c < 128) { // Common case, ASCII - *q++ = _foldCase[c]; - } else { - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) - q = Fast_UnicodeUtil::ucs4copy(q,repl); - } else { - c = ToFold(c); - *q++ = c; - } - } - - // Special case for interlinear annotation - if (c == 0xFFF9) { // ANCHOR - // Collect up to and including terminator - for(;;) { - if (p >= ep) { - c = 0; - break; - } - if (*p < 128) { // Note, no exit on plain ASCII - c = *p++; - *q++ = c; - if (q >= eq) { // Junk rest of annotation block - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (c == 0xFFFB) { - break; // out of junking loop - } - } - } - break; // out of annotation block processing - } - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - *q++ = c; - if (c == 0xFFFB) { // TERMINATOR => Exit condition - break; - } - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - } else { - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (c == 0xFFFB) { - break; - } - } - } - break; - } - } - } - } else - - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - *q++ = _foldCase[c]; - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = prev_p; - break; - } - } - } - break; - } - } else { - const unsigned char* prev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = prev_p; - break; - } - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) - q = Fast_UnicodeUtil::ucs4copy(q,repl); - } else { - c = ToFold(c); - *q++ = c; - } - if (q >= eq) { // Junk rest of word - for (;;) { - if (p >= ep) { // End of input buffer - c = 0; - break; - } - if (*p < 128) { // Common case, ASCII - c = *p++; - if (!_isWord[c]) - { - p--; - break; - } - } else { - const unsigned char* xprev_p = p; - c = Fast_UnicodeUtil::GetUTF8Char(p); - if (!Fast_UnicodeUtil::IsWordChar(c)) - { - p = xprev_p; - break; - } - } - } - break; - } - } - } - *q = 0; - tokenlen = q - dstbuf; - return reinterpret_cast<const char *>(p); -} diff --git a/fastlib/src/vespa/fastlib/text/tests/characterclasstest.cpp b/fastlib/src/vespa/fastlib/text/tests/characterclasstest.cpp deleted file mode 100644 index 4120e574233..00000000000 --- a/fastlib/src/vespa/fastlib/text/tests/characterclasstest.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "characterclasstest.h" - -int character_class_test_app::Main() -{ - character_class_test t("Test for the character_class"); - t.SetStream(&std::cout); - t.Run(); - return t.Report(); -} - -FASTOS_MAIN(character_class_test_app) diff --git a/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.cpp b/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.cpp deleted file mode 100644 index 9e7fa176a8c..00000000000 --- a/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.cpp +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "unicodeutiltest.h" - -int main(int, char **) { - UnicodeUtilTest t; - t.SetStream(&std::cout); - t.Run(); - return t.Report(); -} diff --git a/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.h b/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.h deleted file mode 100644 index da7bae6d313..00000000000 --- a/fastlib/src/vespa/fastlib/text/tests/unicodeutiltest.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastlib/testsuite/test.h> -#include <vespa/fastlib/text/unicodeutil.h> - -class UnicodeUtilTest : public Test -{ - bool GetUTF8Char_WrongInput() { - const char *testdata = "ab\xF8"; - - ucs4_t the_char = 0; - - const unsigned char *src = reinterpret_cast<const unsigned char *>(testdata); - while (*src != 0) { - the_char = Fast_UnicodeUtil::GetUTF8Char(src); - // fprintf(stderr, "GetUTF8Char_WrongInput(): the_char = U+%04X\n", the_char); - } - return (the_char == Fast_UnicodeUtil::_BadUTF8Char); - } - bool IsTerminalPunctuationChar(char ch, bool b) { - if (Fast_UnicodeUtil::IsTerminalPunctuationChar(ch) != b) { - printf("expected char '%c' %s terminal punctuation char\n", ch, b ? "to be" : "not to be"); - return false; - } - return true; - } - - bool IsTerminalPunctuationChar() { - // test a small selection - bool retval = true; - retval &= IsTerminalPunctuationChar('!', true); - retval &= IsTerminalPunctuationChar(',', true); - retval &= IsTerminalPunctuationChar('.', true); - retval &= IsTerminalPunctuationChar(':', true); - retval &= IsTerminalPunctuationChar(';', true); - retval &= IsTerminalPunctuationChar(' ', false); - retval &= IsTerminalPunctuationChar('a', false); - retval &= IsTerminalPunctuationChar('A', false); - return retval; - } - -public: - void Run() override { - // do the tests - _test(GetUTF8Char_WrongInput()); - _test(IsTerminalPunctuationChar()); - } -}; diff --git a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.cpp b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.cpp deleted file mode 100644 index ea254b3d114..00000000000 --- a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.cpp +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "wordfolderstest.h" - -int main(int, char **) { - WordFoldersTest t; - t.SetStream(&std::cout); - t.Run(); - return t.Report(); -} diff --git a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h b/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h deleted file mode 100644 index b82415a9822..00000000000 --- a/fastlib/src/vespa/fastlib/text/tests/wordfolderstest.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastlib/testsuite/test.h> -#include <vespa/fastlib/text/normwordfolder.h> -#include <memory> -#include <cstring> - -class WordFoldersTest : public Test -{ - bool NormalizeWordFolderConstruction() { - Fast_NormalizeWordFolder::Setup( - Fast_NormalizeWordFolder::DO_ACCENT_REMOVAL - | Fast_NormalizeWordFolder::DO_KATAKANA_TO_HIRAGANA - | Fast_NormalizeWordFolder::DO_SMALL_TO_NORMAL_KANA - | Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION - | Fast_NormalizeWordFolder::DO_LIGATURE_SUBSTITUTION - | Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION); - - Fast_NormalizeWordFolder *nwf = new Fast_NormalizeWordFolder(); - delete nwf; - - return true; - } - - bool TokenizeAnnotatedBuffer() { - Fast_NormalizeWordFolder *nwf = new Fast_NormalizeWordFolder(); - const char *testinput = "This is a " - "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB" - " superduperextrafeaturecoolandlongplainword fun " - "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" - "world wide web extra long annotation block" "\xEF\xBF\xBB" - " test\nIt is cool.\n"; - const char *correct[] = { - "this", "is", "a", - "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB", - "superduperextrafeaturecool", "fun", - "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" "world wide web ex", - "test", "it", "is", "cool" }; - const char *teststart = testinput; - const char *testend = testinput + strlen(testinput); - char destbuf[32]; - char *destbufend = destbuf + 32; - const char *origstart = testinput; - size_t tokenlen = 0; - - int tokencounter = 0; - bool success = true; - while ( - (teststart - = nwf->Tokenize(teststart, testend, - destbuf, destbufend, - origstart, tokenlen)) < testend) { - // printf("found: %s, correct: %s\n", destbuf, correct[tokencounter]); - success &= strcmp(destbuf, correct[tokencounter++]) == 0; - } - - delete nwf; - - return success; - } - - bool TokenizeAnnotatedUCS4Buffer() { - Fast_NormalizeWordFolder *nwf = new Fast_NormalizeWordFolder(); - const char *testinput = "This is a " - "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB" - " superduperextrafeaturecoolandlongplainword fun " - "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" - "world wide web extra long annotation block" "\xEF\xBF\xBB" - " test\nIt is cool.\n"; - const char *correct[] = { - "this", "is", "a", - "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB", - "superduperextrafeaturecooland", "fun", - "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" "world wide web extra lon", - "test", "it", "is", "cool" }; - - const char *teststart = testinput; - const char *testend = testinput + strlen(testinput); - ucs4_t destbuf[32]; - ucs4_t *destbufend = destbuf + 32; - - const char *origstart = testinput; - size_t tokenlen = 0; - - int tokencounter = 0; - bool success = true; - while ( - (teststart - = nwf->UCS4Tokenize(teststart, testend, - destbuf, destbufend, - origstart, tokenlen)) < testend) { - success &= Fast_UnicodeUtil::utf8cmp(correct[tokencounter++], destbuf) == 0; - } - - delete nwf; - - return success; - } - - bool AccentRemovalTest() { - // Note last encoded characters encoded as octets to avoid interpreting following letters after xNN as part of the encoding of the character - // See http://en.cppreference.com/w/cpp/language/escape - auto freefunction = [] (char * ptr) { free(ptr); }; - auto input = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\x70\xFE\x21"), - freefunction); - auto yelloutput = std::unique_ptr<char, decltype(freefunction)>(Fast_UnicodeUtil::strdupLAT1("\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\277AAAAAEAAAECEEEEIIIIDNOOOOOE\327OEUUUUEYTHssaaaaaeaaaeceeeeiiiidnoooooe\367oeuuuueythpth!"), - freefunction); - Fast_NormalizeWordFolder wordfolder; - int len = wordfolder.FoldedSizeAsUTF8(input.get()); - auto fastliboutput = std::unique_ptr<char[]>(new char[len + 1]); - wordfolder.FoldUTF8WordToUTF8Quick(fastliboutput.get(), input.get()); - fastliboutput[len] = '\0'; - printf("\n%s\n", yelloutput.get()); - printf("%s\n", fastliboutput.get()); - return strcasecmp(yelloutput.get(), fastliboutput.get()) == 0; - } - - -public: - - void Run() override { - // do the tests - _test(NormalizeWordFolderConstruction()); - _test(TokenizeAnnotatedBuffer()); - _test(TokenizeAnnotatedUCS4Buffer()); - _test(AccentRemovalTest()); - } -}; diff --git a/fastlib/src/vespa/fastlib/text/wordfolder.cpp b/fastlib/src/vespa/fastlib/text/wordfolder.cpp deleted file mode 100644 index e0b1aba9356..00000000000 --- a/fastlib/src/vespa/fastlib/text/wordfolder.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "wordfolder.h" - -Fast_WordFolder::~Fast_WordFolder() -{ -} diff --git a/fastlib/src/vespa/packages/.gitignore b/fastlib/src/vespa/packages/.gitignore deleted file mode 100644 index 1559ce1aa84..00000000000 --- a/fastlib/src/vespa/packages/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.a -*.lib -*.so.* -.depend -Makefile diff --git a/fastlib/src/vespa/packages/CMakeLists.txt b/fastlib/src/vespa/packages/CMakeLists.txt deleted file mode 100644 index a7c648a1c4d..00000000000 --- a/fastlib/src/vespa/packages/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(fastlib_fast - SOURCES - $<TARGET_OBJECTS:fastlib_io> - $<TARGET_OBJECTS:fastlib_text> - INSTALL lib64 - DEPENDS -) diff --git a/searchcore/CMakeLists.txt b/searchcore/CMakeLists.txt index 48a5e03fca0..0f7d90b1491 100644 --- a/searchcore/CMakeLists.txt +++ b/searchcore/CMakeLists.txt @@ -7,7 +7,6 @@ vespa_define_module( vespalib metrics config_cloudconfig - fastlib_fast configdefinitions document searchlib diff --git a/searchcore/src/apps/proton/CMakeLists.txt b/searchcore/src/apps/proton/CMakeLists.txt index 90aefd3dde5..400eb0eacc3 100644 --- a/searchcore/src/apps/proton/CMakeLists.txt +++ b/searchcore/src/apps/proton/CMakeLists.txt @@ -24,5 +24,4 @@ vespa_add_executable(searchcore_proton_app searchcore_proton_metrics searchcore_fconfig storageserver_storageapp - searchlib_searchlib_uca ) diff --git a/searchcore/src/apps/tests/CMakeLists.txt b/searchcore/src/apps/tests/CMakeLists.txt index 532c89d6ab7..933be1a6f89 100644 --- a/searchcore/src/apps/tests/CMakeLists.txt +++ b/searchcore/src/apps/tests/CMakeLists.txt @@ -22,5 +22,4 @@ vespa_add_executable(searchcore_persistenceconformance_test_app TEST searchcore_fconfig vdstestlib persistence_persistence_conformancetest - searchlib_searchlib_uca ) diff --git a/searchcore/src/tests/proton/docsummary/CMakeLists.txt b/searchcore/src/tests/proton/docsummary/CMakeLists.txt index 4286bbea224..274c1e302da 100644 --- a/searchcore/src/tests/proton/docsummary/CMakeLists.txt +++ b/searchcore/src/tests/proton/docsummary/CMakeLists.txt @@ -22,7 +22,6 @@ vespa_add_executable(searchcore_docsummary_test_app TEST searchcore_grouping searchcore_proton_metrics searchcore_fconfig - searchlib_searchlib_uca ) vespa_add_executable(searchcore_summaryfieldconverter_test_app SOURCES diff --git a/searchcore/src/tests/proton/documentdb/CMakeLists.txt b/searchcore/src/tests/proton/documentdb/CMakeLists.txt index d36368a8ebd..0cb678992b9 100644 --- a/searchcore/src/tests/proton/documentdb/CMakeLists.txt +++ b/searchcore/src/tests/proton/documentdb/CMakeLists.txt @@ -20,7 +20,6 @@ vespa_add_executable(searchcore_documentdb_test_app TEST searchcore_grouping searchcore_proton_metrics searchcore_fconfig - searchlib_searchlib_uca ) vespa_add_test(NAME searchcore_documentdb_test_app COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/documentdb_test.sh DEPENDS searchcore_documentdb_test_app) diff --git a/searchcore/src/tests/proton/documentdb/configurer/CMakeLists.txt b/searchcore/src/tests/proton/documentdb/configurer/CMakeLists.txt index 6472134f6e1..f217c4da740 100644 --- a/searchcore/src/tests/proton/documentdb/configurer/CMakeLists.txt +++ b/searchcore/src/tests/proton/documentdb/configurer/CMakeLists.txt @@ -18,6 +18,5 @@ vespa_add_executable(searchcore_configurer_test_app TEST searchcore_grouping searchcore_proton_metrics searchcore_fconfig - searchlib_searchlib_uca ) vespa_add_test(NAME searchcore_configurer_test_app COMMAND searchcore_configurer_test_app) diff --git a/searchcore/src/tests/proton/documentdb/document_subdbs/CMakeLists.txt b/searchcore/src/tests/proton/documentdb/document_subdbs/CMakeLists.txt index 2706d183988..bc783c2eb3d 100644 --- a/searchcore/src/tests/proton/documentdb/document_subdbs/CMakeLists.txt +++ b/searchcore/src/tests/proton/documentdb/document_subdbs/CMakeLists.txt @@ -20,6 +20,5 @@ vespa_add_executable(searchcore_document_subdbs_test_app TEST searchcore_grouping searchcore_proton_metrics searchcore_fconfig - searchlib_searchlib_uca ) vespa_add_test(NAME searchcore_document_subdbs_test_app COMMAND searchcore_document_subdbs_test_app) diff --git a/searchcore/src/tests/proton/matching/CMakeLists.txt b/searchcore/src/tests/proton/matching/CMakeLists.txt index c42a11be8ea..b4737be7819 100644 --- a/searchcore/src/tests/proton/matching/CMakeLists.txt +++ b/searchcore/src/tests/proton/matching/CMakeLists.txt @@ -12,7 +12,6 @@ vespa_add_executable(searchcore_matching_test_app TEST searchcore_bucketdb searchcore_pcommon searchcore_grouping - searchlib_searchlib_uca searchlib_test ) vespa_add_test(NAME searchcore_matching_test_app COMMAND searchcore_matching_test_app) diff --git a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt index 501c5e468dd..0489690c7c4 100644 --- a/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/bmcluster/CMakeLists.txt @@ -53,5 +53,4 @@ vespa_add_library(searchcore_bmcluster STATIC storageserver_storageapp messagebus_messagebus-test messagebus - searchlib_searchlib_uca ) diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 7c67508a196..43a8da19191 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -8,7 +8,6 @@ vespa_define_module( fnet configdefinitions metrics - fastlib_fast document config_cloudconfig diff --git a/searchlib/src/tests/forcelink/CMakeLists.txt b/searchlib/src/tests/forcelink/CMakeLists.txt index 81ce6beab32..a6af5a77b0c 100644 --- a/searchlib/src/tests/forcelink/CMakeLists.txt +++ b/searchlib/src/tests/forcelink/CMakeLists.txt @@ -4,6 +4,5 @@ vespa_add_executable(searchlib_forcelink_test_app TEST forcelink.cpp DEPENDS searchlib - searchlib_searchlib_uca ) vespa_add_test(NAME searchlib_forcelink_test_app COMMAND searchlib_forcelink_test_app) diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt index c59b3a4d8af..e342237adf0 100644 --- a/searchlib/src/tests/sort/CMakeLists.txt +++ b/searchlib/src/tests/sort/CMakeLists.txt @@ -11,7 +11,6 @@ vespa_add_executable(searchlib_sort_test_app sort_test.cpp DEPENDS searchlib - searchlib_searchlib_uca ) #vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app) vespa_add_executable(searchlib_uca_stress_app @@ -19,6 +18,5 @@ vespa_add_executable(searchlib_uca_stress_app uca.cpp DEPENDS searchlib - searchlib_searchlib_uca ) vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK) diff --git a/searchlib/src/tests/sortspec/CMakeLists.txt b/searchlib/src/tests/sortspec/CMakeLists.txt index 5ebaa22f154..a9b3a906804 100644 --- a/searchlib/src/tests/sortspec/CMakeLists.txt +++ b/searchlib/src/tests/sortspec/CMakeLists.txt @@ -4,6 +4,5 @@ vespa_add_executable(searchlib_multilevelsort_test_app TEST multilevelsort.cpp DEPENDS searchlib - searchlib_searchlib_uca ) vespa_add_test(NAME searchlib_multilevelsort_test_app COMMAND searchlib_multilevelsort_test_app) diff --git a/searchlib/src/vespa/searchlib/CMakeLists.txt b/searchlib/src/vespa/searchlib/CMakeLists.txt index 91813a17379..71442e27592 100644 --- a/searchlib/src/vespa/searchlib/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/CMakeLists.txt @@ -1,4 +1,5 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +find_package(ICU 60.0 REQUIRED COMPONENTS uc i18n) vespa_add_library(searchlib SOURCES $<TARGET_OBJECTS:searchlib_aggregation> @@ -28,6 +29,7 @@ vespa_add_library(searchlib $<TARGET_OBJECTS:searchlib_searchlib_index> $<TARGET_OBJECTS:searchlib_tensor> $<TARGET_OBJECTS:searchlib_transactionlog> + $<TARGET_OBJECTS:searchlib_uca> $<TARGET_OBJECTS:searchlib_util> $<TARGET_OBJECTS:searchcommon_searchcommon_common> $<TARGET_OBJECTS:searchcommon_searchcommon_attribute> @@ -35,6 +37,8 @@ vespa_add_library(searchlib INSTALL lib64 DEPENDS vespalib + ICU::i18n + ICU::uc ${VESPA_ATOMIC_LIB} ) diff --git a/searchlib/src/vespa/searchlib/uca/CMakeLists.txt b/searchlib/src/vespa/searchlib/uca/CMakeLists.txt index 6a4602381f4..75be1472a83 100644 --- a/searchlib/src/vespa/searchlib/uca/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/uca/CMakeLists.txt @@ -1,12 +1,7 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -find_package(ICU 60.0 REQUIRED COMPONENTS uc i18n) -vespa_add_library(searchlib_searchlib_uca +vespa_add_library(searchlib_uca OBJECT SOURCES ucaconverter.cpp ucafunctionnode.cpp - INSTALL lib64 DEPENDS - searchlib - ICU::i18n - ICU::uc ) diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 84633bee262..d51e29dbd00 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -4,7 +4,6 @@ vespa_define_module( vespalog vespalib configdefinitions - fastlib_fast document searchlib diff --git a/searchsummary/src/tests/juniper/.gitignore b/searchsummary/src/tests/juniper/.gitignore index 46b307da632..09d6225ca26 100644 --- a/searchsummary/src/tests/juniper/.gitignore +++ b/searchsummary/src/tests/juniper/.gitignore @@ -14,3 +14,4 @@ juniper_auxTest_app juniper_matchobjectTest_app juniper_mcandTest_app juniper_queryparserTest_app +juniper_latintokenizertest_app diff --git a/searchsummary/src/tests/juniper/CMakeLists.txt b/searchsummary/src/tests/juniper/CMakeLists.txt index d15e91f1f63..77e7052a429 100644 --- a/searchsummary/src/tests/juniper/CMakeLists.txt +++ b/searchsummary/src/tests/juniper/CMakeLists.txt @@ -1,4 +1,10 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(juniper_testsuite + SOURCES + test.cpp + DEPENDS +) + vespa_add_executable(juniper_mcandTest_app TEST SOURCES mcandTest.cpp @@ -6,7 +12,7 @@ vespa_add_executable(juniper_mcandTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_mcandTest_app COMMAND juniper_mcandTest_app) vespa_add_executable(juniper_queryparserTest_app TEST @@ -17,7 +23,7 @@ vespa_add_executable(juniper_queryparserTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_queryparserTest_app COMMAND juniper_queryparserTest_app) vespa_add_executable(juniper_matchobjectTest_app TEST @@ -28,7 +34,7 @@ vespa_add_executable(juniper_matchobjectTest_app TEST fakerewriter.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_matchobjectTest_app COMMAND juniper_matchobjectTest_app) vespa_add_executable(juniper_appender_test_app TEST @@ -36,7 +42,6 @@ vespa_add_executable(juniper_appender_test_app TEST appender_test.cpp DEPENDS searchsummary - fastlib_fast_testsuite ) vespa_add_test(NAME juniper_appender_test_app COMMAND juniper_appender_test_app) vespa_add_executable(juniper_queryvisitor_test_app TEST @@ -44,7 +49,6 @@ vespa_add_executable(juniper_queryvisitor_test_app TEST queryvisitor_test.cpp DEPENDS searchsummary - fastlib_fast_testsuite ) vespa_add_test(NAME juniper_queryvisitor_test_app COMMAND juniper_queryvisitor_test_app) vespa_add_executable(juniper_auxTest_app TEST @@ -54,7 +58,7 @@ vespa_add_executable(juniper_auxTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_auxTest_app COMMAND juniper_auxTest_app) vespa_add_executable(juniper_SrcTestSuite_app TEST @@ -68,6 +72,13 @@ vespa_add_executable(juniper_SrcTestSuite_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_SrcTestSuite_app COMMAND juniper_SrcTestSuite_app) +vespa_add_executable(juniper_latintokenizertest_app TEST + SOURCES + latintokenizertest.cpp + DEPENDS + juniper_testsuite +) +vespa_add_test(NAME juniper_latintokenizertest_app NO_VALGRIND COMMAND juniper_latintokenizertest_app) diff --git a/searchsummary/src/tests/juniper/SrcTestSuite.cpp b/searchsummary/src/tests/juniper/SrcTestSuite.cpp index c1e4dc2cd19..870c7b9f378 100644 --- a/searchsummary/src/tests/juniper/SrcTestSuite.cpp +++ b/searchsummary/src/tests/juniper/SrcTestSuite.cpp @@ -1,12 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "testenv.h" +#include "suite.h" #include "mcandTest.h" #include "queryparserTest.h" #include "matchobjectTest.h" #include "auxTest.h" #include <vespa/vespalib/testkit/testapp.h> -#include <vespa/fastlib/testsuite/suite.h> /** * The SrcTestSuite class runs all the unit tests for the src module. * diff --git a/searchsummary/src/tests/juniper/auxTest.h b/searchsummary/src/tests/juniper/auxTest.h index dd6d79e024a..9ff391911b3 100644 --- a/searchsummary/src/tests/juniper/auxTest.h +++ b/searchsummary/src/tests/juniper/auxTest.h @@ -3,9 +3,9 @@ // Auxiliary tests for juniper - based on Juniper 1.x proximitytest.cpp -#include <map> -#include <vespa/fastlib/testsuite/test.h> #include "testenv.h" +#include "test.h" +#include <map> class AuxTest : public Test { diff --git a/fastlib/src/vespa/fastlib/text/tests/latintokenizertest.cpp b/searchsummary/src/tests/juniper/latintokenizertest.cpp index 89273ab1ec0..89273ab1ec0 100644 --- a/fastlib/src/vespa/fastlib/text/tests/latintokenizertest.cpp +++ b/searchsummary/src/tests/juniper/latintokenizertest.cpp diff --git a/fastlib/src/vespa/fastlib/text/tests/latintokenizertest.h b/searchsummary/src/tests/juniper/latintokenizertest.h index 38dc08aa677..b4d113918ee 100644 --- a/fastlib/src/vespa/fastlib/text/tests/latintokenizertest.h +++ b/searchsummary/src/tests/juniper/latintokenizertest.h @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include <vespa/fastlib/testsuite/test.h> -#include <vespa/fastlib/text/latintokenizer.h> +#include "test.h" +#include <vespa/juniper/latintokenizer.h> #include <vespa/vespalib/util/stringfmt.h> class Mapel_Pucntuation { diff --git a/searchsummary/src/tests/juniper/matchobjectTest.h b/searchsummary/src/tests/juniper/matchobjectTest.h index 5bfd29a371f..9fdd3e4719f 100644 --- a/searchsummary/src/tests/juniper/matchobjectTest.h +++ b/searchsummary/src/tests/juniper/matchobjectTest.h @@ -5,7 +5,7 @@ #pragma once #include "testenv.h" -#include <vespa/fastlib/testsuite/test.h> +#include "test.h" #include <map> /** diff --git a/searchsummary/src/tests/juniper/mcandTest.h b/searchsummary/src/tests/juniper/mcandTest.h index cdb01e91e3b..6eb8b4d66e5 100644 --- a/searchsummary/src/tests/juniper/mcandTest.h +++ b/searchsummary/src/tests/juniper/mcandTest.h @@ -4,10 +4,10 @@ */ #pragma once -#include <map> -#include <vespa/fastlib/testsuite/test.h> #include "testenv.h" +#include "test.h" #include <vespa/juniper/mcand.h> +#include <map> /** * The MatchCandidateTest class holds diff --git a/searchsummary/src/tests/juniper/queryparserTest.h b/searchsummary/src/tests/juniper/queryparserTest.h index 7dc4dda63fa..803fbd4c999 100644 --- a/searchsummary/src/tests/juniper/queryparserTest.h +++ b/searchsummary/src/tests/juniper/queryparserTest.h @@ -5,9 +5,9 @@ #pragma once #include "testenv.h" +#include "test.h" #include <vespa/juniper/queryparser.h> #include <vespa/juniper/rewriter.h> -#include <vespa/fastlib/testsuite/test.h> #include <map> /** diff --git a/fastlib/src/vespa/fastlib/testsuite/suite.h b/searchsummary/src/tests/juniper/suite.h index fea685731ae..fea685731ae 100644 --- a/fastlib/src/vespa/fastlib/testsuite/suite.h +++ b/searchsummary/src/tests/juniper/suite.h diff --git a/fastlib/src/vespa/fastlib/testsuite/test.cpp b/searchsummary/src/tests/juniper/test.cpp index 18930b1bca2..18930b1bca2 100644 --- a/fastlib/src/vespa/fastlib/testsuite/test.cpp +++ b/searchsummary/src/tests/juniper/test.cpp diff --git a/fastlib/src/vespa/fastlib/testsuite/test.h b/searchsummary/src/tests/juniper/test.h index 1388c3ba812..1388c3ba812 100644 --- a/fastlib/src/vespa/fastlib/testsuite/test.h +++ b/searchsummary/src/tests/juniper/test.h diff --git a/searchsummary/src/vespa/juniper/config.cpp b/searchsummary/src/vespa/juniper/config.cpp index b9213bb21f1..a82a8d74b8a 100644 --- a/searchsummary/src/vespa/juniper/config.cpp +++ b/searchsummary/src/vespa/juniper/config.cpp @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "config.h" -#include "IJuniperProperties.h" #include "rpinterface.h" #include "juniperdebug.h" #define _NEED_SUMMARY_CONFIG_IMPL @@ -14,7 +13,7 @@ namespace juniper Config::Config(const char* config_name, Juniper & juniper) : _docsumparams(), _matcherparams(), - _sumconf(NULL), + _sumconf(nullptr), _config_name(config_name), _juniper(juniper) { @@ -51,7 +50,7 @@ Config::Config(const char* config_name, Juniper & juniper) : .SetMaxMatches(max_matches) .SetSurroundMax(surround_max) .SetFallback(fallback); - _matcherparams.SetWantGlobalRank(true) + _matcherparams .SetStemMinLength(stem_min).SetStemMaxExtend(stem_extend) .SetMatchWindowSize(match_winsize) .SetMaxMatchCandidates(max_match_candidates) diff --git a/searchsummary/src/vespa/juniper/juniperparams.cpp b/searchsummary/src/vespa/juniper/juniperparams.cpp index e600c23f7c4..4f25b2446ad 100644 --- a/searchsummary/src/vespa/juniper/juniperparams.cpp +++ b/searchsummary/src/vespa/juniper/juniperparams.cpp @@ -1,7 +1,5 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "juniperdebug.h" #include "juniperparams.h" -#include "Matcher.h" #include <cstring> // DocsumParams implementation: @@ -9,7 +7,7 @@ DocsumParams::DocsumParams() : _enabled(false), _length(256), _min_length(128), _max_matches(3), - _surround_max(80), _space_chars(""), _fallback(FALLBACK_NONE) + _surround_max(80), _fallback(FALLBACK_NONE) { } DocsumParams& DocsumParams::SetEnabled(bool en) @@ -42,12 +40,6 @@ DocsumParams& DocsumParams::SetSurroundMax(size_t length) return *this; } -DocsumParams& DocsumParams::SetSpaceChars(const char* spacechars) -{ - _space_chars = spacechars; - return *this; -} - DocsumParams& DocsumParams::SetFallback(const char* fallback) { if (strcmp("prefix", fallback) == 0) { @@ -63,7 +55,6 @@ size_t DocsumParams::MinLength() const { return _min_length; } size_t DocsumParams::MaxMatches() const { return _max_matches; } size_t DocsumParams::SurroundMax() const { return _surround_max; } bool DocsumParams::Enabled() const { return _enabled; } -const char* DocsumParams::SpaceChars() const { return _space_chars.c_str(); } int DocsumParams::Fallback() const { return _fallback; } // MatcherParams implementation: @@ -71,54 +62,26 @@ int DocsumParams::Fallback() const { return _fallback; } MatcherParams::MatcherParams() : - _prefix_extend_length(3), - _prefix_min_length(5), _match_winsize(200), _match_winsize_fallback_multiplier(10.0), _max_match_candidates(1000), - _want_global_rank(false), _stem_min(0), _stem_extend(0), _wordfolder(NULL), _proximity_factor(1.0) { } -MatcherParams& MatcherParams::SetPrefixExtendLength(size_t extend_length) -{ - _prefix_extend_length = extend_length; - return *this; -} - -MatcherParams& MatcherParams::SetPrefixMinLength(size_t min_length) -{ - _prefix_min_length = min_length; - return *this; -} - - MatcherParams& MatcherParams::SetMatchWindowSize(size_t winsize) { _match_winsize = winsize; return *this; } -MatcherParams& MatcherParams::SetMatchWindowSizeFallbackMultiplier(double winsize) -{ - _match_winsize_fallback_multiplier = winsize; - return *this; -} - MatcherParams& MatcherParams::SetMaxMatchCandidates(size_t max_match_candidates) { _max_match_candidates = max_match_candidates; return *this; } -MatcherParams& MatcherParams::SetWantGlobalRank(bool global_rank) -{ - _want_global_rank = global_rank; - return *this; -} - MatcherParams& MatcherParams::SetStemMinLength(size_t stem_min) { _stem_min = stem_min; @@ -132,12 +95,9 @@ MatcherParams& MatcherParams::SetStemMaxExtend(size_t stem_extend) return *this; } -size_t MatcherParams::PrefixExtendLength() const { return _prefix_extend_length; } -size_t MatcherParams::PrefixMinLength() const { return _prefix_min_length; } size_t MatcherParams::MatchWindowSize() const { return _match_winsize; } double MatcherParams::MatchWindowSizeFallbackMultiplier() const { return _match_winsize_fallback_multiplier; } size_t MatcherParams::MaxMatchCandidates() const { return _max_match_candidates; } -bool MatcherParams::WantGlobalRank() const { return _want_global_rank; } size_t MatcherParams::StemMinLength() const { return _stem_min; } size_t MatcherParams::StemMaxExtend() const { return _stem_extend; } diff --git a/searchsummary/src/vespa/juniper/juniperparams.h b/searchsummary/src/vespa/juniper/juniperparams.h index 44980ce8b43..f4f17779f2d 100644 --- a/searchsummary/src/vespa/juniper/juniperparams.h +++ b/searchsummary/src/vespa/juniper/juniperparams.h @@ -31,9 +31,6 @@ public: DocsumParams& SetSurroundMax(size_t length); size_t SurroundMax() const; - DocsumParams& SetSpaceChars(const char* spacechars); - const char* SpaceChars() const; - DocsumParams& SetFallback(const char* fallback); int Fallback() const; @@ -43,7 +40,6 @@ private: size_t _min_length; size_t _max_matches; size_t _surround_max; - std::string _space_chars; int _fallback; }; @@ -52,25 +48,17 @@ class MatcherParams { public: MatcherParams(); - - MatcherParams& SetPrefixExtendLength(size_t extend_length); - size_t PrefixExtendLength() const; - - MatcherParams& SetPrefixMinLength(size_t min_length); - size_t PrefixMinLength() const; + MatcherParams(MatcherParams &) = delete; + MatcherParams &operator=(MatcherParams &) = delete; MatcherParams& SetMatchWindowSize(size_t winsize); size_t MatchWindowSize() const; - MatcherParams& SetMatchWindowSizeFallbackMultiplier(double winsize); double MatchWindowSizeFallbackMultiplier() const; MatcherParams& SetMaxMatchCandidates(size_t max_match_candidates); size_t MaxMatchCandidates() const; - MatcherParams& SetWantGlobalRank(bool global_rank); - bool WantGlobalRank() const; - MatcherParams& SetStemMinLength(size_t stem_min); size_t StemMinLength() const; @@ -84,19 +72,13 @@ public: double ProximityFactor(); private: - size_t _prefix_extend_length; - size_t _prefix_min_length; size_t _match_winsize; double _match_winsize_fallback_multiplier; size_t _max_match_candidates; - bool _want_global_rank; size_t _stem_min; size_t _stem_extend; Fast_WordFolder* _wordfolder; // The wordfolder object needed as 1st parameter to folderfun double _proximity_factor; - - MatcherParams(MatcherParams &); - MatcherParams &operator=(MatcherParams &); }; diff --git a/fastlib/src/vespa/fastlib/text/latintokenizer.h b/searchsummary/src/vespa/juniper/latintokenizer.h index be5d98054d9..7a98d780c56 100644 --- a/fastlib/src/vespa/fastlib/text/latintokenizer.h +++ b/searchsummary/src/vespa/juniper/latintokenizer.h @@ -34,8 +34,8 @@ #pragma once -#include <ctype.h> -#include <string.h> +#include <cctype> +#include <cstring> /** ***************************************************************************** diff --git a/searchsummary/src/vespa/juniper/queryparser.h b/searchsummary/src/vespa/juniper/queryparser.h index 5715daa3661..9c596892e31 100644 --- a/searchsummary/src/vespa/juniper/queryparser.h +++ b/searchsummary/src/vespa/juniper/queryparser.h @@ -1,13 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once - /* Simple prefix syntax advanced query parser for Juniper debug/testing */ #include "query.h" +#include "latintokenizer.h" #include <map> #include <string> -#include <vespa/fastlib/text/latintokenizer.h> namespace juniper { diff --git a/searchsummary/src/vespa/juniper/result.cpp b/searchsummary/src/vespa/juniper/result.cpp index 653e692e015..15ad9aa2a98 100644 --- a/searchsummary/src/vespa/juniper/result.cpp +++ b/searchsummary/src/vespa/juniper/result.cpp @@ -80,12 +80,12 @@ Result::Result(Config* config, QueryHandle* qhandle, } /* Create the new pipeline */ - _tokenizer.reset(new JuniperTokenizer(wordfolder, NULL, 0, NULL)); + _tokenizer = std::make_unique<JuniperTokenizer>(wordfolder, nullptr, 0, nullptr, nullptr); - _matcher.reset(new Matcher(this)); + _matcher = std::make_unique<Matcher>(this); _matcher->SetProximityFactor(mp.ProximityFactor()); - _registry.reset(new SpecialTokenRegistry(_matcher->getQuery())); + _registry = std::make_unique<SpecialTokenRegistry>(_matcher->getQuery()); if (qhandle->_log_mask) _matcher->set_log(qhandle->_log_mask); diff --git a/searchsummary/src/vespa/juniper/rpinterface.cpp b/searchsummary/src/vespa/juniper/rpinterface.cpp index 75a441fb957..f9e91073a9b 100644 --- a/searchsummary/src/vespa/juniper/rpinterface.cpp +++ b/searchsummary/src/vespa/juniper/rpinterface.cpp @@ -94,12 +94,6 @@ void Juniper::FlushRewriters() _modifier->FlushRewriters(); } -void ReleaseConfig(Config*& config) -{ - delete config; - config = NULL; -} - void ReleaseQueryHandle(QueryHandle*& handle) { diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt index a3768f9a193..4b7c12b0f31 100644 --- a/storage/CMakeLists.txt +++ b/storage/CMakeLists.txt @@ -3,7 +3,6 @@ vespa_define_module( DEPENDS vespadefaults fastos - fastlib_fast metrics config_cloudconfig configdefinitions diff --git a/storageserver/src/apps/storaged/CMakeLists.txt b/storageserver/src/apps/storaged/CMakeLists.txt index 3fb02b5d1b9..10f38cf5269 100644 --- a/storageserver/src/apps/storaged/CMakeLists.txt +++ b/storageserver/src/apps/storaged/CMakeLists.txt @@ -7,7 +7,6 @@ vespa_add_executable(storageserver_storaged_app INSTALL sbin DEPENDS storageserver_storageapp - searchlib_searchlib_uca ) install(CODE "execute_process(COMMAND ln -snf vespa-storaged-bin \$ENV{DESTDIR}/\${CMAKE_INSTALL_PREFIX}/sbin/vespa-distributord-bin)") diff --git a/streamingvisitors/CMakeLists.txt b/streamingvisitors/CMakeLists.txt index 166b4284a89..0e7789a21b9 100644 --- a/streamingvisitors/CMakeLists.txt +++ b/streamingvisitors/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_define_module( DEPENDS fastos - fastlib_fast vespalog storage config_cloudconfig diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt index ff629462f9e..8a3bd92f5a8 100644 --- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt +++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt @@ -17,5 +17,4 @@ vespa_add_library(streamingvisitors $<TARGET_OBJECTS:vsm_vsmsearcher> INSTALL lib64 DEPENDS - searchlib_searchlib_uca ) diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index fd1c52a868a..69bd709c613 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -193,6 +193,8 @@ vespa_define_module( src/tests/wakeup src/tests/xmlserializable src/tests/zcurve + src/tests/fastlib/io + src/tests/fastlib/text LIBS src/vespa/vespalib @@ -224,4 +226,7 @@ vespa_define_module( src/vespa/vespalib/time src/vespa/vespalib/trace src/vespa/vespalib/util + src/vespa/fastlib/io + src/vespa/fastlib/text + src/vespa/fastlib/text/apps ) diff --git a/fastlib/src/vespa/fastlib/io/tests/.gitignore b/vespalib/src/tests/fastlib/io/.gitignore index 816281ccbfb..816281ccbfb 100644 --- a/fastlib/src/vespa/fastlib/io/tests/.gitignore +++ b/vespalib/src/tests/fastlib/io/.gitignore diff --git a/fastlib/src/vespa/fastlib/io/tests/CMakeLists.txt b/vespalib/src/tests/fastlib/io/CMakeLists.txt index ae1e81a6170..345b3456bbf 100644 --- a/fastlib/src/vespa/fastlib/io/tests/CMakeLists.txt +++ b/vespalib/src/tests/fastlib/io/CMakeLists.txt @@ -3,6 +3,6 @@ vespa_add_executable(fastlib_bufferedfiletest_app TEST SOURCES bufferedfiletest.cpp DEPENDS - fastlib_io + vespalib ) vespa_add_test(NAME fastlib_bufferedfiletest_app COMMAND fastlib_bufferedfiletest_app) diff --git a/fastlib/src/vespa/fastlib/io/tests/bufferedfiletest.cpp b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp index 8aa9b943419..8aa9b943419 100644 --- a/fastlib/src/vespa/fastlib/io/tests/bufferedfiletest.cpp +++ b/vespalib/src/tests/fastlib/io/bufferedfiletest.cpp diff --git a/fastlib/src/vespa/fastlib/text/tests/.gitignore b/vespalib/src/tests/fastlib/text/.gitignore index 8134602778d..8134602778d 100644 --- a/fastlib/src/vespa/fastlib/text/tests/.gitignore +++ b/vespalib/src/tests/fastlib/text/.gitignore diff --git a/fastlib/src/vespa/fastlib/text/tests/CMakeLists.txt b/vespalib/src/tests/fastlib/text/CMakeLists.txt index bcb7be76aac..690da0a3d80 100644 --- a/fastlib/src/vespa/fastlib/text/tests/CMakeLists.txt +++ b/vespalib/src/tests/fastlib/text/CMakeLists.txt @@ -3,23 +3,13 @@ vespa_add_executable(fastlib_unicodeutiltest_app TEST SOURCES unicodeutiltest.cpp DEPENDS - fastlib_text - fastlib_fast_testsuite + vespalib ) vespa_add_test(NAME fastlib_unicodeutiltest_app NO_VALGRIND COMMAND fastlib_unicodeutiltest_app) -vespa_add_executable(fastlib_latintokenizertest_app TEST - SOURCES - latintokenizertest.cpp - DEPENDS - fastlib_text - fastlib_fast_testsuite -) -vespa_add_test(NAME fastlib_latintokenizertest_app NO_VALGRIND COMMAND fastlib_latintokenizertest_app) vespa_add_executable(fastlib_wordfolderstest_app TEST SOURCES wordfolderstest.cpp DEPENDS - fastlib_text - fastlib_fast_testsuite + vespalib ) vespa_add_test(NAME fastlib_wordfolderstest_app NO_VALGRIND COMMAND fastlib_wordfolderstest_app) diff --git a/vespalib/src/tests/fastlib/text/unicodeutiltest.cpp b/vespalib/src/tests/fastlib/text/unicodeutiltest.cpp new file mode 100644 index 00000000000..d734b3b6aab --- /dev/null +++ b/vespalib/src/tests/fastlib/text/unicodeutiltest.cpp @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/vespalib/testkit/test_kit.h> + +TEST("GetUTF8Char_WrongInput") { + const char *testdata = "ab\xF8"; + + ucs4_t the_char = 0; + + const unsigned char *src = reinterpret_cast<const unsigned char *>(testdata); + while (*src != 0) { + the_char = Fast_UnicodeUtil::GetUTF8Char(src); + } + EXPECT_EQUAL(Fast_UnicodeUtil::_BadUTF8Char, the_char); +} + +TEST("IsTerminalPunctuationChar") { + // test a small selection + + EXPECT_TRUE(Fast_UnicodeUtil::IsTerminalPunctuationChar('!')); + EXPECT_TRUE(Fast_UnicodeUtil::IsTerminalPunctuationChar(',')); + EXPECT_TRUE(Fast_UnicodeUtil::IsTerminalPunctuationChar('.')); + EXPECT_TRUE(Fast_UnicodeUtil::IsTerminalPunctuationChar(':')); + EXPECT_TRUE(Fast_UnicodeUtil::IsTerminalPunctuationChar(';')); + EXPECT_FALSE(Fast_UnicodeUtil::IsTerminalPunctuationChar(' ')); + EXPECT_FALSE(Fast_UnicodeUtil::IsTerminalPunctuationChar('a')); + EXPECT_FALSE(Fast_UnicodeUtil::IsTerminalPunctuationChar('A')); +} + +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/vespalib/src/tests/fastlib/text/wordfolderstest.cpp b/vespalib/src/tests/fastlib/text/wordfolderstest.cpp new file mode 100644 index 00000000000..b2e05250951 --- /dev/null +++ b/vespalib/src/tests/fastlib/text/wordfolderstest.cpp @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastlib/text/normwordfolder.h> +#include <vespa/vespalib/testkit/test_kit.h> + +TEST("NormalizeWordFolderConstruction") { + Fast_NormalizeWordFolder::Setup( + Fast_NormalizeWordFolder::DO_ACCENT_REMOVAL + | Fast_NormalizeWordFolder::DO_KATAKANA_TO_HIRAGANA + | Fast_NormalizeWordFolder::DO_SMALL_TO_NORMAL_KANA + | Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION + | Fast_NormalizeWordFolder::DO_LIGATURE_SUBSTITUTION + | Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION); +} + +TEST("TokenizeAnnotatedUCS4Buffer") { + auto nwf = std::make_unique<Fast_NormalizeWordFolder>(); + const char *testinput = "This is a " + "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB" + " superduperextrafeaturecoolandlongplainword fun " + "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" + "world wide web extra long annotation block" "\xEF\xBF\xBB" + " test\nIt is cool.\n"; + const char *correct[] = { + "this", "is", "a", + "\xEF\xBF\xB9" "café" "\xEF\xBF\xBA" "cafe" "\xEF\xBF\xBB", + "superduperextrafeaturecooland", "fun", + "\xEF\xBF\xB9" "www" "\xEF\xBF\xBA" "world wide web extra lon", + "test", "it", "is", "cool" }; + + const char *teststart = testinput; + const char *testend = testinput + strlen(testinput); + ucs4_t destbuf[32]; + ucs4_t *destbufend = destbuf + 32; + + const char *origstart = testinput; + size_t tokenlen = 0; + + int tokencounter = 0; + while ((teststart = nwf->UCS4Tokenize(teststart, testend, destbuf, destbufend, origstart, tokenlen)) < testend) { + EXPECT_EQUAL(0, Fast_UnicodeUtil::utf8cmp(correct[tokencounter++], destbuf)); + } + +} + +TEST_MAIN() { TEST_RUN_ALL(); }
\ No newline at end of file diff --git a/fastlib/src/vespa/fastlib/io/.gitignore b/vespalib/src/vespa/fastlib/io/.gitignore index aa9c3f19188..aa9c3f19188 100644 --- a/fastlib/src/vespa/fastlib/io/.gitignore +++ b/vespalib/src/vespa/fastlib/io/.gitignore diff --git a/fastlib/src/vespa/fastlib/io/CMakeLists.txt b/vespalib/src/vespa/fastlib/io/CMakeLists.txt index f21cf27b21e..f21cf27b21e 100644 --- a/fastlib/src/vespa/fastlib/io/CMakeLists.txt +++ b/vespalib/src/vespa/fastlib/io/CMakeLists.txt diff --git a/fastlib/src/vespa/fastlib/io/bufferedfile.cpp b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp index fa55d0be812..fa55d0be812 100644 --- a/fastlib/src/vespa/fastlib/io/bufferedfile.cpp +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp diff --git a/fastlib/src/vespa/fastlib/io/bufferedfile.h b/vespalib/src/vespa/fastlib/io/bufferedfile.h index 48f90262ad9..48f90262ad9 100644 --- a/fastlib/src/vespa/fastlib/io/bufferedfile.h +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.h diff --git a/fastlib/src/vespa/fastlib/text/.gitignore b/vespalib/src/vespa/fastlib/text/.gitignore index c452e2714be..c452e2714be 100644 --- a/fastlib/src/vespa/fastlib/text/.gitignore +++ b/vespalib/src/vespa/fastlib/text/.gitignore diff --git a/fastlib/src/vespa/fastlib/text/CMakeLists.txt b/vespalib/src/vespa/fastlib/text/CMakeLists.txt index 0708bde9a25..d6cb8c29305 100644 --- a/fastlib/src/vespa/fastlib/text/CMakeLists.txt +++ b/vespalib/src/vespa/fastlib/text/CMakeLists.txt @@ -2,7 +2,6 @@ vespa_add_library(fastlib_text OBJECT SOURCES unicodeutil.cpp - wordfolder.cpp normwordfolder.cpp DEPENDS ) diff --git a/fastlib/src/vespa/fastlib/text/CustomProperties-4.0.0.txt b/vespalib/src/vespa/fastlib/text/CustomProperties-4.0.0.txt index 461d268cc6e..461d268cc6e 100644 --- a/fastlib/src/vespa/fastlib/text/CustomProperties-4.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/CustomProperties-4.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/DerivedCoreProperties-4.0.0.txt b/vespalib/src/vespa/fastlib/text/DerivedCoreProperties-4.0.0.txt index 0fb45afd6af..0fb45afd6af 100644 --- a/fastlib/src/vespa/fastlib/text/DerivedCoreProperties-4.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/DerivedCoreProperties-4.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/PropList-3.0.0.txt b/vespalib/src/vespa/fastlib/text/PropList-3.0.0.txt index 048efb2d369..048efb2d369 100644 --- a/fastlib/src/vespa/fastlib/text/PropList-3.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/PropList-3.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/PropList-4.0.0.txt b/vespalib/src/vespa/fastlib/text/PropList-4.0.0.txt index 90176ad019b..90176ad019b 100644 --- a/fastlib/src/vespa/fastlib/text/PropList-4.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/PropList-4.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/UCD-License b/vespalib/src/vespa/fastlib/text/UCD-License index 25cd5fa40e1..25cd5fa40e1 100644 --- a/fastlib/src/vespa/fastlib/text/UCD-License +++ b/vespalib/src/vespa/fastlib/text/UCD-License diff --git a/fastlib/src/vespa/fastlib/text/UnicodeData-3.0.0.txt b/vespalib/src/vespa/fastlib/text/UnicodeData-3.0.0.txt index 6a54d3d74e9..6a54d3d74e9 100644 --- a/fastlib/src/vespa/fastlib/text/UnicodeData-3.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/UnicodeData-3.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/UnicodeData-4.0.0.txt b/vespalib/src/vespa/fastlib/text/UnicodeData-4.0.0.txt index 5394611560d..5394611560d 100644 --- a/fastlib/src/vespa/fastlib/text/UnicodeData-4.0.0.txt +++ b/vespalib/src/vespa/fastlib/text/UnicodeData-4.0.0.txt diff --git a/fastlib/src/vespa/fastlib/text/alphasort/AlphaSort1_0.dtd b/vespalib/src/vespa/fastlib/text/alphasort/AlphaSort1_0.dtd index 6c9a45bbbfc..6c9a45bbbfc 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/AlphaSort1_0.dtd +++ b/vespalib/src/vespa/fastlib/text/alphasort/AlphaSort1_0.dtd diff --git a/fastlib/src/vespa/fastlib/text/alphasort/AlphaSortMasterFile.xml b/vespalib/src/vespa/fastlib/text/alphasort/AlphaSortMasterFile.xml index 6a9da8cc1cf..6a9da8cc1cf 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/AlphaSortMasterFile.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/AlphaSortMasterFile.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Arabic.xml b/vespalib/src/vespa/fastlib/text/alphasort/Arabic.xml index 9789f519fd2..9789f519fd2 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Arabic.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Arabic.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Cyrillic.xml b/vespalib/src/vespa/fastlib/text/alphasort/Cyrillic.xml index 7332d1633ca..7332d1633ca 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Cyrillic.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Cyrillic.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Greek.xml b/vespalib/src/vespa/fastlib/text/alphasort/Greek.xml index 662003aefc6..662003aefc6 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Greek.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Greek.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hangul.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hangul.xml index 44ce855869e..44ce855869e 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hangul.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hangul.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-pinyin.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-pinyin.xml index 5f5486aaf4b..5f5486aaf4b 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-pinyin.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-pinyin.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-radical.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-radical.xml index f6b2a47642d..f6b2a47642d 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-radical.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-simplified-by-radical.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-pinyin.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-pinyin.xml index 93fe57de8d2..93fe57de8d2 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-pinyin.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-pinyin.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-radical.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-radical.xml index 28223fdcb2a..28223fdcb2a 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-radical.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-radical.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-stroke.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-stroke.xml index f9b4cceb89d..f9b4cceb89d 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-stroke.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hanzi-traditional-by-stroke.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Hebrew.xml b/vespalib/src/vespa/fastlib/text/alphasort/Hebrew.xml index 320aa1eacd7..320aa1eacd7 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Hebrew.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Hebrew.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Kana.xml b/vespalib/src/vespa/fastlib/text/alphasort/Kana.xml index 64d67a97719..64d67a97719 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Kana.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Kana.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Kanji-by-radical.xml b/vespalib/src/vespa/fastlib/text/alphasort/Kanji-by-radical.xml index 66b1fe4c447..66b1fe4c447 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Kanji-by-radical.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Kanji-by-radical.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Latin.xml b/vespalib/src/vespa/fastlib/text/alphasort/Latin.xml index 4c0df6aa186..4c0df6aa186 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Latin.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Latin.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Numbers.xml b/vespalib/src/vespa/fastlib/text/alphasort/Numbers.xml index e73b1fd2fd8..e73b1fd2fd8 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Numbers.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Numbers.xml diff --git a/fastlib/src/vespa/fastlib/text/alphasort/SortMaster1_0.dtd b/vespalib/src/vespa/fastlib/text/alphasort/SortMaster1_0.dtd index 9877d1bf8ca..9877d1bf8ca 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/SortMaster1_0.dtd +++ b/vespalib/src/vespa/fastlib/text/alphasort/SortMaster1_0.dtd diff --git a/fastlib/src/vespa/fastlib/text/alphasort/Space.xml b/vespalib/src/vespa/fastlib/text/alphasort/Space.xml index bb6b08c001a..bb6b08c001a 100644 --- a/fastlib/src/vespa/fastlib/text/alphasort/Space.xml +++ b/vespalib/src/vespa/fastlib/text/alphasort/Space.xml diff --git a/fastlib/src/vespa/fastlib/text/apps/.gitignore b/vespalib/src/vespa/fastlib/text/apps/.gitignore index c452e2714be..c452e2714be 100644 --- a/fastlib/src/vespa/fastlib/text/apps/.gitignore +++ b/vespalib/src/vespa/fastlib/text/apps/.gitignore diff --git a/fastlib/src/vespa/fastlib/text/apps/CMakeLists.txt b/vespalib/src/vespa/fastlib/text/apps/CMakeLists.txt index fd1049fa689..fd1049fa689 100644 --- a/fastlib/src/vespa/fastlib/text/apps/CMakeLists.txt +++ b/vespalib/src/vespa/fastlib/text/apps/CMakeLists.txt diff --git a/fastlib/src/vespa/fastlib/text/apps/extcase.cpp b/vespalib/src/vespa/fastlib/text/apps/extcase.cpp index 23486330f66..23486330f66 100644 --- a/fastlib/src/vespa/fastlib/text/apps/extcase.cpp +++ b/vespalib/src/vespa/fastlib/text/apps/extcase.cpp diff --git a/fastlib/src/vespa/fastlib/text/apps/unicode_propertydump.cpp b/vespalib/src/vespa/fastlib/text/apps/unicode_propertydump.cpp index 5f2b6089cdf..5f2b6089cdf 100644 --- a/fastlib/src/vespa/fastlib/text/apps/unicode_propertydump.cpp +++ b/vespalib/src/vespa/fastlib/text/apps/unicode_propertydump.cpp diff --git a/fastlib/src/vespa/fastlib/text/apps/unicode_tolowerdump.cpp b/vespalib/src/vespa/fastlib/text/apps/unicode_tolowerdump.cpp index 7d84f6e931d..7d84f6e931d 100644 --- a/fastlib/src/vespa/fastlib/text/apps/unicode_tolowerdump.cpp +++ b/vespalib/src/vespa/fastlib/text/apps/unicode_tolowerdump.cpp diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp new file mode 100644 index 00000000000..a8454f42363 --- /dev/null +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp @@ -0,0 +1,599 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "normwordfolder.h" +#include <mutex> +#include <cstring> + +bool Fast_NormalizeWordFolder::_isInitialized = false; +std::mutex _initMutex; +bool Fast_NormalizeWordFolder::_doAccentRemoval = false; +bool Fast_NormalizeWordFolder::_doSharpSSubstitution = false; +bool Fast_NormalizeWordFolder::_doLigatureSubstitution = false; +bool Fast_NormalizeWordFolder::_doMulticharExpansion = false; +bool Fast_NormalizeWordFolder::_isWord[128]; + +ucs4_t Fast_NormalizeWordFolder::_foldCase[767]; // Up to Latin Extended B (0x0250) +ucs4_t Fast_NormalizeWordFolder::_foldCaseHighAscii[256]; // Latin Extended Additional (0x1E00 - 0x1F00) +ucs4_t Fast_NormalizeWordFolder::_kanaMap[192]; +ucs4_t Fast_NormalizeWordFolder::_halfwidth_fullwidthMap[240]; + +void +Fast_NormalizeWordFolder::Setup(uint32_t flags) +{ + // Only allow setting these when not initialized or initializing... + { + std::lock_guard<std::mutex> initGuard(_initMutex); + _doAccentRemoval = (DO_ACCENT_REMOVAL & flags) != 0; + _doSharpSSubstitution = (DO_SHARP_S_SUBSTITUTION & flags) != 0; + _doLigatureSubstitution = (DO_LIGATURE_SUBSTITUTION & flags) != 0; + _doMulticharExpansion = (DO_MULTICHAR_EXPANSION & flags) != 0; + _isInitialized = false; + } + Initialize(); +} + +void +Fast_NormalizeWordFolder::Initialize() +{ + unsigned int i; + if (!_isInitialized) { + std::lock_guard<std::mutex> initGuard(_initMutex); + if (!_isInitialized) { + + for (i = 0; i < 128; i++) + _isWord[i] = Fast_UnicodeUtil::IsWordChar(i); + for (i = 0; i < 767; i++) { + _foldCase[i] = Fast_UnicodeUtil::ToLower(i); + } + + for (i = 0x1E00; i < 0x1F00; i++) { + _foldCaseHighAscii[i - 0x1E00] = Fast_UnicodeUtil::ToLower(i); + } + + if (_doAccentRemoval) { + _foldCase[0xc0] = 'a'; + _foldCase[0xc1] = 'a'; + _foldCase[0xc2] = 'a'; + _foldCase[0xc3] = 'a'; // A tilde + _foldCase[0xc7] = 'c'; + _foldCase[0xc8] = 'e'; + _foldCase[0xc9] = 'e'; + _foldCase[0xca] = 'e'; + _foldCase[0xcb] = 'e'; + _foldCase[0xcc] = 'i'; // I grave + _foldCase[0xcd] = 'i'; + _foldCase[0xce] = 'i'; + _foldCase[0xcf] = 'i'; + _foldCase[0xd1] = 'n'; + _foldCase[0xd2] = 'o'; + _foldCase[0xd3] = 'o'; + _foldCase[0xd4] = 'o'; + _foldCase[0xd5] = 'o'; + _foldCase[0xd9] = 'u'; + _foldCase[0xda] = 'u'; + _foldCase[0xdb] = 'u'; + _foldCase[0xdd] = 'y'; + + _foldCase[0xe0] = 'a'; + _foldCase[0xe1] = 'a'; + _foldCase[0xe2] = 'a'; + _foldCase[0xe3] = 'a'; // a tilde + _foldCase[0xe7] = 'c'; + _foldCase[0xe8] = 'e'; + _foldCase[0xe9] = 'e'; + _foldCase[0xea] = 'e'; + _foldCase[0xeb] = 'e'; + _foldCase[0xec] = 'i'; // i grave + _foldCase[0xed] = 'i'; + _foldCase[0xee] = 'i'; + _foldCase[0xef] = 'i'; + _foldCase[0xf1] = 'n'; + _foldCase[0xf2] = 'o'; + _foldCase[0xf3] = 'o'; + _foldCase[0xf4] = 'o'; + _foldCase[0xf5] = 'o'; + _foldCase[0xf9] = 'u'; + _foldCase[0xfa] = 'u'; + _foldCase[0xfb] = 'u'; + _foldCase[0xfd] = 'y'; + _foldCase[0xff] = 'y'; + _foldCase[0x102] = 'a'; + _foldCase[0x103] = 'a'; + _foldCase[0x110] = 'd'; + _foldCase[0x111] = 'd'; + _foldCase[0x128] = 'i'; + _foldCase[0x129] = 'i'; + _foldCase[0x178] = 'y'; + _foldCase[0x1a0] = 'o'; + _foldCase[0x1a1] = 'o'; + _foldCase[0x1af] = 'u'; + _foldCase[0x1b0] = 'u'; + + // Superscript spacing modifiers + _foldCase[0x2b0] = 'h'; + _foldCase[0x2b1] = 0x266; + _foldCase[0x2b2] = 'j'; + _foldCase[0x2b3] = 'r'; + _foldCase[0x2b4] = 0x279; + _foldCase[0x2b5] = 0x27b; + _foldCase[0x2b6] = 0x281; + _foldCase[0x2b7] = 'w'; + _foldCase[0x2b8] = 'y'; + _foldCase[0x2e0] = 0x263; + _foldCase[0x2e1] = 'l'; + _foldCase[0x2e2] = 's'; + _foldCase[0x2e3] = 'x'; + _foldCase[0x2e4] = 0x295; + + // Superscript spacing modifiers + _foldCase[0x2b0] = 'h'; + _foldCase[0x2b1] = 0x266; + _foldCase[0x2b2] = 'j'; + _foldCase[0x2b3] = 'r'; + _foldCase[0x2b4] = 0x279; + _foldCase[0x2b5] = 0x27b; + _foldCase[0x2b6] = 0x281; + _foldCase[0x2b7] = 'w'; + _foldCase[0x2b8] = 'y'; + _foldCase[0x2e0] = 0x263; + _foldCase[0x2e1] = 'l'; + _foldCase[0x2e2] = 's'; + _foldCase[0x2e3] = 'x'; + _foldCase[0x2e4] = 0x295; + + // Deaccenting-table for Ascii Extended Additional + _foldCaseHighAscii[0x1ea0 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea1 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea2 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea3 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea4 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea5 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea6 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea7 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea8 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ea9 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eaa - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eab - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eac - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1ead - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eae - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eaf - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb0 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb1 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb2 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb3 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb4 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb5 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb6 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb7 - 0x1e00] = 'a'; + _foldCaseHighAscii[0x1eb8 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1eb9 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1eba - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ebb - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ebc - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ebd - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ebe - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ebf - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec0 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec1 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec2 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec3 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec4 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec5 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec6 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec7 - 0x1e00] = 'e'; + _foldCaseHighAscii[0x1ec8 - 0x1e00] = 'i'; + _foldCaseHighAscii[0x1ec9 - 0x1e00] = 'i'; + _foldCaseHighAscii[0x1eca - 0x1e00] = 'i'; + _foldCaseHighAscii[0x1ecb - 0x1e00] = 'i'; + _foldCaseHighAscii[0x1ecc - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ecd - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ece - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ecf - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed0 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed1 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed2 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed3 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed4 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed5 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed6 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed7 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed8 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ed9 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1eda - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1edb - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1edc - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1edd - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ede - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1edf - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ee0 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ee1 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ee2 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ee3 - 0x1e00] = 'o'; + _foldCaseHighAscii[0x1ee4 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ee5 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ee6 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ee7 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ee8 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ee9 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eea - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eeb - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eec - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eed - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eee - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1eef - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ef0 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ef1 - 0x1e00] = 'u'; + _foldCaseHighAscii[0x1ef2 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef3 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef4 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef5 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef6 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef7 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef8 - 0x1e00] = 'y'; + _foldCaseHighAscii[0x1ef9 - 0x1e00] = 'y'; + } + + // Base case hiragana - hiragana ID + for (i = 0; i < 96; i++) { + _kanaMap[i] = 0x3040 + i; + } + + // katakana - katakana ID + for (i = 96; i < 192; i++) { + _kanaMap[i] = 0x3040 + i; + } + + // Fullwidth ASCII + for (i = 0; i < 0x21; i++) + _halfwidth_fullwidthMap[i] = 0x20 + i; + for (i = 0x21; i < 0x3B; i++) // full uppercase to half lowercase + _halfwidth_fullwidthMap[i] = 0x40 + i; + for (i = 0x3B; i < 0x5F; i++) + _halfwidth_fullwidthMap[i] = 0x20 + i; + // 0xFF00, 0xFF5F -> id + _halfwidth_fullwidthMap[0x00] = 0xFF00; + _halfwidth_fullwidthMap[0x5F] = 0xFF5F; + + // Halfwidth CJK Punctuation + // 0xFF60 -> id + _halfwidth_fullwidthMap[0x60] = 0xFF60; + _halfwidth_fullwidthMap[0x61] = 0x3002; + _halfwidth_fullwidthMap[0x62] = 0x300C; + _halfwidth_fullwidthMap[0x63] = 0x300D; + _halfwidth_fullwidthMap[0x64] = 0x3001; + + // Halfwidth katakana (maps directly to hiragana) + + // Common cases for halfwidth katakana + _halfwidth_fullwidthMap[0x65] = 0x30FB; + + _halfwidth_fullwidthMap[0x66] = 0x30F2; + _halfwidth_fullwidthMap[0x6F] = 0x30C3; + _halfwidth_fullwidthMap[0x70] = 0x30FC; + _halfwidth_fullwidthMap[0x71] = 0x30A2; + _halfwidth_fullwidthMap[0x72] = 0x30A4; + _halfwidth_fullwidthMap[0x73] = 0x30A6; + _halfwidth_fullwidthMap[0x74] = 0x30A8; + _halfwidth_fullwidthMap[0x75] = 0x30AA; + _halfwidth_fullwidthMap[0x76] = 0x30AB; + _halfwidth_fullwidthMap[0x77] = 0x30AD; + _halfwidth_fullwidthMap[0x78] = 0x30AF; + _halfwidth_fullwidthMap[0x79] = 0x30B1; + _halfwidth_fullwidthMap[0x7A] = 0x30B3; + _halfwidth_fullwidthMap[0x7B] = 0x30B5; + _halfwidth_fullwidthMap[0x7C] = 0x30B7; + _halfwidth_fullwidthMap[0x7D] = 0x30B9; + _halfwidth_fullwidthMap[0x7E] = 0x30BB; + _halfwidth_fullwidthMap[0x7F] = 0x30BD; + _halfwidth_fullwidthMap[0x80] = 0x30BF; + _halfwidth_fullwidthMap[0x81] = 0x30C1; + _halfwidth_fullwidthMap[0x82] = 0x30C4; + _halfwidth_fullwidthMap[0x83] = 0x30C6; + _halfwidth_fullwidthMap[0x84] = 0x30C8; + _halfwidth_fullwidthMap[0x85] = 0x30CA; + _halfwidth_fullwidthMap[0x86] = 0x30CB; + _halfwidth_fullwidthMap[0x87] = 0x30CC; + _halfwidth_fullwidthMap[0x88] = 0x30CD; + _halfwidth_fullwidthMap[0x89] = 0x30CE; + _halfwidth_fullwidthMap[0x8A] = 0x30CF; + _halfwidth_fullwidthMap[0x8B] = 0x30D2; + _halfwidth_fullwidthMap[0x8C] = 0x30D5; + _halfwidth_fullwidthMap[0x8D] = 0x30D8; + _halfwidth_fullwidthMap[0x8E] = 0x30DB; + _halfwidth_fullwidthMap[0x8F] = 0x30DE; + _halfwidth_fullwidthMap[0x90] = 0x30DF; + _halfwidth_fullwidthMap[0x91] = 0x30E0; + _halfwidth_fullwidthMap[0x92] = 0x30E1; + _halfwidth_fullwidthMap[0x93] = 0x30E2; + _halfwidth_fullwidthMap[0x94] = 0x30E4; + _halfwidth_fullwidthMap[0x95] = 0x30E6; + _halfwidth_fullwidthMap[0x96] = 0x30E8; + _halfwidth_fullwidthMap[0x97] = 0x30E9; + _halfwidth_fullwidthMap[0x98] = 0x30EA; + _halfwidth_fullwidthMap[0x99] = 0x30EB; + _halfwidth_fullwidthMap[0x9A] = 0x30EC; + _halfwidth_fullwidthMap[0x9B] = 0x30ED; + _halfwidth_fullwidthMap[0x9C] = 0x30EF; + _halfwidth_fullwidthMap[0x9D] = 0x30F3; + _halfwidth_fullwidthMap[0x9E] = 0x3099; + _halfwidth_fullwidthMap[0x9F] = 0x309A; + + _halfwidth_fullwidthMap[0x67] = 0x30a1; + _halfwidth_fullwidthMap[0x68] = 0x30a3; + _halfwidth_fullwidthMap[0x69] = 0x30a5; + _halfwidth_fullwidthMap[0x6A] = 0x30a7; + _halfwidth_fullwidthMap[0x6B] = 0x30a9; + _halfwidth_fullwidthMap[0x6C] = 0x30e3; + _halfwidth_fullwidthMap[0x6D] = 0x30e5; + _halfwidth_fullwidthMap[0x6E] = 0x30e7; + + // Halfwidth Hangul + _halfwidth_fullwidthMap[0xA0] = 0x3164; + // fill in 0xFFA1 - 0xFFBE => 0x3131 - 0x314E + for (i = 0xA1; i < 0xBF; i++) + _halfwidth_fullwidthMap[i] = 0x3090 + i; + _halfwidth_fullwidthMap[0xBF] = 0xFFBF; + _halfwidth_fullwidthMap[0xC0] = 0xFFC0; + _halfwidth_fullwidthMap[0xC1] = 0xFFC1; + // fill in 0xFFC2 - 0xFFC7 => 0x314F - 0x3154 + for (i = 0xC2; i < 0xC8; i++) + _halfwidth_fullwidthMap[i] = 0x308D + i; + _halfwidth_fullwidthMap[0xC8] = 0xFFC8; + _halfwidth_fullwidthMap[0xC9] = 0xFFC9; + // fill in 0xFFCA - 0xFFCF => 0x3155 - 0x315A + for (i = 0xCA; i < 0xD0; i++) + _halfwidth_fullwidthMap[i] = 0x308B + i; + _halfwidth_fullwidthMap[0xD0] = 0xFFD0; + _halfwidth_fullwidthMap[0xD1] = 0xFFD1; + // fill in 0xFFD2 - 0xFFD7 => 0x315B - 0x3160 + for (i = 0xD2; i < 0xD8; i++) + _halfwidth_fullwidthMap[i] = 0x3089 + i; + _halfwidth_fullwidthMap[0xD8] = 0xFFD8; + _halfwidth_fullwidthMap[0xD9] = 0xFFD9; + // fill in 0xFFDA - 0xFFDC => 0x3161 - 0x3163 + for (i = 0xDA; i < 0xDD; i++) + _halfwidth_fullwidthMap[i] = 0x3087 + i; + + // Fullwidth symbols + _halfwidth_fullwidthMap[0xE0] = 0x00A2; + _halfwidth_fullwidthMap[0xE1] = 0x00A3; + _halfwidth_fullwidthMap[0xE2] = 0x00AC; + _halfwidth_fullwidthMap[0xE3] = 0x00AF; + _halfwidth_fullwidthMap[0xE4] = 0x00A6; + _halfwidth_fullwidthMap[0xE5] = 0x00A5; + _halfwidth_fullwidthMap[0xE6] = 0x20A9; + + // 0xFFE7 -> id + _halfwidth_fullwidthMap[0xE7] = 0xFFE7; + + // Halfwidth symbols + _halfwidth_fullwidthMap[0xE8] = 0x2502; + _halfwidth_fullwidthMap[0xE9] = 0x2190; + _halfwidth_fullwidthMap[0xEA] = 0x2191; + _halfwidth_fullwidthMap[0xEB] = 0x2192; + _halfwidth_fullwidthMap[0xEC] = 0x2193; + _halfwidth_fullwidthMap[0xED] = 0x25A0; + _halfwidth_fullwidthMap[0xEE] = 0x25CB; + + // 0xFFEF -> id + _halfwidth_fullwidthMap[0xEF] = 0xFFEF; + + + // + // DONE + // + _isInitialized = true; + } + } +} + +Fast_NormalizeWordFolder::Fast_NormalizeWordFolder() +{ + Initialize(); +} + + +Fast_NormalizeWordFolder::~Fast_NormalizeWordFolder(void) +{ +} + +const char* +Fast_NormalizeWordFolder::UCS4Tokenize(const char *buf, + const char *bufend, + ucs4_t *dstbuf, + ucs4_t *dstbufend, + const char*& origstart, + size_t& tokenlen) const +{ + + ucs4_t c; + const unsigned char *p, *ep; + ucs4_t *q, *eq; + p = reinterpret_cast<const unsigned char *>(buf); + ep = reinterpret_cast<const unsigned char *>(bufend); + + // Skip characters between words + for (;;) { + if (p >= ep) { // End of input buffer, no more words + *dstbuf = 0; + return reinterpret_cast<const char *>(p); + } + if (*p < 128) { // Common case, ASCII + c = *p++; + if (_isWord[c]) + { + origstart = reinterpret_cast<const char *>(p) - 1; + break; + } + } else { + const unsigned char* prev_p = p; + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (IsWordCharOrIA(c)) + { + origstart = reinterpret_cast<const char *>(prev_p); + break; + } + } + } + + // Start saving word. + q = dstbuf; + eq = dstbufend - 3; // Make room for UCS4 char replacement string and NUL + // Doesn't check for space for the first char, assumes that + // word buffer is at least 13 characters + if (c < 128) { // Common case, ASCII + *q++ = _foldCase[c]; + } else { + const char *repl = ReplacementString(c); + if (repl != nullptr) { + size_t repllen = strlen(repl); + if (repllen > 0) + q = Fast_UnicodeUtil::ucs4copy(q,repl); + } else { + c = ToFold(c); + *q++ = c; + } + } + + // Special case for interlinear annotation + if (c == 0xFFF9) { // ANCHOR + // Collect up to and including terminator + for(;;) { + if (p >= ep) { + c = 0; + break; + } + if (*p < 128) { // Note, no exit on plain ASCII + c = *p++; + *q++ = c; + if (q >= eq) { // Junk rest of annotation block + for (;;) { + if (p >= ep) { // End of input buffer + c = 0; + break; + } + if (*p < 128) { // Common case, ASCII + c = *p++; + } else { + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (c == 0xFFFB) { + break; // out of junking loop + } + } + } + break; // out of annotation block processing + } + } else { + c = Fast_UnicodeUtil::GetUTF8Char(p); + *q++ = c; + if (c == 0xFFFB) { // TERMINATOR => Exit condition + break; + } + if (q >= eq) { // Junk rest of word + for (;;) { + if (p >= ep) { // End of input buffer + c = 0; + break; + } + if (*p < 128) { // Common case, ASCII + c = *p++; + } else { + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (c == 0xFFFB) { + break; + } + } + } + break; + } + } + } + } else + + for (;;) { + if (p >= ep) { // End of input buffer + c = 0; + break; + } + if (*p < 128) { // Common case, ASCII + c = *p++; + if (!_isWord[c]) + { + p--; + break; + } + *q++ = _foldCase[c]; + if (q >= eq) { // Junk rest of word + for (;;) { + if (p >= ep) { // End of input buffer + c = 0; + break; + } + if (*p < 128) { // Common case, ASCII + c = *p++; + if (!_isWord[c]) + { + p--; + break; + } + } else { + const unsigned char* prev_p = p; + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (!Fast_UnicodeUtil::IsWordChar(c)) + { + p = prev_p; + break; + } + } + } + break; + } + } else { + const unsigned char* prev_p = p; + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (!Fast_UnicodeUtil::IsWordChar(c)) + { + p = prev_p; + break; + } + const char *repl = ReplacementString(c); + if (repl != nullptr) { + size_t repllen = strlen(repl); + if (repllen > 0) + q = Fast_UnicodeUtil::ucs4copy(q,repl); + } else { + c = ToFold(c); + *q++ = c; + } + if (q >= eq) { // Junk rest of word + for (;;) { + if (p >= ep) { // End of input buffer + c = 0; + break; + } + if (*p < 128) { // Common case, ASCII + c = *p++; + if (!_isWord[c]) + { + p--; + break; + } + } else { + const unsigned char* xprev_p = p; + c = Fast_UnicodeUtil::GetUTF8Char(p); + if (!Fast_UnicodeUtil::IsWordChar(c)) + { + p = xprev_p; + break; + } + } + } + break; + } + } + } + *q = 0; + tokenlen = q - dstbuf; + return reinterpret_cast<const char *>(p); +} diff --git a/fastlib/src/vespa/fastlib/text/normwordfolder.h b/vespalib/src/vespa/fastlib/text/normwordfolder.h index 7136a8b3829..dc345e7d9be 100644 --- a/fastlib/src/vespa/fastlib/text/normwordfolder.h +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.h @@ -16,10 +16,6 @@ private: /** Features */ static bool _doAccentRemoval; - static bool _doSmallToNormalKana; - static bool _doKatakanaToHiragana; - static bool _doKanaAccentCollapsing; - static bool _doFullwidthToBasicLatin; static bool _doSharpSSubstitution; static bool _doLigatureSubstitution; static bool _doMulticharExpansion; @@ -57,8 +53,6 @@ public: static ucs4_t _foldCase[767]; // Up to Spacing Modifiers, inclusize (0x02FF) static ucs4_t _foldCaseHighAscii[256]; // Latin Extended Additional (0x1E00 - 0x1F00) (incl. vietnamese) private: - static ucs4_t _keepCase[767]; - static ucs4_t _keepCaseHighAscii[256]; /** Map the values from range 0x3040 (0) - 0x30FF (191). */ static ucs4_t _kanaMap[192]; static ucs4_t _halfwidth_fullwidthMap[240]; @@ -165,16 +159,9 @@ private: return Fast_UnicodeUtil::IsWordChar(c) || c == 0xFFF9 || c == 0xFFFA || c == 0xFFFB; } - public: Fast_NormalizeWordFolder(); - virtual ~Fast_NormalizeWordFolder(); - virtual size_t FoldedSizeAsUTF8(const char *word) const; - virtual char *FoldUTF8WordToUTF8Quick(char *wordbufpos, const char *word) const; - virtual const char *Tokenize(const char *buf, const char *bufend, char *dstbuf, - char *dstbufend, const char*& origstart, size_t& tokenlen) const; - virtual const char *Tokenize(const char *buf, const char *bufend, ucs4_t *dstbuf, - ucs4_t *dstbufend, const char*& origstart, size_t& tokenlen) const; + ~Fast_NormalizeWordFolder() override; const char* UCS4Tokenize(const char *buf, const char *bufend, ucs4_t *dstbuf, ucs4_t *dstbufend, const char*& origstart, size_t& tokenlen) const override; }; diff --git a/fastlib/src/vespa/fastlib/text/unicodeutil-charprops.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil-charprops.cpp index ee59c5c84dd..ee59c5c84dd 100644 --- a/fastlib/src/vespa/fastlib/text/unicodeutil-charprops.cpp +++ b/vespalib/src/vespa/fastlib/text/unicodeutil-charprops.cpp diff --git a/fastlib/src/vespa/fastlib/text/unicodeutil-lowercase.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil-lowercase.cpp index def65d642cd..def65d642cd 100644 --- a/fastlib/src/vespa/fastlib/text/unicodeutil-lowercase.cpp +++ b/vespalib/src/vespa/fastlib/text/unicodeutil-lowercase.cpp diff --git a/fastlib/src/vespa/fastlib/text/unicodeutil.cpp b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp index 54ccce9f8e8..08107337ee4 100644 --- a/fastlib/src/vespa/fastlib/text/unicodeutil.cpp +++ b/vespalib/src/vespa/fastlib/text/unicodeutil.cpp @@ -2,14 +2,10 @@ #include "unicodeutil.h" #include <cstdlib> -#include <cstdint> -#include <cassert> #include "unicodeutil-charprops.cpp" #include "unicodeutil-lowercase.cpp" -unsigned char Fast_UnicodeUtil::_utf8header[256]; - namespace { class Initialize @@ -30,16 +26,6 @@ Fast_UnicodeUtil::InitTables() */ _compCharProps[(0xFF9E >> 8)][(0xFF9E & 255)] |= 32; _compCharProps[(0xFF9F >> 8)][(0xFF9F & 255)] |= 32; - - for (uint32_t i = 0; i < 256; i++) { _utf8header[i] = 0; } - - // Initialize _utf8header array - for (uint32_t i = 0x00; i <= 0x7F; i++) { _utf8header[i] = 1; } - for (uint32_t i = 0xC0; i <= 0xDF; i++) { _utf8header[i] = 2; } - for (uint32_t i = 0xE0; i <= 0xEF; i++) { _utf8header[i] = 3; } - for (uint32_t i = 0xF0; i <= 0xF7; i++) { _utf8header[i] = 4; } - for (uint32_t i = 0xF8; i <= 0xFB; i++) { _utf8header[i] = 5; } - for (uint32_t i = 0xFC; i <= 0xFD; i++) { _utf8header[i] = 6; } } char * @@ -140,31 +126,6 @@ Fast_UnicodeUtil::ucs4copy(ucs4_t *dst, const char *src) return p; } -char * -Fast_UnicodeUtil::strdupLAT1(const char *src) -{ - char *res; - size_t reslen; - ucs4_t i; - const unsigned char *p; - char *q; - - reslen = 0; - p = reinterpret_cast<const unsigned char *>(src); - while ((i = *p++) != 0) { - reslen += utf8clen(i); - } - res = static_cast<char *>(malloc(reslen + 1)); - p = reinterpret_cast<const unsigned char *>(src); - q = res; - while ((i = *p++) != 0) { - q = utf8cput(q, i); - } - assert(q == res + reslen); - *q = 0; - return res; -} - ucs4_t Fast_UnicodeUtil::GetUTF8CharNonAscii(unsigned const char *&src) { diff --git a/fastlib/src/vespa/fastlib/text/unicodeutil.h b/vespalib/src/vespa/fastlib/text/unicodeutil.h index d6ba0ae0e23..e155af134fb 100644 --- a/fastlib/src/vespa/fastlib/text/unicodeutil.h +++ b/vespalib/src/vespa/fastlib/text/unicodeutil.h @@ -6,11 +6,6 @@ #include <sys/types.h> -#define FASTLIB_UNICODEUTIL_USES_WORDCHARPROP - - -/** utf8_t is the type of the multi-byte UTF-8 character components */ -typedef unsigned char utf8_t; /** ucs4_t is the type of the 4-byte UCS4 characters */ typedef unsigned int ucs4_t; @@ -22,10 +17,6 @@ typedef unsigned int ucs4_t; class Fast_UnicodeUtil { private: /** - * Table for easy lookup of UTF8 character length in bytes - */ - static unsigned char _utf8header[256]; - /** * Is true when the tables have been initialized. Is set by * InitTables, and should be protected by the _initMutex before * inspection. @@ -220,14 +211,6 @@ public: static int utf8cmp(const char *s1, const ucs4_t *s2); /** - * Copy an ISO-8859-1 string to an UTF-8 string. - * @param src The source ISO-8859-1 string. - * @return Pointer to a new alloacted buffer with the UTF-8 result. - * NB Only use in local test - */ - static char *strdupLAT1(const char *src); - - /** * Test for terminal punctuation. * @param testchar the UCS4 character to test. * @return true if testchar is a terminal punctuation character, diff --git a/fastlib/src/vespa/fastlib/text/wordfolder.h b/vespalib/src/vespa/fastlib/text/wordfolder.h index b0edd2d1385..8b75606f828 100644 --- a/fastlib/src/vespa/fastlib/text/wordfolder.h +++ b/vespalib/src/vespa/fastlib/text/wordfolder.h @@ -6,7 +6,7 @@ class Fast_WordFolder { public: - virtual ~Fast_WordFolder(void); + virtual ~Fast_WordFolder() = default; virtual const char* UCS4Tokenize(const char *buf, const char *bufend, ucs4_t *dstbuf, diff --git a/vespalib/src/vespa/vespalib/CMakeLists.txt b/vespalib/src/vespa/vespalib/CMakeLists.txt index 5e7a4bb1fd3..024d61b89e5 100644 --- a/vespalib/src/vespa/vespalib/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/CMakeLists.txt @@ -29,6 +29,8 @@ vespa_add_library(vespalib $<TARGET_OBJECTS:vespalib_vespalib_time> $<TARGET_OBJECTS:vespalib_vespalib_trace> $<TARGET_OBJECTS:vespalib_vespalib_util> + $<TARGET_OBJECTS:fastlib_io> + $<TARGET_OBJECTS:fastlib_text> INSTALL lib64 DEPENDS ${VESPA_GCC_LIB} diff --git a/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp b/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp index 8e7f1265550..00916ec9fac 100644 --- a/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp +++ b/vespamalloc/src/vespamalloc/malloc/load_as_huge.cpp @@ -20,8 +20,9 @@ mmap_huge(size_t sz) { assert ((sz % HUGEPAGE_SIZE) == 0); void * mem = mmap(nullptr, sz, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); assert(mem != MAP_FAILED); - int retval = madvise(mem, sz, MADV_HUGEPAGE); - assert(retval == 0); + if (madvise(mem, sz, MADV_HUGEPAGE) != 0) { + perror("load_as_huge.cpp:mmap_huge => madvise( MADV_HUGEPAGE) failed"); + } return mem; } @@ -74,8 +75,9 @@ remap_segments(size_t base_vaddr, const Elf64_Phdr * segments, size_t count) { size_t sz = segments[i].p_memsz; last_end = dest + sz; - int madvise_retval = madvise(dest, sz, MADV_HUGEPAGE); - assert(madvise_retval == 0); + if (madvise(dest, sz, MADV_HUGEPAGE) != 0) { + perror("load_as_huge.cpp:mmap_huge => madvise( MADV_HUGEPAGE) failed"); + } non_optimized_non_inlined_memcpy(dest, reinterpret_cast<void*>(vaddr), sz); int prot = PROT_READ; if (segments[i].p_flags & PF_X) prot|= PROT_EXEC; |