diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /vsm/src/tests |
Publish
Diffstat (limited to 'vsm/src/tests')
30 files changed, 2250 insertions, 0 deletions
diff --git a/vsm/src/tests/charbuffer/.gitignore b/vsm/src/tests/charbuffer/.gitignore new file mode 100644 index 00000000000..2c980038fb5 --- /dev/null +++ b/vsm/src/tests/charbuffer/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +charbuffer_test +vsm_charbuffer_test_app diff --git a/vsm/src/tests/charbuffer/CMakeLists.txt b/vsm/src/tests/charbuffer/CMakeLists.txt new file mode 100644 index 00000000000..38d1f519714 --- /dev/null +++ b/vsm/src/tests/charbuffer/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vsm_charbuffer_test_app + SOURCES + charbuffer.cpp + DEPENDS + vsm +) +vespa_add_test(NAME vsm_charbuffer_test_app COMMAND vsm_charbuffer_test_app) diff --git a/vsm/src/tests/charbuffer/DESC b/vsm/src/tests/charbuffer/DESC new file mode 100644 index 00000000000..2f1758f837e --- /dev/null +++ b/vsm/src/tests/charbuffer/DESC @@ -0,0 +1 @@ +charbuffer test. Take a look at charbuffer.cpp for details. diff --git a/vsm/src/tests/charbuffer/FILES b/vsm/src/tests/charbuffer/FILES new file mode 100644 index 00000000000..ef12614a361 --- /dev/null +++ b/vsm/src/tests/charbuffer/FILES @@ -0,0 +1 @@ +charbuffer.cpp diff --git a/vsm/src/tests/charbuffer/charbuffer.cpp b/vsm/src/tests/charbuffer/charbuffer.cpp new file mode 100644 index 00000000000..b285005de42 --- /dev/null +++ b/vsm/src/tests/charbuffer/charbuffer.cpp @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("charbuffer_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/vsm/common/charbuffer.h> + +namespace vsm { + +class CharBufferTest : public vespalib::TestApp +{ +private: + void test(); +public: + int Main(); +}; + +void +CharBufferTest::test() +{ + { // empty + CharBuffer buf; + EXPECT_EQUAL(buf.getLength(), 0u); + EXPECT_EQUAL(buf.getPos(), 0u); + EXPECT_EQUAL(buf.getRemaining(), 0u); + } + { // explicit length + CharBuffer buf(8); + EXPECT_EQUAL(buf.getLength(), 8u); + EXPECT_EQUAL(buf.getPos(), 0u); + EXPECT_EQUAL(buf.getRemaining(), 8u); + } + { // resize + CharBuffer buf(8); + EXPECT_EQUAL(buf.getLength(), 8u); + buf.resize(16); + EXPECT_EQUAL(buf.getLength(), 16u); + buf.resize(8); + EXPECT_EQUAL(buf.getLength(), 16u); + } + { // put with triggered resize + CharBuffer buf(8); + buf.put("123456", 6); + EXPECT_EQUAL(buf.getLength(), 8u); + EXPECT_EQUAL(buf.getPos(), 6u); + EXPECT_EQUAL(buf.getRemaining(), 2u); + EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456"); + buf.put("789", 3); + EXPECT_EQUAL(buf.getLength(), 12u); + EXPECT_EQUAL(buf.getPos(), 9u); + EXPECT_EQUAL(buf.getRemaining(), 3u); + EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789"); + buf.put('a'); + EXPECT_EQUAL(buf.getLength(), 12u); + EXPECT_EQUAL(buf.getPos(), 10u); + EXPECT_EQUAL(buf.getRemaining(), 2u); + EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789a"); + buf.reset(); + EXPECT_EQUAL(buf.getLength(), 12u); + EXPECT_EQUAL(buf.getPos(), 0u); + EXPECT_EQUAL(buf.getRemaining(), 12u); + buf.put("bcd", 3); + EXPECT_EQUAL(buf.getLength(), 12u); + EXPECT_EQUAL(buf.getPos(), 3u); + EXPECT_EQUAL(buf.getRemaining(), 9u); + EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "bcd"); + } +} + +int +CharBufferTest::Main() +{ + TEST_INIT("charbuffer_test"); + + test(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(vsm::CharBufferTest); diff --git 
a/vsm/src/tests/config/mail.cfg b/vsm/src/tests/config/mail.cfg new file mode 100644 index 00000000000..ce830beac23 --- /dev/null +++ b/vsm/src/tests/config/mail.cfg @@ -0,0 +1,116 @@ +datatype[2] +datatype[0].id 1012 +datatype[0].arraytype[1] +datatype[0].arraytype[0].datatype 12 +datatype[1].id 1013 +datatype[1].arraytype[1] +datatype[1].arraytype[0].datatype 13 +documenttype[1] +documenttype[0].name mail +documenttype[0].version 0 +documenttype[0].inherits[0] +documenttype[0].field[26] +documenttype[0].field[0].name mailid +documenttype[0].field[0].id 2 +documenttype[0].field[0].header true +documenttype[0].field[0].datatype 2 +documenttype[0].field[1].name date +documenttype[0].field[1].id 3 +documenttype[0].field[1].header true +documenttype[0].field[1].datatype 0 +documenttype[0].field[2].name from +documenttype[0].field[2].id 4 +documenttype[0].field[2].header true +documenttype[0].field[2].datatype 12 +documenttype[0].field[3].name replyto +documenttype[0].field[3].id 5 +documenttype[0].field[3].header true +documenttype[0].field[3].datatype 12 +documenttype[0].field[4].name to +documenttype[0].field[4].id 6 +documenttype[0].field[4].header true +documenttype[0].field[4].datatype 12 +documenttype[0].field[5].name cc +documenttype[0].field[5].id 7 +documenttype[0].field[5].header true +documenttype[0].field[5].datatype 12 +documenttype[0].field[6].name bcc +documenttype[0].field[6].id 8 +documenttype[0].field[6].header true +documenttype[0].field[6].datatype 12 +documenttype[0].field[7].name subject +documenttype[0].field[7].id 9 +documenttype[0].field[7].header true +documenttype[0].field[7].datatype 12 +documenttype[0].field[8].name body +documenttype[0].field[8].id 10 +documenttype[0].field[8].header false +documenttype[0].field[8].datatype 12 +documenttype[0].field[9].name attachmentcount +documenttype[0].field[9].id 11 +documenttype[0].field[9].header false +documenttype[0].field[9].datatype 0 +documenttype[0].field[10].name attachmentpartids 
+documenttype[0].field[10].id 12 +documenttype[0].field[10].header false +documenttype[0].field[10].datatype 2 +documenttype[0].field[11].name attachmentsizes +documenttype[0].field[11].id 13 +documenttype[0].field[11].header false +documenttype[0].field[11].datatype 2 +documenttype[0].field[12].name attachmentnames +documenttype[0].field[12].id 14 +documenttype[0].field[12].header false +documenttype[0].field[12].datatype 2 +documenttype[0].field[13].name attachmenttypes +documenttype[0].field[13].id 15 +documenttype[0].field[13].header false +documenttype[0].field[13].datatype 2 +documenttype[0].field[14].name attachmentlanguages +documenttype[0].field[14].id 16 +documenttype[0].field[14].header false +documenttype[0].field[14].datatype 2 +documenttype[0].field[15].name attachmentcontent +documenttype[0].field[15].id 17 +documenttype[0].field[15].header false +documenttype[0].field[15].datatype 2 +documenttype[0].field[16].name bodylanguage +documenttype[0].field[16].id 18 +documenttype[0].field[16].header false +documenttype[0].field[16].datatype 2 +documenttype[0].field[17].name bodyencoding +documenttype[0].field[17].id 19 +documenttype[0].field[17].header false +documenttype[0].field[17].datatype 2 +documenttype[0].field[18].name collectionid +documenttype[0].field[18].id 20 +documenttype[0].field[18].header true +documenttype[0].field[18].datatype 4 +documenttype[0].field[19].name content +documenttype[0].field[19].id 21 +documenttype[0].field[19].header true +documenttype[0].field[19].datatype 12 +documenttype[0].field[20].name bodymeta +documenttype[0].field[20].id 50027053 +documenttype[0].field[20].header false +documenttype[0].field[20].datatype 13 +documenttype[0].field[21].name attachments +documenttype[0].field[21].id 1081629685 +documenttype[0].field[21].header false +documenttype[0].field[21].datatype 1012 +documenttype[0].field[22].name attachmentsmeta +documenttype[0].field[22].id 1203055625 +documenttype[0].field[22].header false 
+documenttype[0].field[22].datatype 1013 +documenttype[0].field[23].name tolist +documenttype[0].field[23].id 1084918181 +documenttype[0].field[23].header false +documenttype[0].field[23].datatype 1012 +documenttype[0].field[24].name cclist +documenttype[0].field[24].id 1733332403 +documenttype[0].field[24].header false +documenttype[0].field[24].datatype 1012 +documenttype[0].field[25].name bcclist +documenttype[0].field[25].id 410546306 +documenttype[0].field[25].header false +documenttype[0].field[25].datatype 1012 diff --git a/vsm/src/tests/config/vsm.cfg b/vsm/src/tests/config/vsm.cfg new file mode 100644 index 00000000000..dc50447f623 --- /dev/null +++ b/vsm/src/tests/config/vsm.cfg @@ -0,0 +1,3 @@ +doctype file:../config/mail.cfg +storagecfg "" +vsmfields file:../config/vsmfields.cfg diff --git a/vsm/src/tests/config/vsmfields.cfg b/vsm/src/tests/config/vsmfields.cfg new file mode 100644 index 00000000000..30f1c8ed8b1 --- /dev/null +++ b/vsm/src/tests/config/vsmfields.cfg @@ -0,0 +1,297 @@ +threadsperquery 4 +documentverificationlevel=0 +searchall 1 +fieldspec[17] +fieldspec[0].name bcc +fieldspec[0].searchmethod AUTOUTF8 +fieldspec[0].arg1 "" +fieldspec[1].name cc +fieldspec[1].searchmethod AUTOUTF8 +fieldspec[1].arg1 "" +fieldspec[2].name from +fieldspec[2].searchmethod AUTOUTF8 +fieldspec[2].arg1 "" +fieldspec[3].name date +fieldspec[3].searchmethod INT32 +fieldspec[3].arg1 "" +fieldspec[4].name replyto +fieldspec[4].searchmethod AUTOUTF8 +fieldspec[4].arg1 "" +fieldspec[5].name subject +fieldspec[5].searchmethod AUTOUTF8 +fieldspec[5].arg1 "" +fieldspec[6].name to +fieldspec[6].searchmethod AUTOUTF8 +fieldspec[6].arg1 "" +fieldspec[7].name body +fieldspec[7].searchmethod AUTOUTF8 +fieldspec[7].arg1 "" +fieldspec[8].name bodymeta +fieldspec[8].searchmethod AUTOUTF8 +fieldspec[8].arg1 "" +fieldspec[9].name mailid +fieldspec[9].searchmethod AUTOUTF8 +fieldspec[9].arg1 "" +fieldspec[10].name attachmentcount +fieldspec[10].searchmethod INT32 
+fieldspec[10].arg1 "" +fieldspec[11].name attachmentcontent +fieldspec[11].searchmethod AUTOUTF8 +fieldspec[11].arg1 "" +fieldspec[12].name attachmenttypes +fieldspec[12].searchmethod AUTOUTF8 +fieldspec[12].arg1 "" +fieldspec[13].name attachmentnames +fieldspec[13].searchmethod AUTOUTF8 +fieldspec[13].arg1 "" +fieldspec[14].name attachmentlanguages +fieldspec[14].searchmethod AUTOUTF8 +fieldspec[14].arg1 "" +fieldspec[15].name URI +fieldspec[15].searchmethod AUTOUTF8 +fieldspec[15].arg1 "" +fieldspec[16].name vsm_whichfieldmatched +fieldspec[16].searchmethod AUTOUTF8 +fieldspec[16].arg1 "" +index[26] +index[0].name default +index[0].field[10] +index[0].field[0].name from +index[0].field[1].name to +index[0].field[2].name cc +index[0].field[3].name bcc +index[0].field[4].name subject +index[0].field[5].name body +index[0].field[6].name attachmentcontent +index[0].field[7].name attachmentnames +index[0].field[8].name attachmenttypes +index[0].field[9].name date +index[1].name all +index[1].field[8] +index[1].field[0].name to +index[1].field[1].name cc +index[1].field[2].name bcc +index[1].field[3].name subject +index[1].field[4].name body +index[1].field[5].name attachmentcontent +index[1].field[6].name attachmentnames +index[1].field[7].name attachmenttypes +index[2].name header +index[2].field[6] +index[2].field[0].name from +index[2].field[1].name replyto +index[2].field[2].name to +index[2].field[3].name cc +index[2].field[4].name bcc +index[2].field[5].name subject +index[3].name senders +index[3].field[2] +index[3].field[0].name from +index[3].field[1].name replyto +index[4].name recipients +index[4].field[3] +index[4].field[0].name to +index[4].field[1].name cc +index[4].field[2].name bcc +index[5].name address +index[5].field[5] +index[5].field[0].name from +index[5].field[1].name replyto +index[5].field[2].name to +index[5].field[3].name cc +index[5].field[4].name bcc +index[6].name body +index[6].field[2] +index[6].field[0].name subject 
+index[6].field[1].name body +index[7].name meta +index[7].field[2] +index[7].field[0].name attachmentcontent +index[7].field[1].name attachmenttypes +index[8].name index1 +index[8].field[1] +index[8].field[0].name bcc +index[9].name index2 +index[9].field[2] +index[9].field[0].name bcc +index[9].field[1].name cc +index[10].name index3 +index[10].field[3] +index[10].field[0].name bcc +index[10].field[1].name cc +index[10].field[2].name from +index[11].name index4 +index[11].field[4] +index[11].field[0].name bcc +index[11].field[1].name cc +index[11].field[2].name from +index[11].field[3].name date +index[12].name index5 +index[12].field[5] +index[12].field[0].name bcc +index[12].field[1].name cc +index[12].field[2].name from +index[12].field[3].name date +index[12].field[4].name replyto +index[13].name index6 +index[13].field[6] +index[13].field[0].name bcc +index[13].field[1].name cc +index[13].field[2].name from +index[13].field[3].name date +index[13].field[4].name replyto +index[13].field[5].name subject +index[14].name index7 +index[14].field[7] +index[14].field[0].name bcc +index[14].field[1].name cc +index[14].field[2].name from +index[14].field[3].name date +index[14].field[4].name replyto +index[14].field[5].name subject +index[14].field[6].name to +index[15].name index8 +index[15].field[8] +index[15].field[0].name bcc +index[15].field[1].name cc +index[15].field[2].name from +index[15].field[3].name date +index[15].field[4].name replyto +index[15].field[5].name subject +index[15].field[6].name to +index[15].field[7].name body +index[16].name index9 +index[16].field[9] +index[16].field[0].name bcc +index[16].field[1].name cc +index[16].field[2].name from +index[16].field[3].name date +index[16].field[4].name replyto +index[16].field[5].name subject +index[16].field[6].name to +index[16].field[7].name body +index[16].field[8].name bodymeta +index[17].name index10 +index[17].field[10] +index[17].field[0].name bcc +index[17].field[1].name cc 
+index[17].field[2].name from +index[17].field[3].name date +index[17].field[4].name replyto +index[17].field[5].name subject +index[17].field[6].name to +index[17].field[7].name body +index[17].field[8].name bodymeta +index[17].field[9].name mailid +index[18].name index11 +index[18].field[11] +index[18].field[0].name bcc +index[18].field[1].name cc +index[18].field[2].name from +index[18].field[3].name date +index[18].field[4].name replyto +index[18].field[5].name subject +index[18].field[6].name to +index[18].field[7].name body +index[18].field[8].name bodymeta +index[18].field[9].name mailid +index[18].field[10].name attachmentcount +index[19].name index12 +index[19].field[12] +index[19].field[0].name bcc +index[19].field[1].name cc +index[19].field[2].name from +index[19].field[3].name date +index[19].field[4].name replyto +index[19].field[5].name subject +index[19].field[6].name to +index[19].field[7].name body +index[19].field[8].name bodymeta +index[19].field[9].name mailid +index[19].field[10].name attachmentcount +index[19].field[11].name attachmentcontent +index[20].name index13 +index[20].field[13] +index[20].field[0].name bcc +index[20].field[1].name cc +index[20].field[2].name from +index[20].field[3].name date +index[20].field[4].name replyto +index[20].field[5].name subject +index[20].field[6].name to +index[20].field[7].name body +index[20].field[8].name bodymeta +index[20].field[9].name mailid +index[20].field[10].name attachmentcount +index[20].field[11].name attachmentcontent +index[20].field[12].name attachmenttypes +index[21].name index14 +index[21].field[14] +index[21].field[0].name bcc +index[21].field[1].name cc +index[21].field[2].name from +index[21].field[3].name date +index[21].field[4].name replyto +index[21].field[5].name subject +index[21].field[6].name to +index[21].field[7].name body +index[21].field[8].name bodymeta +index[21].field[9].name mailid +index[21].field[10].name attachmentcount +index[21].field[11].name attachmentcontent 
+index[21].field[12].name attachmenttypes +index[21].field[13].name attachmentnames +index[22].name index15 +index[22].field[15] +index[22].field[0].name bcc +index[22].field[1].name cc +index[22].field[2].name from +index[22].field[3].name date +index[22].field[4].name replyto +index[22].field[5].name subject +index[22].field[6].name to +index[22].field[7].name body +index[22].field[8].name bodymeta +index[22].field[9].name mailid +index[22].field[10].name attachmentcount +index[22].field[11].name attachmentcontent +index[22].field[12].name attachmenttypes +index[22].field[13].name attachmentnames +index[22].field[14].name attachmentlanguages +index[23].name index16 +index[23].field[15] +index[23].field[0].name bcc +index[23].field[1].name cc +index[23].field[2].name from +index[23].field[3].name date +index[23].field[4].name replyto +index[23].field[5].name subject +index[23].field[6].name to +index[23].field[7].name body +index[23].field[8].name bodymeta +index[23].field[9].name mailid +index[23].field[10].name attachmentcount +index[23].field[11].name attachmentcontent +index[23].field[12].name attachmenttypes +index[23].field[13].name attachmentnames +index[23].field[14].name attachmentlanguages +index[24].name index17 +index[24].field[15] +index[24].field[0].name bcc +index[24].field[1].name cc +index[24].field[2].name from +index[24].field[3].name date +index[24].field[4].name replyto +index[24].field[5].name subject +index[24].field[6].name to +index[24].field[7].name body +index[24].field[8].name bodymeta +index[24].field[9].name mailid +index[24].field[10].name attachmentcount +index[24].field[11].name attachmentcontent +index[24].field[12].name attachmenttypes +index[24].field[13].name attachmentnames +index[24].field[14].name attachmentlanguages +index[25].name date +index[25].field[1] +index[25].field[0].name date diff --git a/vsm/src/tests/create-test.sh b/vsm/src/tests/create-test.sh new file mode 100755 index 00000000000..c4259526089 --- /dev/null 
+++ b/vsm/src/tests/create-test.sh @@ -0,0 +1,73 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +gen_ignore_file() { + echo "generating '$1' ..." + echo ".depend" > $1 + echo "Makefile" >> $1 + echo "${test}_test" >> $1 +} + +gen_project_file() { + echo "generating '$1' ..." + echo "APPLICATION ${test}_test" > $1 + echo "OBJS $test" >> $1 + echo "LIBS vsm/vsm" >> $1 + echo "EXTERNALLIBS vespalib vespalog" >> $1 + echo "" >> $1 + echo "CUSTOMMAKE" >> $1 + echo "test: depend ${test}_test" >> $1 + echo -e "\t@./${test}_test" >> $1 +} + +gen_source() { + echo "generating '$1' ..." + echo "#include <vespa/log/log.h>" > $1 + echo "LOG_SETUP(\"${test}_test\");" >> $1 + echo "#include <vespa/fastos/fastos.h>" >> $1 + echo "#include <vespa/vespalib/testkit/testapp.h>" >> $1 + echo "" >> $1 + echo "// using namespace ;" >> $1 + echo "" >> $1 + echo "TEST_SETUP(Test);" >> $1 + echo "" >> $1 + echo "int" >> $1 + echo "Test::Main()" >> $1 + echo "{" >> $1 + echo " TEST_INIT(\"${test}_test\");" >> $1 + echo " TEST_DONE();" >> $1 + echo "}" >> $1 +} + +gen_desc() { + echo "generating '$1' ..." + echo "$test test. Take a look at $test.cpp for details." > $1 +} + +gen_file_list() { + echo "generating '$1' ..." + echo "$test.cpp" > $1 +} + +if [ $# -ne 1 ]; then + echo "usage: $0 <name>" + echo " name: name of the test to create" + exit 1 +fi + +test=$1 +if [ -e $test ]; then + echo "$test already present, don't want to mess it up..." + exit 1 +fi + +echo "creating directory '$test' ..." 
+mkdir -p $test || exit 1 +cd $test || exit 1 +test=`basename $test` + +gen_ignore_file .cvsignore +gen_project_file fastos.project +gen_source $test.cpp +gen_desc DESC +gen_file_list FILES diff --git a/vsm/src/tests/docsum/.gitignore b/vsm/src/tests/docsum/.gitignore new file mode 100644 index 00000000000..9a697a94de8 --- /dev/null +++ b/vsm/src/tests/docsum/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +docsum_test +vsm_docsum_test_app diff --git a/vsm/src/tests/docsum/CMakeLists.txt b/vsm/src/tests/docsum/CMakeLists.txt new file mode 100644 index 00000000000..506e0a9bf66 --- /dev/null +++ b/vsm/src/tests/docsum/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vsm_docsum_test_app + SOURCES + docsum.cpp + DEPENDS + vsm +) +vespa_add_test(NAME vsm_docsum_test_app COMMAND vsm_docsum_test_app) diff --git a/vsm/src/tests/docsum/DESC b/vsm/src/tests/docsum/DESC new file mode 100644 index 00000000000..a2008f9b6c4 --- /dev/null +++ b/vsm/src/tests/docsum/DESC @@ -0,0 +1 @@ +docsum test. Take a look at docsum.cpp for details. diff --git a/vsm/src/tests/docsum/FILES b/vsm/src/tests/docsum/FILES new file mode 100644 index 00000000000..0ada8d30e81 --- /dev/null +++ b/vsm/src/tests/docsum/FILES @@ -0,0 +1 @@ +docsum.cpp diff --git a/vsm/src/tests/docsum/docsum.cpp b/vsm/src/tests/docsum/docsum.cpp new file mode 100644 index 00000000000..366fae65849 --- /dev/null +++ b/vsm/src/tests/docsum/docsum.cpp @@ -0,0 +1,296 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("docsum_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vector> +#include <vespa/document/fieldvalue/fieldvalues.h> +#include <vespa/vsm/common/docsum.h> +#include <vespa/vsm/vsm/flattendocsumwriter.h> +#include <vespa/vsm/vsm/jsondocsumwriter.h> + +using namespace document; + +namespace vsm { + +template <typename T> +class Vector : public std::vector<T> +{ +public: + Vector<T> & add(T v) { this->push_back(v); return *this; } +}; + +typedef Vector<std::string> StringList; +typedef Vector<std::pair<std::string, int32_t> > WeightedStringList; + + +class TestDocument : public vsm::Document +{ +private: + std::vector<FieldValueContainer> _fields; + +public: + TestDocument(const search::DocumentIdT & docId, size_t numFields) : vsm::Document(docId, numFields), _fields(numFields) {} + virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) { + if (fId < _fields.size()) { + _fields[fId].reset(fv.release()); + return true; + } + return false; + } + virtual const document::FieldValue * getField(FieldIdT fId) const { + if (fId < _fields.size()) { + return _fields[fId].get(); + } + return NULL; + } +}; + + +class DocsumTest : public vespalib::TestApp +{ +private: + ArrayFieldValue createFieldValue(const StringList & fv); + WeightedSetFieldValue createFieldValue(const WeightedStringList & fv); + + void assertFlattenDocsumWriter(const FieldValue & fv, const std::string & exp) { + FlattenDocsumWriter fdw; + assertFlattenDocsumWriter(fdw, fv, exp); + } + void assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp); + void assertJSONDocsumWriter(const FieldValue & fv, const std::string & exp) { + JSONDocsumWriter jdw; + assertJSONDocsumWriter(jdw, fv, exp); + } + void assertJSONDocsumWriter(JSONDocsumWriter & jdw, const FieldValue & fv, const std::string & exp); + + void testFlattenDocsumWriter(); + void testJSONDocsumWriter(); + void 
requireThatJSONDocsumWriterHandlesMap(); + void testDocSumCache(); + +public: + int Main(); +}; + +ArrayFieldValue +DocsumTest::createFieldValue(const StringList & fv) +{ + + static ArrayDataType type(*DataType::STRING); + ArrayFieldValue afv(type); + for (size_t i = 0; i < fv.size(); ++i) { + afv.add(StringFieldValue(fv[i])); + } + return afv; +} + +WeightedSetFieldValue +DocsumTest::createFieldValue(const WeightedStringList & fv) +{ + static WeightedSetDataType type(*DataType::STRING, false, false); + WeightedSetFieldValue wsfv(type); + for (size_t i = 0; i < fv.size(); ++i) { + wsfv.add(StringFieldValue(fv[i].first), fv[i].second); + } + return wsfv; +} + +void +DocsumTest::assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp) +{ + FieldPath empty; + fv.iterateNested(empty.begin(), empty.end(), fdw); + std::string actual(fdw.getResult().getBuffer(), fdw.getResult().getPos()); + EXPECT_EQUAL(actual, exp); +} + +void +DocsumTest::assertJSONDocsumWriter(JSONDocsumWriter & jdw, const FieldValue & fv, const std::string & exp) +{ + jdw.write(fv); + EXPECT_EQUAL(jdw.getResult(), exp); +} + +void +DocsumTest::testFlattenDocsumWriter() +{ + { // basic tests + assertFlattenDocsumWriter(StringFieldValue("foo bar"), "foo bar"); + assertFlattenDocsumWriter(RawFieldValue("foo bar"), "foo bar"); + assertFlattenDocsumWriter(LongFieldValue(123456789), "123456789"); + assertFlattenDocsumWriter(createFieldValue(StringList().add("foo bar").add("baz").add(" qux ")), + "foo bar baz qux "); + } + { // test multiple invocations + FlattenDocsumWriter fdw("#"); + assertFlattenDocsumWriter(fdw, StringFieldValue("foo"), "foo"); + assertFlattenDocsumWriter(fdw, StringFieldValue("bar"), "foo#bar"); + fdw.clear(); + assertFlattenDocsumWriter(fdw, StringFieldValue("baz"), "baz"); + assertFlattenDocsumWriter(fdw, StringFieldValue("qux"), "baz qux"); + } + { // test resizing + FlattenDocsumWriter fdw("#"); + EXPECT_EQUAL(fdw.getResult().getPos(),
0u); + EXPECT_EQUAL(fdw.getResult().getLength(), 32u); + assertFlattenDocsumWriter(fdw, StringFieldValue("aaaabbbbccccddddeeeeffffgggghhhh"), + "aaaabbbbccccddddeeeeffffgggghhhh"); + EXPECT_EQUAL(fdw.getResult().getPos(), 32u); + EXPECT_EQUAL(fdw.getResult().getLength(), 32u); + assertFlattenDocsumWriter(fdw, StringFieldValue("aaaa"), + "aaaabbbbccccddddeeeeffffgggghhhh#aaaa"); + EXPECT_EQUAL(fdw.getResult().getPos(), 37u); + EXPECT_TRUE(fdw.getResult().getLength() >= 37u); + fdw.clear(); + EXPECT_EQUAL(fdw.getResult().getPos(), 0u); + EXPECT_TRUE(fdw.getResult().getLength() >= 37u); + } +} + +void +DocsumTest::testJSONDocsumWriter() +{ + { // basic types + assertJSONDocsumWriter(LongFieldValue(123456789), "123456789"); + assertJSONDocsumWriter(FloatFieldValue(12.34), "12.34"); + assertJSONDocsumWriter(StringFieldValue("foo bar"), "\"foo bar\""); + } + { // collection field values + assertJSONDocsumWriter(createFieldValue(StringList().add("foo").add("bar").add("baz")), + "[\"foo\",\"bar\",\"baz\"]"); + assertJSONDocsumWriter(createFieldValue(WeightedStringList().add(std::make_pair("bar", 20)). + add(std::make_pair("baz", 30)). 
+ add(std::make_pair("foo", 10))), + "[[\"bar\",20],[\"baz\",30],[\"foo\",10]]"); + } + { // struct field value + StructDataType subType("substruct"); + Field fd("d", 0, *DataType::STRING, true); + Field fe("e", 1, *DataType::STRING, true); + subType.addField(fd); + subType.addField(fe); + StructFieldValue subValue(subType); + subValue.setValue(fd, StringFieldValue("baz")); + subValue.setValue(fe, StringFieldValue("qux")); + + StructDataType type("struct"); + Field fa("a", 0, *DataType::STRING, true); + Field fb("b", 1, *DataType::STRING, true); + Field fc("c", 2, subType, true); + type.addField(fa); + type.addField(fb); + type.addField(fc); + StructFieldValue value(type); + value.setValue(fa, StringFieldValue("foo")); + value.setValue(fb, StringFieldValue("bar")); + value.setValue(fc, subValue); + + + { // select a subset and then all + JSONDocsumWriter jdw; + DocsumFieldSpec::FieldIdentifierVector fields; + fields.push_back(DocsumFieldSpec::FieldIdentifier( + 0, *type.buildFieldPath("a"))); + fields.push_back(DocsumFieldSpec::FieldIdentifier( + 0, *type.buildFieldPath("c.e"))); + jdw.setInputFields(fields); + assertJSONDocsumWriter(jdw, value, "{\"a\":\"foo\",\"c\":{\"e\":\"qux\"}}"); + jdw.clear(); + assertJSONDocsumWriter(jdw, value, "{\"a\":\"foo\",\"b\":\"bar\",\"c\":{\"d\":\"baz\",\"e\":\"qux\"}}"); + } + } + { // multiple invocations + JSONDocsumWriter jdw; + assertJSONDocsumWriter(jdw, StringFieldValue("foo"), "\"foo\""); + assertJSONDocsumWriter(jdw, StringFieldValue("bar"), "\"foo\"\"bar\""); + jdw.clear(); + assertJSONDocsumWriter(jdw, StringFieldValue("baz"), "\"baz\""); + } +} + +void +DocsumTest::requireThatJSONDocsumWriterHandlesMap() +{ + { // map<string, string> + MapDataType mapType(*DataType::STRING, *DataType::STRING); + MapFieldValue mapfv(mapType); + EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), StringFieldValue("v1"))); + EXPECT_TRUE(mapfv.put(StringFieldValue("k2"), StringFieldValue("v2"))); + assertJSONDocsumWriter(mapfv, 
"[{\"key\":\"k1\",\"value\":\"v1\"},{\"key\":\"k2\",\"value\":\"v2\"}]"); + } + { // map<string, struct> + StructDataType structType("struct"); + Field fa("a", 0, *DataType::STRING, true); + Field fb("b", 1, *DataType::STRING, true); + structType.addField(fa); + structType.addField(fb); + StructFieldValue structValue(structType); + structValue.setValue(fa, StringFieldValue("foo")); + structValue.setValue(fb, StringFieldValue("bar")); + MapDataType mapType(*DataType::STRING, structType); + MapFieldValue mapfv(mapType); + EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), structValue)); + { // select a subset and then all + JSONDocsumWriter jdw; + DocsumFieldSpec::FieldIdentifierVector fields; + fields.push_back(DocsumFieldSpec::FieldIdentifier(0, *mapType.buildFieldPath("value.b"))); + jdw.setInputFields(fields); + assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"b\":\"bar\"}}]"); + fields[0] = DocsumFieldSpec::FieldIdentifier(0, *mapType.buildFieldPath("{k1}.a")); + jdw.clear(); + jdw.setInputFields(fields); + assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\"}}]"); + jdw.clear(); // all fields implicit + assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\",\"b\":\"bar\"}}]"); + } + } +} + +void +DocsumTest::testDocSumCache() +{ + Document::SP d1(new TestDocument(0, 1)); + d1->setField(0, FieldValue::UP(new StringFieldValue("aa"))); + Document::SP d2(new TestDocument(1, 2)); + d2->setField(0, FieldValue::UP(new StringFieldValue("bbb"))); + d2->setField(1, FieldValue::UP(new StringFieldValue("cccc"))); + DocSumCache cac1; + cac1.push_back(d1); + cac1.push_back(d2); + EXPECT_EQUAL(cac1.cache().size(), 2u); + + Document::SP d3(new TestDocument(2, 1)); + d3->setField(0, FieldValue::UP(new StringFieldValue("ddddd"))); + DocSumCache cac2; + cac2.push_back(d3); + cac1.insert(cac2); + EXPECT_EQUAL(cac1.cache().size(), 3u); + + Document::SP d4(new TestDocument(2, 1)); + d4->setField(0, FieldValue::UP(new 
StringFieldValue("eeeeee"))); + DocSumCache cac3; + cac3.push_back(d4); + cac1.insert(cac3); + EXPECT_EQUAL(cac1.cache().size(), 3u); + EXPECT_EQUAL(2u, cac1.getDocSum(2).getDocId()); +} + +int +DocsumTest::Main() +{ + TEST_INIT("docsum_test"); + + testFlattenDocsumWriter(); + testJSONDocsumWriter(); + requireThatJSONDocsumWriterHandlesMap(); + testDocSumCache(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(vsm::DocsumTest); + diff --git a/vsm/src/tests/document/.gitignore b/vsm/src/tests/document/.gitignore new file mode 100644 index 00000000000..d47781eff63 --- /dev/null +++ b/vsm/src/tests/document/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +document_test +vsm_document_test_app diff --git a/vsm/src/tests/document/CMakeLists.txt b/vsm/src/tests/document/CMakeLists.txt new file mode 100644 index 00000000000..36da98129a1 --- /dev/null +++ b/vsm/src/tests/document/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vsm_document_test_app + SOURCES + document.cpp + DEPENDS + vsm +) +vespa_add_test(NAME vsm_document_test_app COMMAND vsm_document_test_app) diff --git a/vsm/src/tests/document/DESC b/vsm/src/tests/document/DESC new file mode 100644 index 00000000000..6ab6ded2dbc --- /dev/null +++ b/vsm/src/tests/document/DESC @@ -0,0 +1 @@ +document test. Take a look at document.cpp for details. diff --git a/vsm/src/tests/document/FILES b/vsm/src/tests/document/FILES new file mode 100644 index 00000000000..2721ca2d928 --- /dev/null +++ b/vsm/src/tests/document/FILES @@ -0,0 +1 @@ +document.cpp diff --git a/vsm/src/tests/document/document.cpp b/vsm/src/tests/document/document.cpp new file mode 100644 index 00000000000..37cccae09cc --- /dev/null +++ b/vsm/src/tests/document/document.cpp @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("document_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/document/fieldvalue/fieldvalues.h> +#include <vespa/vsm/common/storagedocument.h> + +using namespace document; + +namespace vsm { +/** Test application covering StorageDocument field access and StringFieldIdTMap id assignment. */ +class DocumentTest : public vespalib::TestApp +{ +private: + void testStorageDocument(); + void testStringFieldIdTMap(); +public: + int Main(); +}; +/** Wraps a document with string fields a = foo and b = bar in a StorageDocument with three field slots (slot 2 is mapped to an empty FieldPath). Checks getField() on all slots (slot 2 yields NULL), repeated reads, setField() overrides on slots 0 to 2, init() bringing back the values stored in the document, setField(3, ...) returning false with a field count of 3, and the id string of a StorageDocument built from a default-constructed SharedFieldPathMap. */ +void +DocumentTest::testStorageDocument() +{ + DocumentType dt("testdoc", 0); + + Field fa("a", 0, *DataType::STRING, true); + Field fb("b", 1, *DataType::STRING, true); + dt.addField(fa); + dt.addField(fb); + + document::Document::UP doc(new document::Document(dt, DocumentId())); + doc->setValue(fa, StringFieldValue("foo")); + doc->setValue(fb, StringFieldValue("bar")); + + SharedFieldPathMap fpmap(new FieldPathMapT()); + fpmap->push_back(*dt.buildFieldPath("a")); + fpmap->push_back(*dt.buildFieldPath("b")); + fpmap->push_back(FieldPath()); + ASSERT_TRUE((*fpmap)[0].size() == 1); + ASSERT_TRUE((*fpmap)[1].size() == 1); + ASSERT_TRUE((*fpmap)[2].size() == 0); + + StorageDocument sdoc(std::move(doc)); + ASSERT_TRUE(sdoc.valid()); + sdoc.setFieldCount(3); + sdoc.fieldPathMap(fpmap); + sdoc.init(); + + EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); + EXPECT_TRUE(sdoc.getField(2) == NULL); + // test caching + EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); + EXPECT_TRUE(sdoc.getField(2) == NULL); + + // set new values + EXPECT_TRUE(sdoc.setField(0, FieldValue::UP(new StringFieldValue("baz")))); + EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); + EXPECT_TRUE(sdoc.getField(2) == NULL); + EXPECT_TRUE(sdoc.setField(1, FieldValue::UP(new StringFieldValue("qux")))); + 
EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString()); + EXPECT_TRUE(sdoc.getField(2) == NULL); + EXPECT_TRUE(sdoc.setField(2, FieldValue::UP(new StringFieldValue("quux")))); + EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString()); + EXPECT_EQUAL(std::string("quux"), sdoc.getField(2)->getAsString()); + + // reset cached field values + sdoc.init(); + EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); + EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); + EXPECT_TRUE(sdoc.getField(2) == NULL); + + EXPECT_TRUE(!sdoc.setField(3, FieldValue::UP(new StringFieldValue("thud")))); + + SharedFieldPathMap fim; + StorageDocument s2(fim); + EXPECT_EQUAL(vespalib::string("null::"), s2.docDoc().getId().toString()); +} +/** Pins the StringFieldIdTMap expectations asserted below: fieldNo() of an unknown name is npos; names added without an explicit id get 0 (f1), 1 (f2) and 3 (f4, the fourth distinct name); adding an existing name again without an id keeps its id; add(name, id) sets or replaces the id; highestFieldNo() is 0 for an empty map and otherwise the largest assigned id plus one; streaming the map prints one name = id line per entry, with a printed before b. */ +void DocumentTest::testStringFieldIdTMap() +{ + StringFieldIdTMap m; + EXPECT_EQUAL(0u, m.highestFieldNo()); + EXPECT_TRUE(StringFieldIdTMap::npos == m.fieldNo("unknown")); + m.add("f1"); + EXPECT_EQUAL(0u, m.fieldNo("f1")); + EXPECT_EQUAL(1u, m.highestFieldNo()); + m.add("f1"); + EXPECT_EQUAL(0u, m.fieldNo("f1")); + EXPECT_EQUAL(1u, m.highestFieldNo()); + m.add("f2"); + EXPECT_EQUAL(1u, m.fieldNo("f2")); + EXPECT_EQUAL(2u, m.highestFieldNo()); + m.add("f3", 7); + EXPECT_EQUAL(7u, m.fieldNo("f3")); + EXPECT_EQUAL(8u, m.highestFieldNo()); + m.add("f3"); + EXPECT_EQUAL(7u, m.fieldNo("f3")); + EXPECT_EQUAL(8u, m.highestFieldNo()); + m.add("f2", 13); + EXPECT_EQUAL(13u, m.fieldNo("f2")); + EXPECT_EQUAL(14u, m.highestFieldNo()); + m.add("f4"); + EXPECT_EQUAL(3u, m.fieldNo("f4")); + EXPECT_EQUAL(14u, m.highestFieldNo()); + { + vespalib::asciistream os; + StringFieldIdTMap t; + t.add("b"); + t.add("a"); + os << t; + EXPECT_EQUAL(vespalib::string("a = 1\nb = 0\n"), os.str()); + } + +} +/** Test entry point: runs both test functions between TEST_INIT and TEST_DONE. */ +int +DocumentTest::Main() +{ + TEST_INIT("document_test"); + + testStorageDocument(); + 
testStringFieldIdTMap(); + + TEST_DONE(); +} + +} /* namespace vsm */ + +TEST_APPHOOK(vsm::DocumentTest); + diff --git a/vsm/src/tests/searcher/.gitignore b/vsm/src/tests/searcher/.gitignore new file mode 100644 index 00000000000..52a56dff405 --- /dev/null +++ b/vsm/src/tests/searcher/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +searcher_test +vsm_searcher_test_app diff --git a/vsm/src/tests/searcher/CMakeLists.txt b/vsm/src/tests/searcher/CMakeLists.txt new file mode 100644 index 00000000000..26d6115e3a7 --- /dev/null +++ b/vsm/src/tests/searcher/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vsm_searcher_test_app + SOURCES + searcher.cpp + DEPENDS + vsm +) +vespa_add_test(NAME vsm_searcher_test_app COMMAND vsm_searcher_test_app) diff --git a/vsm/src/tests/searcher/DESC b/vsm/src/tests/searcher/DESC new file mode 100644 index 00000000000..1165ce57737 --- /dev/null +++ b/vsm/src/tests/searcher/DESC @@ -0,0 +1 @@ +Unit tests for string and numeric field searchers. Take a look at searcher.cpp for details. diff --git a/vsm/src/tests/searcher/FILES b/vsm/src/tests/searcher/FILES new file mode 100644 index 00000000000..603eb41c816 --- /dev/null +++ b/vsm/src/tests/searcher/FILES @@ -0,0 +1 @@ +searcher.cpp diff --git a/vsm/src/tests/searcher/searcher.cpp b/vsm/src/tests/searcher/searcher.cpp new file mode 100644 index 00000000000..dbf458a0c32 --- /dev/null +++ b/vsm/src/tests/searcher/searcher.cpp @@ -0,0 +1,897 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("searcher_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/query/queryterm.h> +#include <vespa/vsm/searcher/fieldsearcher.h> +#include <vespa/vsm/searcher/floatfieldsearcher.h> +#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> +#include <vespa/vsm/searcher/intfieldsearcher.h> +#include <vespa/vsm/searcher/strchrfieldsearcher.h> +#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h> +#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h> +#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h> +#include <vespa/vsm/searcher/utf8substringsearcher.h> +#include <vespa/vsm/searcher/utf8substringsnippetmodifier.h> +#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h> +#include <vespa/vsm/vsm/snippetmodifier.h> +#include <vespa/vsm/vsm/fieldsearchspec.h> + +using namespace document; +using search::EmptyQueryNodeResult; +using search::QueryTerm; +using search::QueryTermList; + +namespace vsm { +/** std::vector with a chainable add(), so expected and actual lists can be built inline in one expression. */ +template <typename T> +class Vector : public std::vector<T> +{ +public: + Vector() : std::vector<T>() {} /* plain class name: a template-id as constructor declarator is rejected by C++20 */ + Vector<T> & add(T v) { this->push_back(v); return *this; } +}; + +typedef Vector<size_t> Hits; +typedef Vector<std::string> StringList; +typedef Vector<Hits> HitsList; +typedef Vector<bool> BoolList; +typedef Vector<int64_t> LongList; +typedef Vector<float> FloatList; +typedef QueryTerm::FieldInfo QTFieldInfo; +typedef Vector<QTFieldInfo> FieldInfoList; +/** Equality wrapper that stores a reference to a std::string; the referenced string must outlive the wrapper. */ +class String +{ +private: + const std::string & _str; +public: + String(const std::string & str) : _str(str) {} + bool operator==(const String & rhs) const { + return _str == rhs._str; + } +}; +/** Builds QueryTerm objects (qtv) and a list of pointers to them (qtl) from strings of the form index:term; when the index part is empty the index name defaults to index. */ +class Query +{ +private: + void setupQuery(const StringList & terms) { + for (size_t i = 0; i < terms.size(); ++i) { + ParsedQueryTerm pqt = parseQueryTerm(terms[i]); + ParsedTerm pt = parseTerm(pqt.second); + 
qtv.push_back(QueryTerm(eqnr, pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second)); + } + for (size_t i = 0; i < qtv.size(); ++i) { + qtl.push_back(&qtv[i]); + } + } +public: + typedef std::pair<std::string, std::string> ParsedQueryTerm; + typedef std::pair<std::string, QueryTerm::SearchTerm> ParsedTerm; + EmptyQueryNodeResult eqnr; + std::vector<QueryTerm> qtv; + QueryTermList qtl; + Query(const StringList & terms) : eqnr(), qtv(), qtl() { + setupQuery(terms); + } + static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) { /* splits at the first colon into (index, term); without a colon the index part is empty */ + size_t i = queryTerm.find(':'); + if (i != std::string::npos) { + return ParsedQueryTerm(queryTerm.substr(0, i), queryTerm.substr(i + 1)); + } + return ParsedQueryTerm(std::string(), queryTerm); + } + static ParsedTerm parseTerm(const std::string & term) { /* star at both ends: SUBSTRINGTERM, leading star: SUFFIXTERM, trailing star: PREFIXTERM, otherwise WORD; the stars are stripped from the returned text */ + if (term[0] == '*' && term[term.size() - 1] == '*') { + return std::make_pair(term.substr(1, term.size() - 2), QueryTerm::SUBSTRINGTERM); + } else if (term[0] == '*') { + return std::make_pair(term.substr(1, term.size() - 1), QueryTerm::SUFFIXTERM); + } else if (!term.empty() && term[term.size() - 1] == '*') { /* empty() guard: an empty term reaches this branch and term[size() - 1] would index out of range; it now falls through to WORD */ + return std::make_pair(term.substr(0, term.size() - 1), QueryTerm::PREFIXTERM); + } else { + return std::make_pair(term, QueryTerm::WORD); + } + } +}; +/** Bundles a Query built from the given terms, a UTF8SubstringSnippetModifier prepared with that query and a SearcherBuf(8), and a SnippetModifier wrapping the searcher. */ +struct SnippetModifierSetup +{ + Query query; + UTF8SubstringSnippetModifier::SP searcher; + SharedSearcherBuf buf; + SnippetModifier modifier; + explicit SnippetModifierSetup(const StringList & terms) : + query(terms), + searcher(new UTF8SubstringSnippetModifier()), + buf(new SearcherBuf(8)), + modifier(searcher) + { + searcher->prepare(query.qtl, buf); + } +}; + +class SearcherTest : public vespalib::TestApp +{ +private: + + // helper functions + ArrayFieldValue getFieldValue(const StringList & fv); + ArrayFieldValue getFieldValue(const LongList & fv); + ArrayFieldValue getFieldValue(const FloatList & fv); + + bool assertMatchTermSuffix(const std::string & term, const std::string & word); + + /** string field searcher **/ + 
void assertString(StrChrFieldSearcher & fs, const std::string & term, const std::string & field, const Hits & exp) { + assertString(fs, StringList().add(term), field, HitsList().add(exp)); + } + void assertString(StrChrFieldSearcher & fs, const StringList & query, const std::string & field, const HitsList & exp) { + assertSearch(fs, query, StringFieldValue(field), exp); + } + void assertString(StrChrFieldSearcher & fs, const std::string & term, const StringList & field, const Hits & exp) { + assertString(fs, StringList().add(term), field, HitsList().add(exp)); + } + void assertString(StrChrFieldSearcher & fs, const StringList & query, const StringList & field, const HitsList & exp) { + assertSearch(fs, query, getFieldValue(field), exp); + } + + /** int field searcher **/ + void assertInt(IntFieldSearcher fs, const std::string & term, int64_t field, bool exp) { + assertInt(fs, StringList().add(term), field, BoolList().add(exp)); + } + void assertInt(IntFieldSearcher fs, const StringList & query, int64_t field, const BoolList & exp) { + assertNumeric(fs, query, LongFieldValue(field), exp); + } + void assertInt(IntFieldSearcher fs, const std::string & term, const LongList & field, const Hits & exp) { + assertInt(fs, StringList().add(term), field, HitsList().add(exp)); + } + void assertInt(IntFieldSearcher fs, const StringList & query, const LongList & field, const HitsList & exp) { + assertSearch(fs, query, getFieldValue(field), exp); + } + + /** float field searcher **/ + void assertFloat(FloatFieldSearcher fs, const std::string & term, float field, bool exp) { + assertFloat(fs, StringList().add(term), field, BoolList().add(exp)); + } + void assertFloat(FloatFieldSearcher fs, const StringList & query, float field, const BoolList & exp) { + assertNumeric(fs, query, FloatFieldValue(field), exp); + } + void assertFloat(FloatFieldSearcher fs, const std::string & term, const FloatList & field, const Hits & exp) { + assertFloat(fs, StringList().add(term), field, 
HitsList().add(exp)); + } + void assertFloat(FloatFieldSearcher fs, const StringList & query, const FloatList & field, const HitsList & exp) { + assertSearch(fs, query, getFieldValue(field), exp); + } + + void assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp); + std::vector<QueryTerm> performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv); + void assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const HitsList & exp); + + /** string field searcher **/ + bool assertFieldInfo(StrChrFieldSearcher & fs, const std::string & term, const std::string & fv, const QTFieldInfo & exp) { + return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + bool assertFieldInfo(StrChrFieldSearcher & fs, const std::string & term, const StringList & fv, const QTFieldInfo & exp) { + return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + bool assertFieldInfo(StrChrFieldSearcher & fs, const StringList & query, const std::string & fv, const FieldInfoList & exp) { + return assertFieldInfo(fs, query, StringFieldValue(fv), exp); + } + bool assertFieldInfo(StrChrFieldSearcher & fs, const StringList & query, const StringList & fv, const FieldInfoList & exp) { + return assertFieldInfo(fs, query, getFieldValue(fv), exp); + } + + /** int field searcher **/ + void assertFieldInfo(IntFieldSearcher fs, const std::string & term, int64_t fv, const QTFieldInfo & exp) { + assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + void assertFieldInfo(IntFieldSearcher fs, const std::string & term, const LongList & fv, const QTFieldInfo & exp) { + assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + void assertFieldInfo(IntFieldSearcher fs, const StringList & query, int64_t fv, const FieldInfoList & exp) { + assertFieldInfo(fs, query, LongFieldValue(fv), exp); + } + void 
assertFieldInfo(IntFieldSearcher fs, const StringList & query, const LongList & fv, const FieldInfoList & exp) { + assertFieldInfo(fs, query, getFieldValue(fv), exp); + } + + /** float field searcher **/ + void assertFieldInfo(FloatFieldSearcher fs, const std::string & term, float fv, const QTFieldInfo & exp) { + assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + void assertFieldInfo(FloatFieldSearcher fs, const std::string & term, const FloatList & fv, const QTFieldInfo & exp) { + assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); + } + void assertFieldInfo(FloatFieldSearcher fs, const StringList & query, float fv, const FieldInfoList & exp) { + assertFieldInfo(fs, query, FloatFieldValue(fv), exp); + } + void assertFieldInfo(FloatFieldSearcher fs, const StringList & query, const FloatList & fv, const FieldInfoList & exp) { + assertFieldInfo(fs, query, getFieldValue(fv), exp); + } + + bool assertFieldInfo(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const FieldInfoList & exp); + + /** snippet modifer searcher **/ + void assertSnippetModifier(const std::string & term, const std::string & fv, const std::string & exp) { + assertSnippetModifier(StringList().add(term), fv, exp); + } + void assertSnippetModifier(const StringList & query, const std::string & fv, const std::string & exp); + /** snippet modifier **/ + void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp); + void assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms); + /** count words **/ + bool assertCountWords(size_t numWords, const std::string & field); + + // test functions + void testParseTerm(); + void testMatchTermSuffix(); + bool testStrChrFieldSearcher(StrChrFieldSearcher & fs); + void testStrChrFieldSearcher(); + bool testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs); + void testUTF8SubStringFieldSearcher(); + void 
testUTF8SuffixStringFieldSearcher(); + void testUTF8FlexibleStringFieldSearcher(); + void testUTF8ExactStringFieldSearcher(); + void testIntFieldSearcher(); + void testFloatFieldSearcher(); + bool testStringFieldInfo(StrChrFieldSearcher & fs); + void testSnippetModifierSearcher(); + void testSnippetModifier(); + void testFieldSearchSpec(); + void testSnippetModifierManager(); + void testStripIndexes(); + void requireThatCountWordsIsWorking(); + +public: + int Main(); +}; + +ArrayFieldValue +SearcherTest::getFieldValue(const StringList & fv) +{ + + static ArrayDataType type(*DataType::STRING); + ArrayFieldValue afv(type); + for (size_t i = 0; i < fv.size(); ++i) { + afv.add(StringFieldValue(fv[i])); + } + return afv; +} + +ArrayFieldValue +SearcherTest::getFieldValue(const LongList & fv) +{ + static ArrayDataType type(*DataType::LONG); + ArrayFieldValue afv(type); + for (size_t i = 0; i < fv.size(); ++i) { + afv.add(LongFieldValue(fv[i])); + } + return afv; +} + +ArrayFieldValue +SearcherTest::getFieldValue(const FloatList & fv) +{ + static ArrayDataType type(*DataType::FLOAT); + ArrayFieldValue afv(type); + for (size_t i = 0; i < fv.size(); ++i) { + afv.add(FloatFieldValue(fv[i])); + } + return afv; +} + +bool +SearcherTest::assertMatchTermSuffix(const std::string & term, const std::string & word) +{ + EmptyQueryNodeResult eqnr; + QueryTerm qa(eqnr, term, "index", QueryTerm::WORD); + QueryTerm qb(eqnr, word, "index", QueryTerm::WORD); + const ucs4_t * a; + size_t alen = qa.term(a); + const ucs4_t * b; + size_t blen = qb.term(b); + return UTF8StringFieldSearcherBase::matchTermSuffix(a, alen, b, blen); +} + +void +SearcherTest::assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp) +{ + HitsList hl; + for (size_t i = 0; i < exp.size(); ++i) { + hl.push_back(exp[i] ? 
Hits().add(0) : Hits()); + } + assertSearch(fs, query, fv, hl); +} + +std::vector<QueryTerm> +SearcherTest::performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv) +{ + Query q(query); + + // prepare field searcher + SharedSearcherBuf ssb = SharedSearcherBuf(new SearcherBuf()); + fs.prepare(q.qtl, ssb); + + // setup document + SharedFieldPathMap sfim(new FieldPathMapT()); + sfim->push_back(FieldPath()); + StorageDocument doc(sfim); + doc.setFieldCount(1); + doc.init(); + doc.setField(0, document::FieldValue::UP(fv.clone())); + + fs.search(doc); + return q.qtv; +} + +void +SearcherTest::assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const HitsList & exp) +{ + std::vector<QueryTerm> qtv = performSearch(fs, query, fv); + EXPECT_EQUAL(qtv.size(), exp.size()); + ASSERT_TRUE(qtv.size() == exp.size()); + for (size_t i = 0; i < qtv.size(); ++i) { + const search::HitList & hl = qtv[i].getHitList(); + EXPECT_EQUAL(hl.size(), exp[i].size()); + ASSERT_TRUE(hl.size() == exp[i].size()); + for (size_t j = 0; j < hl.size(); ++j) { + EXPECT_EQUAL((size_t)hl[j].pos(), exp[i][j]); + } + } +} + +bool +SearcherTest::assertFieldInfo(FieldSearcher & fs, const StringList & query, + const FieldValue & fv, const FieldInfoList & exp) +{ + std::vector<QueryTerm> qtv = performSearch(fs, query, fv); + if (!EXPECT_EQUAL(qtv.size(), exp.size())) return false; + bool retval = true; + for (size_t i = 0; i < qtv.size(); ++i) { + if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getHitOffset(), exp[i].getHitOffset())) retval = false; + if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getHitCount(), exp[i].getHitCount())) retval = false; + if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getFieldLength(), exp[i].getFieldLength())) retval = false; + } + return retval; +} + +void +SearcherTest::assertSnippetModifier(const StringList & query, const std::string & fv, const std::string & exp) +{ + UTF8SubstringSnippetModifier mod; + performSearch(mod, query, 
StringFieldValue(fv)); + EXPECT_EQUAL(mod.getModifiedBuf().getPos(), exp.size()); + std::string actual(mod.getModifiedBuf().getBuffer(), mod.getModifiedBuf().getPos()); + EXPECT_EQUAL(actual.size(), exp.size()); + EXPECT_EQUAL(actual, exp); +} + +void +SearcherTest::assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp) +{ + FieldValue::UP mfv = setup.modifier.modify(fv); + const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get()); + const std::string & actual = lfv.getValue(); + EXPECT_EQUAL(actual.size(), exp.size()); + EXPECT_EQUAL(actual, exp); +} + +void +SearcherTest::assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms) +{ + if (terms.size() == 0) { + ASSERT_TRUE(man.getModifiers().getModifier(fId) == NULL); + return; + } + ASSERT_TRUE(man.getModifiers().getModifier(fId) != NULL); + UTF8SubstringSnippetModifier * searcher = + (static_cast<SnippetModifier *>(man.getModifiers().getModifier(fId)))->getSearcher().get(); + EXPECT_EQUAL(searcher->getQueryTerms().size(), terms.size()); + ASSERT_TRUE(searcher->getQueryTerms().size() == terms.size()); + for (size_t i = 0; i < terms.size(); ++i) { + EXPECT_EQUAL(std::string(searcher->getQueryTerms()[i]->getTerm()), terms[i]); + } +} + +bool +SearcherTest::assertCountWords(size_t numWords, const std::string & field) +{ + FieldRef ref(field.c_str(), field.size()); + return EXPECT_EQUAL(numWords, FieldSearcher::countWords(ref)); +} + +void +SearcherTest::testParseTerm() +{ + ASSERT_TRUE(Query::parseQueryTerm("index:term").first == "index"); + ASSERT_TRUE(Query::parseQueryTerm("index:term").second == "term"); + ASSERT_TRUE(Query::parseQueryTerm("term").first == ""); + ASSERT_TRUE(Query::parseQueryTerm("term").second == "term"); + ASSERT_TRUE(Query::parseTerm("*substr*").first == "substr"); + ASSERT_TRUE(Query::parseTerm("*substr*").second == QueryTerm::SUBSTRINGTERM); + 
ASSERT_TRUE(Query::parseTerm("*suffix").first == "suffix"); + ASSERT_TRUE(Query::parseTerm("*suffix").second == QueryTerm::SUFFIXTERM); + ASSERT_TRUE(Query::parseTerm("prefix*").first == "prefix"); + ASSERT_TRUE(Query::parseTerm("prefix*").second == QueryTerm::PREFIXTERM); + ASSERT_TRUE(Query::parseTerm("term").first == "term"); + ASSERT_TRUE(Query::parseTerm("term").second == QueryTerm::WORD); +} + +void +SearcherTest::testMatchTermSuffix() +{ + EXPECT_EQUAL(assertMatchTermSuffix("a", "vespa"), true); + EXPECT_EQUAL(assertMatchTermSuffix("spa", "vespa"), true); + EXPECT_EQUAL(assertMatchTermSuffix("vespa", "vespa"), true); + EXPECT_EQUAL(assertMatchTermSuffix("vvespa", "vespa"), false); + EXPECT_EQUAL(assertMatchTermSuffix("fspa", "vespa"), false); + EXPECT_EQUAL(assertMatchTermSuffix("v", "vespa"), false); +} + +bool +SearcherTest::testStrChrFieldSearcher(StrChrFieldSearcher & fs) +{ + std::string field = "operators and operator overloading with utf8 char oe = \xc3\x98"; + assertString(fs, "oper", field, Hits()); + assertString(fs, "tor", field, Hits()); + assertString(fs, "oper*", field, Hits().add(0).add(2)); + assertString(fs, "and", field, Hits().add(1)); + + assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits()).add(Hits())); + assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))); + + fs.setMatchType(FieldSearcher::PREFIX); + assertString(fs, "oper", field, Hits().add(0).add(2)); + assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add(0).add(2)).add(Hits())); + + fs.setMatchType(FieldSearcher::REGULAR); + if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false; + + { // test handling of several underscores + StringList query = StringList().add("foo").add("bar"); + HitsList exp = HitsList().add(Hits().add(0)).add(Hits().add(1)); + assertString(fs, query, "foo_bar", exp); + assertString(fs, query, "foo__bar", exp); + assertString(fs, 
query, "foo___bar", exp); + assertString(fs, query, "foo________bar", exp); + assertString(fs, query, "foo____________________bar", exp); + assertString(fs, query, "________________________________________foo________________________________________bar________________________________________", exp); + query = StringList().add("foo").add("thisisaveryveryverylongword"); + assertString(fs, query, "foo____________________thisisaveryveryverylongword", exp); + + assertString(fs, "bar", "foo bar", Hits().add(1)); + assertString(fs, "bar", "foo____________________bar", Hits().add(1)); + assertString(fs, "bar", "foo____________________thisisaveryveryverylongword____________________bar", Hits().add(2)); + } + return true; +} + +void +SearcherTest::testStrChrFieldSearcher() +{ + { + UTF8StrChrFieldSearcher fs(0); + EXPECT_TRUE(testStrChrFieldSearcher(fs)); + } + { + FUTF8StrChrFieldSearcher fs(0); + EXPECT_TRUE(testStrChrFieldSearcher(fs)); + } +} + +bool +SearcherTest::testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs) +{ + std::string field = "operators and operator overloading"; + assertString(fs, "rsand", field, Hits()); + assertString(fs, "ove", field, Hits().add(3)); + assertString(fs, "ing", field, Hits().add(3)); + assertString(fs, "era", field, Hits().add(0).add(2)); + assertString(fs, "a", field, Hits().add(0).add(1).add(2).add(3)); + + assertString(fs, StringList().add("dn").add("gn"), field, HitsList().add(Hits()).add(Hits())); + assertString(fs, StringList().add("ato").add("load"), field, HitsList().add(Hits().add(0).add(2)).add(Hits().add(3))); + + assertString(fs, StringList().add("aa").add("ab"), "aaaab", + HitsList().add(Hits().add(0).add(0).add(0)).add(Hits().add(0))); + + if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false; + return true; +} + +void +SearcherTest::testUTF8SubStringFieldSearcher() +{ + { + UTF8SubStringFieldSearcher fs(0); + EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); + assertString(fs, "aa", "aaaa", Hits().add(0).add(0)); + } 
+ { + UTF8SubStringFieldSearcher fs(0); + EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); + assertString(fs, "abc", "abc bcd abc", Hits().add(0).add(2)); + fs.maxFieldLength(4); + assertString(fs, "abc", "abc bcd abc", Hits().add(0)); + } + { + UTF8SubstringSnippetModifier fs(0); + EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); + // we don't have 1 term optimization + assertString(fs, "aa", "aaaa", Hits().add(0).add(0).add(0)); + } +} + +void +SearcherTest::testUTF8SuffixStringFieldSearcher() +{ + UTF8SuffixStringFieldSearcher fs(0); + std::string field = "operators and operator overloading"; + assertString(fs, "rsand", field, Hits()); + assertString(fs, "tor", field, Hits().add(2)); + assertString(fs, "tors", field, Hits().add(0)); + + assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits())); + assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))); + + EXPECT_TRUE(testStringFieldInfo(fs)); +} + +void +SearcherTest::testUTF8ExactStringFieldSearcher() +{ + UTF8ExactStringFieldSearcher fs(0); + // regular + assertString(fs, "vespa", "vespa", Hits().add(0)); + assertString(fs, "vespa", "vespa vespa", Hits()); + assertString(fs, "vesp", "vespa", Hits()); + assertString(fs, "vesp*", "vespa", Hits().add(0)); +} + +void +SearcherTest::testUTF8FlexibleStringFieldSearcher() +{ + UTF8FlexibleStringFieldSearcher fs(0); + // regular + assertString(fs, "vespa", "vespa", Hits().add(0)); + assertString(fs, "vesp", "vespa", Hits()); + assertString(fs, "esp", "vespa", Hits()); + assertString(fs, "espa", "vespa", Hits()); + + // prefix + assertString(fs, "vesp*", "vespa", Hits().add(0)); + fs.setMatchType(FieldSearcher::PREFIX); + assertString(fs, "vesp", "vespa", Hits().add(0)); + + // substring + fs.setMatchType(FieldSearcher::REGULAR); + assertString(fs, "*esp*", "vespa", Hits().add(0)); + fs.setMatchType(FieldSearcher::SUBSTRING); + assertString(fs, "esp", "vespa", Hits().add(0)); + + 
// suffix + fs.setMatchType(FieldSearcher::REGULAR); + assertString(fs, "*espa", "vespa", Hits().add(0)); + fs.setMatchType(FieldSearcher::SUFFIX); + assertString(fs, "espa", "vespa", Hits().add(0)); + + fs.setMatchType(FieldSearcher::REGULAR); + EXPECT_TRUE(testStringFieldInfo(fs)); +} + +void +SearcherTest::testIntFieldSearcher() +{ + IntFieldSearcher fs; + assertInt(fs, "10", 10, true); + assertInt(fs, "9", 10, false); + assertInt(fs, ">9", 10, true); + assertInt(fs, ">9", 9, false); + assertInt(fs, "<11", 10, true); + assertInt(fs, "<11", 11, false); + assertInt(fs, "-10", -10, true); + assertInt(fs, "-9", -10, false); + assertInt(fs, "a", 10, false); + assertInt(fs, "[-5;5]", -5, true); + assertInt(fs, "[-5;5]", 0, true); + assertInt(fs, "[-5;5]", 5, true); + assertInt(fs, "[-5;5]", -6, false); + assertInt(fs, "[-5;5]", 6, false); + + assertInt(fs, StringList().add("9").add("11"), 10, BoolList().add(false).add(false)); + assertInt(fs, StringList().add("9").add("10"), 10, BoolList().add(false).add(true)); + assertInt(fs, StringList().add("10").add(">9"), 10, BoolList().add(true).add(true)); + + assertInt(fs, "10", LongList().add(10).add(20).add(10).add(30), Hits().add(0).add(2)); + assertInt(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30), + HitsList().add(Hits().add(0).add(2)).add(Hits().add(1))); + + assertFieldInfo(fs, "10", 10, QTFieldInfo(0, 1, 1)); + assertFieldInfo(fs, "10", LongList().add(10).add(20).add(10).add(30), QTFieldInfo(0, 2, 4)); + assertFieldInfo(fs, StringList().add("10").add("20"), 10, + FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1))); + assertFieldInfo(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30), + FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4))); +} + +void +SearcherTest::testFloatFieldSearcher() +{ + FloatFieldSearcher fs; + assertFloat(fs, "10", 10, true); + assertFloat(fs, "10.5", 10.5, true); + assertFloat(fs, "-10.5", -10.5, 
true); + assertFloat(fs, ">10.5", 10.6, true); + assertFloat(fs, ">10.5", 10.5, false); + assertFloat(fs, "<10.5", 10.4, true); + assertFloat(fs, "<10.5", 10.5, false); + assertFloat(fs, "10.4", 10.5, false); + assertFloat(fs, "-10.4", -10.5, false); + assertFloat(fs, "a", 10.5, false); + assertFloat(fs, "[-5.5;5.5]", -5.5, true); + assertFloat(fs, "[-5.5;5.5]", 0, true); + assertFloat(fs, "[-5.5;5.5]", 5.5, true); + assertFloat(fs, "[-5.5;5.5]", -5.6, false); + assertFloat(fs, "[-5.5;5.5]", 5.6, false); + + assertFloat(fs, StringList().add("10").add("11"), 10.5, BoolList().add(false).add(false)); + assertFloat(fs, StringList().add("10").add("10.5"), 10.5, BoolList().add(false).add(true)); + assertFloat(fs, StringList().add(">10.4").add("10.5"), 10.5, BoolList().add(true).add(true)); + + assertFloat(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), Hits().add(0).add(2)); + assertFloat(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5), + HitsList().add(Hits().add(0).add(2)).add(Hits().add(1))); + + assertFieldInfo(fs, "10.5", 10.5, QTFieldInfo(0, 1, 1)); + assertFieldInfo(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), QTFieldInfo(0, 2, 4)); + assertFieldInfo(fs, StringList().add("10.5").add("20.5"), 10.5, + FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1))); + assertFieldInfo(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5), + FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4))); +} + +bool +SearcherTest::testStringFieldInfo(StrChrFieldSearcher & fs) +{ + assertString(fs, "foo", StringList().add("foo bar baz").add("foo bar").add("baz foo"), Hits().add(0).add(3).add(6)); + assertString(fs, StringList().add("foo").add("bar"), StringList().add("foo bar baz").add("foo bar").add("baz foo"), + HitsList().add(Hits().add(0).add(3).add(6)).add(Hits().add(1).add(4))); + + bool retval = true; + if 
(!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo", QTFieldInfo(0, 1, 1)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo", QTFieldInfo(0, 0, 1)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "baz", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "qux", "foo bar baz", QTFieldInfo(0, 0, 3)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo foo foo", QTFieldInfo(0, 3, 3)))) retval = false; + // query term size > last term size + if (!EXPECT_TRUE(assertFieldInfo(fs, "runner", "Road Runner Disco", QTFieldInfo(0, 1, 3)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("roadrun").add("runner"), "Road Runner Disco", + FieldInfoList().add(QTFieldInfo(0, 0, 3)).add(QTFieldInfo(0, 1, 3))))) retval = false; + // multiple terms + if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", StringList().add("foo bar baz").add("foo bar"), + QTFieldInfo(0, 2, 5)))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), "foo bar baz", + FieldInfoList().add(QTFieldInfo(0, 1, 3)).add(QTFieldInfo(0, 1, 3))))) retval = false; + if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), StringList().add("foo bar baz").add("foo bar"), + FieldInfoList().add(QTFieldInfo(0, 2, 5)).add(QTFieldInfo(0, 1, 5))))) retval = false; + return retval; +} + +void +SearcherTest::testSnippetModifierSearcher() +{ + // ascii + assertSnippetModifier("f", "foo", "\x1F""f\x1Foo"); + assertSnippetModifier("o", "foo", "f\x1Fo\x1F\x1Fo\x1F"); + assertSnippetModifier("r", "bar", "ba\x1Fr\x1F"); + assertSnippetModifier("foo", "foo foo", "\x1F""foo\x1F \x1F""foo\x1F"); + assertSnippetModifier("aa", "aaaaaa", "\x1F""aa\x1F\x1F""aa\x1F\x1F""aa\x1F"); + 
assertSnippetModifier("ab", "abcd\x1F""efgh", "\x1F""ab\x1F""cd\x1F""efgh"); + assertSnippetModifier("ef", "abcd\x1F""efgh", "abcd\x1F\x1F""ef\x1Fgh"); + assertSnippetModifier("fg", "abcd\x1F""efgh", "abcd\x1F""e\x1F""fg\x1Fh"); + // the separator overlapping the match is skipped + assertSnippetModifier("cdef", "abcd\x1F""efgh", "ab\x1F""cdef\x1F""gh"); + // no hits + assertSnippetModifier("bb", "aaaaaa", "aaaaaa"); + + + // multiple query terms + assertSnippetModifier(StringList().add("ab").add("cd"), "abcd", "\x1F""ab\x1F\x1F""cd\x1F"); + // when we have overlap we only get the first match + assertSnippetModifier(StringList().add("ab").add("bc"), "abcd", "\x1F""ab\x1F""cd"); + assertSnippetModifier(StringList().add("bc").add("ab"), "abcd", "\x1F""ab\x1F""cd"); + // the separator overlapping the match is skipped + assertSnippetModifier(StringList().add("de").add("ef"), "abcd\x1F""efgh", "abc\x1F""de\x1F""fgh"); + + // cjk + assertSnippetModifier("\xe7\x9f\xb3", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8", + "\x1f\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8"); + assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8", + "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8"); + // the separator overlapping the match is skipped + assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\x1f\xe5\x87\xb1\xe5\x9c\xa8", + "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8"); + + { // check that resizing works + UTF8SubstringSnippetModifier mod; + EXPECT_EQUAL(mod.getModifiedBuf().getLength(), 32u); + EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 0u); + performSearch(mod, StringList().add("a"), StringFieldValue("aaaaaaaaaaaaaaaa")); + EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 16u + 2 * 16u); + EXPECT_TRUE(mod.getModifiedBuf().getLength() >= mod.getModifiedBuf().getPos()); + } +} + +void +SearcherTest::testSnippetModifier() +{ + { // string field value + SnippetModifierSetup 
sms(StringList().add("ab")); + // multiple invokations + assertSnippetModifier(sms, StringFieldValue("ab"), "\x1F""ab\x1F"); + assertSnippetModifier(sms, StringFieldValue("xxxxabxxxxabxxxx"), "xxxx\x1F""ab\x1Fxxxx\x1F""ab\x1Fxxxx"); + assertSnippetModifier(sms, StringFieldValue("xxabxx"), "xx\x1F""ab\x1Fxx"); + } + { // collection field value + SnippetModifierSetup sms(StringList().add("ab")); + // multiple invokations + assertSnippetModifier(sms, getFieldValue(StringList().add("ab")), "\x1F""ab\x1F"); + assertSnippetModifier(sms, getFieldValue(StringList().add("xxabxx")), "xx\x1F""ab\x1Fxx"); + assertSnippetModifier(sms, getFieldValue(StringList().add("ab").add("xxabxx").add("xxxxxx")), + "\x1F""ab\x1F\x1E""xx\x1F""ab\x1F""xx\x1E""xxxxxx"); + assertSnippetModifier(sms, getFieldValue(StringList().add("cd").add("ef").add("gh")), + "cd\x1E""ef\x1E""gh"); + } + { // check that resizing works + SnippetModifierSetup sms(StringList().add("a")); + EXPECT_EQUAL(sms.modifier.getValueBuf().getLength(), 32u); + EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 0u); + sms.modifier.modify(StringFieldValue("aaaaaaaaaaaaaaaa")); + EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 16u + 2 * 16u); + EXPECT_TRUE(sms.modifier.getValueBuf().getLength() >= sms.modifier.getValueBuf().getPos()); + } +} + +void +SearcherTest::testFieldSearchSpec() +{ + { + FieldSearchSpec f; + EXPECT_FALSE(f.valid()); + EXPECT_EQUAL(0u, f.id()); + EXPECT_EQUAL("", f.name()); + EXPECT_EQUAL(0x100000u, f.maxLength()); + } + { + FieldSearchSpec f(7, "f0", VsmfieldsConfig::Fieldspec::AUTOUTF8, "substring", 789); + EXPECT_TRUE(f.valid()); + EXPECT_EQUAL(7u, f.id()); + EXPECT_EQUAL("f0", f.name()); + EXPECT_EQUAL(789u, f.maxLength()); + EXPECT_EQUAL(789u, f.searcher().maxFieldLength()); + } +} + +void +SearcherTest::testSnippetModifierManager() +{ + FieldSearchSpecMapT specMap; + specMap[0] = FieldSearchSpec(0, "f0", VsmfieldsConfig::Fieldspec::AUTOUTF8, "substring", 1000); + specMap[1] = FieldSearchSpec(1, 
"f1", VsmfieldsConfig::Fieldspec::AUTOUTF8, "", 1000); + IndexFieldMapT indexMap; + indexMap["i0"].push_back(0); + indexMap["i1"].push_back(1); + indexMap["i2"].push_back(0); + indexMap["i2"].push_back(1); + + { + SnippetModifierManager man; + Query query(StringList().add("i0:foo")); + man.setup(query.qtl, specMap, indexMap); + assertQueryTerms(man, 0, StringList().add("foo")); + assertQueryTerms(man, 1, StringList()); + } + { + SnippetModifierManager man; + Query query(StringList().add("i1:foo")); + man.setup(query.qtl, specMap, indexMap); + assertQueryTerms(man, 0, StringList()); + assertQueryTerms(man, 1, StringList()); + } + { + SnippetModifierManager man; + Query query(StringList().add("i1:*foo*")); + man.setup(query.qtl, specMap, indexMap); + assertQueryTerms(man, 0, StringList()); + assertQueryTerms(man, 1, StringList().add("foo")); + } + { + SnippetModifierManager man; + Query query(StringList().add("i2:foo").add("i2:*bar*")); + man.setup(query.qtl, specMap, indexMap); + assertQueryTerms(man, 0, StringList().add("foo").add("bar")); + assertQueryTerms(man, 1, StringList().add("bar")); + } + { // check buffer sizes + SnippetModifierManager man; + Query query(StringList().add("i2:foo").add("i2:*bar*")); + man.setup(query.qtl, specMap, indexMap); + { + SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0)); + UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); + EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); + EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); + } + { + SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1)); + UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); + EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); + EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); + } + } +} + +void +SearcherTest::testStripIndexes() +{ + EXPECT_EQUAL("f", FieldSearchSpecMap::stripNonFields("f")); + EXPECT_EQUAL("f", 
FieldSearchSpecMap::stripNonFields("f[0]")); + EXPECT_EQUAL("f[a]", FieldSearchSpecMap::stripNonFields("f[a]")); + + EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a}")); + EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a0}")); + EXPECT_EQUAL("f{a 0}", FieldSearchSpecMap::stripNonFields("f{a 0}")); + EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{\"a 0\"}")); +} + +void +SearcherTest::requireThatCountWordsIsWorking() +{ + EXPECT_TRUE(assertCountWords(0, "")); + EXPECT_TRUE(assertCountWords(0, "?")); + EXPECT_TRUE(assertCountWords(1, "foo")); + EXPECT_TRUE(assertCountWords(2, "foo bar")); + EXPECT_TRUE(assertCountWords(2, "? foo bar")); + EXPECT_TRUE(assertCountWords(2, "foo bar ?")); + + // check that 'a' is counted as 1 word + UTF8StrChrFieldSearcher fs(0); + StringList field = StringList().add("a").add("aa bb cc"); + assertString(fs, "bb", field, Hits().add(2)); + assertString(fs, StringList().add("bb").add("not"), field, HitsList().add(Hits().add(2)).add(Hits())); +} + +int +SearcherTest::Main() +{ + TEST_INIT("searcher_test"); + + testFieldSearchSpec(); + testParseTerm(); + testMatchTermSuffix(); + testStrChrFieldSearcher(); + testUTF8SubStringFieldSearcher(); + testUTF8SuffixStringFieldSearcher(); + testUTF8FlexibleStringFieldSearcher(); + testUTF8ExactStringFieldSearcher(); + testIntFieldSearcher(); + testFloatFieldSearcher(); + + testSnippetModifierSearcher(); + testSnippetModifier(); + testSnippetModifierManager(); + testStripIndexes(); + requireThatCountWordsIsWorking(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(vsm::SearcherTest); + diff --git a/vsm/src/tests/textutil/.gitignore b/vsm/src/tests/textutil/.gitignore new file mode 100644 index 00000000000..1103f79800a --- /dev/null +++ b/vsm/src/tests/textutil/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +textutil_test +vsm_textutil_test_app diff --git a/vsm/src/tests/textutil/CMakeLists.txt b/vsm/src/tests/textutil/CMakeLists.txt new file mode 100644 index 
00000000000..c3169a842f0 --- /dev/null +++ b/vsm/src/tests/textutil/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vsm_textutil_test_app + SOURCES + textutil.cpp + DEPENDS + vsm +) +vespa_add_test(NAME vsm_textutil_test_app COMMAND vsm_textutil_test_app) diff --git a/vsm/src/tests/textutil/DESC b/vsm/src/tests/textutil/DESC new file mode 100644 index 00000000000..e1a0220f550 --- /dev/null +++ b/vsm/src/tests/textutil/DESC @@ -0,0 +1 @@ +Tests of text utils used during searching. Take a look at textutil.cpp for details. diff --git a/vsm/src/tests/textutil/FILES b/vsm/src/tests/textutil/FILES new file mode 100644 index 00000000000..f1b37f6aaec --- /dev/null +++ b/vsm/src/tests/textutil/FILES @@ -0,0 +1 @@ +textutil.cpp diff --git a/vsm/src/tests/textutil/textutil.cpp b/vsm/src/tests/textutil/textutil.cpp new file mode 100644 index 00000000000..ba6a276eb49 --- /dev/null +++ b/vsm/src/tests/textutil/textutil.cpp @@ -0,0 +1,278 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("textutil_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/searchlib/query/base.h> +#include <vespa/vsm/searcher/fold.h> +#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> +#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h> + +using search::byte; // unsigned char + +namespace vsm { + +template <typename T> +class Vector : public std::vector<T> +{ +public: + Vector<T>() : std::vector<T>() {} + Vector<T> & a(T v) { this->push_back(v); return *this; } +}; + +typedef Vector<ucs4_t> UCS4V; +typedef Vector<size_t> SizeV; +typedef UTF8StringFieldSearcherBase SFSB; +typedef FUTF8StrChrFieldSearcher FSFS; + +class TextUtilTest : public vespalib::TestApp +{ +private: + ucs4_t getUTF8Char(const char * src); + template <typename BW, bool OFF> + void assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets); + void assertAnsiFold(const std::string & toFold, const std::string & exp); + void assertAnsiFold(char c, char exp); + void assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp); + void assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded = 16); + + template <typename BW, bool OFF> + void testSkipSeparators(); + void testSkipSeparators(); + void testSeparatorCharacter(); + void testAnsiFold(); + void test_lfoldua(); + void test_sse2_foldua(); + +public: + int Main(); +}; + +ucs4_t +TextUtilTest::getUTF8Char(const char * src) +{ + ucs4_t retval = Fast_UnicodeUtil::GetUTF8Char(src); + ASSERT_TRUE(retval != Fast_UnicodeUtil::_BadUTF8Char); + return retval; +} + +template <typename BW, bool OFF> +void +TextUtilTest::assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets) +{ + const byte * srcbuf = reinterpret_cast<const byte *>(input); + ucs4_t dstbuf[len]; + size_t 
offsets[len]; + UTF8StrChrFieldSearcher fs; + BW bw(dstbuf, offsets); + size_t dstlen = fs.skipSeparators(srcbuf, len, bw); + EXPECT_EQUAL(dstlen, expdstbuf.size()); + ASSERT_TRUE(dstlen == expdstbuf.size()); + for (size_t i = 0; i < dstlen; ++i) { + EXPECT_EQUAL(dstbuf[i], expdstbuf[i]); + if (OFF) { + EXPECT_EQUAL(offsets[i], expoffsets[i]); + } + } +} + +void +TextUtilTest::assertAnsiFold(const std::string & toFold, const std::string & exp) +{ + char folded[256]; + EXPECT_TRUE(FSFS::ansiFold(toFold.c_str(), toFold.size(), folded)); + EXPECT_EQUAL(std::string(folded, toFold.size()), exp); +} + +void +TextUtilTest::assertAnsiFold(char c, char exp) +{ + char folded; + EXPECT_TRUE(FSFS::ansiFold(&c, 1, &folded)); + EXPECT_EQUAL((int32_t)folded, (int32_t)exp); +} + +void +TextUtilTest::assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp) +{ + char folded[256]; + size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); + const unsigned char * toFoldOrg = reinterpret_cast<const unsigned char *>(toFold.c_str()); + const unsigned char * retval = + sse2_foldua(toFoldOrg, toFold.size(), reinterpret_cast<unsigned char *>(folded + alignedStart)); + EXPECT_EQUAL((size_t)(retval - toFoldOrg), charFolded); + EXPECT_EQUAL(std::string(folded + alignedStart, charFolded), exp); +} + +void +TextUtilTest::assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded) +{ + unsigned char toFold[16]; + memset(toFold, c, 16); + unsigned char folded[32]; + size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); + const unsigned char * retval = sse2_foldua(toFold, 16, folded + alignedStart); + EXPECT_EQUAL((size_t)(retval - toFold), charFolded); + for (size_t i = 0; i < charFolded; ++i) { + EXPECT_EQUAL((int32_t)folded[i + alignedStart], (int32_t)exp); + } +} + +template <typename BW, bool OFF> +void +TextUtilTest::testSkipSeparators() +{ + // ascii characters + assertSkipSeparators<BW, OFF>("foo", 3, UCS4V().a('f').a('o').a('o'), 
SizeV().a(0).a(1).a(2)); + assertSkipSeparators<BW, OFF>("f\x1Fo", 3, UCS4V().a('f').a('o'), SizeV().a(0).a(2)); + assertSkipSeparators<BW, OFF>("f\no", 3, UCS4V().a('f').a('\n').a('o'), SizeV().a(0).a(1).a(2)); + assertSkipSeparators<BW, OFF>("f\to", 3, UCS4V().a('f').a('\t').a('o'), SizeV().a(0).a(1).a(2)); + + // utf8 char + assertSkipSeparators<BW, OFF>("\xC2\x80\x66", 3, UCS4V().a(getUTF8Char("\xC2\x80")).a('f'), + SizeV().a(0).a(2)); + assertSkipSeparators<BW, OFF>("\xE0\xA0\x80\x66", 4, UCS4V().a(getUTF8Char("\xE0\xA0\x80")).a('f'), + SizeV().a(0).a(3)); + assertSkipSeparators<BW, OFF>("\xF0\x90\x80\x80\x66", 5, UCS4V().a(getUTF8Char("\xF0\x90\x80\x80")).a('f'), + SizeV().a(0).a(4)); + + // replacement string (sharp s -> ss) + assertSkipSeparators<BW, OFF>("\xC3\x9F\x66\xC3\x9F", 5, UCS4V().a('s').a('s').a('f').a('s').a('s'), + SizeV().a(0).a(0).a(2).a(3).a(3)); +} + +void +TextUtilTest::testSkipSeparators() +{ + Fast_NormalizeWordFolder::Setup(Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION); + + testSkipSeparators<SFSB::BufferWrapper, false>(); + testSkipSeparators<SFSB::OffsetWrapper, true>(); +} + +void +TextUtilTest::testSeparatorCharacter() +{ + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x00')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x01')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x02')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x03')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x04')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x05')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x06')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x07')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x08')); + EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x09')); // '\t' + EXPECT_TRUE(! 
SFSB::isSeparatorCharacter('\x0a')); // '\n' + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0b')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0c')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0d')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0e')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0f')); + + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x10')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x11')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x12')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x13')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x14')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x15')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x16')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x17')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x18')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x19')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1a')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1b')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1c')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1d')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1e')); + EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1f')); + + EXPECT_TRUE(! 
SFSB::isSeparatorCharacter('\x20')); // space +} + +void +TextUtilTest::testAnsiFold() +{ + FieldSearcher::init(); + assertAnsiFold("", ""); + assertAnsiFold("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"); + assertAnsiFold("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"); + assertAnsiFold("0123456789", "0123456789"); + for (int i = 0; i < 128; ++i) { + if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { + assertAnsiFold(i, i); + } else if (i >= 'A' && i <= 'Z') { + assertAnsiFold(i, i + 32); + } else { + assertAnsiFold(i, 0); + } + } + + // non-ascii is ignored + for (int i = 128; i < 256; ++i) { + char toFold = i; + char folded; + EXPECT_TRUE(!FSFS::ansiFold(&toFold, 1, &folded)); + } +} + +void +TextUtilTest::test_lfoldua() +{ + FieldSearcher::init(); + char folded[256]; + size_t alignedStart = 0; + const char * toFold = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + size_t len = strlen(toFold); + EXPECT_TRUE(FSFS::lfoldua(toFold, len, folded, alignedStart)); + EXPECT_EQUAL(std::string(folded + alignedStart, len), "abcdefghijklmnopqrstuvwxyz"); +} + +void +TextUtilTest::test_sse2_foldua() +{ + assert_sse2_foldua("", 0, ""); + assert_sse2_foldua("ABCD", 0, ""); + assert_sse2_foldua("ABCDEFGHIJKLMNO", 0, ""); + assert_sse2_foldua("ABCDEFGHIJKLMNOP", 16, "abcdefghijklmnop"); + assert_sse2_foldua("ABCDEFGHIJKLMNOPQ", 16, "abcdefghijklmnop"); + assert_sse2_foldua("KLMNOPQRSTUVWXYZ", 16, "klmnopqrstuvwxyz"); + assert_sse2_foldua("abcdefghijklmnop", 16, "abcdefghijklmnop"); + assert_sse2_foldua("klmnopqrstuvwxyz", 16, "klmnopqrstuvwxyz"); + assert_sse2_foldua("0123456789abcdef", 16, "0123456789abcdef"); + + for (int i = 0; i < 128; ++i) { + if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { + assert_sse2_foldua(i, i); + } else if (i >= 'A' && i <= 'Z') { + assert_sse2_foldua(i, i + 32); + } else { + assert_sse2_foldua(i, 0); + } + } + + // non-ascii is ignored + for (int i = 128; i < 256; ++i) { + assert_sse2_foldua(i, '?', 0); + } +} + +int 
+TextUtilTest::Main() +{ + TEST_INIT("textutil_test"); + + testSkipSeparators(); + testSeparatorCharacter(); + testAnsiFold(); + test_lfoldua(); + test_sse2_foldua(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(vsm::TextUtilTest); diff --git a/vsm/src/tests/utilapps/.gitignore b/vsm/src/tests/utilapps/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/vsm/src/tests/utilapps/.gitignore |