summaryrefslogtreecommitdiffstats
path: root/vsm/src/tests
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2016-06-15 23:09:44 +0200
commit72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /vsm/src/tests
Publish
Diffstat (limited to 'vsm/src/tests')
-rw-r--r--vsm/src/tests/charbuffer/.gitignore4
-rw-r--r--vsm/src/tests/charbuffer/CMakeLists.txt8
-rw-r--r--vsm/src/tests/charbuffer/DESC1
-rw-r--r--vsm/src/tests/charbuffer/FILES1
-rw-r--r--vsm/src/tests/charbuffer/charbuffer.cpp83
-rw-r--r--vsm/src/tests/config/mail.cfg116
-rw-r--r--vsm/src/tests/config/vsm.cfg3
-rw-r--r--vsm/src/tests/config/vsmfields.cfg297
-rwxr-xr-xvsm/src/tests/create-test.sh73
-rw-r--r--vsm/src/tests/docsum/.gitignore4
-rw-r--r--vsm/src/tests/docsum/CMakeLists.txt8
-rw-r--r--vsm/src/tests/docsum/DESC1
-rw-r--r--vsm/src/tests/docsum/FILES1
-rw-r--r--vsm/src/tests/docsum/docsum.cpp296
-rw-r--r--vsm/src/tests/document/.gitignore4
-rw-r--r--vsm/src/tests/document/CMakeLists.txt8
-rw-r--r--vsm/src/tests/document/DESC1
-rw-r--r--vsm/src/tests/document/FILES1
-rw-r--r--vsm/src/tests/document/document.cpp137
-rw-r--r--vsm/src/tests/searcher/.gitignore4
-rw-r--r--vsm/src/tests/searcher/CMakeLists.txt8
-rw-r--r--vsm/src/tests/searcher/DESC1
-rw-r--r--vsm/src/tests/searcher/FILES1
-rw-r--r--vsm/src/tests/searcher/searcher.cpp897
-rw-r--r--vsm/src/tests/textutil/.gitignore4
-rw-r--r--vsm/src/tests/textutil/CMakeLists.txt8
-rw-r--r--vsm/src/tests/textutil/DESC1
-rw-r--r--vsm/src/tests/textutil/FILES1
-rw-r--r--vsm/src/tests/textutil/textutil.cpp278
-rw-r--r--vsm/src/tests/utilapps/.gitignore0
30 files changed, 2250 insertions, 0 deletions
diff --git a/vsm/src/tests/charbuffer/.gitignore b/vsm/src/tests/charbuffer/.gitignore
new file mode 100644
index 00000000000..2c980038fb5
--- /dev/null
+++ b/vsm/src/tests/charbuffer/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+charbuffer_test
+vsm_charbuffer_test_app
diff --git a/vsm/src/tests/charbuffer/CMakeLists.txt b/vsm/src/tests/charbuffer/CMakeLists.txt
new file mode 100644
index 00000000000..38d1f519714
--- /dev/null
+++ b/vsm/src/tests/charbuffer/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vsm_charbuffer_test_app
+ SOURCES
+ charbuffer.cpp
+ DEPENDS
+ vsm
+)
+vespa_add_test(NAME vsm_charbuffer_test_app COMMAND vsm_charbuffer_test_app)
diff --git a/vsm/src/tests/charbuffer/DESC b/vsm/src/tests/charbuffer/DESC
new file mode 100644
index 00000000000..2f1758f837e
--- /dev/null
+++ b/vsm/src/tests/charbuffer/DESC
@@ -0,0 +1 @@
+charbuffer test. Take a look at charbuffer.cpp for details.
diff --git a/vsm/src/tests/charbuffer/FILES b/vsm/src/tests/charbuffer/FILES
new file mode 100644
index 00000000000..ef12614a361
--- /dev/null
+++ b/vsm/src/tests/charbuffer/FILES
@@ -0,0 +1 @@
+charbuffer.cpp
diff --git a/vsm/src/tests/charbuffer/charbuffer.cpp b/vsm/src/tests/charbuffer/charbuffer.cpp
new file mode 100644
index 00000000000..b285005de42
--- /dev/null
+++ b/vsm/src/tests/charbuffer/charbuffer.cpp
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("charbuffer_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/vsm/common/charbuffer.h>
+
+namespace vsm {
+
+class CharBufferTest : public vespalib::TestApp // test application exercising CharBuffer
+{
+public:
+    int Main();  // entry point; runs test() between TEST_INIT and TEST_DONE
+private:
+    void test(); // the CharBuffer assertions themselves
+};
+
+void // Checks CharBuffer's length/position/remaining bookkeeping across construction, resize(), put() and reset().
+CharBufferTest::test()
+{
+ { // empty: a default-constructed buffer is expected to report 0 for length, position and remaining
+ CharBuffer buf;
+ EXPECT_EQUAL(buf.getLength(), 0u);
+ EXPECT_EQUAL(buf.getPos(), 0u);
+ EXPECT_EQUAL(buf.getRemaining(), 0u);
+ }
+ { // explicit length: CharBuffer(8) is expected to start at position 0 with all 8 remaining
+ CharBuffer buf(8);
+ EXPECT_EQUAL(buf.getLength(), 8u);
+ EXPECT_EQUAL(buf.getPos(), 0u);
+ EXPECT_EQUAL(buf.getRemaining(), 8u);
+ }
+ { // resize: growing is expected to take effect, a smaller request is expected to be a no-op
+ CharBuffer buf(8);
+ EXPECT_EQUAL(buf.getLength(), 8u);
+ buf.resize(16);
+ EXPECT_EQUAL(buf.getLength(), 16u);
+ buf.resize(8); // request below the current length
+ EXPECT_EQUAL(buf.getLength(), 16u); // length is expected to stay at 16
+ }
+ { // put with triggered resize
+ CharBuffer buf(8);
+ buf.put("123456", 6); // fits in the initial 8, so no growth is expected
+ EXPECT_EQUAL(buf.getLength(), 8u);
+ EXPECT_EQUAL(buf.getPos(), 6u);
+ EXPECT_EQUAL(buf.getRemaining(), 2u);
+ EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456");
+ buf.put("789", 3); // 3 more with only 2 remaining: the length is expected to become 12
+ EXPECT_EQUAL(buf.getLength(), 12u);
+ EXPECT_EQUAL(buf.getPos(), 9u);
+ EXPECT_EQUAL(buf.getRemaining(), 3u);
+ EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789"); // earlier content must survive the growth
+ buf.put('a'); // single-character overload; fits, so the length is expected to stay 12
+ EXPECT_EQUAL(buf.getLength(), 12u);
+ EXPECT_EQUAL(buf.getPos(), 10u);
+ EXPECT_EQUAL(buf.getRemaining(), 2u);
+ EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789a");
+ buf.reset(); // expected to rewind the position to 0 while keeping the length at 12
+ EXPECT_EQUAL(buf.getLength(), 12u);
+ EXPECT_EQUAL(buf.getPos(), 0u);
+ EXPECT_EQUAL(buf.getRemaining(), 12u);
+ buf.put("bcd", 3); // writing after reset() is expected to start again from the beginning
+ EXPECT_EQUAL(buf.getLength(), 12u);
+ EXPECT_EQUAL(buf.getPos(), 3u);
+ EXPECT_EQUAL(buf.getRemaining(), 9u);
+ EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "bcd");
+ }
+}
+
+int // Test application entry point: runs test() between the testkit's TEST_INIT and TEST_DONE macros.
+CharBufferTest::Main()
+{
+ TEST_INIT("charbuffer_test");
+
+ test();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(vsm::CharBufferTest);
diff --git a/vsm/src/tests/config/mail.cfg b/vsm/src/tests/config/mail.cfg
new file mode 100644
index 00000000000..ce830beac23
--- /dev/null
+++ b/vsm/src/tests/config/mail.cfg
@@ -0,0 +1,116 @@
+datatype[2]
+datatype[0].id 1012
+datatype[0].arraytype[1]
+datatype[0].arraytype[0].datatype 12
+datatype[1].id 1013
+datatype[1].arraytype[1]
+datatype[1].arraytype[0].datatype 13
+documenttype[1]
+documenttype[0].name mail
+documenttype[0].version 0
+documenttype[0].inherits[0]
+documenttype[0].field[26]
+documenttype[0].field[0].name mailid
+documenttype[0].field[0].id 2
+documenttype[0].field[0].header true
+documenttype[0].field[0].datatype 2
+documenttype[0].field[1].name date
+documenttype[0].field[1].id 3
+documenttype[0].field[1].header true
+documenttype[0].field[1].datatype 0
+documenttype[0].field[2].name from
+documenttype[0].field[2].id 4
+documenttype[0].field[2].header true
+documenttype[0].field[2].datatype 12
+documenttype[0].field[3].name replyto
+documenttype[0].field[3].id 5
+documenttype[0].field[3].header true
+documenttype[0].field[3].datatype 12
+documenttype[0].field[4].name to
+documenttype[0].field[4].id 6
+documenttype[0].field[4].header true
+documenttype[0].field[4].datatype 12
+documenttype[0].field[5].name cc
+documenttype[0].field[5].id 7
+documenttype[0].field[5].header true
+documenttype[0].field[5].datatype 12
+documenttype[0].field[6].name bcc
+documenttype[0].field[6].id 8
+documenttype[0].field[6].header true
+documenttype[0].field[6].datatype 12
+documenttype[0].field[7].name subject
+documenttype[0].field[7].id 9
+documenttype[0].field[7].header true
+documenttype[0].field[7].datatype 12
+documenttype[0].field[8].name body
+documenttype[0].field[8].id 10
+documenttype[0].field[8].header false
+documenttype[0].field[8].datatype 12
+documenttype[0].field[9].name attachmentcount
+documenttype[0].field[9].id 11
+documenttype[0].field[9].header false
+documenttype[0].field[9].datatype 0
+documenttype[0].field[10].name attachmentpartids
+documenttype[0].field[10].id 12
+documenttype[0].field[10].header false
+documenttype[0].field[10].datatype 2
+documenttype[0].field[11].name attachmentsizes
+documenttype[0].field[11].id 13
+documenttype[0].field[11].header false
+documenttype[0].field[11].datatype 2
+documenttype[0].field[12].name attachmentnames
+documenttype[0].field[12].id 14
+documenttype[0].field[12].header false
+documenttype[0].field[12].datatype 2
+documenttype[0].field[13].name attachmenttypes
+documenttype[0].field[13].id 15
+documenttype[0].field[13].header false
+documenttype[0].field[13].datatype 2
+documenttype[0].field[14].name attachmentlanguages
+documenttype[0].field[14].id 16
+documenttype[0].field[14].header false
+documenttype[0].field[14].datatype 2
+documenttype[0].field[15].name attachmentcontent
+documenttype[0].field[15].id 17
+documenttype[0].field[15].header false
+documenttype[0].field[15].datatype 2
+documenttype[0].field[16].name bodylanguage
+documenttype[0].field[16].id 18
+documenttype[0].field[16].header false
+documenttype[0].field[16].datatype 2
+documenttype[0].field[17].name bodyencoding
+documenttype[0].field[17].id 19
+documenttype[0].field[17].header false
+documenttype[0].field[17].datatype 2
+documenttype[0].field[18].name collectionid
+documenttype[0].field[18].id 20
+documenttype[0].field[18].header true
+documenttype[0].field[18].datatype 4
+documenttype[0].field[19].name content
+documenttype[0].field[19].id 21
+documenttype[0].field[19].header true
+documenttype[0].field[19].datatype 12
+documenttype[0].field[20].name bodymeta
+documenttype[0].field[20].id 50027053
+documenttype[0].field[20].header false
+documenttype[0].field[20].datatype 13
+documenttype[0].field[21].name attachments
+documenttype[0].field[21].id 1081629685
+documenttype[0].field[21].header false
+documenttype[0].field[21].datatype 1012
+documenttype[0].field[22].name attachmentsmeta
+documenttype[0].field[22].id 1203055625
+documenttype[0].field[22].header false
+documenttype[0].field[22].datatype 1013
+documenttype[0].field[23].name tolist
+documenttype[0].field[23].id 1084918181
+documenttype[0].field[23].header false
+documenttype[0].field[23].datatype 1012
+documenttype[0].field[24].name cclist
+documenttype[0].field[24].id 1733332403
+documenttype[0].field[24].header false
+documenttype[0].field[24].datatype 1012
+documenttype[0].field[25].name bcclist
+documenttype[0].field[25].id 410546306
+documenttype[0].field[25].header false
+documenttype[0].field[25].datatype 1012
diff --git a/vsm/src/tests/config/vsm.cfg b/vsm/src/tests/config/vsm.cfg
new file mode 100644
index 00000000000..dc50447f623
--- /dev/null
+++ b/vsm/src/tests/config/vsm.cfg
@@ -0,0 +1,3 @@
+doctype file:../config/mail.cfg
+storagecfg ""
+vsmfields file:../config/vsmfields.cfg
diff --git a/vsm/src/tests/config/vsmfields.cfg b/vsm/src/tests/config/vsmfields.cfg
new file mode 100644
index 00000000000..30f1c8ed8b1
--- /dev/null
+++ b/vsm/src/tests/config/vsmfields.cfg
@@ -0,0 +1,297 @@
+threadsperquery 4
+documentverificationlevel=0
+searchall 1
+fieldspec[17]
+fieldspec[0].name bcc
+fieldspec[0].searchmethod AUTOUTF8
+fieldspec[0].arg1 ""
+fieldspec[1].name cc
+fieldspec[1].searchmethod AUTOUTF8
+fieldspec[1].arg1 ""
+fieldspec[2].name from
+fieldspec[2].searchmethod AUTOUTF8
+fieldspec[2].arg1 ""
+fieldspec[3].name date
+fieldspec[3].searchmethod INT32
+fieldspec[3].arg1 ""
+fieldspec[4].name replyto
+fieldspec[4].searchmethod AUTOUTF8
+fieldspec[4].arg1 ""
+fieldspec[5].name subject
+fieldspec[5].searchmethod AUTOUTF8
+fieldspec[5].arg1 ""
+fieldspec[6].name to
+fieldspec[6].searchmethod AUTOUTF8
+fieldspec[6].arg1 ""
+fieldspec[7].name body
+fieldspec[7].searchmethod AUTOUTF8
+fieldspec[7].arg1 ""
+fieldspec[8].name bodymeta
+fieldspec[8].searchmethod AUTOUTF8
+fieldspec[8].arg1 ""
+fieldspec[9].name mailid
+fieldspec[9].searchmethod AUTOUTF8
+fieldspec[9].arg1 ""
+fieldspec[10].name attachmentcount
+fieldspec[10].searchmethod INT32
+fieldspec[10].arg1 ""
+fieldspec[11].name attachmentcontent
+fieldspec[11].searchmethod AUTOUTF8
+fieldspec[11].arg1 ""
+fieldspec[12].name attachmenttypes
+fieldspec[12].searchmethod AUTOUTF8
+fieldspec[12].arg1 ""
+fieldspec[13].name attachmentnames
+fieldspec[13].searchmethod AUTOUTF8
+fieldspec[13].arg1 ""
+fieldspec[14].name attachmentlanguages
+fieldspec[14].searchmethod AUTOUTF8
+fieldspec[14].arg1 ""
+fieldspec[15].name URI
+fieldspec[15].searchmethod AUTOUTF8
+fieldspec[15].arg1 ""
+fieldspec[16].name vsm_whichfieldmatched
+fieldspec[16].searchmethod AUTOUTF8
+fieldspec[16].arg1 ""
+index[26]
+index[0].name default
+index[0].field[10]
+index[0].field[0].name from
+index[0].field[1].name to
+index[0].field[2].name cc
+index[0].field[3].name bcc
+index[0].field[4].name subject
+index[0].field[5].name body
+index[0].field[6].name attachmentcontent
+index[0].field[7].name attachmentnames
+index[0].field[8].name attachmenttypes
+index[0].field[9].name date
+index[1].name all
+index[1].field[8]
+index[1].field[0].name to
+index[1].field[1].name cc
+index[1].field[2].name bcc
+index[1].field[3].name subject
+index[1].field[4].name body
+index[1].field[5].name attachmentcontent
+index[1].field[6].name attachmentnames
+index[1].field[7].name attachmenttypes
+index[2].name header
+index[2].field[6]
+index[2].field[0].name from
+index[2].field[1].name replyto
+index[2].field[2].name to
+index[2].field[3].name cc
+index[2].field[4].name bcc
+index[2].field[5].name subject
+index[3].name senders
+index[3].field[2]
+index[3].field[0].name from
+index[3].field[1].name replyto
+index[4].name recipients
+index[4].field[3]
+index[4].field[0].name to
+index[4].field[1].name cc
+index[4].field[2].name bcc
+index[5].name address
+index[5].field[5]
+index[5].field[0].name from
+index[5].field[1].name replyto
+index[5].field[2].name to
+index[5].field[3].name cc
+index[5].field[4].name bcc
+index[6].name body
+index[6].field[2]
+index[6].field[0].name subject
+index[6].field[1].name body
+index[7].name meta
+index[7].field[2]
+index[7].field[0].name attachmentcontent
+index[7].field[1].name attachmenttypes
+index[8].name index1
+index[8].field[1]
+index[8].field[0].name bcc
+index[9].name index2
+index[9].field[2]
+index[9].field[0].name bcc
+index[9].field[1].name cc
+index[10].name index3
+index[10].field[3]
+index[10].field[0].name bcc
+index[10].field[1].name cc
+index[10].field[2].name from
+index[11].name index4
+index[11].field[4]
+index[11].field[0].name bcc
+index[11].field[1].name cc
+index[11].field[2].name from
+index[11].field[3].name date
+index[12].name index5
+index[12].field[5]
+index[12].field[0].name bcc
+index[12].field[1].name cc
+index[12].field[2].name from
+index[12].field[3].name date
+index[12].field[4].name replyto
+index[13].name index6
+index[13].field[6]
+index[13].field[0].name bcc
+index[13].field[1].name cc
+index[13].field[2].name from
+index[13].field[3].name date
+index[13].field[4].name replyto
+index[13].field[5].name subject
+index[14].name index7
+index[14].field[7]
+index[14].field[0].name bcc
+index[14].field[1].name cc
+index[14].field[2].name from
+index[14].field[3].name date
+index[14].field[4].name replyto
+index[14].field[5].name subject
+index[14].field[6].name to
+index[15].name index8
+index[15].field[8]
+index[15].field[0].name bcc
+index[15].field[1].name cc
+index[15].field[2].name from
+index[15].field[3].name date
+index[15].field[4].name replyto
+index[15].field[5].name subject
+index[15].field[6].name to
+index[15].field[7].name body
+index[16].name index9
+index[16].field[9]
+index[16].field[0].name bcc
+index[16].field[1].name cc
+index[16].field[2].name from
+index[16].field[3].name date
+index[16].field[4].name replyto
+index[16].field[5].name subject
+index[16].field[6].name to
+index[16].field[7].name body
+index[16].field[8].name bodymeta
+index[17].name index10
+index[17].field[10]
+index[17].field[0].name bcc
+index[17].field[1].name cc
+index[17].field[2].name from
+index[17].field[3].name date
+index[17].field[4].name replyto
+index[17].field[5].name subject
+index[17].field[6].name to
+index[17].field[7].name body
+index[17].field[8].name bodymeta
+index[17].field[9].name mailid
+index[18].name index11
+index[18].field[11]
+index[18].field[0].name bcc
+index[18].field[1].name cc
+index[18].field[2].name from
+index[18].field[3].name date
+index[18].field[4].name replyto
+index[18].field[5].name subject
+index[18].field[6].name to
+index[18].field[7].name body
+index[18].field[8].name bodymeta
+index[18].field[9].name mailid
+index[18].field[10].name attachmentcount
+index[19].name index12
+index[19].field[12]
+index[19].field[0].name bcc
+index[19].field[1].name cc
+index[19].field[2].name from
+index[19].field[3].name date
+index[19].field[4].name replyto
+index[19].field[5].name subject
+index[19].field[6].name to
+index[19].field[7].name body
+index[19].field[8].name bodymeta
+index[19].field[9].name mailid
+index[19].field[10].name attachmentcount
+index[19].field[11].name attachmentcontent
+index[20].name index13
+index[20].field[13]
+index[20].field[0].name bcc
+index[20].field[1].name cc
+index[20].field[2].name from
+index[20].field[3].name date
+index[20].field[4].name replyto
+index[20].field[5].name subject
+index[20].field[6].name to
+index[20].field[7].name body
+index[20].field[8].name bodymeta
+index[20].field[9].name mailid
+index[20].field[10].name attachmentcount
+index[20].field[11].name attachmentcontent
+index[20].field[12].name attachmenttypes
+index[21].name index14
+index[21].field[14]
+index[21].field[0].name bcc
+index[21].field[1].name cc
+index[21].field[2].name from
+index[21].field[3].name date
+index[21].field[4].name replyto
+index[21].field[5].name subject
+index[21].field[6].name to
+index[21].field[7].name body
+index[21].field[8].name bodymeta
+index[21].field[9].name mailid
+index[21].field[10].name attachmentcount
+index[21].field[11].name attachmentcontent
+index[21].field[12].name attachmenttypes
+index[21].field[13].name attachmentnames
+index[22].name index15
+index[22].field[15]
+index[22].field[0].name bcc
+index[22].field[1].name cc
+index[22].field[2].name from
+index[22].field[3].name date
+index[22].field[4].name replyto
+index[22].field[5].name subject
+index[22].field[6].name to
+index[22].field[7].name body
+index[22].field[8].name bodymeta
+index[22].field[9].name mailid
+index[22].field[10].name attachmentcount
+index[22].field[11].name attachmentcontent
+index[22].field[12].name attachmenttypes
+index[22].field[13].name attachmentnames
+index[22].field[14].name attachmentlanguages
+index[23].name index16
+index[23].field[15]
+index[23].field[0].name bcc
+index[23].field[1].name cc
+index[23].field[2].name from
+index[23].field[3].name date
+index[23].field[4].name replyto
+index[23].field[5].name subject
+index[23].field[6].name to
+index[23].field[7].name body
+index[23].field[8].name bodymeta
+index[23].field[9].name mailid
+index[23].field[10].name attachmentcount
+index[23].field[11].name attachmentcontent
+index[23].field[12].name attachmenttypes
+index[23].field[13].name attachmentnames
+index[23].field[14].name attachmentlanguages
+index[24].name index17
+index[24].field[15]
+index[24].field[0].name bcc
+index[24].field[1].name cc
+index[24].field[2].name from
+index[24].field[3].name date
+index[24].field[4].name replyto
+index[24].field[5].name subject
+index[24].field[6].name to
+index[24].field[7].name body
+index[24].field[8].name bodymeta
+index[24].field[9].name mailid
+index[24].field[10].name attachmentcount
+index[24].field[11].name attachmentcontent
+index[24].field[12].name attachmenttypes
+index[24].field[13].name attachmentnames
+index[24].field[14].name attachmentlanguages
+index[25].name date
+index[25].field[1]
+index[25].field[0].name date
diff --git a/vsm/src/tests/create-test.sh b/vsm/src/tests/create-test.sh
new file mode 100755
index 00000000000..c4259526089
--- /dev/null
+++ b/vsm/src/tests/create-test.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+gen_ignore_file() {  # $1: ignore file to (over)write; reads the global $test
+    echo "generating '$1' ..."
+    echo ".depend" > "$1"  # target quoted so a name with spaces or glob characters stays one word
+    echo "Makefile" >> "$1"
+    echo "${test}_test" >> "$1"
+}
+
+gen_project_file() {  # $1: fastos project file to (over)write; reads the global $test
+    echo "generating '$1' ..."
+    echo "APPLICATION ${test}_test" > "$1"
+    echo "OBJS $test" >> "$1"
+    echo "LIBS vsm/vsm" >> "$1"
+    echo "EXTERNALLIBS vespalib vespalog" >> "$1"
+    echo "" >> "$1"
+    echo "CUSTOMMAKE" >> "$1"
+    echo "test: depend ${test}_test" >> "$1"
+    printf '\t@./%s_test\n' "$test" >> "$1"  # printf, not 'echo -e': -e is not portable under #!/bin/sh and may be printed literally
+}
+
+gen_source() {  # $1: C++ skeleton file to (over)write; reads the global $test
+    echo "generating '$1' ..."
+    echo "#include <vespa/log/log.h>" > "$1"  # target quoted so a name with spaces or glob characters stays one word
+    echo "LOG_SETUP(\"${test}_test\");" >> "$1"
+    echo "#include <vespa/fastos/fastos.h>" >> "$1"
+    echo "#include <vespa/vespalib/testkit/testapp.h>" >> "$1"
+    echo "" >> "$1"
+    echo "// using namespace ;" >> "$1"
+    echo "" >> "$1"
+    echo "TEST_SETUP(Test);" >> "$1"
+    echo "" >> "$1"
+    echo "int" >> "$1"
+    echo "Test::Main()" >> "$1"
+    echo "{" >> "$1"
+    echo " TEST_INIT(\"${test}_test\");" >> "$1"
+    echo " TEST_DONE();" >> "$1"
+    echo "}" >> "$1"
+}
+
+gen_desc() {  # $1: description file to (over)write; reads the global $test
+    echo "generating '$1' ..."
+    echo "$test test. Take a look at $test.cpp for details." > "$1"  # target quoted so it stays one word
+}
+
+gen_file_list() {  # $1: file list to (over)write; reads the global $test
+    echo "generating '$1' ..."
+    echo "$test.cpp" > "$1"  # target quoted so it stays one word
+}
+
+if [ $# -ne 1 ]; then
+    echo "usage: $0 <name>"
+    echo " name: name of the test to create"
+    exit 1
+fi
+
+test=$1  # may be a path; reduced to its last component after the cd below
+if [ -e "$test" ]; then  # quoted: an unquoted name containing spaces breaks the '[' expression
+    echo "$test already present, don't want to mess it up..."
+    exit 1
+fi
+
+echo "creating directory '$test' ..."
+mkdir -p "$test" || exit 1
+cd "$test" || exit 1
+test=$(basename "$test")  # the generators below read $test as the bare test name
+
+gen_ignore_file .cvsignore  # NOTE(review): the test directories in this change carry .gitignore and CMakeLists.txt - confirm these output names are still wanted
+gen_project_file fastos.project
+gen_source "$test.cpp"
+gen_desc DESC
+gen_file_list FILES
diff --git a/vsm/src/tests/docsum/.gitignore b/vsm/src/tests/docsum/.gitignore
new file mode 100644
index 00000000000..9a697a94de8
--- /dev/null
+++ b/vsm/src/tests/docsum/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+docsum_test
+vsm_docsum_test_app
diff --git a/vsm/src/tests/docsum/CMakeLists.txt b/vsm/src/tests/docsum/CMakeLists.txt
new file mode 100644
index 00000000000..506e0a9bf66
--- /dev/null
+++ b/vsm/src/tests/docsum/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vsm_docsum_test_app
+ SOURCES
+ docsum.cpp
+ DEPENDS
+ vsm
+)
+vespa_add_test(NAME vsm_docsum_test_app COMMAND vsm_docsum_test_app)
diff --git a/vsm/src/tests/docsum/DESC b/vsm/src/tests/docsum/DESC
new file mode 100644
index 00000000000..a2008f9b6c4
--- /dev/null
+++ b/vsm/src/tests/docsum/DESC
@@ -0,0 +1 @@
+docsum test. Take a look at docsum.cpp for details.
diff --git a/vsm/src/tests/docsum/FILES b/vsm/src/tests/docsum/FILES
new file mode 100644
index 00000000000..0ada8d30e81
--- /dev/null
+++ b/vsm/src/tests/docsum/FILES
@@ -0,0 +1 @@
+docsum.cpp
diff --git a/vsm/src/tests/docsum/docsum.cpp b/vsm/src/tests/docsum/docsum.cpp
new file mode 100644
index 00000000000..366fae65849
--- /dev/null
+++ b/vsm/src/tests/docsum/docsum.cpp
@@ -0,0 +1,296 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("docsum_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vector>
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/vsm/common/docsum.h>
+#include <vespa/vsm/vsm/flattendocsumwriter.h>
+#include <vespa/vsm/vsm/jsondocsumwriter.h>
+
+using namespace document;
+
+namespace vsm {
+
+template <typename T>
+class Vector : public std::vector<T> // std::vector with a chainable add(), e.g. Vector<int>().add(1).add(2)
+{
+public:
+    Vector<T> & add(const T & v) { this->push_back(v); return *this; } // appends a copy of v and returns *this; const ref avoids copying v twice
+};
+
+typedef Vector<std::string> StringList;
+typedef Vector<std::pair<std::string, int32_t> > WeightedStringList;
+
+
+class TestDocument : public vsm::Document // vsm::Document stub that keeps one value slot per field id in a vector
+{
+private:
+    std::vector<FieldValueContainer> _fields; // indexed by field id; sized once in the constructor
+
+public:
+    TestDocument(const search::DocumentIdT & docId, size_t numFields) : vsm::Document(docId, numFields), _fields(numFields) {}
+    virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) {
+        if (fId >= _fields.size()) {
+            return false; // id outside the slots allocated by the constructor
+        }
+        _fields[fId].reset(fv.release()); // hand the released pointer to the slot
+        return true;
+    }
+    virtual const document::FieldValue * getField(FieldIdT fId) const {
+        if (fId >= _fields.size()) {
+            return NULL; // id outside the allocated slots
+        }
+        return _fields[fId].get();
+    }
+};
+
+
+class DocsumTest : public vespalib::TestApp // test application for the docsum writers and DocSumCache
+{
+public:
+    int Main(); // entry point; runs the test methods declared below
+
+private:
+    ArrayFieldValue createFieldValue(const StringList & fv);               // list of strings -> array field value
+    WeightedSetFieldValue createFieldValue(const WeightedStringList & fv); // (string, weight) pairs -> weighted set field value
+
+    void assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp);
+    void assertFlattenDocsumWriter(const FieldValue & fv, const std::string & exp) {
+        FlattenDocsumWriter freshWriter; // default-constructed writer used for this single check
+        assertFlattenDocsumWriter(freshWriter, fv, exp);
+    }
+    void assertJSONDocsumWriter(JSONDocsumWriter & jdw, const FieldValue & fv, const std::string & exp);
+    void assertJSONDocsumWriter(const FieldValue & fv, const std::string & exp) {
+        JSONDocsumWriter freshWriter; // default-constructed writer used for this single check
+        assertJSONDocsumWriter(freshWriter, fv, exp);
+    }
+
+    void testFlattenDocsumWriter();
+    void testJSONDocsumWriter();
+    void requireThatJSONDocsumWriterHandlesMap();
+    void testDocSumCache();
+};
+
+ArrayFieldValue
+DocsumTest::createFieldValue(const StringList & fv)
+{
+    // Array-of-string data type; function-local static, so it is built on the first call and reused afterwards.
+    static ArrayDataType type(*DataType::STRING);
+    ArrayFieldValue result(type);
+    for (StringList::const_iterator it = fv.begin(); it != fv.end(); ++it) {
+        result.add(StringFieldValue(*it)); // elements are added in list order
+    }
+    return result;
+}
+
+WeightedSetFieldValue
+DocsumTest::createFieldValue(const WeightedStringList & fv)
+{
+    static WeightedSetDataType type(*DataType::STRING, false, false); // function-local static: built on the first call, reused afterwards
+    WeightedSetFieldValue result(type);
+    for (WeightedStringList::const_iterator it = fv.begin(); it != fv.end(); ++it) {
+        result.add(StringFieldValue(it->first), it->second); // first = element string, second = its weight
+    }
+    return result;
+}
+
+void
+DocsumTest::assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp)
+{
+    FieldPath noPath; // default-constructed path, so begin()/end() describe an empty range
+    fv.iterateNested(noPath.begin(), noPath.end(), fdw);
+    // Compare what the writer holds so far (buffer start up to its current position) with exp.
+    EXPECT_EQUAL(std::string(fdw.getResult().getBuffer(), fdw.getResult().getPos()), exp);
+}
+
+void // Writes fv through jdw and expects the writer's result to equal exp.
+DocsumTest::assertJSONDocsumWriter(JSONDocsumWriter & jdw, const FieldValue & fv, const std::string & exp)
+{
+ jdw.write(fv);
+ EXPECT_EQUAL(jdw.getResult(), exp); // jdw is not cleared here; callers call jdw.clear() between cases when they need to
+}
+
+void // Exercises FlattenDocsumWriter: single values, repeated use of one writer, and growth of its result buffer.
+DocsumTest::testFlattenDocsumWriter()
+{
+ { // basic tests: each value goes through its own default-constructed writer
+ assertFlattenDocsumWriter(StringFieldValue("foo bar"), "foo bar");
+ assertFlattenDocsumWriter(RawFieldValue("foo bar"), "foo bar");
+ assertFlattenDocsumWriter(LongFieldValue(123456789), "123456789");
+ assertFlattenDocsumWriter(createFieldValue(StringList().add("foo bar").add("baz").add(" qux ")),
+ "foo bar baz qux ");
+ }
+ { // test multiple invocations: output accumulates in the same writer
+ FlattenDocsumWriter fdw("#"); // "#" is expected between successive values, see "foo#bar" below
+ assertFlattenDocsumWriter(fdw, StringFieldValue("foo"), "foo");
+ assertFlattenDocsumWriter(fdw, StringFieldValue("bar"), "foo#bar");
+ fdw.clear(); // the expectations below show a " " separator after clear(), no longer "#"
+ assertFlattenDocsumWriter(fdw, StringFieldValue("baz"), "baz");
+ assertFlattenDocsumWriter(fdw, StringFieldValue("qux"), "baz qux");
+ }
+ { // test resizing
+ FlattenDocsumWriter fdw("#");
+ EXPECT_EQUAL(fdw.getResult().getPos(), 0u);
+ EXPECT_EQUAL(fdw.getResult().getLength(), 32u); // a new writer is expected to start with a 32-byte result buffer
+ assertFlattenDocsumWriter(fdw, StringFieldValue("aaaabbbbccccddddeeeeffffgggghhhh"),
+ "aaaabbbbccccddddeeeeffffgggghhhh");
+ EXPECT_EQUAL(fdw.getResult().getPos(), 32u); // exactly full, no growth expected yet
+ EXPECT_EQUAL(fdw.getResult().getLength(), 32u);
+ assertFlattenDocsumWriter(fdw, StringFieldValue("aaaa"),
+ "aaaabbbbccccddddeeeeffffgggghhhh#aaaa");
+ EXPECT_EQUAL(fdw.getResult().getPos(), 37u); // 32 + separator + 4
+ EXPECT_TRUE(fdw.getResult().getLength() >= 37u); // only a lower bound is checked for the grown length
+ fdw.clear();
+ EXPECT_EQUAL(fdw.getResult().getPos(), 0u);
+ EXPECT_TRUE(fdw.getResult().getLength() >= 37u); // clear() is expected to keep the grown buffer
+ }
+}
+
+void // Exercises JSONDocsumWriter on primitives, collections, nested structs (with field selection) and repeated use.
+DocsumTest::testJSONDocsumWriter()
+{
+ { // basic types: numbers are expected unquoted, strings in double quotes
+ assertJSONDocsumWriter(LongFieldValue(123456789), "123456789");
+ assertJSONDocsumWriter(FloatFieldValue(12.34), "12.34");
+ assertJSONDocsumWriter(StringFieldValue("foo bar"), "\"foo bar\"");
+ }
+ { // collection field values: arrays as a JSON list, weighted sets as a list of [value, weight] pairs
+ assertJSONDocsumWriter(createFieldValue(StringList().add("foo").add("bar").add("baz")),
+ "[\"foo\",\"bar\",\"baz\"]");
+ assertJSONDocsumWriter(createFieldValue(WeightedStringList().add(std::make_pair("bar", 20)).
+ add(std::make_pair("baz", 30)).
+ add(std::make_pair("foo", 10))),
+ "[[\"bar\",20],[\"baz\",30],[\"foo\",10]]");
+ }
+ { // struct field value
+ StructDataType subType("substruct"); // inner struct with string fields d and e
+ Field fd("d", 0, *DataType::STRING, true);
+ Field fe("e", 1, *DataType::STRING, true);
+ subType.addField(fd);
+ subType.addField(fe);
+ StructFieldValue subValue(subType);
+ subValue.setValue(fd, StringFieldValue("baz"));
+ subValue.setValue(fe, StringFieldValue("qux"));
+
+ StructDataType type("struct"); // outer struct: string fields a and b, plus c holding the inner struct
+ Field fa("a", 0, *DataType::STRING, true);
+ Field fb("b", 1, *DataType::STRING, true);
+ Field fc("c", 2, subType, true);
+ type.addField(fa);
+ type.addField(fb);
+ type.addField(fc);
+ StructFieldValue value(type);
+ value.setValue(fa, StringFieldValue("foo"));
+ value.setValue(fb, StringFieldValue("bar"));
+ value.setValue(fc, subValue);
+
+
+ { // select a subset and then all
+ JSONDocsumWriter jdw;
+ DocsumFieldSpec::FieldIdentifierVector fields; // restricts the output to the paths "a" and "c.e"
+ fields.push_back(DocsumFieldSpec::FieldIdentifier(
+ 0, *type.buildFieldPath("a")));
+ fields.push_back(DocsumFieldSpec::FieldIdentifier(
+ 0, *type.buildFieldPath("c.e")));
+ jdw.setInputFields(fields);
+ assertJSONDocsumWriter(jdw, value, "{\"a\":\"foo\",\"c\":{\"e\":\"qux\"}}");
+ jdw.clear(); // after clear() the expectation contains every field, i.e. the selection no longer applies
+ assertJSONDocsumWriter(jdw, value, "{\"a\":\"foo\",\"b\":\"bar\",\"c\":{\"d\":\"baz\",\"e\":\"qux\"}}");
+ }
+ }
+ { // multiple invocations: results are expected to be concatenated until clear() is called
+ JSONDocsumWriter jdw;
+ assertJSONDocsumWriter(jdw, StringFieldValue("foo"), "\"foo\"");
+ assertJSONDocsumWriter(jdw, StringFieldValue("bar"), "\"foo\"\"bar\"");
+ jdw.clear();
+ assertJSONDocsumWriter(jdw, StringFieldValue("baz"), "\"baz\"");
+ }
+}
+
+void // Checks that JSONDocsumWriter renders map field values as a list of {"key":...,"value":...} objects.
+DocsumTest::requireThatJSONDocsumWriterHandlesMap()
+{
+ { // map<string, string>
+ MapDataType mapType(*DataType::STRING, *DataType::STRING);
+ MapFieldValue mapfv(mapType);
+ EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), StringFieldValue("v1")));
+ EXPECT_TRUE(mapfv.put(StringFieldValue("k2"), StringFieldValue("v2")));
+ assertJSONDocsumWriter(mapfv, "[{\"key\":\"k1\",\"value\":\"v1\"},{\"key\":\"k2\",\"value\":\"v2\"}]");
+ }
+ { // map<string, struct>
+ StructDataType structType("struct"); // value type: struct with string fields a and b
+ Field fa("a", 0, *DataType::STRING, true);
+ Field fb("b", 1, *DataType::STRING, true);
+ structType.addField(fa);
+ structType.addField(fb);
+ StructFieldValue structValue(structType);
+ structValue.setValue(fa, StringFieldValue("foo"));
+ structValue.setValue(fb, StringFieldValue("bar"));
+ MapDataType mapType(*DataType::STRING, structType);
+ MapFieldValue mapfv(mapType);
+ EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), structValue));
+ { // select a subset and then all
+ JSONDocsumWriter jdw;
+ DocsumFieldSpec::FieldIdentifierVector fields;
+ fields.push_back(DocsumFieldSpec::FieldIdentifier(0, *mapType.buildFieldPath("value.b"))); // select struct field b of the map values
+ jdw.setInputFields(fields);
+ assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"b\":\"bar\"}}]");
+ fields[0] = DocsumFieldSpec::FieldIdentifier(0, *mapType.buildFieldPath("{k1}.a")); // select field a via the map key k1
+ jdw.clear();
+ jdw.setInputFields(fields);
+ assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\"}}]");
+ jdw.clear(); // all fields implicit
+ assertJSONDocsumWriter(jdw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\",\"b\":\"bar\"}}]");
+ }
+ }
+}
+
+void // Checks DocSumCache::push_back() and insert(), including inserting a document whose id is already cached.
+DocsumTest::testDocSumCache()
+{
+ Document::SP d1(new TestDocument(0, 1)); // doc id 0, one field
+ d1->setField(0, FieldValue::UP(new StringFieldValue("aa")));
+ Document::SP d2(new TestDocument(1, 2)); // doc id 1, two fields
+ d2->setField(0, FieldValue::UP(new StringFieldValue("bbb")));
+ d2->setField(1, FieldValue::UP(new StringFieldValue("cccc")));
+ DocSumCache cac1;
+ cac1.push_back(d1);
+ cac1.push_back(d2);
+ EXPECT_EQUAL(cac1.cache().size(), 2u);
+
+ Document::SP d3(new TestDocument(2, 1)); // doc id 2, not yet in cac1
+ d3->setField(0, FieldValue::UP(new StringFieldValue("ddddd")));
+ DocSumCache cac2;
+ cac2.push_back(d3);
+ cac1.insert(cac2); // merging a cache holding a new id is expected to add one entry
+ EXPECT_EQUAL(cac1.cache().size(), 3u);
+
+ Document::SP d4(new TestDocument(2, 1)); // same doc id (2) as d3
+ d4->setField(0, FieldValue::UP(new StringFieldValue("eeeeee")));
+ DocSumCache cac3;
+ cac3.push_back(d4);
+ cac1.insert(cac3); // id 2 is already cached, so the size is expected to stay at 3
+ EXPECT_EQUAL(cac1.cache().size(), 3u);
+ EXPECT_EQUAL(2u, cac1.getDocSum(2).getDocId());
+}
+
+int // Test application entry point: runs the four docsum tests between TEST_INIT and TEST_DONE.
+DocsumTest::Main()
+{
+ TEST_INIT("docsum_test");
+
+ testFlattenDocsumWriter();
+ testJSONDocsumWriter();
+ requireThatJSONDocsumWriterHandlesMap();
+ testDocSumCache();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(vsm::DocsumTest);
+
diff --git a/vsm/src/tests/document/.gitignore b/vsm/src/tests/document/.gitignore
new file mode 100644
index 00000000000..d47781eff63
--- /dev/null
+++ b/vsm/src/tests/document/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+document_test
+vsm_document_test_app
diff --git a/vsm/src/tests/document/CMakeLists.txt b/vsm/src/tests/document/CMakeLists.txt
new file mode 100644
index 00000000000..36da98129a1
--- /dev/null
+++ b/vsm/src/tests/document/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vsm_document_test_app
+ SOURCES
+ document.cpp
+ DEPENDS
+ vsm
+)
+vespa_add_test(NAME vsm_document_test_app COMMAND vsm_document_test_app)
diff --git a/vsm/src/tests/document/DESC b/vsm/src/tests/document/DESC
new file mode 100644
index 00000000000..6ab6ded2dbc
--- /dev/null
+++ b/vsm/src/tests/document/DESC
@@ -0,0 +1 @@
+document test. Take a look at document.cpp for details.
diff --git a/vsm/src/tests/document/FILES b/vsm/src/tests/document/FILES
new file mode 100644
index 00000000000..2721ca2d928
--- /dev/null
+++ b/vsm/src/tests/document/FILES
@@ -0,0 +1 @@
+document.cpp
diff --git a/vsm/src/tests/document/document.cpp b/vsm/src/tests/document/document.cpp
new file mode 100644
index 00000000000..37cccae09cc
--- /dev/null
+++ b/vsm/src/tests/document/document.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("document_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/vsm/common/storagedocument.h>
+
+using namespace document;
+
+namespace vsm {
+
+class DocumentTest : public vespalib::TestApp // Test application covering StorageDocument and StringFieldIdTMap.
+{
+private:
+ void testStorageDocument(); // field lookup, caching and overriding of field values on StorageDocument
+ void testStringFieldIdTMap(); // field name to field number mapping
+public:
+ int Main(); // runs both test cases
+};
+
+void
+DocumentTest::testStorageDocument()
+{ // Checks StorageDocument field lookup through a field path map, repeated reads, overriding via setField() and reset via init().
+ DocumentType dt("testdoc", 0);
+
+ Field fa("a", 0, *DataType::STRING, true);
+ Field fb("b", 1, *DataType::STRING, true);
+ dt.addField(fa);
+ dt.addField(fb);
+
+ document::Document::UP doc(new document::Document(dt, DocumentId()));
+ doc->setValue(fa, StringFieldValue("foo"));
+ doc->setValue(fb, StringFieldValue("bar"));
+
+ SharedFieldPathMap fpmap(new FieldPathMapT());
+ fpmap->push_back(*dt.buildFieldPath("a"));
+ fpmap->push_back(*dt.buildFieldPath("b"));
+ fpmap->push_back(FieldPath()); // third entry is an empty path, so index 2 maps to no document field
+ ASSERT_TRUE((*fpmap)[0].size() == 1);
+ ASSERT_TRUE((*fpmap)[1].size() == 1);
+ ASSERT_TRUE((*fpmap)[2].size() == 0);
+
+ StorageDocument sdoc(std::move(doc));
+ ASSERT_TRUE(sdoc.valid());
+ sdoc.setFieldCount(3);
+ sdoc.fieldPathMap(fpmap);
+ sdoc.init();
+
+ EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString());
+ EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString());
+ EXPECT_TRUE(sdoc.getField(2) == NULL);
+ // test caching
+ EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); // second read must give the same answers as the first
+ EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString());
+ EXPECT_TRUE(sdoc.getField(2) == NULL);
+
+ // set new values
+ EXPECT_TRUE(sdoc.setField(0, FieldValue::UP(new StringFieldValue("baz")))); // each override must leave the other fields untouched
+ EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString());
+ EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString());
+ EXPECT_TRUE(sdoc.getField(2) == NULL);
+ EXPECT_TRUE(sdoc.setField(1, FieldValue::UP(new StringFieldValue("qux"))));
+ EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString());
+ EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString());
+ EXPECT_TRUE(sdoc.getField(2) == NULL);
+ EXPECT_TRUE(sdoc.setField(2, FieldValue::UP(new StringFieldValue("quux")))); // the field with the empty path can still be given a value
+ EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString());
+ EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString());
+ EXPECT_EQUAL(std::string("quux"), sdoc.getField(2)->getAsString());
+
+ // reset cached field values
+ sdoc.init(); // after init() the values set through setField() are expected to be gone again
+ EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString());
+ EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString());
+ EXPECT_TRUE(sdoc.getField(2) == NULL);
+
+ EXPECT_TRUE(!sdoc.setField(3, FieldValue::UP(new StringFieldValue("thud")))); // index 3 is outside the field count of 3, so setField() must fail
+
+ SharedFieldPathMap fim; // default constructed, no FieldPathMapT allocated
+ StorageDocument s2(fim);
+ EXPECT_EQUAL(vespalib::string("null::"), s2.docDoc().getId().toString()); // a StorageDocument built without a document reports the id "null::"
+}
+
+void DocumentTest::testStringFieldIdTMap()
+{ // Checks name to field number assignment in StringFieldIdTMap, with and without an explicit number, and its stream output.
+ StringFieldIdTMap m;
+ EXPECT_EQUAL(0u, m.highestFieldNo());
+ EXPECT_TRUE(StringFieldIdTMap::npos == m.fieldNo("unknown")); // unknown names map to npos
+ m.add("f1");
+ EXPECT_EQUAL(0u, m.fieldNo("f1"));
+ EXPECT_EQUAL(1u, m.highestFieldNo()); // highestFieldNo() is expected to be one past the largest assigned number
+ m.add("f1"); // adding a known name again must not change its number
+ EXPECT_EQUAL(0u, m.fieldNo("f1"));
+ EXPECT_EQUAL(1u, m.highestFieldNo());
+ m.add("f2");
+ EXPECT_EQUAL(1u, m.fieldNo("f2"));
+ EXPECT_EQUAL(2u, m.highestFieldNo());
+ m.add("f3", 7); // explicit field number
+ EXPECT_EQUAL(7u, m.fieldNo("f3"));
+ EXPECT_EQUAL(8u, m.highestFieldNo());
+ m.add("f3");
+ EXPECT_EQUAL(7u, m.fieldNo("f3"));
+ EXPECT_EQUAL(8u, m.highestFieldNo());
+ m.add("f2", 13); // an explicit number replaces the number assigned earlier
+ EXPECT_EQUAL(13u, m.fieldNo("f2"));
+ EXPECT_EQUAL(14u, m.highestFieldNo());
+ m.add("f4"); // expected to get number 3; presumably the count of names already present - TODO confirm
+ EXPECT_EQUAL(3u, m.fieldNo("f4"));
+ EXPECT_EQUAL(14u, m.highestFieldNo());
+ {
+ vespalib::asciistream os;
+ StringFieldIdTMap t;
+ t.add("b");
+ t.add("a");
+ os << t;
+ EXPECT_EQUAL(vespalib::string("a = 1\nb = 0\n"), os.str()); // printed as "name = number" lines, "a" before "b" although "b" was added first
+ }
+
+}
+
+int
+DocumentTest::Main()
+{ // Test entry point: runs both document test cases between TEST_INIT and TEST_DONE.
+ TEST_INIT("document_test");
+
+ testStorageDocument();
+ testStringFieldIdTMap();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(vsm::DocumentTest);
+
diff --git a/vsm/src/tests/searcher/.gitignore b/vsm/src/tests/searcher/.gitignore
new file mode 100644
index 00000000000..52a56dff405
--- /dev/null
+++ b/vsm/src/tests/searcher/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+searcher_test
+vsm_searcher_test_app
diff --git a/vsm/src/tests/searcher/CMakeLists.txt b/vsm/src/tests/searcher/CMakeLists.txt
new file mode 100644
index 00000000000..26d6115e3a7
--- /dev/null
+++ b/vsm/src/tests/searcher/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vsm_searcher_test_app
+ SOURCES
+ searcher.cpp
+ DEPENDS
+ vsm
+)
+vespa_add_test(NAME vsm_searcher_test_app COMMAND vsm_searcher_test_app)
diff --git a/vsm/src/tests/searcher/DESC b/vsm/src/tests/searcher/DESC
new file mode 100644
index 00000000000..1165ce57737
--- /dev/null
+++ b/vsm/src/tests/searcher/DESC
@@ -0,0 +1 @@
+Unit tests for string and numeric field searchers. Take a look at searcher.cpp for details.
diff --git a/vsm/src/tests/searcher/FILES b/vsm/src/tests/searcher/FILES
new file mode 100644
index 00000000000..603eb41c816
--- /dev/null
+++ b/vsm/src/tests/searcher/FILES
@@ -0,0 +1 @@
+searcher.cpp
diff --git a/vsm/src/tests/searcher/searcher.cpp b/vsm/src/tests/searcher/searcher.cpp
new file mode 100644
index 00000000000..dbf458a0c32
--- /dev/null
+++ b/vsm/src/tests/searcher/searcher.cpp
@@ -0,0 +1,897 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("searcher_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/query/queryterm.h>
+#include <vespa/vsm/searcher/fieldsearcher.h>
+#include <vespa/vsm/searcher/floatfieldsearcher.h>
+#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
+#include <vespa/vsm/searcher/intfieldsearcher.h>
+#include <vespa/vsm/searcher/strchrfieldsearcher.h>
+#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h>
+#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h>
+#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h>
+#include <vespa/vsm/searcher/utf8substringsearcher.h>
+#include <vespa/vsm/searcher/utf8substringsnippetmodifier.h>
+#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h>
+#include <vespa/vsm/vsm/snippetmodifier.h>
+#include <vespa/vsm/vsm/fieldsearchspec.h>
+
+using namespace document;
+using search::EmptyQueryNodeResult;
+using search::QueryTerm;
+using search::QueryTermList;
+
+namespace vsm {
+
+/**
+ * std::vector with a chainable add(), so that test fixtures can be written
+ * inline, e.g. Hits().add(0).add(2).
+ **/
+template <typename T>
+class Vector : public std::vector<T>
+{
+public:
+    // Declared with the injected class name: naming the constructor with a
+    // template-id (Vector<T>) is ill-formed as of C++20.
+    Vector() : std::vector<T>() {}
+    /** Appends v and returns *this so that calls can be chained. **/
+    Vector<T> & add(T v) { this->push_back(v); return *this; }
+};
+
+// Shorthand fixture types used by the searcher tests.
+using Hits = Vector<size_t>;            // expected hit positions for one query term
+using StringList = Vector<std::string>; // query terms or string field values
+using HitsList = Vector<Hits>;          // one Hits entry per query term
+using BoolList = Vector<bool>;          // match / no match per query term
+using LongList = Vector<int64_t>;       // values of an integer array field
+using FloatList = Vector<float>;        // values of a float array field
+using QTFieldInfo = QueryTerm::FieldInfo;
+using FieldInfoList = Vector<QTFieldInfo>; // one expected field info per query term
+
+/**
+ * Thin, equality-comparable wrapper around a std::string.
+ *
+ * The text is stored by value. Keeping a reference to the constructor
+ * argument would dangle as soon as a temporary is passed, for instance a
+ * string literal converted to std::string at the call site.
+ **/
+class String
+{
+private:
+    std::string _str;
+public:
+    /** Copies str; the argument does not need to outlive this object. **/
+    String(const std::string & str) : _str(str) {}
+    /** Two wrappers are equal when their strings are equal. **/
+    bool operator==(const String & rhs) const {
+        return _str == rhs._str;
+    }
+};
+
+/**
+ * Builds the query terms used by the searcher tests from strings of the
+ * form "[index:]term". The term part may carry '*' markers, see parseTerm().
+ * Terms without an index prefix are assigned the index name "index".
+ *
+ * NOTE: qtl is filled with the addresses of the elements of qtv, so the
+ * entries in a copied Query's qtl still refer to the terms of the object it
+ * was copied from.
+ **/
+class Query
+{
+private:
+    /** Parses every entry of terms into qtv and then exposes them through qtl. **/
+    void setupQuery(const StringList & terms) {
+        for (size_t i = 0; i < terms.size(); ++i) {
+            ParsedQueryTerm pqt = parseQueryTerm(terms[i]);
+            ParsedTerm pt = parseTerm(pqt.second);
+            qtv.push_back(QueryTerm(eqnr, pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second));
+        }
+        // Addresses are taken only after qtv is complete, since push_back
+        // may reallocate and move the elements.
+        for (size_t i = 0; i < qtv.size(); ++i) {
+            qtl.push_back(&qtv[i]);
+        }
+    }
+public:
+    typedef std::pair<std::string, std::string> ParsedQueryTerm;       // (index name, term)
+    typedef std::pair<std::string, QueryTerm::SearchTerm> ParsedTerm;  // (term without markers, term type)
+    EmptyQueryNodeResult eqnr;
+    std::vector<QueryTerm> qtv;
+    QueryTermList qtl;
+    Query(const StringList & terms) : eqnr(), qtv(), qtl() {
+        setupQuery(terms);
+    }
+    /**
+     * Splits "index:term" at the first ':'.
+     * @return (index, term); the index is empty when there is no ':'.
+     **/
+    static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) {
+        size_t i = queryTerm.find(':');
+        if (i != std::string::npos) {
+            return ParsedQueryTerm(queryTerm.substr(0, i), queryTerm.substr(i + 1));
+        }
+        return ParsedQueryTerm(std::string(), queryTerm);
+    }
+    /**
+     * Strips leading/trailing '*' markers and derives the term type:
+     * "*t*" is a substring term, "*t" a suffix term, "t*" a prefix term and
+     * anything else a plain word.
+     * @return (term without markers, term type)
+     **/
+    static ParsedTerm parseTerm(const std::string & term) {
+        if (term.empty()) {
+            // An empty term (e.g. from "index:") has no markers; returning
+            // here avoids reading term[term.size() - 1] out of bounds.
+            return std::make_pair(term, QueryTerm::WORD);
+        }
+        if (term[0] == '*' && term[term.size() - 1] == '*') {
+            return std::make_pair(term.substr(1, term.size() - 2), QueryTerm::SUBSTRINGTERM);
+        } else if (term[0] == '*') {
+            return std::make_pair(term.substr(1, term.size() - 1), QueryTerm::SUFFIXTERM);
+        } else if (term[term.size() - 1] == '*') {
+            return std::make_pair(term.substr(0, term.size() - 1), QueryTerm::PREFIXTERM);
+        } else {
+            return std::make_pair(term, QueryTerm::WORD);
+        }
+    }
+};
+
+struct SnippetModifierSetup // Bundles a query, a prepared substring snippet searcher and the SnippetModifier wrapping that searcher.
+{
+ Query query;
+ UTF8SubstringSnippetModifier::SP searcher;
+ SharedSearcherBuf buf;
+ SnippetModifier modifier;
+ explicit SnippetModifierSetup(const StringList & terms) : // terms use the same "[index:]term" syntax as Query
+ query(terms),
+ searcher(new UTF8SubstringSnippetModifier()),
+ buf(new SearcherBuf(8)),
+ modifier(searcher)
+ {
+ searcher->prepare(query.qtl, buf); // hand this setup's query terms and buffer to the searcher
+ }
+};
+
+class SearcherTest : public vespalib::TestApp // Test application for the vsm field searchers and snippet modifiers.
+{
+private:
+
+ // helper functions
+ ArrayFieldValue getFieldValue(const StringList & fv); // each overload wraps the list elements in an array field value
+ ArrayFieldValue getFieldValue(const LongList & fv);
+ ArrayFieldValue getFieldValue(const FloatList & fv);
+
+ bool assertMatchTermSuffix(const std::string & term, const std::string & word); // returns the matchTermSuffix() result, it does not assert itself
+
+ /** string field searcher **/
+ void assertString(StrChrFieldSearcher & fs, const std::string & term, const std::string & field, const Hits & exp) { // single term, single value: wrapped into one-element lists
+ assertString(fs, StringList().add(term), field, HitsList().add(exp));
+ }
+ void assertString(StrChrFieldSearcher & fs, const StringList & query, const std::string & field, const HitsList & exp) {
+ assertSearch(fs, query, StringFieldValue(field), exp);
+ }
+ void assertString(StrChrFieldSearcher & fs, const std::string & term, const StringList & field, const Hits & exp) { // single term against an array of strings
+ assertString(fs, StringList().add(term), field, HitsList().add(exp));
+ }
+ void assertString(StrChrFieldSearcher & fs, const StringList & query, const StringList & field, const HitsList & exp) {
+ assertSearch(fs, query, getFieldValue(field), exp);
+ }
+
+ /** int field searcher **/
+ void assertInt(IntFieldSearcher fs, const std::string & term, int64_t field, bool exp) { // NOTE(review): the int/float helpers take the searcher by value, unlike the StrChrFieldSearcher & overloads - confirm the copy is intended
+ assertInt(fs, StringList().add(term), field, BoolList().add(exp));
+ }
+ void assertInt(IntFieldSearcher fs, const StringList & query, int64_t field, const BoolList & exp) { // exp holds match / no match per query term
+ assertNumeric(fs, query, LongFieldValue(field), exp);
+ }
+ void assertInt(IntFieldSearcher fs, const std::string & term, const LongList & field, const Hits & exp) {
+ assertInt(fs, StringList().add(term), field, HitsList().add(exp));
+ }
+ void assertInt(IntFieldSearcher fs, const StringList & query, const LongList & field, const HitsList & exp) {
+ assertSearch(fs, query, getFieldValue(field), exp);
+ }
+
+ /** float field searcher **/
+ void assertFloat(FloatFieldSearcher fs, const std::string & term, float field, bool exp) {
+ assertFloat(fs, StringList().add(term), field, BoolList().add(exp));
+ }
+ void assertFloat(FloatFieldSearcher fs, const StringList & query, float field, const BoolList & exp) {
+ assertNumeric(fs, query, FloatFieldValue(field), exp);
+ }
+ void assertFloat(FloatFieldSearcher fs, const std::string & term, const FloatList & field, const Hits & exp) {
+ assertFloat(fs, StringList().add(term), field, HitsList().add(exp));
+ }
+ void assertFloat(FloatFieldSearcher fs, const StringList & query, const FloatList & field, const HitsList & exp) {
+ assertSearch(fs, query, getFieldValue(field), exp);
+ }
+
+ void assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp); // turns each bool into "one hit at position 0" or "no hits"
+ std::vector<QueryTerm> performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv); // runs fs over a one-field document and returns the query terms
+ void assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const HitsList & exp); // compares hit positions per query term
+
+ /** string field searcher **/
+ bool assertFieldInfo(StrChrFieldSearcher & fs, const std::string & term, const std::string & fv, const QTFieldInfo & exp) { // the bool result tells whether all comparisons held
+ return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ bool assertFieldInfo(StrChrFieldSearcher & fs, const std::string & term, const StringList & fv, const QTFieldInfo & exp) {
+ return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ bool assertFieldInfo(StrChrFieldSearcher & fs, const StringList & query, const std::string & fv, const FieldInfoList & exp) {
+ return assertFieldInfo(fs, query, StringFieldValue(fv), exp);
+ }
+ bool assertFieldInfo(StrChrFieldSearcher & fs, const StringList & query, const StringList & fv, const FieldInfoList & exp) {
+ return assertFieldInfo(fs, query, getFieldValue(fv), exp);
+ }
+
+ /** int field searcher **/
+ void assertFieldInfo(IntFieldSearcher fs, const std::string & term, int64_t fv, const QTFieldInfo & exp) { // the int/float variants discard the bool result of the generic overload
+ assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ void assertFieldInfo(IntFieldSearcher fs, const std::string & term, const LongList & fv, const QTFieldInfo & exp) {
+ assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ void assertFieldInfo(IntFieldSearcher fs, const StringList & query, int64_t fv, const FieldInfoList & exp) {
+ assertFieldInfo(fs, query, LongFieldValue(fv), exp);
+ }
+ void assertFieldInfo(IntFieldSearcher fs, const StringList & query, const LongList & fv, const FieldInfoList & exp) {
+ assertFieldInfo(fs, query, getFieldValue(fv), exp);
+ }
+
+ /** float field searcher **/
+ void assertFieldInfo(FloatFieldSearcher fs, const std::string & term, float fv, const QTFieldInfo & exp) {
+ assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ void assertFieldInfo(FloatFieldSearcher fs, const std::string & term, const FloatList & fv, const QTFieldInfo & exp) {
+ assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp));
+ }
+ void assertFieldInfo(FloatFieldSearcher fs, const StringList & query, float fv, const FieldInfoList & exp) {
+ assertFieldInfo(fs, query, FloatFieldValue(fv), exp);
+ }
+ void assertFieldInfo(FloatFieldSearcher fs, const StringList & query, const FloatList & fv, const FieldInfoList & exp) {
+ assertFieldInfo(fs, query, getFieldValue(fv), exp);
+ }
+
+ bool assertFieldInfo(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const FieldInfoList & exp); // generic overload all variants above end up in
+
+ /** snippet modifier searcher **/
+ void assertSnippetModifier(const std::string & term, const std::string & fv, const std::string & exp) { // single-term convenience overload
+ assertSnippetModifier(StringList().add(term), fv, exp);
+ }
+ void assertSnippetModifier(const StringList & query, const std::string & fv, const std::string & exp);
+ /** snippet modifier **/
+ void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp);
+ void assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms);
+ /** count words **/
+ bool assertCountWords(size_t numWords, const std::string & field);
+
+ // test functions
+ void testParseTerm();
+ void testMatchTermSuffix();
+ bool testStrChrFieldSearcher(StrChrFieldSearcher & fs); // shared body, run once per searcher implementation
+ void testStrChrFieldSearcher();
+ bool testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs); // shared body, run once per searcher implementation
+ void testUTF8SubStringFieldSearcher();
+ void testUTF8SuffixStringFieldSearcher();
+ void testUTF8FlexibleStringFieldSearcher();
+ void testUTF8ExactStringFieldSearcher();
+ void testIntFieldSearcher();
+ void testFloatFieldSearcher();
+ bool testStringFieldInfo(StrChrFieldSearcher & fs);
+ void testSnippetModifierSearcher();
+ void testSnippetModifier();
+ void testFieldSearchSpec();
+ void testSnippetModifierManager();
+ void testStripIndexes();
+ void requireThatCountWordsIsWorking();
+
+public:
+ int Main();
+};
+
+// Builds an array-of-string field value holding the entries of fv, in order.
+ArrayFieldValue
+SearcherTest::getFieldValue(const StringList & fv)
+{
+    static ArrayDataType arrayType(*DataType::STRING);
+    ArrayFieldValue values(arrayType);
+    for (const std::string & item : fv) {
+        values.add(StringFieldValue(item));
+    }
+    return values;
+}
+
+// Builds an array-of-long field value holding the entries of fv, in order.
+ArrayFieldValue
+SearcherTest::getFieldValue(const LongList & fv)
+{
+    static ArrayDataType arrayType(*DataType::LONG);
+    ArrayFieldValue values(arrayType);
+    for (int64_t item : fv) {
+        values.add(LongFieldValue(item));
+    }
+    return values;
+}
+
+// Builds an array-of-float field value holding the entries of fv, in order.
+ArrayFieldValue
+SearcherTest::getFieldValue(const FloatList & fv)
+{
+    static ArrayDataType arrayType(*DataType::FLOAT);
+    ArrayFieldValue values(arrayType);
+    for (float item : fv) {
+        values.add(FloatFieldValue(item));
+    }
+    return values;
+}
+
+// Turns term and word into query terms and returns what
+// UTF8StringFieldSearcherBase::matchTermSuffix() reports for their UCS-4 forms.
+bool
+SearcherTest::assertMatchTermSuffix(const std::string & term, const std::string & word)
+{
+    EmptyQueryNodeResult emptyResult;
+    QueryTerm termQuery(emptyResult, term, "index", QueryTerm::WORD);
+    QueryTerm wordQuery(emptyResult, word, "index", QueryTerm::WORD);
+    const ucs4_t * termChars;
+    size_t termLen = termQuery.term(termChars);
+    const ucs4_t * wordChars;
+    size_t wordLen = wordQuery.term(wordChars);
+    return UTF8StringFieldSearcherBase::matchTermSuffix(termChars, termLen, wordChars, wordLen);
+}
+
+// Numeric fields either match or not: an expected match becomes a single hit
+// at position 0, an expected miss becomes an empty hit list.
+void
+SearcherTest::assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp)
+{
+    HitsList expectedHits;
+    for (bool matched : exp) {
+        Hits hits;
+        if (matched) {
+            hits.add(0);
+        }
+        expectedHits.push_back(hits);
+    }
+    assertSearch(fs, query, fv, expectedHits);
+}
+
+std::vector<QueryTerm>
+SearcherTest::performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv)
+{ // Runs fs over a one-field document holding a clone of fv and returns a copy of the query terms afterwards.
+ Query q(query);
+
+ // prepare field searcher
+ SharedSearcherBuf ssb = SharedSearcherBuf(new SearcherBuf());
+ fs.prepare(q.qtl, ssb);
+
+ // setup document
+ SharedFieldPathMap sfim(new FieldPathMapT());
+ sfim->push_back(FieldPath()); // the single field has an empty path; its value is set explicitly below
+ StorageDocument doc(sfim);
+ doc.setFieldCount(1);
+ doc.init();
+ doc.setField(0, document::FieldValue::UP(fv.clone()));
+
+ fs.search(doc);
+ return q.qtv; // returned by value since q goes out of scope here
+}
+
+void
+SearcherTest::assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const HitsList & exp)
+{ // Searches fv with fs and checks that query term i got exactly the hit positions listed in exp[i].
+ std::vector<QueryTerm> qtv = performSearch(fs, query, fv);
+ EXPECT_EQUAL(qtv.size(), exp.size());
+ ASSERT_TRUE(qtv.size() == exp.size()); // guards the exp[i] accesses below
+ for (size_t i = 0; i < qtv.size(); ++i) {
+ const search::HitList & hl = qtv[i].getHitList();
+ EXPECT_EQUAL(hl.size(), exp[i].size());
+ ASSERT_TRUE(hl.size() == exp[i].size()); // guards the exp[i][j] accesses below
+ for (size_t j = 0; j < hl.size(); ++j) {
+ EXPECT_EQUAL((size_t)hl[j].pos(), exp[i][j]);
+ }
+ }
+}
+
+bool
+SearcherTest::assertFieldInfo(FieldSearcher & fs, const StringList & query,
+ const FieldValue & fv, const FieldInfoList & exp)
+{ // Searches fv with fs and compares hit offset, hit count and field length of field 0 for every query term; returns false on any mismatch.
+ std::vector<QueryTerm> qtv = performSearch(fs, query, fv);
+ if (!EXPECT_EQUAL(qtv.size(), exp.size())) return false; // sizes must agree before exp[i] is indexed
+ bool retval = true;
+ for (size_t i = 0; i < qtv.size(); ++i) {
+ if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getHitOffset(), exp[i].getHitOffset())) retval = false;
+ if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getHitCount(), exp[i].getHitCount())) retval = false;
+ if (!EXPECT_EQUAL(qtv[i].getFieldInfo(0).getFieldLength(), exp[i].getFieldLength())) retval = false;
+ }
+ return retval;
+}
+
+void
+SearcherTest::assertSnippetModifier(const StringList & query, const std::string & fv, const std::string & exp)
+{ // Runs a fresh UTF8SubstringSnippetModifier over fv and compares the content of its modified buffer with exp.
+ UTF8SubstringSnippetModifier mod;
+ performSearch(mod, query, StringFieldValue(fv)); // only the modifier's buffer is inspected, the returned terms are ignored
+ EXPECT_EQUAL(mod.getModifiedBuf().getPos(), exp.size());
+ std::string actual(mod.getModifiedBuf().getBuffer(), mod.getModifiedBuf().getPos());
+ EXPECT_EQUAL(actual.size(), exp.size());
+ EXPECT_EQUAL(actual, exp);
+}
+
+void
+SearcherTest::assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp)
+{ // Applies setup.modifier to fv and compares the resulting literal value with exp.
+ FieldValue::UP mfv = setup.modifier.modify(fv);
+ const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get()); // assumes modify() returns a non-null literal field value - TODO confirm
+ const std::string & actual = lfv.getValue();
+ EXPECT_EQUAL(actual.size(), exp.size());
+ EXPECT_EQUAL(actual, exp);
+}
+
+void
+SearcherTest::assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms)
+{ // Checks which query terms the snippet modifier registered for field fId holds; an empty terms list means no modifier is expected.
+ if (terms.size() == 0) {
+ ASSERT_TRUE(man.getModifiers().getModifier(fId) == NULL);
+ return;
+ }
+ ASSERT_TRUE(man.getModifiers().getModifier(fId) != NULL); // must hold before the modifier is dereferenced below
+ UTF8SubstringSnippetModifier * searcher =
+ (static_cast<SnippetModifier *>(man.getModifiers().getModifier(fId)))->getSearcher().get();
+ EXPECT_EQUAL(searcher->getQueryTerms().size(), terms.size());
+ ASSERT_TRUE(searcher->getQueryTerms().size() == terms.size()); // guards the indexed access in the loop
+ for (size_t i = 0; i < terms.size(); ++i) {
+ EXPECT_EQUAL(std::string(searcher->getQueryTerms()[i]->getTerm()), terms[i]);
+ }
+}
+
+bool
+SearcherTest::assertCountWords(size_t numWords, const std::string & field)
+{ // Returns whether FieldSearcher::countWords() reports numWords for the text in field.
+ FieldRef ref(field.c_str(), field.size());
+ return EXPECT_EQUAL(numWords, FieldSearcher::countWords(ref));
+}
+
+void
+SearcherTest::testParseTerm()
+{ // Verifies the test-local Query parsers: "index:term" splitting and '*' marker handling.
+ ASSERT_TRUE(Query::parseQueryTerm("index:term").first == "index");
+ ASSERT_TRUE(Query::parseQueryTerm("index:term").second == "term");
+ ASSERT_TRUE(Query::parseQueryTerm("term").first == ""); // no ':' gives an empty index
+ ASSERT_TRUE(Query::parseQueryTerm("term").second == "term");
+ ASSERT_TRUE(Query::parseTerm("*substr*").first == "substr"); // markers are stripped from the returned term
+ ASSERT_TRUE(Query::parseTerm("*substr*").second == QueryTerm::SUBSTRINGTERM);
+ ASSERT_TRUE(Query::parseTerm("*suffix").first == "suffix");
+ ASSERT_TRUE(Query::parseTerm("*suffix").second == QueryTerm::SUFFIXTERM);
+ ASSERT_TRUE(Query::parseTerm("prefix*").first == "prefix");
+ ASSERT_TRUE(Query::parseTerm("prefix*").second == QueryTerm::PREFIXTERM);
+ ASSERT_TRUE(Query::parseTerm("term").first == "term");
+ ASSERT_TRUE(Query::parseTerm("term").second == QueryTerm::WORD);
+}
+
+void
+SearcherTest::testMatchTermSuffix()
+{ // Checks matchTermSuffix() for terms that are, and are not, a suffix of "vespa".
+ EXPECT_EQUAL(assertMatchTermSuffix("a", "vespa"), true);
+ EXPECT_EQUAL(assertMatchTermSuffix("spa", "vespa"), true);
+ EXPECT_EQUAL(assertMatchTermSuffix("vespa", "vespa"), true); // the whole word counts as a suffix
+ EXPECT_EQUAL(assertMatchTermSuffix("vvespa", "vespa"), false); // term longer than the word
+ EXPECT_EQUAL(assertMatchTermSuffix("fspa", "vespa"), false);
+ EXPECT_EQUAL(assertMatchTermSuffix("v", "vespa"), false); // a prefix is not a suffix
+}
+
+bool
+SearcherTest::testStrChrFieldSearcher(StrChrFieldSearcher & fs)
+{ // Shared checks for a word-matching string searcher: regular and prefix matching, field info and underscore handling. Returns false if the field info checks fail.
+ std::string field = "operators and operator overloading with utf8 char oe = \xc3\x98";
+ assertString(fs, "oper", field, Hits()); // a regular term must match a whole word, not a prefix
+ assertString(fs, "tor", field, Hits());
+ assertString(fs, "oper*", field, Hits().add(0).add(2)); // hits 0 and 2 are the first and third word of field
+ assertString(fs, "and", field, Hits().add(1));
+
+ assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits()).add(Hits()));
+ assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add(1)).add(Hits().add(3)));
+
+ fs.setMatchType(FieldSearcher::PREFIX); // from here on plain terms are expected to match as prefixes
+ assertString(fs, "oper", field, Hits().add(0).add(2));
+ assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add(0).add(2)).add(Hits()));
+
+ fs.setMatchType(FieldSearcher::REGULAR); // restore regular matching before the remaining checks
+ if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false;
+
+ { // test handling of several underscores
+ StringList query = StringList().add("foo").add("bar");
+ HitsList exp = HitsList().add(Hits().add(0)).add(Hits().add(1));
+ assertString(fs, query, "foo_bar", exp);
+ assertString(fs, query, "foo__bar", exp);
+ assertString(fs, query, "foo___bar", exp);
+ assertString(fs, query, "foo________bar", exp);
+ assertString(fs, query, "foo____________________bar", exp);
+ assertString(fs, query, "________________________________________foo________________________________________bar________________________________________", exp);
+ query = StringList().add("foo").add("thisisaveryveryverylongword");
+ assertString(fs, query, "foo____________________thisisaveryveryverylongword", exp);
+
+ assertString(fs, "bar", "foo bar", Hits().add(1));
+ assertString(fs, "bar", "foo____________________bar", Hits().add(1)); // underscore runs are expected to give the same position as a space
+ assertString(fs, "bar", "foo____________________thisisaveryveryverylongword____________________bar", Hits().add(2));
+ }
+ return true;
+}
+
+void
+SearcherTest::testStrChrFieldSearcher()
+{ // Runs the shared string searcher checks against both the UTF8 and the FUTF8 implementation.
+ {
+ UTF8StrChrFieldSearcher fs(0);
+ EXPECT_TRUE(testStrChrFieldSearcher(fs));
+ }
+ {
+ FUTF8StrChrFieldSearcher fs(0);
+ EXPECT_TRUE(testStrChrFieldSearcher(fs));
+ }
+}
+
+bool
+SearcherTest::testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs)
+{ // Shared checks for substring matching searchers. Returns false if the field info checks fail.
+ std::string field = "operators and operator overloading";
+ assertString(fs, "rsand", field, Hits()); // a substring may not span a word boundary
+ assertString(fs, "ove", field, Hits().add(3));
+ assertString(fs, "ing", field, Hits().add(3));
+ assertString(fs, "era", field, Hits().add(0).add(2));
+ assertString(fs, "a", field, Hits().add(0).add(1).add(2).add(3));
+
+ assertString(fs, StringList().add("dn").add("gn"), field, HitsList().add(Hits()).add(Hits()));
+ assertString(fs, StringList().add("ato").add("load"), field, HitsList().add(Hits().add(0).add(2)).add(Hits().add(3)));
+
+ assertString(fs, StringList().add("aa").add("ab"), "aaaab",
+ HitsList().add(Hits().add(0).add(0).add(0)).add(Hits().add(0))); // "aa" is expected three times in "aaaab", every hit at word position 0
+
+ if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false;
+ return true;
+}
+
+void
+SearcherTest::testUTF8SubStringFieldSearcher()
+{ // Runs the shared substring checks against UTF8SubStringFieldSearcher and UTF8SubstringSnippetModifier, plus searcher specific cases.
+ {
+ UTF8SubStringFieldSearcher fs(0);
+ EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs));
+ assertString(fs, "aa", "aaaa", Hits().add(0).add(0)); // with a single term only two hits are expected here
+ }
+ {
+ UTF8SubStringFieldSearcher fs(0);
+ EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs));
+ assertString(fs, "abc", "abc bcd abc", Hits().add(0).add(2));
+ fs.maxFieldLength(4);
+ assertString(fs, "abc", "abc bcd abc", Hits().add(0)); // with the field length capped at 4 only the first "abc" is expected to be found
+ }
+ {
+ UTF8SubstringSnippetModifier fs(0);
+ EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs));
+ // we don't have 1 term optimization
+ assertString(fs, "aa", "aaaa", Hits().add(0).add(0).add(0));
+ }
+}
+
+void
+SearcherTest::testUTF8SuffixStringFieldSearcher()
+{ // Checks that UTF8SuffixStringFieldSearcher matches terms only at the end of a word.
+ UTF8SuffixStringFieldSearcher fs(0);
+ std::string field = "operators and operator overloading";
+ assertString(fs, "rsand", field, Hits());
+ assertString(fs, "tor", field, Hits().add(2)); // "operators" ends in "tors", so only "operator" matches
+ assertString(fs, "tors", field, Hits().add(0));
+
+ assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits())); // prefix and infix of a word must not match
+ assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3)));
+
+ EXPECT_TRUE(testStringFieldInfo(fs));
+}
+
+void
+SearcherTest::testUTF8ExactStringFieldSearcher()
+{ // Checks that UTF8ExactStringFieldSearcher compares the term with the whole field content.
+ UTF8ExactStringFieldSearcher fs(0);
+ // regular
+ assertString(fs, "vespa", "vespa", Hits().add(0));
+ assertString(fs, "vespa", "vespa vespa", Hits()); // the field as a whole differs from the term, so no hit
+ assertString(fs, "vesp", "vespa", Hits());
+ assertString(fs, "vesp*", "vespa", Hits().add(0)); // a prefix term still matches
+}
+
+void
+SearcherTest::testUTF8FlexibleStringFieldSearcher()
+{ // Checks that UTF8FlexibleStringFieldSearcher honours both the term markers and the searcher's configured match type.
+ UTF8FlexibleStringFieldSearcher fs(0);
+ // regular
+ assertString(fs, "vespa", "vespa", Hits().add(0));
+ assertString(fs, "vesp", "vespa", Hits());
+ assertString(fs, "esp", "vespa", Hits());
+ assertString(fs, "espa", "vespa", Hits());
+
+ // prefix
+ assertString(fs, "vesp*", "vespa", Hits().add(0)); // selected by the term marker
+ fs.setMatchType(FieldSearcher::PREFIX);
+ assertString(fs, "vesp", "vespa", Hits().add(0)); // selected by the searcher's match type
+
+ // substring
+ fs.setMatchType(FieldSearcher::REGULAR);
+ assertString(fs, "*esp*", "vespa", Hits().add(0));
+ fs.setMatchType(FieldSearcher::SUBSTRING);
+ assertString(fs, "esp", "vespa", Hits().add(0));
+
+ // suffix
+ fs.setMatchType(FieldSearcher::REGULAR);
+ assertString(fs, "*espa", "vespa", Hits().add(0));
+ fs.setMatchType(FieldSearcher::SUFFIX);
+ assertString(fs, "espa", "vespa", Hits().add(0));
+
+ fs.setMatchType(FieldSearcher::REGULAR); // back to regular matching for the field info checks
+ EXPECT_TRUE(testStringFieldInfo(fs));
+}
+
+void
+SearcherTest::testIntFieldSearcher()
+{ // Checks IntFieldSearcher for exact, "<", ">" and "[low;high]" terms on single values and arrays, plus the resulting field info.
+ IntFieldSearcher fs;
+ assertInt(fs, "10", 10, true);
+ assertInt(fs, "9", 10, false);
+ assertInt(fs, ">9", 10, true);
+ assertInt(fs, ">9", 9, false); // ">" is strict
+ assertInt(fs, "<11", 10, true);
+ assertInt(fs, "<11", 11, false); // "<" is strict
+ assertInt(fs, "-10", -10, true);
+ assertInt(fs, "-9", -10, false);
+ assertInt(fs, "a", 10, false); // a non-numeric term must not match
+ assertInt(fs, "[-5;5]", -5, true); // range limits are inclusive
+ assertInt(fs, "[-5;5]", 0, true);
+ assertInt(fs, "[-5;5]", 5, true);
+ assertInt(fs, "[-5;5]", -6, false);
+ assertInt(fs, "[-5;5]", 6, false);
+
+ assertInt(fs, StringList().add("9").add("11"), 10, BoolList().add(false).add(false));
+ assertInt(fs, StringList().add("9").add("10"), 10, BoolList().add(false).add(true));
+ assertInt(fs, StringList().add("10").add(">9"), 10, BoolList().add(true).add(true));
+
+ assertInt(fs, "10", LongList().add(10).add(20).add(10).add(30), Hits().add(0).add(2)); // for arrays the hit position is the element index
+ assertInt(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30),
+ HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)));
+
+ assertFieldInfo(fs, "10", 10, QTFieldInfo(0, 1, 1)); // QTFieldInfo arguments are compared as hit offset, hit count, field length
+ assertFieldInfo(fs, "10", LongList().add(10).add(20).add(10).add(30), QTFieldInfo(0, 2, 4));
+ assertFieldInfo(fs, StringList().add("10").add("20"), 10,
+ FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1)));
+ assertFieldInfo(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30),
+ FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4)));
+}
+
+void
+SearcherTest::testFloatFieldSearcher()
+{ // Checks FloatFieldSearcher for exact, "<", ">" and "[low;high]" terms on single values and arrays, plus the resulting field info.
+ FloatFieldSearcher fs;
+ assertFloat(fs, "10", 10, true);
+ assertFloat(fs, "10.5", 10.5, true);
+ assertFloat(fs, "-10.5", -10.5, true);
+ assertFloat(fs, ">10.5", 10.6, true);
+ assertFloat(fs, ">10.5", 10.5, false); // ">" is strict
+ assertFloat(fs, "<10.5", 10.4, true);
+ assertFloat(fs, "<10.5", 10.5, false); // "<" is strict
+ assertFloat(fs, "10.4", 10.5, false);
+ assertFloat(fs, "-10.4", -10.5, false);
+ assertFloat(fs, "a", 10.5, false); // a non-numeric term must not match
+ assertFloat(fs, "[-5.5;5.5]", -5.5, true); // range limits are inclusive
+ assertFloat(fs, "[-5.5;5.5]", 0, true);
+ assertFloat(fs, "[-5.5;5.5]", 5.5, true);
+ assertFloat(fs, "[-5.5;5.5]", -5.6, false);
+ assertFloat(fs, "[-5.5;5.5]", 5.6, false);
+
+ assertFloat(fs, StringList().add("10").add("11"), 10.5, BoolList().add(false).add(false));
+ assertFloat(fs, StringList().add("10").add("10.5"), 10.5, BoolList().add(false).add(true));
+ assertFloat(fs, StringList().add(">10.4").add("10.5"), 10.5, BoolList().add(true).add(true));
+
+ assertFloat(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), Hits().add(0).add(2)); // for arrays the hit position is the element index
+ assertFloat(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5),
+ HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)));
+
+ assertFieldInfo(fs, "10.5", 10.5, QTFieldInfo(0, 1, 1)); // QTFieldInfo arguments are compared as hit offset, hit count, field length
+ assertFieldInfo(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), QTFieldInfo(0, 2, 4));
+ assertFieldInfo(fs, StringList().add("10.5").add("20.5"), 10.5,
+ FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1)));
+ assertFieldInfo(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5),
+ FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4)));
+}
+
+bool
+SearcherTest::testStringFieldInfo(StrChrFieldSearcher & fs)
+{ // Shared hit and field-info checks for the given string searcher; returns false if any field-info expectation failed.
+ assertString(fs, "foo", StringList().add("foo bar baz").add("foo bar").add("baz foo"), Hits().add(0).add(3).add(6));
+ assertString(fs, StringList().add("foo").add("bar"), StringList().add("foo bar baz").add("foo bar").add("baz foo"),
+ HitsList().add(Hits().add(0).add(3).add(6)).add(Hits().add(1).add(4)));
+
+ bool allPassed = true; // the check is the left operand of && below, so every check runs even after a failure
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo", QTFieldInfo(0, 1, 1))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo", QTFieldInfo(0, 0, 1))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo bar baz", QTFieldInfo(0, 1, 3))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo bar baz", QTFieldInfo(0, 1, 3))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "baz", "foo bar baz", QTFieldInfo(0, 1, 3))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "qux", "foo bar baz", QTFieldInfo(0, 0, 3))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo foo foo", QTFieldInfo(0, 3, 3))) && allPassed;
+ // the query term is longer than the last word of the field
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "runner", "Road Runner Disco", QTFieldInfo(0, 1, 3))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, StringList().add("roadrun").add("runner"), "Road Runner Disco",
+ FieldInfoList().add(QTFieldInfo(0, 0, 3)).add(QTFieldInfo(0, 1, 3)))) && allPassed;
+ // several field values and/or several query terms
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, "foo", StringList().add("foo bar baz").add("foo bar"),
+ QTFieldInfo(0, 2, 5))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), "foo bar baz",
+ FieldInfoList().add(QTFieldInfo(0, 1, 3)).add(QTFieldInfo(0, 1, 3)))) && allPassed;
+ allPassed = EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), StringList().add("foo bar baz").add("foo bar"),
+ FieldInfoList().add(QTFieldInfo(0, 2, 5)).add(QTFieldInfo(0, 1, 5)))) && allPassed;
+ return allPassed;
+}
+
+void
+SearcherTest::testSnippetModifierSearcher()
+{ // In the expected outputs below every match is wrapped in a pair of "\x1F" characters.
+ // ascii
+ assertSnippetModifier("f", "foo", "\x1F""f\x1Foo");
+ assertSnippetModifier("o", "foo", "f\x1Fo\x1F\x1Fo\x1F");
+ assertSnippetModifier("r", "bar", "ba\x1Fr\x1F");
+ assertSnippetModifier("foo", "foo foo", "\x1F""foo\x1F \x1F""foo\x1F");
+ assertSnippetModifier("aa", "aaaaaa", "\x1F""aa\x1F\x1F""aa\x1F\x1F""aa\x1F"); // adjacent, non-overlapping matches
+ assertSnippetModifier("ab", "abcd\x1F""efgh", "\x1F""ab\x1F""cd\x1F""efgh"); // a "\x1F" already in the input is kept when no match spans it
+ assertSnippetModifier("ef", "abcd\x1F""efgh", "abcd\x1F\x1F""ef\x1Fgh");
+ assertSnippetModifier("fg", "abcd\x1F""efgh", "abcd\x1F""e\x1F""fg\x1Fh");
+ // the separator overlapping the match is skipped
+ assertSnippetModifier("cdef", "abcd\x1F""efgh", "ab\x1F""cdef\x1F""gh");
+ // no hits
+ assertSnippetModifier("bb", "aaaaaa", "aaaaaa"); // the field value is expected back unchanged
+
+
+ // multiple query terms
+ assertSnippetModifier(StringList().add("ab").add("cd"), "abcd", "\x1F""ab\x1F\x1F""cd\x1F");
+ // when we have overlap we only get the first match
+ assertSnippetModifier(StringList().add("ab").add("bc"), "abcd", "\x1F""ab\x1F""cd");
+ assertSnippetModifier(StringList().add("bc").add("ab"), "abcd", "\x1F""ab\x1F""cd"); // same result regardless of term order
+ // the separator overlapping the match is skipped
+ assertSnippetModifier(StringList().add("de").add("ef"), "abcd\x1F""efgh", "abc\x1F""de\x1F""fgh");
+
+ // cjk
+ assertSnippetModifier("\xe7\x9f\xb3", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8",
+ "\x1f\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8"); // three-byte UTF-8 sequences stay intact inside the markers
+ assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8",
+ "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8");
+ // the separator overlapping the match is skipped
+ assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\x1f\xe5\x87\xb1\xe5\x9c\xa8",
+ "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8");
+
+ { // check that resizing works
+ UTF8SubstringSnippetModifier mod;
+ EXPECT_EQUAL(mod.getModifiedBuf().getLength(), 32u); // expected length of a freshly constructed buffer
+ EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 0u);
+ performSearch(mod, StringList().add("a"), StringFieldValue("aaaaaaaaaaaaaaaa"));
+ EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 16u + 2 * 16u); // 16 input characters plus two markers for each of the 16 matches
+ EXPECT_TRUE(mod.getModifiedBuf().getLength() >= mod.getModifiedBuf().getPos()); // 48 exceeds the initial 32, so the buffer must have grown
+ }
+}
+
+void
+SearcherTest::testSnippetModifier()
+{ // Runs one SnippetModifierSetup repeatedly over string and collection field values.
+ { // string field value
+ SnippetModifierSetup sms(StringList().add("ab"));
+ // multiple invocations
+ assertSnippetModifier(sms, StringFieldValue("ab"), "\x1F""ab\x1F");
+ assertSnippetModifier(sms, StringFieldValue("xxxxabxxxxabxxxx"), "xxxx\x1F""ab\x1Fxxxx\x1F""ab\x1Fxxxx");
+ assertSnippetModifier(sms, StringFieldValue("xxabxx"), "xx\x1F""ab\x1Fxx"); // shorter value after a longer one, using the same setup
+ }
+ { // collection field value
+ SnippetModifierSetup sms(StringList().add("ab"));
+ // multiple invocations
+ assertSnippetModifier(sms, getFieldValue(StringList().add("ab")), "\x1F""ab\x1F");
+ assertSnippetModifier(sms, getFieldValue(StringList().add("xxabxx")), "xx\x1F""ab\x1Fxx");
+ assertSnippetModifier(sms, getFieldValue(StringList().add("ab").add("xxabxx").add("xxxxxx")),
+ "\x1F""ab\x1F\x1E""xx\x1F""ab\x1F""xx\x1E""xxxxxx"); // elements are joined with "\x1E" in the expected output
+ assertSnippetModifier(sms, getFieldValue(StringList().add("cd").add("ef").add("gh")),
+ "cd\x1E""ef\x1E""gh"); // no matches: only the "\x1E" element separators are present
+ }
+ { // check that resizing works
+ SnippetModifierSetup sms(StringList().add("a"));
+ EXPECT_EQUAL(sms.modifier.getValueBuf().getLength(), 32u); // expected length of a freshly constructed buffer
+ EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 0u);
+ sms.modifier.modify(StringFieldValue("aaaaaaaaaaaaaaaa"));
+ EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 16u + 2 * 16u); // 16 input characters plus two markers for each of the 16 matches
+ EXPECT_TRUE(sms.modifier.getValueBuf().getLength() >= sms.modifier.getValueBuf().getPos()); // 48 exceeds the initial 32, so the buffer must have grown
+ }
+}
+
+void
+SearcherTest::testFieldSearchSpec()
+{ // Checks the accessors of a default-constructed and of an explicitly constructed FieldSearchSpec.
+ { // default constructed
+ FieldSearchSpec f;
+ EXPECT_FALSE(f.valid());
+ EXPECT_EQUAL(0u, f.id());
+ EXPECT_EQUAL("", f.name());
+ EXPECT_EQUAL(0x100000u, f.maxLength()); // expected default maximum length
+ }
+ { // explicit id, name and maximum length
+ FieldSearchSpec f(7, "f0", VsmfieldsConfig::Fieldspec::AUTOUTF8, "substring", 789);
+ EXPECT_TRUE(f.valid());
+ EXPECT_EQUAL(7u, f.id());
+ EXPECT_EQUAL("f0", f.name());
+ EXPECT_EQUAL(789u, f.maxLength());
+ EXPECT_EQUAL(789u, f.searcher().maxFieldLength()); // the limit is expected to reach the searcher as well
+ }
+}
+
+void
+SearcherTest::testSnippetModifierManager()
+{ // Checks which query terms SnippetModifierManager::setup() assigns to each field's modifier.
+ FieldSearchSpecMapT specMap;
+ specMap[0] = FieldSearchSpec(0, "f0", VsmfieldsConfig::Fieldspec::AUTOUTF8, "substring", 1000); // field 0 is configured with "substring"
+ specMap[1] = FieldSearchSpec(1, "f1", VsmfieldsConfig::Fieldspec::AUTOUTF8, "", 1000); // field 1 has an empty fourth argument
+ IndexFieldMapT indexMap;
+ indexMap["i0"].push_back(0); // index i0 -> field 0
+ indexMap["i1"].push_back(1); // index i1 -> field 1
+ indexMap["i2"].push_back(0); // index i2 -> fields 0 and 1
+ indexMap["i2"].push_back(1);
+
+ { // plain term on i0: field 0 gets it
+ SnippetModifierManager man;
+ Query query(StringList().add("i0:foo"));
+ man.setup(query.qtl, specMap, indexMap);
+ assertQueryTerms(man, 0, StringList().add("foo"));
+ assertQueryTerms(man, 1, StringList());
+ }
+ { // plain term on i1: no field is expected to get any term
+ SnippetModifierManager man;
+ Query query(StringList().add("i1:foo"));
+ man.setup(query.qtl, specMap, indexMap);
+ assertQueryTerms(man, 0, StringList());
+ assertQueryTerms(man, 1, StringList());
+ }
+ { // "*foo*" on i1: field 1 is expected to get "foo"
+ SnippetModifierManager man;
+ Query query(StringList().add("i1:*foo*"));
+ man.setup(query.qtl, specMap, indexMap);
+ assertQueryTerms(man, 0, StringList());
+ assertQueryTerms(man, 1, StringList().add("foo"));
+ }
+ { // i2 covers both fields: field 0 gets both terms, field 1 only the "*bar*" one
+ SnippetModifierManager man;
+ Query query(StringList().add("i2:foo").add("i2:*bar*"));
+ man.setup(query.qtl, specMap, indexMap);
+ assertQueryTerms(man, 0, StringList().add("foo").add("bar"));
+ assertQueryTerms(man, 1, StringList().add("bar"));
+ }
+ { // check buffer sizes
+ SnippetModifierManager man;
+ Query query(StringList().add("i2:foo").add("i2:*bar*"));
+ man.setup(query.qtl, specMap, indexMap);
+ {
+ SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0));
+ UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get();
+ EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); // NOTE(review): how 128 and 64 are derived is not visible in this chunk
+ EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u);
+ }
+ {
+ SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1));
+ UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get();
+ EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); // the same sizes are expected for field 1
+ EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u);
+ }
+ }
+}
+
+void
+SearcherTest::testStripIndexes()
+{ // Checks FieldSearchSpecMap::stripNonFields() on array-index and map-key suffixes.
+ EXPECT_EQUAL("f", FieldSearchSpecMap::stripNonFields("f"));
+ EXPECT_EQUAL("f", FieldSearchSpecMap::stripNonFields("f[0]")); // a numeric index is removed
+ EXPECT_EQUAL("f[a]", FieldSearchSpecMap::stripNonFields("f[a]")); // a non-numeric index is left untouched
+
+ EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a}")); // a map key is replaced by ".value"
+ EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a0}"));
+ EXPECT_EQUAL("f{a 0}", FieldSearchSpecMap::stripNonFields("f{a 0}")); // an unquoted key containing a space is left untouched
+ EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{\"a 0\"}")); // the same key in quotes is replaced
+}
+
+void
+SearcherTest::requireThatCountWordsIsWorking()
+{ // Checks word counting directly, then through hit positions over a multi-value field.
+ EXPECT_TRUE(assertCountWords(0, ""));
+ EXPECT_TRUE(assertCountWords(0, "?")); // a lone '?' is expected to count as zero words
+ EXPECT_TRUE(assertCountWords(1, "foo"));
+ EXPECT_TRUE(assertCountWords(2, "foo bar"));
+ EXPECT_TRUE(assertCountWords(2, "? foo bar"));
+ EXPECT_TRUE(assertCountWords(2, "foo bar ?"));
+
+ // check that 'a' is counted as 1 word
+ UTF8StrChrFieldSearcher fs(0);
+ StringList field = StringList().add("a").add("aa bb cc");
+ assertString(fs, "bb", field, Hits().add(2)); // position 2 only fits if "a" took position 0 and "aa" position 1
+ assertString(fs, StringList().add("bb").add("not"), field, HitsList().add(Hits().add(2)).add(Hits())); // "not" has no hits
+}
+
+int
+SearcherTest::Main()
+{ // Test application entry point: runs every test method in sequence between TEST_INIT and TEST_DONE.
+ TEST_INIT("searcher_test");
+
+ testFieldSearchSpec();
+ testParseTerm();
+ testMatchTermSuffix();
+ testStrChrFieldSearcher();
+ testUTF8SubStringFieldSearcher();
+ testUTF8SuffixStringFieldSearcher();
+ testUTF8FlexibleStringFieldSearcher();
+ testUTF8ExactStringFieldSearcher();
+ testIntFieldSearcher();
+ testFloatFieldSearcher();
+
+ testSnippetModifierSearcher();
+ testSnippetModifier();
+ testSnippetModifierManager();
+ testStripIndexes();
+ requireThatCountWordsIsWorking();
+
+ TEST_DONE(); // NOTE(review): no return statement follows, so TEST_DONE presumably supplies it - confirm in the testkit
+}
+
+}
+
+TEST_APPHOOK(vsm::SearcherTest);
+
diff --git a/vsm/src/tests/textutil/.gitignore b/vsm/src/tests/textutil/.gitignore
new file mode 100644
index 00000000000..1103f79800a
--- /dev/null
+++ b/vsm/src/tests/textutil/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+textutil_test
+vsm_textutil_test_app
diff --git a/vsm/src/tests/textutil/CMakeLists.txt b/vsm/src/tests/textutil/CMakeLists.txt
new file mode 100644
index 00000000000..c3169a842f0
--- /dev/null
+++ b/vsm/src/tests/textutil/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vsm_textutil_test_app
+ SOURCES
+ textutil.cpp
+ DEPENDS
+ vsm
+)
+vespa_add_test(NAME vsm_textutil_test_app COMMAND vsm_textutil_test_app)
diff --git a/vsm/src/tests/textutil/DESC b/vsm/src/tests/textutil/DESC
new file mode 100644
index 00000000000..e1a0220f550
--- /dev/null
+++ b/vsm/src/tests/textutil/DESC
@@ -0,0 +1 @@
+Tests of text utils used during searching. Take a look at textutil.cpp for details.
diff --git a/vsm/src/tests/textutil/FILES b/vsm/src/tests/textutil/FILES
new file mode 100644
index 00000000000..f1b37f6aaec
--- /dev/null
+++ b/vsm/src/tests/textutil/FILES
@@ -0,0 +1 @@
+textutil.cpp
diff --git a/vsm/src/tests/textutil/textutil.cpp b/vsm/src/tests/textutil/textutil.cpp
new file mode 100644
index 00000000000..ba6a276eb49
--- /dev/null
+++ b/vsm/src/tests/textutil/textutil.cpp
@@ -0,0 +1,278 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("textutil_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/searchlib/query/base.h>
+#include <vespa/vsm/searcher/fold.h>
+#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
+#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h>
+
+using search::byte; // unsigned char
+
+namespace vsm {
+
+template <typename T>
+class Vector : public std::vector<T> // std::vector with a chainable append, used to spell out expected values inline
+{
+public:
+ Vector() : std::vector<T>() {} // plain class name: a template-id (Vector<T>) as constructor declarator is ill-formed in C++20
+ Vector<T> & a(T v) { this->push_back(v); return *this; } // appends v and returns *this so calls can be chained
+};
+
+typedef Vector<ucs4_t> UCS4V; // list of expected UCS-4 characters
+typedef Vector<size_t> SizeV; // list of expected offsets
+typedef UTF8StringFieldSearcherBase SFSB; // short alias used by the separator tests
+typedef FUTF8StrChrFieldSearcher FSFS; // short alias used by the folding tests
+
+class TextUtilTest : public vespalib::TestApp // test application for separator skipping and ascii folding
+{
+private:
+ ucs4_t getUTF8Char(const char * src); // decodes one UTF-8 character and asserts that it is valid
+ template <typename BW, bool OFF>
+ void assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets); // offsets are only compared when OFF is true
+ void assertAnsiFold(const std::string & toFold, const std::string & exp);
+ void assertAnsiFold(char c, char exp);
+ void assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp);
+ void assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded = 16); // folds 16 copies of c
+
+ template <typename BW, bool OFF>
+ void testSkipSeparators(); // test body shared by both buffer wrapper types
+ void testSkipSeparators(); // runs the template above for both wrapper types
+ void testSeparatorCharacter();
+ void testAnsiFold();
+ void test_lfoldua();
+ void test_sse2_foldua();
+
+public:
+ int Main(); // runs all tests
+};
+
+ucs4_t
+TextUtilTest::getUTF8Char(const char * src)
+{ // Decodes the UTF-8 character at src and asserts that the result is not the bad-character marker.
+ ucs4_t retval = Fast_UnicodeUtil::GetUTF8Char(src);
+ ASSERT_TRUE(retval != Fast_UnicodeUtil::_BadUTF8Char); // guards against a mistyped byte sequence in the test data
+ return retval;
+}
+
+template <typename BW, bool OFF>
+void
+TextUtilTest::assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets)
+{ // Runs skipSeparators() over input[0, len) and compares the produced characters (and the offsets when OFF is true) with the expectations.
+ const byte * srcbuf = reinterpret_cast<const byte *>(input);
+ std::vector<ucs4_t> dstbuf(len); // heap buffers instead of variable-length arrays, which are a compiler extension and not standard C++
+ std::vector<size_t> offsets(len);
+ UTF8StrChrFieldSearcher fs;
+ BW bw(dstbuf.data(), offsets.data()); // both wrapper types are constructed from the two buffers; whether each uses both is not visible here
+ size_t dstlen = fs.skipSeparators(srcbuf, len, bw);
+ EXPECT_EQUAL(dstlen, expdstbuf.size());
+ ASSERT_TRUE(dstlen == expdstbuf.size()); // NOTE(review): presumably aborts on a size mismatch so the loop never indexes past the expectations - confirm
+ for (size_t i = 0; i < dstlen; ++i) {
+ EXPECT_EQUAL(dstbuf[i], expdstbuf[i]);
+ if (OFF) {
+ EXPECT_EQUAL(offsets[i], expoffsets[i]);
+ }
+ }
+}
+
+void
+TextUtilTest::assertAnsiFold(const std::string & toFold, const std::string & exp)
+{ // Expects FSFS::ansiFold() to succeed on toFold and to produce exp.
+ char folded[256]; // fixed-size output buffer, so toFold must not be longer than 256 characters
+ EXPECT_TRUE(FSFS::ansiFold(toFold.c_str(), toFold.size(), folded));
+ EXPECT_EQUAL(std::string(folded, toFold.size()), exp); // compares exactly toFold.size() output characters
+}
+
+void
+TextUtilTest::assertAnsiFold(char c, char exp)
+{ // Single-character variant: expects ansiFold() to succeed on c and to produce exp.
+ char folded;
+ EXPECT_TRUE(FSFS::ansiFold(&c, 1, &folded));
+ EXPECT_EQUAL((int32_t)folded, (int32_t)exp); // compared as integers rather than as characters
+}
+
+void
+TextUtilTest::assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp)
+{ // Folds toFold with sse2_foldua() and checks how far the returned pointer advanced and what was written.
+ char folded[256];
+ size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); // offset in [0, 15] that puts folded + alignedStart on a 16-byte boundary
+ const unsigned char * toFoldOrg = reinterpret_cast<const unsigned char *>(toFold.c_str());
+ const unsigned char * retval =
+ sse2_foldua(toFoldOrg, toFold.size(), reinterpret_cast<unsigned char *>(folded + alignedStart));
+ EXPECT_EQUAL((size_t)(retval - toFoldOrg), charFolded); // distance of the returned pointer from the start of the input
+ EXPECT_EQUAL(std::string(folded + alignedStart, charFolded), exp); // only the first charFolded output characters are compared
+}
+
+void
+TextUtilTest::assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded)
+{ // Folds 16 copies of c and expects the first charFolded output bytes to all equal exp.
+ unsigned char toFold[16];
+ memset(toFold, c, 16);
+ unsigned char folded[32]; // room for an alignment offset of up to 15 plus 16 output bytes
+ size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); // offset in [0, 15] that puts folded + alignedStart on a 16-byte boundary
+ const unsigned char * retval = sse2_foldua(toFold, 16, folded + alignedStart);
+ EXPECT_EQUAL((size_t)(retval - toFold), charFolded); // distance of the returned pointer from the start of the input
+ for (size_t i = 0; i < charFolded; ++i) {
+ EXPECT_EQUAL((int32_t)folded[i + alignedStart], (int32_t)exp);
+ }
+}
+
+template <typename BW, bool OFF>
+void
+TextUtilTest::testSkipSeparators()
+{ // Shared test body; the expected offsets are byte positions in the input and are only checked when OFF is true.
+ // ascii characters
+ assertSkipSeparators<BW, OFF>("foo", 3, UCS4V().a('f').a('o').a('o'), SizeV().a(0).a(1).a(2));
+ assertSkipSeparators<BW, OFF>("f\x1Fo", 3, UCS4V().a('f').a('o'), SizeV().a(0).a(2)); // "\x1F" is dropped from the output
+ assertSkipSeparators<BW, OFF>("f\no", 3, UCS4V().a('f').a('\n').a('o'), SizeV().a(0).a(1).a(2)); // '\n' is kept
+ assertSkipSeparators<BW, OFF>("f\to", 3, UCS4V().a('f').a('\t').a('o'), SizeV().a(0).a(1).a(2)); // '\t' is kept
+
+ // utf8 char
+ assertSkipSeparators<BW, OFF>("\xC2\x80\x66", 3, UCS4V().a(getUTF8Char("\xC2\x80")).a('f'),
+ SizeV().a(0).a(2)); // two-byte sequence: 'f' starts at byte 2
+ assertSkipSeparators<BW, OFF>("\xE0\xA0\x80\x66", 4, UCS4V().a(getUTF8Char("\xE0\xA0\x80")).a('f'),
+ SizeV().a(0).a(3)); // three-byte sequence: 'f' starts at byte 3
+ assertSkipSeparators<BW, OFF>("\xF0\x90\x80\x80\x66", 5, UCS4V().a(getUTF8Char("\xF0\x90\x80\x80")).a('f'),
+ SizeV().a(0).a(4)); // four-byte sequence: 'f' starts at byte 4
+
+ // replacement string (sharp s -> ss)
+ assertSkipSeparators<BW, OFF>("\xC3\x9F\x66\xC3\x9F", 5, UCS4V().a('s').a('s').a('f').a('s').a('s'),
+ SizeV().a(0).a(0).a(2).a(3).a(3)); // both replacement characters carry the offset of the original character
+}
+
+void
+TextUtilTest::testSkipSeparators()
+{ // Enables sharp-s substitution, then runs the shared test body for both buffer wrapper types.
+ Fast_NormalizeWordFolder::Setup(Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION);
+
+ testSkipSeparators<SFSB::BufferWrapper, false>(); // offsets are not checked for this wrapper
+ testSkipSeparators<SFSB::OffsetWrapper, true>(); // offsets are checked for this wrapper
+}
+
+void
+TextUtilTest::testSeparatorCharacter()
+{ // Every value 0x00-0x1f is expected to be a separator except '\t' and '\n'; space (0x20) is not.
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x00'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x01'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x02'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x03'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x04'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x05'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x06'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x07'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x08'));
+ EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x09')); // '\t'
+ EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x0a')); // '\n'
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0b'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0c'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0d')); // '\r' is a separator, unlike '\n'
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0e'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0f'));
+
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x10'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x11'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x12'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x13'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x14'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x15'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x16'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x17'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x18'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x19'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1a'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1b'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1c'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1d'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1e'));
+ EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1f'));
+
+ EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x20')); // space
+}
+
+void
+TextUtilTest::testAnsiFold()
+{ // Checks ansiFold() on whole strings, on every 7-bit value and on every 8-bit value.
+ FieldSearcher::init();
+ assertAnsiFold("", "");
+ assertAnsiFold("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz");
+ assertAnsiFold("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz");
+ assertAnsiFold("0123456789", "0123456789");
+ for (int code = 0; code < 128; ++code) {
+ const bool isUpper = (code >= 'A' && code <= 'Z');
+ const bool isKept = (code >= 'a' && code <= 'z') || (code >= '0' && code <= '9');
+ // upper case -> lower case, lower case and digits unchanged, every other ascii value -> 0
+ int expected = 0;
+ if (isKept) expected = code;
+ if (isUpper) expected = code + 32;
+ assertAnsiFold(code, expected);
+ }
+
+ // values above the ascii range make ansiFold() report failure
+ for (int code = 128; code < 256; ++code) {
+ char toFold = code;
+ char folded;
+ EXPECT_TRUE(!FSFS::ansiFold(&toFold, 1, &folded));
+ }
+}
+
+void
+TextUtilTest::test_lfoldua()
+{ // Expects FSFS::lfoldua() to succeed and to lower-case the upper-case alphabet.
+ FieldSearcher::init();
+ char folded[256];
+ size_t alignedStart = 0; // passed to lfoldua(), then used as the offset of the result within folded
+ const char * toFold = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ size_t len = strlen(toFold);
+ EXPECT_TRUE(FSFS::lfoldua(toFold, len, folded, alignedStart));
+ EXPECT_EQUAL(std::string(folded + alignedStart, len), "abcdefghijklmnopqrstuvwxyz");
+}
+
+void
+TextUtilTest::test_sse2_foldua()
+{ // Checks sse2_foldua() on strings around the 16-byte block size, then on every single byte value.
+ assert_sse2_foldua("", 0, "");
+ assert_sse2_foldua("ABCD", 0, "");
+ assert_sse2_foldua("ABCDEFGHIJKLMNO", 0, "");
+ assert_sse2_foldua("ABCDEFGHIJKLMNOP", 16, "abcdefghijklmnop");
+ assert_sse2_foldua("ABCDEFGHIJKLMNOPQ", 16, "abcdefghijklmnop");
+ assert_sse2_foldua("KLMNOPQRSTUVWXYZ", 16, "klmnopqrstuvwxyz");
+ assert_sse2_foldua("abcdefghijklmnop", 16, "abcdefghijklmnop");
+ assert_sse2_foldua("klmnopqrstuvwxyz", 16, "klmnopqrstuvwxyz");
+ assert_sse2_foldua("0123456789abcdef", 16, "0123456789abcdef");
+
+ for (int code = 0; code < 128; ++code) {
+ const bool isUpper = (code >= 'A' && code <= 'Z');
+ const bool isKept = (code >= 'a' && code <= 'z') || (code >= '0' && code <= '9');
+ // upper case -> lower case, lower case and digits unchanged, every other ascii value -> 0
+ int expected = 0;
+ if (isKept) expected = code;
+ if (isUpper) expected = code + 32;
+ assert_sse2_foldua(code, expected);
+ }
+
+ // values above the ascii range: nothing is folded, so the expected byte is never compared
+ for (int code = 128; code < 256; ++code) {
+ assert_sse2_foldua(code, '?', 0);
+ }
+}
+
+int
+TextUtilTest::Main()
+{ // Test application entry point: runs every test method in sequence between TEST_INIT and TEST_DONE.
+ TEST_INIT("textutil_test");
+
+ testSkipSeparators();
+ testSeparatorCharacter();
+ testAnsiFold();
+ test_lfoldua();
+ test_sse2_foldua();
+
+ TEST_DONE(); // NOTE(review): no return statement follows, so TEST_DONE presumably supplies it - confirm in the testkit
+}
+
+}
+
+TEST_APPHOOK(vsm::TextUtilTest);
diff --git a/vsm/src/tests/utilapps/.gitignore b/vsm/src/tests/utilapps/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/vsm/src/tests/utilapps/.gitignore