diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-15 00:40:43 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-15 00:40:43 +0200 |
commit | dacf557add1c6a3ffab036cdf2f7dfdf9750b22e (patch) | |
tree | 3a9dfff58b98898e2e28c0337925f4f04e5eaeb0 /streamingvisitors | |
parent | 2722ce9d1d1ec12d57ebd3833ce37b0958afb752 (diff) |
Revert "Collapse vsm into streamingvisitors"
Diffstat (limited to 'streamingvisitors')
98 files changed, 7 insertions, 7669 deletions
diff --git a/streamingvisitors/CMakeLists.txt b/streamingvisitors/CMakeLists.txt index 77ce4b471c0..5f5e16fc6c3 100644 --- a/streamingvisitors/CMakeLists.txt +++ b/streamingvisitors/CMakeLists.txt @@ -7,27 +7,17 @@ vespa_define_module( storage storageapi config_cloudconfig - vespalib document + vespalib vdslib - searchlib - searchsummary + vsm LIBS src/vespa/searchvisitor - src/vespa/vsm/common - src/vespa/vsm/config - src/vespa/vsm/searcher - src/vespa/vsm/vsm TESTS src/tests/hitcollector src/tests/matching_elements_filler src/tests/querywrapper src/tests/searchvisitor - src/tests/charbuffer - src/tests/docsum - src/tests/document - src/tests/searcher - src/tests/textutil ) diff --git a/streamingvisitors/pom.xml b/streamingvisitors/pom.xml deleted file mode 100644 index 2cc777be593..00000000000 --- a/streamingvisitors/pom.xml +++ /dev/null @@ -1,44 +0,0 @@ -<!-- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 - http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>com.yahoo.vespa</groupId> - <artifactId>parent</artifactId> - <version>7-SNAPSHOT</version> - <relativePath>../parent/pom.xml</relativePath> - </parent> - <artifactId>vsm</artifactId> - <version>7-SNAPSHOT</version> - <packaging>jar</packaging> - <name>${project.artifactId}</name> - <dependencies> - <dependency> - <groupId>com.yahoo.vespa</groupId> - <artifactId>config-lib</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <groupId>com.yahoo.vespa</groupId> - <artifactId>config-class-plugin</artifactId> - <version>${project.version}</version> - <configuration> - <defFilesDirectories>src/vespa/vsm/config/</defFilesDirectories> - </configuration> - <executions> - <execution> - <id>config-gen</id> - <goals> - <goal>config-gen</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project> diff --git a/streamingvisitors/src/tests/charbuffer/.gitignore b/streamingvisitors/src/tests/charbuffer/.gitignore deleted file mode 100644 index 2c980038fb5..00000000000 --- a/streamingvisitors/src/tests/charbuffer/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -charbuffer_test -vsm_charbuffer_test_app diff --git a/streamingvisitors/src/tests/charbuffer/CMakeLists.txt b/streamingvisitors/src/tests/charbuffer/CMakeLists.txt deleted file mode 100644 index 5d0c0068d37..00000000000 --- a/streamingvisitors/src/tests/charbuffer/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_charbuffer_test_app TEST - SOURCES - charbuffer.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_charbuffer_test_app COMMAND vsm_charbuffer_test_app) diff --git a/streamingvisitors/src/tests/charbuffer/charbuffer.cpp b/streamingvisitors/src/tests/charbuffer/charbuffer.cpp deleted file mode 100644 index 736d35459cb..00000000000 --- a/streamingvisitors/src/tests/charbuffer/charbuffer.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> - -#include <vespa/vsm/common/charbuffer.h> - -namespace vsm { - -class CharBufferTest : public vespalib::TestApp -{ -private: - void test(); -public: - int Main() override; -}; - -void -CharBufferTest::test() -{ - { // empty - CharBuffer buf; - EXPECT_EQUAL(buf.getLength(), 0u); - EXPECT_EQUAL(buf.getPos(), 0u); - EXPECT_EQUAL(buf.getRemaining(), 0u); - } - { // explicit length - CharBuffer buf(8); - EXPECT_EQUAL(buf.getLength(), 8u); - EXPECT_EQUAL(buf.getPos(), 0u); - EXPECT_EQUAL(buf.getRemaining(), 8u); - } - { // resize - CharBuffer buf(8); - EXPECT_EQUAL(buf.getLength(), 8u); - buf.resize(16); - EXPECT_EQUAL(buf.getLength(), 16u); - buf.resize(8); - EXPECT_EQUAL(buf.getLength(), 16u); - } - { // put with triggered resize - CharBuffer buf(8); - buf.put("123456", 6); - EXPECT_EQUAL(buf.getLength(), 8u); - EXPECT_EQUAL(buf.getPos(), 6u); - EXPECT_EQUAL(buf.getRemaining(), 2u); - EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456"); - buf.put("789", 3); - EXPECT_EQUAL(buf.getLength(), 12u); - EXPECT_EQUAL(buf.getPos(), 9u); - EXPECT_EQUAL(buf.getRemaining(), 3u); - EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789"); - buf.put('a'); - EXPECT_EQUAL(buf.getLength(), 12u); - EXPECT_EQUAL(buf.getPos(), 10u); - EXPECT_EQUAL(buf.getRemaining(), 2u); - EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "123456789a"); - buf.reset(); - EXPECT_EQUAL(buf.getLength(), 12u); - EXPECT_EQUAL(buf.getPos(), 0u); - EXPECT_EQUAL(buf.getRemaining(), 12u); - buf.put("bcd", 3); - EXPECT_EQUAL(buf.getLength(), 12u); - EXPECT_EQUAL(buf.getPos(), 3u); - EXPECT_EQUAL(buf.getRemaining(), 9u); - EXPECT_EQUAL(std::string(buf.getBuffer(), buf.getPos()), "bcd"); - } -} - -int -CharBufferTest::Main() -{ - TEST_INIT("charbuffer_test"); - - test(); - - TEST_DONE(); -} - -} - -TEST_APPHOOK(vsm::CharBufferTest); diff --git a/streamingvisitors/src/tests/config/mail.cfg b/streamingvisitors/src/tests/config/mail.cfg deleted file mode 100644 index ce830beac23..00000000000 --- a/streamingvisitors/src/tests/config/mail.cfg +++ /dev/null @@ -1,116 +0,0 @@ -datatype[2] -datatype[0].id 1012 -datatype[0].arraytype[1] -datatype[0].arraytype[0].datatype 12 -datatype[1].id 1013 -datatype[1].arraytype[1] -datatype[1].arraytype[0].datatype 13 -documenttype[1] -documenttype[0].name mail -documenttype[0].version 0 -documenttype[0].inherits[0] -documenttype[0].field[26] -documenttype[0].field[0].name mailid -documenttype[0].field[0].id 2 -documenttype[0].field[0].header true -documenttype[0].field[0].datatype 2 -documenttype[0].field[1].name date -documenttype[0].field[1].id 3 -documenttype[0].field[1].header true -documenttype[0].field[1].datatype 0 -documenttype[0].field[2].name from -documenttype[0].field[2].id 4 -documenttype[0].field[2].header true -documenttype[0].field[2].datatype 12 -documenttype[0].field[3].name replyto -documenttype[0].field[3].id 5 -documenttype[0].field[3].header true -documenttype[0].field[3].datatype 12 -documenttype[0].field[4].name to -documenttype[0].field[4].id 6 -documenttype[0].field[4].header true -documenttype[0].field[4].datatype 12 -documenttype[0].field[5].name cc -documenttype[0].field[5].id 7 -documenttype[0].field[5].header true -documenttype[0].field[5].datatype 12 -documenttype[0].field[6].name bcc -documenttype[0].field[6].id 8 -documenttype[0].field[6].header true -documenttype[0].field[6].datatype 12 -documenttype[0].field[7].name subject -documenttype[0].field[7].id 9 -documenttype[0].field[7].header true -documenttype[0].field[7].datatype 12 -documenttype[0].field[8].name body -documenttype[0].field[8].id 10 -documenttype[0].field[8].header false -documenttype[0].field[8].datatype 12 -documenttype[0].field[9].name attachmentcount -documenttype[0].field[9].id 11 -documenttype[0].field[9].header false -documenttype[0].field[9].datatype 0 -documenttype[0].field[10].name attachmentpartids -documenttype[0].field[10].id 12 -documenttype[0].field[10].header false -documenttype[0].field[10].datatype 2 -documenttype[0].field[11].name attachmentsizes -documenttype[0].field[11].id 13 -documenttype[0].field[11].header false -documenttype[0].field[11].datatype 2 -documenttype[0].field[12].name attachmentnames -documenttype[0].field[12].id 14 -documenttype[0].field[12].header false -documenttype[0].field[12].datatype 2 -documenttype[0].field[13].name attachmenttypes -documenttype[0].field[13].id 15 -documenttype[0].field[13].header false -documenttype[0].field[13].datatype 2 -documenttype[0].field[14].name attachmentlanguages -documenttype[0].field[14].id 16 -documenttype[0].field[14].header false -documenttype[0].field[14].datatype 2 -documenttype[0].field[15].name attachmentcontent -documenttype[0].field[15].id 17 -documenttype[0].field[15].header false -documenttype[0].field[15].datatype 2 -documenttype[0].field[16].name bodylanguage -documenttype[0].field[16].id 18 -documenttype[0].field[16].header false -documenttype[0].field[16].datatype 2 -documenttype[0].field[17].name bodyencoding -documenttype[0].field[17].id 19 -documenttype[0].field[17].header false -documenttype[0].field[17].datatype 2 -documenttype[0].field[18].name collectionid -documenttype[0].field[18].id 20 -documenttype[0].field[18].header true -documenttype[0].field[18].datatype 4 -documenttype[0].field[19].name content -documenttype[0].field[19].id 21 -documenttype[0].field[19].header true -documenttype[0].field[19].datatype 12 -documenttype[0].field[20].name bodymeta -documenttype[0].field[20].id 50027053 -documenttype[0].field[20].header false -documenttype[0].field[20].datatype 13 -documenttype[0].field[21].name attachments -documenttype[0].field[21].id 1081629685 -documenttype[0].field[21].header false -documenttype[0].field[21].datatype 1012 -documenttype[0].field[22].name attachmentsmeta -documenttype[0].field[22].id 1203055625 -documenttype[0].field[22].header false -documenttype[0].field[22].datatype 1013 -documenttype[0].field[23].name tolist -documenttype[0].field[23].id 1084918181 -documenttype[0].field[23].header false -documenttype[0].field[23].datatype 1012 -documenttype[0].field[24].name cclist -documenttype[0].field[24].id 1733332403 -documenttype[0].field[24].header false -documenttype[0].field[24].datatype 1012 -documenttype[0].field[25].name bcclist -documenttype[0].field[25].id 410546306 -documenttype[0].field[25].header false -documenttype[0].field[25].datatype 1012 diff --git a/streamingvisitors/src/tests/config/vsm.cfg b/streamingvisitors/src/tests/config/vsm.cfg deleted file mode 100644 index dc50447f623..00000000000 --- a/streamingvisitors/src/tests/config/vsm.cfg +++ /dev/null @@ -1,3 +0,0 @@ -doctype file:../config/mail.cfg -storagecfg "" -vsmfields file:../config/vsmfields.cfg diff --git a/streamingvisitors/src/tests/config/vsmfields.cfg b/streamingvisitors/src/tests/config/vsmfields.cfg deleted file mode 100644 index 30f1c8ed8b1..00000000000 --- a/streamingvisitors/src/tests/config/vsmfields.cfg +++ /dev/null @@ -1,297 +0,0 @@ -threadsperquery 4 -documentverificationlevel=0 -searchall 1 -fieldspec[17] -fieldspec[0].name bcc -fieldspec[0].searchmethod AUTOUTF8 -fieldspec[0].arg1 "" -fieldspec[1].name cc -fieldspec[1].searchmethod AUTOUTF8 -fieldspec[1].arg1 "" -fieldspec[2].name from -fieldspec[2].searchmethod AUTOUTF8 -fieldspec[2].arg1 "" -fieldspec[3].name date -fieldspec[3].searchmethod INT32 -fieldspec[3].arg1 "" -fieldspec[4].name replyto -fieldspec[4].searchmethod AUTOUTF8 -fieldspec[4].arg1 "" -fieldspec[5].name subject -fieldspec[5].searchmethod AUTOUTF8 -fieldspec[5].arg1 "" -fieldspec[6].name to -fieldspec[6].searchmethod AUTOUTF8 -fieldspec[6].arg1 "" -fieldspec[7].name body -fieldspec[7].searchmethod AUTOUTF8 -fieldspec[7].arg1 "" -fieldspec[8].name bodymeta -fieldspec[8].searchmethod AUTOUTF8 -fieldspec[8].arg1 "" -fieldspec[9].name mailid -fieldspec[9].searchmethod AUTOUTF8 -fieldspec[9].arg1 "" -fieldspec[10].name attachmentcount -fieldspec[10].searchmethod INT32 -fieldspec[10].arg1 "" -fieldspec[11].name attachmentcontent -fieldspec[11].searchmethod AUTOUTF8 -fieldspec[11].arg1 "" -fieldspec[12].name attachmenttypes -fieldspec[12].searchmethod AUTOUTF8 -fieldspec[12].arg1 "" -fieldspec[13].name attachmentnames -fieldspec[13].searchmethod AUTOUTF8 -fieldspec[13].arg1 "" -fieldspec[14].name attachmentlanguages -fieldspec[14].searchmethod AUTOUTF8 -fieldspec[14].arg1 "" -fieldspec[15].name URI -fieldspec[15].searchmethod AUTOUTF8 -fieldspec[15].arg1 "" -fieldspec[16].name vsm_whichfieldmatched -fieldspec[16].searchmethod AUTOUTF8 -fieldspec[16].arg1 "" -index[26] -index[0].name default -index[0].field[10] -index[0].field[0].name from -index[0].field[1].name to -index[0].field[2].name cc -index[0].field[3].name bcc -index[0].field[4].name subject -index[0].field[5].name body -index[0].field[6].name attachmentcontent -index[0].field[7].name attachmentnames -index[0].field[8].name attachmenttypes -index[0].field[9].name date -index[1].name all -index[1].field[8] -index[1].field[0].name to -index[1].field[1].name cc -index[1].field[2].name bcc -index[1].field[3].name subject -index[1].field[4].name body -index[1].field[5].name attachmentcontent -index[1].field[6].name attachmentnames -index[1].field[7].name attachmenttypes -index[2].name header -index[2].field[6] -index[2].field[0].name from -index[2].field[1].name replyto -index[2].field[2].name to -index[2].field[3].name cc -index[2].field[4].name bcc -index[2].field[5].name subject -index[3].name senders -index[3].field[2] -index[3].field[0].name from -index[3].field[1].name replyto -index[4].name recipients -index[4].field[3] -index[4].field[0].name to -index[4].field[1].name cc -index[4].field[2].name bcc -index[5].name address -index[5].field[5] -index[5].field[0].name from -index[5].field[1].name replyto -index[5].field[2].name to -index[5].field[3].name cc -index[5].field[4].name bcc -index[6].name body -index[6].field[2] -index[6].field[0].name subject -index[6].field[1].name body -index[7].name meta -index[7].field[2] -index[7].field[0].name attachmentcontent -index[7].field[1].name attachmenttypes -index[8].name index1 -index[8].field[1] -index[8].field[0].name bcc -index[9].name index2 -index[9].field[2] -index[9].field[0].name bcc -index[9].field[1].name cc -index[10].name index3 -index[10].field[3] -index[10].field[0].name bcc -index[10].field[1].name cc -index[10].field[2].name from -index[11].name index4 -index[11].field[4] -index[11].field[0].name bcc -index[11].field[1].name cc -index[11].field[2].name from -index[11].field[3].name date -index[12].name index5 -index[12].field[5] -index[12].field[0].name bcc -index[12].field[1].name cc -index[12].field[2].name from -index[12].field[3].name date -index[12].field[4].name replyto -index[13].name index6 -index[13].field[6] -index[13].field[0].name bcc -index[13].field[1].name cc -index[13].field[2].name from -index[13].field[3].name date -index[13].field[4].name replyto -index[13].field[5].name subject -index[14].name index7 -index[14].field[7] -index[14].field[0].name bcc -index[14].field[1].name cc -index[14].field[2].name from -index[14].field[3].name date -index[14].field[4].name replyto -index[14].field[5].name subject -index[14].field[6].name to -index[15].name index8 -index[15].field[8] -index[15].field[0].name bcc -index[15].field[1].name cc -index[15].field[2].name from -index[15].field[3].name date -index[15].field[4].name replyto -index[15].field[5].name subject -index[15].field[6].name to -index[15].field[7].name body -index[16].name index9 -index[16].field[9] -index[16].field[0].name bcc -index[16].field[1].name cc -index[16].field[2].name from -index[16].field[3].name date -index[16].field[4].name replyto -index[16].field[5].name subject -index[16].field[6].name to -index[16].field[7].name body -index[16].field[8].name bodymeta -index[17].name index10 -index[17].field[10] -index[17].field[0].name bcc -index[17].field[1].name cc -index[17].field[2].name from -index[17].field[3].name date -index[17].field[4].name replyto -index[17].field[5].name subject -index[17].field[6].name to -index[17].field[7].name body -index[17].field[8].name bodymeta -index[17].field[9].name mailid -index[18].name index11 -index[18].field[11] -index[18].field[0].name bcc -index[18].field[1].name cc -index[18].field[2].name from -index[18].field[3].name date -index[18].field[4].name replyto -index[18].field[5].name subject -index[18].field[6].name to -index[18].field[7].name body -index[18].field[8].name bodymeta -index[18].field[9].name mailid -index[18].field[10].name attachmentcount -index[19].name index12 -index[19].field[12] -index[19].field[0].name bcc -index[19].field[1].name cc -index[19].field[2].name from -index[19].field[3].name date -index[19].field[4].name replyto -index[19].field[5].name subject -index[19].field[6].name to -index[19].field[7].name body -index[19].field[8].name bodymeta -index[19].field[9].name mailid -index[19].field[10].name attachmentcount -index[19].field[11].name attachmentcontent -index[20].name index13 -index[20].field[13] -index[20].field[0].name bcc -index[20].field[1].name cc -index[20].field[2].name from -index[20].field[3].name date -index[20].field[4].name replyto -index[20].field[5].name subject -index[20].field[6].name to -index[20].field[7].name body -index[20].field[8].name bodymeta -index[20].field[9].name mailid -index[20].field[10].name attachmentcount -index[20].field[11].name attachmentcontent -index[20].field[12].name attachmenttypes -index[21].name index14 -index[21].field[14] -index[21].field[0].name bcc -index[21].field[1].name cc -index[21].field[2].name from -index[21].field[3].name date -index[21].field[4].name replyto -index[21].field[5].name subject -index[21].field[6].name to -index[21].field[7].name body -index[21].field[8].name bodymeta -index[21].field[9].name mailid -index[21].field[10].name attachmentcount -index[21].field[11].name attachmentcontent -index[21].field[12].name attachmenttypes -index[21].field[13].name attachmentnames -index[22].name index15 -index[22].field[15] -index[22].field[0].name bcc -index[22].field[1].name cc -index[22].field[2].name from -index[22].field[3].name date -index[22].field[4].name replyto -index[22].field[5].name subject -index[22].field[6].name to -index[22].field[7].name body -index[22].field[8].name bodymeta -index[22].field[9].name mailid -index[22].field[10].name attachmentcount -index[22].field[11].name attachmentcontent -index[22].field[12].name attachmenttypes -index[22].field[13].name attachmentnames -index[22].field[14].name attachmentlanguages -index[23].name index16 -index[23].field[15] -index[23].field[0].name bcc -index[23].field[1].name cc -index[23].field[2].name from -index[23].field[3].name date -index[23].field[4].name replyto -index[23].field[5].name subject -index[23].field[6].name to -index[23].field[7].name body -index[23].field[8].name bodymeta -index[23].field[9].name mailid -index[23].field[10].name attachmentcount -index[23].field[11].name attachmentcontent -index[23].field[12].name attachmenttypes -index[23].field[13].name attachmentnames -index[23].field[14].name attachmentlanguages -index[24].name index17 -index[24].field[15] -index[24].field[0].name bcc -index[24].field[1].name cc -index[24].field[2].name from -index[24].field[3].name date -index[24].field[4].name replyto -index[24].field[5].name subject -index[24].field[6].name to -index[24].field[7].name body -index[24].field[8].name bodymeta -index[24].field[9].name mailid -index[24].field[10].name attachmentcount -index[24].field[11].name attachmentcontent -index[24].field[12].name attachmenttypes -index[24].field[13].name attachmentnames -index[24].field[14].name attachmentlanguages -index[25].name date -index[25].field[1] -index[25].field[0].name date diff --git a/streamingvisitors/src/tests/docsum/.gitignore b/streamingvisitors/src/tests/docsum/.gitignore deleted file mode 100644 index 9a697a94de8..00000000000 --- a/streamingvisitors/src/tests/docsum/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -docsum_test -vsm_docsum_test_app diff --git a/streamingvisitors/src/tests/docsum/CMakeLists.txt b/streamingvisitors/src/tests/docsum/CMakeLists.txt deleted file mode 100644 index 87c46409053..00000000000 --- a/streamingvisitors/src/tests/docsum/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_docsum_test_app TEST - SOURCES - docsum.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_docsum_test_app COMMAND vsm_docsum_test_app) diff --git a/streamingvisitors/src/tests/docsum/docsum.cpp b/streamingvisitors/src/tests/docsum/docsum.cpp deleted file mode 100644 index 475489d2f5a..00000000000 --- a/streamingvisitors/src/tests/docsum/docsum.cpp +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/document/fieldvalue/fieldvalues.h> -#include <vespa/document/datatype/structdatatype.h> -#include <vespa/document/datatype/weightedsetdatatype.h> -#include <vespa/document/datatype/mapdatatype.h> -#include <vespa/vsm/common/docsum.h> -#include <vespa/vsm/vsm/flattendocsumwriter.h> -#include <vespa/vsm/vsm/slimefieldwriter.h> - -using namespace document; - -namespace vsm { - -template <typename T> -class Vector : public std::vector<T> -{ -public: - Vector<T> & add(T v) { this->push_back(v); return *this; } -}; - -typedef Vector<std::string> StringList; -typedef Vector<std::pair<std::string, int32_t> > WeightedStringList; - - -class TestDocument : public vsm::Document -{ -private: - std::vector<FieldValueContainer> _fields; - -public: - TestDocument(const search::DocumentIdT & docId, size_t numFields) : vsm::Document(docId, numFields), _fields(numFields) {} - virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) override { - if (fId < _fields.size()) { - _fields[fId].reset(fv.release()); - return true; - } - return false; - } - virtual const document::FieldValue * getField(FieldIdT fId) const override { - if (fId < _fields.size()) { - return _fields[fId].get(); - } - return NULL; - } -}; - - -class DocsumTest : public vespalib::TestApp -{ -private: - ArrayFieldValue createFieldValue(const StringList & fv); - WeightedSetFieldValue createFieldValue(const WeightedStringList & fv); - - void assertFlattenDocsumWriter(const FieldValue & fv, const std::string & exp) { - FlattenDocsumWriter fdw; - assertFlattenDocsumWriter(fdw, fv, exp); - } - void assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp); - void assertSlimeFieldWriter(const FieldValue & fv, const std::string & exp) { - SlimeFieldWriter sfw; - TEST_DO(assertSlimeFieldWriter(sfw, fv, exp)); - } - void assertSlimeFieldWriter(SlimeFieldWriter & sfw, const FieldValue & fv, const std::string & exp); - - void testFlattenDocsumWriter(); - void testSlimeFieldWriter(); - void requireThatSlimeFieldWriterHandlesMap(); - void testDocSumCache(); - -public: - int Main() override; -}; - -ArrayFieldValue -DocsumTest::createFieldValue(const StringList & fv) -{ - - static ArrayDataType type(*DataType::STRING); - ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(StringFieldValue(fv[i])); - } - return afv; -} - -WeightedSetFieldValue -DocsumTest::createFieldValue(const WeightedStringList & fv) -{ - static WeightedSetDataType type(*DataType::STRING, false, false); - WeightedSetFieldValue wsfv(type); - for (size_t i = 0; i < fv.size(); ++i) { - wsfv.add(StringFieldValue(fv[i].first), fv[i].second); - } - return wsfv; -} - -void -DocsumTest::assertFlattenDocsumWriter(FlattenDocsumWriter & fdw, const FieldValue & fv, const std::string & exp) -{ - FieldPath empty; - fv.iterateNested(empty.getFullRange(), fdw); - std::string actual(fdw.getResult().getBuffer(), fdw.getResult().getPos()); - EXPECT_EQUAL(actual, exp); -} - -void -DocsumTest::assertSlimeFieldWriter(SlimeFieldWriter & sfw, const FieldValue & fv, const std::string & exp) -{ - sfw.convert(fv); - - vespalib::Slime gotSlime; - vespalib::Memory serialized(sfw.out()); - size_t decodeRes = vespalib::slime::BinaryFormat::decode(serialized, gotSlime); - ASSERT_EQUAL(decodeRes, serialized.size); - - vespalib::Slime expSlime; - size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); - EXPECT_TRUE(used > 0); - EXPECT_EQUAL(expSlime, gotSlime); -} - -void -DocsumTest::testFlattenDocsumWriter() -{ - { // basic tests - TEST_DO(assertFlattenDocsumWriter(StringFieldValue("foo bar"), "foo bar")); - TEST_DO(assertFlattenDocsumWriter(RawFieldValue("foo bar"), "foo bar")); - TEST_DO(assertFlattenDocsumWriter(BoolFieldValue(true), "true")); - TEST_DO(assertFlattenDocsumWriter(BoolFieldValue(false), "false")); - TEST_DO(assertFlattenDocsumWriter(LongFieldValue(123456789), "123456789")); - TEST_DO(assertFlattenDocsumWriter(createFieldValue(StringList().add("foo bar").add("baz").add(" qux ")), - "foo bar baz qux ")); - } - { // test mulitple invokations - FlattenDocsumWriter fdw("#"); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("foo"), "foo")); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("bar"), "foo#bar")); - fdw.clear(); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("baz"), "baz")); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("qux"), "baz qux")); - } - { // test resizing - FlattenDocsumWriter fdw("#"); - EXPECT_EQUAL(fdw.getResult().getPos(), 0u); - EXPECT_EQUAL(fdw.getResult().getLength(), 32u); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("aaaabbbbccccddddeeeeffffgggghhhh"), - "aaaabbbbccccddddeeeeffffgggghhhh")); - EXPECT_EQUAL(fdw.getResult().getPos(), 32u); - EXPECT_EQUAL(fdw.getResult().getLength(), 32u); - TEST_DO(assertFlattenDocsumWriter(fdw, StringFieldValue("aaaa"), "aaaabbbbccccddddeeeeffffgggghhhh#aaaa")); - EXPECT_EQUAL(fdw.getResult().getPos(), 37u); - EXPECT_TRUE(fdw.getResult().getLength() >= 37u); - fdw.clear(); - EXPECT_EQUAL(fdw.getResult().getPos(), 0u); - EXPECT_TRUE(fdw.getResult().getLength() >= 37u); - } -} - -void -DocsumTest::testSlimeFieldWriter() -{ - { // basic types - assertSlimeFieldWriter(LongFieldValue(123456789), "123456789"); - assertSlimeFieldWriter(BoolFieldValue(true), "true"); - assertSlimeFieldWriter(BoolFieldValue(false), "false"); - assertSlimeFieldWriter(DoubleFieldValue(12.34), "12.34"); - assertSlimeFieldWriter(StringFieldValue("foo bar"), "\"foo bar\""); - } - { // collection field values - assertSlimeFieldWriter(createFieldValue(StringList().add("foo").add("bar").add("baz")), - "[\"foo\",\"bar\",\"baz\"]"); - assertSlimeFieldWriter(createFieldValue(WeightedStringList().add(std::make_pair("bar", 20)). - add(std::make_pair("baz", 30)). - add(std::make_pair("foo", 10))), - "[{item:\"bar\",weight:20},{item:\"baz\",weight:30},{item:\"foo\",weight:10}]"); - } - { // struct field value - StructDataType subType("substruct"); - Field fd("d", 0, *DataType::STRING); - Field fe("e", 1, *DataType::STRING); - subType.addField(fd); - subType.addField(fe); - StructFieldValue subValue(subType); - subValue.setValue(fd, StringFieldValue("baz")); - subValue.setValue(fe, StringFieldValue("qux")); - - StructDataType type("struct"); - Field fa("a", 0, *DataType::STRING); - Field fb("b", 1, *DataType::STRING); - Field fc("c", 2, subType); - type.addField(fa); - type.addField(fb); - type.addField(fc); - StructFieldValue value(type); - value.setValue(fa, StringFieldValue("foo")); - value.setValue(fb, StringFieldValue("bar")); - value.setValue(fc, subValue); - - - { // select a subset and then all - SlimeFieldWriter sfw; - DocsumFieldSpec::FieldIdentifierVector fields; - { - FieldPath path; - type.buildFieldPath(path, "a"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - { - FieldPath path; - type.buildFieldPath(path, "c.e"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, value, "{\"a\":\"foo\",\"c\":{\"e\":\"qux\"}}")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, value, "{\"a\":\"foo\",\"b\":\"bar\",\"c\":{\"d\":\"baz\",\"e\":\"qux\"}}")); - } - - { // multiple invocations - SlimeFieldWriter sfw; - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("foo"), "\"foo\"")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("bar"), "\"bar\"")); - sfw.clear(); - TEST_DO(assertSlimeFieldWriter(sfw, StringFieldValue("baz"), "\"baz\"")); - } - - } -} - -void -DocsumTest::requireThatSlimeFieldWriterHandlesMap() -{ - { // map<string, string> - MapDataType mapType(*DataType::STRING, *DataType::STRING); - MapFieldValue mapfv(mapType); - EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), StringFieldValue("v1"))); - EXPECT_TRUE(mapfv.put(StringFieldValue("k2"), StringFieldValue("v2"))); - assertSlimeFieldWriter(mapfv, "[{\"key\":\"k1\",\"value\":\"v1\"},{\"key\":\"k2\",\"value\":\"v2\"}]"); - } - { // map<string, struct> - StructDataType structType("struct"); - Field fa("a", 0, *DataType::STRING); - Field fb("b", 1, *DataType::STRING); - structType.addField(fa); - structType.addField(fb); - StructFieldValue structValue(structType); - structValue.setValue(fa, StringFieldValue("foo")); - structValue.setValue(fb, StringFieldValue("bar")); - MapDataType mapType(*DataType::STRING, structType); - MapFieldValue mapfv(mapType); - EXPECT_TRUE(mapfv.put(StringFieldValue("k1"), structValue)); - { // select a subset and then all - SlimeFieldWriter sfw; - DocsumFieldSpec::FieldIdentifierVector fields; - { - FieldPath path; - mapType.buildFieldPath(path, "value.b"); - fields.push_back(DocsumFieldSpec::FieldIdentifier(0, std::move(path))); - } - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"b\":\"bar\"}}]")); - { - FieldPath path; - mapType.buildFieldPath(path, "{k1}.a"); - fields[0] = DocsumFieldSpec::FieldIdentifier(0, std::move(path)); - } - sfw.clear(); - sfw.setInputFields(fields); - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\"}}]")); - sfw.clear(); // all fields implicit - TEST_DO(assertSlimeFieldWriter(sfw, mapfv, "[{\"key\":\"k1\",\"value\":{\"a\":\"foo\",\"b\":\"bar\"}}]")); - } - } -} - -int -DocsumTest::Main() -{ - TEST_INIT("docsum_test"); - - TEST_DO(testFlattenDocsumWriter()); - TEST_DO(testSlimeFieldWriter()); - TEST_DO(requireThatSlimeFieldWriterHandlesMap()); - - TEST_DONE(); -} - -} - -TEST_APPHOOK(vsm::DocsumTest); - diff --git a/streamingvisitors/src/tests/document/.gitignore b/streamingvisitors/src/tests/document/.gitignore deleted file mode 100644 index d47781eff63..00000000000 --- a/streamingvisitors/src/tests/document/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -document_test -vsm_document_test_app diff --git a/streamingvisitors/src/tests/document/CMakeLists.txt b/streamingvisitors/src/tests/document/CMakeLists.txt deleted file mode 100644 index 5ea12dc5e2d..00000000000 --- a/streamingvisitors/src/tests/document/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_document_test_app TEST - SOURCES - document.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_document_test_app COMMAND vsm_document_test_app) diff --git a/streamingvisitors/src/tests/document/document.cpp b/streamingvisitors/src/tests/document/document.cpp deleted file mode 100644 index 1e97d232a64..00000000000 --- a/streamingvisitors/src/tests/document/document.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> - -#include <vespa/document/fieldvalue/fieldvalues.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/vsm/common/storagedocument.h> -#include <vespa/vespalib/stllike/asciistream.h> - -using namespace document; - -namespace vsm { - -class DocumentTest : public vespalib::TestApp -{ -private: - void testStorageDocument(); - void testStringFieldIdTMap(); -public: - int Main() override; -}; - -void -DocumentTest::testStorageDocument() -{ - DocumentType dt("testdoc", 0); - - Field fa("a", 0, *DataType::STRING); - Field fb("b", 1, *DataType::STRING); - dt.addField(fa); - dt.addField(fb); - - document::Document::UP doc(new document::Document(dt, DocumentId())); - doc->setValue(fa, StringFieldValue("foo")); - doc->setValue(fb, StringFieldValue("bar")); - - SharedFieldPathMap fpmap(new FieldPathMapT()); - fpmap->emplace_back(); - dt.buildFieldPath(fpmap->back(),"a"); - fpmap->emplace_back(); - dt.buildFieldPath(fpmap->back(), "b"); - fpmap->emplace_back(); - ASSERT_TRUE((*fpmap)[0].size() == 1); - ASSERT_TRUE((*fpmap)[1].size() == 1); - ASSERT_TRUE((*fpmap)[2].size() == 0); - - StorageDocument sdoc(std::move(doc), fpmap, 3); - ASSERT_TRUE(sdoc.valid()); - - EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); - EXPECT_TRUE(sdoc.getField(2) == nullptr); - // test caching - EXPECT_EQUAL(std::string("foo"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); - EXPECT_TRUE(sdoc.getField(2) == nullptr); - - // set new values - EXPECT_TRUE(sdoc.setField(0, FieldValue::UP(new StringFieldValue("baz")))); - EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("bar"), sdoc.getField(1)->getAsString()); - EXPECT_TRUE(sdoc.getField(2) == nullptr); - EXPECT_TRUE(sdoc.setField(1, FieldValue::UP(new StringFieldValue("qux")))); - EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString()); - EXPECT_TRUE(sdoc.getField(2) == nullptr); - EXPECT_TRUE(sdoc.setField(2, FieldValue::UP(new StringFieldValue("quux")))); - EXPECT_EQUAL(std::string("baz"), sdoc.getField(0)->getAsString()); - EXPECT_EQUAL(std::string("qux"), sdoc.getField(1)->getAsString()); - EXPECT_EQUAL(std::string("quux"), sdoc.getField(2)->getAsString()); - - EXPECT_TRUE(!sdoc.setField(3, FieldValue::UP(new StringFieldValue("thud")))); - - SharedFieldPathMap fim; - StorageDocument s2(std::make_unique<document::Document>(), fim, 0); - EXPECT_EQUAL(IdString().toString(), s2.docDoc().getId().toString()); -} - -void DocumentTest::testStringFieldIdTMap() -{ - StringFieldIdTMap m; - EXPECT_EQUAL(0u, m.highestFieldNo()); - EXPECT_TRUE(StringFieldIdTMap::npos == m.fieldNo("unknown")); - m.add("f1"); - EXPECT_EQUAL(0u, m.fieldNo("f1")); - EXPECT_EQUAL(1u, m.highestFieldNo()); - m.add("f1"); - EXPECT_EQUAL(0u, m.fieldNo("f1")); - EXPECT_EQUAL(1u, m.highestFieldNo()); - m.add("f2"); - EXPECT_EQUAL(1u, m.fieldNo("f2")); - EXPECT_EQUAL(2u, m.highestFieldNo()); - m.add("f3", 7); - EXPECT_EQUAL(7u, m.fieldNo("f3")); - EXPECT_EQUAL(8u, m.highestFieldNo()); - m.add("f3"); - EXPECT_EQUAL(7u, m.fieldNo("f3")); - EXPECT_EQUAL(8u, m.highestFieldNo()); - m.add("f2", 13); - EXPECT_EQUAL(13u, m.fieldNo("f2")); - EXPECT_EQUAL(14u, m.highestFieldNo()); - m.add("f4"); - EXPECT_EQUAL(3u, m.fieldNo("f4")); - EXPECT_EQUAL(14u, m.highestFieldNo()); - { - vespalib::asciistream os; - StringFieldIdTMap t; - t.add("b"); - t.add("a"); - os << t; - EXPECT_EQUAL(vespalib::string("a = 1\nb = 0\n"), os.str()); - } - -} - -int -DocumentTest::Main() -{ - TEST_INIT("document_test"); - - testStorageDocument(); - testStringFieldIdTMap(); - - TEST_DONE(); -} - -} - -TEST_APPHOOK(vsm::DocumentTest); - diff --git a/streamingvisitors/src/tests/hitcollector/CMakeLists.txt b/streamingvisitors/src/tests/hitcollector/CMakeLists.txt index dbec820a462..f25ab348265 100644 --- a/streamingvisitors/src/tests/hitcollector/CMakeLists.txt +++ b/streamingvisitors/src/tests/hitcollector/CMakeLists.txt @@ -3,6 +3,6 @@ vespa_add_executable(streamingvisitors_hitcollector_test_app TEST SOURCES hitcollector_test.cpp DEPENDS - streamingvisitors + streamingvisitors_searchvisitor ) vespa_add_test(NAME streamingvisitors_hitcollector_test_app COMMAND streamingvisitors_hitcollector_test_app) diff --git a/streamingvisitors/src/tests/matching_elements_filler/CMakeLists.txt b/streamingvisitors/src/tests/matching_elements_filler/CMakeLists.txt index 5cc2977b3c3..ef93d551912 100644 --- a/streamingvisitors/src/tests/matching_elements_filler/CMakeLists.txt +++ b/streamingvisitors/src/tests/matching_elements_filler/CMakeLists.txt @@ -3,7 +3,7 @@ vespa_add_executable(streamingvisitors_matching_elements_filler_test_app TEST SOURCES matching_elements_filler_test.cpp DEPENDS - streamingvisitors + streamingvisitors_searchvisitor GTest::GTest ) vespa_add_test(NAME streamingvisitors_matching_elements_filler_test_app COMMAND streamingvisitors_matching_elements_filler_test_app) diff --git a/streamingvisitors/src/tests/querywrapper/CMakeLists.txt b/streamingvisitors/src/tests/querywrapper/CMakeLists.txt index e0131d0c6cc..7cae60e6a11 100644 --- a/streamingvisitors/src/tests/querywrapper/CMakeLists.txt +++ b/streamingvisitors/src/tests/querywrapper/CMakeLists.txt @@ -3,6 +3,6 @@ vespa_add_executable(streamingvisitors_querywrapper_test_app TEST SOURCES querywrapper_test.cpp DEPENDS - streamingvisitors + streamingvisitors_searchvisitor ) vespa_add_test(NAME streamingvisitors_querywrapper_test_app COMMAND streamingvisitors_querywrapper_test_app) diff --git a/streamingvisitors/src/tests/searcher/.gitignore b/streamingvisitors/src/tests/searcher/.gitignore deleted file mode 100644 index 52a56dff405..00000000000 --- a/streamingvisitors/src/tests/searcher/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -searcher_test -vsm_searcher_test_app diff --git a/streamingvisitors/src/tests/searcher/CMakeLists.txt b/streamingvisitors/src/tests/searcher/CMakeLists.txt deleted file mode 100644 index 2277f5ef55f..00000000000 --- a/streamingvisitors/src/tests/searcher/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_searcher_test_app TEST - SOURCES - searcher_test.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_searcher_test_app COMMAND vsm_searcher_test_app) diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp deleted file mode 100644 index 34fa66eaa90..00000000000 --- a/streamingvisitors/src/tests/searcher/searcher_test.cpp +++ /dev/null @@ -1,864 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/testkit/testapp.h> - -#include <vespa/vsm/searcher/fieldsearcher.h> -#include <vespa/vsm/searcher/floatfieldsearcher.h> -#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> -#include <vespa/vsm/searcher/intfieldsearcher.h> -#include <vespa/vsm/searcher/boolfieldsearcher.h> -#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h> -#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h> -#include <vespa/vsm/searcher/utf8substringsearcher.h> -#include <vespa/vsm/searcher/utf8substringsnippetmodifier.h> -#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h> -#include <vespa/vsm/vsm/snippetmodifier.h> -#include <vespa/searchlib/query/streaming/queryterm.h> -#include <vespa/document/fieldvalue/fieldvalues.h> - -using namespace document; -using search::streaming::HitList; -using search::streaming::QueryNodeResultFactory; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using TermType = QueryTerm::Type; -using namespace vsm; - -template <typename T> -class Vector : public std::vector<T> -{ -public: - Vector() : std::vector<T>() {} - Vector<T> & add(T v) { this->push_back(v); return *this; } -}; - -typedef Vector<size_t> Hits; -typedef Vector<std::string> StringList; -typedef Vector<Hits> HitsList; -typedef Vector<bool> BoolList; -typedef Vector<int64_t> LongList; -typedef Vector<float> FloatList; -typedef QueryTerm::FieldInfo QTFieldInfo; -typedef Vector<QTFieldInfo> FieldInfoList; - -class String -{ -private: - const std::string & _str; -public: - String(const std::string & str) : _str(str) {} - bool operator==(const String & rhs) const { - return _str == rhs._str; - } -}; - -class Query -{ -private: - void setupQuery(const StringList & terms) { - for (size_t i = 0; i < terms.size(); ++i) { - ParsedQueryTerm pqt = parseQueryTerm(terms[i]); - ParsedTerm pt = parseTerm(pqt.second); - qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second)); - } - for (size_t i = 0; i < qtv.size(); ++i) { - qtl.push_back(qtv[i].get()); - } - } -public: - typedef std::pair<std::string, std::string> ParsedQueryTerm; - typedef std::pair<std::string, TermType> ParsedTerm; - QueryNodeResultFactory eqnr; - std::vector<QueryTerm::UP> qtv; - QueryTermList qtl; - Query(const StringList & terms); - ~Query(); - static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) { - size_t i = queryTerm.find(':'); - if (i != std::string::npos) { - return ParsedQueryTerm(queryTerm.substr(0, i), queryTerm.substr(i + 1)); - } - return ParsedQueryTerm(std::string(), queryTerm); - } - static ParsedTerm parseTerm(const std::string & term) { - if (term[0] == '*' && term[term.size() - 1] == '*') { - return std::make_pair(term.substr(1, term.size() - 2), TermType::SUBSTRINGTERM); - } else if (term[0] == '*') { - return std::make_pair(term.substr(1, term.size() - 1), TermType::SUFFIXTERM); - } else if (term[term.size() - 1] == '*') { - return std::make_pair(term.substr(0, term.size() - 1), TermType::PREFIXTERM); - } else { - return std::make_pair(term, TermType::WORD); - } - } -}; - -Query::Query(const StringList & terms) : eqnr(), qtv(), qtl() { - setupQuery(terms); -} -Query::~Query() = default; - -struct SnippetModifierSetup -{ - Query query; - UTF8SubstringSnippetModifier::SP searcher; - SharedSearcherBuf buf; - SnippetModifier modifier; - explicit SnippetModifierSetup(const StringList & terms); - ~SnippetModifierSetup(); -}; - -SnippetModifierSetup::SnippetModifierSetup(const StringList & terms) - : query(terms), - searcher(new UTF8SubstringSnippetModifier()), - buf(new SearcherBuf(8)), - modifier(searcher) -{ - searcher->prepare(query.qtl, buf); -} -SnippetModifierSetup::~SnippetModifierSetup() = default; - -// helper functions -ArrayFieldValue getFieldValue(const StringList &fv); -ArrayFieldValue getFieldValue(const LongList &fv); -ArrayFieldValue getFieldValue(const FloatList &fv); - -bool assertMatchTermSuffix(const std::string &term, const std::string &word); -void assertSnippetModifier(const StringList &query, const std::string &fv, const std::string &exp); -void assertSnippetModifier(SnippetModifierSetup &setup, const FieldValue &fv, const std::string &exp); -void assertQueryTerms(const SnippetModifierManager &man, FieldIdT fId, const StringList &terms); -void assertNumeric(FieldSearcher &fs, const StringList &query, const FieldValue &fv, const BoolList &exp); -std::vector<QueryTerm::UP> performSearch(FieldSearcher &fs, const StringList &query, const FieldValue &fv); -void assertSearch(FieldSearcher &fs, const StringList &query, const FieldValue &fv, const HitsList &exp); -bool assertCountWords(size_t numWords, const std::string &field); -bool assertFieldInfo(FieldSearcher &fs, const StringList &query, const FieldValue &fv, const FieldInfoList &exp); - -void assertString(StrChrFieldSearcher &fs, const StringList &query, const std::string &field, const HitsList &exp) { - assertSearch(fs, query, StringFieldValue(field), exp); -} - -void assertString(StrChrFieldSearcher &fs, const StringList &query, const StringList &field, const HitsList &exp) { - assertSearch(fs, query, getFieldValue(field), exp); -} - -void assertString(StrChrFieldSearcher &fs, const std::string &term, const std::string &field, const Hits &exp) { - assertString(fs, StringList().add(term), field, HitsList().add(exp)); -} -void assertString(StrChrFieldSearcher &fs, const std::string &term, const StringList &field, const Hits &exp) { - assertString(fs, StringList().add(term), field, HitsList().add(exp)); -} - -void assertInt(IntFieldSearcher & fs, const StringList &query, int64_t field, const BoolList &exp) { - assertNumeric(fs, query, LongFieldValue(field), exp); -} - -void assertInt(IntFieldSearcher & fs, const std::string &term, int64_t field, bool exp) { - assertInt(fs, StringList().add(term), field, BoolList().add(exp)); -} - -void assertBool(BoolFieldSearcher & fs, const StringList &query, bool field, const BoolList &exp) { - assertNumeric(fs, query, BoolFieldValue(field), exp); -} -void assertBool(BoolFieldSearcher & fs, const std::string &term, bool field, bool exp) { - assertBool(fs, StringList().add(term), field, BoolList().add(exp)); -} - -void assertInt(IntFieldSearcher & fs, const StringList &query, const LongList &field, const HitsList &exp) { - assertSearch(fs, query, getFieldValue(field), exp); -} - -void assertInt(IntFieldSearcher & fs, const std::string &term, const LongList &field, const Hits &exp) { - assertInt(fs, StringList().add(term), field, HitsList().add(exp)); -} - -void assertFloat(FloatFieldSearcher & fs, const StringList &query, float field, const BoolList &exp) { - assertNumeric(fs, query, FloatFieldValue(field), exp); -} - -void assertFloat(FloatFieldSearcher & fs, const std::string &term, float field, bool exp) { - assertFloat(fs, StringList().add(term), field, BoolList().add(exp)); -} - -void assertFloat(FloatFieldSearcher & fs, const StringList &query, const FloatList &field, const HitsList &exp) { - assertSearch(fs, query, getFieldValue(field), exp); -} - -void assertFloat(FloatFieldSearcher & fs, const std::string &term, const FloatList &field, const Hits &exp) { - assertFloat(fs, StringList().add(term), field, HitsList().add(exp)); -} - -bool -assertFieldInfo(StrChrFieldSearcher &fs, const StringList &query, const std::string &fv, const FieldInfoList &exp) { - return assertFieldInfo(fs, query, StringFieldValue(fv), exp); -} - -bool -assertFieldInfo(StrChrFieldSearcher &fs, const StringList &query, const StringList &fv, const FieldInfoList &exp) { - return assertFieldInfo(fs, query, getFieldValue(fv), exp); -} -bool -assertFieldInfo(StrChrFieldSearcher &fs, const std::string &term, const StringList &fv, const QTFieldInfo &exp) { - return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - -bool -assertFieldInfo(StrChrFieldSearcher &fs, const std::string &term, const std::string &fv, const QTFieldInfo &exp) { - return assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - -void assertFieldInfo(IntFieldSearcher & fs, const StringList &query, int64_t fv, const FieldInfoList &exp) { - assertFieldInfo(fs, query, LongFieldValue(fv), exp); -} - -void assertFieldInfo(IntFieldSearcher & fs, const StringList &query, const LongList &fv, const FieldInfoList &exp) { - assertFieldInfo(fs, query, getFieldValue(fv), exp); -} - -void assertFieldInfo(IntFieldSearcher & fs, const std::string &term, int64_t fv, const QTFieldInfo &exp) { - assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - -void assertFieldInfo(IntFieldSearcher & fs, const std::string &term, const LongList &fv, const QTFieldInfo &exp) { - assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - -void assertFieldInfo(FloatFieldSearcher & fs, const StringList &query, float fv, const FieldInfoList &exp) { - assertFieldInfo(fs, query, FloatFieldValue(fv), exp); -} - -void -assertFieldInfo(FloatFieldSearcher & fs, const StringList &query, const FloatList &fv, const FieldInfoList &exp) { - assertFieldInfo(fs, query, getFieldValue(fv), exp); -} - -/** float field searcher **/ -void assertFieldInfo(FloatFieldSearcher & fs, const std::string &term, float fv, const QTFieldInfo &exp) { - assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - -void assertFieldInfo(FloatFieldSearcher & fs, const std::string &term, const FloatList &fv, const QTFieldInfo &exp) { - assertFieldInfo(fs, StringList().add(term), fv, FieldInfoList().add(exp)); -} - - -/** snippet modifer searcher **/ -void assertSnippetModifier(const std::string &term, const std::string &fv, const std::string &exp) { - assertSnippetModifier(StringList().add(term), fv, exp); -} - - -ArrayFieldValue -getFieldValue(const StringList & fv) -{ - - static ArrayDataType type(*DataType::STRING); - ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(StringFieldValue(fv[i])); - } - return afv; -} - -ArrayFieldValue -getFieldValue(const LongList & fv) -{ - static ArrayDataType type(*DataType::LONG); - ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(LongFieldValue(fv[i])); - } - return afv; -} - -ArrayFieldValue -getFieldValue(const FloatList & fv) -{ - static ArrayDataType type(*DataType::FLOAT); - ArrayFieldValue afv(type); - for (size_t i = 0; i < fv.size(); ++i) { - afv.add(FloatFieldValue(fv[i])); - } - return afv; -} - -bool -assertMatchTermSuffix(const std::string & term, const std::string & word) -{ - QueryNodeResultFactory eqnr; - QueryTerm qa(eqnr.create(), term, "index", TermType::WORD); - QueryTerm qb(eqnr.create(), word, "index", TermType::WORD); - const ucs4_t * a; - size_t alen = qa.term(a); - const ucs4_t * b; - size_t blen = qb.term(b); - return UTF8StringFieldSearcherBase::matchTermSuffix(a, alen, b, blen); -} - -void -assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const BoolList & exp) -{ - HitsList hl; - for (size_t i = 0; i < exp.size(); ++i) { - hl.push_back(exp[i] ? Hits().add(0) : Hits()); - } - assertSearch(fs, query, fv, hl); -} - -std::vector<QueryTerm::UP> -performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv) -{ - Query q(query); - - // prepare field searcher - SharedSearcherBuf ssb = SharedSearcherBuf(new SearcherBuf()); - fs.prepare(q.qtl, ssb); - - // setup document - SharedFieldPathMap sfim(new FieldPathMapT()); - sfim->push_back(FieldPath()); - StorageDocument doc(std::make_unique<document::Document>(), sfim, 1); - doc.setField(0, document::FieldValue::UP(fv.clone())); - - fs.search(doc); - return std::move(q.qtv); -} - -void -assertSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv, const HitsList & exp) -{ - auto qtv = performSearch(fs, query, fv); - EXPECT_EQUAL(qtv.size(), exp.size()); - ASSERT_TRUE(qtv.size() == exp.size()); - for (size_t i = 0; i < qtv.size(); ++i) { - const HitList & hl = qtv[i]->getHitList(); - EXPECT_EQUAL(hl.size(), exp[i].size()); - ASSERT_TRUE(hl.size() == exp[i].size()); - for (size_t j = 0; j < hl.size(); ++j) { - EXPECT_EQUAL((size_t)hl[j].pos(), exp[i][j]); - } - } -} - -bool -assertFieldInfo(FieldSearcher & fs, const StringList & query, - const FieldValue & fv, const FieldInfoList & exp) -{ - auto qtv = performSearch(fs, query, fv); - if (!EXPECT_EQUAL(qtv.size(), exp.size())) return false; - bool retval = true; - for (size_t i = 0; i < qtv.size(); ++i) { - if (!EXPECT_EQUAL(qtv[i]->getFieldInfo(0).getHitOffset(), exp[i].getHitOffset())) retval = false; - if (!EXPECT_EQUAL(qtv[i]->getFieldInfo(0).getHitCount(), exp[i].getHitCount())) retval = false; - if (!EXPECT_EQUAL(qtv[i]->getFieldInfo(0).getFieldLength(), exp[i].getFieldLength())) retval = false; - } - return retval; -} - -void -assertSnippetModifier(const StringList & query, const std::string & fv, const std::string & exp) -{ - UTF8SubstringSnippetModifier mod; - performSearch(mod, query, StringFieldValue(fv)); - EXPECT_EQUAL(mod.getModifiedBuf().getPos(), exp.size()); - std::string actual(mod.getModifiedBuf().getBuffer(), mod.getModifiedBuf().getPos()); - EXPECT_EQUAL(actual.size(), exp.size()); - EXPECT_EQUAL(actual, exp); -} - -void assertSnippetModifier(SnippetModifierSetup & setup, const FieldValue & fv, const std::string & exp) -{ - FieldValue::UP mfv = setup.modifier.modify(fv); - const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(*mfv.get()); - const std::string & actual = lfv.getValue(); - EXPECT_EQUAL(actual.size(), exp.size()); - EXPECT_EQUAL(actual, exp); -} - -void assertQueryTerms(const SnippetModifierManager & man, FieldIdT fId, const StringList & terms) -{ - if (terms.size() == 0) { - ASSERT_TRUE(man.getModifiers().getModifier(fId) == NULL); - return; - } - ASSERT_TRUE(man.getModifiers().getModifier(fId) != NULL); - UTF8SubstringSnippetModifier * searcher = - (static_cast<SnippetModifier *>(man.getModifiers().getModifier(fId)))->getSearcher().get(); - EXPECT_EQUAL(searcher->getQueryTerms().size(), terms.size()); - ASSERT_TRUE(searcher->getQueryTerms().size() == terms.size()); - for (size_t i = 0; i < terms.size(); ++i) { - EXPECT_EQUAL(std::string(searcher->getQueryTerms()[i]->getTerm()), terms[i]); - } -} - -bool assertCountWords(size_t numWords, const std::string & field) -{ - FieldRef ref(field.c_str(), field.size()); - return EXPECT_EQUAL(numWords, FieldSearcher::countWords(ref)); -} - -bool -testStringFieldInfo(StrChrFieldSearcher & fs) -{ - assertString(fs, "foo", StringList().add("foo bar baz").add("foo bar").add("baz foo"), Hits().add(0).add(3).add(6)); - assertString(fs, StringList().add("foo").add("bar"), StringList().add("foo bar baz").add("foo bar").add("baz foo"), - HitsList().add(Hits().add(0).add(3).add(6)).add(Hits().add(1).add(4))); - - bool retval = true; - if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo", QTFieldInfo(0, 1, 1)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo", QTFieldInfo(0, 0, 1)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "bar", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "baz", "foo bar baz", QTFieldInfo(0, 1, 3)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "qux", "foo bar baz", QTFieldInfo(0, 0, 3)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", "foo foo foo", QTFieldInfo(0, 3, 3)))) retval = false; - // query term size > last term size - if (!EXPECT_TRUE(assertFieldInfo(fs, "runner", "Road Runner Disco", QTFieldInfo(0, 1, 3)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("roadrun").add("runner"), "Road Runner Disco", - FieldInfoList().add(QTFieldInfo(0, 0, 3)).add(QTFieldInfo(0, 1, 3))))) retval = false; - // multiple terms - if (!EXPECT_TRUE(assertFieldInfo(fs, "foo", StringList().add("foo bar baz").add("foo bar"), - QTFieldInfo(0, 2, 5)))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), "foo bar baz", - FieldInfoList().add(QTFieldInfo(0, 1, 3)).add(QTFieldInfo(0, 1, 3))))) retval = false; - if (!EXPECT_TRUE(assertFieldInfo(fs, StringList().add("foo").add("baz"), StringList().add("foo bar baz").add("foo bar"), - FieldInfoList().add(QTFieldInfo(0, 2, 5)).add(QTFieldInfo(0, 1, 5))))) retval = false; - return retval; -} -bool -testStrChrFieldSearcher(StrChrFieldSearcher & fs) -{ - std::string field = "operators and operator overloading with utf8 char oe = \xc3\x98"; - assertString(fs, "oper", field, Hits()); - assertString(fs, "tor", field, Hits()); - assertString(fs, "oper*", field, Hits().add(0).add(2)); - assertString(fs, "and", field, Hits().add(1)); - - assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits()).add(Hits())); - assertString(fs, StringList().add("and").add("overloading"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))); - - fs.setMatchType(FieldSearcher::PREFIX); - assertString(fs, "oper", field, Hits().add(0).add(2)); - assertString(fs, StringList().add("oper").add("tor"), field, HitsList().add(Hits().add(0).add(2)).add(Hits())); - - fs.setMatchType(FieldSearcher::REGULAR); - if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false; - - { // test handling of several underscores - StringList query = StringList().add("foo").add("bar"); - HitsList exp = HitsList().add(Hits().add(0)).add(Hits().add(1)); - assertString(fs, query, "foo_bar", exp); - assertString(fs, query, "foo__bar", exp); - assertString(fs, query, "foo___bar", exp); - assertString(fs, query, "foo________bar", exp); - assertString(fs, query, "foo____________________bar", exp); - assertString(fs, query, "________________________________________foo________________________________________bar________________________________________", exp); - query = StringList().add("foo").add("thisisaveryveryverylongword"); - assertString(fs, query, "foo____________________thisisaveryveryverylongword", exp); - - assertString(fs, "bar", "foo bar", Hits().add(1)); - assertString(fs, "bar", "foo____________________bar", Hits().add(1)); - assertString(fs, "bar", "foo____________________thisisaveryveryverylongword____________________bar", Hits().add(2)); - } - return true; -} - - TEST("verify correct term parsing") { - ASSERT_TRUE(Query::parseQueryTerm("index:term").first == "index"); - ASSERT_TRUE(Query::parseQueryTerm("index:term").second == "term"); - ASSERT_TRUE(Query::parseQueryTerm("term").first == ""); - ASSERT_TRUE(Query::parseQueryTerm("term").second == "term"); - ASSERT_TRUE(Query::parseTerm("*substr*").first == "substr"); - ASSERT_TRUE(Query::parseTerm("*substr*").second == TermType::SUBSTRINGTERM); - ASSERT_TRUE(Query::parseTerm("*suffix").first == "suffix"); - ASSERT_TRUE(Query::parseTerm("*suffix").second == TermType::SUFFIXTERM); - ASSERT_TRUE(Query::parseTerm("prefix*").first == "prefix"); - ASSERT_TRUE(Query::parseTerm("prefix*").second == TermType::PREFIXTERM); - ASSERT_TRUE(Query::parseTerm("term").first == "term"); - ASSERT_TRUE(Query::parseTerm("term").second == TermType::WORD); - } - - TEST("suffix matching") { - EXPECT_EQUAL(assertMatchTermSuffix("a", "vespa"), true); - EXPECT_EQUAL(assertMatchTermSuffix("spa", "vespa"), true); - EXPECT_EQUAL(assertMatchTermSuffix("vespa", "vespa"), true); - EXPECT_EQUAL(assertMatchTermSuffix("vvespa", "vespa"), false); - EXPECT_EQUAL(assertMatchTermSuffix("fspa", "vespa"), false); - EXPECT_EQUAL(assertMatchTermSuffix("v", "vespa"), false); - } - -TEST("Test basic strchrfield searchers") { - { - UTF8StrChrFieldSearcher fs(0); - EXPECT_TRUE(testStrChrFieldSearcher(fs)); - } - { - FUTF8StrChrFieldSearcher fs(0); - EXPECT_TRUE(testStrChrFieldSearcher(fs)); - } -} - -bool -testUTF8SubStringFieldSearcher(StrChrFieldSearcher & fs) -{ - std::string field = "operators and operator overloading"; - assertString(fs, "rsand", field, Hits()); - assertString(fs, "ove", field, Hits().add(3)); - assertString(fs, "ing", field, Hits().add(3)); - assertString(fs, "era", field, Hits().add(0).add(2)); - assertString(fs, "a", field, Hits().add(0).add(1).add(2).add(3)); - - assertString(fs, StringList().add("dn").add("gn"), field, HitsList().add(Hits()).add(Hits())); - assertString(fs, StringList().add("ato").add("load"), field, HitsList().add(Hits().add(0).add(2)).add(Hits().add(3))); - - assertString(fs, StringList().add("aa").add("ab"), "aaaab", - HitsList().add(Hits().add(0).add(0).add(0)).add(Hits().add(0))); - - if (!EXPECT_TRUE(testStringFieldInfo(fs))) return false; - return true; -} - -TEST("utf8 substring search") { - { - UTF8SubStringFieldSearcher fs(0); - EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - assertString(fs, "aa", "aaaa", Hits().add(0).add(0)); - } - { - UTF8SubStringFieldSearcher fs(0); - EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - assertString(fs, "abc", "abc bcd abc", Hits().add(0).add(2)); - fs.maxFieldLength(4); - assertString(fs, "abc", "abc bcd abc", Hits().add(0)); - } - { - UTF8SubstringSnippetModifier fs(0); - EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - // we don't have 1 term optimization - assertString(fs, "aa", "aaaa", Hits().add(0).add(0).add(0)); - } -} - -TEST("utf8 substring search with empty term") -{ - UTF8SubStringFieldSearcher fs(0); - EXPECT_TRUE(testUTF8SubStringFieldSearcher(fs)); - assertString(fs, "", "abc", Hits()); - assertFieldInfo(fs, "", "abc", QTFieldInfo().setFieldLength(0)); -} - -TEST("utf8 suffix search") { - UTF8SuffixStringFieldSearcher fs(0); - std::string field = "operators and operator overloading"; - assertString(fs, "rsand", field, Hits()); - assertString(fs, "tor", field, Hits().add(2)); - assertString(fs, "tors", field, Hits().add(0)); - - assertString(fs, StringList().add("an").add("din"), field, HitsList().add(Hits()).add(Hits())); - assertString(fs, StringList().add("nd").add("g"), field, HitsList().add(Hits().add(1)).add(Hits().add(3))); - - EXPECT_TRUE(testStringFieldInfo(fs)); -} - -TEST("utf8 exact match") { - UTF8ExactStringFieldSearcher fs(0); - // regular - TEST_DO(assertString(fs, "vespa", "vespa", Hits().add(0))); - TEST_DO(assertString(fs, "vespar", "vespa", Hits())); - TEST_DO(assertString(fs, "vespa", "vespar", Hits())); - TEST_DO(assertString(fs, "vespa", "vespa vespa", Hits())); - TEST_DO(assertString(fs, "vesp", "vespa", Hits())); - TEST_DO(assertString(fs, "vesp*", "vespa", Hits().add(0))); - TEST_DO(assertString(fs, "hutte", "hutte", Hits().add(0))); - TEST_DO(assertString(fs, "hütte", "hütte", Hits().add(0))); - TEST_DO(assertString(fs, "hutte", "hütte", Hits())); - TEST_DO(assertString(fs, "hütte", "hutte", Hits())); - TEST_DO(assertString(fs, "hütter", "hütte", Hits())); - TEST_DO(assertString(fs, "hütte", "hütter", Hits())); -} - -TEST("utf8 flexible searcher"){ - UTF8FlexibleStringFieldSearcher fs(0); - // regular - assertString(fs, "vespa", "vespa", Hits().add(0)); - assertString(fs, "vesp", "vespa", Hits()); - assertString(fs, "esp", "vespa", Hits()); - assertString(fs, "espa", "vespa", Hits()); - - // prefix - assertString(fs, "vesp*", "vespa", Hits().add(0)); - fs.setMatchType(FieldSearcher::PREFIX); - assertString(fs, "vesp", "vespa", Hits().add(0)); - - // substring - fs.setMatchType(FieldSearcher::REGULAR); - assertString(fs, "*esp*", "vespa", Hits().add(0)); - fs.setMatchType(FieldSearcher::SUBSTRING); - assertString(fs, "esp", "vespa", Hits().add(0)); - - // suffix - fs.setMatchType(FieldSearcher::REGULAR); - assertString(fs, "*espa", "vespa", Hits().add(0)); - fs.setMatchType(FieldSearcher::SUFFIX); - assertString(fs, "espa", "vespa", Hits().add(0)); - - fs.setMatchType(FieldSearcher::REGULAR); - EXPECT_TRUE(testStringFieldInfo(fs)); -} - -TEST("bool search") { - BoolFieldSearcher fs(0); - TEST_DO(assertBool(fs, "true", true, true)); - TEST_DO(assertBool(fs, "true", false, false)); - TEST_DO(assertBool(fs, "1", true, true)); - TEST_DO(assertBool(fs, "1", false, false)); - TEST_DO(assertBool(fs, "false", true, false)); - TEST_DO(assertBool(fs, "false", false, true)); - TEST_DO(assertBool(fs, "0", true, false)); - TEST_DO(assertBool(fs, "0", false, true)); - TEST_DO(assertBool(fs, StringList().add("true").add("false").add("true"), true, BoolList().add(true).add(false).add(true))); - TEST_DO(assertBool(fs, StringList().add("true").add("false").add("true"), false, BoolList().add(false).add(true).add(false))); -} - -TEST("integer search") -{ - IntFieldSearcher fs(0); - TEST_DO(assertInt(fs, "10", 10, true)); - TEST_DO(assertInt(fs, "9", 10, false)); - TEST_DO(assertInt(fs, ">9", 10, true)); - TEST_DO(assertInt(fs, ">9", 9, false)); - TEST_DO(assertInt(fs, "<11", 10, true)); - TEST_DO(assertInt(fs, "<11", 11, false)); - TEST_DO(assertInt(fs, "-10", -10, true)); - TEST_DO(assertInt(fs, "-9", -10, false)); - TEST_DO(assertInt(fs, "a", 10, false)); - TEST_DO(assertInt(fs, "[-5;5]", -5, true)); - TEST_DO(assertInt(fs, "[-5;5]", 0, true)); - TEST_DO(assertInt(fs, "[-5;5]", 5, true)); - TEST_DO(assertInt(fs, "[-5;5]", -6, false)); - TEST_DO(assertInt(fs, "[-5;5]", 6, false)); - - TEST_DO(assertInt(fs, StringList().add("9").add("11"), 10, BoolList().add(false).add(false))); - TEST_DO(assertInt(fs, StringList().add("9").add("10"), 10, BoolList().add(false).add(true))); - TEST_DO(assertInt(fs, StringList().add("10").add(">9"), 10, BoolList().add(true).add(true))); - - TEST_DO(assertInt(fs, "10", LongList().add(10).add(20).add(10).add(30), Hits().add(0).add(2))); - TEST_DO(assertInt(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30), - HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)))); - - TEST_DO(assertFieldInfo(fs, "10", 10, QTFieldInfo(0, 1, 1))); - TEST_DO(assertFieldInfo(fs, "10", LongList().add(10).add(20).add(10).add(30), QTFieldInfo(0, 2, 4))); - TEST_DO(assertFieldInfo(fs, StringList().add("10").add("20"), 10, - FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1)))); - TEST_DO(assertFieldInfo(fs, StringList().add("10").add("20"), LongList().add(10).add(20).add(10).add(30), - FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4)))); -} - -TEST("floating point search") -{ - FloatFieldSearcher fs; - TEST_DO(assertFloat(fs, "10", 10, true)); - TEST_DO(assertFloat(fs, "10.5", 10.5, true)); - TEST_DO(assertFloat(fs, "-10.5", -10.5, true)); - TEST_DO(assertFloat(fs, ">10.5", 10.6, true)); - TEST_DO(assertFloat(fs, ">10.5", 10.5, false)); - TEST_DO(assertFloat(fs, "<10.5", 10.4, true)); - TEST_DO(assertFloat(fs, "<10.5", 10.5, false)); - TEST_DO(assertFloat(fs, "10.4", 10.5, false)); - TEST_DO(assertFloat(fs, "-10.4", -10.5, false)); - TEST_DO(assertFloat(fs, "a", 10.5, false)); - TEST_DO(assertFloat(fs, "[-5.5;5.5]", -5.5, true)); - TEST_DO(assertFloat(fs, "[-5.5;5.5]", 0, true)); - TEST_DO(assertFloat(fs, "[-5.5;5.5]", 5.5, true)); - TEST_DO(assertFloat(fs, "[-5.5;5.5]", -5.6, false)); - TEST_DO(assertFloat(fs, "[-5.5;5.5]", 5.6, false)); - - TEST_DO(assertFloat(fs, StringList().add("10").add("11"), 10.5, BoolList().add(false).add(false))); - TEST_DO(assertFloat(fs, StringList().add("10").add("10.5"), 10.5, BoolList().add(false).add(true))); - TEST_DO(assertFloat(fs, StringList().add(">10.4").add("10.5"), 10.5, BoolList().add(true).add(true))); - - TEST_DO(assertFloat(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), Hits().add(0).add(2))); - TEST_DO(assertFloat(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5), - HitsList().add(Hits().add(0).add(2)).add(Hits().add(1)))); - - TEST_DO(assertFieldInfo(fs, "10.5", 10.5, QTFieldInfo(0, 1, 1))); - TEST_DO(assertFieldInfo(fs, "10.5", FloatList().add(10.5).add(20.5).add(10.5).add(30.5), QTFieldInfo(0, 2, 4))); - TEST_DO(assertFieldInfo(fs, StringList().add("10.5").add("20.5"), 10.5, - FieldInfoList().add(QTFieldInfo(0, 1, 1)).add(QTFieldInfo(0, 0, 1)))); - TEST_DO(assertFieldInfo(fs, StringList().add("10.5").add("20.5"), FloatList().add(10.5).add(20.5).add(10.5).add(30.5), - FieldInfoList().add(QTFieldInfo(0, 2, 4)).add(QTFieldInfo(0, 1, 4)))); -} - -TEST("Snippet modifier search") { - // ascii - assertSnippetModifier("f", "foo", "\x1F""f\x1Foo"); - assertSnippetModifier("o", "foo", "f\x1Fo\x1F\x1Fo\x1F"); - assertSnippetModifier("r", "bar", "ba\x1Fr\x1F"); - assertSnippetModifier("foo", "foo foo", "\x1F""foo\x1F \x1F""foo\x1F"); - assertSnippetModifier("aa", "aaaaaa", "\x1F""aa\x1F\x1F""aa\x1F\x1F""aa\x1F"); - assertSnippetModifier("ab", "abcd\x1F""efgh", "\x1F""ab\x1F""cd\x1F""efgh"); - assertSnippetModifier("ef", "abcd\x1F""efgh", "abcd\x1F\x1F""ef\x1Fgh"); - assertSnippetModifier("fg", "abcd\x1F""efgh", "abcd\x1F""e\x1F""fg\x1Fh"); - // the separator overlapping the match is skipped - assertSnippetModifier("cdef", "abcd\x1F""efgh", "ab\x1F""cdef\x1F""gh"); - // no hits - assertSnippetModifier("bb", "aaaaaa", "aaaaaa"); - - - // multiple query terms - assertSnippetModifier(StringList().add("ab").add("cd"), "abcd", "\x1F""ab\x1F\x1F""cd\x1F"); - // when we have overlap we only get the first match - assertSnippetModifier(StringList().add("ab").add("bc"), "abcd", "\x1F""ab\x1F""cd"); - assertSnippetModifier(StringList().add("bc").add("ab"), "abcd", "\x1F""ab\x1F""cd"); - // the separator overlapping the match is skipped - assertSnippetModifier(StringList().add("de").add("ef"), "abcd\x1F""efgh", "abc\x1F""de\x1F""fgh"); - - // cjk - assertSnippetModifier("\xe7\x9f\xb3", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8", - "\x1f\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8"); - assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\xe5\x87\xb1\xe5\x9c\xa8", - "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8"); - // the separator overlapping the match is skipped - assertSnippetModifier("\xe6\x98\x8e\xe5\x87\xb1", "\xe7\x9f\xb3\xe6\x98\x8e\x1f\xe5\x87\xb1\xe5\x9c\xa8", - "\xe7\x9f\xb3\x1f\xe6\x98\x8e\xe5\x87\xb1\x1f\xe5\x9c\xa8"); - - { // check that resizing works - UTF8SubstringSnippetModifier mod; - EXPECT_EQUAL(mod.getModifiedBuf().getLength(), 32u); - EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 0u); - performSearch(mod, StringList().add("a"), StringFieldValue("aaaaaaaaaaaaaaaa")); - EXPECT_EQUAL(mod.getModifiedBuf().getPos(), 16u + 2 * 16u); - EXPECT_TRUE(mod.getModifiedBuf().getLength() >= mod.getModifiedBuf().getPos()); - } -} - -TEST("snippet modifier") { - { // string field value - SnippetModifierSetup sms(StringList().add("ab")); - // multiple invokations - assertSnippetModifier(sms, StringFieldValue("ab"), "\x1F""ab\x1F"); - assertSnippetModifier(sms, StringFieldValue("xxxxabxxxxabxxxx"), "xxxx\x1F""ab\x1Fxxxx\x1F""ab\x1Fxxxx"); - assertSnippetModifier(sms, StringFieldValue("xxabxx"), "xx\x1F""ab\x1Fxx"); - } - { // collection field value - SnippetModifierSetup sms(StringList().add("ab")); - // multiple invokations - assertSnippetModifier(sms, getFieldValue(StringList().add("ab")), "\x1F""ab\x1F"); - assertSnippetModifier(sms, getFieldValue(StringList().add("xxabxx")), "xx\x1F""ab\x1Fxx"); - assertSnippetModifier(sms, getFieldValue(StringList().add("ab").add("xxabxx").add("xxxxxx")), - "\x1F""ab\x1F\x1E""xx\x1F""ab\x1F""xx\x1E""xxxxxx"); - assertSnippetModifier(sms, getFieldValue(StringList().add("cd").add("ef").add("gh")), - "cd\x1E""ef\x1E""gh"); - } - { // check that resizing works - SnippetModifierSetup sms(StringList().add("a")); - EXPECT_EQUAL(sms.modifier.getValueBuf().getLength(), 32u); - EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 0u); - sms.modifier.modify(StringFieldValue("aaaaaaaaaaaaaaaa")); - EXPECT_EQUAL(sms.modifier.getValueBuf().getPos(), 16u + 2 * 16u); - EXPECT_TRUE(sms.modifier.getValueBuf().getLength() >= sms.modifier.getValueBuf().getPos()); - } -} - -TEST("FieldSearchSpec constrution") { - { - FieldSearchSpec f; - EXPECT_FALSE(f.valid()); - EXPECT_EQUAL(0u, f.id()); - EXPECT_EQUAL("", f.name()); - EXPECT_EQUAL(0x100000u, f.maxLength()); - } - { - FieldSearchSpec f(7, "f0", VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8, "substring", 789); - EXPECT_TRUE(f.valid()); - EXPECT_EQUAL(7u, f.id()); - EXPECT_EQUAL("f0", f.name()); - EXPECT_EQUAL(789u, f.maxLength()); - EXPECT_EQUAL(789u, f.searcher().maxFieldLength()); - } -} - -TEST("snippet modifier manager") { - FieldSearchSpecMapT specMap; - specMap[0] = FieldSearchSpec(0, "f0", VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8, "substring", 1000); - specMap[1] = FieldSearchSpec(1, "f1", VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8, "", 1000); - IndexFieldMapT indexMap; - indexMap["i0"].push_back(0); - indexMap["i1"].push_back(1); - indexMap["i2"].push_back(0); - indexMap["i2"].push_back(1); - - { - SnippetModifierManager man; - Query query(StringList().add("i0:foo")); - man.setup(query.qtl, specMap, indexMap); - assertQueryTerms(man, 0, StringList().add("foo")); - assertQueryTerms(man, 1, StringList()); - } - { - SnippetModifierManager man; - Query query(StringList().add("i1:foo")); - man.setup(query.qtl, specMap, indexMap); - assertQueryTerms(man, 0, StringList()); - assertQueryTerms(man, 1, StringList()); - } - { - SnippetModifierManager man; - Query query(StringList().add("i1:*foo*")); - man.setup(query.qtl, specMap, indexMap); - assertQueryTerms(man, 0, StringList()); - assertQueryTerms(man, 1, StringList().add("foo")); - } - { - SnippetModifierManager man; - Query query(StringList().add("i2:foo").add("i2:*bar*")); - man.setup(query.qtl, specMap, indexMap); - assertQueryTerms(man, 0, StringList().add("foo").add("bar")); - assertQueryTerms(man, 1, StringList().add("bar")); - } - { // check buffer sizes - SnippetModifierManager man; - Query query(StringList().add("i2:foo").add("i2:*bar*")); - man.setup(query.qtl, specMap, indexMap); - { - SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(0)); - UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); - EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); - EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); - } - { - SnippetModifier * sm = static_cast<SnippetModifier *>(man.getModifiers().getModifier(1)); - UTF8SubstringSnippetModifier * searcher = sm->getSearcher().get(); - EXPECT_EQUAL(sm->getValueBuf().getLength(), 128u); - EXPECT_EQUAL(searcher->getModifiedBuf().getLength(), 64u); - } - } -} - -TEST("Stripping of indexes") -{ - EXPECT_EQUAL("f", FieldSearchSpecMap::stripNonFields("f")); - EXPECT_EQUAL("f", FieldSearchSpecMap::stripNonFields("f[0]")); - EXPECT_EQUAL("f[a]", FieldSearchSpecMap::stripNonFields("f[a]")); - - EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a}")); - EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{a0}")); - EXPECT_EQUAL("f{a 0}", FieldSearchSpecMap::stripNonFields("f{a 0}")); - EXPECT_EQUAL("f.value", FieldSearchSpecMap::stripNonFields("f{\"a 0\"}")); -} - -TEST("counting of words") { - EXPECT_TRUE(assertCountWords(0, "")); - EXPECT_TRUE(assertCountWords(0, "?")); - EXPECT_TRUE(assertCountWords(1, "foo")); - EXPECT_TRUE(assertCountWords(2, "foo bar")); - EXPECT_TRUE(assertCountWords(2, "? foo bar")); - EXPECT_TRUE(assertCountWords(2, "foo bar ?")); - - // check that 'a' is counted as 1 word - UTF8StrChrFieldSearcher fs(0); - StringList field = StringList().add("a").add("aa bb cc"); - assertString(fs, "bb", field, Hits().add(2)); - assertString(fs, StringList().add("bb").add("not"), field, HitsList().add(Hits().add(2)).add(Hits())); -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt b/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt index 01b625b6b3b..fdbd60ce30a 100644 --- a/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt +++ b/streamingvisitors/src/tests/searchvisitor/CMakeLists.txt @@ -3,6 +3,6 @@ vespa_add_executable(streamingvisitors_searchvisitor_test_app TEST SOURCES searchvisitor_test.cpp DEPENDS - streamingvisitors + streamingvisitors_searchvisitor ) vespa_add_test(NAME streamingvisitors_searchvisitor_test_app COMMAND streamingvisitors_searchvisitor_test_app) diff --git a/streamingvisitors/src/tests/textutil/.gitignore b/streamingvisitors/src/tests/textutil/.gitignore deleted file mode 100644 index 1103f79800a..00000000000 --- a/streamingvisitors/src/tests/textutil/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -textutil_test -vsm_textutil_test_app diff --git a/streamingvisitors/src/tests/textutil/CMakeLists.txt b/streamingvisitors/src/tests/textutil/CMakeLists.txt deleted file mode 100644 index 59817d01137..00000000000 --- a/streamingvisitors/src/tests/textutil/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vsm_textutil_test_app TEST - SOURCES - textutil.cpp - DEPENDS - streamingvisitors -) -vespa_add_test(NAME vsm_textutil_test_app COMMAND vsm_textutil_test_app) diff --git a/streamingvisitors/src/tests/textutil/textutil.cpp b/streamingvisitors/src/tests/textutil/textutil.cpp deleted file mode 100644 index 2a1390eaa01..00000000000 --- a/streamingvisitors/src/tests/textutil/textutil.cpp +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> - -#include <vespa/fastlib/text/unicodeutil.h> -#include <vespa/searchlib/query/base.h> -#include <vespa/vsm/searcher/fold.h> -#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> -#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h> - -using search::byte; // unsigned char - -namespace vsm { - -template <typename T> -class Vector : public std::vector<T> -{ -public: - Vector() : std::vector<T>() {} - Vector<T> & a(T v) { this->push_back(v); return *this; } -}; - -typedef Vector<ucs4_t> UCS4V; -typedef Vector<size_t> SizeV; -typedef UTF8StringFieldSearcherBase SFSB; -typedef FUTF8StrChrFieldSearcher FSFS; - -class TextUtilTest : public vespalib::TestApp -{ -private: - ucs4_t getUTF8Char(const char * src); - template <typename BW, bool OFF> - void assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets); - void assertAnsiFold(const std::string & toFold, const std::string & exp); - void assertAnsiFold(char c, char exp); -#ifdef __x86_64__ - void assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp); - void assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded = 16); -#endif - - template <typename BW, bool OFF> - void testSkipSeparators(); - void testSkipSeparators(); - void testSeparatorCharacter(); - void testAnsiFold(); - void test_lfoldua(); -#ifdef __x86_64__ - void test_sse2_foldua(); -#endif - -public: - int Main() override; -}; - -ucs4_t -TextUtilTest::getUTF8Char(const char * src) -{ - ucs4_t retval = Fast_UnicodeUtil::GetUTF8Char(src); - ASSERT_TRUE(retval != Fast_UnicodeUtil::_BadUTF8Char); - return retval; -} - -template <typename BW, bool OFF> -void -TextUtilTest::assertSkipSeparators(const char * input, size_t len, const UCS4V & expdstbuf, const SizeV & expoffsets) -{ - const byte * srcbuf = reinterpret_cast<const byte *>(input); - auto dstbuf = std::make_unique<ucs4_t[]>(len + 1); - auto offsets = std::make_unique<size_t[]>(len + 1); - UTF8StrChrFieldSearcher fs; - BW bw(dstbuf.get(), offsets.get()); - size_t dstlen = fs.skipSeparators(srcbuf, len, bw); - EXPECT_EQUAL(dstlen, expdstbuf.size()); - ASSERT_TRUE(dstlen == expdstbuf.size()); - for (size_t i = 0; i < dstlen; ++i) { - EXPECT_EQUAL(dstbuf[i], expdstbuf[i]); - if (OFF) { - EXPECT_EQUAL(offsets[i], expoffsets[i]); - } - } -} - -void -TextUtilTest::assertAnsiFold(const std::string & toFold, const std::string & exp) -{ - char folded[256]; - EXPECT_TRUE(FSFS::ansiFold(toFold.c_str(), toFold.size(), folded)); - EXPECT_EQUAL(std::string(folded, toFold.size()), exp); -} - -void -TextUtilTest::assertAnsiFold(char c, char exp) -{ - char folded; - EXPECT_TRUE(FSFS::ansiFold(&c, 1, &folded)); - EXPECT_EQUAL((int32_t)folded, (int32_t)exp); -} - -#ifdef __x86_64__ -void -TextUtilTest::assert_sse2_foldua(const std::string & toFold, size_t charFolded, const std::string & exp) -{ - char folded[256]; - size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); - const unsigned char * toFoldOrg = reinterpret_cast<const unsigned char *>(toFold.c_str()); - const unsigned char * retval = - sse2_foldua(toFoldOrg, toFold.size(), reinterpret_cast<unsigned char *>(folded + alignedStart)); - EXPECT_EQUAL((size_t)(retval - toFoldOrg), charFolded); - EXPECT_EQUAL(std::string(folded + alignedStart, charFolded), exp); -} - -void -TextUtilTest::assert_sse2_foldua(unsigned char c, unsigned char exp, size_t charFolded) -{ - unsigned char toFold[16]; - memset(toFold, c, 16); - unsigned char folded[32]; - size_t alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); - const unsigned char * retval = sse2_foldua(toFold, 16, folded + alignedStart); - EXPECT_EQUAL((size_t)(retval - toFold), charFolded); - for (size_t i = 0; i < charFolded; ++i) { - EXPECT_EQUAL((int32_t)folded[i + alignedStart], (int32_t)exp); - } -} -#endif - -template <typename BW, bool OFF> -void -TextUtilTest::testSkipSeparators() -{ - // ascii characters - assertSkipSeparators<BW, OFF>("foo", 3, UCS4V().a('f').a('o').a('o'), SizeV().a(0).a(1).a(2)); - assertSkipSeparators<BW, OFF>("f\x1Fo", 3, UCS4V().a('f').a('o'), SizeV().a(0).a(2)); - assertSkipSeparators<BW, OFF>("f\no", 3, UCS4V().a('f').a('\n').a('o'), SizeV().a(0).a(1).a(2)); - assertSkipSeparators<BW, OFF>("f\to", 3, UCS4V().a('f').a('\t').a('o'), SizeV().a(0).a(1).a(2)); - - // utf8 char - assertSkipSeparators<BW, OFF>("\xC2\x80\x66", 3, UCS4V().a(getUTF8Char("\xC2\x80")).a('f'), - SizeV().a(0).a(2)); - assertSkipSeparators<BW, OFF>("\xE0\xA0\x80\x66", 4, UCS4V().a(getUTF8Char("\xE0\xA0\x80")).a('f'), - SizeV().a(0).a(3)); - assertSkipSeparators<BW, OFF>("\xF0\x90\x80\x80\x66", 5, UCS4V().a(getUTF8Char("\xF0\x90\x80\x80")).a('f'), - SizeV().a(0).a(4)); - - // replacement string (sharp s -> ss) - assertSkipSeparators<BW, OFF>("\xC3\x9F\x66\xC3\x9F", 5, UCS4V().a('s').a('s').a('f').a('s').a('s'), - SizeV().a(0).a(0).a(2).a(3).a(3)); -} - -void -TextUtilTest::testSkipSeparators() -{ - Fast_NormalizeWordFolder::Setup(Fast_NormalizeWordFolder::DO_SHARP_S_SUBSTITUTION); - - testSkipSeparators<SFSB::BufferWrapper, false>(); - testSkipSeparators<SFSB::OffsetWrapper, true>(); -} - -void -TextUtilTest::testSeparatorCharacter() -{ - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x00')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x01')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x02')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x03')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x04')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x05')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x06')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x07')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x08')); - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x09')); // '\t' - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x0a')); // '\n' - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0b')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0c')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0d')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0e')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x0f')); - - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x10')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x11')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x12')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x13')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x14')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x15')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x16')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x17')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x18')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x19')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1a')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1b')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1c')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1d')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1e')); - EXPECT_TRUE(SFSB::isSeparatorCharacter('\x1f')); - - EXPECT_TRUE(! SFSB::isSeparatorCharacter('\x20')); // space -} - -void -TextUtilTest::testAnsiFold() -{ - FieldSearcher::init(); - assertAnsiFold("", ""); - assertAnsiFold("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"); - assertAnsiFold("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz"); - assertAnsiFold("0123456789", "0123456789"); - for (int i = 0; i < 128; ++i) { - if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { - assertAnsiFold(i, i); - } else if (i >= 'A' && i <= 'Z') { - assertAnsiFold(i, i + 32); - } else { - assertAnsiFold(i, 0); - } - } - - // non-ascii is ignored - for (int i = 128; i < 256; ++i) { - char toFold = i; - char folded; - EXPECT_TRUE(!FSFS::ansiFold(&toFold, 1, &folded)); - } -} - -void -TextUtilTest::test_lfoldua() -{ - FieldSearcher::init(); - char folded[256]; - size_t alignedStart = 0; - const char * toFold = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - size_t len = strlen(toFold); - EXPECT_TRUE(FSFS::lfoldua(toFold, len, folded, alignedStart)); - EXPECT_EQUAL(std::string(folded + alignedStart, len), "abcdefghijklmnopqrstuvwxyz"); -} - -#ifdef __x86_64__ -void -TextUtilTest::test_sse2_foldua() -{ - assert_sse2_foldua("", 0, ""); - assert_sse2_foldua("ABCD", 0, ""); - assert_sse2_foldua("ABCDEFGHIJKLMNO", 0, ""); - assert_sse2_foldua("ABCDEFGHIJKLMNOP", 16, "abcdefghijklmnop"); - assert_sse2_foldua("ABCDEFGHIJKLMNOPQ", 16, "abcdefghijklmnop"); - assert_sse2_foldua("KLMNOPQRSTUVWXYZ", 16, "klmnopqrstuvwxyz"); - assert_sse2_foldua("abcdefghijklmnop", 16, "abcdefghijklmnop"); - assert_sse2_foldua("klmnopqrstuvwxyz", 16, "klmnopqrstuvwxyz"); - assert_sse2_foldua("0123456789abcdef", 16, "0123456789abcdef"); - - for (int i = 0; i < 128; ++i) { - if ((i >= 'a' && i <= 'z') || (i >= '0' && i <= '9')) { - assert_sse2_foldua(i, i); - } else if (i >= 'A' && i <= 'Z') { - assert_sse2_foldua(i, i + 32); - } else { - assert_sse2_foldua(i, 0); - } - } - - // non-ascii is ignored - for (int i = 128; i < 256; ++i) { - assert_sse2_foldua(i, '?', 0); - } -} -#endif - -int -TextUtilTest::Main() -{ - TEST_INIT("textutil_test"); - - testSkipSeparators(); - testSeparatorCharacter(); - testAnsiFold(); - test_lfoldua(); -#ifdef __x86_64__ - test_sse2_foldua(); -#endif - - TEST_DONE(); -} - -} - -TEST_APPHOOK(vsm::TextUtilTest); diff --git a/streamingvisitors/src/tests/utilapps/.gitignore b/streamingvisitors/src/tests/utilapps/.gitignore deleted file mode 100644 index e69de29bb2d..00000000000 --- a/streamingvisitors/src/tests/utilapps/.gitignore +++ /dev/null diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt index ff629462f9e..e8f85fc987e 100644 --- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt +++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt @@ -1,5 +1,5 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(streamingvisitors +vespa_add_library(streamingvisitors_searchvisitor SOURCES hitcollector.cpp indexenvironment.cpp @@ -11,10 +11,6 @@ vespa_add_library(streamingvisitors rankprocessor.cpp searchenvironment.cpp searchvisitor.cpp - $<TARGET_OBJECTS:vsm_vconfig> - $<TARGET_OBJECTS:vsm_vsmbase> - $<TARGET_OBJECTS:vsm_vsmcommon> - $<TARGET_OBJECTS:vsm_vsmsearcher> INSTALL lib64 DEPENDS searchlib_searchlib_uca diff --git a/streamingvisitors/src/vespa/vsm/.gitignore b/streamingvisitors/src/vespa/vsm/.gitignore deleted file mode 100644 index 4c5f5d9ef7a..00000000000 --- a/streamingvisitors/src/vespa/vsm/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.depend -Makefile -/libvsm.so.5.1 diff --git a/streamingvisitors/src/vespa/vsm/common/.gitignore b/streamingvisitors/src/vespa/vsm/common/.gitignore deleted file mode 100644 index 95bc02923a9..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.exe -*.ilk -*.pdb -.depend* -Makefile diff --git a/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt deleted file mode 100644 index 4570a9b581e..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(vsm_vsmcommon OBJECT - SOURCES - charbuffer.cpp - document.cpp - documenttypemapping.cpp - fieldmodifier.cpp - storagedocument.cpp - DEPENDS -) diff --git a/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp b/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp deleted file mode 100644 index b8fbb5c8846..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/charbuffer.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "charbuffer.h" -#include <cstring> - -namespace vsm { - -CharBuffer::CharBuffer(size_t len) : - _buffer(len), - _pos(0) -{ } - -void -CharBuffer::put(const char * src, size_t n) -{ - if (n > getRemaining()) { - resize(_pos + (n * 2)); - } - char * dst = &_buffer[_pos]; - memcpy(dst, src, n); - _pos += n; -} - -void -CharBuffer::resize(size_t len) -{ - if (len > getLength()) { - _buffer.resize(len); - } -} - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/charbuffer.h b/streamingvisitors/src/vespa/vsm/common/charbuffer.h deleted file mode 100644 index 08618a9b973..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/charbuffer.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vector> -#include <memory> - -namespace vsm { - -/** - * Simple growable char buffer. - **/ -class CharBuffer -{ -private: - std::vector<char> _buffer; - size_t _pos; - -public: - typedef std::shared_ptr<CharBuffer> SP; - - /** - * Creates a char buffer with len bytes. - **/ - CharBuffer(size_t len = 0); - - /** - * Copies n bytes from the src array into the underlying buffer at the - * current position, and updates the position accordingly. - * Resizing will occur if needed. - **/ - void put(const char * src, size_t n); - - /** - * Resizes the buffer so that the new length becomes len. - * Resizing will not occur if len < current length. - **/ - void resize(size_t len); - - /** - * Resets the position to the beginning of the buffer. - **/ - void reset() { _pos = 0; } - - const char * getBuffer() const { return &_buffer[0]; } - size_t getLength() const { return _buffer.size(); } - size_t getPos() const { return _pos; } - size_t getRemaining() const { return getLength() - getPos(); } - void put(char c) { put(&c, 1); } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/docsum.h b/streamingvisitors/src/vespa/vsm/common/docsum.h deleted file mode 100644 index 49b84cb0783..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/docsum.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "document.h" - -namespace vsm { - -/** - Will represent a cache of the document summaries. -> Actual docsums will be - generated on the fly when requested. A document summary is accessed by its - documentId. -*/ - -class IDocSumCache -{ -public: - virtual const Document & getDocSum(const search::DocumentIdT & docId) const = 0; - virtual ~IDocSumCache() { } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/document.cpp b/streamingvisitors/src/vespa/vsm/common/document.cpp deleted file mode 100644 index a345c82ce2d..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/document.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "document.h" -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/stllike/hash_map.hpp> - -using search::DocumentIdT; -using search::TimeT; -using document::FieldValue; - -namespace vsm -{ - -vespalib::asciistream & operator << (vespalib::asciistream & os, const FieldRef & f) -{ - const char *s = f.data(); - os << f.size(); - if (s) { - os << s; // Better hope it's null terminated! - } - os << " : "; - return os; -} - -vespalib::asciistream & operator << (vespalib::asciistream & os, const StringFieldIdTMap & f) -{ - for (StringFieldIdTMapT::const_iterator it=f._map.begin(), mt=f._map.end(); it != mt; it++) { - os << it->first << " = " << it->second << '\n'; - } - return os; -} - -StringFieldIdTMap::StringFieldIdTMap() : - _map() -{ -} - -void StringFieldIdTMap::add(const vespalib::string & s, FieldIdT fieldId) -{ - _map[s] = fieldId; -} - -void StringFieldIdTMap::add(const vespalib::string & s) -{ - if (_map.find(s) == _map.end()) { - FieldIdT fieldId = _map.size(); - _map[s] = fieldId; - } -} - -FieldIdT StringFieldIdTMap::fieldNo(const vespalib::string & fName) const -{ - StringFieldIdTMapT::const_iterator found = _map.find(fName); - FieldIdT fNo((found != _map.end()) ? found->second : npos); - return fNo; -} - -size_t StringFieldIdTMap::highestFieldNo() const -{ - size_t maxFNo(0); - for (const auto & field : _map) { - if (field.second >= maxFNo) { - maxFNo = field.second + 1; - } - } - return maxFNo; -} - -Document::~Document() { } - -} - -VESPALIB_HASH_MAP_INSTANTIATE(vespalib::string, vsm::FieldIdTList); -VESPALIB_HASH_MAP_INSTANTIATE(vespalib::string, vsm::IndexFieldMapT); diff --git a/streamingvisitors/src/vespa/vsm/common/document.h b/streamingvisitors/src/vespa/vsm/common/document.h deleted file mode 100644 index 8c11d27072b..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/document.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/searchlib/query/base.h> -#include <vespa/document/fieldvalue/fieldvalue.h> -#include <vespa/vespalib/stllike/hash_map.h> -#include <map> - -namespace vespalib { - class asciistream; -} - -namespace vsm { - -/// Type to identify fields in documents. -typedef unsigned int FieldIdT; -/// A type to represent a list of FieldIds. -typedef std::vector<FieldIdT> FieldIdTList; -/// A type to represent all the fields contained in all the indexs. -typedef vespalib::hash_map<vespalib::string, FieldIdTList> IndexFieldMapT; -/// A type to represent all the fields contained in all the indexs in an all the document types. -typedef vespalib::hash_map<vespalib::string, IndexFieldMapT> DocumentTypeIndexFieldMapT; -/// A type to represent a map from fieldname to fieldid. -typedef std::map<vespalib::string, FieldIdT> StringFieldIdTMapT; - -class StringFieldIdTMap -{ - public: - enum { npos=0xFFFFFFFF }; - StringFieldIdTMap(); - FieldIdT fieldNo(const vespalib::string & fName) const; - void add(const vespalib::string & s); - void add(const vespalib::string & s, FieldIdT fNo); - const StringFieldIdTMapT & map() const { return _map; } - size_t highestFieldNo() const; - friend vespalib::asciistream & operator << (vespalib::asciistream & os, const StringFieldIdTMap & f); - private: - StringFieldIdTMapT _map; -}; - -typedef vespalib::stringref FieldRef; - -/** - This is the base class representing a document. It gives a document some - basic properties. A document is a collection of fields, together with a - document id and a time stamp. -*/ -class Document -{ - public: - Document(size_t maxFieldCount) : _docId(0), _fieldCount(maxFieldCount) { } - Document(search::DocumentIdT doc, size_t maxFieldCount) : _docId(doc), _fieldCount(maxFieldCount) { } - virtual ~Document(); - const search::DocumentIdT & getDocId() const { return _docId; } - size_t getFieldCount() const { return _fieldCount; } - void setDocId(const search::DocumentIdT & v) { _docId = v; } - virtual const document::FieldValue * getField(FieldIdT fId) const = 0; - /** - Returns true, if not possible to set. - */ - virtual bool setField(FieldIdT fId, document::FieldValue::UP fv) = 0; - private: - search::DocumentIdT _docId; - const size_t _fieldCount; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp b/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp deleted file mode 100644 index 7886c44b2e0..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "documenttypemapping.h" -#include <vespa/document/repo/documenttyperepo.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/vespalib/stllike/hash_map.hpp> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.common.documenttypemapping"); - -namespace vsm { - -DocumentTypeMapping::DocumentTypeMapping() : - _fieldMap(), - _defaultDocumentTypeName(), - _defaultDocumentType(), - _documentTypeFreq() -{ } - -DocumentTypeMapping::~DocumentTypeMapping() { } - -namespace { - -vespalib::string getDocTypeId(const document::DocumentType & docType) -{ - vespalib::string typeId(docType.getName()); - typeId += "0"; // Hardcoded version (version not supported) - return typeId; -} - -} - -void DocumentTypeMapping::init(const vespalib::string & defaultDocumentType, - const StringFieldIdTMapT & fieldList, - const document::DocumentTypeRepo &repo) -{ - _defaultDocumentType = repo.getDocumentType(defaultDocumentType); - _defaultDocumentTypeName = getDocTypeId(*_defaultDocumentType); - LOG(debug, "Setting default document type to '%s'", - _defaultDocumentTypeName.c_str()); - buildFieldMap(_defaultDocumentType, fieldList, _defaultDocumentTypeName); -} - -bool DocumentTypeMapping::prepareBaseDoc(SharedFieldPathMap & map) const -{ - FieldPathMapMapT::const_iterator found = _fieldMap.find(_defaultDocumentTypeName); - if (found != _fieldMap.end()) { - map = std::make_shared<FieldPathMapT>(found->second); - LOG(debug, "Found FieldPathMap for default document type '%s' with %zd elements", - _defaultDocumentTypeName.c_str(), map->size()); - } else { - LOG(warning, "No FieldPathMap found for default document type '%s'. Using empty one", - _defaultDocumentTypeName.c_str()); - map = std::make_shared<FieldPathMapT>(); - } - return true; -} - -void DocumentTypeMapping::buildFieldMap( - const document::DocumentType *docTypePtr, - const StringFieldIdTMapT & fieldList, const vespalib::string & typeId) -{ - LOG(debug, "buildFieldMap: docType = '%s', fieldList.size = '%zd', typeId = '%s'", - docTypePtr->getName().c_str(), fieldList.size(), typeId.c_str()); - const document::DocumentType & docType = *docTypePtr; - size_t highestFNo(0); - for (StringFieldIdTMapT::const_iterator it = fieldList.begin(), mt = fieldList.end(); it != mt; it++) { - highestFNo = std::max(highestFNo, size_t(it->second)); - } - highestFNo++; - FieldPathMapT & fieldMap = _fieldMap[typeId]; - - fieldMap.resize(highestFNo); - - size_t validCount(0); - for (StringFieldIdTMapT::const_iterator it = fieldList.begin(), mt = fieldList.end(); it != mt; it++) { - vespalib::string fname = it->first; - LOG(debug, "Handling %s -> %d", fname.c_str(), it->second); - try { - if ((it->first[0] != '[') && (it->first != "summaryfeatures") && (it->first != "rankfeatures") && (it->first != "ranklog") && (it->first != "sddocname") && (it->first != "documentid")) { - FieldPath fieldPath; - docType.buildFieldPath(fieldPath, fname); - fieldMap[it->second] = std::move(fieldPath); - validCount++; - LOG(spam, "Found %s -> %d in document", fname.c_str(), it->second); - } - } catch (const std::exception & e) { - LOG(debug, "Could not get field info for '%s' in documenttype '%s' (id = '%s') : %s", - it->first.c_str(), docType.getName().c_str(), typeId.c_str(), e.what()); - } - } - _documentTypeFreq.insert(std::make_pair(validCount, docTypePtr)); -} - -const document::DocumentType & DocumentTypeMapping::getCurrentDocumentType() const -{ - if (_documentTypeFreq.empty()) { - throw std::runtime_error("No document type registered yet."); - } - return *_documentTypeFreq.rbegin()->second; -} - - -} diff --git a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h b/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h deleted file mode 100644 index 607b40cec47..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/documenttypemapping.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/common/storagedocument.h> - -namespace document { class DocumentTypeRepo; } - -namespace vsm -{ - -class DocumentTypeMapping -{ -public: - DocumentTypeMapping(); - ~DocumentTypeMapping(); - - /** - * Prepares the given document by sharing the field info map - * registered for that document type. - **/ - bool prepareBaseDoc(SharedFieldPathMap & doc) const; - - /** - * Builds a field info map for all registered document types. - **/ - void init(const vespalib::string & defaultDocumentType, - const StringFieldIdTMapT & fieldList, - const document::DocumentTypeRepo &repo); - - const document::DocumentType & getCurrentDocumentType() const; - const vespalib::string & getDefaultDocumentTypeName() const - { return _defaultDocumentTypeName; } - const document::DocumentType *getDefaultDocumentType() const - { return _defaultDocumentType; } - -private: - /** - * Builds a field info map for the given type id. This is a - * mapping from field id to field path and field value for all - * field names in the given list based on the given document type. - **/ - void buildFieldMap(const document::DocumentType *docType, - const StringFieldIdTMapT & fieldList, - const vespalib::string & typeId); - typedef vespalib::hash_map<vespalib::string, FieldPathMapT> FieldPathMapMapT; - typedef std::multimap<size_t, const document::DocumentType *> DocumentTypeUsage; - FieldPathMapMapT _fieldMap; - vespalib::string _defaultDocumentTypeName; - const document::DocumentType *_defaultDocumentType; - DocumentTypeUsage _documentTypeFreq; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp b/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp deleted file mode 100644 index b39afd83b5a..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "fieldmodifier.h" -#include <vespa/vespalib/stllike/hash_map.hpp> - -namespace vsm { - -FieldModifierMap::FieldModifierMap() : - _map() -{ } - -FieldModifierMap::~FieldModifierMap() { } - -FieldModifier * -FieldModifierMap::getModifier(FieldIdT fId) const -{ - FieldModifierMapT::const_iterator itr = _map.find(fId); - if (itr == _map.end()) { - return NULL; - } - return itr->second.get(); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h b/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h deleted file mode 100644 index 60e480fa237..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/fieldmodifier.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/document/fieldvalue/fieldvalue.h> -#include <vespa/vsm/common/document.h> - -namespace vsm { - -/** - * Interface for classes that want to modify a field value. - **/ -class FieldModifier -{ -public: - typedef std::unique_ptr<FieldModifier> UP; - - /** - * Modifies the given field value and returns a new one. - **/ - virtual document::FieldValue::UP modify(const document::FieldValue & fv) = 0; - - /** - * Modifies the given field value and returns a new one. - * Use the given field path to iterate the field value. - **/ - virtual document::FieldValue::UP modify(const document::FieldValue & fv, - const document::FieldPath & path) = 0; - - virtual ~FieldModifier() { } -}; - -typedef vespalib::hash_map<FieldIdT, FieldModifier::UP> FieldModifierMapT; - -/** - * This class wraps a map from field id to field modifier. - **/ -class FieldModifierMap -{ -private: - FieldModifierMapT _map; - -public: - FieldModifierMap(); - ~FieldModifierMap(); - FieldModifierMapT & map() { return _map; } - const FieldModifierMapT & map() const { return _map; } - - /** - * Returns the modifier associated with the given field id or NULL if not found. - * - * @param fId the field id to look up. - * @return the field modifier or NULL if not found. - **/ - FieldModifier * getModifier(FieldIdT fId) const; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp b/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp deleted file mode 100644 index a0d666268f5..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/storagedocument.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "storagedocument.h" -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.storagedocument"); - -using NestedIterator = document::FieldValue::PathRange; - -namespace vsm { - -StorageDocument::StorageDocument(document::Document::UP doc, const SharedFieldPathMap & fim, size_t fieldNoLimit) : - Document(fieldNoLimit), - _doc(std::move(doc)), - _fieldMap(fim), - _cachedFields(getFieldCount()), - _backedFields() -{ } - -StorageDocument::~StorageDocument() { } - -namespace { - FieldPath _emptyFieldPath; - StorageDocument::SubDocument _empySubDocument(NULL, _emptyFieldPath.getFullRange()); -} - -const StorageDocument::SubDocument & -StorageDocument::getComplexField(FieldIdT fId) const -{ - if (_cachedFields[fId].getFieldValue() == NULL) { - const FieldPath & fp = (*_fieldMap)[fId]; - if ( ! fp.empty() ) { - const document::StructuredFieldValue * sfv = _doc.get(); - NestedIterator nested = fp.getFullRange(); - const document::FieldPathEntry& fvInfo = nested.cur(); - bool ok = sfv->getValue(fvInfo.getFieldRef(), fvInfo.getFieldValueToSet()); - if (ok) { - SubDocument tmp(&fvInfo.getFieldValueToSet(), nested.next()); - _cachedFields[fId].swap(tmp); - } - } else { - LOG(debug, "Failed getting field fId %d.", fId); - return _empySubDocument; - } - } - return _cachedFields[fId]; -} - -void StorageDocument::saveCachedFields() const -{ - size_t m(_cachedFields.size()); - _backedFields.reserve(m); - for (size_t i(0); i < m; i++) { - if (_cachedFields[i].getFieldValue() != 0) { - _backedFields.emplace_back(document::FieldValue::UP(_cachedFields[i].getFieldValue()->clone())); - _cachedFields[i].setFieldValue(_backedFields.back().get()); - } - } -} - -const document::FieldValue * -StorageDocument::getField(FieldIdT fId) const -{ - return getComplexField(fId).getFieldValue(); -} - -bool StorageDocument::setField(FieldIdT fId, document::FieldValue::UP fv) -{ - bool ok(fId < _cachedFields.size()); - if (ok) { - const FieldPath & fp = (*_fieldMap)[fId]; - SubDocument tmp(fv.get(), NestedIterator(fp.end(), fp.end())); - _cachedFields[fId].swap(tmp); - _backedFields.emplace_back(std::move(fv)); - } - return ok; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/common/storagedocument.h b/streamingvisitors/src/vespa/vsm/common/storagedocument.h deleted file mode 100644 index a7f21cb052f..00000000000 --- a/streamingvisitors/src/vespa/vsm/common/storagedocument.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "document.h" -#include <vespa/document/fieldvalue/document.h> - -namespace vsm { - -typedef vespalib::CloneablePtr<document::FieldValue> FieldValueContainer; -typedef document::FieldPath FieldPath; // field path to navigate a field value -typedef std::vector<FieldPath> FieldPathMapT; // map from field id to field path -typedef std::shared_ptr<FieldPathMapT> SharedFieldPathMap; - -class StorageDocument : public Document { -public: - typedef std::unique_ptr<StorageDocument> UP; - - class SubDocument { - public: - SubDocument() : _fieldValue(nullptr) {} - SubDocument(document::FieldValue *fv, document::FieldValue::PathRange nested) : - _fieldValue(fv), - _range(nested) - { } - - const document::FieldValue *getFieldValue() const { return _fieldValue; } - void setFieldValue(document::FieldValue *fv) { _fieldValue = fv; } - const document::FieldValue::PathRange & getRange() const { return _range; } - void swap(SubDocument &rhs) { - std::swap(_fieldValue, rhs._fieldValue); - std::swap(_range, rhs._range); - } - private: - FieldPath::const_iterator begin() const; - FieldPath::const_iterator end() const; - document::FieldValue *_fieldValue; - document::FieldValue::PathRange _range; - }; -public: - StorageDocument(document::Document::UP doc, const SharedFieldPathMap &fim, size_t fieldNoLimit); - StorageDocument(const StorageDocument &) = delete; - StorageDocument & operator = (const StorageDocument &) = delete; - ~StorageDocument(); - - const document::Document &docDoc() const { return *_doc; } - bool valid() const { return _doc.get() != nullptr; } - const SubDocument &getComplexField(FieldIdT fId) const; - const document::FieldValue *getField(FieldIdT fId) const override; - bool setField(FieldIdT fId, document::FieldValue::UP fv) override ; - void saveCachedFields() const; -private: - document::Document::UP _doc; - SharedFieldPathMap _fieldMap; - mutable std::vector<SubDocument> _cachedFields; - mutable std::vector<document::FieldValue::UP> _backedFields; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/config/.gitignore b/streamingvisitors/src/vespa/vsm/config/.gitignore deleted file mode 100644 index d58390943e2..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.depend -Makefile -config-*.cpp -config-*.h diff --git a/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt deleted file mode 100644 index fea0bafe6b2..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(vsm_vconfig OBJECT - SOURCES - DEPENDS -) -vespa_generate_config(vsm_vconfig vsmfields.def) -install_config_definition(vsmfields.def vespa.config.search.vsm.vsmfields.def) -vespa_generate_config(vsm_vconfig vsm.def) -install_config_definition(vsm.def vespa.config.search.vsm.vsm.def) -vespa_generate_config(vsm_vconfig vsmsummary.def) -install_config_definition(vsmsummary.def vespa.config.search.vsm.vsmsummary.def) diff --git a/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h b/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h deleted file mode 100644 index 22033aee232..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/vsm-cfif.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/config/config-vsmfields.h> -#include <vespa/vsm/config/config-vsm.h> -#include <vespa/vsm/config/config-vsmsummary.h> -#include <vespa/vespalib/util/ptrholder.h> - -using vespa::config::search::vsm::VsmConfig; -using vespa::config::search::vsm::VsmsummaryConfig; -using vespa::config::search::vsm::VsmfieldsConfig; - -namespace vsm { - -typedef vespalib::PtrHolder<VsmfieldsConfig> VsmfieldsHolder; -typedef std::shared_ptr<VsmfieldsConfig> VsmfieldsHandle; - -typedef vespalib::PtrHolder<VsmConfig> VsmHolder; -typedef std::shared_ptr<VsmConfig> VsmHandle; - -typedef vespalib::PtrHolder<VsmsummaryConfig> FastS_VsmsummaryHolder; -typedef std::shared_ptr<VsmsummaryConfig> FastS_VsmsummaryHandle; - -} - diff --git a/streamingvisitors/src/vespa/vsm/config/vsm.def b/streamingvisitors/src/vespa/vsm/config/vsm.def deleted file mode 100644 index 1971f9e9574..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/vsm.def +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.search.vsm - -## The document model for the documents used as input for the VSM -doctype reference - -## Configuration for storage client used by VSM -storagecfg reference - -## Config defining what search method should be applied to different -## fields in the documents. It also contains a mapping from index name -## to a set of fields making up that index. -vsmfields reference diff --git a/streamingvisitors/src/vespa/vsm/config/vsmfields.def b/streamingvisitors/src/vespa/vsm/config/vsmfields.def deleted file mode 100644 index 5e943c9274d..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/vsmfields.def +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.search.vsm - -## Level of verification applied to the documents received. -documentverificationlevel int default=0 - -## Set if one should ignore limit hits. -searchall int default=1 - -## The name of a field for which we are assigning a search method. -## The field name refers directly to a field in the document model. -fieldspec[].name string - -## The search method for a given field. Note: same field in 2 different document types must match on type if not a random result might be expected. -fieldspec[].searchmethod enum { NONE, BOOL, AUTOUTF8, UTF8, SSE2UTF8, INT8, INT16, INT32, INT64, FLOAT16, FLOAT, DOUBLE, GEOPOS } default=AUTOUTF8 -fieldspec[].arg1 string default="" - -## Maximum number of chars to search per field. -fieldspec[].maxlength int default=1048576 - -## Type of the field -fieldspec[].fieldtype enum {ATTRIBUTE, INDEX} default=INDEX - -## The name of a documenttype for which we are assigning a set of indexes. -documenttype[].name string -## The name of an index of a documenttype for which we are assigning a set of fields. -documenttype[].index[].name string - -## The name of a field part of an index. -## The field name refers directly to a field in the document model. -documenttype[].index[].field[].name string diff --git a/streamingvisitors/src/vespa/vsm/config/vsmsummary.def b/streamingvisitors/src/vespa/vsm/config/vsmsummary.def deleted file mode 100644 index 5eb96624826..00000000000 --- a/streamingvisitors/src/vespa/vsm/config/vsmsummary.def +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.config.search.vsm - -## The name of the result class that should be generated for documents -## returned from the VSM. If this value is empty, the first found -## result class will be used. -outputclass string default="" - -## Mapping of field names between the result class and the document -## model. This value represents the name in the result class. Fields -## not mentioned here will get the identity mapping. -fieldmap[].summary string - -## Mapping of field names between the result class and the document -## model. This field vector represents the names in the document model -## that should be used as input when generating the summary field. -fieldmap[].document[].field string - -## This command specifies how the document fields should be combined -## when generating the summary field. -fieldmap[].command enum { NONE, FLATTENJUNIPER, FLATTENSPACE } default=NONE diff --git a/streamingvisitors/src/vespa/vsm/searcher/.gitignore b/streamingvisitors/src/vespa/vsm/searcher/.gitignore deleted file mode 100644 index 95bc02923a9..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.exe -*.ilk -*.pdb -.depend* -Makefile diff --git a/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt deleted file mode 100644 index 0a2a9ec21d2..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - set(SSE2_FILES "fold.cpp") -else() - unset(SSE2_FILES) -endif() - -vespa_add_library(vsm_vsmsearcher OBJECT - SOURCES - boolfieldsearcher.cpp - fieldsearcher.cpp - floatfieldsearcher.cpp - ${SSE2_FILES} - futf8strchrfieldsearcher.cpp - geo_pos_field_searcher.cpp - intfieldsearcher.cpp - strchrfieldsearcher.cpp - utf8flexiblestringfieldsearcher.cpp - utf8strchrfieldsearcher.cpp - utf8stringfieldsearcherbase.cpp - utf8substringsearcher.cpp - utf8substringsnippetmodifier.cpp - utf8suffixstringfieldsearcher.cpp - utf8exactstringfieldsearcher.cpp - DEPENDS - vsm_vconfig -) diff --git a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp deleted file mode 100644 index 8c9b556e593..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "boolfieldsearcher.h" -#include <vespa/document/fieldvalue/boolfieldvalue.h> - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -namespace { -vespalib::stringref TRUE = "true"; -vespalib::stringref FALSE = "false"; -} - -std::unique_ptr<FieldSearcher> -BoolFieldSearcher::duplicate() const -{ - return std::make_unique<BoolFieldSearcher>(*this); -} - -BoolFieldSearcher::BoolFieldSearcher(FieldIdT fId) : - FieldSearcher(fId), - _terms() -{ } - -BoolFieldSearcher::~BoolFieldSearcher() = default; - -void BoolFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) -{ - _terms.clear(); - FieldSearcher::prepare(qtl, buf); - for (const QueryTerm * qt : qtl) { - if (TRUE == qt->getTerm()) { - _terms.push_back(true); - } else if (FALSE == qt->getTerm()) { - _terms.push_back(false); - } else { - int64_t low; - int64_t high; - bool valid = qt->getAsIntegerTerm(low, high); - _terms.push_back(valid && (low > 0)); - } - } -} - -void BoolFieldSearcher::onValue(const document::FieldValue & fv) -{ - for(size_t j=0, jm(_terms.size()); j < jm; j++) { - if (static_cast<const document::BoolFieldValue &>(fv).getValue() == _terms[j]) { - addHit(*_qtl[j], 0); - } - } - ++_words; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h deleted file mode 100644 index f6afef9e507..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/boolfieldsearcher.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearcher.h" - -namespace vsm { - -class BoolFieldSearcher : public FieldSearcher -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - BoolFieldSearcher(FieldIdT fId); - ~BoolFieldSearcher(); - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; - void onValue(const document::FieldValue & fv) override; -private: - std::vector<bool> _terms; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp deleted file mode 100644 index e69999b160e..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "fieldsearcher.h" -#include <vespa/vsm/vsm/fieldsearchspec.h> -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.searcher.fieldsearcher"); - -using search::byte; -using search::streaming::Query; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using search::v16qi; - -namespace vsm { - -class force -{ - public: - force() { FieldSearcher::init(); } -}; - -static force __forceInit; - -byte FieldSearcher::_foldLowCase[256]; -byte FieldSearcher::_wordChar[256]; - -FieldSearcherBase::FieldSearcherBase() : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) -{ -} - -FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) -{ - prepare(org._qtl); -} - -FieldSearcherBase::~FieldSearcherBase() -{ -} - -FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org) -{ - if (this != &org) { - prepare(org._qtl); - } - return *this; -} - -void FieldSearcherBase::prepare(const QueryTermList & qtl) -{ - _qtl = qtl; - _qtlFastBuffer.resize(sizeof(*_qtlFast)*(_qtl.size()+1), 0x13); - _qtlFast = reinterpret_cast<v16qi *>(reinterpret_cast<unsigned long>(&_qtlFastBuffer[0]+15) & ~0xf); - _qtlFastSize = 0; - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - const QueryTerm & qt = **it; - memcpy(&_qtlFast[_qtlFastSize++], qt.getTerm(), std::min(size_t(16), qt.termLen())); - } -} - -FieldSearcher::FieldSearcher(const FieldIdT & fId, bool defaultPrefix) : - FieldSearcherBase(), - _field(fId), - _matchType(defaultPrefix ? PREFIX : REGULAR), - _maxFieldLength(0x100000), - _currentElementId(0), - _currentElementWeight(1), - _pureUsAsciiCount(0), - _pureUsAsciiFieldCount(0), - _anyUtf8Count(0), - _anyUtf8FieldCount(0), - _words(0), - _badUtf8Count(0), - _zeroCount(0) -{ - zeroStat(); -} - -FieldSearcher::~FieldSearcher() = default; - -bool FieldSearcher::search(const StorageDocument & doc) -{ - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field()); - fInfo.setHitOffset(qt.getHitList().size()); - } - onSearch(doc); - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field()); - fInfo.setHitCount(qt.getHitList().size() - fInfo.getHitOffset()); - fInfo.setFieldLength(_words); - } - _words = 0; - return true; -} - -void FieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & UNUSED_PARAM(buf)) -{ - FieldSearcherBase::prepare(qtl); - prepareFieldId(); -} - -size_t FieldSearcher::countWords(const FieldRef & f) -{ - size_t words = 0; - const char * n = f.data(); - const char * e = n + f.size(); - for( ; n < e; ++n) { - for (; isspace(*n) && (n<e); ++n); - const char * m = n; - for (; iswordchar(*n) && (n<e); ++n); - if (n > m) { - words++; - } - } - return words; -} - -void FieldSearcher::prepareFieldId() -{ - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - qt.resizeFieldId(field()); - } -} - -void FieldSearcher::addStat(const FieldSearcher & toAdd) -{ - _pureUsAsciiCount += toAdd._pureUsAsciiCount; - _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount; - _anyUtf8Count += toAdd._anyUtf8Count; - _anyUtf8FieldCount += toAdd._anyUtf8FieldCount; - _badUtf8Count += toAdd._badUtf8Count; - _zeroCount += toAdd._zeroCount; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] += toAdd._utf8Count[i]; } -} - -void FieldSearcher::zeroStat() -{ - _pureUsAsciiCount = 0; - _pureUsAsciiFieldCount = 0; - _anyUtf8Count = 0; - _anyUtf8FieldCount = 0; - _badUtf8Count = 0; - _zeroCount = 0; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] = 0; } -} - -void FieldSearcher::init() -{ - for (unsigned i = 0; i < NELEMS(_foldLowCase); i++) { - _foldLowCase[i] = 0; - _wordChar[i] = 0; - } - for (int i = 'A'; i <= 'Z'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i | 0x20; - } - for (int i = 'a'; i <= 'z'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i; - } - for (int i = '0'; i <= '9'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i; - } - for (int i = 0xC0; i <= 0xFF; i++) { - _wordChar[i] = 0xFF; - } - _wordChar[0xd7] = 0; - _wordChar[0xf7] = 0; - - if (1) /* _doAccentRemoval */ { - _foldLowCase[0xc0] = 'a'; - _foldLowCase[0xc1] = 'a'; - _foldLowCase[0xc2] = 'a'; - _foldLowCase[0xc3] = 'a'; // A tilde - _foldLowCase[0xc7] = 'c'; - _foldLowCase[0xc8] = 'e'; - _foldLowCase[0xc9] = 'e'; - _foldLowCase[0xca] = 'e'; - _foldLowCase[0xcb] = 'e'; - _foldLowCase[0xcc] = 'i'; // I grave - _foldLowCase[0xcd] = 'i'; - _foldLowCase[0xce] = 'i'; - _foldLowCase[0xcf] = 'i'; - _foldLowCase[0xd3] = 'o'; - _foldLowCase[0xd4] = 'o'; - _foldLowCase[0xda] = 'u'; - _foldLowCase[0xdb] = 'u'; - - _foldLowCase[0xe0] = 'a'; - _foldLowCase[0xe1] = 'a'; - _foldLowCase[0xe2] = 'a'; - _foldLowCase[0xe3] = 'a'; // a tilde - _foldLowCase[0xe7] = 'c'; - _foldLowCase[0xe8] = 'e'; - _foldLowCase[0xe9] = 'e'; - _foldLowCase[0xea] = 'e'; - _foldLowCase[0xeb] = 'e'; - _foldLowCase[0xec] = 'i'; // i grave - _foldLowCase[0xed] = 'i'; - _foldLowCase[0xee] = 'i'; - _foldLowCase[0xef] = 'i'; - _foldLowCase[0xf3] = 'o'; - _foldLowCase[0xf4] = 'o'; - _foldLowCase[0xfa] = 'u'; - _foldLowCase[0xfb] = 'u'; - } -} - -void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT & difm, const SharedSearcherBuf & searcherBuf, Query & query) -{ - QueryTermList qtl; - query.getLeafs(qtl); - vespalib::string tmp; - for (FieldIdTSearcherMap::iterator it = begin(), mt = end(); it != mt; it++) { - QueryTermList onlyInIndex; - FieldIdT fid = (*it)->field(); - for (QueryTermList::iterator qt = qtl.begin(), mqt = qtl.end(); qt != mqt; qt++) { - QueryTerm * q = *qt; - for (DocumentTypeIndexFieldMapT::const_iterator dt(difm.begin()), dmt(difm.end()); dt != dmt; dt++) { - const IndexFieldMapT & fim = dt->second; - IndexFieldMapT::const_iterator found = fim.find(FieldSearchSpecMap::stripNonFields(q->index())); - if (found != fim.end()) { - const FieldIdTList & index = found->second; - if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), q) == onlyInIndex.end())) { - onlyInIndex.push_back(q); - } - } else { - LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", q->index().c_str()); - } - } - } - /// Should perhaps do a unique on onlyInIndex - (*it)->prepare(onlyInIndex, searcherBuf); - if (logger.wants(ns_log::Logger::spam)) { - char tmpBuf[16]; - sprintf(tmpBuf,"%d", fid); - tmp += tmpBuf; - tmp += ", "; - } - } - LOG(debug, "Will search in %s", tmp.c_str()); -} - -bool FieldSearcher::onSearch(const StorageDocument & doc) -{ - bool retval(true); - size_t fNo(field()); - const StorageDocument::SubDocument & sub = doc.getComplexField(fNo); - if (sub.getFieldValue() != nullptr) { - IteratorHandler ih(*this); - sub.getFieldValue()->iterateNested(sub.getRange(), ih); - } - return retval; -} - -void -FieldSearcher::IteratorHandler::onPrimitive(uint32_t, const Content & c) -{ - LOG(spam, "onPrimitive: field value '%s'", c.getValue().toString().c_str()); - _searcher.setCurrentWeight(c.getWeight()); - _searcher.setCurrentElementId(getArrayIndex()); - _searcher.onValue(c.getValue()); -} - -void -FieldSearcher::IteratorHandler::onCollectionStart(const Content & c) -{ - const document::FieldValue & fv = c.getValue(); - LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str()); - if (fv.isA(document::FieldValue::Type::ARRAY)) { - const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv); - LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size()); - } else if (fv.isA(document::FieldValue::Type::WSET)) { - const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv); - LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size()); - } -} - -void -FieldSearcher::IteratorHandler::onStructStart(const Content & c) -{ - LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str()); - _searcher.onStructValue(static_cast<const document::StructFieldValue &>(c.getValue())); -} - - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h deleted file mode 100644 index 5c2ef8fec28..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/document/fieldvalue/iteratorhandler.h> -#include <vespa/searchlib/query/streaming/query.h> -#include <vespa/vsm/common/document.h> -#include <vespa/vsm/common/storagedocument.h> - -namespace vsm { - -typedef size_t termcount_t; -typedef size_t termsize_t; - -#if defined(COLLECT_CHAR_STAT) - #define NEED_CHAR_STAT(a) { a; } -#else - #define NEED_CHAR_STAT(a) -#endif - -typedef ucs4_t cmptype_t; -typedef vespalib::Array<cmptype_t> SearcherBuf; -typedef std::shared_ptr<SearcherBuf> SharedSearcherBuf; -typedef std::vector<char> CharVector; - -class FieldSearcherBase -{ -protected: - search::streaming::QueryTermList _qtl; -private: - CharVector _qtlFastBuffer; -protected: - FieldSearcherBase(); - FieldSearcherBase(const FieldSearcherBase & org); - virtual ~FieldSearcherBase(void); - FieldSearcherBase & operator = (const FieldSearcherBase & org); - void prepare(const search::streaming::QueryTermList & qtl); - size_t _qtlFastSize; - search::v16qi *_qtlFast; -}; - -class FieldSearcher : public FieldSearcherBase -{ -public: - enum MatchType { - REGULAR, - PREFIX, - SUBSTRING, - SUFFIX, - EXACT - }; - - FieldSearcher(const FieldIdT & fId, bool defaultPrefix=false); - ~FieldSearcher() override; - virtual std::unique_ptr<FieldSearcher> duplicate() const = 0; - bool search(const StorageDocument & doc); - virtual void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf); - const FieldIdT & field() const { return _field; } - void field(const FieldIdT & v) { _field = v; prepareFieldId(); } - bool prefix() const { return _matchType == PREFIX; } - bool substring() const { return _matchType == SUBSTRING; } - bool suffix() const { return _matchType == SUFFIX; } - bool exact() const { return _matchType == EXACT; } - void setMatchType(MatchType mt) { _matchType = mt; } - static void init(); - static search::byte fold(search::byte c) { return _foldLowCase[c]; } - static search::byte iswordchar(search::byte c) { return _wordChar[c]; } - static search::byte isspace(search::byte c) { return ! iswordchar(c); } - static size_t countWords(const FieldRef & f); - unsigned pureUsAsciiCount() const { return _pureUsAsciiCount; } - unsigned pureUsAsciiFieldCount() const { return _pureUsAsciiFieldCount; } - unsigned anyUtf8Count() const { return _anyUtf8Count; } - unsigned anyUtf8FieldCount() const { return _anyUtf8FieldCount; } - unsigned badUtf8Count() const { return _badUtf8Count; } - unsigned zeroCount() const { return _zeroCount; } - unsigned utf8Count(size_t sz) const { return _utf8Count[1+sz]; } - const unsigned * utf8Count() const { return _utf8Count; } - int32_t getCurrentWeight() const { return _currentElementWeight; } - void addStat(const FieldSearcher & toAdd); - void zeroStat(); - FieldSearcher & maxFieldLength(uint32_t maxFieldLength_) { _maxFieldLength = maxFieldLength_; return *this; } - size_t maxFieldLength() const { return _maxFieldLength; } - -private: - class IteratorHandler : public document::fieldvalue::IteratorHandler { - private: - FieldSearcher & _searcher; - - void onPrimitive(uint32_t fid, const Content & c) override; - void onCollectionStart(const Content & c) override; - void onStructStart(const Content & c) override; - - public: - IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {} - }; - friend class IteratorHandler; // to allow calls to onValue(); - - void prepareFieldId(); - void setCurrentWeight(int32_t weight) { _currentElementWeight = weight; } - void setCurrentElementId(int32_t weight) { _currentElementId = weight; } - bool onSearch(const StorageDocument & doc); - virtual void onValue(const document::FieldValue & fv) = 0; - virtual void onStructValue(const document::StructFieldValue &) { } - FieldIdT _field; - MatchType _matchType; - unsigned _maxFieldLength; - uint32_t _currentElementId; - int32_t _currentElementWeight; // Contains the weight of the current item being evaluated. - /// Number of bytes in blocks containing pure us-ascii - unsigned _pureUsAsciiCount; - /// Number of blocks containing pure us-ascii - unsigned _pureUsAsciiFieldCount; - /// Number of bytes in blocks containing any non us-ascii - unsigned _anyUtf8Count; - /// Number of blocks containing any non us-ascii - unsigned _anyUtf8FieldCount; -protected: - /// Number of terms searched. - unsigned _words; - /// Number of utf8 bytes by utf8 size. - unsigned _utf8Count[6]; - unsigned _badUtf8Count; - unsigned _zeroCount; -protected: - void addPureUsAsciiField(size_t sz) { _pureUsAsciiCount += sz; _pureUsAsciiFieldCount++;; } - void addAnyUtf8Field(size_t sz) { _anyUtf8Count += sz; _anyUtf8FieldCount++; } - /** - * Adds a hit to the given query term. - * For each call to onValue() a batch of words are processed, and the position is local to this batch. - **/ - void addHit(search::streaming::QueryTerm & qt, uint32_t pos) const { - qt.add(_words + pos, field(), _currentElementId, getCurrentWeight()); - } -public: - static search::byte _foldLowCase[256]; - static search::byte _wordChar[256]; -}; - -typedef std::unique_ptr<FieldSearcher> FieldSearcherContainer; -typedef std::vector<FieldSearcherContainer> FieldIdTSearcherMapT; - -class FieldIdTSearcherMap : public FieldIdTSearcherMapT -{ -public: - void prepare(const DocumentTypeIndexFieldMapT & difm, const SharedSearcherBuf & searcherBuf, search::streaming::Query & query); -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp deleted file mode 100644 index 02d8bd8c12a..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "floatfieldsearcher.h" - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -FloatFieldSearcher::duplicate() const -{ - return std::make_unique<FloatFieldSearcher>(*this); -} - -std::unique_ptr<FieldSearcher> -DoubleFieldSearcher::duplicate() const -{ - return std::make_unique<DoubleFieldSearcher>(*this); -} - -template<typename T> -FloatFieldSearcherT<T>::FloatFieldSearcherT(FieldIdT fId) : - FieldSearcher(fId), - _floatTerm() -{} - -template<typename T> -FloatFieldSearcherT<T>::~FloatFieldSearcherT() {} - -template<typename T> -void FloatFieldSearcherT<T>::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) -{ - _floatTerm.clear(); - FieldSearcher::prepare(qtl, buf); - for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) { - const QueryTerm * qt = *it; - size_t sz(qt->termLen()); - if (sz) { - double low; - double high; - bool valid = qt->getAsDoubleTerm(low, high); - _floatTerm.push_back(FloatInfo(low, high, valid)); - } - } -} - - -template<typename T> -void FloatFieldSearcherT<T>::onValue(const document::FieldValue & fv) -{ - for(size_t j=0, jm(_floatTerm.size()); j < jm; j++) { - const FloatInfo & ii = _floatTerm[j]; - if (ii.valid() && (ii.cmp(fv.getAsDouble()))) { - addHit(*_qtl[j], 0); - } - } - ++_words; -} - -template<typename T> -bool FloatFieldSearcherT<T>::FloatInfo::cmp(T key) const -{ - return (_lower <= key) && (key <= _upper); -} - -template class FloatFieldSearcherT<float>; -template class FloatFieldSearcherT<double>; - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h deleted file mode 100644 index 98018fbf4a3..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/floatfieldsearcher.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearcher.h" - -namespace vsm { - -template <typename T> -class FloatFieldSearcherT : public FieldSearcher -{ -public: - FloatFieldSearcherT(FieldIdT fId=0); - ~FloatFieldSearcherT(); - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; - void onValue(const document::FieldValue & fv) override; -protected: - class FloatInfo - { - public: - FloatInfo(T low, T high, bool v) : _lower(low), _upper(high), _valid(v) { if (low > high) { _lower = high; _upper = low; } } - bool cmp(T key) const; - bool valid() const { return _valid; } - void setValid(bool v) { _valid = v; } - T getLow() const { return _lower; } - T getHigh() const { return _upper; } - private: - T _lower; - T _upper; - bool _valid; - }; - typedef std::vector<FloatInfo> FloatInfoListT; - FloatInfoListT _floatTerm; -}; - -typedef FloatFieldSearcherT<float> FloatFieldSearcherTF; -typedef FloatFieldSearcherT<double> FloatFieldSearcherTD; - -class FloatFieldSearcher : public FloatFieldSearcherTF -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - FloatFieldSearcher(FieldIdT fId=0) : FloatFieldSearcherTF(fId) { } -}; - -class DoubleFieldSearcher : public FloatFieldSearcherTD -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - DoubleFieldSearcher(FieldIdT fId=0) : FloatFieldSearcherTD(fId) { } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/fold.cpp b/streamingvisitors/src/vespa/vsm/searcher/fold.cpp deleted file mode 100644 index bd2392d3ad6..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/fold.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// -#include "fold.h" - -namespace vsm { - -const unsigned char * sse2_foldaa(const unsigned char * toFoldOrg, size_t sz, unsigned char * foldedOrg) -{ - typedef char v16qi __attribute__ ((__vector_size__(16))); - typedef long long v2di __attribute__ ((__vector_size__(16))); - static v16qi _G_0 = { '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1 }; - static v16qi _G_9 = { '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9' }; - static v16qi _G_a = { 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1 }; - static v16qi _G_z = { 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z' }; - static v16qi _G_8bit = { (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, - (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4 }; - static v2di _G_lowCase = { 0x2020202020202020ULL, 0x2020202020202020ULL }; - const v16qi *toFold = reinterpret_cast<const v16qi *>(toFoldOrg); - v2di * folded = reinterpret_cast<v2di *>(foldedOrg); - size_t i=0; - for (size_t m=sz/16; i < m; i++) - { -#ifndef __INTEL_COMPILER - int nonAscii = __builtin_ia32_pmovmskb128(toFold[i]); - if (nonAscii) - { -#ifdef __clang__ - v16qi non8Mask = _G_8bit > toFold[i]; -#else - v16qi non8Mask = __builtin_ia32_pcmpgtb128(_G_8bit, toFold[i]); -#endif - int non8bit = __builtin_ia32_pmovmskb128(non8Mask); - if (non8bit) - { - break; - } - break; - } -#ifdef __clang__ - v16qi _0 = toFold[i] > _G_0; - v16qi _z = toFold[i] > _G_z; - v2di _0_z = v2di(_0) ^ v2di(_z); - v2di toLow = _0_z & v2di(toFold[i]); - v16qi low = v16qi(toLow | _G_lowCase); - _0 = low > _G_0; - v16qi _9 = low > _G_9; - v16qi _a = low > _G_a; - _z = low > _G_z; - v2di _0_9_m = v2di(_0) ^ v2di(_9); - v2di _a_z_m = v2di(_a) ^ v2di(_z); - v2di _0_9 = _0_9_m & v2di(low); - v2di _a_z = _a_z_m & v2di(low); - folded[i] = _0_9 | _a_z; -#else - v16qi _0 = __builtin_ia32_pcmpgtb128(toFold[i], _G_0); - v16qi _z = __builtin_ia32_pcmpgtb128(toFold[i], _G_z); - v2di _0_z = __builtin_ia32_pxor128(v2di(_0), v2di(_z)); - v2di toLow = __builtin_ia32_pand128(_0_z, v2di(toFold[i])); - v16qi low = v16qi(__builtin_ia32_por128(toLow, _G_lowCase)); - _0 = __builtin_ia32_pcmpgtb128(low, _G_0); - v16qi _9 = __builtin_ia32_pcmpgtb128(low, _G_9); - v16qi _a = __builtin_ia32_pcmpgtb128(low, _G_a); - _z = __builtin_ia32_pcmpgtb128(low, _G_z); - v2di _0_9_m = __builtin_ia32_pxor128(v2di(_0), v2di(_9)); - v2di _a_z_m = __builtin_ia32_pxor128(v2di(_a), v2di(_z)); - v2di _0_9 = __builtin_ia32_pand128(_0_9_m, v2di(low)); - v2di _a_z = __builtin_ia32_pand128(_a_z_m, v2di(low)); - folded[i] = __builtin_ia32_por128(_0_9, _a_z); -#endif -#else -# warning "Intel's icc compiler does not like __builtin_ia32_pxor128" - LOG_ABORT("should not be reached"); -#endif - } - return toFoldOrg+i*16; -} - -const unsigned char * sse2_foldua(const unsigned char * toFoldOrg, size_t sz, unsigned char * foldedOrg) -{ - typedef char v16qi __attribute__ ((__vector_size__(16))); - typedef long long v2di __attribute__ ((__vector_size__(16))); - static v16qi _G_0 = { '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1, '0'-1 }; - static v16qi _G_9 = { '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9' }; - static v16qi _G_a = { 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1, 'a'-1 }; - static v16qi _G_z = { 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z' }; - static v16qi _G_8bit = { (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, - (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4, (char)0xc4 }; - static v2di _G_lowCase = { 0x2020202020202020ULL, 0x2020202020202020ULL }; - v2di * folded = reinterpret_cast<v2di *>(foldedOrg); - size_t i=0; - for (size_t m=sz/16; i < m; i++) - { -#ifndef __INTEL_COMPILER -#ifdef __clang__ - v16qi current = __builtin_ia32_lddqu(reinterpret_cast<const char *>(&toFoldOrg[i*16])); -#else - v16qi current = __builtin_ia32_loaddqu(reinterpret_cast<const char *>(&toFoldOrg[i*16])); -#endif - int nonAscii = __builtin_ia32_pmovmskb128(current); - if (nonAscii) - { -#ifdef __clang__ - v16qi non8Mask = _G_8bit > current; -#else - v16qi non8Mask = __builtin_ia32_pcmpgtb128(_G_8bit, current); -#endif - int non8bit = __builtin_ia32_pmovmskb128(non8Mask); - if (non8bit) - { - break; - } - break; - } -#ifdef __clang__ - v16qi _0 = current > _G_0; - v16qi _z = current > _G_z; - v2di _0_z = v2di(_0) ^ v2di(_z); - v2di toLow = _0_z & v2di(current); - v16qi low = v16qi(toLow | _G_lowCase); - _0 = low > _G_0; - v16qi _9 = low > _G_9; - v16qi _a = low > _G_a; - _z = low > _G_z; - v2di _0_9_m = v2di(_0) ^ v2di(_9); - v2di _a_z_m = v2di(_a) ^ v2di(_z); - v2di _0_9 = _0_9_m & v2di(low); - v2di _a_z = _a_z_m & v2di(low); - folded[i] = _0_9 | _a_z; -#else - v16qi _0 = __builtin_ia32_pcmpgtb128(current, _G_0); - v16qi _z = __builtin_ia32_pcmpgtb128(current, _G_z); - v2di _0_z = __builtin_ia32_pxor128(v2di(_0), v2di(_z)); - v2di toLow = __builtin_ia32_pand128(_0_z, v2di(current)); - v16qi low = v16qi(__builtin_ia32_por128(toLow, _G_lowCase)); - _0 = __builtin_ia32_pcmpgtb128(low, _G_0); - v16qi _9 = __builtin_ia32_pcmpgtb128(low, _G_9); - v16qi _a = __builtin_ia32_pcmpgtb128(low, _G_a); - _z = __builtin_ia32_pcmpgtb128(low, _G_z); - v2di _0_9_m = __builtin_ia32_pxor128(v2di(_0), v2di(_9)); - v2di _a_z_m = __builtin_ia32_pxor128(v2di(_a), v2di(_z)); - v2di _0_9 = __builtin_ia32_pand128(_0_9_m, v2di(low)); - v2di _a_z = __builtin_ia32_pand128(_a_z_m, v2di(low)); - folded[i] = __builtin_ia32_por128(_0_9, _a_z); -#endif -#else -# warning "Intel's icc compiler does not like __builtin_ia32_pxor128" - LOG_ABORT("should not be reached"); -#endif - } - return toFoldOrg+i*16; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/fold.h b/streamingvisitors/src/vespa/vsm/searcher/fold.h deleted file mode 100644 index 578b883484f..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/fold.h +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/common/document.h> - -namespace vsm { - -const search::byte * sse2_foldaa(const search::byte * toFoldOrg, size_t sz, search::byte * foldedOrg); -const search::byte * sse2_foldua(const search::byte * toFoldOrg, size_t sz, search::byte * foldedOrg); - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp deleted file mode 100644 index fc5d77de419..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.cpp +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "futf8strchrfieldsearcher.h" -#ifdef __x86_64__ -#include "fold.h" -#endif -#include <vespa/vespalib/util/size_literals.h> - -using search::byte; -using search::streaming::QueryTerm; -using search::v16qi; -using vespalib::Optimized; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -FUTF8StrChrFieldSearcher::duplicate() const -{ - return std::make_unique<FUTF8StrChrFieldSearcher>(*this); -} - -FUTF8StrChrFieldSearcher::FUTF8StrChrFieldSearcher() - : UTF8StrChrFieldSearcher(), - _folded(4_Ki) -{ } -FUTF8StrChrFieldSearcher::FUTF8StrChrFieldSearcher(FieldIdT fId) - : UTF8StrChrFieldSearcher(fId), - _folded(4_Ki) -{ } -FUTF8StrChrFieldSearcher::~FUTF8StrChrFieldSearcher() {} - -bool -FUTF8StrChrFieldSearcher::ansiFold(const char * toFold, size_t sz, char * folded) -{ - bool retval(true); - for(size_t i=0; i < sz; i++) { - byte c = toFold[i]; - if (c>=128) { retval = false; break; } - folded[i] = FieldSearcher::_foldLowCase[c]; - } - return retval; -} - -bool -FUTF8StrChrFieldSearcher::lfoldaa(const char * toFold, size_t sz, char * folded, size_t & unalignedStart) -{ - unalignedStart = (size_t(toFold) & 0xF); -#ifdef __x86_64__ - bool retval(true); - size_t unalignedsz = std::min(sz, (16 - unalignedStart) & 0xF); - - size_t foldedUnaligned = (size_t(folded) & 0xF); - unalignedStart = (foldedUnaligned < unalignedStart) ? (unalignedStart-foldedUnaligned) : unalignedStart + 16 - foldedUnaligned; - size_t alignedStart = unalignedStart+unalignedsz; - - size_t alignedsz = sz - unalignedsz; - size_t alignsz16 = alignedsz & 0xFFFFFFF0; - size_t rest = alignedsz - alignsz16; - - if (unalignedStart) { - retval = ansiFold(toFold, unalignedsz, folded + unalignedStart); - } - if (alignsz16 && retval) { - const byte * end = sse2_foldaa(reinterpret_cast<const byte *>(toFold+unalignedsz), alignsz16, reinterpret_cast<byte *>(folded+alignedStart)); - retval = (end == reinterpret_cast<const byte *>(toFold+unalignedsz+alignsz16)); - } - if(rest && retval) { - retval = ansiFold(toFold + unalignedsz + alignsz16, rest, folded+alignedStart+alignsz16); - } - return retval; -#else - return ansiFold(toFold, sz, folded + unalignedStart); -#endif -} - -bool -FUTF8StrChrFieldSearcher::lfoldua(const char * toFold, size_t sz, char * folded, size_t & alignedStart) -{ - alignedStart = 0xF - (size_t(folded + 0xF) % 0x10); -#ifdef __x86_64__ - bool retval(true); - - size_t alignsz16 = sz & 0xFFFFFFF0; - size_t rest = sz - alignsz16; - - if (alignsz16) { - const byte * end = sse2_foldua(reinterpret_cast<const byte *>(toFold), alignsz16, reinterpret_cast<byte *>(folded+alignedStart)); - retval = (end == reinterpret_cast<const byte *>(toFold+alignsz16)); - } - if(rest && retval) { - retval = ansiFold(toFold + alignsz16, rest, folded+alignedStart+alignsz16); - } - return retval; -#else - return ansiFold(toFold, sz, folded + alignedStart); -#endif -} - -namespace { - -#ifdef __x86_64__ -inline const char * advance(const char * n, const v16qi zero) -{ - uint32_t charMap = 0; - unsigned zeroCountSum = 0; - do { // find first '\0' character (the end of the word) -#ifndef __INTEL_COMPILER -#ifdef __clang__ - v16qi tmpCurrent = __builtin_ia32_lddqu(n+zeroCountSum); - v16qi tmp0 = tmpCurrent == zero; -#else - v16qi tmpCurrent = __builtin_ia32_loaddqu(n+zeroCountSum); - v16qi tmp0 = __builtin_ia32_pcmpeqb128(tmpCurrent, reinterpret_cast<v16qi>(zero)); -#endif - charMap = __builtin_ia32_pmovmskb128(tmp0); // 1 in charMap equals to '\0' in input buffer -#else -# warning "Intel's icc compiler does not like __builtin_ia32_xxxxx" - LOG_ABORT("should not be reached"); -#endif - zeroCountSum += 16; - } while (!charMap); - int charCount = Optimized::lsbIdx(charMap); // number of word characters in last 16 bytes - uint32_t zeroMap = ((~charMap) & 0xffff) >> charCount; - - int zeroCounter = Optimized::lsbIdx(zeroMap); // number of non-characters ('\0') in last 16 bytes - int sum = zeroCountSum - 16 + charCount + zeroCounter; - if (!zeroMap) { // only '\0' in last 16 bytes (no new word found) - do { // find first word character (the next word) -#ifndef __INTEL_COMPILER -#ifdef __clang__ - v16qi tmpCurrent = __builtin_ia32_lddqu(n+zeroCountSum); - tmpCurrent = tmpCurrent > zero; -#else - v16qi tmpCurrent = __builtin_ia32_loaddqu(n+zeroCountSum); - tmpCurrent = __builtin_ia32_pcmpgtb128(tmpCurrent, reinterpret_cast<v16qi>(zero)); -#endif - zeroMap = __builtin_ia32_pmovmskb128(tmpCurrent); // 1 in zeroMap equals to word character in input buffer -#else -# warning "Intel's icc compiler does not like __builtin_ia32_xxxxx" - LOG_ABORT("should not be reached"); -#endif - zeroCountSum += 16; - } while(!zeroMap); - zeroCounter = Optimized::lsbIdx(zeroMap); - sum = zeroCountSum - 16 + zeroCounter; - } - return n + sum; -} -#else -inline const char* advance(const char* n) -{ - const char* p = n; - const char* zero = static_cast<const char *>(memchr(p, 0, 64_Ki)); - while (zero == nullptr) { - p += 64_Ki; - zero = static_cast<const char *>(memchr(p, 0, 64_Ki)); - } - p = zero; - while (*p == '\0') { - ++p; - } - return p; -} -#endif - -} - -size_t FUTF8StrChrFieldSearcher::match(const char *folded, size_t sz, QueryTerm & qt) -{ -#ifdef __x86_64__ - const v16qi _G_zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -#endif - termcount_t words(0); - const char * term; - termsize_t tsz = qt.term(term); - const char *et=term+tsz; - const char * n = folded; - const char *e = n + sz; - - while (!*n) n++; - while (true) { - if (n>=e) break; - -#if 0 - v16qi current = __builtin_ia32_loaddqu(n); - current = __builtin_ia32_pcmpeqb128(current, _qtlFast[0]); - unsigned eqMap = __builtin_ia32_pmovmskb128(current); - unsigned neqMap = ~eqMap; - unsigned numEq = Optimized::lsbIdx(neqMap); - /* if (eqMap)*/ { - if (numEq >= 16) { - const char *tt = term+16; - const char *p = n+16; - while ( (*tt == *p) && (tt < et)) { tt++; p++; numEq++; } - } - if ((numEq >= tsz) && (prefix() || qt.isPrefix() || !n[tsz])) { - addHit(qt, words); - } - } -#else - const char *tt = term; - while ((tt < et) && (*tt == *n)) { tt++; n++; } - if ((tt == et) && (prefix() || qt.isPrefix() || !*n)) { - addHit(qt, words); - } -#endif - words++; -#ifdef __x86_64__ - n = advance(n, _G_zero); -#else - n = advance(n); -#endif - } - return words; -} - -size_t FUTF8StrChrFieldSearcher::match(const char *folded, size_t sz, size_t mintsz, QueryTerm ** qtl, size_t qtlSize) -{ - (void) mintsz; -#ifdef __x86_64__ - const v16qi _G_zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -#endif - termcount_t words(0); - const char * n = folded; - const char *e = n + sz; - while (!*n) n++; - for( ; ; ) { - if (n>=e) break; -#if 0 - v16qi current = __builtin_ia32_loaddqu(n); - for(size_t i=0; i < qtlSize; i++) { - v16qi tmpEq = __builtin_ia32_pcmpeqb128(current, _qtlFast[i]); - unsigned eqMap = __builtin_ia32_pmovmskb128(tmpEq); - /* if (eqMap) */ { - QueryTerm & qt = *qtl[i]; - unsigned neqMap = ~eqMap; - unsigned numEq = Optimized::lsbIdx(neqMap); - termsize_t tsz = qt.termLen(); - if (numEq >= 16) { - const char *tt = qt.term() + 16; - const char *et=tt+tsz; - const char *p = n+16; - while ( (*tt == *p) && (tt < et)) { tt++; p++; numEq++; } - } - if ((numEq >= tsz) && (prefix() || qt.isPrefix() || !n[tsz])) { - addHit(qt, words); - } - } - } -#else - for(QueryTerm ** it=qtl, ** mt=qtl+qtlSize; it != mt; it++) { - QueryTerm & qt = **it; - const char * term; - termsize_t tsz = qt.term(term); - - const char *et=term+tsz; - const char *fnt; - for (fnt = n; (term < et) && (*term == *fnt); term++, fnt++); - if ((term == et) && (prefix() || qt.isPrefix() || !*fnt)) { - addHit(qt, words); - } - } -#endif - words++; -#ifdef __x86_64__ - n = advance(n, _G_zero); -#else - n = advance(n); -#endif - } - return words; -} - -size_t FUTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - _folded.reserve(f.size()+16*3); //Enable fulle xmm0 store - size_t unalignedStart(0); - bool ascii7Bit = lfoldua(f.data(), f.size(), &_folded[0], unalignedStart); - if (ascii7Bit) { - char * folded = &_folded[unalignedStart]; - /// Add the pattern 00 01 00 to avoid multiple eof tests of falling off the edge. - folded[f.size()] = 0; - folded[f.size()+1] = 0x01; - memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values - return match(folded, f.size(), qt); - NEED_CHAR_STAT(addPureUsAsciiField(f.size())); - } else { - return UTF8StrChrFieldSearcher::matchTerm(f, qt); - } -} - -size_t FUTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - _folded.reserve(f.size()+16*3); //Enable fulle xmm0 store - size_t unalignedStart(0); - bool ascii7Bit = lfoldua(f.data(), f.size(), &_folded[0], unalignedStart); - if (ascii7Bit) { - char * folded = &_folded[unalignedStart]; - /// Add the pattern 00 01 00 to avoid multiple eof tests of falling off the edge. - folded[f.size()] = 0; - folded[f.size()+1] = 0x01; - memset(folded + f.size() + 2, 0, 16); // initialize padding data to avoid valgrind complaining about uninitialized values - return match(folded, f.size(), mintsz, &_qtl[0], _qtl.size()); - NEED_CHAR_STAT(addPureUsAsciiField(f.size())); - } else { - return UTF8StrChrFieldSearcher::matchTerms(f, mintsz); - } -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h deleted file mode 100644 index 900ab4c9120..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/futf8strchrfieldsearcher.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "utf8strchrfieldsearcher.h" - -namespace vsm { - -class FUTF8StrChrFieldSearcher : public UTF8StrChrFieldSearcher -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - FUTF8StrChrFieldSearcher(); - FUTF8StrChrFieldSearcher(FieldIdT fId); - ~FUTF8StrChrFieldSearcher(); - static bool ansiFold(const char * toFold, size_t sz, char * folded); - static bool lfoldaa(const char * toFold, size_t sz, char * folded, size_t & unalignedStart); - static bool lfoldua(const char * toFold, size_t sz, char * folded, size_t & alignedStart); - private: - size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - size_t matchTerms(const FieldRef&, const size_t shortestTerm) override; - virtual size_t match(const char *folded, size_t sz, search::streaming::QueryTerm & qt); - size_t match(const char *folded, size_t sz, size_t mintsz, search::streaming::QueryTerm ** qtl, size_t qtlSize); - std::vector<char> _folded; -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp deleted file mode 100644 index db93bda7778..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "geo_pos_field_searcher.h" -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/structfieldvalue.h> -#include <vespa/searchlib/common/geo_location_parser.h> -#include <vespa/vespalib/util/issue.h> -#include <vespa/vespalib/util/exception.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.searcher.geo_pos_field_searcher"); - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using search::common::GeoLocation; -using search::common::GeoLocationParser; - -namespace vsm { - -std::unique_ptr<FieldSearcher> GeoPosFieldSearcher::duplicate() const { - return std::make_unique<GeoPosFieldSearcher>(*this); -} - -GeoPosFieldSearcher::GeoPosFieldSearcher(FieldIdT fId) : - FieldSearcher(fId), - _geoPosTerm() -{} - -GeoPosFieldSearcher::~GeoPosFieldSearcher() {} - -void GeoPosFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) { - _geoPosTerm.clear(); - FieldSearcher::prepare(qtl, buf); - for (const QueryTerm * qt : qtl) { - const vespalib::string & str = qt->getTermString(); - GeoLocationParser parser; - bool valid = parser.parseNoField(str); - if (! valid) { - vespalib::Issue::report("invalid position in term: %s", str.c_str()); - } - _geoPosTerm.emplace_back(parser.getGeoLocation()); - } -} - -void GeoPosFieldSearcher::onValue(const document::FieldValue & fv) { - LOG(spam, "ignore field value '%s'", fv.toString().c_str()); -} - -void GeoPosFieldSearcher::onStructValue(const document::StructFieldValue & fv) { - size_t num_terms = _geoPosTerm.size(); - for (size_t j = 0; j < num_terms; ++j) { - const GeoPosInfo & gpi = _geoPosTerm[j]; - if (gpi.valid() && gpi.cmp(fv)) { - addHit(*_qtl[j], 0); - } - } - ++_words; -} - -bool GeoPosFieldSearcher::GeoPosInfo::cmp(const document::StructFieldValue & sfv) const { - try { - auto xv = sfv.getValue("x"); - auto yv = sfv.getValue("y"); - if (xv && yv) { - int32_t x = xv->getAsInt(); - int32_t y = yv->getAsInt(); - GeoLocation::Point p{x,y}; - if (inside_limit(p)) { - return true; - } - } - } catch (const vespalib::Exception &e) { - vespalib::Issue::report("bad fieldvalue for GeoPosFieldSearcher: %s", e.getMessage().c_str()); - } - return false; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h b/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h deleted file mode 100644 index ef1c5b5a1c4..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/geo_pos_field_searcher.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearcher.h" -#include <vespa/searchlib/common/geo_location.h> - -namespace vsm { - -class GeoPosFieldSearcher : public FieldSearcher { -public: - GeoPosFieldSearcher(FieldIdT fId=0); - ~GeoPosFieldSearcher(); - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; - void onValue(const document::FieldValue & fv) override; - void onStructValue(const document::StructFieldValue & fv) override; - std::unique_ptr<FieldSearcher> duplicate() const override; -protected: - using GeoLocation = search::common::GeoLocation; - class GeoPosInfo : public GeoLocation { - public: - GeoPosInfo (GeoLocation loc) noexcept : GeoLocation(std::move(loc)) {} - bool cmp(const document::StructFieldValue & fv) const; - }; - typedef std::vector<GeoPosInfo> GeoPosInfoListT; - GeoPosInfoListT _geoPosTerm; -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp deleted file mode 100644 index 8cfb8e6df14..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "intfieldsearcher.h" - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -IntFieldSearcher::duplicate() const -{ - return std::make_unique<IntFieldSearcher>(*this); -} - -IntFieldSearcher::IntFieldSearcher(FieldIdT fId) : - FieldSearcher(fId), - _intTerm() -{ } - -IntFieldSearcher::~IntFieldSearcher() = default; - -void IntFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) -{ - _intTerm.clear(); - FieldSearcher::prepare(qtl, buf); - for (QueryTermList::const_iterator it=qtl.begin(); it < qtl.end(); it++) { - const QueryTerm * qt = *it; - size_t sz(qt->termLen()); - if (sz) { - int64_t low; - int64_t high; - bool valid = qt->getAsIntegerTerm(low, high); - _intTerm.push_back(IntInfo(low, high, valid)); - } - } -} - -void IntFieldSearcher::onValue(const document::FieldValue & fv) -{ - for(size_t j=0, jm(_intTerm.size()); j < jm; j++) { - const IntInfo & ii = _intTerm[j]; - if (ii.valid() && (ii.cmp(fv.getAsLong()))) { - addHit(*_qtl[j], 0); - } - } - ++_words; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h deleted file mode 100644 index a2b17a87f4b..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/intfieldsearcher.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearcher.h" - -namespace vsm { - -class IntFieldSearcher : public FieldSearcher -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - IntFieldSearcher(FieldIdT fId=0); - ~IntFieldSearcher(); - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; - void onValue(const document::FieldValue & fv) override; -protected: - class IntInfo - { - public: - IntInfo(int64_t low, int64_t high, bool v) : _lower(low), _upper(high), _valid(v) { if (low > high) { _lower = high; _upper = low; } } - bool cmp(int64_t key) const { return (_lower <= key) && (key <= _upper); } - bool valid() const { return _valid; } - private: - int64_t _lower; - int64_t _upper; - bool _valid; - }; - typedef std::vector<IntInfo> IntInfoListT; - IntInfoListT _intTerm; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp deleted file mode 100644 index 1c4ff78ff4a..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "strchrfieldsearcher.h" -#include <vespa/document/fieldvalue/stringfieldvalue.h> - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -void StrChrFieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) -{ - FieldSearcher::prepare(qtl, buf); -} - -void StrChrFieldSearcher::onValue(const document::FieldValue & fv) -{ - const document::LiteralFieldValueB & sfv = static_cast<const document::LiteralFieldValueB &>(fv); - vespalib::stringref val = sfv.getValueRef(); - FieldRef fr(val.data(), std::min(maxFieldLength(), val.size())); - matchDoc(fr); -} - -bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) -{ - bool retval(true); - if (_qtl.size() > 1) { - size_t mintsz = shortestTerm(); - if (fieldRef.size() >= mintsz) { - _words += matchTerms(fieldRef, mintsz); - } else { - _words += countWords(fieldRef); - } - } else { - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - if (fieldRef.size() >= qt.termLen()) { - _words += matchTerm(fieldRef, qt); - } else { - _words += countWords(fieldRef); - } - } - } - return retval; -} - -size_t StrChrFieldSearcher::shortestTerm() const -{ - size_t mintsz(_qtl.front()->termLen()); - for(QueryTermList::const_iterator it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) { - const QueryTerm & qt = **it; - mintsz = std::min(mintsz, qt.termLen()); - } - return mintsz; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h deleted file mode 100644 index 0155c79cddf..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearcher.h" - -namespace vsm { - -class StrChrFieldSearcher : public FieldSearcher -{ -public: - StrChrFieldSearcher() : FieldSearcher(0) { } - StrChrFieldSearcher(FieldIdT fId) : FieldSearcher(fId) { } - void onValue(const document::FieldValue & fv) override; - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; -private: - size_t shortestTerm() const; - bool matchDoc(const FieldRef & field); - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) = 0; - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) = 0; -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp deleted file mode 100644 index 977602a691c..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "utf8exactstringfieldsearcher.h" - -using search::byte; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8ExactStringFieldSearcher::duplicate() const -{ - return std::make_unique<UTF8ExactStringFieldSearcher>(*this); -} - -size_t -UTF8ExactStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - (void) mintsz; - for (QueryTermList::iterator it = _qtl.begin(), mt = _qtl.end(); it != mt; ++it) { - QueryTerm & qt = **it; - matchTermExact(f, qt); - } - return 1; -} - -size_t -UTF8ExactStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - return matchTermExact(f, qt); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h deleted file mode 100644 index 744974a6cf6..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h> - -namespace vsm -{ - -/** - * This class does suffix utf8 searches. - **/ -class UTF8ExactStringFieldSearcher : public UTF8StringFieldSearcherBase -{ -protected: - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; - -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8ExactStringFieldSearcher() : UTF8StringFieldSearcherBase() { } - UTF8ExactStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp deleted file mode 100644 index 9aef99f9fa1..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "utf8flexiblestringfieldsearcher.h" - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.searcher.utf8flexiblestringfieldsearcher"); - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8FlexibleStringFieldSearcher::duplicate() const -{ - return std::make_unique<UTF8FlexibleStringFieldSearcher>(*this); -} - -size_t -UTF8FlexibleStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - (void) mintsz; - size_t words = 0; - for (QueryTermList::iterator it = _qtl.begin(); it != _qtl.end(); ++it) { - words = matchTerm(f, **it); - } - return words; -} - -size_t -UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - if (qt.isPrefix()) { - LOG(debug, "Use prefix match for prefix term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermRegular(f, qt); - } else if (qt.isSubstring()) { - LOG(debug, "Use substring match for substring term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermSubstring(f, qt); - } else if (qt.isSuffix()) { - LOG(debug, "Use suffix match for suffix term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermSuffix(f, qt); - } else if (qt.isExactstring()) { - LOG(debug, "Use exact match for exact term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermExact(f, qt); - } else { - if (substring()) { - LOG(debug, "Use substring match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermSubstring(f, qt); - } else if (suffix()) { - LOG(debug, "Use suffix match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermSuffix(f, qt); - } else if (exact()) { - LOG(debug, "Use exact match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermExact(f, qt); - } else { - LOG(debug, "Use regular/prefix match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); - return matchTermRegular(f, qt); - } - } -} - -UTF8FlexibleStringFieldSearcher::UTF8FlexibleStringFieldSearcher() : - UTF8StringFieldSearcherBase() -{ } - -UTF8FlexibleStringFieldSearcher::UTF8FlexibleStringFieldSearcher(FieldIdT fId) : - UTF8StringFieldSearcherBase(fId) -{ } - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h deleted file mode 100644 index 63931af0036..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h> - -namespace vsm -{ - -/** - * This class does utf8 searches based on the query term type. - * It will choose between regular search strategy (including prefix) and substring search strategy. - **/ -class UTF8FlexibleStringFieldSearcher : public UTF8StringFieldSearcherBase -{ -private: - /** - * Tries to match the given query term against the content of the given field reference. - * Search strategy is choosen based on the query term type. - **/ - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - - /** - * Tries to match each query term in the underlying query against the content of the given field reference. - * Search strategy is choosen based on the query term type. - **/ - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; - -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8FlexibleStringFieldSearcher(); - UTF8FlexibleStringFieldSearcher(FieldIdT fId); -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp deleted file mode 100644 index 0d93009655c..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "utf8strchrfieldsearcher.h" - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using search::byte; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8StrChrFieldSearcher::duplicate() const -{ - return std::make_unique<UTF8StrChrFieldSearcher>(*this); -} - -size_t -UTF8StrChrFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - (void) mintsz; - termcount_t words(0); - const byte * n = reinterpret_cast<const byte *> (f.data()); - const byte * e = n + f.size(); - if (f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * fn = &(*_buf.get())[0]; - size_t fl(0); - - for( ; n < e; ) { - if (!*n) { _zeroCount++; n++; } - n = tokenize(n, _buf->capacity(), fn, fl); - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - const cmptype_t * term; - termsize_t tsz = qt.term(term); - if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) { - const cmptype_t *tt=term, *et=term+tsz; - for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++); - if (tt == et) { - addHit(qt, words); - } - } - } - words++; - } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); - return words; -} - -size_t -UTF8StrChrFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - return matchTermRegular(f, qt); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h deleted file mode 100644 index 1687a1a18c0..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8strchrfieldsearcher.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "utf8stringfieldsearcherbase.h" - -namespace vsm { - -/** - * This class does normal utf8 searches. - * This class uses an highly optimized version of the tokenize method in fastlib. - **/ -class UTF8StrChrFieldSearcher : public UTF8StringFieldSearcherBase -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8StrChrFieldSearcher() : UTF8StringFieldSearcherBase() { } - UTF8StrChrFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { } - -protected: - size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp deleted file mode 100644 index 148cdf2c0c3..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "utf8stringfieldsearcherbase.h" -#include <cassert> - -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using search::byte; - -namespace vsm { - -const byte * -UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * dstbuf, size_t & tokenlen) -{ - if (maxSz > 0) { - maxSz--; - } - ucs4_t c(*p); - ucs4_t *q(dstbuf); - const byte * end(p+maxSz); - - // Skip non-word characters between words - for (; p < end; ) { - if (c < 128) { - if (!c) { break; } - p++; - if (__builtin_expect(_isWord[c], false)) { - *q++ = _foldCase[c]; - c = 0; - } else { - c = *p; - } - } else { - const byte * oldP(p); - c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); - if (Fast_UnicodeUtil::IsWordChar(c)) { - _utf8Count[p-oldP-1]++; - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) { - q = Fast_UnicodeUtil::ucs4copy(q,repl); - } - } else { - c = ToFold(c); - *q++ = c; - } - break; - } else { - if (c == _BadUTF8Char) { - _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; - } - c = *p; - } - } - } - - c = *p; // Next char - for (; p < end;) { - if (c < 128) { // Common case, ASCII - if (!c) { break; } - p++; - if (__builtin_expect(!_isWord[c], false)) { - c = 0; - } else { - *q++ = _foldCase[c]; - c = *p; - } - } else { - const byte * oldP(p); - c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); - if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) { - _utf8Count[p-oldP-1]++; - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) { - q = Fast_UnicodeUtil::ucs4copy(q,repl); - } - } else { - c = ToFold(c); - *q++ = c; - } - - c = *p; - } else { - if (c == _BadUTF8Char) { - _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; - } - break; - } - } - } - *q = 0; - tokenlen = q - dstbuf; - return p; -} - -size_t -UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt) -{ - termcount_t words(0); - const byte * n = reinterpret_cast<const byte *> (f.data()); - // __builtin_prefetch(n, 0, 0); - const cmptype_t * term; - termsize_t tsz = qt.term(term); - const byte * e = n + f.size(); - if ( f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * fn = &(*_buf.get())[0]; - size_t fl(0); - - for( ; n < e; ) { - if (!*n) { _zeroCount++; n++; } - n = tokenize(n, _buf->capacity(), fn, fl); - if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) { - const cmptype_t *tt=term, *et=term+tsz; - for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++); - if (tt == et) { - addHit(qt, words); - } - } - words++; - } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); - return words; -} - -size_t -UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt) -{ - const byte * n = reinterpret_cast<const byte *> (f.data()); - const cmptype_t * term; - termsize_t tsz = qt.term(term); - const cmptype_t * eterm = term+tsz; - const byte * e = n + f.size(); - if (tsz <= f.size()) { - bool equal(true); - for (; equal && (n < e) && (term < eterm); term++) { - if (*term < 0x80) { - equal = (*term == _foldCase[*n++]); - } else { - cmptype_t c = ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n)); - equal = (*term == c); - } - } - if (equal && (term == eterm) && (qt.isPrefix() || (n == e))) { - addHit(qt,0); - } - } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); - return 1; -} - -size_t -UTF8StringFieldSearcherBase::matchTermSubstring(const FieldRef & f, QueryTerm & qt) -{ - if (qt.termLen() == 0) { return 0; } - const byte * n = reinterpret_cast<const byte *> (f.data()); - const cmptype_t * term; - termsize_t tsz = qt.term(term); - if ( f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * fntemp = &(*_buf.get())[0]; - BufferWrapper wrapper(fntemp); - size_t fl = skipSeparators(n, f.size(), wrapper); - const cmptype_t * fn(fntemp); - const cmptype_t * fe = fn + fl; - const cmptype_t * fre = fe - tsz; - termcount_t words(0); - for(words = 0; fn <= fre; ) { - const cmptype_t *tt=term, *et=term+tsz, *fnt=fn; - for (; (tt < et) && (*tt == *fnt); tt++, fnt++); - if (tt == et) { - fn = fnt; - addHit(qt, words); - } else { - if ( ! Fast_UnicodeUtil::IsWordChar(*fn++) ) { - words++; - for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn) ; fn++ ); - } - } - } - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); - return words + 1; // we must also count the last word -} - -size_t -UTF8StringFieldSearcherBase::matchTermSuffix(const FieldRef & f, QueryTerm & qt) -{ - termcount_t words = 0; - const byte * srcbuf = reinterpret_cast<const byte *> (f.data()); - const byte * srcend = srcbuf + f.size(); - const cmptype_t * term; - termsize_t tsz = qt.term(term); - if (f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * dstbuf = &(*_buf.get())[0]; - size_t tokenlen = 0; - - for( ; srcbuf < srcend; ) { - if (*srcbuf == 0) { - ++_zeroCount; - ++srcbuf; - } - srcbuf = tokenize(srcbuf, _buf->capacity(), dstbuf, tokenlen); - if (matchTermSuffix(term, tsz, dstbuf, tokenlen)) { - addHit(qt, words); - } - words++; - } - return words; -} - -UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase() : - StrChrFieldSearcher(), - Fast_NormalizeWordFolder(), - Fast_UnicodeUtil() -{ -} - -UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase(FieldIdT fId) : - StrChrFieldSearcher(fId), - Fast_NormalizeWordFolder(), - Fast_UnicodeUtil() -{ -} - -UTF8StringFieldSearcherBase::~UTF8StringFieldSearcherBase() {} - -void -UTF8StringFieldSearcherBase::prepare(QueryTermList & qtl, const SharedSearcherBuf & buf) -{ - StrChrFieldSearcher::prepare(qtl, buf); - _buf = buf; -} - -bool -UTF8StringFieldSearcherBase::matchTermSuffix(const cmptype_t * term, size_t termlen, - const cmptype_t * word, size_t wordlen) -{ - if ((termlen <= wordlen)) { - const cmptype_t * titr = term + termlen - 1; - const cmptype_t * witr = word + wordlen - 1; - bool hit = true; - // traverse the term and the word back to front - for (; titr >= term; --titr, --witr) { - if (*titr != *witr) { - hit = false; - break; - } - } - return hit; - } - return false; -} - -bool -UTF8StringFieldSearcherBase::isSeparatorCharacter(ucs4_t c) -{ - return ((c < 0x20) && (c != '\n') && (c != '\t')); -} - -template <typename T> -size_t -UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T & dstbuf) { - const search::byte * e(p+sz); - const search::byte * b(p); - - for(; p < e; ) { - ucs4_t c(*p); - const search::byte * oldP(p); - if (c < 128) { - p++; - if (!isSeparatorCharacter(c)) { - dstbuf.onCharacter(_foldCase[c], (oldP - b)); - } - } else { - c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); - const char *repl = ReplacementString(c); - if (repl != NULL) { - size_t repllen = strlen(repl); - if (repllen > 0) { - ucs4_t * buf = dstbuf.getBuf(); - ucs4_t * newBuf = Fast_UnicodeUtil::ucs4copy(buf, repl); - if (dstbuf.hasOffsets()) { - for (; buf < newBuf; ++buf) { - dstbuf.incBuf(1); - dstbuf.onOffset(oldP - b); - } - } else { - dstbuf.incBuf(newBuf - buf); - } - } - } else { - c = ToFold(c); - dstbuf.onCharacter(c, (oldP - b)); - } - if (c == _BadUTF8Char) { - _badUtf8Count++; - } else { - _utf8Count[p-oldP-1]++; - } - } - } - assert(dstbuf.valid()); - return dstbuf.size(); -} - -template unsigned long UTF8StringFieldSearcherBase::skipSeparators<UTF8StringFieldSearcherBase::BufferWrapper>(unsigned char const*, unsigned long, UTF8StringFieldSearcherBase::BufferWrapper&); -template unsigned long UTF8StringFieldSearcherBase::skipSeparators<UTF8StringFieldSearcherBase::OffsetWrapper>(unsigned char const*, unsigned long, UTF8StringFieldSearcherBase::OffsetWrapper&); - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h deleted file mode 100644 index f540a7ac457..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "strchrfieldsearcher.h" -#include <vespa/fastlib/text/normwordfolder.h> - -namespace vsm { - -/** - * This class is the base class for all utf8 string searchers. - * It contains utility functions used by the other searchers. - * As normal the prepare method is called - * after the query is built. A SharedSearcherBuf is used given to it. This is a - * buffer that is shared among all searchers that are run in the same context. - * Reuse of this buffer ensures better cache hit ratio because this is just a - * scratchpad for tokenizing. It will grow till the max size and stay there. - **/ -class UTF8StringFieldSearcherBase : public StrChrFieldSearcher, protected Fast_NormalizeWordFolder, public Fast_UnicodeUtil -{ -public: - /** - * Template class that wraps an ucs4 buffer. - * Used when invoking skipSeparators() during substring matching. - **/ - class BufferWrapper - { - protected: - ucs4_t * _bbuf; - ucs4_t * _cbuf; - - public: - BufferWrapper(ucs4_t * buf) : _bbuf(buf), _cbuf(buf) { } - BufferWrapper(ucs4_t * buf, size_t *) : _bbuf(buf), _cbuf(buf) { } - void onCharacter(ucs4_t ch, size_t) { *_cbuf++ = ch; } - void onOffset(size_t) { } - void incBuf(size_t inc) { _cbuf += inc; } - ucs4_t * getBuf() { return _cbuf; } - bool valid() { return true; } - size_t size() { return (_cbuf - _bbuf); } - bool hasOffsets() { return false; } - }; - - /** - * Template class that wraps an offset buffer in addition to an ucs4 buffer. - * The offset buffer contains offsets into the original utf8 buffer. - **/ - class OffsetWrapper : public BufferWrapper - { - private: - size_t * _boff; - size_t * _coff; - - public: - OffsetWrapper(ucs4_t * buf, size_t * offsets) : BufferWrapper(buf), _boff(offsets), _coff(offsets) {} - void onCharacter(ucs4_t ch, size_t of) { *_cbuf++ = ch; *_coff++ = of; } - void onOffset(size_t of) { *_coff++ = of; } - bool valid() { return (size() == (size_t)(_coff - _boff)); } - bool hasOffsets() { return true; } - }; - -protected: - SharedSearcherBuf _buf; - - const search::byte * tokenize(const search::byte * buf, size_t maxSz, cmptype_t * dstbuf, size_t & tokenlen); - - /** - * Matches the given query term against the words in the given field reference - * using exact or prefix match strategy. - * - * @param f the field reference to match against. - * @param qt the query term trying to match. - * @return the number of words in the field ref. - **/ - size_t matchTermRegular(const FieldRef & f, search::streaming::QueryTerm & qt); - - /** - * Matches the given query term against the characters in the given field reference - * using substring match strategy. - * - * @param f the field reference to match against. - * @param qt the query term trying to match. - * @return the number of words in the field ref. - **/ - size_t matchTermSubstring(const FieldRef & f, search::streaming::QueryTerm & qt); - - /** - * Matches the given query term against the words in the given field reference - * using suffix match strategy. - * - * @param f the field reference to match against. - * @param qt the query term trying to match. - * @return the number of words in the field ref. - **/ - size_t matchTermSuffix(const FieldRef & f, search::streaming::QueryTerm & qt); - - /** - * Matches the given query term against the words in the given field reference - * using exact match strategy. - * - * @param f the field reference to match against. - * @param qt the query term trying to match. - * @return the number of words in the field ref. - **/ - size_t matchTermExact(const FieldRef & f, search::streaming::QueryTerm & qt); - -public: - UTF8StringFieldSearcherBase(); - UTF8StringFieldSearcherBase(FieldIdT fId); - ~UTF8StringFieldSearcherBase(); - void prepare(search::streaming::QueryTermList & qtl, const SharedSearcherBuf & buf) override; - /** - * Matches the given query term against the given word using suffix match strategy. - * - * @param term the buffer with the term. - * @param termLen the length of the term. - * @param word the buffer with the word. - * @param wordlen the length of the word. - * @return true if the term matches the word. - **/ - static bool matchTermSuffix(const cmptype_t * term, size_t termlen, - const cmptype_t * word, size_t wordlen); - - /** - * Checks whether the given character is a separator character. - **/ - static bool isSeparatorCharacter(ucs4_t); - - /** - * Transforms the given utf8 array into an array of ucs4 characters. - * Folding is performed. Separator characters are skipped. - **/ - template <typename T> - size_t skipSeparators(const search::byte * p, size_t sz, T & dstbuf); - -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp deleted file mode 100644 index fd327d3a3df..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vsm/searcher/utf8substringsearcher.h> - -using search::byte; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8SubStringFieldSearcher::duplicate() const -{ - return std::make_unique<UTF8SubStringFieldSearcher>(*this); -} - -size_t -UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - const byte * n = reinterpret_cast<const byte *> (f.data()); - if ( f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * fntemp = &(*_buf.get())[0]; - BufferWrapper wrapper(fntemp); - size_t fl = skipSeparators(n, f.size(), wrapper); - const cmptype_t * fn(fntemp); - const cmptype_t * fe = fn + fl; - const cmptype_t * fre = fe - mintsz; - termcount_t words(0); - for(words = 0; fn <= fre; ) { - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - const cmptype_t * term; - termsize_t tsz = qt.term(term); - - const cmptype_t *tt=term, *et=term+tsz, *fnt=fn; - for (; (tt < et) && (*tt == *fnt); tt++, fnt++); - if (tt == et) { - addHit(qt, words); - } - } - if ( ! Fast_UnicodeUtil::IsWordChar(*fn++) ) { - words++; - for(; (fn < fre) && ! Fast_UnicodeUtil::IsWordChar(*fn); fn++ ); - } - } - - NEED_CHAR_STAT(addAnyUtf8Field(f.size())); - return words + 1; // we must also count the last word -} - -size_t -UTF8SubStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - return matchTermSubstring(f, qt); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h deleted file mode 100644 index 1c463c28847..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h> - -namespace vsm { - -/** - * This class does substring utf8 searches. - **/ -class UTF8SubStringFieldSearcher : public UTF8StringFieldSearcherBase -{ -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8SubStringFieldSearcher() : UTF8StringFieldSearcherBase() { } - UTF8SubStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { } -protected: - size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp deleted file mode 100644 index be02a58cfda..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "utf8substringsnippetmodifier.h" -#include <cassert> - -using search::byte; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8SubstringSnippetModifier::duplicate() const -{ - return std::make_unique<UTF8SubstringSnippetModifier>(*this); -} - -size_t -UTF8SubstringSnippetModifier::matchTerms(const FieldRef & f, const size_t mintsz) -{ - _modified->reset(); - _readPtr = f.data(); - const byte * src = reinterpret_cast<const byte *> (f.data()); - // resize ucs4 buffer - if (f.size() >= _buf->size()) { - _buf->resize(f.size() + 1); - } - // resize offset buffers - if (f.size() >= _offsets->size()) { - _offsets->resize(f.size() + 1); - } - // resize modified buffer - if (f.size() + 16 > _modified->getLength()) { - _modified->resize(f.size() + 16); // make room for some unit separators - } - cmptype_t * dbegin = &(*_buf.get())[0]; - OffsetWrapper wrapper(dbegin, &(*_offsets)[0]); - size_t numchars = skipSeparators(src, f.size(), wrapper); - const cmptype_t * ditr = dbegin; - const cmptype_t * dend = ditr + numchars; - const cmptype_t * drend = dend - mintsz; - termcount_t words = 0; - for(; ditr <= drend; ) { - for (QueryTermList::iterator itr = _qtl.begin(); itr != _qtl.end(); ++itr) { - QueryTerm & qt = **itr; - const cmptype_t * term; - termsize_t tsz = qt.term(term); - - const cmptype_t * titr = term; - const cmptype_t * tend = term + tsz; - const cmptype_t * dtmp = ditr; - for (; (titr < tend) && (*titr == *dtmp); ++titr, ++dtmp); - if (titr == tend) { - const char * mbegin = f.data() + (*_offsets)[ditr - dbegin]; - const char * mend = f.data() + ((dtmp < dend) ? ((*_offsets)[dtmp - dbegin]) : f.size()); - if (_readPtr <= mbegin) { - // We will only copy from the field ref once. - // If we have overlapping matches only the first one will be considered. - insertSeparators(mbegin, mend); - } - addHit(qt, words); - } - } - if ( ! Fast_UnicodeUtil::IsWordChar(*ditr++) ) { - words++; - for(; (ditr < drend) && ! Fast_UnicodeUtil::IsWordChar(*ditr) ; ++ditr ); - } - } - assert(_readPtr <= (f.data() + f.size())); - // copy remaining - size_t toCopy = f.size() - (_readPtr - f.data()); - copyToModified(toCopy); - - return words + 1; // we must also count the last word -} - -size_t -UTF8SubstringSnippetModifier::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - const cmptype_t * term; - termsize_t tsz = qt.term(term); - return matchTerms(f, tsz); -} - -void -UTF8SubstringSnippetModifier::copyToModified(size_t n, bool skipSep) -{ - if (n == 0) { - return; - } - if (skipSep) { - for (const char * readEnd = _readPtr + n; _readPtr < readEnd; ++_readPtr) { - if (!isSeparatorCharacter(*_readPtr)) { - _modified->put(*_readPtr); - } - } - } else { - _modified->put(_readPtr, n); - _readPtr += n; - } -} - -void -UTF8SubstringSnippetModifier::insertSeparators(const char * mbegin, const char * mend) -{ - copyToModified(mbegin - _readPtr); - _modified->put(_unitSep); - // skip separators such that the match is not splitted. - copyToModified((mend - mbegin), true); - _modified->put(_unitSep); -} - -UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier() : - UTF8StringFieldSearcherBase(), - _modified(new CharBuffer(32)), - _offsets(new std::vector<size_t>(32)), - _readPtr(NULL), - _unitSep('\x1F') -{ -} - -UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier(FieldIdT fId) : - UTF8StringFieldSearcherBase(fId), - _modified(new CharBuffer(32)), - _offsets(new std::vector<size_t>(32)), - _readPtr(NULL), - _unitSep('\x1F') -{ -} - -UTF8SubstringSnippetModifier::UTF8SubstringSnippetModifier(FieldIdT fId, - const CharBuffer::SP & modBuf, - const SharedOffsetBuffer & offBuf) : - UTF8StringFieldSearcherBase(fId), - _modified(modBuf), - _offsets(offBuf), - _readPtr(NULL), - _unitSep('\x1F') -{ -} - -UTF8SubstringSnippetModifier::~UTF8SubstringSnippetModifier() {} - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h deleted file mode 100644 index 0127a7f2827..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "utf8stringfieldsearcherbase.h" -#include <vespa/vsm/common/charbuffer.h> - -namespace vsm { - -typedef std::shared_ptr<std::vector<size_t> > SharedOffsetBuffer; - -/** - * This class does substring searches the same way as UTF8SubStringFieldSearcher. - * While matching the query term(s) against the field reference it builds a modified - * buffer based on the field reference where the only difference is that unit separators - * are inserted before and after a match. These extra unit separators make it possible - * to highlight a substring match when later generating snippets. - **/ -class UTF8SubstringSnippetModifier : public UTF8StringFieldSearcherBase -{ -private: - CharBuffer::SP _modified; // buffer to write the modified field value - SharedOffsetBuffer _offsets; // for each character in _buf we have an offset into the utf8 buffer (field reference) - const char * _readPtr; // buffer to read from (field reference) - char _unitSep; // the unit separator character to use - - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; - - /** - * Copies n bytes from the field reference to the modified buffer and updates the read pointer. - * Separator characters from the field reference can be skipped. - * This is to avoid that a match is splitted by separator characters from the original field reference. - * - * @param n the number of bytes to copy. - * @param skipSep whether we should skip separator characters from the field reference. - **/ - void copyToModified(size_t n, bool skipSep = false); - - /** - * Copies from the field reference to the modified buffer and inserts unit separators for a match - * starting at mbegin (in the field reference) and ending at mend (in the field reference). - * A unit separator is inserted before and after the match. - * - * @param mbegin the beginning of the match. - * @param mend the end of the match. - **/ - void insertSeparators(const char * mbegin, const char * mend); - -public: - typedef std::shared_ptr<UTF8SubstringSnippetModifier> SP; - - std::unique_ptr<FieldSearcher> duplicate() const override; - - UTF8SubstringSnippetModifier(); - UTF8SubstringSnippetModifier(FieldIdT fId); - ~UTF8SubstringSnippetModifier(); - - /** - * Creates a new instance. - * - * @param fId the field id to operate on. - * @param modBuf the shared buffer used to store the modified field value. - * @param offBuf the shared buffer used to store the offsets into the field reference. - **/ - UTF8SubstringSnippetModifier(FieldIdT fId, const CharBuffer::SP & modBuf, const SharedOffsetBuffer & offBuf); - - const CharBuffer & getModifiedBuf() const { return *_modified; } - const search::streaming::QueryTermList & getQueryTerms() const { return _qtl; } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp deleted file mode 100644 index 3495d46b85b..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "utf8suffixstringfieldsearcher.h" - -using search::byte; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; - -namespace vsm { - -std::unique_ptr<FieldSearcher> -UTF8SuffixStringFieldSearcher::duplicate() const -{ - return std::make_unique<UTF8SuffixStringFieldSearcher>(*this); -} - -size_t -UTF8SuffixStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) -{ - (void) mintsz; - termcount_t words = 0; - const byte * srcbuf = reinterpret_cast<const byte *> (f.data()); - const byte * srcend = srcbuf + f.size(); - if (f.size() >= _buf->size()) { - _buf->reserve(f.size() + 1); - } - cmptype_t * dstbuf = &(*_buf.get())[0]; - size_t tokenlen = 0; - - for( ; srcbuf < srcend; ) { - if (*srcbuf == 0) { - ++_zeroCount; - ++srcbuf; - } - srcbuf = tokenize(srcbuf, _buf->capacity(), dstbuf, tokenlen); - for (QueryTermList::iterator it = _qtl.begin(), mt = _qtl.end(); it != mt; ++it) { - QueryTerm & qt = **it; - const cmptype_t * term; - termsize_t tsz = qt.term(term); - if (matchTermSuffix(term, tsz, dstbuf, tokenlen)) { - addHit(qt, words); - } - } - words++; - } - return words; -} - -size_t -UTF8SuffixStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) -{ - return matchTermSuffix(f, qt); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h deleted file mode 100644 index 0640ac22da5..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8suffixstringfieldsearcher.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/searcher/utf8stringfieldsearcherbase.h> - -namespace vsm -{ - -/** - * This class does suffix utf8 searches. - **/ -class UTF8SuffixStringFieldSearcher : public UTF8StringFieldSearcherBase -{ -protected: - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; - -public: - std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8SuffixStringFieldSearcher() : UTF8StringFieldSearcherBase() { } - UTF8SuffixStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/.gitignore b/streamingvisitors/src/vespa/vsm/vsm/.gitignore deleted file mode 100644 index 95bc02923a9..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.exe -*.ilk -*.pdb -.depend* -Makefile diff --git a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt b/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt deleted file mode 100644 index adc00b341a3..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(vsm_vsmbase OBJECT - SOURCES - docsumfieldspec.cpp - docsumfilter.cpp - fieldsearchspec.cpp - flattendocsumwriter.cpp - slimefieldwriter.cpp - snippetmodifier.cpp - vsm-adapter.cpp - docsumconfig.cpp - DEPENDS - vsm_vconfig -) diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp deleted file mode 100644 index 656e9eed132..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vsm/vsm/docsumconfig.h> -#include <vespa/searchsummary/docsummary/docsumfieldwriter.h> -#include <vespa/searchsummary/docsummary/matched_elements_filter_dfw.h> -#include <vespa/searchlib/common/matching_elements_fields.h> -#include <vespa/vsm/config/config-vsmfields.h> -#include <vespa/vsm/config/config-vsmsummary.h> - -using search::MatchingElementsFields; -using search::docsummary::IDocsumFieldWriter; -using search::docsummary::EmptyDFW; -using search::docsummary::MatchedElementsFilterDFW; -using search::docsummary::ResultConfig; -using vespa::config::search::vsm::VsmfieldsConfig; -using vespa::config::search::vsm::VsmsummaryConfig; - -namespace vsm { - -namespace { - -void populate_fields(MatchingElementsFields& fields, VsmfieldsConfig& fields_config, const vespalib::string& field_name) -{ - vespalib::string prefix = field_name + "."; - for (const auto& spec : fields_config.fieldspec) { - if (spec.name.substr(0, prefix.size()) == prefix) { - fields.add_mapping(field_name, spec.name); - } - if (spec.name == field_name) { - fields.add_field(field_name); - } - } -} - -} - -DynamicDocsumConfig::DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config) - : Parent(env, writer), - _vsm_fields_config(std::move(vsm_fields_config)) -{ -} - -IDocsumFieldWriter::UP -DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string & overrideName, const string & argument, bool & rc, std::shared_ptr<search::MatchingElementsFields> matching_elems_fields) -{ - IDocsumFieldWriter::UP fieldWriter; - if ((overrideName == "staticrank") || - (overrideName == "ranklog") || - (overrideName == "label") || - (overrideName == "project") || - (overrideName == "positions") || - (overrideName == "absdist") || - (overrideName == "subproject")) - { - fieldWriter = std::make_unique<EmptyDFW>(); - rc = true; - } else if ((overrideName == "attribute") || - (overrideName == "attributecombiner") || - (overrideName == "geopos")) { - rc = true; - } else if ((overrideName == "matchedattributeelementsfilter") || - (overrideName == "matchedelementsfilter")) { - string source_field = argument.empty() ? fieldName : argument; - const ResultConfig& resultConfig = getResultConfig(); - int source_field_enum = resultConfig.GetFieldNameEnum().Lookup(source_field.c_str()); - populate_fields(*matching_elems_fields, *_vsm_fields_config, source_field); - fieldWriter = MatchedElementsFilterDFW::create(source_field, source_field_enum, matching_elems_fields); - rc = static_cast<bool>(fieldWriter); - } else { - fieldWriter = search::docsummary::DynamicDocsumConfig::createFieldWriter(fieldName, overrideName, argument, rc, matching_elems_fields); - } - return fieldWriter; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h b/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h deleted file mode 100644 index 11010c04e90..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumconfig.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchsummary/docsummary/docsumconfig.h> - -namespace vespa::config::search::vsm { -namespace internal { class InternalVsmfieldsType; } -typedef const internal::InternalVsmfieldsType VsmfieldsConfig; -} -namespace vsm { - -class DynamicDocsumConfig : public search::docsummary::DynamicDocsumConfig -{ -public: - using Parent = search::docsummary::DynamicDocsumConfig; - using VsmfieldsConfig = vespa::config::search::vsm::VsmfieldsConfig; -private: - std::shared_ptr<VsmfieldsConfig> _vsm_fields_config; -public: - DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config); -private: - std::unique_ptr<search::docsummary::IDocsumFieldWriter> - createFieldWriter(const string & fieldName, const string & overrideName, - const string & cf, bool & rc, std::shared_ptr<search::MatchingElementsFields> matching_elems_fields) override; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp deleted file mode 100644 index 936aaaa2091..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "docsumfieldspec.h" - -namespace vsm { - -DocsumFieldSpec::FieldIdentifier::FieldIdentifier() : - _id(StringFieldIdTMap::npos), - _path() -{ } - -DocsumFieldSpec::FieldIdentifier::FieldIdentifier(FieldIdT id, FieldPath path) : - _id(id), - _path(std::move(path)) -{ } - -DocsumFieldSpec::FieldIdentifier::FieldIdentifier(FieldIdentifier &&) noexcept = default; -DocsumFieldSpec::FieldIdentifier & DocsumFieldSpec::FieldIdentifier::operator=(FieldIdentifier &&) noexcept = default; -DocsumFieldSpec::FieldIdentifier::~FieldIdentifier() = default; - -DocsumFieldSpec::DocsumFieldSpec() : - _resultType(search::docsummary::RES_INT), - _command(VsmsummaryConfig::Fieldmap::Command::NONE), - _outputField(), - _inputFields() -{ } - -DocsumFieldSpec::DocsumFieldSpec(search::docsummary::ResType resultType, - VsmsummaryConfig::Fieldmap::Command command) : - _resultType(resultType), - _command(command), - _outputField(), - _inputFields() -{ } - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h b/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h deleted file mode 100644 index db6ee9fa223..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfieldspec.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/searchsummary/docsummary/resultclass.h> -#include <vespa/vsm/common/document.h> -#include <vespa/vsm/common/storagedocument.h> -#include <vespa/vsm/config/vsm-cfif.h> - -namespace vsm { - -/** - * This class contains the specifications for how to generate a summary field. - **/ -class DocsumFieldSpec { -public: - /** - * This class contains a field id and a field path (to navigate a field value). - **/ - class FieldIdentifier { - private: - FieldIdT _id; - FieldPath _path; - - public: - FieldIdentifier(); - FieldIdentifier(FieldIdT id, FieldPath path); - FieldIdentifier(FieldIdentifier &&) noexcept; - FieldIdentifier & operator=(FieldIdentifier &&) noexcept; - FieldIdentifier(const FieldIdentifier &) = delete; - FieldIdentifier & operator=(const FieldIdentifier &) = delete; - ~FieldIdentifier(); - FieldIdT getId() const { return _id; } - const FieldPath & getPath() const { return _path; } - }; - - typedef std::vector<FieldIdentifier> FieldIdentifierVector; - -private: - search::docsummary::ResType _resultType; - VsmsummaryConfig::Fieldmap::Command _command; - FieldIdentifier _outputField; - FieldIdentifierVector _inputFields; - -public: - DocsumFieldSpec(); - DocsumFieldSpec(search::docsummary::ResType resultType, VsmsummaryConfig::Fieldmap::Command command); - - /** - * Returns the result type for the summary field. - **/ - search::docsummary::ResType getResultType() const { return _resultType; } - - /** - * Returns the command specifying how to transform input fields into output summary field. - **/ - VsmsummaryConfig::Fieldmap::Command getCommand() const { return _command; } - - /** - * Returns whether the input field and output field are identical. - **/ - bool hasIdentityMapping() const { - return _inputFields.size() == 1 && _outputField.getId() == _inputFields[0].getId(); - } - - const FieldIdentifier & getOutputField() const { return _outputField; } - void setOutputField(FieldIdentifier outputField) { _outputField = std::move(outputField); } - const FieldIdentifierVector & getInputFields() const { return _inputFields; } - FieldIdentifierVector & getInputFields() { return _inputFields; } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp deleted file mode 100644 index 70759feb41c..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.cpp +++ /dev/null @@ -1,477 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "docsumfilter.h" -#include "slimefieldwriter.h" -#include <vespa/searchsummary/docsummary/summaryfieldconverter.h> -#include <vespa/document/base/exceptions.h> -#include <vespa/document/fieldvalue/iteratorhandler.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.docsumfilter"); - -using namespace search::docsummary; - - -namespace { - -class Handler : public document::fieldvalue::IteratorHandler { -public: -}; - -struct IntResultHandler : public Handler { - int32_t value; - IntResultHandler() : value(0) {} - void onPrimitive(uint32_t, const Content & c) override { - value = c.getValue().getAsInt(); - } -}; - -struct LongResultHandler : public Handler { - int64_t value; - LongResultHandler() : value(0) {} - void onPrimitive(uint32_t, const Content & c) override { - value = c.getValue().getAsLong(); - } -}; - -struct FloatResultHandler : public Handler { - float value; - FloatResultHandler() : value(0) {} - void onPrimitive(uint32_t, const Content & c) override { - value = c.getValue().getAsFloat(); - } -}; - -struct DoubleResultHandler : public Handler { - double value; - DoubleResultHandler() : value(0) {} - void onPrimitive(uint32_t, const Content & c) override { - value = c.getValue().getAsDouble(); - } -}; - -class StringResultHandler : public Handler { -private: - ResType _type; - ResultPacker & _packer; - void addToPacker(const char * buf, size_t len) { - switch (_type) { - case RES_STRING: - _packer.AddString(buf, len); - break; - case RES_LONG_STRING: - _packer.AddLongString(buf, len); - break; - default: - break; - } - } - -public: - StringResultHandler(ResType t, ResultPacker & p) : _type(t), _packer(p) {} - void onPrimitive(uint32_t, const Content & c) override { - const document::FieldValue & fv = c.getValue(); - if (fv.isLiteral()) { - const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv); - vespalib::stringref s = lfv.getValueRef(); - addToPacker(s.data(), s.size()); - } else { - vespalib::string s = fv.toString(); - addToPacker(s.c_str(), s.size()); - } - } -}; - -class RawResultHandler : public Handler { -private: - ResType _type; - ResultPacker & _packer; - -public: - RawResultHandler(ResType t, ResultPacker & p) : _type(t), _packer(p) {} - void onPrimitive(uint32_t, const Content & c) override { - const document::FieldValue & fv = c.getValue(); - try { - std::pair<const char *, size_t> buf = fv.getAsRaw(); - if (buf.first != nullptr) { - switch (_type) { - case RES_DATA: - _packer.AddData(buf.first, buf.second); - break; - case RES_LONG_DATA: - _packer.AddLongData(buf.first, buf.second); - break; - default: - break; - } - } - } catch (document::InvalidDataTypeConversionException & e) { - LOG(warning, "RawResultHandler: Could not get field value '%s' as raw. Skipping writing this field", fv.toString().c_str()); - _packer.AddEmpty(); - } - } -}; - - -} - - -namespace vsm { - -FieldPath -copyPathButFirst(const FieldPath & rhs) { - // skip the element that correspond to the start field value - FieldPath path; - if ( ! rhs.empty()) { - for (auto it = rhs.begin() + 1; it != rhs.end(); ++it) { - path.push_back(std::make_unique<document::FieldPathEntry>(**it)); - } - } - return path; -} - -void -DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldSpec & toolsSpec, - const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap) -{ - { // setup output field - const vespalib::string & name = toolsSpec.getOutputName(); - LOG(debug, "prepareFieldSpec: output field name '%s'", name.c_str()); - FieldIdT field = fieldMap.fieldNo(name); - if (field != FieldMap::npos) { - if (field < fieldPathMap.size()) { - spec.setOutputField(DocsumFieldSpec::FieldIdentifier(field, copyPathButFirst(fieldPathMap[field]))); - } else { - LOG(warning, "Could not find a field path for field '%s' with id '%d'", name.c_str(), field); - spec.setOutputField(DocsumFieldSpec::FieldIdentifier(field, FieldPath())); - } - } else { - LOG(warning, "Could not find output summary field '%s'", name.c_str()); - } - } - // setup input fields - for (size_t i = 0; i < toolsSpec.getInputNames().size(); ++i) { - const vespalib::string & name = toolsSpec.getInputNames()[i]; - LOG(debug, "prepareFieldSpec: input field name '%s'", name.c_str()); - FieldIdT field = fieldMap.fieldNo(name); - if (field != FieldMap::npos) { - if (field < fieldPathMap.size()) { - LOG(debug, "field %u < map size %zu", field, fieldPathMap.size()); - spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, copyPathButFirst(fieldPathMap[field]))); - } else { - LOG(warning, "Could not find a field path for field '%s' with id '%d'", name.c_str(), field); - spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, FieldPath())); - } - if (_highestFieldNo <= field) { - _highestFieldNo = field + 1; - } - } else { - LOG(warning, "Could not find input summary field '%s'", name.c_str()); - } - } -} - -const document::FieldValue * -DocsumFilter::getFieldValue(const DocsumFieldSpec::FieldIdentifier & fieldId, - VsmsummaryConfig::Fieldmap::Command command, - const Document & docsum, bool & modified) -{ - FieldIdT fId = fieldId.getId(); - const document::FieldValue * fv = docsum.getField(fId); - if (fv == nullptr) { - return nullptr; - } - switch (command) { - case VsmsummaryConfig::Fieldmap::Command::FLATTENJUNIPER: - if (_snippetModifiers != nullptr) { - FieldModifier * mod = _snippetModifiers->getModifier(fId); - if (mod != nullptr) { - _cachedValue = mod->modify(*fv, fieldId.getPath()); - modified = true; - return _cachedValue.get(); - } - } - [[fallthrough]]; - default: - return fv; - } -} - - -DocsumFilter::DocsumFilter(const DocsumToolsPtr &tools, const IDocSumCache & docsumCache) : - _docsumCache(&docsumCache), - _tools(tools), - _fields(), - _highestFieldNo(0), - _packer(tools ? tools->getResultConfig() : nullptr), - _flattenWriter(), - _snippetModifiers(nullptr), - _cachedValue(), - _emptyFieldPath() -{ } - -DocsumFilter::~DocsumFilter() =default; - -void DocsumFilter::init(const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap) -{ - if (_tools.get()) { - const ResultClass *resClass = _tools->getResultClass(); - const std::vector<DocsumTools::FieldSpec> & inputSpecs = _tools->getFieldSpecs(); - if (resClass != nullptr) { - uint32_t entryCnt = resClass->GetNumEntries(); - assert(entryCnt == inputSpecs.size()); - for (uint32_t i = 0; i < entryCnt; ++i) { - const ResConfigEntry &entry = *resClass->GetEntry(i); - const DocsumTools::FieldSpec & toolsSpec = inputSpecs[i]; - _fields.push_back(DocsumFieldSpec(entry._type, toolsSpec.getCommand())); - LOG(debug, "About to prepare field spec for summary field '%s'", entry._bindname.c_str()); - prepareFieldSpec(_fields.back(), toolsSpec, fieldMap, fieldPathMap); - } - assert(entryCnt == _fields.size()); - } - } -} - -uint32_t -DocsumFilter::getNumDocs() const -{ - return std::numeric_limits<uint32_t>::max(); -} - -void -DocsumFilter::writeField(const document::FieldValue & fv, const FieldPath & path, ResType type, ResultPacker & packer) -{ - switch (type) { - case RES_INT: { - IntResultHandler rh; - fv.iterateNested(path, rh); - uint32_t val = rh.value; - packer.AddInteger(val); - break; } - case RES_SHORT: { - IntResultHandler rh; - fv.iterateNested(path, rh); - uint16_t val = rh.value; - packer.AddShort(val); - break; } - case RES_BYTE: { - IntResultHandler rh; - fv.iterateNested(path, rh); - uint8_t val = rh.value; - packer.AddByte(val); - break; } - case RES_BOOL: { - IntResultHandler rh; - fv.iterateNested(path, rh); - uint8_t val = rh.value; - packer.AddByte(val); - break; } - case RES_FLOAT: { - FloatResultHandler rh; - fv.iterateNested(path, rh); - float val = rh.value; - packer.AddFloat(val); - break; } - case RES_DOUBLE: { - DoubleResultHandler rh; - fv.iterateNested(path, rh); - double val = rh.value; - packer.AddDouble(val); - break; } - case RES_INT64: { - LongResultHandler rh; - fv.iterateNested(path, rh); - uint64_t val = rh.value; - packer.AddInt64(val); - break; } - case RES_STRING: - case RES_LONG_STRING: - { - StringResultHandler rh(type, packer); - // the string result handler adds the result to the packer - fv.iterateNested(path, rh); - } - break; - case RES_DATA: - case RES_LONG_DATA: - { - RawResultHandler rh(type, packer); - // the raw result handler adds the result to the packer - fv.iterateNested(path, rh); - } - break; - default: - LOG(warning, "Unknown docsum field type: %s", ResultConfig::GetResTypeName(type)); - packer.AddEmpty(); // unhandled output type - break; - } -} - - -void -DocsumFilter::writeSlimeField(const DocsumFieldSpec & fieldSpec, - const Document & docsum, - ResultPacker & packer) -{ - if (fieldSpec.getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) { - const DocsumFieldSpec::FieldIdentifier & fieldId = fieldSpec.getOutputField(); - const document::FieldValue * fv = docsum.getField(fieldId.getId()); - if (fv != nullptr) { - LOG(debug, "writeSlimeField: About to write field '%d' as Slime: field value = '%s'", - fieldId.getId(), fv->toString().c_str()); - SlimeFieldWriter writer; - if (! fieldSpec.hasIdentityMapping()) { - writer.setInputFields(fieldSpec.getInputFields()); - } - writer.convert(*fv); - const vespalib::stringref out = writer.out(); - packer.AddLongString(out.data(), out.size()); - } else { - LOG(debug, "writeSlimeField: Field value not set for field '%d'", fieldId.getId()); - packer.AddEmpty(); - } - } else { - LOG(debug, "writeSlimeField: Cannot handle this command"); - packer.AddEmpty(); - } -} - -void -DocsumFilter::writeFlattenField(const DocsumFieldSpec & fieldSpec, - const Document & docsum, - ResultPacker & packer) -{ - if (fieldSpec.getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) { - LOG(debug, "writeFlattenField: Cannot handle command NONE"); - packer.AddEmpty(); - return; - } - - if (fieldSpec.getResultType() != RES_LONG_STRING && - fieldSpec.getResultType() != RES_STRING) - { - LOG(debug, "writeFlattenField: Can only handle result types STRING and LONG_STRING"); - packer.AddEmpty(); - return; - } - - switch (fieldSpec.getCommand()) { - case VsmsummaryConfig::Fieldmap::Command::FLATTENJUNIPER: - _flattenWriter.setSeparator("\x1E"); // record separator (same as juniper uses) - break; - default: - break; - } - const DocsumFieldSpec::FieldIdentifierVector & inputFields = fieldSpec.getInputFields(); - for (size_t i = 0; i < inputFields.size(); ++i) { - const DocsumFieldSpec::FieldIdentifier & fieldId = inputFields[i]; - bool modified = false; - const document::FieldValue * fv = getFieldValue(fieldId, fieldSpec.getCommand(), docsum, modified); - if (fv != nullptr) { - LOG(debug, "writeFlattenField: About to flatten field '%d' with field value (%s) '%s'", - fieldId.getId(), modified ? "modified" : "original", fv->toString().c_str()); - if (modified) { - fv->iterateNested(_emptyFieldPath, _flattenWriter); - } else { - fv->iterateNested(fieldId.getPath(), _flattenWriter); - } - } else { - LOG(debug, "writeFlattenField: Field value not set for field '%d'", fieldId.getId()); - } - } - - const CharBuffer & buf = _flattenWriter.getResult(); - switch (fieldSpec.getResultType()) { - case RES_STRING: - packer.AddString(buf.getBuffer(), buf.getPos()); - break; - case RES_LONG_STRING: - packer.AddLongString(buf.getBuffer(), buf.getPos()); - break; - default: - break; - } - _flattenWriter.clear(); -} - - -void -DocsumFilter::writeEmpty(ResType type, ResultPacker & packer) -{ - // use the 'notdefined' values when writing numeric values - switch (type) { - case RES_INT: - packer.AddInteger(std::numeric_limits<int32_t>::min()); - break; - case RES_SHORT: - packer.AddShort(std::numeric_limits<int16_t>::min()); - break; - case RES_BYTE: - packer.AddByte(0); // byte fields are unsigned so we have no 'notdefined' value. - break; - case RES_FLOAT: - packer.AddFloat(std::numeric_limits<float>::quiet_NaN()); - break; - case RES_DOUBLE: - packer.AddDouble(std::numeric_limits<double>::quiet_NaN()); - break; - case RES_INT64: - packer.AddInt64(std::numeric_limits<int64_t>::min()); - break; - default: - packer.AddEmpty(); - break; - } -} - -uint32_t -DocsumFilter::getSummaryClassId() const -{ - return _tools->getResultClass() ? _tools->getResultClass()->GetClassID() : ResultConfig::NoClassID(); -} - -DocsumStoreValue -DocsumFilter::getMappedDocsum(uint32_t id) -{ - const ResultClass *resClass = _tools->getResultClass(); - if (resClass == nullptr) { - return DocsumStoreValue(nullptr, 0); - } - - const Document & doc = _docsumCache->getDocSum(id); - - _packer.Init(resClass->GetClassID()); - for (FieldSpecList::iterator it(_fields.begin()), end = _fields.end(); it != end; ++it) { - ResType type = it->getResultType(); - if (type == RES_JSONSTRING) { - // this really means 'structured data' - writeSlimeField(*it, doc, _packer); - } else { - if (it->getInputFields().size() == 1 && it->getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) { - const DocsumFieldSpec::FieldIdentifier & fieldId = it->getInputFields()[0]; - const document::FieldValue * field = doc.getField(fieldId.getId()); - if (field != nullptr) { - writeField(*field, fieldId.getPath(), type, _packer); - } else { - writeEmpty(type, _packer); // void input - } - } else if (it->getInputFields().size() == 0 && it->getCommand() == VsmsummaryConfig::Fieldmap::Command::NONE) { - LOG(spam, "0 inputfields for output field %u", it->getOutputField().getId()); - writeEmpty(type, _packer); // no input - } else { - writeFlattenField(*it, doc, _packer); - } - } - } - - const char *buf; - uint32_t buflen; - bool ok = _packer.GetDocsumBlob(&buf, &buflen); - if (ok) { - return DocsumStoreValue(buf, buflen); - } else { - return DocsumStoreValue(nullptr, 0); - } -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h b/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h deleted file mode 100644 index e6f7ae3e6fe..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/docsumfilter.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vsm/common/docsum.h> -#include <vespa/vsm/common/fieldmodifier.h> -#include <vespa/vsm/vsm/docsumfieldspec.h> -#include <vespa/vsm/vsm/fieldsearchspec.h> -#include <vespa/vsm/vsm/flattendocsumwriter.h> -#include <vespa/vsm/vsm/vsm-adapter.h> -#include <vespa/searchsummary/docsummary/resultpacker.h> -#include <vespa/searchsummary/docsummary/docsumstore.h> - -using search::docsummary::IDocsumStore; -using search::docsummary::DocsumStoreValue; -using search::docsummary::ResType; -using search::docsummary::ResultPacker; - -namespace vsm { - -/** - * This class implements the IDocsumStore interface such that docsum blobs - * can be fetched based on local document id. The docsum blobs are generated - * on the fly when requested. - **/ -class DocsumFilter : public IDocsumStore -{ -private: - typedef std::vector<DocsumFieldSpec> FieldSpecList; // list of summary field specs - typedef std::vector<vespalib::string> StringList; - typedef StringFieldIdTMap FieldMap; - - const IDocSumCache * _docsumCache; - DocsumToolsPtr _tools; - FieldSpecList _fields; // list of summary fields to generate - size_t _highestFieldNo; - ResultPacker _packer; - FlattenDocsumWriter _flattenWriter; - const FieldModifierMap * _snippetModifiers; - document::FieldValue::UP _cachedValue; - document::FieldPath _emptyFieldPath; - - DocsumFilter(const DocsumFilter &); - DocsumFilter &operator=(const DocsumFilter &); - void prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldSpec & toolsSpec, - const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap); - const document::FieldValue * getFieldValue(const DocsumFieldSpec::FieldIdentifier & fieldId, - VsmsummaryConfig::Fieldmap::Command command, - const Document & docsum, bool & modified); - void writeField(const document::FieldValue & fv, const FieldPath & path, ResType type, ResultPacker & packer); - void writeSlimeField(const DocsumFieldSpec & fieldSpec, const Document & docsum, ResultPacker & packer); - void writeFlattenField(const DocsumFieldSpec & fieldSpec, const Document & docsum, ResultPacker & packer); - void writeEmpty(ResType type, ResultPacker & packer); - -public: - DocsumFilter(const DocsumToolsPtr & tools, const IDocSumCache & docsumCache); - ~DocsumFilter() override; - const DocsumToolsPtr & getTools() const { return _tools; } - - /** - * Initializes this docsum filter using the given field map and field path map. - * The field map is used to map from field name to field id. - * The field path map is used to retrieve the field path for each input field. - * - * @param fieldMap maps from field name -> field id - * @param fieldPathMap maps from field id -> field path - **/ - void init(const FieldMap & fieldMap, const FieldPathMapT & fieldPathMap); - - /** - * Sets the snippet modifiers to use when writing string fields used as input to snippet generation. - **/ - void setSnippetModifiers(const FieldModifierMap & modifiers) { _snippetModifiers = &modifiers; } - - /** - * Returns the highest field id + 1 among all fields in the field spec list. - **/ - size_t getHighestFieldNo() const { return _highestFieldNo; } - - - void setDocSumStore(const IDocSumCache & docsumCache) { _docsumCache = &docsumCache; } - - // Inherit doc from IDocsumStore - DocsumStoreValue getMappedDocsum(uint32_t id) override; - uint32_t getNumDocs() const override; - uint32_t getSummaryClassId() const override; -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp deleted file mode 100644 index 7043e63ec87..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "fieldsearchspec.h" -#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h> -#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h> -#include <vespa/vsm/searcher/utf8substringsearcher.h> -#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h> -#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h> -#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> -#include <vespa/vsm/searcher/intfieldsearcher.h> -#include <vespa/vsm/searcher/boolfieldsearcher.h> -#include <vespa/vsm/searcher/floatfieldsearcher.h> -#include <vespa/vsm/searcher/geo_pos_field_searcher.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <regex> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.fieldsearchspec"); - -#define DEBUGMASK 0x01 - -using search::streaming::ConstQueryTermList; -using search::streaming::Query; -using search::streaming::QueryTerm; - -namespace vsm { - -namespace { - -void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { - if (arg1 == "prefix") { - searcher->setMatchType(FieldSearcher::PREFIX); - } else if (arg1 == "substring") { - searcher->setMatchType(FieldSearcher::SUBSTRING); - } else if (arg1 == "suffix") { - searcher->setMatchType(FieldSearcher::SUFFIX); - } else if (arg1 == "exact") { - searcher->setMatchType(FieldSearcher::EXACT); - } else if (arg1 == "word") { - searcher->setMatchType(FieldSearcher::EXACT); - } -} - -} - -FieldSearchSpec::FieldSearchSpec() : - _id(0), - _name(), - _maxLength(0x100000), - _searcher(), - _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE), - _arg1(), - _reconfigured(false) -{ -} -FieldSearchSpec::~FieldSearchSpec() = default; - -FieldSearchSpec::FieldSearchSpec(FieldSearchSpec&& rhs) noexcept = default; -FieldSearchSpec& FieldSearchSpec::operator=(FieldSearchSpec&& rhs) noexcept = default; - -FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & fname, - VsmfieldsConfig::Fieldspec::Searchmethod searchDef, - const vespalib::string & arg1, size_t maxLength_) : - _id(fid), - _name(fname), - _maxLength(maxLength_), - _searcher(), - _searchMethod(searchDef), - _arg1(arg1), - _reconfigured(false) -{ - switch(searchDef) { - default: - LOG(warning, "Unknown searchdef = %d. Defaulting to AUTOUTF8", static_cast<int>(searchDef)); - [[fallthrough]]; - case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8: - case VsmfieldsConfig::Fieldspec::Searchmethod::NONE: - case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8: - case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8: - if (arg1 == "substring") { - _searcher = std::make_unique<UTF8SubStringFieldSearcher>(fid); - } else if (arg1 == "suffix") { - _searcher = std::make_unique<UTF8SuffixStringFieldSearcher>(fid); - } else if (arg1 == "exact") { - _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid); - } else if (arg1 == "word") { - _searcher = std::make_unique<UTF8ExactStringFieldSearcher>(fid); - } else if (searchDef == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) { - _searcher = std::make_unique<UTF8StrChrFieldSearcher>(fid); - } else { - _searcher = std::make_unique<FUTF8StrChrFieldSearcher>(fid); - } - break; - case VsmfieldsConfig::Fieldspec::Searchmethod::BOOL: - _searcher = std::make_unique<BoolFieldSearcher>(fid); - break; - case VsmfieldsConfig::Fieldspec::Searchmethod::INT8: - case VsmfieldsConfig::Fieldspec::Searchmethod::INT16: - case VsmfieldsConfig::Fieldspec::Searchmethod::INT32: - case VsmfieldsConfig::Fieldspec::Searchmethod::INT64: - _searcher = std::make_unique<IntFieldSearcher>(fid); - break; - case VsmfieldsConfig::Fieldspec::Searchmethod::FLOAT: - _searcher = std::make_unique<FloatFieldSearcher>(fid); - break; - case VsmfieldsConfig::Fieldspec::Searchmethod::DOUBLE: - _searcher = std::make_unique<DoubleFieldSearcher>(fid); - break; - case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS: - _searcher = std::make_unique<GeoPosFieldSearcher>(fid); - break; - } - if (_searcher) { - setMatchType(_searcher, arg1); - _searcher->maxFieldLength(maxLength()); - } -} - -void -FieldSearchSpec::reconfig(const QueryTerm & term) -{ - if (_reconfigured) { - return; - } - switch (_searchMethod) { - case VsmfieldsConfig::Fieldspec::Searchmethod::NONE: - case VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8: - case VsmfieldsConfig::Fieldspec::Searchmethod::UTF8: - case VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8: - if ((term.isSubstring() && _arg1 != "substring") || - (term.isSuffix() && _arg1 != "suffix") || - (term.isExactstring() && _arg1 != "exact") || - (term.isPrefix() && _arg1 == "suffix")) - { - _searcher = std::make_unique<UTF8FlexibleStringFieldSearcher>(id()); - // preserve the basic match property of the searcher - setMatchType(_searcher, _arg1); - LOG(debug, "Reconfigured to use UTF8FlexibleStringFieldSearcher (%s) for field '%s' with id '%d'", - _searcher->prefix() ? "prefix" : "regular", name().c_str(), id()); - _reconfigured = true; - } - break; - default: - break; - } -} - -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) -{ - os << f._id << ' ' << f._name << ' '; - if ( ! f._searcher) { - os << " No searcher defined.\n"; - } - return os; -} - -FieldSearchSpecMap::FieldSearchSpecMap() = default; - -FieldSearchSpecMap::~FieldSearchSpecMap() = default; - -namespace { - const std::string _G_empty(""); - const std::string _G_value(".value"); - const std::regex _G_map1("\\{[a-zA-Z0-9]+\\}"); - const std::regex _G_map2("\\{\".*\"\\}"); - const std::regex _G_array("\\[[0-9]+\\]"); -} - -vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex) -{ - if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) { - std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value); - index = std::regex_replace(index, _G_map2, _G_value); - index = std::regex_replace(index, _G_array, _G_empty); - return index; - } - return rawIndex; -} - -bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const -{ - bool retval(true); - ConstQueryTermList qtl; - query.getLeafs(qtl); - - for (const auto & term : qtl) { - for (const auto & dtm : documentTypeMap()) { - const IndexFieldMapT & fim = dtm.second; - vespalib::string rawIndex(term->index()); - vespalib::string index(stripNonFields(rawIndex)); - IndexFieldMapT::const_iterator fIt = fim.find(index); - if (fIt != fim.end()) { - for(FieldIdT fid : fIt->second) { - const FieldSearchSpec & spec = specMap().find(fid)->second; - LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str()); - if ((rawIndex != index) && (spec.name().find(index) == 0)) { - vespalib::string modIndex(rawIndex); - modIndex.append(spec.name().substr(index.size())); - fieldsInQuery.add(modIndex, spec.id()); - } else { - fieldsInQuery.add(spec.name(),spec.id()); - } - } - } else { - LOG(warning, "No valid indexes registered for index %s", term->index().c_str()); - retval = false; - } - } - } - return retval; -} - -void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded) -{ - for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) { - LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str()); - _nameIdMap.add(otherFieldsNeeded[i]); - } -} - -namespace { - -FieldIdTList -buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearchSpecMapT & specMap, - const VsmfieldsConfig::Documenttype::IndexVector & indexes) -{ - LOG(spam, "Index %s with %zd fields", ci.name.c_str(), ci.field.size()); - FieldIdTList ifm; - for (const VsmfieldsConfig::Documenttype::Index::Field & cf : ci.field) { - LOG(spam, "Parsing field %s", cf.name.c_str()); - auto foundIndex = std::find_if(indexes.begin(), indexes.end(), - [&cf](const auto & v) { return v.name == cf.name;}); - if ((foundIndex != indexes.end()) && (cf.name != ci.name)) { - FieldIdTList sub = buildFieldSet(*foundIndex, specMap, indexes); - ifm.insert(ifm.end(), sub.begin(), sub.end()); - } else { - auto foundField = std::find_if(specMap.begin(), specMap.end(), - [&cf](const auto & v) { return v.second.name() == cf.name;} ); - if (foundField != specMap.end()) { - ifm.push_back(foundField->second.id()); - } else { - LOG(warning, "Field %s not defined. Ignoring....", cf.name.c_str()); - } - } - } - return ifm; -} - -} - -bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) -{ - bool retval(true); - LOG(spam, "Parsing %zd fields", conf->fieldspec.size()); - for(const VsmfieldsConfig::Fieldspec & cfs : conf->fieldspec) { - LOG(spam, "Parsing %s", cfs.name.c_str()); - FieldIdT fieldId = specMap().size(); - FieldSearchSpec fss(fieldId, cfs.name, cfs.searchmethod, cfs.arg1.c_str(), cfs.maxlength); - _specMap[fieldId] = std::move(fss); - _nameIdMap.add(cfs.name, fieldId); - LOG(spam, "M in %d = %s", fieldId, cfs.name.c_str()); - } - - LOG(spam, "Parsing %zd document types", conf->documenttype.size()); - for(const VsmfieldsConfig::Documenttype & di : conf->documenttype) { - IndexFieldMapT indexMapp; - LOG(spam, "Parsing document type %s with %zd indexes", di.name.c_str(), di.index.size()); - for(const VsmfieldsConfig::Documenttype::Index & ci : di.index) { - indexMapp[ci.name] = buildFieldSet(ci, specMap(), di.index); - } - _documentTypeMap[di.name] = indexMapp; - } - return retval; -} - -void -FieldSearchSpecMap::reconfigFromQuery(const Query & query) -{ - ConstQueryTermList qtl; - query.getLeafs(qtl); - - for (const auto & termA : qtl) { - for (const auto & ifm : documentTypeMap()) { - IndexFieldMapT::const_iterator itc = ifm.second.find(termA->index()); - if (itc != ifm.second.end()) { - for (FieldIdT fid : itc->second) { - FieldSearchSpec & spec = _specMap.find(fid)->second; - spec.reconfig(*termA); - } - } - } - } -} - -bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b) -{ - return a->field() < b->field(); -} - -void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) -{ - fieldSearcherMap.clear(); - for (const auto & entry : fieldsInQuery) { - FieldIdT fId = entry.second; - const FieldSearchSpec & spec = specMap().find(fId)->second; - fieldSearcherMap.emplace_back(spec.searcher().duplicate()); - } - std::sort(fieldSearcherMap.begin(), fieldSearcherMap.end(), lesserField); -} - - -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df) -{ - os << "DocumentTypeMap = \n"; - for (const auto & dtm : df.documentTypeMap()) { - os << "DocType = " << dtm.first << "\n"; - os << "IndexMap = \n"; - for (const auto &index : dtm.second) { - os << index.first << ": "; - for (FieldIdT fid : index.second) { - os << fid << ' '; - } - os << '\n'; - } - } - os << "SpecMap = \n"; - for (const auto & entry : df.specMap()) { - os << entry.first << " = " << entry.second << '\n'; - } - os << "NameIdMap = \n" << df.nameIdMap(); - return os; -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h deleted file mode 100644 index 7b78a8634e0..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/vsm/searcher/fieldsearcher.h> -#include <vespa/vsm/config/vsm-cfif.h> - -namespace vsm { - -class FieldSearchSpec -{ -public: - FieldSearchSpec(); - FieldSearchSpec(const FieldIdT & id, const vespalib::string & name, - VsmfieldsConfig::Fieldspec::Searchmethod searchMethod, - const vespalib::string & arg1, size_t maxLength); - ~FieldSearchSpec(); - FieldSearchSpec(FieldSearchSpec&& rhs) noexcept; - FieldSearchSpec& operator=(FieldSearchSpec&& rhs) noexcept; - const FieldSearcher & searcher() const { return *_searcher; } - const vespalib::string & name() const { return _name; } - FieldIdT id() const { return _id; } - bool valid() const { return static_cast<bool>(_searcher); } - size_t maxLength() const { return _maxLength; } - - /** - * Reconfigures the field searcher based on information in the given query term. - **/ - void reconfig(const search::streaming::QueryTerm & term); - - friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f); - -private: - FieldIdT _id; - vespalib::string _name; - size_t _maxLength; - FieldSearcherContainer _searcher; - VsmfieldsConfig::Fieldspec::Searchmethod _searchMethod; - vespalib::string _arg1; - bool _reconfigured; -}; - -typedef std::map<FieldIdT, FieldSearchSpec> FieldSearchSpecMapT; - -class FieldSearchSpecMap -{ -public: - FieldSearchSpecMap(); - ~FieldSearchSpecMap(); - - /** - * Iterates over all fields in the vsmfields config and creates a mapping from field id to FieldSearchSpec objects - * and a mapping from field name to field id. It then iterates over all document types and index names - * and creates a mapping from index name to list of field ids for each document type. - **/ - bool buildFromConfig(const VsmfieldsHandle & conf); - - /** - * Iterates over the given field name vector adding extra elements to the mapping from field name to field id. - **/ - void buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded); - - /** - * Reconfigures some of the field searchers based on information in the given query. - **/ - void reconfigFromQuery(const search::streaming::Query & query); - - /** - * Adds a [field name, field id] entry to the given mapping for each field name used in the given query. - * This is achieved by mapping from query term index name -> list of field ids -> [field name, field id] pairs. - **/ - bool buildFieldsInQuery(const search::streaming::Query & query, StringFieldIdTMap & fieldsInQuery) const; - - /** - * Adds a [field name, field id] entry to the given mapping for each field name in the given vector. - **/ - void buildFieldsInQuery(const std::vector<vespalib::string> & otherFieldsNeeded, StringFieldIdTMap & fieldsInQuery) const; - - /** - * Adds a FieldSearcher object to the given field searcher map for each field name in the other map. - **/ - void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap); - - const FieldSearchSpecMapT & specMap() const { return _specMap; } - //const IndexFieldMapT & indexMap() const { return _documentTypeMap.begin()->second; } - const DocumentTypeIndexFieldMapT & documentTypeMap() const { return _documentTypeMap; } - const StringFieldIdTMap & nameIdMap() const { return _nameIdMap; } - friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & f); - - static vespalib::string stripNonFields(const vespalib::string & rawIndex); - -private: - FieldSearchSpecMapT _specMap; // mapping from field id to field search spec - DocumentTypeIndexFieldMapT _documentTypeMap; // mapping from index name to field id list for each document type - StringFieldIdTMap _nameIdMap; // mapping from field name to field id -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp b/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp deleted file mode 100644 index 06b652d85e6..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "flattendocsumwriter.h" -#include <vespa/document/fieldvalue/fieldvalues.h> - -namespace vsm { - -void -FlattenDocsumWriter::considerSeparator() -{ - if (_useSeparator) { - _output.put(_separator.c_str(), _separator.size()); - } -} - -void -FlattenDocsumWriter::onPrimitive(uint32_t, const Content & c) -{ - considerSeparator(); - const document::FieldValue & fv = c.getValue(); - if (fv.isLiteral()) { - const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv); - vespalib::stringref value = lfv.getValueRef(); - _output.put(value.data(), value.size()); - } else if (fv.isNumeric() || - fv.isA(document::FieldValue::Type::BOOL)) - { - vespalib::string value = fv.getAsString(); - _output.put(value.data(), value.size()); - } else { - vespalib::string value = fv.toString(); - _output.put(value.data(), value.size()); - } - _useSeparator = true; -} - -FlattenDocsumWriter::FlattenDocsumWriter(const vespalib::string & separator) : - _output(32), - _separator(separator), - _useSeparator(false) -{ } - -FlattenDocsumWriter::~FlattenDocsumWriter() = default; - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h b/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h deleted file mode 100644 index 47c6f1e75d0..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/flattendocsumwriter.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/document/fieldvalue/fieldvalue.h> -#include <vespa/document/fieldvalue/iteratorhandler.h> -#include <vespa/vsm/common/charbuffer.h> - -namespace vsm { - -/** - * This class is used to flatten out and write a complex field value. - * A separator string is inserted between primitive field values. - **/ -class FlattenDocsumWriter : public document::fieldvalue::IteratorHandler { -private: - CharBuffer _output; - vespalib::string _separator; - bool _useSeparator; - - void considerSeparator(); - void onPrimitive(uint32_t, const Content & c) override; - -public: - FlattenDocsumWriter(const vespalib::string & separator = " "); - ~FlattenDocsumWriter(); - void setSeparator(const vespalib::string & separator) { _separator = separator; } - const CharBuffer & getResult() const { return _output; } - void clear() { - _output.reset(); - _separator = " "; - _useSeparator = false; - } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h b/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h deleted file mode 100644 index a35cea40cec..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/i_matching_elements_filler.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <memory> - -namespace search { -class MatchingElements; -class MatchingElementsFields; -} - -namespace vsm { - -/* - * Interface class for filling matching elements structure for - * streaming search. - */ -class IMatchingElementsFiller { -public: - virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) = 0; - virtual ~IMatchingElementsFiller() = default; -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp deleted file mode 100644 index 5bc5798fb9d..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.cpp +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "slimefieldwriter.h" -#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/searchsummary/docsummary/resultconfig.h> -#include <vespa/document/datatype/positiondatatype.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.slimefieldwriter"); - -namespace { - -vespalib::string -toString(const vsm::FieldPath & fieldPath) -{ - vespalib::asciistream oss; - for (size_t i = 0; i < fieldPath.size(); ++i) { - if (i > 0) { - oss << "."; - } - oss << fieldPath[i].getName(); - } - return oss.str(); -} - -vespalib::string -toString(const std::vector<vespalib::string> & fieldPath) -{ - vespalib::asciistream oss; - for (size_t i = 0; i < fieldPath.size(); ++i) { - if (i > 0) { - oss << "."; - } - oss << fieldPath[i]; - } - return oss.str(); -} - -} // namespace <unnamed> - -using namespace vespalib::slime::convenience; - - -namespace vsm { - -void -SlimeFieldWriter::traverseRecursive(const document::FieldValue & fv, Inserter &inserter) -{ - LOG(debug, "traverseRecursive: class(%s), fieldValue(%s), currentPath(%s)", - fv.className(), fv.toString().c_str(), toString(_currPath).c_str()); - - if (fv.isCollection()) { - const document::CollectionFieldValue & cfv = static_cast<const document::CollectionFieldValue &>(fv); - if (cfv.isA(document::FieldValue::Type::ARRAY)) { - const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(cfv); - Cursor &a = inserter.insertArray(); - for (size_t i = 0; i < afv.size(); ++i) { - const document::FieldValue & nfv = afv[i]; - ArrayInserter ai(a); - traverseRecursive(nfv, ai); - } - } else { - assert(cfv.isA(document::FieldValue::Type::WSET)); - const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(cfv); - Cursor &a = inserter.insertArray(); - Symbol isym = a.resolve("item"); - Symbol wsym = a.resolve("weight"); - for (const auto &entry : wsfv) { - Cursor &o = a.addObject(); - const document::FieldValue & nfv = *entry.first; - ObjectSymbolInserter oi(o, isym); - traverseRecursive(nfv, oi); - int weight = static_cast<const document::IntFieldValue &>(*entry.second).getValue(); - o.setLong(wsym, weight); - } - } - } else if (fv.isA(document::FieldValue::Type::MAP)) { - const document::MapFieldValue & mfv = static_cast<const document::MapFieldValue &>(fv); - Cursor &a = inserter.insertArray(); - Symbol keysym = a.resolve("key"); - Symbol valsym = a.resolve("value"); - for (const auto &entry : mfv) { - Cursor &o = a.addObject(); - ObjectSymbolInserter ki(o, keysym); - traverseRecursive(*entry.first, ki); - _currPath.push_back("value"); - ObjectSymbolInserter vi(o, valsym); - traverseRecursive(*entry.second, vi); - _currPath.pop_back(); - } - } else if (fv.isStructured()) { - const document::StructuredFieldValue & sfv = static_cast<const document::StructuredFieldValue &>(fv); - Cursor &o = inserter.insertObject(); - if (sfv.getDataType() == &document::PositionDataType::getInstance() - && search::docsummary::ResultConfig::wantedV8geoPositions()) - { - bool ok = true; - try { - int x = std::numeric_limits<int>::min(); - int y = std::numeric_limits<int>::min(); - for (const document::Field & entry : sfv) { - document::FieldValue::UP fval(sfv.getValue(entry)); - if (entry.getName() == "x") { - x = fval->getAsInt(); - } else if (entry.getName() == "y") { - y = fval->getAsInt(); - } else { - ok = false; - } - } - if (x == std::numeric_limits<int>::min()) ok = false; - if (y == std::numeric_limits<int>::min()) ok = false; - if (ok) { - o.setDouble("lat", double(y) / 1.0e6); - o.setDouble("lng", double(x) / 1.0e6); - return; - } - } catch (std::exception &e) { - (void)e; - // fallback to code below - } - } - for (const document::Field & entry : sfv) { - if (explorePath(entry.getName())) { - _currPath.push_back(entry.getName()); - Memory keymem(entry.getName()); - ObjectInserter oi(o, keymem); - document::FieldValue::UP fval(sfv.getValue(entry)); - traverseRecursive(*fval, oi); - _currPath.pop_back(); - } - } - } else { - if (fv.isLiteral()) { - const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv); - inserter.insertString(lfv.getValueRef()); - } else if (fv.isNumeric()) { - switch (fv.getDataType()->getId()) { - case document::DataType::T_BYTE: - case document::DataType::T_SHORT: - case document::DataType::T_INT: - case document::DataType::T_LONG: - inserter.insertLong(fv.getAsLong()); - break; - case document::DataType::T_DOUBLE: - inserter.insertDouble(fv.getAsDouble()); - break; - case document::DataType::T_FLOAT: - inserter.insertDouble(fv.getAsFloat()); - break; - default: - inserter.insertString(fv.getAsString()); - } - } else if (fv.isA(document::FieldValue::Type::BOOL)) { - const auto & bfv = static_cast<const document::BoolFieldValue &>(fv); - inserter.insertBool(bfv.getValue()); - } else { - inserter.insertString(fv.toString()); - } - } -} - -bool -SlimeFieldWriter::explorePath(vespalib::stringref candidate) -{ - if (_inputFields == nullptr) { - return true; - } - // find out if we should explore the current path - for (size_t i = 0; i < _inputFields->size(); ++i) { - const FieldPath & fp = (*_inputFields)[i].getPath(); - if (_currPath.size() <= fp.size()) { - bool equal = true; - for (size_t j = 0; j < _currPath.size() && equal; ++j) { - equal = (fp[j].getName() == _currPath[j]); - } - if (equal) { - if (_currPath.size() == fp.size()) { - return true; - } else if (fp[_currPath.size()].getName() == candidate) { - // the current path matches one of the input field paths - return true; - } - } - } - } - return false; -} - -SlimeFieldWriter::SlimeFieldWriter() : - _rbuf(4_Ki), - _slime(), - _inputFields(nullptr), - _currPath() -{ -} - -SlimeFieldWriter::~SlimeFieldWriter() = default; - -void -SlimeFieldWriter::convert(const document::FieldValue & fv) -{ - if (LOG_WOULD_LOG(debug)) { - if (_inputFields != nullptr) { - for (size_t i = 0; i < _inputFields->size(); ++i) { - LOG(debug, "write: input field path [%zd] '%s'", i, toString((*_inputFields)[i].getPath()).c_str()); - } - } else { - LOG(debug, "write: no input fields"); - } - } - SlimeInserter inserter(_slime); - traverseRecursive(fv, inserter); - search::SlimeOutputRawBufAdapter adapter(_rbuf); - vespalib::slime::BinaryFormat::encode(_slime, adapter); -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h b/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h deleted file mode 100644 index b5adac8985f..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/slimefieldwriter.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "docsumfieldspec.h" -#include <vespa/vsm/common/storagedocument.h> -#include <vespa/document/fieldvalue/fieldvalues.h> -#include <vespa/vespalib/data/slime/slime.h> -#include <vespa/searchlib/util/rawbuf.h> - -namespace vsm { - -/** - * This class is used to write a field value as slime binary data. - * If only a subset of the field value should be written this subset - * is specified using the setInputFields() function. - **/ -class SlimeFieldWriter -{ -private: - search::RawBuf _rbuf; - vespalib::Slime _slime; - const DocsumFieldSpec::FieldIdentifierVector * _inputFields; - std::vector<vespalib::string> _currPath; - - void traverseRecursive(const document::FieldValue & fv, vespalib::slime::Inserter & inserter); - bool explorePath(vespalib::stringref candidate); - -public: - SlimeFieldWriter(); - ~SlimeFieldWriter(); - - - /** - * Specifies the subset of the field value that should be written. - **/ - void setInputFields(const DocsumFieldSpec::FieldIdentifierVector & inputFields) { _inputFields = &inputFields; } - - /** - * Convert the given field value - **/ - void convert(const document::FieldValue & fv); - - /** - * Return a reference to the output binary data - **/ - vespalib::stringref out() const { - return vespalib::stringref(_rbuf.GetDrainPos(), _rbuf.GetUsedLen()); - } - - void clear() { - _rbuf.Reuse(); - _inputFields = nullptr; - _currPath.clear(); - } -}; - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp deleted file mode 100644 index 127302311f9..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "snippetmodifier.h" -#include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/vespalib/stllike/hash_map.hpp> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.snippetmodifier"); - -using namespace document; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -typedef vespalib::hash_map<vsm::FieldIdT, QueryTermList> FieldQueryTermMap; - -namespace { - -void -addIfNotPresent(FieldQueryTermMap & map, vsm::FieldIdT fId, QueryTerm * qt) -{ - FieldQueryTermMap::iterator itr = map.find(fId); - if (itr != map.end()) { - QueryTermList & qtl = itr->second; - if (std::find(qtl.begin(), qtl.end(), qt) == qtl.end()) { - qtl.push_back(qt); - } - } else { - map[fId].push_back(qt); - } -} - -} - -namespace vsm { - -void -SnippetModifier::considerSeparator() -{ - if (_useSep) { - _valueBuf->put(_groupSep); - } -} - -void -SnippetModifier::onPrimitive(uint32_t, const Content & c) -{ - considerSeparator(); - _searcher->onValue(c.getValue()); - _valueBuf->put(_searcher->getModifiedBuf().getBuffer(), _searcher->getModifiedBuf().getPos()); - _useSep = true; -} - -void -SnippetModifier::reset() -{ - _valueBuf->reset(); - _useSep = false; -} - - -SnippetModifier::SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher) : - _searcher(searcher), - _valueBuf(new CharBuffer(32)), - _groupSep('\x1E'), - _useSep(false), - _empty() -{ -} - -SnippetModifier::SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher, const CharBuffer::SP & valueBuf) : - _searcher(searcher), - _valueBuf(valueBuf), - _groupSep('\x1E'), - _useSep(false), - _empty() -{ -} - -SnippetModifier::~SnippetModifier() {} - -FieldValue::UP -SnippetModifier::modify(const FieldValue & fv, const document::FieldPath & path) -{ - reset(); - fv.iterateNested(path, *this); - return FieldValue::UP(new StringFieldValue(vespalib::string(_valueBuf->getBuffer(), _valueBuf->getPos()))); -} - - -SnippetModifierManager::SnippetModifierManager() : - _modifiers(), - _searchBuf(new SearcherBuf(64)), - _searchModifyBuf(new CharBuffer(64)), - _searchOffsetBuf(new std::vector<size_t>(64)), - _modifierBuf(new CharBuffer(128)) -{ -} - -SnippetModifierManager::~SnippetModifierManager() {} - -void -SnippetModifierManager::setup(const QueryTermList & queryTerms, - const FieldSearchSpecMapT & specMap, - const IndexFieldMapT & indexMap) -{ - FieldQueryTermMap fqtm; - - // setup modifiers - for (QueryTermList::const_iterator i = queryTerms.begin(); i != queryTerms.end(); ++i) { - QueryTerm * qt = *i; - IndexFieldMapT::const_iterator j = indexMap.find(qt->index()); - if (j != indexMap.end()) { - for (FieldIdTList::const_iterator k = j->second.begin(); k != j->second.end(); ++k) { - FieldIdT fId = *k; - const FieldSearchSpec & spec = specMap.find(fId)->second; - if (spec.searcher().substring() || qt->isSubstring()) { // we need a modifier for this field id - addIfNotPresent(fqtm, fId, qt); - if (_modifiers.getModifier(fId) == NULL) { - LOG(debug, "Create snippet modifier for field id '%u'", fId); - UTF8SubstringSnippetModifier::SP searcher - (new UTF8SubstringSnippetModifier(fId, _searchModifyBuf, _searchOffsetBuf)); - _modifiers.map()[fId] = std::make_unique<SnippetModifier>(searcher, _modifierBuf); - } - } - } - } - } - - // prepare modifiers - for (auto & entry : _modifiers.map()) { - FieldIdT fId = entry.first; - SnippetModifier & smod = static_cast<SnippetModifier &>(*entry.second); - smod.getSearcher()->prepare(fqtm[fId], _searchBuf); - } -} - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h b/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h deleted file mode 100644 index 4718ab8783a..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/snippetmodifier.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "fieldsearchspec.h" -#include <vespa/vsm/common/charbuffer.h> -#include <vespa/vsm/common/document.h> -#include <vespa/vsm/common/fieldmodifier.h> -#include <vespa/vsm/searcher/utf8substringsnippetmodifier.h> -#include <vespa/document/fieldvalue/fieldvalue.h> -#include <vespa/document/fieldvalue/iteratorhandler.h> - -namespace vsm { - -/** - * This class is responsible for modifying field values where we have substring search and that are used - * as input to snippet generation. - * - * The class implements the FieldModifier interface to modify field values, and the IteratorHandler interface - * to traverse complex field values. Primitive field values are passed to the underlying searcher that is - * responsible for modifying the field value by inserting unit separators before and after matches. - * A group separator is inserted between primitive field values the same way as done by FlattenDocsumWriter. - **/ -class SnippetModifier : public FieldModifier, public document::fieldvalue::IteratorHandler -{ -private: - UTF8SubstringSnippetModifier::SP _searcher; - CharBuffer::SP _valueBuf; // buffer to store the final modified field value - char _groupSep; - bool _useSep; - document::FieldPath _empty; - - void considerSeparator(); - // Inherrit doc from document::FieldValue::IteratorHandler - void onPrimitive(uint32_t, const Content & c) override; - void reset(); - -public: - /** - * Creates a new instance. - * - * @param searcher the searcher used to modify primitive field values. - **/ - SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher); - - /** - * Creates a new instance. - * - * @param searcher the searcher used to modify primitive field values. - * @param valueBuf the shared buffer used to store the final modified field value. - **/ - SnippetModifier(const UTF8SubstringSnippetModifier::SP & searcher, const CharBuffer::SP & valueBuf); - - ~SnippetModifier(); - - /** - * Modifies the complete given field value. - **/ - document::FieldValue::UP modify(const document::FieldValue & fv) override { - return modify(fv, _empty); - } - - /** - * Modifies the given field value by passing all primitive field values to the searcher and - * inserting group separators between them. A string field value is returned. - * The iterating of the field value is limited by the given field path. - * - * @param fv the field value to modify. - * @param path the field path used to iterate the field value. - * @return the new modified field value. - **/ - document::FieldValue::UP modify(const document::FieldValue & fv, - const document::FieldPath & path) override; - - const CharBuffer & getValueBuf() const { return *_valueBuf; } - const UTF8SubstringSnippetModifier::SP & getSearcher() const { return _searcher; } -}; - -/** - * This class manages a set of snippet modifiers. - * The modifiers are instantiated and prepared in the setup function. - * This class also holds shared buffers that are used by the modifiers. - **/ -class SnippetModifierManager -{ -private: - FieldModifierMap _modifiers; - SharedSearcherBuf _searchBuf; - CharBuffer::SP _searchModifyBuf; - SharedOffsetBuffer _searchOffsetBuf; - CharBuffer::SP _modifierBuf; - -public: - SnippetModifierManager(); - ~SnippetModifierManager(); - - /** - * Setups snippet modifiers for all fields where we have substring search. - * - * @param queryTerms the query terms to take into consideration. - * @param specMap mapping from field id to search spec objects. - * @param fieldMap mapping from index (used in the query) to a list of field ids. - **/ - void setup(const search::streaming::QueryTermList & queryTerms, - const FieldSearchSpecMapT & specMap, const IndexFieldMapT & fieldMap); - - const FieldModifierMap & getModifiers() const { return _modifiers; } -}; - -} - diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp deleted file mode 100644 index 5507532d4f3..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.cpp +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "vsm-adapter.hpp" -#include "docsumconfig.h" -#include "i_matching_elements_filler.h" -#include <vespa/searchlib/common/matching_elements.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.vsm-adapter"); - -using search::docsummary::ResConfigEntry; -using search::docsummary::KeywordExtractor; -using search::MatchingElements; -using config::ConfigSnapshot; - -namespace vsm { - -GetDocsumsStateCallback::GetDocsumsStateCallback() : - _summaryFeatures(), - _rankFeatures(), - _matching_elements_filler() -{ } - -void GetDocsumsStateCallback::FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) -{ - (void) env; - if (_summaryFeatures) { // set the summary features to write to the docsum - state->_summaryFeatures = _summaryFeatures; - state->_summaryFeaturesCached = true; - } -} - -void GetDocsumsStateCallback::FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) -{ - (void) env; - if (_rankFeatures) { // set the rank features to write to the docsum - state->_rankFeatures = _rankFeatures; - } -} - -void GetDocsumsStateCallback::FillDocumentLocations(GetDocsumsState *state, IDocsumEnvironment * env) -{ - (void) state; - (void) env; -} - -std::unique_ptr<MatchingElements> -GetDocsumsStateCallback::fill_matching_elements(const search::MatchingElementsFields& fields) -{ - if (_matching_elements_filler) { - return _matching_elements_filler->fill_matching_elements(fields); - } - return std::make_unique<MatchingElements>(); -} - -void -GetDocsumsStateCallback::set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler) -{ - _matching_elements_filler = std::move(matching_elements_filler); -} - -GetDocsumsStateCallback::~GetDocsumsStateCallback() = default; - -DocsumTools::FieldSpec::FieldSpec() : - _outputName(), - _inputNames(), - _command(VsmsummaryConfig::Fieldmap::Command::NONE) -{ } - -DocsumTools::FieldSpec::~FieldSpec() = default; - -DocsumTools::DocsumTools(std::unique_ptr<DynamicDocsumWriter> writer) : - _writer(std::move(writer)), - _juniper(), - _resultClass(), - _fieldSpecs() -{ } - - -DocsumTools::~DocsumTools() = default; - -bool -DocsumTools::obtainFieldNames(const FastS_VsmsummaryHandle &cfg) -{ - uint32_t defaultSummaryId = getResultConfig()->LookupResultClassId(cfg->outputclass); - _resultClass = getResultConfig()->LookupResultClass(defaultSummaryId); - if (_resultClass != NULL) { - for (uint32_t i = 0; i < _resultClass->GetNumEntries(); ++i) { - const ResConfigEntry * entry = _resultClass->GetEntry(i); - _fieldSpecs.push_back(FieldSpec()); - _fieldSpecs.back().setOutputName(entry->_bindname); - bool found = false; - if (cfg) { - // check if we have this summary field in the vsmsummary config - for (uint32_t j = 0; j < cfg->fieldmap.size() && !found; ++j) { - if (entry->_bindname == cfg->fieldmap[j].summary.c_str()) { - for (uint32_t k = 0; k < cfg->fieldmap[j].document.size(); ++k) { - _fieldSpecs.back().getInputNames().push_back(cfg->fieldmap[j].document[k].field); - } - _fieldSpecs.back().setCommand(cfg->fieldmap[j].command); - found = true; - } - } - } - if (!found) { - // use yourself as input - _fieldSpecs.back().getInputNames().push_back(entry->_bindname); - } - } - } else { - LOG(warning, "could not locate result class: '%s'", cfg->outputclass.c_str()); - } - return true; -} - -void -VSMAdapter::configure(const VSMConfigSnapshot & snapshot) -{ - std::lock_guard guard(_lock); - LOG(debug, "(re-)configure VSM (docsum tools)"); - - std::shared_ptr<SummaryConfig> summary(snapshot.getConfig<SummaryConfig>()); - std::shared_ptr<SummarymapConfig> summaryMap(snapshot.getConfig<SummarymapConfig>()); - std::shared_ptr<VsmsummaryConfig> vsmSummary(snapshot.getConfig<VsmsummaryConfig>()); - std::shared_ptr<JuniperrcConfig> juniperrc(snapshot.getConfig<JuniperrcConfig>()); - - _fieldsCfg.set(snapshot.getConfig<VsmfieldsConfig>().release()); - _fieldsCfg.latch(); - - LOG(debug, "configureFields(): Size of cfg fieldspec: %zd", _fieldsCfg.get()->fieldspec.size()); // UlfC: debugging - LOG(debug, "configureFields(): Size of cfg documenttype: %zd", _fieldsCfg.get()->documenttype.size()); // UlfC: debugging - LOG(debug, "configureSummary(): Size of cfg classes: %zd", summary->classes.size()); // UlfC: debugging - LOG(debug, "configureSummaryMap(): Size of cfg override: %zd", summaryMap->override.size()); // UlfC: debugging - LOG(debug, "configureVsmSummary(): Size of cfg fieldmap: %zd", vsmSummary->fieldmap.size()); // UlfC: debugging - LOG(debug, "configureVsmSummary(): outputclass='%s'", vsmSummary->outputclass.c_str()); // UlfC: debugging - - // init result config - std::unique_ptr<ResultConfig> resCfg(new ResultConfig()); - if ( ! resCfg->ReadConfig(*summary.get(), _configId.c_str())) { - throw std::runtime_error("(re-)configuration of VSM (docsum tools) failed due to bad summary config"); - } - - // init keyword extractor - auto kwExtractor = std::make_unique<KeywordExtractor>(nullptr); - kwExtractor->AddLegalIndexSpec(_highlightindexes.c_str()); - vespalib::string spec = kwExtractor->GetLegalIndexSpec(); - LOG(debug, "index highlight spec: '%s'", spec.c_str()); - - // create dynamic docsum writer - auto writer = std::make_unique<DynamicDocsumWriter>(resCfg.release(), kwExtractor.release()); - - // configure juniper (used when configuring DynamicDocsumConfig) - _juniperProps = std::make_unique<JuniperProperties>(*juniperrc); - auto juniper = std::make_unique<juniper::Juniper>(_juniperProps.get(), &_wordFolder); - - // create new docsum tools - auto docsumTools = std::make_unique<DocsumTools>(std::move(writer)); - docsumTools->setJuniper(std::move(juniper)); - - // configure dynamic docsum writer - DynamicDocsumConfig dynDocsumConfig(docsumTools.get(), docsumTools->getDocsumWriter(), _fieldsCfg.get()); - dynDocsumConfig.configure(*summaryMap.get()); - - // configure new docsum tools - if (docsumTools->obtainFieldNames(vsmSummary)) { - // latch new docsum tools into production - _docsumTools.set(docsumTools.release()); - _docsumTools.latch(); - } else { - throw std::runtime_error("(re-)configuration of VSM (docsum tools) failed"); - } -} - -VSMConfigSnapshot::VSMConfigSnapshot(const vespalib::string & configId, const config::ConfigSnapshot & snapshot) - : _configId(configId), - _snapshot(std::make_unique<config::ConfigSnapshot>(snapshot)) -{ } -VSMConfigSnapshot::~VSMConfigSnapshot() = default; - -VSMAdapter::VSMAdapter(const vespalib::string & highlightindexes, const vespalib::string & configId, Fast_WordFolder & wordFolder) - : _highlightindexes(highlightindexes), - _configId(configId), - _wordFolder(wordFolder), - _fieldsCfg(), - _docsumTools(), - _juniperProps(), - _lock() -{ -} - - -VSMAdapter::~VSMAdapter() = default; - -} diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h deleted file mode 100644 index 6484269353b..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchlib/query/base.h> -#include <vespa/vsm/config/vsm-cfif.h> -#include <vespa/config-summary.h> -#include <vespa/config-summarymap.h> -#include <vespa/searchlib/common/featureset.h> -#include <vespa/searchsummary/docsummary/docsumwriter.h> -#include <vespa/searchsummary/docsummary/docsumstate.h> -#include <vespa/searchsummary/docsummary/idocsumenvironment.h> -#include <vespa/juniper/rpinterface.h> - -using search::docsummary::ResultConfig; -using search::docsummary::ResultClass; -using search::docsummary::IDocsumWriter; -using search::docsummary::DynamicDocsumWriter; -using search::docsummary::GetDocsumsState; -using search::docsummary::IDocsumEnvironment; -using search::docsummary::JuniperProperties; - -using vespa::config::search::SummaryConfig; -using vespa::config::search::SummarymapConfig; -using vespa::config::search::summary::JuniperrcConfig; - -namespace config { class ConfigSnapshot; } -namespace vsm { - -class IMatchingElementsFiller; - -class GetDocsumsStateCallback : public search::docsummary::GetDocsumsStateCallback -{ -private: - search::FeatureSet::SP _summaryFeatures; - search::FeatureSet::SP _rankFeatures; - std::unique_ptr<IMatchingElementsFiller> _matching_elements_filler; - -public: - GetDocsumsStateCallback(); - void FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override; - void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override; - virtual void FillDocumentLocations(GetDocsumsState * state, IDocsumEnvironment * env); - virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) override; - void setSummaryFeatures(const search::FeatureSet::SP & sf) { _summaryFeatures = sf; } - void setRankFeatures(const search::FeatureSet::SP & rf) { _rankFeatures = rf; } - void set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler); - ~GetDocsumsStateCallback(); -}; - -class DocsumTools : public IDocsumEnvironment -{ -public: - class FieldSpec { - private: - vespalib::string _outputName; - std::vector<vespalib::string> _inputNames; - VsmsummaryConfig::Fieldmap::Command _command; - - public: - FieldSpec(); - ~FieldSpec(); - const vespalib::string & getOutputName() const { return _outputName; } - void setOutputName(const vespalib::string & name) { _outputName = name; } - const std::vector<vespalib::string> & getInputNames() const { return _inputNames; } - std::vector<vespalib::string> & getInputNames() { return _inputNames; } - VsmsummaryConfig::Fieldmap::Command getCommand() const { return _command; } - void setCommand(VsmsummaryConfig::Fieldmap::Command command) { _command = command; } - }; - -private: - std::unique_ptr<DynamicDocsumWriter> _writer; - std::unique_ptr<juniper::Juniper> _juniper; - const ResultClass * _resultClass; - std::vector<FieldSpec> _fieldSpecs; - DocsumTools(const DocsumTools &); - DocsumTools &operator=(const DocsumTools &); - -public: - DocsumTools(std::unique_ptr<DynamicDocsumWriter> writer); - ~DocsumTools(); - void setJuniper(std::unique_ptr<juniper::Juniper> juniper) { _juniper = std::move(juniper); } - ResultConfig *getResultConfig() const { return _writer->GetResultConfig(); } - DynamicDocsumWriter *getDocsumWriter() const { return _writer.get(); } - const ResultClass *getResultClass() const { return _resultClass; } - const std::vector<FieldSpec> & getFieldSpecs() const { return _fieldSpecs; } - bool obtainFieldNames(const FastS_VsmsummaryHandle &cfg); - - // inherit doc from IDocsumEnvironment - search::IAttributeManager * getAttributeManager() override { return NULL; } - vespalib::string lookupIndex(const vespalib::string&) const override { return ""; } - juniper::Juniper * getJuniper() override { return _juniper.get(); } -}; - -typedef std::shared_ptr<DocsumTools> DocsumToolsPtr; - -class VSMConfigSnapshot { -private: - const vespalib::string _configId; - std::unique_ptr<const config::ConfigSnapshot> _snapshot; -public: - VSMConfigSnapshot(const vespalib::string & configId, const config::ConfigSnapshot & snapshot); - ~VSMConfigSnapshot(); - template <typename ConfigType> - std::unique_ptr<ConfigType> getConfig() const; -}; - -class VSMAdapter -{ -public: - VSMAdapter(const vespalib::string & highlightindexes, const vespalib::string & configId, Fast_WordFolder & wordFolder); - virtual ~VSMAdapter(); - - VsmfieldsHandle getFieldsConfig() const { return _fieldsCfg.get(); } - DocsumToolsPtr getDocsumTools() const { return _docsumTools.get(); } - void configure(const VSMConfigSnapshot & snapshot); -private: - vespalib::string _highlightindexes; - const vespalib::string _configId; - Fast_WordFolder & _wordFolder; - vespalib::PtrHolder<VsmfieldsConfig> _fieldsCfg; - vespalib::PtrHolder<DocsumTools> _docsumTools; - std::unique_ptr<JuniperProperties> _juniperProps; - - std::mutex _lock; - - VSMAdapter(const VSMAdapter &); - VSMAdapter &operator=(const VSMAdapter &); -}; - -} // namespace vsm - diff --git a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp b/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp deleted file mode 100644 index f071dbb2015..00000000000 --- a/streamingvisitors/src/vespa/vsm/vsm/vsm-adapter.hpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "vsm-adapter.h" -#include <vespa/config/retriever/configsnapshot.hpp> - -namespace vsm { - -template <typename ConfigType> -std::unique_ptr<ConfigType> -VSMConfigSnapshot::getConfig() const -{ - return _snapshot->getConfig<ConfigType>(_configId); -} - -} // namespace vsm - |