summary refs log tree commit diff stats
path: root/searchsummary
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 13:31:10 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 14:29:19 +0000
commit 58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch)
tree 4ced08d5ed7c7020e3cfb516f135f885334ff27d /searchsummary
parent 2c34544abef32f7da1c05a83a3648532afb53186 (diff)
Fold fastlib into vespalib and gc some unused code.
Also move some code only used by juniper up into juniper test module.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/CMakeLists.txt 1
-rw-r--r-- searchsummary/src/tests/juniper/.gitignore 1
-rw-r--r-- searchsummary/src/tests/juniper/CMakeLists.txt 25
-rw-r--r-- searchsummary/src/tests/juniper/SrcTestSuite.cpp 2
-rw-r--r-- searchsummary/src/tests/juniper/auxTest.h 4
-rw-r--r-- searchsummary/src/tests/juniper/latintokenizertest.cpp 9
-rw-r--r-- searchsummary/src/tests/juniper/latintokenizertest.h 450
-rw-r--r-- searchsummary/src/tests/juniper/matchobjectTest.h 2
-rw-r--r-- searchsummary/src/tests/juniper/mcandTest.h 4
-rw-r--r-- searchsummary/src/tests/juniper/queryparserTest.h 2
-rw-r--r-- searchsummary/src/tests/juniper/suite.h 264
-rw-r--r-- searchsummary/src/tests/juniper/test.cpp 141
-rw-r--r-- searchsummary/src/tests/juniper/test.h 150
-rw-r--r-- searchsummary/src/vespa/juniper/config.cpp 5
-rw-r--r-- searchsummary/src/vespa/juniper/juniperparams.cpp 42
-rw-r--r-- searchsummary/src/vespa/juniper/juniperparams.h 22
-rw-r--r-- searchsummary/src/vespa/juniper/latintokenizer.h 377
-rw-r--r-- searchsummary/src/vespa/juniper/queryparser.h 3
-rw-r--r-- searchsummary/src/vespa/juniper/result.cpp 6
-rw-r--r-- searchsummary/src/vespa/juniper/rpinterface.cpp 6
20 files changed, 1426 insertions, 90 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 84633bee262..d51e29dbd00 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -4,7 +4,6 @@ vespa_define_module(
vespalog
vespalib
configdefinitions
- fastlib_fast
document
searchlib
diff --git a/searchsummary/src/tests/juniper/.gitignore b/searchsummary/src/tests/juniper/.gitignore
index 46b307da632..09d6225ca26 100644
--- a/searchsummary/src/tests/juniper/.gitignore
+++ b/searchsummary/src/tests/juniper/.gitignore
@@ -14,3 +14,4 @@ juniper_auxTest_app
juniper_matchobjectTest_app
juniper_mcandTest_app
juniper_queryparserTest_app
+juniper_latintokenizertest_app
diff --git a/searchsummary/src/tests/juniper/CMakeLists.txt b/searchsummary/src/tests/juniper/CMakeLists.txt
index d15e91f1f63..77e7052a429 100644
--- a/searchsummary/src/tests/juniper/CMakeLists.txt
+++ b/searchsummary/src/tests/juniper/CMakeLists.txt
@@ -1,4 +1,10 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(juniper_testsuite
+ SOURCES
+ test.cpp
+ DEPENDS
+)
+
vespa_add_executable(juniper_mcandTest_app TEST
SOURCES
mcandTest.cpp
@@ -6,7 +12,7 @@ vespa_add_executable(juniper_mcandTest_app TEST
testenv.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
+ juniper_testsuite
)
vespa_add_test(NAME juniper_mcandTest_app COMMAND juniper_mcandTest_app)
vespa_add_executable(juniper_queryparserTest_app TEST
@@ -17,7 +23,7 @@ vespa_add_executable(juniper_queryparserTest_app TEST
testenv.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
+ juniper_testsuite
)
vespa_add_test(NAME juniper_queryparserTest_app COMMAND juniper_queryparserTest_app)
vespa_add_executable(juniper_matchobjectTest_app TEST
@@ -28,7 +34,7 @@ vespa_add_executable(juniper_matchobjectTest_app TEST
fakerewriter.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
+ juniper_testsuite
)
vespa_add_test(NAME juniper_matchobjectTest_app COMMAND juniper_matchobjectTest_app)
vespa_add_executable(juniper_appender_test_app TEST
@@ -36,7 +42,6 @@ vespa_add_executable(juniper_appender_test_app TEST
appender_test.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
)
vespa_add_test(NAME juniper_appender_test_app COMMAND juniper_appender_test_app)
vespa_add_executable(juniper_queryvisitor_test_app TEST
@@ -44,7 +49,6 @@ vespa_add_executable(juniper_queryvisitor_test_app TEST
queryvisitor_test.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
)
vespa_add_test(NAME juniper_queryvisitor_test_app COMMAND juniper_queryvisitor_test_app)
vespa_add_executable(juniper_auxTest_app TEST
@@ -54,7 +58,7 @@ vespa_add_executable(juniper_auxTest_app TEST
testenv.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
+ juniper_testsuite
)
vespa_add_test(NAME juniper_auxTest_app COMMAND juniper_auxTest_app)
vespa_add_executable(juniper_SrcTestSuite_app TEST
@@ -68,6 +72,13 @@ vespa_add_executable(juniper_SrcTestSuite_app TEST
testenv.cpp
DEPENDS
searchsummary
- fastlib_fast_testsuite
+ juniper_testsuite
)
vespa_add_test(NAME juniper_SrcTestSuite_app COMMAND juniper_SrcTestSuite_app)
+vespa_add_executable(juniper_latintokenizertest_app TEST
+ SOURCES
+ latintokenizertest.cpp
+ DEPENDS
+ juniper_testsuite
+)
+vespa_add_test(NAME juniper_latintokenizertest_app NO_VALGRIND COMMAND juniper_latintokenizertest_app)
diff --git a/searchsummary/src/tests/juniper/SrcTestSuite.cpp b/searchsummary/src/tests/juniper/SrcTestSuite.cpp
index c1e4dc2cd19..870c7b9f378 100644
--- a/searchsummary/src/tests/juniper/SrcTestSuite.cpp
+++ b/searchsummary/src/tests/juniper/SrcTestSuite.cpp
@@ -1,12 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "testenv.h"
+#include "suite.h"
#include "mcandTest.h"
#include "queryparserTest.h"
#include "matchobjectTest.h"
#include "auxTest.h"
#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/fastlib/testsuite/suite.h>
/**
* The SrcTestSuite class runs all the unit tests for the src module.
*
diff --git a/searchsummary/src/tests/juniper/auxTest.h b/searchsummary/src/tests/juniper/auxTest.h
index dd6d79e024a..9ff391911b3 100644
--- a/searchsummary/src/tests/juniper/auxTest.h
+++ b/searchsummary/src/tests/juniper/auxTest.h
@@ -3,9 +3,9 @@
// Auxiliary tests for juniper - based on Juniper 1.x proximitytest.cpp
-#include <map>
-#include <vespa/fastlib/testsuite/test.h>
#include "testenv.h"
+#include "test.h"
+#include <map>
class AuxTest : public Test
{
diff --git a/searchsummary/src/tests/juniper/latintokenizertest.cpp b/searchsummary/src/tests/juniper/latintokenizertest.cpp
new file mode 100644
index 00000000000..89273ab1ec0
--- /dev/null
+++ b/searchsummary/src/tests/juniper/latintokenizertest.cpp
@@ -0,0 +1,9 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "latintokenizertest.h"
+
+int main(int, char **) {
+ LatinTokenizerTest lta;
+ lta.SetStream(&std::cout);
+ lta.Run();
+ return lta.Report();
+}
diff --git a/searchsummary/src/tests/juniper/latintokenizertest.h b/searchsummary/src/tests/juniper/latintokenizertest.h
new file mode 100644
index 00000000000..b4d113918ee
--- /dev/null
+++ b/searchsummary/src/tests/juniper/latintokenizertest.h
@@ -0,0 +1,450 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "test.h"
+#include <vespa/juniper/latintokenizer.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+class Mapel_Pucntuation {
+private:
+ /** Member variables. */
+ static bool *_lookup;
+public:
+
+ /** Constructors */
+ Mapel_Pucntuation();
+
+ /** Punctuation predicate. */
+ bool operator()(char c) const {
+ return _lookup[static_cast<unsigned char>(c)];
+ }
+
+};
+
+class Maple_Space {
+private:
+
+ /** Member variables. */
+ static bool *_lookup;
+
+public:
+
+ /** Constructors */
+ Maple_Space();
+
+ /** Space predicate. */
+ bool operator()(char c) const {
+ return _lookup[static_cast<unsigned char>(c)];
+ }
+
+};
+
+bool *Maple_Space::_lookup = NULL;
+bool *Mapel_Pucntuation::_lookup = NULL;
+
+Mapel_Pucntuation::Mapel_Pucntuation() {
+
+ // Initialize lookup table.
+ if (_lookup == NULL) {
+
+ _lookup = new bool[256];
+
+ for (unsigned int i = 0; i < 256; ++i) {
+ _lookup[i] = false;
+ }
+
+ _lookup[static_cast<unsigned char>('.')] = true;
+ _lookup[static_cast<unsigned char>(',')] = true;
+ _lookup[static_cast<unsigned char>(':')] = true;
+ _lookup[static_cast<unsigned char>(';')] = true;
+ _lookup[static_cast<unsigned char>('|')] = true;
+ _lookup[static_cast<unsigned char>('!')] = true;
+ _lookup[static_cast<unsigned char>('?')] = true;
+ _lookup[static_cast<unsigned char>('@')] = true;
+ _lookup[static_cast<unsigned char>('/')] = true;
+ _lookup[static_cast<unsigned char>('(')] = true;
+ _lookup[static_cast<unsigned char>(')')] = true;
+ _lookup[static_cast<unsigned char>('[')] = true;
+ _lookup[static_cast<unsigned char>(']')] = true;
+ _lookup[static_cast<unsigned char>('{')] = true;
+ _lookup[static_cast<unsigned char>('}')] = true;
+ _lookup[static_cast<unsigned char>('<')] = true;
+ _lookup[static_cast<unsigned char>('>')] = true;
+ _lookup[static_cast<unsigned char>('*')] = true;
+ _lookup[static_cast<unsigned char>('=')] = true;
+ _lookup[static_cast<unsigned char>('%')] = true;
+ _lookup[static_cast<unsigned char>('\\')] = true;
+
+ }
+
+}
+
+Maple_Space::Maple_Space() {
+
+ // Initialize lookup table.
+ if (_lookup == NULL) {
+
+ _lookup = new bool[256];
+
+ for (unsigned int i = 0; i < 256; ++i) {
+ _lookup[i] = false;
+ }
+
+ _lookup[static_cast<unsigned char>(' ')] = true;
+ _lookup[static_cast<unsigned char>('\n')] = true;
+ _lookup[static_cast<unsigned char>('\t')] = true;
+ _lookup[static_cast<unsigned char>('\r')] = true;
+ _lookup[static_cast<unsigned char>('"')] = true;
+ _lookup[static_cast<unsigned char>('\'')] = true;
+ _lookup[static_cast<unsigned char>('`')] = true;
+ _lookup[static_cast<unsigned char>('_')] = true;
+
+ }
+}
+
+class LatinTokenizerTest : public Test
+{
+private:
+ void TestSimple();
+ void TestSimpleLength();
+ void TestEnding();
+ void TestEndingLength();
+ void TestNull();
+ void TestNullLength();
+ void TestEmpty();
+ void TestEmptyLength();
+ void TestMapelURL();
+
+ template <typename IsSeparator, typename IsPunctuation>
+ void TestWord(Fast_LatinTokenizer<IsSeparator,IsPunctuation>* lt,
+ const char* correct,
+ bool punct = false)
+ {
+ typename Fast_LatinTokenizer<IsSeparator,IsPunctuation>::Fast_Token token;
+ _test(lt->MoreTokens());
+
+ token = lt->GetNextToken();
+ char temp = *token.second;
+ *token.second = '\0';
+ vespalib::string word = vespalib::make_string("%s", token.first);
+ *token.second = temp;
+
+ PushDesc(vespalib::make_string("%s%s == %s", "word: ", word.c_str(), correct).c_str());
+
+ _test(word == correct);
+
+ _test(token._punctuation == punct);
+
+ PopDesc();
+ }
+
+ void TestTypeparamObservers();
+
+public:
+ LatinTokenizerTest();
+ ~LatinTokenizerTest();
+ void Run() override;
+};
+
+
+LatinTokenizerTest::LatinTokenizerTest()
+{
+
+}
+
+
+LatinTokenizerTest::~LatinTokenizerTest()
+{
+
+}
+
+
+void LatinTokenizerTest::TestSimple()
+{
+ PushDesc("Simple");
+
+ Fast_SimpleLatinTokenizer lt;
+ std::string s("This is. my . test String.");
+ lt.SetNewText(const_cast<char*>(s.c_str()));
+
+ PushDesc("This");
+ TestWord(&lt, "This");
+ PopDesc();
+ PushDesc("is");
+ TestWord(&lt, "is");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+ PushDesc("my");
+ TestWord(&lt, "my");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+ PushDesc("test");
+ TestWord(&lt, "test");
+ PopDesc();
+ PushDesc("String");
+ TestWord(&lt, "String");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+
+ _test(!lt.MoreTokens());
+
+ PopDesc();
+}
+
+
+
+void LatinTokenizerTest::TestSimpleLength()
+{
+ PushDesc("Simple");
+
+ Fast_SimpleLatinTokenizer lt;
+ std::string s("This is. my . test String.");
+ lt.SetNewText(const_cast<char*>(s.c_str()),
+ s.length());
+
+ PushDesc("This");
+ TestWord(&lt, "This");
+ PopDesc();
+ PushDesc("is");
+ TestWord(&lt, "is");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+ PushDesc("my");
+ TestWord(&lt, "my");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+ PushDesc("test");
+ TestWord(&lt, "test");
+ PopDesc();
+ PushDesc("String");
+ TestWord(&lt, "String");
+ PopDesc();
+ PushDesc(".");
+ TestWord(&lt, ".", true);
+ PopDesc();
+
+ _test(!lt.MoreTokens());
+
+ PopDesc();
+}
+
+
+
+void LatinTokenizerTest::TestEnding()
+{
+ PushDesc("Ending\n");
+
+ std::string text("This is my test String ");
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()));
+
+ TestWord(lt, "This");
+ TestWord(lt, "is");
+ TestWord(lt, "my");
+ TestWord(lt, "test");
+ TestWord(lt, "String");
+
+ _test(!lt->MoreTokens());
+
+ _test(text == lt->GetOriginalText());
+
+ delete lt;
+
+ PopDesc();
+}
+
+void LatinTokenizerTest::TestEndingLength()
+{
+ PushDesc("Ending\n");
+
+ std::string text("This is my test String ");
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()),
+ text.length());
+
+ TestWord(lt, "This");
+ TestWord(lt, "is");
+ TestWord(lt, "my");
+ TestWord(lt, "test");
+ TestWord(lt, "String");
+
+ _test(!lt->MoreTokens());
+
+ _test(text == std::string(lt->GetOriginalText()));
+
+ delete lt;
+
+ PopDesc();
+}
+
+void LatinTokenizerTest::TestNull()
+{
+ PushDesc("Null\n");
+
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL);
+
+ _test(!lt->MoreTokens());
+
+ _test(lt->GetOriginalText() == NULL);
+
+ delete lt;
+
+ PopDesc();
+}
+
+void LatinTokenizerTest::TestNullLength()
+{
+ PushDesc("Null\n");
+
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL, 0);
+
+ _test(!lt->MoreTokens());
+
+ _test(lt->GetOriginalText() == NULL);
+
+ delete lt;
+
+ PopDesc();
+}
+
+void LatinTokenizerTest::TestEmpty()
+{
+ PushDesc("Empty\n");
+
+ std::string text(" ");
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()));
+
+ _test(!lt->MoreTokens());
+
+ delete lt;
+
+ PopDesc();
+}
+
+void LatinTokenizerTest::TestEmptyLength()
+{
+ PushDesc("Empty\n");
+
+ std::string text(" ");
+ Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()),
+ text.length());
+
+ _test(!lt->MoreTokens());
+
+ delete lt;
+
+ PopDesc();
+}
+
+
+class TPS
+{
+private:
+ TPS(const TPS &);
+ TPS& operator=(const TPS &);
+
+public:
+ TPS() : _myfunc(NULL) {}
+ void Init(int (*myfunc)(int c))
+ {
+ _myfunc = myfunc;
+ }
+
+ bool operator()(char c)
+ {
+// LatinTokenizerTest::_test(_myfunc);
+ return (_myfunc(c) != 0);
+ }
+
+private:
+ int (*_myfunc)(int c);
+};
+
+void LatinTokenizerTest::TestTypeparamObservers()
+{
+
+ typedef Fast_LatinTokenizer<TPS,TPS> MyTokenizer;
+
+ PushDesc("TypeparamObservers\n");
+ std::string text("4Some6text");
+ MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str()));
+ tok->GetIsPunctuation().Init(ispunct);
+ tok->GetIsSeparator().Init(isdigit);
+
+ TestWord(tok,"Some");
+ TestWord(tok,"text");
+ _test(!tok->MoreTokens());
+ PopDesc();
+
+ delete tok;
+}
+
+void LatinTokenizerTest::TestMapelURL()
+{
+
+ typedef Fast_LatinTokenizer<Maple_Space, Mapel_Pucntuation> MyTokenizer;
+
+ PushDesc("MapelURL\n");
+ std::string text("http://search.msn.co.uk/results.asp?q= cfg=SMCBROWSE rn=1825822 dp=1873075 v=166:");
+ MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str()));
+
+ TestWord(tok,"http", false);
+ TestWord(tok,":", true);
+ TestWord(tok,"/", true);
+ TestWord(tok,"/", true);
+ TestWord(tok,"search", false);
+ TestWord(tok,".", true);
+ TestWord(tok,"msn", false);
+ TestWord(tok,".", true);
+ TestWord(tok,"co", false);
+ TestWord(tok,".", true);
+ TestWord(tok,"uk", false);
+ TestWord(tok,"/", true);
+ TestWord(tok,"results", false);
+ TestWord(tok,".", true);
+ TestWord(tok,"asp", false);
+ TestWord(tok,"?", true);
+ TestWord(tok,"q", false);
+ TestWord(tok,"=", true);
+ TestWord(tok,"cfg", false);
+ TestWord(tok,"=", true);
+ TestWord(tok,"SMCBROWSE", false);
+ TestWord(tok,"rn", false);
+ TestWord(tok,"=", true);
+ TestWord(tok,"1825822", false);
+ TestWord(tok,"dp", false);
+ TestWord(tok,"=", true);
+ TestWord(tok,"1873075", false);
+ TestWord(tok,"v", false);
+ TestWord(tok,"=", true);
+ TestWord(tok,"166", false);
+ TestWord(tok,":", true);
+ _test(!tok->MoreTokens());
+ PopDesc();
+
+ delete tok;
+}
+
+
+
+void LatinTokenizerTest::Run()
+{
+ TestSimple();
+ TestSimpleLength();
+ TestEnding();
+ TestEndingLength();
+ TestNull();
+ TestNullLength();
+ TestEmpty();
+ TestEmptyLength();
+ TestTypeparamObservers();
+ TestMapelURL();
+}
diff --git a/searchsummary/src/tests/juniper/matchobjectTest.h b/searchsummary/src/tests/juniper/matchobjectTest.h
index 5bfd29a371f..9fdd3e4719f 100644
--- a/searchsummary/src/tests/juniper/matchobjectTest.h
+++ b/searchsummary/src/tests/juniper/matchobjectTest.h
@@ -5,7 +5,7 @@
#pragma once
#include "testenv.h"
-#include <vespa/fastlib/testsuite/test.h>
+#include "test.h"
#include <map>
/**
diff --git a/searchsummary/src/tests/juniper/mcandTest.h b/searchsummary/src/tests/juniper/mcandTest.h
index cdb01e91e3b..6eb8b4d66e5 100644
--- a/searchsummary/src/tests/juniper/mcandTest.h
+++ b/searchsummary/src/tests/juniper/mcandTest.h
@@ -4,10 +4,10 @@
*/
#pragma once
-#include <map>
-#include <vespa/fastlib/testsuite/test.h>
#include "testenv.h"
+#include "test.h"
#include <vespa/juniper/mcand.h>
+#include <map>
/**
* The MatchCandidateTest class holds
diff --git a/searchsummary/src/tests/juniper/queryparserTest.h b/searchsummary/src/tests/juniper/queryparserTest.h
index 7dc4dda63fa..803fbd4c999 100644
--- a/searchsummary/src/tests/juniper/queryparserTest.h
+++ b/searchsummary/src/tests/juniper/queryparserTest.h
@@ -5,9 +5,9 @@
#pragma once
#include "testenv.h"
+#include "test.h"
#include <vespa/juniper/queryparser.h>
#include <vespa/juniper/rewriter.h>
-#include <vespa/fastlib/testsuite/test.h>
#include <map>
/**
diff --git a/searchsummary/src/tests/juniper/suite.h b/searchsummary/src/tests/juniper/suite.h
new file mode 100644
index 00000000000..fea685731ae
--- /dev/null
+++ b/searchsummary/src/tests/juniper/suite.h
@@ -0,0 +1,264 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**************************************************************************
+ * Author: Bård Kvalheim
+ *
+ * A test suite. Modified from the suite written by Chuck Allison.
+ * http://www.cuj.com/archive/1809/feature.html
+ *
+ * Licensed to Yahoo, and relicensed under the terms of the Apache 2.0 license
+ *
+ * The usage of suite is simple:
+ *
+ * mysuite.h:
+ * -----
+ *
+ * #include <iosfwd>
+ * #include "suite.h"
+ *
+ * class MySuite : public Suite
+ * {
+ * public:
+ * MySuite() :
+ * Suite("My test suite. ", &cout)
+ * {
+ * AddTest(new MyTest1());
+ * AddTest(new MyTest2());
+ * }
+ * };
+ *
+ *
+ *
+ * class MySuiteApp
+ * {
+ * public:
+ * int Main();
+ * };
+ *
+ *
+ * ---
+ *
+ * mysuite.cpp:
+ * -----
+ *
+ * #include "mysuite.h"
+ *
+ *
+ * int MySuiteApp::Main() {
+ * MySuite mts;
+ * mts.Run();
+ * mts.Report();
+ * mts.Free();
+ * }
+ *
+ * ---
+ *
+ **************************************************************************/
+
+#pragma once
+
+#include "test.h" // includes <string>, <iosfwd>
+
+
+#include <vector>
+
+#include <iostream>
+#include <stdexcept>
+#include <cassert>
+
+
+namespace fast::testsuite {
+
+class TestSuiteError;
+
+class Suite
+{
+public:
+ Suite(const std::string& name, std::ostream* osptr = 0);
+
+ std::string GetName() const;
+ long GetNumPassed() const;
+ long GetNumFailed() const;
+ const std::ostream* GetStream() const;
+ void SetStream(std::ostream* osptr);
+
+ void AddTest(Test* t); //throw (TestSuiteError);
+ void AddSuite(const Suite&); //throw(TestSuiteError);
+ void Run(); // Calls Test::run() repeatedly
+ long Report() const;
+ void Free(); // deletes tests
+ virtual ~Suite(void) { }
+
+private:
+ std::string m_name;
+ std::ostream* m_osptr;
+ std::vector<Test*> m_tests;
+ void Reset();
+ int GetLongestName() const;
+
+ // Disallowed ops:
+ Suite(const Suite&);
+ Suite& operator=(const Suite&);
+};
+
+inline
+Suite::Suite(const std::string& name, std::ostream* osptr)
+ : m_name(name),
+ m_osptr(osptr),
+ m_tests()
+{
+}
+
+inline
+std::string Suite::GetName() const
+{
+ return m_name;
+}
+
+inline
+const std::ostream* Suite::GetStream() const
+{
+ return m_osptr;
+}
+
+inline
+void Suite::SetStream(std::ostream* osptr)
+{
+ m_osptr = osptr;
+}
+
+
+/*class TestSuiteError : public logic_error
+ {
+ public:
+ TestSuiteError(const std::string& s = "")
+ : logic_error(s)
+ {}
+ };*/
+
+void Suite::AddTest(Test* t) //throw(TestSuiteError)
+{
+ // Make sure test has a stream:
+ if (t == 0) {}
+ //throw TestSuiteError("Null test in Suite::addTest");
+ else if (m_osptr != 0 && t->GetStream() == 0)
+ t->SetStream(m_osptr);
+
+ m_tests.push_back(t);
+ t->Reset();
+}
+
+void Suite::AddSuite(const Suite& s) //throw(TestSuiteError)
+{
+ for (size_t i = 0; i < s.m_tests.size(); ++i)
+ AddTest(s.m_tests[i]);
+}
+
+void Suite::Free()
+{
+ // This is not a destructor because tests
+ // don't have to be on the heap.
+ for (size_t i = 0; i < m_tests.size(); ++i)
+ {
+ delete m_tests[i];
+ m_tests[i] = 0;
+ }
+}
+
+void Suite::Run()
+{
+ Reset();
+ int longestName = GetLongestName();
+ const char *nm;
+ int x = 0;
+ for (size_t i = 0; i < m_tests.size(); ++i) {
+ assert(m_tests[i]);
+ nm = m_tests[i]->get_name();
+ if (nm) {
+ *m_osptr << std::endl << nm << ": ";
+ for (x = longestName - strlen(nm); x > 0; --x)
+ *m_osptr << ' ';
+ *m_osptr << std::flush;
+ }
+ m_tests[i]->Run();
+ }
+}
+
+
+// Find the longest test name
+int Suite::GetLongestName() const
+{
+ int longestName = 0, len = 0;
+ const char *nm;
+ for (size_t i = 0; i < m_tests.size(); ++i) {
+ assert(m_tests[i]);
+ nm = m_tests[i]->get_name();
+ if ( nm != NULL && (len = strlen(nm)) > longestName )
+ longestName = len;
+ }
+ return longestName;
+}
+
+long Suite::Report() const
+{
+ if (m_osptr) {
+ int longestName = GetLongestName();
+ int lineLength = longestName + 8 + 16 + 10;
+ long totFail = 0;
+ int x = 0;
+ *m_osptr << std::endl << std::endl
+ << "Suite \"" << m_name << "\"" << std::endl;
+ for (x = 0; x < lineLength; ++x)
+ *m_osptr << '=';
+ *m_osptr << "=";
+
+ // Write the individual reports
+ for (size_t i = 0; i < m_tests.size(); ++i) {
+ assert(m_tests[i]);
+ const char *nm = m_tests[i]->get_name();
+ totFail += m_tests[i]->Report(longestName -
+ (nm ? strlen(nm) : longestName));
+ }
+
+ for (x = 0; x < lineLength; ++x)
+ *m_osptr << '=';
+ *m_osptr << "=\n";
+ return totFail;
+ }
+ else
+ return GetNumFailed();
+}
+
+long Suite::GetNumPassed() const
+{
+ long totPass = 0;
+ for (size_t i = 0; i < m_tests.size(); ++i)
+ {
+ assert(m_tests[i]);
+ totPass += m_tests[i]->GetNumPassed();
+ }
+ return totPass;
+}
+
+long Suite::GetNumFailed() const
+{
+ long totFail = 0;
+ for (size_t i = 0; i < m_tests.size(); ++i)
+ {
+ assert(m_tests[i]);
+ totFail += m_tests[i]->GetNumFailed();
+ }
+ return totFail;
+}
+
+void Suite::Reset()
+{
+ for (size_t i = 0; i < m_tests.size(); ++i)
+ {
+ assert(m_tests[i]);
+ m_tests[i]->Reset();
+ }
+}
+
+}
+
+using fast::testsuite::Suite;
diff --git a/searchsummary/src/tests/juniper/test.cpp b/searchsummary/src/tests/juniper/test.cpp
new file mode 100644
index 00000000000..18930b1bca2
--- /dev/null
+++ b/searchsummary/src/tests/juniper/test.cpp
@@ -0,0 +1,141 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "test.h"
+
+namespace fast::testsuite {
+
+Test::Test(std::ostream* osptr, const char*name) :
+ m_osptr(osptr),
+ name_(name),
+ m_nPass(0),
+ m_nFail(0),
+ m_index(0),
+ m_description()
+{
+ m_pchar[0]= '|';
+ m_pchar[1]= '-';
+}
+
+Test::Test(const char*name) :
+ Test(nullptr, name)
+{
+}
+
+const char *Test::get_name() const {
+ return (name_ == NULL) ? "Test " : name_;
+}
+
+const std::string& Test::GetSourceDirectory()
+{
+ static const std::string srcDir = [] () {
+ std::string dir(".");
+ const char* env = getenv("SOURCE_DIRECTORY");
+ if (env) {
+ dir = env;
+ }
+ if (*dir.rbegin() != '/') {
+ dir += "/";
+ }
+ return dir;
+ } ();
+ return srcDir;
+}
+
+long Test::GetNumPassed() const
+{
+ return m_nPass;
+}
+
+long Test::GetNumFailed() const
+{
+ return m_nFail;
+}
+
+const std::ostream* Test::GetStream() const
+{
+ return m_osptr;
+}
+
+void Test::SetStream(std::ostream* osptr)
+{
+ m_osptr = osptr;
+}
+
+void Test::_Succeed()
+{
+ ++m_nPass;
+}
+
+void Test::Reset()
+{
+ m_nPass = m_nFail = 0;
+}
+
+void Test::PushDesc(const std::string& desc)
+{
+ m_description.push_back(desc);
+}
+
+void Test::PopDesc()
+{
+ m_description.pop_back();
+}
+
+size_t Test::print_desc() const
+{
+ std::copy(m_description.begin(), m_description.end(),
+ std::ostream_iterator<std::string>(*m_osptr));
+ return m_description.size();
+}
+
+void Test::print_progress() {
+ ++m_index;
+ m_index = m_index % 2;
+ *m_osptr << '\b' <<'\b' <<'\b';
+ *m_osptr <<' ' << m_pchar[m_index] << ' ' << std::flush;
+}
+
+bool Test::do_fail(const std::string& lbl, const char* fname, long lineno,
+ bool addEndl)
+{
+ ++m_nFail;
+ if (m_osptr) {
+ *m_osptr << std::endl
+ << fname << ':' << lineno << ": "
+ << get_name() << " failure: (" << lbl << ")"
+ << std::endl;
+ if (addEndl && print_desc() > 0)
+ *m_osptr << std::endl << std::endl;
+ }
+ return false;
+}
+
+bool Test::do_test(bool cond, const std::string& lbl,
+ const char* fname, long lineno)
+{
+ if (!cond) {
+ return do_fail(lbl, fname, lineno);
+ }
+ else {
+ _Succeed();
+ print_progress();
+ return true;
+ }
+}
+
+long Test::Report(int padSpaces) const
+{
+ if (m_osptr) {
+ *m_osptr << std::endl << get_name();
+
+ // Pad the name with the given number of spaces
+ for (int i= 0; i < padSpaces; ++i) *m_osptr << ' ';
+
+ *m_osptr << "\tPassed: " << m_nPass
+ << "\tFailed: " << m_nFail
+ << std::endl;
+ }
+ return m_nFail;
+}
+
+}
diff --git a/searchsummary/src/tests/juniper/test.h b/searchsummary/src/tests/juniper/test.h
new file mode 100644
index 00000000000..1388c3ba812
--- /dev/null
+++ b/searchsummary/src/tests/juniper/test.h
@@ -0,0 +1,150 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**************************************************************************
+ * Author: Bård Kvalheim
+ *
+ * The test class of the testsuite. Written by Chuck Allison.
+ * http://www.cuj.com/archive/1809/feature.html
+ *
+ * Apart from one trick, the usage of the test class is very simple:
+ *
+ * mytest.h:
+ * ----
+ * #include <iosfwd>
+ * #include "test.h"
+ *
+ * class MyTest : public Test
+ * {
+ * public:
+ * virtual void Run() {
+ * // do the tests; _test passes if its argument is true
+ * _test(expr);
+ * }
+ *
+ * };
+ *
+ * class MyTestApp
+ * {
+ * public:
+ * int Main();
+ * };
+ *
+ *
+ * ----
+ *
+ *
+ * mytest.cpp:
+ * ----
+ * #include "mytest.h"
+ *
+ * int MyTestApp::Main()
+ * {
+ * MyTest mt;
+ * mt.SetStream(&std::cout);
+ * mt.Run();
+ * mt.Report();
+ *
+ * return 0;
+ * }
+ *
+ *
+ * ----
+ *
+ * The trick is that all the code except the main function is in
+ * the .h file. The reason for this is that it is simpler to integrate
+ * the single test into a suite of tests.
+ *
+ *************************************************************************/
+
+#pragma once
+
+#include <string>
+#include <iostream>
+#include <typeinfo>
+#include <vector>
+#include <algorithm>
+#include <iterator>
+
+// The following have underscores because they are macros
+// (and it's impolite to usurp other users' functions!).
+// For consistency, _Succeed() also has an underscore.
+#define _test(cond) do_test((cond), #cond, __FILE__, __LINE__)
+#define _test_equal(lhs, rhs) \
+ do_equality_test((lhs), (rhs), #lhs, __FILE__, __LINE__)
+#define _fail(str) do_fail((str), __FILE__, __LINE__)
+
+namespace fast::testsuite {
+
+class Test
+{
+public:
+ explicit Test(std::ostream* osptr = 0, const char *name = NULL);
+ explicit Test(const char *name);
+ virtual ~Test(){}
+ virtual void Run() = 0;
+
+ const char *get_name() const;
+ static const std::string& GetSourceDirectory();
+ long GetNumPassed() const;
+ long GetNumFailed() const;
+ const std::ostream* GetStream() const;
+ void SetStream(std::ostream* osptr);
+
+ void _Succeed();
+ long Report(int padSpaces = 1) const;
+ virtual void Reset();
+
+ void PushDesc(const std::string& desc);
+ void PopDesc();
+
+protected:
+ std::ostream* m_osptr;
+ const char *name_;
+
+ bool do_test(bool cond, const std::string& lbl,
+ const char* fname, long lineno);
+ bool do_fail(const std::string& lbl, const char* fname, long lineno,
+ bool addEndl = true);
+ template <typename t1, typename t2>
+ bool do_equality_test(const t1& lhs, const t2& rhs,
+ const char* lbl, const char* fname, long lineno);
+ virtual void print_progress();
+
+private:
+ long m_nPass;
+ long m_nFail;
+ int m_index;
+ char m_pchar[4];
+
+ std::vector<std::string> m_description;
+
+ size_t print_desc() const;
+
+ // Disallowed:
+ Test(const Test&);
+ Test& operator=(const Test&);
+};
+
+template <typename t1, typename t2>
+bool Test::do_equality_test(const t1& lhs, const t2& rhs, const char* lbl,
+ const char* fname, long lineno)
+{
+ if (lhs == rhs) {
+ _Succeed();
+ print_progress();
+ return true;
+ }
+ do_fail(std::string(lbl), fname, lineno, false);
+ if (m_osptr) {
+ *m_osptr << "Equality test failed: "
+ << "Expected '" << rhs
+ << "' got '" << lhs << "'"
+ << std::endl;
+ if (print_desc() > 0)
+ *m_osptr << std::endl << std::endl;
+ }
+ return false;
+}
+
+}
+
+using fast::testsuite::Test;
diff --git a/searchsummary/src/vespa/juniper/config.cpp b/searchsummary/src/vespa/juniper/config.cpp
index b9213bb21f1..a82a8d74b8a 100644
--- a/searchsummary/src/vespa/juniper/config.cpp
+++ b/searchsummary/src/vespa/juniper/config.cpp
@@ -1,7 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "config.h"
-#include "IJuniperProperties.h"
#include "rpinterface.h"
#include "juniperdebug.h"
#define _NEED_SUMMARY_CONFIG_IMPL
@@ -14,7 +13,7 @@ namespace juniper
Config::Config(const char* config_name, Juniper & juniper) :
_docsumparams(),
_matcherparams(),
- _sumconf(NULL),
+ _sumconf(nullptr),
_config_name(config_name),
_juniper(juniper)
{
@@ -51,7 +50,7 @@ Config::Config(const char* config_name, Juniper & juniper) :
.SetMaxMatches(max_matches)
.SetSurroundMax(surround_max)
.SetFallback(fallback);
- _matcherparams.SetWantGlobalRank(true)
+ _matcherparams
.SetStemMinLength(stem_min).SetStemMaxExtend(stem_extend)
.SetMatchWindowSize(match_winsize)
.SetMaxMatchCandidates(max_match_candidates)
diff --git a/searchsummary/src/vespa/juniper/juniperparams.cpp b/searchsummary/src/vespa/juniper/juniperparams.cpp
index e600c23f7c4..4f25b2446ad 100644
--- a/searchsummary/src/vespa/juniper/juniperparams.cpp
+++ b/searchsummary/src/vespa/juniper/juniperparams.cpp
@@ -1,7 +1,5 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "juniperdebug.h"
#include "juniperparams.h"
-#include "Matcher.h"
#include <cstring>
// DocsumParams implementation:
@@ -9,7 +7,7 @@
DocsumParams::DocsumParams() :
_enabled(false), _length(256), _min_length(128), _max_matches(3),
- _surround_max(80), _space_chars(""), _fallback(FALLBACK_NONE)
+ _surround_max(80), _fallback(FALLBACK_NONE)
{ }
DocsumParams& DocsumParams::SetEnabled(bool en)
@@ -42,12 +40,6 @@ DocsumParams& DocsumParams::SetSurroundMax(size_t length)
return *this;
}
-DocsumParams& DocsumParams::SetSpaceChars(const char* spacechars)
-{
- _space_chars = spacechars;
- return *this;
-}
-
DocsumParams& DocsumParams::SetFallback(const char* fallback)
{
if (strcmp("prefix", fallback) == 0) {
@@ -63,7 +55,6 @@ size_t DocsumParams::MinLength() const { return _min_length; }
size_t DocsumParams::MaxMatches() const { return _max_matches; }
size_t DocsumParams::SurroundMax() const { return _surround_max; }
bool DocsumParams::Enabled() const { return _enabled; }
-const char* DocsumParams::SpaceChars() const { return _space_chars.c_str(); }
int DocsumParams::Fallback() const { return _fallback; }
// MatcherParams implementation:
@@ -71,54 +62,26 @@ int DocsumParams::Fallback() const { return _fallback; }
MatcherParams::MatcherParams() :
- _prefix_extend_length(3),
- _prefix_min_length(5),
_match_winsize(200),
_match_winsize_fallback_multiplier(10.0),
_max_match_candidates(1000),
- _want_global_rank(false),
_stem_min(0), _stem_extend(0),
_wordfolder(NULL), _proximity_factor(1.0)
{ }
-MatcherParams& MatcherParams::SetPrefixExtendLength(size_t extend_length)
-{
- _prefix_extend_length = extend_length;
- return *this;
-}
-
-MatcherParams& MatcherParams::SetPrefixMinLength(size_t min_length)
-{
- _prefix_min_length = min_length;
- return *this;
-}
-
-
MatcherParams& MatcherParams::SetMatchWindowSize(size_t winsize)
{
_match_winsize = winsize;
return *this;
}
-MatcherParams& MatcherParams::SetMatchWindowSizeFallbackMultiplier(double winsize)
-{
- _match_winsize_fallback_multiplier = winsize;
- return *this;
-}
-
MatcherParams& MatcherParams::SetMaxMatchCandidates(size_t max_match_candidates)
{
_max_match_candidates = max_match_candidates;
return *this;
}
-MatcherParams& MatcherParams::SetWantGlobalRank(bool global_rank)
-{
- _want_global_rank = global_rank;
- return *this;
-}
-
MatcherParams& MatcherParams::SetStemMinLength(size_t stem_min)
{
_stem_min = stem_min;
@@ -132,12 +95,9 @@ MatcherParams& MatcherParams::SetStemMaxExtend(size_t stem_extend)
return *this;
}
-size_t MatcherParams::PrefixExtendLength() const { return _prefix_extend_length; }
-size_t MatcherParams::PrefixMinLength() const { return _prefix_min_length; }
size_t MatcherParams::MatchWindowSize() const { return _match_winsize; }
double MatcherParams::MatchWindowSizeFallbackMultiplier() const { return _match_winsize_fallback_multiplier; }
size_t MatcherParams::MaxMatchCandidates() const { return _max_match_candidates; }
-bool MatcherParams::WantGlobalRank() const { return _want_global_rank; }
size_t MatcherParams::StemMinLength() const { return _stem_min; }
size_t MatcherParams::StemMaxExtend() const { return _stem_extend; }
diff --git a/searchsummary/src/vespa/juniper/juniperparams.h b/searchsummary/src/vespa/juniper/juniperparams.h
index 44980ce8b43..f4f17779f2d 100644
--- a/searchsummary/src/vespa/juniper/juniperparams.h
+++ b/searchsummary/src/vespa/juniper/juniperparams.h
@@ -31,9 +31,6 @@ public:
DocsumParams& SetSurroundMax(size_t length);
size_t SurroundMax() const;
- DocsumParams& SetSpaceChars(const char* spacechars);
- const char* SpaceChars() const;
-
DocsumParams& SetFallback(const char* fallback);
int Fallback() const;
@@ -43,7 +40,6 @@ private:
size_t _min_length;
size_t _max_matches;
size_t _surround_max;
- std::string _space_chars;
int _fallback;
};
@@ -52,25 +48,17 @@ class MatcherParams
{
public:
MatcherParams();
-
- MatcherParams& SetPrefixExtendLength(size_t extend_length);
- size_t PrefixExtendLength() const;
-
- MatcherParams& SetPrefixMinLength(size_t min_length);
- size_t PrefixMinLength() const;
+ MatcherParams(MatcherParams &) = delete;
+ MatcherParams &operator=(MatcherParams &) = delete;
MatcherParams& SetMatchWindowSize(size_t winsize);
size_t MatchWindowSize() const;
- MatcherParams& SetMatchWindowSizeFallbackMultiplier(double winsize);
double MatchWindowSizeFallbackMultiplier() const;
MatcherParams& SetMaxMatchCandidates(size_t max_match_candidates);
size_t MaxMatchCandidates() const;
- MatcherParams& SetWantGlobalRank(bool global_rank);
- bool WantGlobalRank() const;
-
MatcherParams& SetStemMinLength(size_t stem_min);
size_t StemMinLength() const;
@@ -84,19 +72,13 @@ public:
double ProximityFactor();
private:
- size_t _prefix_extend_length;
- size_t _prefix_min_length;
size_t _match_winsize;
double _match_winsize_fallback_multiplier;
size_t _max_match_candidates;
- bool _want_global_rank;
size_t _stem_min;
size_t _stem_extend;
Fast_WordFolder* _wordfolder; // The wordfolder object needed as 1st parameter to folderfun
double _proximity_factor;
-
- MatcherParams(MatcherParams &);
- MatcherParams &operator=(MatcherParams &);
};
diff --git a/searchsummary/src/vespa/juniper/latintokenizer.h b/searchsummary/src/vespa/juniper/latintokenizer.h
new file mode 100644
index 00000000000..7a98d780c56
--- /dev/null
+++ b/searchsummary/src/vespa/juniper/latintokenizer.h
@@ -0,0 +1,377 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+*****************************************************************************
+* @author Bård Kvalheim
+* @date Creation date: 2001-12-07
+*
+* A configurable tokenizer template that accepts two predicates: One to
+* determine separator symbols and one to determine punctuation symbols. A
+* typedef is defined that uses isspace/1 and ispunct/1.
+*
+* This tokenizer does not alter the text, and does not copy it.
+*
+* This tokenizer is not meant to be used as a real tokenizer for all
+* languages. It is only a fast and simple latin tokenizer, intended for
+* very basic applications.
+*
+* The tokens are returned as (char *, char *, bool) triples. The first
+* two elements delimit the token string, while the third element is
+* true if the token is a punctuation symbol.
+*
+* If the last character in the input text is a punctuation symbol, the last
+* token is the following:
+*
+* text = " something bl bla ."
+*
+* token.first -> .
+* token.second -> \0
+* token._punctuation = true;
+*
+* In other words, token.second can point to the terminating '\0' in the input
+* text.
+*
+*****************************************************************************/
+
+#pragma once
+
+#include <cctype>
+#include <cstring>
+
+/**
+*****************************************************************************
+* A simple tokenizer. See description above.
+*
+* @class Fast_LatinTokenizer
+* @author Bård Kvalheim
+* @date Creation date: 2001-12-07
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+class Fast_LatinTokenizer {
+private:
+ Fast_LatinTokenizer(const Fast_LatinTokenizer &);
+ Fast_LatinTokenizer& operator=(const Fast_LatinTokenizer &);
+
+public:
+
+ /** Helper class. */
+ class Fast_Token {
+ public:
+
+ /** Member variables. */
+ char *first; // Points to start of token. Named 'first' for std::pair compatibility.
+ char *second; // Points to end of token. Named 'second' for std::pair compatibility.
+ bool _punctuation; // Is the token a punctuation symbol?
+
+ /** Constructors. */
+ Fast_Token(char *begin, char *end, bool punctuation) : first(begin), second(end), _punctuation(punctuation) {}
+ Fast_Token() : first(NULL), second(NULL), _punctuation(false) {}
+ Fast_Token(const Fast_Token &other)
+ : first(other.first),
+ second(other.second),
+ _punctuation(other._punctuation)
+ {
+ }
+ Fast_Token& operator=(const Fast_Token &other)
+ {
+ first = other.first;
+ second = other.second;
+ _punctuation = other._punctuation;
+ return *this;
+ }
+
+ };
+
+ /** Constructors/destructor. */
+ Fast_LatinTokenizer();
+ explicit Fast_LatinTokenizer(char *text);
+ Fast_LatinTokenizer(char *text, size_t length);
+ virtual ~Fast_LatinTokenizer();
+
+ /** Re-initializers: point the tokenizer at a new text buffer. */
+ void SetNewText(char *text);
+ void SetNewText(char *text, size_t length);
+
+ /** Are there any more tokens left? */
+ bool MoreTokens();
+
+ /** Return next token. */
+ Fast_Token GetNextToken();
+
+ /** Return text buffer. */
+ char *GetOriginalText();
+
+ /** Observers in case we need to perform some action specific
+ * to the IsSeparator or IsPunctuation implementations
+ * (such as extra initialization or statistics gathering or...)
+ */
+ IsPunctuation& GetIsPunctuation() { return _isPunctuation; }
+ IsSeparator& GetIsSeparator() { return _isSeparator; }
+
+private:
+
+ /** Member variables. */
+ char *_org; // Holds the original text buffer.
+ char *_next; // Points to the current buffer position.
+ char *_end; // Points to the end of the buffer.
+ bool _moreTokens; // More text to process?
+ IsSeparator _isSeparator; // Separator symbol predicate.
+ IsPunctuation _isPunctuation; // Punctuation symbol predicate.
+
+ /** Helper methods. */
+ void SkipBlanks();
+
+};
+
+/**
+*****************************************************************************
+* Default constructor.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer() :
+ _org(NULL),
+ _next(NULL),
+ _end(NULL),
+ _moreTokens(false),
+ _isSeparator(),
+ _isPunctuation()
+{
+}
+
+/**
+*****************************************************************************
+* Constructor. Accepts a '\0' terminated text buffer.
+*
+* @param text
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text) :
+ _org(NULL),
+ _next(NULL),
+ _end(NULL),
+ _moreTokens(false),
+ _isSeparator(),
+ _isPunctuation()
+{
+ SetNewText(text);
+}
+
+/**
+*****************************************************************************
+* Constructor. Accepts a text buffer and the buffer length
+*
+* @param text
+* @param length
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text, size_t length)
+ : _org(NULL),
+ _next(NULL),
+ _end(NULL),
+ _moreTokens(false),
+ _isSeparator(),
+ _isPunctuation()
+{
+ SetNewText(text, length);
+}
+
+/**
+*****************************************************************************
+* Destructor.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::~Fast_LatinTokenizer() {
+}
+
+/**
+*****************************************************************************
+* Sets a new '\0' terminated string.
+*
+* @param text
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text) {
+
+ _org = text;
+ _next = text;
+ _moreTokens = text != NULL;
+ _end = NULL;
+}
+
+/**
+*****************************************************************************
+* Sets a new string, given the text buffer and its length.
+*
+* @param text
+* @param length
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text, size_t length) {
+
+ _org = text;
+ _next = text;
+ _moreTokens = text != NULL;
+ _end = (_next ? _next + length : NULL);
+}
+
+/**
+*****************************************************************************
+* Skips all blanks and flags if there are no more tokens.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SkipBlanks() {
+
+ if (!_moreTokens) return;
+ // Initialized with '\0' terminated buffer?
+ if (_end == NULL) {
+ while (*_next != '\0' && _isSeparator(*_next)) {
+ ++_next;
+ }
+ if (*_next == '\0') {
+ _moreTokens = false;
+ }
+ }
+
+ // Initialized with specified buffer length.
+ else {
+ while (_next != _end && _isSeparator(*_next)) {
+ ++_next;
+ }
+ if (_next == _end) {
+ _moreTokens = false;
+ }
+ }
+
+}
+
+/**
+*****************************************************************************
+* Returns true if there are more tokens left in the text buffer.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+bool
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::MoreTokens() {
+ SkipBlanks();
+ return _moreTokens;
+}
+
+/**
+*****************************************************************************
+* Returns the next token as a Fast_Token.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+typename Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_Token
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetNextToken() {
+
+ char *prev = _next;
+
+ // Skip all blanks and flag if there are no more tokens.
+ SkipBlanks();
+
+ // Initialized with '\0' terminated buffer? Find the next blank or punctuation.
+ if (_end == NULL) {
+ while (*_next != '\0' && !_isSeparator(*_next) && !_isPunctuation(*_next)) {
+ ++_next;
+ }
+
+ // Initialized with specified buffer length.
+ } else {
+ while (_next != _end && !_isSeparator(*_next) && !_isPunctuation(*_next)) {
+ ++_next;
+ }
+ }
+
+ // Check if this token is a punctuation symbol, and generate token.
+ bool isToken = ((_next - prev == 0) && _isPunctuation(*prev));
+
+ if (isToken) {
+ ++_next;
+ }
+
+ Fast_Token token(prev, _next, isToken);
+
+ return token;
+
+}
+
+/**
+*****************************************************************************
+* Returns the original text buffer.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+char *
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetOriginalText() {
+ return _org;
+}
+
+/**
+*****************************************************************************
+* Helper class.
+*
+* When using isspace/1, ensure that the argument is cast to unsigned char to
+* avoid problems with sign extension. See system documentation for details.
+*
+* @class Fast_IsSpace
+* @author Bård Kvalheim
+* @date Creation date: 2001-12-07
+*****************************************************************************/
+
+struct Fast_IsSpace {
+ bool operator()(char c) {return (isspace(static_cast<unsigned char>(c)) != 0);}
+};
+
+/**
+*****************************************************************************
+* Helper class.
+*
+* When using ispunct/1, ensure that the argument is cast to unsigned char to
+* avoid problems with sign extension. See system documentation for details.
+*
+* @class Fast_IsPunctuation
+* @author Bård Kvalheim
+* @date Creation date: 2001-12-07
+*****************************************************************************/
+
+struct Fast_IsPunctuation {
+ bool operator()(char c) {return (ispunct(static_cast<unsigned char>(c)) != 0);}
+};
+
+/**
+*****************************************************************************
+* A simple tokenizer. See description above.
+*
+* @class Fast_SimpleLatinTokenizer
+* @author Bård Kvalheim
+* @date Creation date: 2001-12-07
+*****************************************************************************/
+
+typedef Fast_LatinTokenizer<Fast_IsSpace, Fast_IsPunctuation> Fast_SimpleLatinTokenizer;
diff --git a/searchsummary/src/vespa/juniper/queryparser.h b/searchsummary/src/vespa/juniper/queryparser.h
index 5715daa3661..9c596892e31 100644
--- a/searchsummary/src/vespa/juniper/queryparser.h
+++ b/searchsummary/src/vespa/juniper/queryparser.h
@@ -1,13 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
-
/* Simple prefix syntax advanced query parser for Juniper debug/testing */
#include "query.h"
+#include "latintokenizer.h"
#include <map>
#include <string>
-#include <vespa/fastlib/text/latintokenizer.h>
namespace juniper
{
diff --git a/searchsummary/src/vespa/juniper/result.cpp b/searchsummary/src/vespa/juniper/result.cpp
index 653e692e015..15ad9aa2a98 100644
--- a/searchsummary/src/vespa/juniper/result.cpp
+++ b/searchsummary/src/vespa/juniper/result.cpp
@@ -80,12 +80,12 @@ Result::Result(Config* config, QueryHandle* qhandle,
}
/* Create the new pipeline */
- _tokenizer.reset(new JuniperTokenizer(wordfolder, NULL, 0, NULL));
+ _tokenizer = std::make_unique<JuniperTokenizer>(wordfolder, nullptr, 0, nullptr, nullptr);
- _matcher.reset(new Matcher(this));
+ _matcher = std::make_unique<Matcher>(this);
_matcher->SetProximityFactor(mp.ProximityFactor());
- _registry.reset(new SpecialTokenRegistry(_matcher->getQuery()));
+ _registry = std::make_unique<SpecialTokenRegistry>(_matcher->getQuery());
if (qhandle->_log_mask)
_matcher->set_log(qhandle->_log_mask);
diff --git a/searchsummary/src/vespa/juniper/rpinterface.cpp b/searchsummary/src/vespa/juniper/rpinterface.cpp
index 75a441fb957..f9e91073a9b 100644
--- a/searchsummary/src/vespa/juniper/rpinterface.cpp
+++ b/searchsummary/src/vespa/juniper/rpinterface.cpp
@@ -94,12 +94,6 @@ void Juniper::FlushRewriters()
_modifier->FlushRewriters();
}
-void ReleaseConfig(Config*& config)
-{
- delete config;
- config = NULL;
-}
-
void ReleaseQueryHandle(QueryHandle*& handle)
{