Fold fastlib into vespalib and gc some unused code.

Also move some code only used by juniper up into juniper test module.
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 13:31:10 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2022-05-21 14:29:19 +0000
commit: 58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch)
tree: 4ced08d5ed7c7020e3cfb516f135f885334ff27d /searchsummary
parent: 2c34544abef32f7da1c05a83a3648532afb53186 (diff)
20 files changed, 1426 insertions, 90 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 84633bee262..d51e29dbd00 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -4,7 +4,6 @@ vespa_define_module(
     vespalog
     vespalib
     configdefinitions
-    fastlib_fast
     document
     searchlib
 
diff --git a/searchsummary/src/tests/juniper/.gitignore b/searchsummary/src/tests/juniper/.gitignore
index 46b307da632..09d6225ca26 100644
--- a/searchsummary/src/tests/juniper/.gitignore
+++ b/searchsummary/src/tests/juniper/.gitignore
@@ -14,3 +14,4 @@ juniper_auxTest_app
 juniper_matchobjectTest_app
 juniper_mcandTest_app
 juniper_queryparserTest_app
+juniper_latintokenizertest_app
diff --git a/searchsummary/src/tests/juniper/CMakeLists.txt b/searchsummary/src/tests/juniper/CMakeLists.txt
index d15e91f1f63..77e7052a429 100644
--- a/searchsummary/src/tests/juniper/CMakeLists.txt
+++ b/searchsummary/src/tests/juniper/CMakeLists.txt
@@ -1,4 +1,10 @@
 # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(juniper_testsuite
+    SOURCES
+    test.cpp
+    DEPENDS
+)
+
 vespa_add_executable(juniper_mcandTest_app TEST
     SOURCES
     mcandTest.cpp
@@ -6,7 +12,7 @@ vespa_add_executable(juniper_mcandTest_app TEST
     testenv.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
+    juniper_testsuite
 )
 vespa_add_test(NAME juniper_mcandTest_app COMMAND juniper_mcandTest_app)
 vespa_add_executable(juniper_queryparserTest_app TEST
@@ -17,7 +23,7 @@ vespa_add_executable(juniper_queryparserTest_app TEST
     testenv.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
+    juniper_testsuite
 )
 vespa_add_test(NAME juniper_queryparserTest_app COMMAND juniper_queryparserTest_app)
 vespa_add_executable(juniper_matchobjectTest_app TEST
@@ -28,7 +34,7 @@ vespa_add_executable(juniper_matchobjectTest_app TEST
     fakerewriter.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
+    juniper_testsuite
 )
 vespa_add_test(NAME juniper_matchobjectTest_app COMMAND juniper_matchobjectTest_app)
 vespa_add_executable(juniper_appender_test_app TEST
@@ -36,7 +42,6 @@ vespa_add_executable(juniper_appender_test_app TEST
     appender_test.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
 )
 vespa_add_test(NAME juniper_appender_test_app COMMAND juniper_appender_test_app)
 vespa_add_executable(juniper_queryvisitor_test_app TEST
@@ -44,7 +49,6 @@ vespa_add_executable(juniper_queryvisitor_test_app TEST
     queryvisitor_test.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
 )
 vespa_add_test(NAME juniper_queryvisitor_test_app COMMAND juniper_queryvisitor_test_app)
 vespa_add_executable(juniper_auxTest_app TEST
@@ -54,7 +58,7 @@ vespa_add_executable(juniper_auxTest_app TEST
     testenv.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
+    juniper_testsuite
 )
 vespa_add_test(NAME juniper_auxTest_app COMMAND juniper_auxTest_app)
 vespa_add_executable(juniper_SrcTestSuite_app TEST
@@ -68,6 +72,13 @@ vespa_add_executable(juniper_SrcTestSuite_app TEST
     testenv.cpp
     DEPENDS
     searchsummary
-    fastlib_fast_testsuite
+    juniper_testsuite
 )
 vespa_add_test(NAME juniper_SrcTestSuite_app COMMAND juniper_SrcTestSuite_app)
+vespa_add_executable(juniper_latintokenizertest_app TEST
+    SOURCES
+    latintokenizertest.cpp
+    DEPENDS
+    juniper_testsuite
+)
+vespa_add_test(NAME juniper_latintokenizertest_app NO_VALGRIND COMMAND juniper_latintokenizertest_app)
diff --git a/searchsummary/src/tests/juniper/SrcTestSuite.cpp b/searchsummary/src/tests/juniper/SrcTestSuite.cpp
index c1e4dc2cd19..870c7b9f378 100644
--- a/searchsummary/src/tests/juniper/SrcTestSuite.cpp
+++ b/searchsummary/src/tests/juniper/SrcTestSuite.cpp
@@ -1,12 +1,12 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "testenv.h"
+#include "suite.h"
 #include "mcandTest.h"
 #include "queryparserTest.h"
 #include "matchobjectTest.h"
 #include "auxTest.h"
 #include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/fastlib/testsuite/suite.h>
 /**
  * The SrcTestSuite class runs all the unit tests for the src module.
  *
diff --git a/searchsummary/src/tests/juniper/auxTest.h b/searchsummary/src/tests/juniper/auxTest.h
index dd6d79e024a..9ff391911b3 100644
--- a/searchsummary/src/tests/juniper/auxTest.h
+++ b/searchsummary/src/tests/juniper/auxTest.h
@@ -3,9 +3,9 @@
 
 // Auxiliary tests for juniper - based on Juniper 1.x proximitytest.cpp
 
-#include <map>
-#include <vespa/fastlib/testsuite/test.h>
 #include "testenv.h"
+#include "test.h"
+#include <map>
 
 class AuxTest : public Test
 {
diff --git a/searchsummary/src/tests/juniper/latintokenizertest.cpp b/searchsummary/src/tests/juniper/latintokenizertest.cpp
new file mode 100644
index 00000000000..89273ab1ec0
--- /dev/null
+++ b/searchsummary/src/tests/juniper/latintokenizertest.cpp
@@ -0,0 +1,9 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "latintokenizertest.h"
+
+int main(int, char **) {
+    LatinTokenizerTest lta;
+    lta.SetStream(&std::cout);
+    lta.Run();
+    return lta.Report();
+}
diff --git a/searchsummary/src/tests/juniper/latintokenizertest.h b/searchsummary/src/tests/juniper/latintokenizertest.h
new file mode 100644
index 00000000000..b4d113918ee
--- /dev/null
+++ b/searchsummary/src/tests/juniper/latintokenizertest.h
@@ -0,0 +1,450 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "test.h"
+#include <vespa/juniper/latintokenizer.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+class Mapel_Pucntuation {
+private:
+    /** Member variables. */
+    static bool *_lookup;
+public:
+
+    /** Constructors */
+    Mapel_Pucntuation();
+
+    /** Punctuation predicate. */
+    bool operator()(char c) const {
+        return _lookup[static_cast<unsigned char>(c)];
+    }
+
+};
+
+class Maple_Space {
+private:
+
+    /** Member variables. */
+    static bool *_lookup;
+
+public:
+
+    /** Constructors */
+    Maple_Space();
+
+    /** Space predicate. */
+    bool operator()(char c) const {
+        return _lookup[static_cast<unsigned char>(c)];
+    }
+
+};
+
+bool *Maple_Space::_lookup       = NULL;
+bool *Mapel_Pucntuation::_lookup = NULL;
+
+Mapel_Pucntuation::Mapel_Pucntuation() {
+
+    // Initialize lookup table.
+    if (_lookup == NULL) {
+
+        _lookup = new bool[256];
+
+        for (unsigned int i = 0; i < 256; ++i) {
+            _lookup[i] = false;
+        }
+
+        _lookup[static_cast<unsigned char>('.')]  = true;
+        _lookup[static_cast<unsigned char>(',')]  = true;
+        _lookup[static_cast<unsigned char>(':')]  = true;
+        _lookup[static_cast<unsigned char>(';')]  = true;
+        _lookup[static_cast<unsigned char>('|')]  = true;
+        _lookup[static_cast<unsigned char>('!')]  = true;
+        _lookup[static_cast<unsigned char>('?')]  = true;
+        _lookup[static_cast<unsigned char>('@')]  = true;
+        _lookup[static_cast<unsigned char>('/')]  = true;
+        _lookup[static_cast<unsigned char>('(')]  = true;
+        _lookup[static_cast<unsigned char>(')')]  = true;
+        _lookup[static_cast<unsigned char>('[')]  = true;
+        _lookup[static_cast<unsigned char>(']')]  = true;
+        _lookup[static_cast<unsigned char>('{')]  = true;
+        _lookup[static_cast<unsigned char>('}')]  = true;
+        _lookup[static_cast<unsigned char>('<')]  = true;
+        _lookup[static_cast<unsigned char>('>')]  = true;
+        _lookup[static_cast<unsigned char>('*')]  = true;
+        _lookup[static_cast<unsigned char>('=')]  = true;
+        _lookup[static_cast<unsigned char>('%')]  = true;
+        _lookup[static_cast<unsigned char>('\\')] = true;
+
+    }
+
+}
+
+Maple_Space::Maple_Space() {
+
+    // Initialize lookup table.
+    if (_lookup == NULL) {
+
+        _lookup = new bool[256];
+
+        for (unsigned int i = 0; i < 256; ++i) {
+            _lookup[i] = false;
+        }
+
+        _lookup[static_cast<unsigned char>(' ')]  = true;
+        _lookup[static_cast<unsigned char>('\n')] = true;
+        _lookup[static_cast<unsigned char>('\t')] = true;
+        _lookup[static_cast<unsigned char>('\r')] = true;
+        _lookup[static_cast<unsigned char>('"')]  = true;
+        _lookup[static_cast<unsigned char>('\'')] = true;
+        _lookup[static_cast<unsigned char>('`')]  = true;
+        _lookup[static_cast<unsigned char>('_')]  = true;
+
+    }
+}
+
+class LatinTokenizerTest : public Test
+{
+private:
+    void TestSimple();
+    void TestSimpleLength();
+    void TestEnding();
+    void TestEndingLength();
+    void TestNull();
+    void TestNullLength();
+    void TestEmpty();
+    void TestEmptyLength();
+    void TestMapelURL();
+
+    template <typename IsSeparator, typename IsPunctuation>
+    void TestWord(Fast_LatinTokenizer<IsSeparator,IsPunctuation>* lt,
+                  const char* correct,
+                  bool punct = false)
+    {
+        typename Fast_LatinTokenizer<IsSeparator,IsPunctuation>::Fast_Token token;
+        _test(lt->MoreTokens());
+
+        token = lt->GetNextToken();
+        char temp = *token.second;
+        *token.second = '\0';
+        vespalib::string word = vespalib::make_string("%s", token.first);
+        *token.second = temp;
+
+        PushDesc(vespalib::make_string("%s%s == %s", "word: ", word.c_str(), correct).c_str());
+
+        _test(word == correct);
+
+        _test(token._punctuation == punct);
+
+        PopDesc();
+    }
+
+    void TestTypeparamObservers();
+
+public:
+    LatinTokenizerTest();
+    ~LatinTokenizerTest();
+    void Run() override;
+};
+
+
+LatinTokenizerTest::LatinTokenizerTest()
+{
+
+}
+
+
+LatinTokenizerTest::~LatinTokenizerTest()
+{
+
+}
+
+
+void LatinTokenizerTest::TestSimple()
+{
+    PushDesc("Simple");
+
+    Fast_SimpleLatinTokenizer lt;
+    std::string s("This is. my . test String.");
+    lt.SetNewText(const_cast<char*>(s.c_str()));
+
+    PushDesc("This");
+    TestWord(&lt, "This");
+    PopDesc();
+    PushDesc("is");
+    TestWord(&lt, "is");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+    PushDesc("my");
+    TestWord(&lt, "my");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+    PushDesc("test");
+    TestWord(&lt, "test");
+    PopDesc();
+    PushDesc("String");
+    TestWord(&lt, "String");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+
+    _test(!lt.MoreTokens());
+
+    PopDesc();
+}
+
+
+
+void LatinTokenizerTest::TestSimpleLength()
+{
+    PushDesc("Simple");
+
+    Fast_SimpleLatinTokenizer lt;
+    std::string s("This is. my . test String.");
+    lt.SetNewText(const_cast<char*>(s.c_str()),
+                  s.length());
+
+    PushDesc("This");
+    TestWord(&lt, "This");
+    PopDesc();
+    PushDesc("is");
+    TestWord(&lt, "is");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+    PushDesc("my");
+    TestWord(&lt, "my");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+    PushDesc("test");
+    TestWord(&lt, "test");
+    PopDesc();
+    PushDesc("String");
+    TestWord(&lt, "String");
+    PopDesc();
+    PushDesc(".");
+    TestWord(&lt, ".", true);
+    PopDesc();
+
+    _test(!lt.MoreTokens());
+
+    PopDesc();
+}
+
+
+
+void LatinTokenizerTest::TestEnding()
+{
+    PushDesc("Ending\n");
+
+    std::string text("This is   my test String ");
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()));
+
+    TestWord(lt, "This");
+    TestWord(lt, "is");
+    TestWord(lt, "my");
+    TestWord(lt, "test");
+    TestWord(lt, "String");
+
+    _test(!lt->MoreTokens());
+
+    _test(text == lt->GetOriginalText());
+
+    delete lt;
+
+    PopDesc();
+}
+
+void LatinTokenizerTest::TestEndingLength()
+{
+    PushDesc("Ending\n");
+
+    std::string text("This is   my test String ");
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()),
+                                                                  text.length());
+
+    TestWord(lt, "This");
+    TestWord(lt, "is");
+    TestWord(lt, "my");
+    TestWord(lt, "test");
+    TestWord(lt, "String");
+
+    _test(!lt->MoreTokens());
+
+    _test(text == std::string(lt->GetOriginalText()));
+
+    delete lt;
+
+    PopDesc();
+}
+
+void LatinTokenizerTest::TestNull()
+{
+    PushDesc("Null\n");
+
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL);
+
+    _test(!lt->MoreTokens());
+
+    _test(lt->GetOriginalText() == NULL);
+
+    delete lt;
+
+    PopDesc();
+}
+
+void LatinTokenizerTest::TestNullLength()
+{
+    PushDesc("Null\n");
+
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL, 0);
+
+    _test(!lt->MoreTokens());
+
+    _test(lt->GetOriginalText() == NULL);
+
+    delete lt;
+
+    PopDesc();
+}
+
+void LatinTokenizerTest::TestEmpty()
+{
+    PushDesc("Empty\n");
+
+    std::string text(" ");
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()));
+
+    _test(!lt->MoreTokens());
+
+    delete lt;
+
+    PopDesc();
+}
+
+void LatinTokenizerTest::TestEmptyLength()
+{
+    PushDesc("Empty\n");
+
+    std::string text(" ");
+    Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()),
+                                                                  text.length());
+
+    _test(!lt->MoreTokens());
+
+    delete lt;
+
+    PopDesc();
+}
+
+
+/** Test predicate that forwards to a <cctype>-style classifier set via Init(). */
+class TPS
+{
+private:
+    TPS(const TPS &);               // not copyable
+    TPS& operator=(const TPS &);
+public:
+    TPS() : _myfunc(NULL) {}
+    /** Installs the classifier (e.g. isdigit, ispunct); call before use. */
+    void Init(int (*myfunc)(int c))
+    {
+        _myfunc = myfunc;
+    }
+    /** Returns true when the installed classifier accepts c. */
+    bool operator()(char c)
+    {
+        // <cctype> classifiers are undefined for negative char values.
+        return (_myfunc(static_cast<unsigned char>(c)) != 0);
+    }
+private:
+    int (*_myfunc)(int c);
+};
+
+void LatinTokenizerTest::TestTypeparamObservers()
+{
+
+    typedef Fast_LatinTokenizer<TPS,TPS> MyTokenizer;
+
+    PushDesc("TypeparamObservers\n");
+    std::string text("4Some6text");
+    MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str()));
+    tok->GetIsPunctuation().Init(ispunct);
+    tok->GetIsSeparator().Init(isdigit);
+
+    TestWord(tok,"Some");
+    TestWord(tok,"text");
+    _test(!tok->MoreTokens());
+    PopDesc();
+
+    delete tok;
+}
+
+void LatinTokenizerTest::TestMapelURL()
+{
+
+    typedef Fast_LatinTokenizer<Maple_Space, Mapel_Pucntuation> MyTokenizer;
+
+    PushDesc("MapelURL\n");
+    std::string text("http://search.msn.co.uk/results.asp?q= cfg=SMCBROWSE rn=1825822 dp=1873075 v=166:");
+    MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str()));
+
+    TestWord(tok,"http", false);
+    TestWord(tok,":", true);
+    TestWord(tok,"/", true);
+    TestWord(tok,"/", true);
+    TestWord(tok,"search", false);
+    TestWord(tok,".", true);
+    TestWord(tok,"msn", false);
+    TestWord(tok,".", true);
+    TestWord(tok,"co", false);
+    TestWord(tok,".", true);
+    TestWord(tok,"uk", false);
+    TestWord(tok,"/", true);
+    TestWord(tok,"results", false);
+    TestWord(tok,".", true);
+    TestWord(tok,"asp", false);
+    TestWord(tok,"?", true);
+    TestWord(tok,"q", false);
+    TestWord(tok,"=", true);
+    TestWord(tok,"cfg", false);
+    TestWord(tok,"=", true);
+    TestWord(tok,"SMCBROWSE", false);
+    TestWord(tok,"rn", false);
+    TestWord(tok,"=", true);
+    TestWord(tok,"1825822", false);
+    TestWord(tok,"dp", false);
+    TestWord(tok,"=", true);
+    TestWord(tok,"1873075", false);
+    TestWord(tok,"v", false);
+    TestWord(tok,"=", true);
+    TestWord(tok,"166", false);
+    TestWord(tok,":", true);
+    _test(!tok->MoreTokens());
+    PopDesc();
+
+    delete tok;
+}
+
+
+
+void LatinTokenizerTest::Run()
+{
+    TestSimple();
+    TestSimpleLength();
+    TestEnding();
+    TestEndingLength();
+    TestNull();
+    TestNullLength();
+    TestEmpty();
+    TestEmptyLength();
+    TestTypeparamObservers();
+    TestMapelURL();
+}
diff --git a/searchsummary/src/tests/juniper/matchobjectTest.h b/searchsummary/src/tests/juniper/matchobjectTest.h
index 5bfd29a371f..9fdd3e4719f 100644
--- a/searchsummary/src/tests/juniper/matchobjectTest.h
+++ b/searchsummary/src/tests/juniper/matchobjectTest.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "testenv.h"
-#include <vespa/fastlib/testsuite/test.h>
+#include "test.h"
 #include <map>
 
 /**
diff --git a/searchsummary/src/tests/juniper/mcandTest.h b/searchsummary/src/tests/juniper/mcandTest.h
index cdb01e91e3b..6eb8b4d66e5 100644
--- a/searchsummary/src/tests/juniper/mcandTest.h
+++ b/searchsummary/src/tests/juniper/mcandTest.h
@@ -4,10 +4,10 @@
  */
 #pragma once
 
-#include <map>
-#include <vespa/fastlib/testsuite/test.h>
 #include "testenv.h"
+#include "test.h"
 #include <vespa/juniper/mcand.h>
+#include <map>
 
 /**
  * The MatchCandidateTest class holds
diff --git a/searchsummary/src/tests/juniper/queryparserTest.h b/searchsummary/src/tests/juniper/queryparserTest.h
index 7dc4dda63fa..803fbd4c999 100644
--- a/searchsummary/src/tests/juniper/queryparserTest.h
+++ b/searchsummary/src/tests/juniper/queryparserTest.h
@@ -5,9 +5,9 @@
 #pragma once
 
 #include "testenv.h"
+#include "test.h"
 #include <vespa/juniper/queryparser.h>
 #include <vespa/juniper/rewriter.h>
-#include <vespa/fastlib/testsuite/test.h>
 #include <map>
 
 /**
diff --git a/searchsummary/src/tests/juniper/suite.h b/searchsummary/src/tests/juniper/suite.h
new file mode 100644
index 00000000000..fea685731ae
--- /dev/null
+++ b/searchsummary/src/tests/juniper/suite.h
@@ -0,0 +1,264 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**************************************************************************
+ * Author: Bård Kvalheim
+ *
+ * A test suite. Modified from the suite written by Chuck Allison.
+ * http://www.cuj.com/archive/1809/feature.html
+ *
+ * Licensed to Yahoo, and relicensed under the terms of the Apache 2.0 license
+ *
+ * The usage of suite is simple:
+ *
+ * mysuite.h:
+ * -----
+ *
+ * #include <iosfwd>
+ * #include "suite.h"
+ *
+ * class MySuite : public Suite
+ * {
+ * public:
+ *   MySuite() :
+ *     Suite("My test suite. ", &cout)
+ *   {
+ *     AddTest(new MyTest1());
+ *     AddTest(new MyTest2());
+ *   }
+ * };
+ *
+ *
+ *
+ * class MySuiteApp
+ * {
+ * public:
+ *   int Main();
+ * };
+ *
+ *
+ * ---
+ *
+ * mysuite.cpp:
+ * -----
+ *
+ * #include "mysuite.h"
+ *
+ *
+ * int MySuiteApp::Main() {
+ *   MySuite mts;
+ *   mts.Run();
+ *   mts.Report();
+ *   mts.Free();
+ * }
+ *
+ * ---
+ *
+ **************************************************************************/
+
+#pragma once
+
+#include "test.h"   // includes <string>, <iosfwd>
+
+
+#include <vector>
+
+#include <iostream>
+#include <stdexcept>
+#include <cassert>
+
+
+namespace fast::testsuite {
+
+class TestSuiteError;
+
+class Suite
+{
+public:
+    Suite(const std::string& name, std::ostream* osptr = 0);
+
+    std::string    GetName() const;
+    long           GetNumPassed() const;
+    long           GetNumFailed() const;
+    const std::ostream* GetStream() const;
+    void           SetStream(std::ostream* osptr);
+
+    void AddTest(Test* t);       //throw (TestSuiteError);
+    void AddSuite(const Suite&); //throw(TestSuiteError);
+    void Run();     // Calls Test::run() repeatedly
+    long Report() const;
+    void Free();    // deletes tests
+    virtual ~Suite(void) { }
+
+private:
+    std::string m_name;
+    std::ostream* m_osptr;
+    std::vector<Test*> m_tests;
+    void Reset();
+    int GetLongestName() const;
+
+    // Disallowed ops:
+    Suite(const Suite&);
+    Suite& operator=(const Suite&);
+};
+
+inline
+Suite::Suite(const std::string& name, std::ostream* osptr)
+    : m_name(name),
+      m_osptr(osptr),
+      m_tests()
+{
+}
+
+inline
+std::string Suite::GetName() const
+{
+    return m_name;
+}
+
+inline
+const std::ostream* Suite::GetStream() const
+{
+    return m_osptr;
+}
+
+inline
+void Suite::SetStream(std::ostream* osptr)
+{
+    m_osptr = osptr;
+}
+
+
+/*class TestSuiteError : public logic_error
+  {
+  public:
+  TestSuiteError(const std::string& s = "")
+  : logic_error(s)
+  {}
+  };*/
+
+void Suite::AddTest(Test* t) //throw(TestSuiteError)
+{
+    // Registers t with the suite and resets its counters. A null test is
+    // ignored: storing it would crash in t->Reset() and trip Run()'s assert.
+    if (t == 0) return;
+    // Make sure test has a stream:
+    if (m_osptr != 0 && t->GetStream() == 0)
+        t->SetStream(m_osptr);
+    m_tests.push_back(t);
+    t->Reset();
+}
+
+void Suite::AddSuite(const Suite& s) //throw(TestSuiteError)
+{
+    for (size_t i = 0; i < s.m_tests.size(); ++i)
+        AddTest(s.m_tests[i]);
+}
+
+void Suite::Free()
+{
+    // This is not a destructor because tests
+    // don't have to be on the heap.
+    for (size_t i = 0; i < m_tests.size(); ++i)
+    {
+        delete m_tests[i];
+        m_tests[i] = 0;
+    }
+}
+
+void Suite::Run()
+{
+    // Resets every test, then runs them in order; each run is preceded by
+    // the test's name padded to the longest name, when a stream is set.
+    Reset();
+    int longestName = GetLongestName();
+    for (size_t i = 0; i < m_tests.size(); ++i) {
+        assert(m_tests[i]);
+        const char *nm = m_tests[i]->get_name();
+        if (nm && m_osptr) {   // the stream is optional (ctor default is 0)
+            *m_osptr << std::endl << nm << ": ";
+            for (int x = longestName - strlen(nm); x > 0; --x)
+                *m_osptr << ' ';
+            *m_osptr << std::flush;
+        }
+        m_tests[i]->Run();
+    }
+}
+
+
+// Find the longest test name
+int Suite::GetLongestName() const
+{
+    int longestName = 0, len = 0;
+    const char *nm;
+    for (size_t i = 0; i < m_tests.size(); ++i) {
+        assert(m_tests[i]);
+        nm = m_tests[i]->get_name();
+        if ( nm != NULL && (len = strlen(nm)) > longestName )
+            longestName = len;
+    }
+    return longestName;
+}
+
+long Suite::Report() const
+{
+    if (m_osptr) {
+        int longestName = GetLongestName();
+        int lineLength = longestName + 8 + 16 + 10;
+        long totFail = 0;
+        int x = 0;
+        *m_osptr << std::endl << std::endl
+                 << "Suite \"" << m_name << "\"" << std::endl;
+        for (x = 0; x < lineLength; ++x)
+            *m_osptr << '=';
+        *m_osptr << "=";
+
+        // Write the individual reports
+        for (size_t i = 0; i < m_tests.size(); ++i) {
+            assert(m_tests[i]);
+            const char *nm = m_tests[i]->get_name();
+            totFail += m_tests[i]->Report(longestName -
+                                          (nm ? strlen(nm) : longestName));
+        }
+
+        for (x = 0; x < lineLength; ++x)
+            *m_osptr << '=';
+        *m_osptr << "=\n";
+        return totFail;
+    }
+    else
+        return GetNumFailed();
+}
+
+long Suite::GetNumPassed() const
+{
+    long totPass = 0;
+    for (size_t i = 0; i < m_tests.size(); ++i)
+    {
+        assert(m_tests[i]);
+        totPass += m_tests[i]->GetNumPassed();
+    }
+    return totPass;
+}
+
+long Suite::GetNumFailed() const
+{
+    long totFail = 0;
+    for (size_t i = 0; i < m_tests.size(); ++i)
+    {
+        assert(m_tests[i]);
+        totFail += m_tests[i]->GetNumFailed();
+    }
+    return totFail;
+}
+
+void Suite::Reset()
+{
+    for (size_t i = 0; i < m_tests.size(); ++i)
+    {
+        assert(m_tests[i]);
+        m_tests[i]->Reset();
+    }
+}
+
+}
+
+using fast::testsuite::Suite;
diff --git a/searchsummary/src/tests/juniper/test.cpp b/searchsummary/src/tests/juniper/test.cpp
new file mode 100644
index 00000000000..18930b1bca2
--- /dev/null
+++ b/searchsummary/src/tests/juniper/test.cpp
@@ -0,0 +1,141 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "test.h"
+
+namespace fast::testsuite {
+
+Test::Test(std::ostream* osptr, const char*name) :
+    m_osptr(osptr),
+    name_(name),
+    m_nPass(0),
+    m_nFail(0),
+    m_index(0),
+    m_description()
+{
+    m_pchar[0]= '|';
+    m_pchar[1]= '-';
+}
+
+Test::Test(const char*name) :
+    Test(nullptr, name)
+{
+}
+
+const char *Test::get_name() const {
+    return (name_ == NULL) ? "Test " : name_;
+}
+
+const std::string& Test::GetSourceDirectory()
+{
+    // Computed once: $SOURCE_DIRECTORY (or "." when unset or empty), always
+    // ending in '/'.
+    static const std::string srcDir = [] () {
+        const char* env = getenv("SOURCE_DIRECTORY");
+        // An empty value must not reach back() below (undefined on "").
+        std::string dir((env && *env) ? env : ".");
+        if (dir.back() != '/') {
+            dir += "/";
+        }
+        return dir;
+    } ();
+    return srcDir;
+}
+
+long Test::GetNumPassed() const
+{
+    return m_nPass;
+}
+
+long Test::GetNumFailed() const
+{
+    return m_nFail;
+}
+
+const std::ostream* Test::GetStream() const
+{
+    return m_osptr;
+}
+
+void Test::SetStream(std::ostream* osptr)
+{
+    m_osptr = osptr;
+}
+
+void Test::_Succeed()
+{
+    ++m_nPass;
+}
+
+void Test::Reset()
+{
+    m_nPass = m_nFail = 0;
+}
+
+void Test::PushDesc(const std::string& desc)
+{
+    m_description.push_back(desc);
+}
+
+void Test::PopDesc()
+{
+    m_description.pop_back();
+}
+
+size_t Test::print_desc() const
+{
+    std::copy(m_description.begin(), m_description.end(),
+              std::ostream_iterator<std::string>(*m_osptr));
+    return m_description.size();
+}
+
+void Test::print_progress() {
+    m_index = (m_index + 1) % 2;   // alternate between the two spinner glyphs
+    if (!m_osptr) return;          // no stream attached: nothing to draw
+    *m_osptr << '\b' <<'\b' <<'\b';
+    *m_osptr <<' ' << m_pchar[m_index] << ' ' << std::flush;
+}
+
+bool Test::do_fail(const std::string& lbl, const char* fname, long lineno,
+                   bool addEndl)
+{
+    ++m_nFail;
+    if (m_osptr) {
+        *m_osptr << std::endl
+                 << fname << ':' << lineno << ": "
+                 << get_name() << " failure: (" << lbl << ")"
+                 << std::endl;
+        if (addEndl && print_desc() > 0)
+            *m_osptr << std::endl << std::endl;
+    }
+    return false;
+}
+
+bool Test::do_test(bool cond, const std::string& lbl,
+                   const char* fname, long lineno)
+{
+    if (!cond) {
+        return do_fail(lbl, fname, lineno);
+    }
+    else {
+        _Succeed();
+        print_progress();
+        return true;
+    }
+}
+
+long Test::Report(int padSpaces) const
+{
+    if (m_osptr) {
+        *m_osptr << std::endl << get_name();
+
+        // Pad the name with the given number of spaces
+        for (int i= 0; i < padSpaces; ++i) *m_osptr << ' ';
+
+        *m_osptr << "\tPassed: " << m_nPass
+                 << "\tFailed: " << m_nFail
+                 << std::endl;
+    }
+    return m_nFail;
+}
+
+}
diff --git a/searchsummary/src/tests/juniper/test.h b/searchsummary/src/tests/juniper/test.h
new file mode 100644
index 00000000000..1388c3ba812
--- /dev/null
+++ b/searchsummary/src/tests/juniper/test.h
@@ -0,0 +1,150 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**************************************************************************
+ * Author: Bård Kvalheim
+ *
+ * The test class of the testsuite. Written by Chuck Allison.
+ * http://www.cuj.com/archive/1809/feature.html
+ *
+ * Apart from one trick, the usage of the test class is very simple:
+ *
+ * mytest.h:
+ * ----
+ * #include <iosfwd>
+ * #include "test.h"
+ *
+ * class MyTest : public Test
+ * {
+ * public:
+ *   virtual void Run() {
+ *     // do the tests; _test passes if its argument is true
+ *     _test(expr);
+ *   }
+ *
+ * };
+ *
+ * class MyTestApp
+ * {
+ * public:
+ *   int Main();
+ * };
+ *
+ *
+ * ----
+ *
+ *
+ * mytest.cpp:
+ * ----
+ * #include "mytest.h"
+ *
+ * int MyTestApp::Main()
+ * {
+ *   MyTest mt;
+ *   mt.SetStream(&std::cout);
+ *   mt.Run();
+ *   mt.Report();
+ *
+ *   return 0;
+ * }
+ *
+ *
+ * ----
+ *
+ * The trick is that all the code except the main function is in
+ * the .h file. The reason for this is that it makes it simpler to integrate
+ * the single test into a suite of tests.
+ *
+ *************************************************************************/
+
+#pragma once
+
+#include <string>
+#include <iostream>
+#include <typeinfo>
+#include <vector>
+#include <algorithm>
+#include <iterator>
+
+// The following have underscores because they are macros
+// (and it's impolite to usurp other users' functions!).
+// For consistency, _Succeed() also has an underscore.
+#define _test(cond) do_test((cond), #cond, __FILE__, __LINE__)
+#define _test_equal(lhs, rhs)                                   \
+    do_equality_test((lhs), (rhs),  #lhs, __FILE__, __LINE__)
+#define _fail(str) do_fail((str), __FILE__, __LINE__)
+
+namespace fast::testsuite {
+
+class Test
+{
+public:
+    explicit Test(std::ostream* osptr = 0, const char *name = NULL);
+    explicit Test(const char *name);
+    virtual ~Test(){}
+    virtual void Run() = 0;
+
+    const char *get_name() const;
+    static const std::string& GetSourceDirectory();
+    long GetNumPassed() const;
+    long GetNumFailed() const;
+    const std::ostream* GetStream() const;
+    void SetStream(std::ostream* osptr);
+
+    void _Succeed();
+    long Report(int padSpaces = 1) const;
+    virtual void Reset();
+
+    void PushDesc(const std::string& desc);
+    void PopDesc();
+
+protected:
+    std::ostream* m_osptr;
+    const char *name_;
+
+    bool do_test(bool cond, const std::string& lbl,
+                 const char* fname, long lineno);
+    bool do_fail(const std::string& lbl, const char* fname, long lineno,
+                 bool addEndl = true);
+    template <typename t1, typename t2>
+    bool do_equality_test(const t1& lhs, const t2& rhs,
+                          const char* lbl, const char* fname, long lineno);
+    virtual void print_progress();
+
+private:
+    long m_nPass;
+    long m_nFail;
+    int m_index;
+    char m_pchar[4];
+
+    std::vector<std::string> m_description;
+
+    size_t print_desc() const;
+
+    // Disallowed:
+    Test(const Test&);
+    Test& operator=(const Test&);
+};
+
+template <typename t1, typename t2>
+bool Test::do_equality_test(const t1& lhs, const t2& rhs, const char* lbl,
+                            const char* fname, long lineno)
+{
+    if (lhs == rhs) {
+        _Succeed();
+        print_progress();
+        return true;
+    }
+    do_fail(std::string(lbl), fname, lineno, false);
+    if (m_osptr) {
+        *m_osptr << "Equality test failed: "
+                 << "Expected '" << rhs
+                 << "' got '" << lhs << "'"
+                 << std::endl;
+        if (print_desc() > 0)
+            *m_osptr << std::endl << std::endl;
+    }
+    return false;
+}
+
+}
+
+using fast::testsuite::Test;
diff --git a/searchsummary/src/vespa/juniper/config.cpp b/searchsummary/src/vespa/juniper/config.cpp
index b9213bb21f1..a82a8d74b8a 100644
--- a/searchsummary/src/vespa/juniper/config.cpp
+++ b/searchsummary/src/vespa/juniper/config.cpp
@@ -1,7 +1,6 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 
 #include "config.h"
-#include "IJuniperProperties.h"
 #include "rpinterface.h"
 #include "juniperdebug.h"
 #define _NEED_SUMMARY_CONFIG_IMPL
@@ -14,7 +13,7 @@ namespace juniper
 Config::Config(const char* config_name, Juniper & juniper) :
     _docsumparams(),
     _matcherparams(),
-    _sumconf(NULL),
+    _sumconf(nullptr),
     _config_name(config_name),
     _juniper(juniper)
 {
@@ -51,7 +50,7 @@ Config::Config(const char* config_name, Juniper & juniper) :
         .SetMaxMatches(max_matches)
         .SetSurroundMax(surround_max)
         .SetFallback(fallback);
-    _matcherparams.SetWantGlobalRank(true)
+    _matcherparams
         .SetStemMinLength(stem_min).SetStemMaxExtend(stem_extend)
         .SetMatchWindowSize(match_winsize)
         .SetMaxMatchCandidates(max_match_candidates)
diff --git a/searchsummary/src/vespa/juniper/juniperparams.cpp b/searchsummary/src/vespa/juniper/juniperparams.cpp
index e600c23f7c4..4f25b2446ad 100644
--- a/searchsummary/src/vespa/juniper/juniperparams.cpp
+++ b/searchsummary/src/vespa/juniper/juniperparams.cpp
@@ -1,7 +1,5 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "juniperdebug.h"
 #include "juniperparams.h"
-#include "Matcher.h"
 #include <cstring>
 
 // DocsumParams implementation:
@@ -9,7 +7,7 @@
 
 DocsumParams::DocsumParams() :
     _enabled(false), _length(256), _min_length(128), _max_matches(3),
-    _surround_max(80), _space_chars(""), _fallback(FALLBACK_NONE)
+    _surround_max(80), _fallback(FALLBACK_NONE)
 { }
 
 DocsumParams& DocsumParams::SetEnabled(bool en)
@@ -42,12 +40,6 @@ DocsumParams& DocsumParams::SetSurroundMax(size_t length)
     return *this;
 }
 
-DocsumParams& DocsumParams::SetSpaceChars(const char* spacechars)
-{
-    _space_chars = spacechars;
-    return *this;
-}
-
 DocsumParams& DocsumParams::SetFallback(const char* fallback)
 {
     if (strcmp("prefix", fallback) == 0) {
@@ -63,7 +55,6 @@ size_t DocsumParams::MinLength() const   { return _min_length; }
 size_t DocsumParams::MaxMatches() const  { return _max_matches; }
 size_t DocsumParams::SurroundMax() const { return _surround_max; }
 bool   DocsumParams::Enabled() const     { return _enabled; }
-const char* DocsumParams::SpaceChars() const { return _space_chars.c_str(); }
 int DocsumParams::Fallback() const { return _fallback; }
 
 // MatcherParams implementation:
@@ -71,54 +62,26 @@ int DocsumParams::Fallback() const { return _fallback; }
 
 
 MatcherParams::MatcherParams() :
-    _prefix_extend_length(3),
-    _prefix_min_length(5),
     _match_winsize(200),
     _match_winsize_fallback_multiplier(10.0),
     _max_match_candidates(1000),
-    _want_global_rank(false),
     _stem_min(0), _stem_extend(0),
     _wordfolder(NULL), _proximity_factor(1.0)
 { }
 
 
-MatcherParams& MatcherParams::SetPrefixExtendLength(size_t extend_length)
-{
-    _prefix_extend_length = extend_length;
-    return *this;
-}
-
-MatcherParams& MatcherParams::SetPrefixMinLength(size_t min_length)
-{
-    _prefix_min_length = min_length;
-    return *this;
-}
-
-
 MatcherParams& MatcherParams::SetMatchWindowSize(size_t winsize)
 {
     _match_winsize = winsize;
     return *this;
 }
 
-MatcherParams& MatcherParams::SetMatchWindowSizeFallbackMultiplier(double winsize)
-{
-    _match_winsize_fallback_multiplier = winsize;
-    return *this;
-}
-
 MatcherParams& MatcherParams::SetMaxMatchCandidates(size_t max_match_candidates)
 {
     _max_match_candidates = max_match_candidates;
     return *this;
 }
 
-MatcherParams& MatcherParams::SetWantGlobalRank(bool global_rank)
-{
-    _want_global_rank = global_rank;
-    return *this;
-}
-
 MatcherParams& MatcherParams::SetStemMinLength(size_t stem_min)
 {
     _stem_min = stem_min;
@@ -132,12 +95,9 @@ MatcherParams& MatcherParams::SetStemMaxExtend(size_t stem_extend)
     return *this;
 }
 
-size_t MatcherParams::PrefixExtendLength() const { return _prefix_extend_length; }
-size_t MatcherParams::PrefixMinLength() const { return _prefix_min_length; }
 size_t MatcherParams::MatchWindowSize() const { return _match_winsize; }
 double MatcherParams::MatchWindowSizeFallbackMultiplier() const { return _match_winsize_fallback_multiplier; }
 size_t MatcherParams::MaxMatchCandidates() const { return _max_match_candidates; }
-bool   MatcherParams::WantGlobalRank() const { return _want_global_rank; }
 size_t MatcherParams::StemMinLength() const { return _stem_min; }
 size_t MatcherParams::StemMaxExtend() const { return _stem_extend; }
 
diff --git a/searchsummary/src/vespa/juniper/juniperparams.h b/searchsummary/src/vespa/juniper/juniperparams.h
index 44980ce8b43..f4f17779f2d 100644
--- a/searchsummary/src/vespa/juniper/juniperparams.h
+++ b/searchsummary/src/vespa/juniper/juniperparams.h
@@ -31,9 +31,6 @@ public:
     DocsumParams& SetSurroundMax(size_t length);
     size_t SurroundMax() const;
 
-    DocsumParams& SetSpaceChars(const char* spacechars);
-    const char* SpaceChars() const;
-
     DocsumParams& SetFallback(const char* fallback);
     int Fallback() const;
 
@@ -43,7 +40,6 @@ private:
     size_t _min_length;
     size_t _max_matches;
     size_t _surround_max;
-    std::string _space_chars;
     int _fallback;
 };
 
@@ -52,25 +48,17 @@ class MatcherParams
 {
 public:
     MatcherParams();
-
-    MatcherParams& SetPrefixExtendLength(size_t extend_length);
-    size_t PrefixExtendLength() const;
-
-    MatcherParams& SetPrefixMinLength(size_t min_length);
-    size_t PrefixMinLength() const;
+    MatcherParams(MatcherParams &) = delete;
+    MatcherParams &operator=(MatcherParams &) = delete;
 
     MatcherParams& SetMatchWindowSize(size_t winsize);
     size_t MatchWindowSize() const;
 
-    MatcherParams& SetMatchWindowSizeFallbackMultiplier(double winsize);
     double MatchWindowSizeFallbackMultiplier() const;
 
     MatcherParams& SetMaxMatchCandidates(size_t max_match_candidates);
     size_t MaxMatchCandidates() const;
 
-    MatcherParams& SetWantGlobalRank(bool global_rank);
-    bool WantGlobalRank() const;
-
     MatcherParams& SetStemMinLength(size_t stem_min);
     size_t StemMinLength() const;
 
@@ -84,19 +72,13 @@ public:
     double ProximityFactor();
 
 private:
-    size_t _prefix_extend_length;
-    size_t _prefix_min_length;
     size_t _match_winsize;
     double _match_winsize_fallback_multiplier;
     size_t _max_match_candidates;
-    bool _want_global_rank;
     size_t _stem_min;
     size_t _stem_extend;
     Fast_WordFolder* _wordfolder; // The wordfolder object needed as 1st parameter to folderfun
     double _proximity_factor;
-
-    MatcherParams(MatcherParams &);
-    MatcherParams &operator=(MatcherParams &);
 };
 
 
diff --git a/searchsummary/src/vespa/juniper/latintokenizer.h b/searchsummary/src/vespa/juniper/latintokenizer.h
new file mode 100644
index 00000000000..7a98d780c56
--- /dev/null
+++ b/searchsummary/src/vespa/juniper/latintokenizer.h
@@ -0,0 +1,377 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+*****************************************************************************
+* @author Bård Kvalheim
+* @date    Creation date: 2001-12-07
+*
+* A configurable tokenizer template that accepts two predicates: One to
+* determine separator symbols and one to determine punctuation symbols. A
+* typedef is defined that uses isspace/1 and ispunct/1.
+*
+* This tokenizer does not alter the text, and does not copy it.
+*
+* This tokenizer is not meant to be used as a real tokenizer for all
+* languages. It is only a fast and simple latin tokenizer, intended for
+* very basic applications.
+*
+* The tokens are returned as (char *, char *, bool) triples.  The two
+* first elements delimit the token string, while the third element is
+* true if the token is a punctuation symbol.
+*
+* If the last character in the input text is a punctuation symbol, the last
+* token is the following:
+*
+*    text = " something bl bla ."
+*
+*    token.first        -> .
+*    token.second       -> \0
+*    token._punctuation = true;
+*
+*  In other words, token.second can point to the terminating '\0' in the input
+*  text.
+*
+*****************************************************************************/
+
+#pragma once
+
+#include <cctype>
+#include <cstring>
+
+/**
+*****************************************************************************
+* A simple tokenizer. See description above.
+*
+* @class   Fast_LatinTokenizer
+* @author Bård Kvalheim
+* @date    Creation date: 2001-12-07
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+class Fast_LatinTokenizer {
+public:
+
+    /** Copying a tokenizer is not supported. */
+    Fast_LatinTokenizer(const Fast_LatinTokenizer &) = delete;
+    Fast_LatinTokenizer& operator=(const Fast_LatinTokenizer &) = delete;
+
+    /**
+     * Helper class: one token, given as the character range [first, second)
+     * together with a flag telling whether it is a punctuation symbol.
+     */
+    class Fast_Token {
+    public:
+
+        /** Member variables. */
+        char *first;        // Points to start of token. Named 'first' for std::pair compatibility.
+        char *second;       // Points to end of token.  Named 'second' for std::pair compatibility.
+        bool  _punctuation; // Is the token a punctuation symbol?
+
+        /** Constructors. */
+        Fast_Token(char *begin, char *end, bool punctuation) : first(begin), second(end), _punctuation(punctuation) {}
+        Fast_Token() : first(nullptr), second(nullptr), _punctuation(false) {}
+
+        /** Copies are plain member-wise copies, so the defaults are used. */
+        Fast_Token(const Fast_Token &) = default;
+        Fast_Token& operator=(const Fast_Token &) = default;
+
+    };
+
+    /** Constructors/destructor. */
+    Fast_LatinTokenizer();
+    explicit Fast_LatinTokenizer(char *text);
+    Fast_LatinTokenizer(char *text, size_t length);
+    virtual ~Fast_LatinTokenizer();
+
+    /**
+     * Constructors, sort of: restart tokenization on a new buffer, either
+     * '\0' terminated or with an explicit length.
+     */
+    void           SetNewText(char *text);
+    void           SetNewText(char *text, size_t length);
+
+    /** Are there any more tokens left? Skips leading separators as a side effect. */
+    bool           MoreTokens();
+
+    /**
+     * Return next token. Call MoreTokens() first to check that one exists.
+     */
+    Fast_Token     GetNextToken();
+
+    /** Return text buffer. */
+    char          *GetOriginalText();
+
+    /**
+     * Accessors to the predicate objects, in case we need to perform some
+     * action specific to the IsSeparator or IsPunctuation implementations
+     * (such as extra initialization or statistics gathering or...)
+     */
+    IsPunctuation& GetIsPunctuation() { return _isPunctuation; }
+    IsSeparator&   GetIsSeparator()   { return _isSeparator;   }
+
+private:
+
+    /** Member variables. */
+    char          *_org;           // Holds the original text buffer.
+    char          *_next;          // Points to the current buffer position.
+    char          *_end;           // Points to the end of the buffer; null for '\0' terminated text.
+    bool           _moreTokens;    // More text to process?
+    IsSeparator    _isSeparator;   // Separator symbol predicate.
+    IsPunctuation  _isPunctuation; // Punctuation symbol predicate.
+
+    /** Helper methods. */
+    void           SkipBlanks();
+
+};
+
+/**
+*****************************************************************************
+* Default constructor.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer()
+    // No text attached yet: every pointer is null and no tokens are pending.
+    : _org(nullptr)
+    , _next(nullptr)
+    , _end(nullptr)
+    , _moreTokens(false)
+    , _isSeparator()
+    , _isPunctuation()
+{ }
+
+/**
+*****************************************************************************
+* Constructor. Accepts a '\0' terminated text buffer.
+*
+* @param  text
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text)
+    : _org(nullptr)
+    , _next(nullptr)
+    , _end(nullptr)
+    , _moreTokens(false)
+    , _isSeparator()
+    , _isPunctuation()
+{
+    SetNewText(text);   // installs text as the '\0' terminated buffer to tokenize
+}
+
+/**
+*****************************************************************************
+* Constructor. Accepts a text buffer and the buffer length
+*
+* @param  text
+* @param  length
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text, size_t length) :
+    _org(nullptr),
+    _next(nullptr),
+    _end(nullptr),
+    _moreTokens(false),
+    _isSeparator(),
+    _isPunctuation()
+{
+    SetNewText(text, length);   // installs text with its explicit length as the buffer
+}
+
+/**
+*****************************************************************************
+* Destructor.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+// Performs no cleanup of its own; the text buffer is left untouched.
+template <typename IsSeparator, typename IsPunctuation>
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::~Fast_LatinTokenizer() = default;
+
+/**
+*****************************************************************************
+* Sets a new '\0' terminated string.
+*
+* @param  text
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text)
+{
+    // Leaving _end null selects the '\0' terminated scanning mode.
+    _end        = nullptr;
+    _org = _next = text;
+    _moreTokens = (text != nullptr);
+}
+
+/**
+*****************************************************************************
+* Sets a new string, given the text buffer and its length.
+*
+* @param  text
+* @param  length
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text, size_t length)
+{
+    // A null text yields a null end pointer and leaves nothing to tokenize.
+    _org = _next = text;
+    _moreTokens = (text != nullptr);
+    _end = (text == nullptr) ? nullptr : text + length;
+}
+
+/**
+*****************************************************************************
+* Skips all blanks and flags if there are more tokens.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+void
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SkipBlanks()
+{
+    // Nothing to do when no tokens remain (also the case when no text is set).
+    if (!_moreTokens) {
+        return;
+    }
+
+    // A null _end means the buffer is '\0' terminated; otherwise _end bounds it.
+    const bool nulTerminated = (_end == nullptr);
+
+    for (;;) {
+        const bool exhausted = nulTerminated ? (*_next == '\0') : (_next == _end);
+        if (exhausted) {
+            // Only separators (or nothing) remained.
+            _moreTokens = false;
+            return;
+        }
+        if (!_isSeparator(*_next)) {
+            // Parked on the first character of the next token.
+            return;
+        }
+        ++_next;
+    }
+}
+
+/**
+*****************************************************************************
+* Returns true if there are more tokens left in the text buffer.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+bool Fast_LatinTokenizer<IsSeparator, IsPunctuation>::MoreTokens()
+{
+    SkipBlanks();          // may clear _moreTokens when only separators remain
+    return _moreTokens;
+}
+
+/**
+*****************************************************************************
+* Returns the next token as a Fast_Token.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+typename Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_Token
+Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetNextToken() {
+    // Skip leading separators BEFORE remembering where the token starts, so
+    // the returned range never includes them even if MoreTokens() was not
+    // called first.
+    SkipBlanks();
+    char *start = _next;
+
+    // Nothing left (or no text set at all): return an empty, non-punctuation
+    // token instead of dereferencing a null or past-the-end pointer.
+    if (!_moreTokens) {
+        return Fast_Token(start, start, false);
+    }
+
+    // Advance to the next separator, punctuation symbol or end of text.
+    if (_end == nullptr) {
+        // '\0' terminated buffer.
+        while (*_next != '\0' && !_isSeparator(*_next) && !_isPunctuation(*_next)) {
+            ++_next;
+        }
+    } else {
+        // Buffer with an explicit length.
+        while (_next != _end && !_isSeparator(*_next) && !_isPunctuation(*_next)) {
+            ++_next;
+        }
+    }
+
+    // A zero-length scan means we stopped on a punctuation symbol, which
+    // forms a one-character token of its own.
+    const bool punctuation = (_next == start) && _isPunctuation(*start);
+    if (punctuation) ++_next;
+    return Fast_Token(start, _next, punctuation);
+}
+
+/**
+*****************************************************************************
+* Returns the original text buffer.
+*
+* @author Bård Kvalheim
+*****************************************************************************/
+
+template <typename IsSeparator, typename IsPunctuation>
+char *Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetOriginalText()
+{
+    return _org;   // the pointer last handed to a constructor or SetNewText()
+}
+
+/**
+*****************************************************************************
+* Helper class.
+*
+* When using isspace/1, ensure that the argument is cast to unsigned char to
+* avoid problems with sign extension. See system documentation for details.
+*
+* @class   Fast_IsSpace
+* @author Bård Kvalheim
+* @date    Creation date: 2001-12-07
+*****************************************************************************/
+
+struct Fast_IsSpace {
+    bool operator()(char c) { const int classified = isspace(static_cast<unsigned char>(c)); return classified != 0; }
+};
+
+/**
+*****************************************************************************
+* Helper class.
+*
+* When using ispunct/1, ensure that the argument is cast to unsigned char to
+* avoid problems with sign extension. See system documentation for details.
+*
+* @class   Fast_IsPunctuation
+* @author Bård Kvalheim
+* @date    Creation date: 2001-12-07
+*****************************************************************************/
+
+struct Fast_IsPunctuation {
+    bool operator()(char c) { const int classified = ispunct(static_cast<unsigned char>(c)); return classified != 0; }
+};
+
+/**
+*****************************************************************************
+* A simple tokenizer. See description above.
+*
+* @class   Fast_SimpleLatinTokenizer
+* @author Bård Kvalheim
+* @date    Creation date: 2001-12-07
+*****************************************************************************/
+
+using Fast_SimpleLatinTokenizer = Fast_LatinTokenizer<Fast_IsSpace, Fast_IsPunctuation>;
diff --git a/searchsummary/src/vespa/juniper/queryparser.h b/searchsummary/src/vespa/juniper/queryparser.h
index 5715daa3661..9c596892e31 100644
--- a/searchsummary/src/vespa/juniper/queryparser.h
+++ b/searchsummary/src/vespa/juniper/queryparser.h
@@ -1,13 +1,12 @@
 // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 #pragma once
 
-
 /* Simple prefix syntax advanced query parser for Juniper debug/testing */
 
 #include "query.h"
+#include "latintokenizer.h"
 #include <map>
 #include <string>
-#include <vespa/fastlib/text/latintokenizer.h>
 
 namespace juniper
 {
diff --git a/searchsummary/src/vespa/juniper/result.cpp b/searchsummary/src/vespa/juniper/result.cpp
index 653e692e015..15ad9aa2a98 100644
--- a/searchsummary/src/vespa/juniper/result.cpp
+++ b/searchsummary/src/vespa/juniper/result.cpp
@@ -80,12 +80,12 @@ Result::Result(Config* config, QueryHandle* qhandle,
     }
 
     /* Create the new pipeline */
-    _tokenizer.reset(new JuniperTokenizer(wordfolder, NULL, 0, NULL));
+    _tokenizer = std::make_unique<JuniperTokenizer>(wordfolder, nullptr, 0, nullptr, nullptr);
 
-    _matcher.reset(new Matcher(this));
+    _matcher = std::make_unique<Matcher>(this);
     _matcher->SetProximityFactor(mp.ProximityFactor());
 
-    _registry.reset(new SpecialTokenRegistry(_matcher->getQuery()));
+    _registry = std::make_unique<SpecialTokenRegistry>(_matcher->getQuery());
 
     if (qhandle->_log_mask)
         _matcher->set_log(qhandle->_log_mask);
diff --git a/searchsummary/src/vespa/juniper/rpinterface.cpp b/searchsummary/src/vespa/juniper/rpinterface.cpp
index 75a441fb957..f9e91073a9b 100644
--- a/searchsummary/src/vespa/juniper/rpinterface.cpp
+++ b/searchsummary/src/vespa/juniper/rpinterface.cpp
@@ -94,12 +94,6 @@ void Juniper::FlushRewriters()
     _modifier->FlushRewriters();
 }
 
-void ReleaseConfig(Config*& config)
-{
-    delete config;
-    config = NULL;
-}
-
 
 void ReleaseQueryHandle(QueryHandle*& handle)
 {
author	Henning Baldersheim <balder@yahoo-inc.com>	2022-05-21 13:31:10 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2022-05-21 14:29:19 +0000
commit	58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch)
tree	4ced08d5ed7c7020e3cfb516f135f885334ff27d /searchsummary
parent	2c34544abef32f7da1c05a83a3648532afb53186 (diff)