diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-21 13:31:10 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-21 14:29:19 +0000 |
commit | 58a7afd1bd0cd358a8d19bfefd3e0c2c32daecc1 (patch) | |
tree | 4ced08d5ed7c7020e3cfb516f135f885334ff27d /searchsummary | |
parent | 2c34544abef32f7da1c05a83a3648532afb53186 (diff) |
Fold fastlib into vespalib and gc some unused code.
Also move some code only used by juniper up into juniper test module.
Diffstat (limited to 'searchsummary')
20 files changed, 1426 insertions, 90 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 84633bee262..d51e29dbd00 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -4,7 +4,6 @@ vespa_define_module( vespalog vespalib configdefinitions - fastlib_fast document searchlib diff --git a/searchsummary/src/tests/juniper/.gitignore b/searchsummary/src/tests/juniper/.gitignore index 46b307da632..09d6225ca26 100644 --- a/searchsummary/src/tests/juniper/.gitignore +++ b/searchsummary/src/tests/juniper/.gitignore @@ -14,3 +14,4 @@ juniper_auxTest_app juniper_matchobjectTest_app juniper_mcandTest_app juniper_queryparserTest_app +juniper_latintokenizertest_app diff --git a/searchsummary/src/tests/juniper/CMakeLists.txt b/searchsummary/src/tests/juniper/CMakeLists.txt index d15e91f1f63..77e7052a429 100644 --- a/searchsummary/src/tests/juniper/CMakeLists.txt +++ b/searchsummary/src/tests/juniper/CMakeLists.txt @@ -1,4 +1,10 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(juniper_testsuite + SOURCES + test.cpp + DEPENDS +) + vespa_add_executable(juniper_mcandTest_app TEST SOURCES mcandTest.cpp @@ -6,7 +12,7 @@ vespa_add_executable(juniper_mcandTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_mcandTest_app COMMAND juniper_mcandTest_app) vespa_add_executable(juniper_queryparserTest_app TEST @@ -17,7 +23,7 @@ vespa_add_executable(juniper_queryparserTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_queryparserTest_app COMMAND juniper_queryparserTest_app) vespa_add_executable(juniper_matchobjectTest_app TEST @@ -28,7 +34,7 @@ vespa_add_executable(juniper_matchobjectTest_app TEST fakerewriter.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_matchobjectTest_app COMMAND juniper_matchobjectTest_app) vespa_add_executable(juniper_appender_test_app TEST @@ -36,7 +42,6 @@ vespa_add_executable(juniper_appender_test_app TEST appender_test.cpp DEPENDS searchsummary - fastlib_fast_testsuite ) vespa_add_test(NAME juniper_appender_test_app COMMAND juniper_appender_test_app) vespa_add_executable(juniper_queryvisitor_test_app TEST @@ -44,7 +49,6 @@ vespa_add_executable(juniper_queryvisitor_test_app TEST queryvisitor_test.cpp DEPENDS searchsummary - fastlib_fast_testsuite ) vespa_add_test(NAME juniper_queryvisitor_test_app COMMAND juniper_queryvisitor_test_app) vespa_add_executable(juniper_auxTest_app TEST @@ -54,7 +58,7 @@ vespa_add_executable(juniper_auxTest_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME juniper_auxTest_app COMMAND juniper_auxTest_app) vespa_add_executable(juniper_SrcTestSuite_app TEST @@ -68,6 +72,13 @@ vespa_add_executable(juniper_SrcTestSuite_app TEST testenv.cpp DEPENDS searchsummary - fastlib_fast_testsuite + juniper_testsuite ) vespa_add_test(NAME 
juniper_SrcTestSuite_app COMMAND juniper_SrcTestSuite_app) +vespa_add_executable(juniper_latintokenizertest_app TEST + SOURCES + latintokenizertest.cpp + DEPENDS + juniper_testsuite +) +vespa_add_test(NAME juniper_latintokenizertest_app NO_VALGRIND COMMAND juniper_latintokenizertest_app) diff --git a/searchsummary/src/tests/juniper/SrcTestSuite.cpp b/searchsummary/src/tests/juniper/SrcTestSuite.cpp index c1e4dc2cd19..870c7b9f378 100644 --- a/searchsummary/src/tests/juniper/SrcTestSuite.cpp +++ b/searchsummary/src/tests/juniper/SrcTestSuite.cpp @@ -1,12 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "testenv.h" +#include "suite.h" #include "mcandTest.h" #include "queryparserTest.h" #include "matchobjectTest.h" #include "auxTest.h" #include <vespa/vespalib/testkit/testapp.h> -#include <vespa/fastlib/testsuite/suite.h> /** * The SrcTestSuite class runs all the unit tests for the src module. * diff --git a/searchsummary/src/tests/juniper/auxTest.h b/searchsummary/src/tests/juniper/auxTest.h index dd6d79e024a..9ff391911b3 100644 --- a/searchsummary/src/tests/juniper/auxTest.h +++ b/searchsummary/src/tests/juniper/auxTest.h @@ -3,9 +3,9 @@ // Auxiliary tests for juniper - based on Juniper 1.x proximitytest.cpp -#include <map> -#include <vespa/fastlib/testsuite/test.h> #include "testenv.h" +#include "test.h" +#include <map> class AuxTest : public Test { diff --git a/searchsummary/src/tests/juniper/latintokenizertest.cpp b/searchsummary/src/tests/juniper/latintokenizertest.cpp new file mode 100644 index 00000000000..89273ab1ec0 --- /dev/null +++ b/searchsummary/src/tests/juniper/latintokenizertest.cpp @@ -0,0 +1,9 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "latintokenizertest.h" + +int main(int, char **) { + LatinTokenizerTest lta; + lta.SetStream(&std::cout); + lta.Run(); + return lta.Report(); +} diff --git a/searchsummary/src/tests/juniper/latintokenizertest.h b/searchsummary/src/tests/juniper/latintokenizertest.h new file mode 100644 index 00000000000..b4d113918ee --- /dev/null +++ b/searchsummary/src/tests/juniper/latintokenizertest.h @@ -0,0 +1,450 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "test.h" +#include <vespa/juniper/latintokenizer.h> +#include <vespa/vespalib/util/stringfmt.h> + +class Mapel_Pucntuation { +private: + /** Member variables. */ + static bool *_lookup; +public: + + /** Constructors */ + Mapel_Pucntuation(); + + /** Punctuation predicate. */ + bool operator()(char c) const { + return _lookup[static_cast<unsigned char>(c)]; + } + +}; + +class Maple_Space { +private: + + /** Member variables. */ + static bool *_lookup; + +public: + + /** Constructors */ + Maple_Space(); + + /** Space predicate. */ + bool operator()(char c) const { + return _lookup[static_cast<unsigned char>(c)]; + } + +}; + +bool *Maple_Space::_lookup = NULL; +bool *Mapel_Pucntuation::_lookup = NULL; + +Mapel_Pucntuation::Mapel_Pucntuation() { + + // Initialize lookup table. 
+ if (_lookup == NULL) { + + _lookup = new bool[256]; + + for (unsigned int i = 0; i < 256; ++i) { + _lookup[i] = false; + } + + _lookup[static_cast<unsigned char>('.')] = true; + _lookup[static_cast<unsigned char>(',')] = true; + _lookup[static_cast<unsigned char>(':')] = true; + _lookup[static_cast<unsigned char>(';')] = true; + _lookup[static_cast<unsigned char>('|')] = true; + _lookup[static_cast<unsigned char>('!')] = true; + _lookup[static_cast<unsigned char>('?')] = true; + _lookup[static_cast<unsigned char>('@')] = true; + _lookup[static_cast<unsigned char>('/')] = true; + _lookup[static_cast<unsigned char>('(')] = true; + _lookup[static_cast<unsigned char>(')')] = true; + _lookup[static_cast<unsigned char>('[')] = true; + _lookup[static_cast<unsigned char>(']')] = true; + _lookup[static_cast<unsigned char>('{')] = true; + _lookup[static_cast<unsigned char>('}')] = true; + _lookup[static_cast<unsigned char>('<')] = true; + _lookup[static_cast<unsigned char>('>')] = true; + _lookup[static_cast<unsigned char>('*')] = true; + _lookup[static_cast<unsigned char>('=')] = true; + _lookup[static_cast<unsigned char>('%')] = true; + _lookup[static_cast<unsigned char>('\\')] = true; + + } + +} + +Maple_Space::Maple_Space() { + + // Initialize lookup table. 
+ if (_lookup == NULL) { + + _lookup = new bool[256]; + + for (unsigned int i = 0; i < 256; ++i) { + _lookup[i] = false; + } + + _lookup[static_cast<unsigned char>(' ')] = true; + _lookup[static_cast<unsigned char>('\n')] = true; + _lookup[static_cast<unsigned char>('\t')] = true; + _lookup[static_cast<unsigned char>('\r')] = true; + _lookup[static_cast<unsigned char>('"')] = true; + _lookup[static_cast<unsigned char>('\'')] = true; + _lookup[static_cast<unsigned char>('`')] = true; + _lookup[static_cast<unsigned char>('_')] = true; + + } +} + +class LatinTokenizerTest : public Test +{ +private: + void TestSimple(); + void TestSimpleLength(); + void TestEnding(); + void TestEndingLength(); + void TestNull(); + void TestNullLength(); + void TestEmpty(); + void TestEmptyLength(); + void TestMapelURL(); + + template <typename IsSeparator, typename IsPunctuation> + void TestWord(Fast_LatinTokenizer<IsSeparator,IsPunctuation>* lt, + const char* correct, + bool punct = false) + { + typename Fast_LatinTokenizer<IsSeparator,IsPunctuation>::Fast_Token token; + _test(lt->MoreTokens()); + + token = lt->GetNextToken(); + char temp = *token.second; + *token.second = '\0'; + vespalib::string word = vespalib::make_string("%s", token.first); + *token.second = temp; + + PushDesc(vespalib::make_string("%s%s == %s", "word: ", word.c_str(), correct).c_str()); + + _test(word == correct); + + _test(token._punctuation == punct); + + PopDesc(); + } + + void TestTypeparamObservers(); + +public: + LatinTokenizerTest(); + ~LatinTokenizerTest(); + void Run() override; +}; + + +LatinTokenizerTest::LatinTokenizerTest() +{ + +} + + +LatinTokenizerTest::~LatinTokenizerTest() +{ + +} + + +void LatinTokenizerTest::TestSimple() +{ + PushDesc("Simple"); + + Fast_SimpleLatinTokenizer lt; + std::string s("This is. my . 
test String."); + lt.SetNewText(const_cast<char*>(s.c_str())); + + PushDesc("This"); + TestWord(&lt, "This"); + PopDesc(); + PushDesc("is"); + TestWord(&lt, "is"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + PushDesc("my"); + TestWord(&lt, "my"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + PushDesc("test"); + TestWord(&lt, "test"); + PopDesc(); + PushDesc("String"); + TestWord(&lt, "String"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + + _test(!lt.MoreTokens()); + + PopDesc(); +} + + + +void LatinTokenizerTest::TestSimpleLength() +{ + PushDesc("Simple"); + + Fast_SimpleLatinTokenizer lt; + std::string s("This is. my . test String."); + lt.SetNewText(const_cast<char*>(s.c_str()), + s.length()); + + PushDesc("This"); + TestWord(&lt, "This"); + PopDesc(); + PushDesc("is"); + TestWord(&lt, "is"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + PushDesc("my"); + TestWord(&lt, "my"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + PushDesc("test"); + TestWord(&lt, "test"); + PopDesc(); + PushDesc("String"); + TestWord(&lt, "String"); + PopDesc(); + PushDesc("."); + TestWord(&lt, ".", true); + PopDesc(); + + _test(!lt.MoreTokens()); + + PopDesc(); +} + + + +void LatinTokenizerTest::TestEnding() +{ + PushDesc("Ending\n"); + + std::string text("This is my test String "); + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str())); + + TestWord(lt, "This"); + TestWord(lt, "is"); + TestWord(lt, "my"); + TestWord(lt, "test"); + TestWord(lt, "String"); + + _test(!lt->MoreTokens()); + + _test(text == lt->GetOriginalText()); + + delete lt; + + PopDesc(); +} + +void LatinTokenizerTest::TestEndingLength() +{ + PushDesc("Ending\n"); + + std::string text("This is my test String "); + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()), + text.length()); + + TestWord(lt, "This"); + TestWord(lt, "is"); + 
TestWord(lt, "my"); + TestWord(lt, "test"); + TestWord(lt, "String"); + + _test(!lt->MoreTokens()); + + _test(text == std::string(lt->GetOriginalText())); + + delete lt; + + PopDesc(); +} + +void LatinTokenizerTest::TestNull() +{ + PushDesc("Null\n"); + + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL); + + _test(!lt->MoreTokens()); + + _test(lt->GetOriginalText() == NULL); + + delete lt; + + PopDesc(); +} + +void LatinTokenizerTest::TestNullLength() +{ + PushDesc("Null\n"); + + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(NULL, 0); + + _test(!lt->MoreTokens()); + + _test(lt->GetOriginalText() == NULL); + + delete lt; + + PopDesc(); +} + +void LatinTokenizerTest::TestEmpty() +{ + PushDesc("Empty\n"); + + std::string text(" "); + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str())); + + _test(!lt->MoreTokens()); + + delete lt; + + PopDesc(); +} + +void LatinTokenizerTest::TestEmptyLength() +{ + PushDesc("Empty\n"); + + std::string text(" "); + Fast_SimpleLatinTokenizer* lt = new Fast_SimpleLatinTokenizer(const_cast<char*>(text.c_str()), + text.length()); + + _test(!lt->MoreTokens()); + + delete lt; + + PopDesc(); +} + + +class TPS +{ +private: + TPS(const TPS &); + TPS& operator=(const TPS &); + +public: + TPS() : _myfunc(NULL) {} + void Init(int (*myfunc)(int c)) + { + _myfunc = myfunc; + } + + bool operator()(char c) + { +// LatinTokenizerTest::_test(_myfunc); + return (_myfunc(c) != 0); + } + +private: + int (*_myfunc)(int c); +}; + +void LatinTokenizerTest::TestTypeparamObservers() +{ + + typedef Fast_LatinTokenizer<TPS,TPS> MyTokenizer; + + PushDesc("TypeparamObservers\n"); + std::string text("4Some6text"); + MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str())); + tok->GetIsPunctuation().Init(ispunct); + tok->GetIsSeparator().Init(isdigit); + + TestWord(tok,"Some"); + TestWord(tok,"text"); + _test(!tok->MoreTokens()); + PopDesc(); + + delete tok; +} + +void 
LatinTokenizerTest::TestMapelURL() +{ + + typedef Fast_LatinTokenizer<Maple_Space, Mapel_Pucntuation> MyTokenizer; + + PushDesc("MapelURL\n"); + std::string text("http://search.msn.co.uk/results.asp?q= cfg=SMCBROWSE rn=1825822 dp=1873075 v=166:"); + MyTokenizer* tok = new MyTokenizer(const_cast<char*>(text.c_str())); + + TestWord(tok,"http", false); + TestWord(tok,":", true); + TestWord(tok,"/", true); + TestWord(tok,"/", true); + TestWord(tok,"search", false); + TestWord(tok,".", true); + TestWord(tok,"msn", false); + TestWord(tok,".", true); + TestWord(tok,"co", false); + TestWord(tok,".", true); + TestWord(tok,"uk", false); + TestWord(tok,"/", true); + TestWord(tok,"results", false); + TestWord(tok,".", true); + TestWord(tok,"asp", false); + TestWord(tok,"?", true); + TestWord(tok,"q", false); + TestWord(tok,"=", true); + TestWord(tok,"cfg", false); + TestWord(tok,"=", true); + TestWord(tok,"SMCBROWSE", false); + TestWord(tok,"rn", false); + TestWord(tok,"=", true); + TestWord(tok,"1825822", false); + TestWord(tok,"dp", false); + TestWord(tok,"=", true); + TestWord(tok,"1873075", false); + TestWord(tok,"v", false); + TestWord(tok,"=", true); + TestWord(tok,"166", false); + TestWord(tok,":", true); + _test(!tok->MoreTokens()); + PopDesc(); + + delete tok; +} + + + +void LatinTokenizerTest::Run() +{ + TestSimple(); + TestSimpleLength(); + TestEnding(); + TestEndingLength(); + TestNull(); + TestNullLength(); + TestEmpty(); + TestEmptyLength(); + TestTypeparamObservers(); + TestMapelURL(); +} diff --git a/searchsummary/src/tests/juniper/matchobjectTest.h b/searchsummary/src/tests/juniper/matchobjectTest.h index 5bfd29a371f..9fdd3e4719f 100644 --- a/searchsummary/src/tests/juniper/matchobjectTest.h +++ b/searchsummary/src/tests/juniper/matchobjectTest.h @@ -5,7 +5,7 @@ #pragma once #include "testenv.h" -#include <vespa/fastlib/testsuite/test.h> +#include "test.h" #include <map> /** diff --git a/searchsummary/src/tests/juniper/mcandTest.h 
b/searchsummary/src/tests/juniper/mcandTest.h index cdb01e91e3b..6eb8b4d66e5 100644 --- a/searchsummary/src/tests/juniper/mcandTest.h +++ b/searchsummary/src/tests/juniper/mcandTest.h @@ -4,10 +4,10 @@ */ #pragma once -#include <map> -#include <vespa/fastlib/testsuite/test.h> #include "testenv.h" +#include "test.h" #include <vespa/juniper/mcand.h> +#include <map> /** * The MatchCandidateTest class holds diff --git a/searchsummary/src/tests/juniper/queryparserTest.h b/searchsummary/src/tests/juniper/queryparserTest.h index 7dc4dda63fa..803fbd4c999 100644 --- a/searchsummary/src/tests/juniper/queryparserTest.h +++ b/searchsummary/src/tests/juniper/queryparserTest.h @@ -5,9 +5,9 @@ #pragma once #include "testenv.h" +#include "test.h" #include <vespa/juniper/queryparser.h> #include <vespa/juniper/rewriter.h> -#include <vespa/fastlib/testsuite/test.h> #include <map> /** diff --git a/searchsummary/src/tests/juniper/suite.h b/searchsummary/src/tests/juniper/suite.h new file mode 100644 index 00000000000..fea685731ae --- /dev/null +++ b/searchsummary/src/tests/juniper/suite.h @@ -0,0 +1,264 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/************************************************************************** + * Author: Bård Kvalheim + * + * A test suite. Modified from the suite written by Chuck Allison. + * http://www.cuj.com/archive/1809/feature.html + * + * Licensed to Yahoo, and relicensed under the terms of the Apache 2.0 license + * + * The usage of suite is simple: + * + * mysuite.h: + * ----- + * + * #include <iosfwd> + * #include <vespa/fastlib/testsuite/suite.h> + * + * class MySuite : public Suite + * { + * public: + * MySuite() : + * Suite("My test suite. 
", &cout) + * { + * AddTest(new MyTest1()); + * AddTest(new MyTest2()); + * } + * }; + * + * + * + * class MySuiteApp + * { + * public: + * int Main(); + * }; + * + * + * --- + * + * mysuite.cpp: + * ----- + * + * #include "mysuite.h" + * + * + * int MyTestApp::Main() { + * MyTestSuite mts; + * mts.Run(); + * mts.Report(); + * mts.Free(); + * } + * + * --- + * + **************************************************************************/ + +#pragma once + +#include "test.h" // includes <string>, <iosfwd> + + +#include <vector> + +#include <iostream> +#include <stdexcept> +#include <cassert> + + +namespace fast::testsuite { + +class TestSuiteError; + +class Suite +{ +public: + Suite(const std::string& name, std::ostream* osptr = 0); + + std::string GetName() const; + long GetNumPassed() const; + long GetNumFailed() const; + const std::ostream* GetStream() const; + void SetStream(std::ostream* osptr); + + void AddTest(Test* t); //throw (TestSuiteError); + void AddSuite(const Suite&); //throw(TestSuiteError); + void Run(); // Calls Test::run() repeatedly + long Report() const; + void Free(); // deletes tests + virtual ~Suite(void) { } + +private: + std::string m_name; + std::ostream* m_osptr; + std::vector<Test*> m_tests; + void Reset(); + int GetLongestName() const; + + // Disallowed ops: + Suite(const Suite&); + Suite& operator=(const Suite&); +}; + +inline +Suite::Suite(const std::string& name, std::ostream* osptr) + : m_name(name), + m_osptr(osptr), + m_tests() +{ +} + +inline +std::string Suite::GetName() const +{ + return m_name; +} + +inline +const std::ostream* Suite::GetStream() const +{ + return m_osptr; +} + +inline +void Suite::SetStream(std::ostream* osptr) +{ + m_osptr = osptr; +} + + +/*class TestSuiteError : public logic_error + { + public: + TestSuiteError(const std::string& s = "") + : logic_error(s) + {} + };*/ + +void Suite::AddTest(Test* t) //throw(TestSuiteError) +{ + // Make sure test has a stream: + if (t == 0) {} + //throw TestSuiteError("Null 
test in Suite::addTest"); + else if (m_osptr != 0 && t->GetStream() == 0) + t->SetStream(m_osptr); + + m_tests.push_back(t); + t->Reset(); +} + +void Suite::AddSuite(const Suite& s) //throw(TestSuiteError) +{ + for (size_t i = 0; i < s.m_tests.size(); ++i) + AddTest(s.m_tests[i]); +} + +void Suite::Free() +{ + // This is not a destructor because tests + // don't have to be on the heap. + for (size_t i = 0; i < m_tests.size(); ++i) + { + delete m_tests[i]; + m_tests[i] = 0; + } +} + +void Suite::Run() +{ + Reset(); + int longestName = GetLongestName(); + const char *nm; + int x = 0; + for (size_t i = 0; i < m_tests.size(); ++i) { + assert(m_tests[i]); + nm = m_tests[i]->get_name(); + if (nm) { + *m_osptr << std::endl << nm << ": "; + for (x = longestName - strlen(nm); x > 0; --x) + *m_osptr << ' '; + *m_osptr << std::flush; + } + m_tests[i]->Run(); + } +} + + +// Find the longest test name +int Suite::GetLongestName() const +{ + int longestName = 0, len = 0; + const char *nm; + for (size_t i = 0; i < m_tests.size(); ++i) { + assert(m_tests[i]); + nm = m_tests[i]->get_name(); + if ( nm != NULL && (len = strlen(nm)) > longestName ) + longestName = len; + } + return longestName; +} + +long Suite::Report() const +{ + if (m_osptr) { + int longestName = GetLongestName(); + int lineLength = longestName + 8 + 16 + 10; + long totFail = 0; + int x = 0; + *m_osptr << std::endl << std::endl + << "Suite \"" << m_name << "\"" << std::endl; + for (x = 0; x < lineLength; ++x) + *m_osptr << '='; + *m_osptr << "="; + + // Write the individual reports + for (size_t i = 0; i < m_tests.size(); ++i) { + assert(m_tests[i]); + const char *nm = m_tests[i]->get_name(); + totFail += m_tests[i]->Report(longestName - + (nm ? 
strlen(nm) : longestName)); + } + + for (x = 0; x < lineLength; ++x) + *m_osptr << '='; + *m_osptr << "=\n"; + return totFail; + } + else + return GetNumFailed(); +} + +long Suite::GetNumPassed() const +{ + long totPass = 0; + for (size_t i = 0; i < m_tests.size(); ++i) + { + assert(m_tests[i]); + totPass += m_tests[i]->GetNumPassed(); + } + return totPass; +} + +long Suite::GetNumFailed() const +{ + long totFail = 0; + for (size_t i = 0; i < m_tests.size(); ++i) + { + assert(m_tests[i]); + totFail += m_tests[i]->GetNumFailed(); + } + return totFail; +} + +void Suite::Reset() +{ + for (size_t i = 0; i < m_tests.size(); ++i) + { + assert(m_tests[i]); + m_tests[i]->Reset(); + } +} + +} + +using fast::testsuite::Suite; diff --git a/searchsummary/src/tests/juniper/test.cpp b/searchsummary/src/tests/juniper/test.cpp new file mode 100644 index 00000000000..18930b1bca2 --- /dev/null +++ b/searchsummary/src/tests/juniper/test.cpp @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "test.h" + +namespace fast::testsuite { + +Test::Test(std::ostream* osptr, const char*name) : + m_osptr(osptr), + name_(name), + m_nPass(0), + m_nFail(0), + m_index(0), + m_description() +{ + m_pchar[0]= '|'; + m_pchar[1]= '-'; +} + +Test::Test(const char*name) : + Test(nullptr, name) +{ +} + +const char *Test::get_name() const { + return (name_ == NULL) ? 
"Test " : name_; +} + +const std::string& Test::GetSourceDirectory() +{ + static const std::string srcDir = [] () { + std::string dir("."); + const char* env = getenv("SOURCE_DIRECTORY"); + if (env) { + dir = env; + } + if (*dir.rbegin() != '/') { + dir += "/"; + } + return dir; + } (); + return srcDir; +} + +long Test::GetNumPassed() const +{ + return m_nPass; +} + +long Test::GetNumFailed() const +{ + return m_nFail; +} + +const std::ostream* Test::GetStream() const +{ + return m_osptr; +} + +void Test::SetStream(std::ostream* osptr) +{ + m_osptr = osptr; +} + +void Test::_Succeed() +{ + ++m_nPass; +} + +void Test::Reset() +{ + m_nPass = m_nFail = 0; +} + +void Test::PushDesc(const std::string& desc) +{ + m_description.push_back(desc); +} + +void Test::PopDesc() +{ + m_description.pop_back(); +} + +size_t Test::print_desc() const +{ + std::copy(m_description.begin(), m_description.end(), + std::ostream_iterator<std::string>(*m_osptr)); + return m_description.size(); +} + +void Test::print_progress() { + ++m_index; + m_index = m_index % 2; + *m_osptr << '\b' <<'\b' <<'\b'; + *m_osptr <<' ' << m_pchar[m_index] << ' ' << std::flush; +} + +bool Test::do_fail(const std::string& lbl, const char* fname, long lineno, + bool addEndl) +{ + ++m_nFail; + if (m_osptr) { + *m_osptr << std::endl + << fname << ':' << lineno << ": " + << get_name() << " failure: (" << lbl << ")" + << std::endl; + if (addEndl && print_desc() > 0) + *m_osptr << std::endl << std::endl; + } + return false; +} + +bool Test::do_test(bool cond, const std::string& lbl, + const char* fname, long lineno) +{ + if (!cond) { + return do_fail(lbl, fname, lineno); + } + else { + _Succeed(); + print_progress(); + return true; + } +} + +long Test::Report(int padSpaces) const +{ + if (m_osptr) { + *m_osptr << std::endl << get_name(); + + // Pad the name with the given number of spaces + for (int i= 0; i < padSpaces; ++i) *m_osptr << ' '; + + *m_osptr << "\tPassed: " << m_nPass + << "\tFailed: " << m_nFail + << 
std::endl; + } + return m_nFail; +} + +} diff --git a/searchsummary/src/tests/juniper/test.h b/searchsummary/src/tests/juniper/test.h new file mode 100644 index 00000000000..1388c3ba812 --- /dev/null +++ b/searchsummary/src/tests/juniper/test.h @@ -0,0 +1,150 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/************************************************************************** + * Author: Bård Kvalheim + * + * The test class of the testsuite. Written by Chuck Allison. + * http://www.cuj.com/archive/1809/feature.html + * + * Apart for a trick the usage of the test class is very simple: + * + * mytest.h: + * ---- + * #include <iosfwd> + * #include <vespa/fastlib/testsuite/test.h> + * + * class MyTest : public Test + * { + * public: + * virtual void Run() { + * // do the tests _test is ok if the argument are true + * _test(expr); + * } + * + * }; + * + * class MyTestApp + * { + * public: + * int Main(); + * }; + * + * + * ---- + * + * + * mytest.cpp: + * ---- + * #include "mytest.h" + * + * int MyTestApp::Main() + * { + * MyTest mt; + * mt.SetStream(&std::cout); + * mt.Run(); + * mt.Report(); + * + * return 0; + * } + * + * + * ---- + * + * The trick is that the all the code except the main function is in + * the .h file. The reason for this is that it is simpler to integerate + * the single test into a suite of tests. + * + *************************************************************************/ + +#pragma once + +#include <string> +#include <iostream> +#include <typeinfo> +#include <vector> +#include <algorithm> +#include <iterator> + +// The following have underscores because they are macros +// (and it's impolite to usurp other users' functions!). +// For consistency, _succeed() also has an underscore. 
+#define _test(cond) do_test((cond), #cond, __FILE__, __LINE__) +#define _test_equal(lhs, rhs) \ + do_equality_test((lhs), (rhs), #lhs, __FILE__, __LINE__) +#define _fail(str) do_fail((str), __FILE__, __LINE__) + +namespace fast::testsuite { + +class Test +{ +public: + explicit Test(std::ostream* osptr = 0, const char *name = NULL); + explicit Test(const char *name); + virtual ~Test(){} + virtual void Run() = 0; + + const char *get_name() const; + static const std::string& GetSourceDirectory(); + long GetNumPassed() const; + long GetNumFailed() const; + const std::ostream* GetStream() const; + void SetStream(std::ostream* osptr); + + void _Succeed(); + long Report(int padSpaces = 1) const; + virtual void Reset(); + + void PushDesc(const std::string& desc); + void PopDesc(); + +protected: + std::ostream* m_osptr; + const char *name_; + + bool do_test(bool cond, const std::string& lbl, + const char* fname, long lineno); + bool do_fail(const std::string& lbl, const char* fname, long lineno, + bool addEndl = true); + template <typename t1, typename t2> + bool do_equality_test(const t1& lhs, const t2& rhs, + const char* lbl, const char* fname, long lineno); + virtual void print_progress(); + +private: + long m_nPass; + long m_nFail; + int m_index; + char m_pchar[4]; + + std::vector<std::string> m_description; + + size_t print_desc() const; + + // Disallowed: + Test(const Test&); + Test& operator=(const Test&); +}; + +template <typename t1, typename t2> +bool Test::do_equality_test(const t1& lhs, const t2& rhs, const char* lbl, + const char* fname, long lineno) +{ + if (lhs == rhs) { + _Succeed(); + print_progress(); + return true; + } + do_fail(std::string(lbl), fname, lineno, false); + if (m_osptr) { + *m_osptr << "Equality test failed: " + << "Expected '" << rhs + << "' got '" << lhs << "'" + << std::endl; + if (print_desc() > 0) + *m_osptr << std::endl << std::endl; + } + return false; +} + +} + +using fast::testsuite::Test; diff --git 
a/searchsummary/src/vespa/juniper/config.cpp b/searchsummary/src/vespa/juniper/config.cpp index b9213bb21f1..a82a8d74b8a 100644 --- a/searchsummary/src/vespa/juniper/config.cpp +++ b/searchsummary/src/vespa/juniper/config.cpp @@ -1,7 +1,6 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "config.h" -#include "IJuniperProperties.h" #include "rpinterface.h" #include "juniperdebug.h" #define _NEED_SUMMARY_CONFIG_IMPL @@ -14,7 +13,7 @@ namespace juniper Config::Config(const char* config_name, Juniper & juniper) : _docsumparams(), _matcherparams(), - _sumconf(NULL), + _sumconf(nullptr), _config_name(config_name), _juniper(juniper) { @@ -51,7 +50,7 @@ Config::Config(const char* config_name, Juniper & juniper) : .SetMaxMatches(max_matches) .SetSurroundMax(surround_max) .SetFallback(fallback); - _matcherparams.SetWantGlobalRank(true) + _matcherparams .SetStemMinLength(stem_min).SetStemMaxExtend(stem_extend) .SetMatchWindowSize(match_winsize) .SetMaxMatchCandidates(max_match_candidates) diff --git a/searchsummary/src/vespa/juniper/juniperparams.cpp b/searchsummary/src/vespa/juniper/juniperparams.cpp index e600c23f7c4..4f25b2446ad 100644 --- a/searchsummary/src/vespa/juniper/juniperparams.cpp +++ b/searchsummary/src/vespa/juniper/juniperparams.cpp @@ -1,7 +1,5 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-#include "juniperdebug.h" #include "juniperparams.h" -#include "Matcher.h" #include <cstring> // DocsumParams implementation: @@ -9,7 +7,7 @@ DocsumParams::DocsumParams() : _enabled(false), _length(256), _min_length(128), _max_matches(3), - _surround_max(80), _space_chars(""), _fallback(FALLBACK_NONE) + _surround_max(80), _fallback(FALLBACK_NONE) { } DocsumParams& DocsumParams::SetEnabled(bool en) @@ -42,12 +40,6 @@ DocsumParams& DocsumParams::SetSurroundMax(size_t length) return *this; } -DocsumParams& DocsumParams::SetSpaceChars(const char* spacechars) -{ - _space_chars = spacechars; - return *this; -} - DocsumParams& DocsumParams::SetFallback(const char* fallback) { if (strcmp("prefix", fallback) == 0) { @@ -63,7 +55,6 @@ size_t DocsumParams::MinLength() const { return _min_length; } size_t DocsumParams::MaxMatches() const { return _max_matches; } size_t DocsumParams::SurroundMax() const { return _surround_max; } bool DocsumParams::Enabled() const { return _enabled; } -const char* DocsumParams::SpaceChars() const { return _space_chars.c_str(); } int DocsumParams::Fallback() const { return _fallback; } // MatcherParams implementation: @@ -71,54 +62,26 @@ int DocsumParams::Fallback() const { return _fallback; } MatcherParams::MatcherParams() : - _prefix_extend_length(3), - _prefix_min_length(5), _match_winsize(200), _match_winsize_fallback_multiplier(10.0), _max_match_candidates(1000), - _want_global_rank(false), _stem_min(0), _stem_extend(0), _wordfolder(NULL), _proximity_factor(1.0) { } -MatcherParams& MatcherParams::SetPrefixExtendLength(size_t extend_length) -{ - _prefix_extend_length = extend_length; - return *this; -} - -MatcherParams& MatcherParams::SetPrefixMinLength(size_t min_length) -{ - _prefix_min_length = min_length; - return *this; -} - - MatcherParams& MatcherParams::SetMatchWindowSize(size_t winsize) { _match_winsize = winsize; return *this; } -MatcherParams& MatcherParams::SetMatchWindowSizeFallbackMultiplier(double winsize) -{ - 
_match_winsize_fallback_multiplier = winsize; - return *this; -} - MatcherParams& MatcherParams::SetMaxMatchCandidates(size_t max_match_candidates) { _max_match_candidates = max_match_candidates; return *this; } -MatcherParams& MatcherParams::SetWantGlobalRank(bool global_rank) -{ - _want_global_rank = global_rank; - return *this; -} - MatcherParams& MatcherParams::SetStemMinLength(size_t stem_min) { _stem_min = stem_min; @@ -132,12 +95,9 @@ MatcherParams& MatcherParams::SetStemMaxExtend(size_t stem_extend) return *this; } -size_t MatcherParams::PrefixExtendLength() const { return _prefix_extend_length; } -size_t MatcherParams::PrefixMinLength() const { return _prefix_min_length; } size_t MatcherParams::MatchWindowSize() const { return _match_winsize; } double MatcherParams::MatchWindowSizeFallbackMultiplier() const { return _match_winsize_fallback_multiplier; } size_t MatcherParams::MaxMatchCandidates() const { return _max_match_candidates; } -bool MatcherParams::WantGlobalRank() const { return _want_global_rank; } size_t MatcherParams::StemMinLength() const { return _stem_min; } size_t MatcherParams::StemMaxExtend() const { return _stem_extend; } diff --git a/searchsummary/src/vespa/juniper/juniperparams.h b/searchsummary/src/vespa/juniper/juniperparams.h index 44980ce8b43..f4f17779f2d 100644 --- a/searchsummary/src/vespa/juniper/juniperparams.h +++ b/searchsummary/src/vespa/juniper/juniperparams.h @@ -31,9 +31,6 @@ public: DocsumParams& SetSurroundMax(size_t length); size_t SurroundMax() const; - DocsumParams& SetSpaceChars(const char* spacechars); - const char* SpaceChars() const; - DocsumParams& SetFallback(const char* fallback); int Fallback() const; @@ -43,7 +40,6 @@ private: size_t _min_length; size_t _max_matches; size_t _surround_max; - std::string _space_chars; int _fallback; }; @@ -52,25 +48,17 @@ class MatcherParams { public: MatcherParams(); - - MatcherParams& SetPrefixExtendLength(size_t extend_length); - size_t PrefixExtendLength() const; - - 
MatcherParams& SetPrefixMinLength(size_t min_length); - size_t PrefixMinLength() const; + MatcherParams(MatcherParams &) = delete; + MatcherParams &operator=(MatcherParams &) = delete; MatcherParams& SetMatchWindowSize(size_t winsize); size_t MatchWindowSize() const; - MatcherParams& SetMatchWindowSizeFallbackMultiplier(double winsize); double MatchWindowSizeFallbackMultiplier() const; MatcherParams& SetMaxMatchCandidates(size_t max_match_candidates); size_t MaxMatchCandidates() const; - MatcherParams& SetWantGlobalRank(bool global_rank); - bool WantGlobalRank() const; - MatcherParams& SetStemMinLength(size_t stem_min); size_t StemMinLength() const; @@ -84,19 +72,13 @@ public: double ProximityFactor(); private: - size_t _prefix_extend_length; - size_t _prefix_min_length; size_t _match_winsize; double _match_winsize_fallback_multiplier; size_t _max_match_candidates; - bool _want_global_rank; size_t _stem_min; size_t _stem_extend; Fast_WordFolder* _wordfolder; // The wordfolder object needed as 1st parameter to folderfun double _proximity_factor; - - MatcherParams(MatcherParams &); - MatcherParams &operator=(MatcherParams &); }; diff --git a/searchsummary/src/vespa/juniper/latintokenizer.h b/searchsummary/src/vespa/juniper/latintokenizer.h new file mode 100644 index 00000000000..7a98d780c56 --- /dev/null +++ b/searchsummary/src/vespa/juniper/latintokenizer.h @@ -0,0 +1,377 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** +***************************************************************************** +* @author Bård Kvalheim +* @date Creation date: 2001-12-07 +* +* A configurable tokenizer template that accepts two predicates: One to +* determine separator symbols and one to determine punctuation symbols. A +* typedef is defined that uses isspace/1 and ispunct/1. +* +* This tokenizer does not alter the text, and does not copy it. 
+* +* This tokenizer is not meant to be used as a real tokenizer for all +* languages. It is only a fast and simple latin tokenizer, intended for +* very basic applications. +* +* The tokens are returned as (char *, char *, bool) triples. The two +* first elements delimit the token string, while the third element is +* true if the token is a punctuation symbol. +* +* If the last character in the input text is a punctuation symbol, the last +* token is the following: +* +* text = " something bl bla ." +* +* token.first -> . +* token.second -> \0 +* token._punctuation = true; +* +* In other words, token.second can point to the terminating '\0' in the input +* text. +* +*****************************************************************************/ + +#pragma once + +#include <cctype> +#include <cstring> + +/** +***************************************************************************** +* A simple tokenizer. See description above. +* +* @class Fast_LatinTokenizer +* @author Bård Kvalheim +* @date Creation date: 2001-12-07 +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +class Fast_LatinTokenizer { +private: + Fast_LatinTokenizer(const Fast_LatinTokenizer &); + Fast_LatinTokenizer& operator=(const Fast_LatinTokenizer &); + +public: + + /** Helper class. */ + class Fast_Token { + public: + + /** Member variables. */ + char *first; // Points to start of token. Named 'first' for std::pair compatibility. + char *second; // Points to end of token. Named 'second' for std::pair compatibility. + bool _punctuation; // Is the token a punctuation symbol? + + /** Constructors. 
*/ + Fast_Token(char *begin, char *end, bool punctuation) : first(begin), second(end), _punctuation(punctuation) {} + Fast_Token() : first(NULL), second(NULL), _punctuation(false) {} + Fast_Token(const Fast_Token &other) + : first(other.first), + second(other.second), + _punctuation(other._punctuation) + { + } + Fast_Token& operator=(const Fast_Token &other) + { + first = other.first; + second = other.second; + _punctuation = other._punctuation; + return *this; + } + + }; + + /** Constructors/destructor. */ + Fast_LatinTokenizer(); + explicit Fast_LatinTokenizer(char *text); + Fast_LatinTokenizer(char *text, size_t length); + virtual ~Fast_LatinTokenizer(); + + /** Constructors, sort of. */ + void SetNewText(char *text); + void SetNewText(char *text, size_t length); + + /** Are there any more tokens left? */ + bool MoreTokens(); + + /** Return next token. */ + Fast_Token GetNextToken(); + + /** Return text buffer. */ + char *GetOriginalText(); + + /** Observers in case we need not perform some action specific + * to the IsSeparator or IsPunctuation implementations + * (such as extra initialization or statistics gathering or...) + */ + IsPunctuation& GetIsPunctuation() { return _isPunctuation; } + IsSeparator& GetIsSeparator() { return _isSeparator; } + +private: + + /** Member variables. */ + char *_org; // Holds the original text buffer. + char *_next; // Points to the current buffer position. + char *_end; // Points to the end of the buffer. + bool _moreTokens; // More text to process? + IsSeparator _isSeparator; // Separator symbol predicate. + IsPunctuation _isPunctuation; // Punctuation symbol predicate. + + /** Helper methods. */ + void SkipBlanks(); + +}; + +/** +***************************************************************************** +* Default constructor. 
+* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer() : + _org(NULL), + _next(NULL), + _end(NULL), + _moreTokens(false), + _isSeparator(), + _isPunctuation() +{ +} + +/** +***************************************************************************** +* Constructor. Accepts a '\0' terminated text buffer. +* +* @param text +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text) : + _org(NULL), + _next(NULL), + _end(NULL), + _moreTokens(false), + _isSeparator(), + _isPunctuation() +{ + SetNewText(text); +} + +/** +***************************************************************************** +* Constructor. Accepts a text buffer and the buffer length +* +* @param text +* @param length +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_LatinTokenizer(char *text, size_t length) + : _org(NULL), + _next(NULL), + _end(NULL), + _moreTokens(false), + _isSeparator(), + _isPunctuation() +{ + SetNewText(text, length); +} + +/** +***************************************************************************** +* Destructor. +* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::~Fast_LatinTokenizer() { +} + +/** +***************************************************************************** +* Sets a new '\0' terminated string. 
+* +* @param text +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +void +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text) { + + _org = text; + _next = text; + _moreTokens = text != NULL; + _end = NULL; +} + +/** +***************************************************************************** +* Sets a new string, given the text buffer and its length. +* +* @param text +* @param length +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +void +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SetNewText(char *text, size_t length) { + + _org = text; + _next = text; + _moreTokens = text != NULL; + _end = (_next ? _next + length : NULL); +} + +/** +***************************************************************************** +* Skips all blanks and flags if there are more tokens. +* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +void +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::SkipBlanks() { + + if (!_moreTokens) return; + // Initialized with '\0' terminated buffer? + if (_end == NULL) { + while (*_next != '\0' && _isSeparator(*_next)) { + ++_next; + } + if (*_next == '\0') { + _moreTokens = false; + } + } + + // Initialized with specified buffer length. + else { + while (_next != _end && _isSeparator(*_next)) { + ++_next; + } + if (_next == _end) { + _moreTokens = false; + } + } + +} + +/** +***************************************************************************** +* Returns true if there are more tokens left in the text buffer. 
+* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +bool +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::MoreTokens() { + SkipBlanks(); + return _moreTokens; +} + +/** +***************************************************************************** +* Returns the next token as a Fast_Token. +* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +typename Fast_LatinTokenizer<IsSeparator, IsPunctuation>::Fast_Token +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetNextToken() { + + char *prev = _next; + + // Skip all blanks and flag if there are no more tokens. + SkipBlanks(); + + // Initialized with '\0' terminated buffer? Find the next blank or punctuation. + if (_end == NULL) { + while (*_next != '\0' && !_isSeparator(*_next) && !_isPunctuation(*_next)) { + ++_next; + } + + // Initialized with specified buffer length. + } else { + while (_next != _end && !_isSeparator(*_next) && !_isPunctuation(*_next)) { + ++_next; + } + } + + // Check if this token is a punctuation symbol, and generate token. + bool isToken = ((_next - prev == 0) && _isPunctuation(*prev)); + + if (isToken) { + ++_next; + } + + Fast_Token token(prev, _next, isToken); + + return token; + +} + +/** +***************************************************************************** +* Returns the original text buffer. +* +* @author Bård Kvalheim +*****************************************************************************/ + +template <typename IsSeparator, typename IsPunctuation> +char * +Fast_LatinTokenizer<IsSeparator, IsPunctuation>::GetOriginalText() { + return _org; +} + +/** +***************************************************************************** +* Helper class. 
+* +* When using isspace/1, ensure that the argument is cast to unsigned char to +* avoid problems with sign extension. See system documentation for details. +* +* @class Fast_IsSpace +* @author Bård Kvalheim +* @date Creation date: 2001-12-07 +*****************************************************************************/ + +struct Fast_IsSpace { + bool operator()(char c) {return (isspace(static_cast<unsigned char>(c)) != 0);} +}; + +/** +***************************************************************************** +* Helper class. +* +* When using ispunct/1, ensure that the argument is cast to unsigned char to +* avoid problems with sign extension. See system documentation for details. +* +* @class Fast_IsPunctuation +* @author Bård Kvalheim +* @date Creation date: 2001-12-07 +*****************************************************************************/ + +struct Fast_IsPunctuation { + bool operator()(char c) {return (ispunct(static_cast<unsigned char>(c)) != 0);} +}; + +/** +***************************************************************************** +* A simple tokenizer. See description above. +* +* @class Fast_SimpleLatinTokenizer +* @author Bård Kvalheim +* @date Creation date: 2001-12-07 +*****************************************************************************/ + +typedef Fast_LatinTokenizer<Fast_IsSpace, Fast_IsPunctuation> Fast_SimpleLatinTokenizer; diff --git a/searchsummary/src/vespa/juniper/queryparser.h b/searchsummary/src/vespa/juniper/queryparser.h index 5715daa3661..9c596892e31 100644 --- a/searchsummary/src/vespa/juniper/queryparser.h +++ b/searchsummary/src/vespa/juniper/queryparser.h @@ -1,13 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once - /* Simple prefix syntax advanced query parser for Juniper debug/testing */ #include "query.h" +#include "latintokenizer.h" #include <map> #include <string> -#include <vespa/fastlib/text/latintokenizer.h> namespace juniper { diff --git a/searchsummary/src/vespa/juniper/result.cpp b/searchsummary/src/vespa/juniper/result.cpp index 653e692e015..15ad9aa2a98 100644 --- a/searchsummary/src/vespa/juniper/result.cpp +++ b/searchsummary/src/vespa/juniper/result.cpp @@ -80,12 +80,12 @@ Result::Result(Config* config, QueryHandle* qhandle, } /* Create the new pipeline */ - _tokenizer.reset(new JuniperTokenizer(wordfolder, NULL, 0, NULL)); + _tokenizer = std::make_unique<JuniperTokenizer>(wordfolder, nullptr, 0, nullptr, nullptr); - _matcher.reset(new Matcher(this)); + _matcher = std::make_unique<Matcher>(this); _matcher->SetProximityFactor(mp.ProximityFactor()); - _registry.reset(new SpecialTokenRegistry(_matcher->getQuery())); + _registry = std::make_unique<SpecialTokenRegistry>(_matcher->getQuery()); if (qhandle->_log_mask) _matcher->set_log(qhandle->_log_mask); diff --git a/searchsummary/src/vespa/juniper/rpinterface.cpp b/searchsummary/src/vespa/juniper/rpinterface.cpp index 75a441fb957..f9e91073a9b 100644 --- a/searchsummary/src/vespa/juniper/rpinterface.cpp +++ b/searchsummary/src/vespa/juniper/rpinterface.cpp @@ -94,12 +94,6 @@ void Juniper::FlushRewriters() _modifier->FlushRewriters(); } -void ReleaseConfig(Config*& config) -{ - delete config; - config = NULL; -} - void ReleaseQueryHandle(QueryHandle*& handle) { |