diff options
Diffstat (limited to 'searchsummary/src/tests')
24 files changed, 3391 insertions, 0 deletions
diff --git a/searchsummary/src/tests/juniper/.gitignore b/searchsummary/src/tests/juniper/.gitignore new file mode 100644 index 00000000000..46b307da632 --- /dev/null +++ b/searchsummary/src/tests/juniper/.gitignore @@ -0,0 +1,16 @@ +*.log +*Suite +*Test +*suite +*test +.depend +Makefile +dummylib +semantic.cache +juniper_appender_test_app +juniper_queryvisitor_test_app +juniper_SrcTestSuite_app +juniper_auxTest_app +juniper_matchobjectTest_app +juniper_mcandTest_app +juniper_queryparserTest_app diff --git a/searchsummary/src/tests/juniper/CMakeLists.txt b/searchsummary/src/tests/juniper/CMakeLists.txt new file mode 100644 index 00000000000..d15e91f1f63 --- /dev/null +++ b/searchsummary/src/tests/juniper/CMakeLists.txt @@ -0,0 +1,73 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(juniper_mcandTest_app TEST + SOURCES + mcandTest.cpp + mcandTestApp.cpp + testenv.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_mcandTest_app COMMAND juniper_mcandTest_app) +vespa_add_executable(juniper_queryparserTest_app TEST + SOURCES + queryparserTest.cpp + queryparserTestApp.cpp + fakerewriter.cpp + testenv.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_queryparserTest_app COMMAND juniper_queryparserTest_app) +vespa_add_executable(juniper_matchobjectTest_app TEST + SOURCES + matchobjectTest.cpp + matchobjectTestApp.cpp + testenv.cpp + fakerewriter.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_matchobjectTest_app COMMAND juniper_matchobjectTest_app) +vespa_add_executable(juniper_appender_test_app TEST + SOURCES + appender_test.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_appender_test_app COMMAND juniper_appender_test_app) +vespa_add_executable(juniper_queryvisitor_test_app TEST + SOURCES + queryvisitor_test.cpp + DEPENDS + searchsummary + 
fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_queryvisitor_test_app COMMAND juniper_queryvisitor_test_app) +vespa_add_executable(juniper_auxTest_app TEST + SOURCES + auxTest.cpp + auxTestApp.cpp + testenv.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_auxTest_app COMMAND juniper_auxTest_app) +vespa_add_executable(juniper_SrcTestSuite_app TEST + SOURCES + mcandTest.cpp + queryparserTest.cpp + fakerewriter.cpp + SrcTestSuite.cpp + matchobjectTest.cpp + auxTest.cpp + testenv.cpp + DEPENDS + searchsummary + fastlib_fast_testsuite +) +vespa_add_test(NAME juniper_SrcTestSuite_app COMMAND juniper_SrcTestSuite_app) diff --git a/searchsummary/src/tests/juniper/SrcTestSuite.cpp b/searchsummary/src/tests/juniper/SrcTestSuite.cpp new file mode 100644 index 00000000000..c1e4dc2cd19 --- /dev/null +++ b/searchsummary/src/tests/juniper/SrcTestSuite.cpp @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "testenv.h" +#include "mcandTest.h" +#include "queryparserTest.h" +#include "matchobjectTest.h" +#include "auxTest.h" +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/fastlib/testsuite/suite.h> +/** + * The SrcTestSuite class runs all the unit tests for the src module. 
+ * + * @author Knut Omang + */ +class SrcTestSuite : public Suite { + +public: + SrcTestSuite(); +}; + +SrcTestSuite::SrcTestSuite() : + Suite("SrcTestSuite", &std::cout) +{ + // All tests for this module + AddTest(new MatchCandidateTest()); + AddTest(new MatchObjectTest()); + AddTest(new QueryParserTest()); + AddTest(new AuxTest()); +} + +int main(int argc, char **argv) { + juniper::TestEnv te(argc, argv, TEST_PATH("./testclient.rc").c_str()); + SrcTestSuite suite; + suite.Run(); + long failures = suite.Report(); + suite.Free(); + return (int)failures; +} diff --git a/searchsummary/src/tests/juniper/appender_test.cpp b/searchsummary/src/tests/juniper/appender_test.cpp new file mode 100644 index 00000000000..65e876f1a35 --- /dev/null +++ b/searchsummary/src/tests/juniper/appender_test.cpp @@ -0,0 +1,59 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/testapp.h> + +#define _NEED_SUMMARY_CONFIG_IMPL +#include <vespa/juniper/SummaryConfig.h> +#include <vespa/juniper/juniperdebug.h> +#include <vespa/juniper/appender.h> +#include <vespa/vespalib/stllike/string.h> +#include <vector> + +using namespace juniper; + +struct FixtureBase +{ + const char *_connectors; + SummaryConfig _cfg; + Appender _appender; + FixtureBase(ConfigFlag preserve_white_space) + : _connectors(""), + _cfg("[on]", "[off]", "[dots]", "\x1f", + reinterpret_cast<const unsigned char*>(_connectors), + ConfigFlag::CF_OFF, + preserve_white_space), + _appender(&_cfg) + { + } + void assertString(const vespalib::string &input, const vespalib::string &output) { + std::vector<char> buf; + _appender.append(buf, input.c_str(), input.size()); + EXPECT_EQUAL(output, vespalib::string(&buf[0], buf.size())); + } +}; + +struct DefaultFixture : public FixtureBase +{ + DefaultFixture() : FixtureBase(ConfigFlag::CF_OFF) {} +}; + +struct PreserveFixture : public FixtureBase +{ + PreserveFixture() : FixtureBase(ConfigFlag::CF_ON) 
{} +}; + +TEST_F("requireThatMultipleWhiteSpacesAreEliminated", DefaultFixture) +{ + f.assertString("text with\nwhite \nspace like this", + "text with white space like this"); +} + +TEST_F("requireThatMultipleWhiteSpacesArePreserved", PreserveFixture) +{ + f.assertString("text with\nwhite \nspace like this", + "text with\nwhite \nspace like this"); +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchsummary/src/tests/juniper/auxTest.cpp b/searchsummary/src/tests/juniper/auxTest.cpp new file mode 100644 index 00000000000..15f5ad1749e --- /dev/null +++ b/searchsummary/src/tests/juniper/auxTest.cpp @@ -0,0 +1,931 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "auxTest.h" +#include <vespa/fastos/file.h> +#include <vespa/log/log.h> +LOG_SETUP(".auxtest"); + +// Using separator definitions only from here: + +#define COLOR_HIGH_ON "\e[1;31m" +#define COLOR_HIGH_OFF "\e[0m" + +#ifndef FASTOS_DEBUG +static int debug_level = 0; +#endif + +bool color_highlight = false; +bool verbose = false; +const unsigned char* connectors = reinterpret_cast<const unsigned char*>("-'"); + +using juniper::SpecialTokenRegistry; + +AuxTest::AuxTest() : Test("Auxiliary"), test_methods_(), _sumconf(0) +{ + init(); +} + +AuxTest::~AuxTest() +{ + DeleteSummaryConfig(_sumconf); +} + + +void AuxTest::init() +{ + test_methods_["TestExample"] = + &AuxTest::TestExample; + test_methods_["TestPropertyMap"] = + &AuxTest::TestPropertyMap; + test_methods_["TestRerase"] = + &AuxTest::TestRerase; + test_methods_["TestUTF811"] = + &AuxTest::TestUTF811; + test_methods_["TestUTF812"] = + &AuxTest::TestUTF812; + test_methods_["TestDoubleWidth"] = + &AuxTest::TestDoubleWidth; + test_methods_["TestPartialUTF8"] = + &AuxTest::TestPartialUTF8; + test_methods_["TestLargeBlockChinese"] = + &AuxTest::TestLargeBlockChinese; + test_methods_["TestUTF8context"] = + &AuxTest::TestUTF8context; + test_methods_["TestJapanese"] = + 
/**
 * Counts malformed UTF-8 sequences in a byte buffer.
 *
 * A byte is classified by its two top bits: 11xxxxxx starts a multi-byte
 * sequence, 10xxxxxx continues one, anything else is a single-byte character.
 * The following situations each count as one broken sequence:
 *   - a lead byte that announces more than five continuation bytes,
 *   - a continuation byte that no lead byte asked for,
 *   - a single-byte character or a new lead byte arriving while continuation
 *     bytes are still outstanding,
 *   - the buffer ending while continuation bytes are still outstanding.
 *
 * Only the framing is checked; overlong encodings and code point ranges are
 * not validated.
 *
 * @param data buffer to inspect (need not be NUL terminated)
 * @param len  number of bytes in data
 * @return number of broken sequences found, 0 for well-framed input
 */
int
countBrokenUTF8(const char *data, uint32_t len)
{
    int broken = 0;
    int remain = 0; // continuation bytes still expected for the current sequence

    for (uint32_t i = 0; i < len; ++i) {
        unsigned char val = static_cast<unsigned char>(data[i]);
        switch (val & 0xc0) {
        case 0xc0: // lead byte
            if (remain > 0) {
                // the previous sequence was cut short by this new lead byte
                ++broken;
            }
            remain = 1;
            // every further leading 1-bit announces one more continuation byte
            val = static_cast<unsigned char>(val << 2);
            while ((val & 0x80) != 0) {
                ++remain;
                val = static_cast<unsigned char>(val << 1);
            }
            if (remain > 5) { // 0xfe / 0xff can never start a sequence
                ++broken;
                remain = 0;
            }
            break;
        case 0x80: // continuation byte
            if (remain == 0) {
                ++broken;
            } else {
                --remain;
            }
            break;
        default: // single-byte character
            if (remain > 0) {
                ++broken;
                remain = 0;
            }
            break;
        }
    }
    if (remain > 0) {
        // the buffer ended in the middle of a multi-byte character
        ++broken;
    }
    return broken;
}
juniper::ReleaseResult(res); +} + + + +void +AuxTest::TestPartialUTF8() +{ + const int inputSize = 5769; // NB: update this if input is changed + char input[inputSize]; + { + FastOS_File file((GetSourceDirectory() + "partialutf8.input.utf8").c_str()); + _test(file.OpenReadOnly()); + _test(file.GetSize() == inputSize); + _test(file.Read(input, inputSize)); + _test(countBrokenUTF8(input, inputSize) == 0); + } + + juniper::PropertyMap myprops; + myprops // config taken from vespa test case + .set("juniper.dynsum.escape_markup", "off") + .set("juniper.dynsum.highlight_off", "") + .set("juniper.dynsum.continuation", "") + .set("juniper.dynsum.fallback", "prefix") + .set("juniper.dynsum.highlight_on", ""); + Fast_NormalizeWordFolder wf; + juniper::Juniper juniper(&myprops, &wf); + juniper::Config myConfig("best", juniper); + + juniper::QueryParser q("ipod"); + juniper::QueryHandle qh(q, nullptr, juniper.getModifier()); + juniper::Result* res = juniper::Analyse(&myConfig, &qh, + input, inputSize, 0, 0, 0); + _test(res != nullptr); + + juniper::Summary* sum = juniper::GetTeaser(res, nullptr); + _test(sum->Length() != 0); + + // check for partial/broken utf-8 + _test(countBrokenUTF8(sum->Text(), sum->Length()) == 0); + + juniper::ReleaseResult(res); +} + +void AuxTest::TestLargeBlockChinese() +{ + const int inputSize = 10410; // NB: update this if input is changed + char input[inputSize]; + { + FastOS_File file((GetSourceDirectory() + "largeblockchinese.input.utf8").c_str()); + _test(file.OpenReadOnly()); + _test(file.GetSize() == inputSize); + _test(file.Read(input, inputSize)); + _test(countBrokenUTF8(input, inputSize) == 0); + } + + juniper::PropertyMap myprops; + myprops // config taken from reported bug + .set("juniper.dynsum.length", "50") + .set("juniper.dynsum.min_length", "20") + .set("juniper.dynsum.escape_markup", "off") + .set("juniper.dynsum.highlight_off", "") + .set("juniper.dynsum.continuation", "") + .set("juniper.dynsum.fallback", "prefix") + 
.set("juniper.dynsum.highlight_on", ""); + Fast_NormalizeWordFolder wf; + juniper::Juniper juniper(&myprops, &wf); + juniper::Config myConfig("best", juniper); + + juniper::QueryParser q("希望"); + juniper::QueryHandle qh(q, nullptr, juniper.getModifier()); + juniper::Result* res = juniper::Analyse(&myConfig, &qh, + input, inputSize, 0, 0, 0); + _test(res != nullptr); + + juniper::Summary* sum = juniper::GetTeaser(res, nullptr); + _test(sum->Length() != 0); + + // check that the entire block of chinese data is not returned in the summary + _test(sum->Length() < 100); + + // check for partial/broken utf-8 + _test(countBrokenUTF8(sum->Text(), sum->Length()) == 0); + + juniper::ReleaseResult(res); +} + +void AuxTest::TestExample() +{ + juniper::QueryParser q("AND(consume,sleep,tree)"); + juniper::QueryHandle qh(q, nullptr, juniper::_Juniper->getModifier()); + + // some content + const char* content = "the monkey consumes bananas and sleeps afterwards." + "&%#%&! cries the sleepy monkey and jumps down from the tree." 
+ "the last token here is split across lines consumed"; + int content_len = strlen(content); + juniper::Result* res = + juniper::Analyse(juniper::TestConfig, + &qh, + content, content_len, + 0, 0, 0); + _test(res != nullptr); + + res->Scan(); + Matcher& m = *res->_matcher; + _test(m.TotalMatchCnt(0) == 2 && m.ExactMatchCnt(0) == 0); + juniper::ReleaseResult(res); +} + + +void +AuxTest::TestPropertyMap() +{ + juniper::PropertyMap map; + IJuniperProperties *props = ↦ + map.set("foo", "bar").set("one", "two"); + _test(props->GetProperty("bogus") == nullptr); + _test(strcmp(props->GetProperty("bogus", "default"), "default") == 0); + _test(strcmp(props->GetProperty("foo"), "bar") == 0); + _test(strcmp(props->GetProperty("one", "default"), "two") == 0); +} + + +void AuxTest::TestRerase() +{ + std::list<int> ls; + + for (int i = 0; i < 10; i++) + ls.push_back(i); + + for (std::list<int>::reverse_iterator rit = ls.rbegin(); + rit != ls.rend();) + { + if (*rit == 5 || *rit == 6) + { + // STL hackers heaven - puh this was cumbersome.. 
/**
 * Debug dump with positions for reference.
 *
 * Prints the text in rows of at most 100 bytes. Bytes with the high bit set
 * are printed as a placeholder character. Each row is followed by a ruler
 * line that prints the byte offset at every tenth position of that row.
 *
 * @param s   text to dump (need not be NUL terminated)
 * @param len number of bytes of s to dump
 */
void test_dump(const char* s, unsigned int len)
{
    printf("test_dump: length %u\n", len);
    for (unsigned int i = 0; i < len;)
    {
        unsigned int start = i;
        // one row of text: stop at the end of input or at a multiple of 100
        for (; i < len;)
        {
            if ((signed char) s[i] < 0) {
                printf("�");
            } else {
                printf("%c", s[i]);
            }
            i++;
            if (!(i % 100)) break;
        }
        printf("\n");
        // ruler for the row just printed; this also leaves i at the start of
        // the next row (or at/after len, which ends the outer loop)
        i = start + 10;
        for (; i < len && i % 100; i+= 10)
            printf("%7s%3u", "", i); // i is unsigned, so %u rather than %d
        printf("\n");
    }
}
moved %d, pos %d", moved, i); + if (i == 0 || i == 8) + _test(moved == 2); + else if (i >= (int)size) + _test(moved == -1); + else + _test(moved == 1); + + // backward tests + p = (const unsigned char*)(s + i); + moved = Fast_UnicodeUtil::UTF8move((const unsigned char*)s, size, p, -1); + LOG(spam, "backw.moved %d, pos %d", moved, i); + if (i == 10 || i == 9 || i == 2) + _test(moved == 2); + else if (i == 0 || i > (int)size) + _test(moved == -1); + else + _test(moved == 1); + + // move-to-start tests: + p = (const unsigned char*)(s + i); + moved = Fast_UnicodeUtil::UTF8move((const unsigned char*)s, size, p, 0); + LOG(spam, "to-start.moved %d, pos %d", moved, i); + if (i == 9 || i == 1) + _test(moved == 1); + else if (i >= (int)size) + _test(moved == -1); + else + _test(moved == 0); + } + + // Assumption about equality of UCS4 IsWordChar and isalnum for + // ascii (c < 128) : + for (unsigned char c = 0; c < 128; c++) + { + const unsigned char* pc = &c; + ucs4_t u = Fast_UnicodeUtil::GetUTF8Char(pc); + bool utf8res = Fast_UnicodeUtil::IsWordChar(u); + bool asciires = isalnum(c); + _test(utf8res == asciires); + if (utf8res != asciires) + fprintf(stderr, ":%c:%d != :%c:%d\n", u, utf8res, c, asciires); + } +} + + +void AuxTest::TestUTF8context() +{ + const char* iso_cont = char_from_u8(u8"AND(m\u00b5ss,fast,s\u00f8kemotor,\u00e5relang)"); + juniper::QueryParser q(iso_cont); + juniper::QueryHandle qh(q, nullptr, juniper::_Juniper->getModifier()); + + // some content + std::string s(char_from_u8(u8"Fast leverer s\u00d8kemotorer og andre nyttige ting for \u00e5 finne frem p\u00e5 ")); + s.append(char_from_u8(u8"internett. Teknologien er basert p\u00e5 \u00c5relang")); + s += UNIT_SEPARATOR; + s.append(char_from_u8(u8"norsk innsats og forskning i")); + s += GROUP_SEPARATOR; + s.append(char_from_u8(u8"trondheimsmilj\u00f8et. 
M\u00b5ss med denne nye funksjonaliteten for \u00e5 vise frem")); + s += UNIT_SEPARATOR; + s.append(char_from_u8(u8" beste forekomst av s\u00f8ket med s\u00f8kemotor til brukeren blir det enda bedre. ")); + s.append(char_from_u8(u8"Hvis bare UTF8-kodingen virker som den skal for tegn som tar mer enn \u00e9n byte.")); + + juniper::Result* res = juniper::Analyse(juniper::TestConfig, &qh, s.c_str(), s.size(), 0, 0, 0); + _test(res != nullptr); + + size_t charsize; + Matcher& m = *res->_matcher; + + res->Scan(); + _test(m.TotalMatchCnt(0) == 1 && m.ExactMatchCnt(0) == 1); + _test(m.TotalMatchCnt(1) == 1 && m.ExactMatchCnt(2) == 1); + _test(m.TotalMatchCnt(2) == 2 && m.ExactMatchCnt(2) == 1); + _test(m.TotalMatchCnt(3) == 1 && m.ExactMatchCnt(2) == 1); + + char separators[3]; + separators[0] = UNIT_SEPARATOR; + separators[1] = GROUP_SEPARATOR; + separators[2] = '\0'; + + if (color_highlight) + _sumconf = CreateSummaryConfig(COLOR_HIGH_ON, COLOR_HIGH_OFF, "...", separators, connectors); + else + _sumconf = CreateSummaryConfig("<hit>", "</hit>", "...", separators, connectors); + for (int i = 1; i <= 10; i++) + { + // Short summaries with many matches + test_summary(m, s.c_str(), s.size(), i*30, i / 3, i*10, charsize); + // fewer matches, longer summaries + test_summary(m, s.c_str(), s.size(), i*60, i / 6, i*20, charsize); + } + // Summary som er stort nok til � ta hele teksten + test_summary(m, s.c_str(), s.size(), 800, 100, 300, charsize); + // fprintf(stderr, "charsize %d s.size %d\n", charsize, s.size()); + _test(charsize == s.size() - 3 - 11); // Subtract eliminated separators and dual bytes + + // "Syke" settinger for summary: + test_summary(m, s.c_str(), s.size(), 10000, 0, 1000, charsize); + // fprintf(stderr, "charsize %d s.size %d\n", charsize, s.size()); + _test(charsize == s.size() - 3 - 11); // Subtract eliminated separators and dual bytes + + if (GetNumFailed() > 0 && debug_level > 0) + { + fprintf(stderr, "Characters in original text: %ld\n", s.size()); + 
test_dump(s.c_str(), s.size()); + m.dump_statistics(); + } + juniper::ReleaseResult(res); +} + + +struct TermTextPair +{ + const char* term; + const char* text; +}; + +static TermTextPair testjap[] = +{ + // japanese string as term + { "私はガラスを食べられます", + "this is some japanese: 私はガラスを食べられます。それは私を傷つけません。 ending here" }, + + // HUGE japanese prefix and postfix and simple match in middle: + { "bond", + "私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。 bond 私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。私はガラスを食べられます。それは私を傷つけません。" }, + { "japanese", "Simple。match。check。for。japanese。sep" }, + { "hit", " -. 
hit at start" }, + { "hit", "hit at end .,: " }, + { "hit", "---------------------------------------------------------------------------------------------------------------------this is a text that is long enough to generate a hit that does have dots on both sides ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; " }, + { nullptr, nullptr } +}; + + +void AuxTest::TestJapanese() +{ + for (int i = 0; testjap[i].term != nullptr; i++) + { + const char* qstr = testjap[i].term; + juniper::QueryParser q(qstr); + juniper::QueryHandle qh(q, nullptr, juniper::_Juniper->getModifier()); + + const char* content = testjap[i].text; + int content_len = strlen(content); + juniper::Result* res = juniper::Analyse(juniper::TestConfig, &qh, + content, content_len, + 0, 0, 0); + _test(res != nullptr); + + size_t charsize; + Matcher& m = *res->_matcher; + + res->Scan(); + if (color_highlight) + _sumconf = CreateSummaryConfig(COLOR_HIGH_ON, COLOR_HIGH_OFF, "...", "", connectors); + else + _sumconf = CreateSummaryConfig("<hit>", "</hit>", "...", "", connectors); + + SummaryDesc* sumdesc = m.CreateSummaryDesc(256, 256, 4, 80); + _test(sumdesc != nullptr); + if (!sumdesc) + return; + std::string sum = BuildSummary(content, content_len, sumdesc, _sumconf, charsize); + + switch (i) + { + case 0: + // Matching a multibyte sequence + _test(m.TotalMatchCnt(0) == 1 && m.ExactMatchCnt(0) == 1); + // printf("total %d exact %d\n", m.TotalMatchCnt(0),m.ExactMatchCnt(0)); + break; + case 1: + // Matching short word in loong multibyte sequence + _test(m.TotalMatchCnt(0) == 1 && m.ExactMatchCnt(0) == 1); + _test(sum.size() <= 400); + break; + case 2: + // Matching word in between multibyte separators + _test(m.TotalMatchCnt(0) == 1 && m.ExactMatchCnt(0) == 1); + break; + case 3: + // Check that result is the complete string (markup excluded) + _test(sum.size() - 11 == charsize); + 
// printf("sz %d charsz %d :%s:\n", sum.size(), charsize, sum.c_str()); + break; + case 4: + // Check that result is the complete string (markup excluded) + _test(sum.size() - 11 == charsize); + // printf("sz %d charsz %d :%s:\n", sum.size(), charsize, sum.c_str()); + break; + case 5: + // Check that we get no noise at the start or end of this + _test(sum.size() == 103 && charsize == 86); + // printf("sz %d charsz %d :%s:\n", sum.size(), charsize, sum.c_str()); + break; + default: + break; + } + juniper::ReleaseResult(res); + DeleteSummaryDesc(sumdesc); + DeleteSummaryConfig(_sumconf); + } +} + + +void AuxTest::test_summary(Matcher& m, const char* content, size_t content_len, + int size, int matches, int surround, size_t& charsize) +{ + SummaryDesc* sum = m.CreateSummaryDesc(size, size, matches, surround); + _test(sum != nullptr); + if (!sum) + { + // No summary generated! + return; + } + std::string res = BuildSummary(content, content_len, sum, _sumconf, charsize); + + if ((verbose || GetNumFailed() > 0) && debug_level > 0) { + printf("\nRequested size: %d, matches: %d, surround: %d, Summary size %lu :%s:\n", + size, matches, surround, static_cast<unsigned long>(res.size()), res.c_str()); + } + DeleteSummaryDesc(sum); +} + +void AuxTest::TestStartHits() +{ + juniper::QueryParser q("elvis"); + juniper::QueryHandle qh(q, "dynlength.120", juniper::_Juniper->getModifier()); + + const char* content = + "Elvis, this is a long match before matching Elvis again and then som more text at" + " the end. But this text at the end must be much longer than this to trigger the case." + " In fact it must be much longer. And then som more text at the end. 
But this text at " + "the end must be much longer than this to trigger the case"; + int content_len = strlen(content); + juniper::Result* res = juniper::Analyse(juniper::TestConfig, &qh, + content, content_len, + 0, 0, 0); + _test(res != nullptr); + + juniper::Summary* sum = juniper::GetTeaser(res, nullptr); + (void) sum; + // TODO: ReEnable _test(sum->Length() != 0); + juniper::ReleaseResult(res); +} + + +void AuxTest::TestEndHit() +{ + juniper::QueryParser q("match"); + juniper::QueryHandle qh(q, "dynlength.120", juniper::_Juniper->getModifier()); + + const char* content = + "In this case we need a fairly long text that does not fit entirely into the resulting" + " summary, but that has a hit towards the end of the document where the expected length" + " extends the end of the doc. This means that the prefix must be more than 256 bytes" + " long. Here is the stuff we are looking for to match in a case where we have " + "surround_len bytes closer than good towardstheend�����������������������������������"; + size_t content_len = strlen(content) - 55; + + juniper::Result* res = juniper::Analyse(juniper::TestConfig, &qh, + content, content_len, + 0, 0, 0); + _test(res != nullptr); + + juniper::Summary* sum = juniper::GetTeaser(res, nullptr); + _test(sum->Length() != 0); + juniper::ReleaseResult(res); +} + +void AuxTest::TestJuniperStack() +{ + // Stack simplification tests + QueryExpr* q = new QueryNode(1, 0, 0); + QueryExpr* q1 = new QueryNode(1, 0, 0); + QueryExpr* q2 = new QueryTerm("Hepp", 4, 0); + q->AddChild(q1); + q1->AddChild(q2); + + SimplifyStack(q); + + std::string s; + q->Dump(s); + _test(strcmp(s.c_str(),"Hepp:100") == 0); + delete q; + + if (GetNumFailed() > 0) + fprintf(stderr, "TestJuniperStack: %s\n", s.c_str()); + + q = new QueryNode(2, 0, 0); + q->_arity = 0; + SimplifyStack(q); + std::string s1; + _test(q == nullptr); + + if (GetNumFailed() > 0) + fprintf(stderr, "TestJuniperStack: %s\n", s.c_str()); +} + +class TokenProcessor : public 
ITokenProcessor { +private: + const std::string & _text; + std::vector<std::string> _tokens; +public: + TokenProcessor(const std::string & text) : _text(text), _tokens() {} + void handle_token(Token & t) override { + _tokens.push_back(std::string(_text.c_str() + t.bytepos, t.bytelen)); + //LOG(info, "handle_token(%s): bytepos(%d), wordpos(%d), bytelen(%d), curlen(%d)", + //_tokens.back().c_str(), + //(int)t.bytepos, (int)t.wordpos, t.bytelen, t.curlen); + } + void handle_end(Token & t) override { + _tokens.push_back(std::string(_text.c_str() + t.bytepos, t.bytelen)); + //LOG(info, "handle_end(%s): bytepos(%d), wordpos(%d), bytelen(%d), curlen(%d)", + //_tokens.back().c_str(), + //(int)t.bytepos, (int)t.wordpos, t.bytelen, t.curlen); + } + const std::vector<std::string> & getTokens() const { return _tokens; } +}; + + +bool +AuxTest::assertChar(ucs4_t act, char exp) +{ + //LOG(info, "assertChar(%d(%c), %c)", act, (char)act, exp); + return _test((char) act == exp); +} + +typedef std::unique_ptr<QueryNode> QueryNodeUP; +struct QB { + QueryNodeUP q; + QB(size_t numTerms) : q(new QueryNode(numTerms, 0, 0)) {} + QB(QB & rhs) : q(std::move(rhs.q)) { } + QB & add(const char * t, bool st = true) { + QueryTerm * qt = new QueryTerm(t, strlen(t), 0); + if (st) qt->_options |= X_SPECIALTOKEN; + q->AddChild(qt); + return *this; + } +}; +struct Ctx { + std::string text; + QB qb; + SpecialTokenRegistry str; + Fast_NormalizeWordFolder wf; + TokenProcessor tp; + JuniperTokenizer jt; + Ctx(const std::string & text_, QB & qb_); + ~Ctx(); +}; + +Ctx::Ctx(const std::string & text_, QB & qb_) : text(text_), qb(qb_), str(qb.q.get()), wf(), tp(text), jt(&wf, text.c_str(), text.size(), &tp, &str) { jt.scan(); } +Ctx::~Ctx() { } + +void +AuxTest::TestSpecialTokenRegistry() +{ + { + typedef SpecialTokenRegistry::CharStream CharStream; + ucs4_t buf[16]; + { + std::string text = " c+-"; + CharStream cs(text.c_str(), text.c_str() + text.size(), buf, buf + 16); + _test(!cs.isStartWordChar()); + 
_test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), ' ')); + _test(cs.hasMoreChars()); + cs.reset(); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), ' ')); + _test(assertChar(cs.getNextChar(), 'c')); + _test(cs.hasMoreChars()); + cs.reset(); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), ' ')); + _test(assertChar(cs.getNextChar(), 'c')); + _test(assertChar(cs.getNextChar(), '+')); + _test(cs.hasMoreChars()); + cs.reset(); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), ' ')); + _test(assertChar(cs.getNextChar(), 'c')); + _test(assertChar(cs.getNextChar(), '+')); + _test(assertChar(cs.getNextChar(), '-')); + _test(!cs.hasMoreChars()); + cs.reset(); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), ' ')); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), 'c')); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), '+')); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), '-')); + _test(!cs.hasMoreChars()); + } + { // test reset with increase to next char + std::string text = " c+-"; + CharStream cs(text.c_str(), text.c_str() + text.size(), buf, buf + 16); + _test(cs.resetAndInc()); + _test(cs.isStartWordChar()); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), 'c')); + _test(assertChar(cs.getNextChar(), '+')); + _test(assertChar(cs.getNextChar(), '-')); + _test(!cs.hasMoreChars()); + cs.reset(); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), 'c')); + _test(assertChar(cs.getNextChar(), '+')); + _test(assertChar(cs.getNextChar(), '-')); + _test(!cs.hasMoreChars()); + _test(cs.resetAndInc()); + _test(!cs.isStartWordChar()); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), '+')); + _test(assertChar(cs.getNextChar(), '-')); + _test(!cs.hasMoreChars()); + _test(cs.resetAndInc()); + _test(!cs.isStartWordChar()); + _test(cs.hasMoreChars()); + _test(assertChar(cs.getNextChar(), '-')); + 
_test(!cs.hasMoreChars()); + _test(!cs.resetAndInc()); + _test(!cs.hasMoreChars()); + } + { // test lower case + std::string text = "C"; + CharStream cs(text.c_str(), text.c_str() + text.size(), buf, buf + 16); + _test(assertChar(cs.getNextChar(), 'c')); + } + } + { // test tokenizer with special token registry + + { // only special token registered + Ctx c("foo", QB(2).add("c++").add("foo", false)); + _test(c.str.getSpecialTokens().size() == 1); + } + { // various matches + std::string annotation = "\357\277\271dvdplusminus\357\277\272dvd+-\357\277\273"; + std::string text = "c++ !my C++ text ?.net dvd+- stuff " + annotation; + Ctx c(text, QB(3).add("c++").add(".net").add("dvd+-", false)); + _test(c.str.getSpecialTokens().size() == 2); + _test(c.tp.getTokens().size() == 9); + _test(c.tp.getTokens()[0] == "c++"); + _test(c.tp.getTokens()[1] == "my"); + _test(c.tp.getTokens()[2] == "C++"); + _test(c.tp.getTokens()[3] == "text"); + _test(c.tp.getTokens()[4] == ".net"); + _test(c.tp.getTokens()[5] == "dvd"); + _test(c.tp.getTokens()[6] == "stuff"); + _test(c.tp.getTokens()[7] == annotation); + _test(c.tp.getTokens()[8] == ""); + } + { // cannot start inside a word + Ctx c("foo ac++", QB(1).add("c++")); + _test(c.tp.getTokens().size() == 3); + _test(c.tp.getTokens()[0] == "foo"); + _test(c.tp.getTokens()[1] == "ac"); + _test(c.tp.getTokens()[2] == ""); + } + { // can end inside a word (TODO: can be fixed if it is a problem) + Ctx c("++ca foo", QB(1).add("++c")); + _test(c.tp.getTokens().size() == 4); + _test(c.tp.getTokens()[0] == "++c"); + _test(c.tp.getTokens()[1] == "a"); + _test(c.tp.getTokens()[2] == "foo"); + _test(c.tp.getTokens()[3] == ""); + } + { // many scans but only match at the end + Ctx c("a+b- a+b+c- a+b+c+", QB(1).add("a+b+c+")); + _test(c.tp.getTokens().size() == 7); + _test(c.tp.getTokens()[0] == "a"); + _test(c.tp.getTokens()[1] == "b"); + _test(c.tp.getTokens()[2] == "a"); + _test(c.tp.getTokens()[3] == "b"); + _test(c.tp.getTokens()[4] == "c"); + 
_test(c.tp.getTokens()[5] == "a+b+c+"); + _test(c.tp.getTokens()[6] == ""); + } + { // two special tokens (one being a substring of the other) + Ctx c("c+c+c-", QB(2).add("c+c+c+").add("+c+")); + _test(c.tp.getTokens().size() == 4); + _test(c.tp.getTokens()[0] == "c"); + _test(c.tp.getTokens()[1] == "+c+"); + _test(c.tp.getTokens()[2] == "c"); + _test(c.tp.getTokens()[3] == ""); + } + { // cjk + Ctx c("fish: \xE9\xB1\xBC!", QB(1).add("\xE9\xB1\xBC!")); + _test(c.tp.getTokens().size() == 3); + _test(c.tp.getTokens()[0] == "fish"); + _test(c.tp.getTokens()[1] == "\xE9\xB1\xBC!"); + _test(c.tp.getTokens()[2] == ""); + } + { // special token with non-word first + Ctx c("+++c ..net", QB(2).add("++c").add(".net")); + _test(c.tp.getTokens().size() == 3); + _test(c.tp.getTokens()[0] == "++c"); + _test(c.tp.getTokens()[1] == ".net"); + _test(c.tp.getTokens()[2] == ""); + } + } +} + +void +AuxTest::TestWhiteSpacePreserved() +{ + vespalib::string input = "\x1f" + "best" + "\x1f" + " " + "\x1f" + "of" + "\x1f" + " " + "\n" + "\x1f" + "metallica" + "\x1f"; + + juniper::PropertyMap myprops; + myprops.set("juniper.dynsum.escape_markup", "off") + .set("juniper.dynsum.highlight_off", "</hi>") + .set("juniper.dynsum.continuation", "<sep />") + .set("juniper.dynsum.highlight_on", "<hi>") + .set("juniper.dynsum.preserve_white_space", "on"); + Fast_NormalizeWordFolder wf; + juniper::Juniper juniper(&myprops, &wf); + juniper::Config myConfig("myconfig", juniper); + + juniper::QueryParser q("best"); + juniper::QueryHandle qh(q, nullptr, juniper.getModifier()); + juniper::Result* res = juniper::Analyse(&myConfig, &qh, input.c_str(), input.size(), 0, 0, 0); + _test(res != nullptr); + + juniper::Summary* sum = juniper::GetTeaser(res, nullptr); + vespalib::string expected = "<hi>best</hi> of \nmetallica"; + vespalib::string actual(sum->Text(), sum->Length()); + _test(actual == expected); + juniper::ReleaseResult(res); +} + +void AuxTest::Run(MethodContainer::iterator &itr) { + try { + 
(this->*itr->second)(); + } catch (...) { + _fail("Got unknown exception in test method " + itr->first); + } +} + +void AuxTest::Run(const char* method) { + MethodContainer::iterator pos(test_methods_.find(method)); + if (pos != test_methods_.end()) { + Run(pos); + } else { + std::cerr << "ERROR: No test method named \"" + << method << "\"" << std::endl; + _fail("No such method"); + } +} + +void AuxTest::Run() { + for (MethodContainer::iterator itr(test_methods_.begin()); + itr != test_methods_.end(); + ++itr) + Run(itr); +} + + +void AuxTest::Run(int argc, char* argv[]) +{ + for (int i = 1; i < argc; ++i) + { + if (strcmp(argv[i], "-m") == 0 && argc > i + 1) + { + Run(argv[++i]); + return; + } + } + Run(); +} diff --git a/searchsummary/src/tests/juniper/auxTest.h b/searchsummary/src/tests/juniper/auxTest.h new file mode 100644 index 00000000000..dd6d79e024a --- /dev/null +++ b/searchsummary/src/tests/juniper/auxTest.h @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +// Auxiliary tests for juniper - based on Juniper 1.x proximitytest.cpp + +#include <map> +#include <vespa/fastlib/testsuite/test.h> +#include "testenv.h" + +class AuxTest : public Test +{ +private: + AuxTest(const AuxTest&); + AuxTest& operator=(const AuxTest&); +public: + AuxTest(); + virtual ~AuxTest(); + + typedef void(AuxTest::* tst_method_ptr) (); + typedef std::map<std::string, tst_method_ptr> MethodContainer; + MethodContainer test_methods_; + void init(); + + void Run(MethodContainer::iterator &itr); + void Run(const char* method); + void Run(int argc, char* argv[]); + void Run() override; +protected: + /** + * Since we are running within Emacs, the default behavior of + * print_progress which includes backspace does not work. + * We'll use a single '.' instead. + */ + void print_progress() override { *m_osptr << '.' 
<< std::flush; } +private: + // tests: + void TestPropertyMap(); + void TestRerase(); + void TestExample(); + void TestUTF8context(); + void TestJapanese(); + void TestStartHits(); + void TestEndHit(); + void TestJuniperStack(); + void TestUTF811(); + void TestUTF812(); + void TestDoubleWidth(); + void TestPartialUTF8(); + void TestLargeBlockChinese(); + void TestSpecialTokenRegistry(); + void TestWhiteSpacePreserved(); + + bool assertChar(ucs4_t act, char exp); + + // Utilities + char* IsoToUtf8 (const char* iso, size_t size); + char* Utf8ToIso (const char* iso, size_t size); + void test_summary(Matcher& m, const char* input, size_t input_len, + int size, int matches, int surround, size_t& charsize); + void TestUTF8(unsigned int size); + + bool _split_char; + SummaryConfig* _sumconf; +}; + diff --git a/searchsummary/src/tests/juniper/auxTestApp.cpp b/searchsummary/src/tests/juniper/auxTestApp.cpp new file mode 100644 index 00000000000..5090e2d7dfc --- /dev/null +++ b/searchsummary/src/tests/juniper/auxTestApp.cpp @@ -0,0 +1,17 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "auxTest.h" +#include <vespa/vespalib/testkit/testapp.h> + +void Usage(char* s) +{ + fprintf(stderr, "Usage: %s [-d debug_level]\n", s); +} + + +int main(int argc, char **argv) { + juniper::TestEnv te(argc, argv, TEST_PATH("./testclient.rc").c_str()); + AuxTest pta; + pta.SetStream(&std::cout); + pta.Run(argc, argv); + return pta.Report(); +} diff --git a/searchsummary/src/tests/juniper/fakerewriter.cpp b/searchsummary/src/tests/juniper/fakerewriter.cpp new file mode 100644 index 00000000000..bbaf7079525 --- /dev/null +++ b/searchsummary/src/tests/juniper/fakerewriter.cpp @@ -0,0 +1,61 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "fakerewriter.h" +#include <vespa/vespalib/util/stringfmt.h> + +namespace juniper +{ + +struct RewriteHandle +{ + RewriteHandle(std::string& in, uint32_t langid) + : _s(in), _ls(""), _cnt(0), _langid(langid) {} + + std::string& next() + { + if (_cnt > 3 || _langid > 4) + _ls = ""; + else + _ls = vespalib::make_string("%s%d", _s.c_str(), _cnt++); + return _ls; + } + std::string _s; + std::string _ls; + int _cnt; + uint32_t _langid; +}; +} // end namespace juniper + +using namespace juniper; + +const char* FakeRewriter::Name() const +{ + return _name.c_str(); +} + + +RewriteHandle* FakeRewriter::Rewrite(uint32_t langid, const char* term) +{ + std::string t(term); + if (langid > 4) return NULL; + return new RewriteHandle(t, langid); +} + +RewriteHandle* FakeRewriter::Rewrite(uint32_t langid, const char* term, size_t length) +{ + std::string t(term, length); + if (langid > 4) return NULL; + return new RewriteHandle(t, langid); +} + + +const char* FakeRewriter::NextTerm(RewriteHandle* exp, size_t& length) +{ + std::string& t = exp->next(); + if (t.size() == 0) + { + delete exp; + return NULL; + } + length = t.size(); + return t.c_str(); +} diff --git a/searchsummary/src/tests/juniper/fakerewriter.h b/searchsummary/src/tests/juniper/fakerewriter.h new file mode 100644 index 00000000000..e1e5de59feb --- /dev/null +++ b/searchsummary/src/tests/juniper/fakerewriter.h @@ -0,0 +1,18 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <vespa/juniper/rewriter.h> +#include <string> + +class FakeRewriter: public juniper::IRewriter +{ +public: + FakeRewriter() : _name() {} + const char* Name() const override; + juniper::RewriteHandle* Rewrite(uint32_t langid, const char* term) override; + juniper::RewriteHandle* Rewrite(uint32_t langid, const char* term, size_t length) override; + const char* NextTerm(juniper::RewriteHandle* exp, size_t& length) override; +private: + std::string _name; +}; + diff --git a/searchsummary/src/tests/juniper/largeblockchinese.input.utf8 b/searchsummary/src/tests/juniper/largeblockchinese.input.utf8 new file mode 100644 index 00000000000..9b85e8d06e6 --- /dev/null +++ b/searchsummary/src/tests/juniper/largeblockchinese.input.utf8 @@ -0,0 +1 @@ +我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待
希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待希望希望希望希望希望希望希望我只能期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期待期 diff --git a/searchsummary/src/tests/juniper/matchobjectTest.cpp b/searchsummary/src/tests/juniper/matchobjectTest.cpp new file mode 100644 index 00000000000..07e3cf84767 --- /dev/null +++ b/searchsummary/src/tests/juniper/matchobjectTest.cpp @@ -0,0 +1,404 
@@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * Author: Knut Omang + */ +#include "matchobjectTest.h" +#include "fakerewriter.h" + +// Comment out cerr below to ignore unimplemented tests +#define NOTEST(name) \ +std::cerr << std::endl << __FILE__ << ':' << __LINE__ << ": " \ + << "No test for method '" << (name) << "'" << std::endl; + +/************************************************************************* + * Test methods + * + * This section contains boolean methods for testing each public method + * in the class being tested + *************************************************************************/ + +/** + * Test of the Term method. + */ +void MatchObjectTest::testTerm() { + // Test that two equal keywords are matched properly: + TestQuery q("NEAR/2(word,PHRASE(near,word))"); + + const char* content = "This is a small text with word appearing near word"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. 
This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 3);// 3 occurrences + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() == 2); + + delete res; + // printf("%d %d\n", m.TotalHits(),ms.size()); + TestQuery q1("t*t"); + TestQuery q2("*ea*"); + TestQuery q3("*d"); + TestQuery q4("*word"); + Result* r1 = juniper::Analyse(juniper::TestConfig, &q1._qhandle, content, content_len, 0, 0, 0); + Result* r2 = juniper::Analyse(juniper::TestConfig, &q2._qhandle, content, content_len, 0, 0, 0); + Result* r3 = juniper::Analyse(juniper::TestConfig, &q3._qhandle, content, content_len, 0, 0, 0); + Result* r4 = juniper::Analyse(juniper::TestConfig, &q4._qhandle, content, content_len, 0, 0, 0); + if (r1 != 0) + { + r1->Scan(); + _test(r1->_matcher->TotalHits() == 1); + delete r1; + } + else + _test(r1 != 0); + + if (r2 != 0) + { + r2->Scan(); + _test(r2->_matcher->TotalHits() == 2); + delete r2; + } + else + _test(r2 != 0); + + if (r3 != 0) + { + r3->Scan(); + _test(r3->_matcher->TotalHits() == 2); + delete r3; + } + else + _test(r3 != 0); + + if (r4 != 0) + { + r4->Scan(); + _test_equal(r4->_matcher->TotalHits(), 2); + delete r4; + } + else + _test(r4 != 0); +} + +/** + * Test of the Match method. 
+ */ +void MatchObjectTest::testMatch() { + // Check that we hit on the longest match first + juniper::QueryParser p("AND(junipe,juniper)"); + juniper::QueryHandle qh(p, NULL, juniper::_Juniper->getModifier()); + + MatchObject* mo = qh.MatchObj(0); + juniper::Result res(juniper::TestConfig, &qh, "", 0, 0); + unsigned opts = 0; + match_iterator mi(mo, &res); + ucs4_t ucs4_str[10]; + Fast_UnicodeUtil::ucs4copy(ucs4_str, "junipers"); + Token token; + token.token = ucs4_str; + token.curlen = 8; + int idx = mo->Match(mi, token, opts); + _test(strcmp(mo->Term(idx)->term(),"juniper") == 0); + + { + // This test would loop in v.2.2.2 + TestQuery q("(word,"); + _test(q._qparser.ParseError()); + } + + { + // Test to trigger ticket #5734 Dev Data Search + std::string + doc("A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." + "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." + "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." + "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." + "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." + "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit." 
+ "A simple document with an extremelylongwordhit in the middle of it that is" + "long enough to allow the error to be triggered extremelylongwordhit."); + TestQuery q("OR(OR(extremelylongwordhits,extremelylongwordhit,extremelylongwordhits," + "extremelylongwordhit,extremelylongwordhits,extremelylongwordhit," + "extremelylongwordhit,extremelylongwordhits,extremelylongwordhit," + "extremelylongwordhit,extremelylongwordhits,extremelylongwordhit," + "extremelylongwordhit,extremelylongwordhits,extremelylongwordhit," + "extremelylongwordhit,extremelylongwordhits,extremelylongwordhit," + "extremelylongwordhit))"); + QueryHandle& qh1(q._qhandle); + juniper::Result res1(juniper::TestConfig, &qh1, + doc.c_str(), doc.size(), 0); + juniper::Summary* sum = res1.GetTeaser(NULL); + std::string s(sum->Text()); + _test_equal(s, + "A simple document with an <b>extremelylongwordhit</b> in the middle" + " of it that islong enough to allow...triggered " + "<b>extremelylongwordhit</b>.A simple document with an " + "<b>extremelylongwordhit</b> in the middle of it that islong enough to allow..."); + } +} + +/** + * Test matching in annotated buffers + */ +void MatchObjectTest::testMatchAnnotated() { + const char *doc = "A big and ugly teaser about " + "\xEF\xBF\xB9" + "buying" + "\xEF\xBF\xBA" + "buy" + "\xEF\xBF\xBB" + " stuff"; + TestQuery q("AND(big,buy)"); + QueryHandle &qh1(q._qhandle); + juniper::Result res1(juniper::TestConfig, &qh1, + doc, strlen(doc), 0); + juniper::Summary *sum = res1.GetTeaser(NULL); + std::string s(sum->Text()); + + _test_equal(s, + "A <b>big</b> and ugly teaser about <b>" + "\xEF\xBF\xB9" + "buying" + "\xEF\xBF\xBA" + "buy" + "\xEF\xBF\xBB" + "</b> stuff"); +} + + +/** + * Test of the the expansion based (langid) constructor + */ +void MatchObjectTest::testLangid() +{ + FakeRewriter frew; + juniper::_Juniper->AddRewriter("exp", &frew, true, false); + juniper::_Juniper->AddRewriter("red", &frew, false, true); + juniper::_Juniper->AddRewriter("expred", &frew, 
true, true); + + TestQuery q("AND(exp:a,red:b1,expred:c)"); + QueryHandle& qh(q._qhandle); + + { + { + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + _test_equal(stk, + "Node<a:3>[Node<a:4>[a0:100,a1:100,a2:100,a3:100]," + "b1:100,Node<a:4>[c0:100,c1:100,c2:100,c3:100]]"); + } + + std::string doc("see if we can match b or c somewhere in this a3 doc. " + "Note that we should not match b1 or c1 or a somewhere.."); + juniper::Result res(juniper::TestConfig, &qh, doc.c_str(), doc.size(),0); + + juniper::Summary* sum = res.GetTeaser(NULL); + std::string s(sum->Text()); + _test_equal(s, + "see if we can match <b>b</b> or <b>c</b> somewhere in this" + " <b>a3</b> doc. Note that we should not match b1 or c1 or a somewhere.."); + } + + { + // Do another test with the same query handle (testing reuse of qh with rewriters) + std::string doc("Try to run this on another doc just to see if b or c still can be" + " matched with the same query handle"); + juniper::Result res(juniper::TestConfig, &qh, + doc.c_str(), doc.size(), 0); + + juniper::Summary* sum = res.GetTeaser(NULL); + std::string s(sum->Text()); + _test_equal(s, + "Try to run this on another doc just to see if <b>b</b> or <b>c</b>" + " still can be matched with the same query handle"); + } + juniper::_Juniper->FlushRewriters(); +} + + +/** + * Test of the the expansion based (langid) constructor in + * combination with a normal search + */ +void MatchObjectTest::testCombined() +{ + FakeRewriter frew; + juniper::_Juniper->AddRewriter("exp", &frew, true, false); + juniper::_Juniper->AddRewriter("red", &frew, false, true); + + TestQuery q("OR(OR(AND(exp:a,b)))"); + QueryHandle& qh(q._qhandle); + + { + std::string doc("see if we can match a3 or c somewhere in this b doc. 
" + "Note that we should not match b1 or c1 or a somewhere.."); + juniper::Result res(juniper::TestConfig, &qh, doc.c_str(), doc.size(), 0); + + juniper::Summary* sum = res.GetTeaser(NULL); + std::string s(sum->Text()); + _test_equal(s, + "see if we can match <b>a3</b> or c somewhere in this <b>b</b> doc." + " Note that we should not match b1 or c1 or a somewhere.."); + } + juniper::_Juniper->FlushRewriters(); +} + +/** Test parameter input via options + */ + +void MatchObjectTest::testParams() +{ + { + TestQuery q("AND(a,b)", "near.1"); + QueryHandle& qh = q._qhandle; + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + // Expect l:1 == limit:1 v: Validity check of keywords needed, c: Completeness req'ed + _test_equal(stk, "Node<a:2,l:1,v,c>[a:100,b:100]"); + } + + { + TestQuery q("AND(a,b)", "onear.1"); + QueryHandle& qh = q._qhandle; + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + // Expect l:1 == limit:1 o: ordered, v: Validity check of keywords needed, + // c: Completeness req'ed + _test_equal(stk, "Node<a:2,o,l:1,v,c>[a:100,b:100]"); + } + + { + TestQuery q("AND(a,b)", "within.1"); + QueryHandle& qh = q._qhandle; + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + // Expect l:1 == limit:1 o: ordered, v: Validity check of keywords needed, + // c: Completeness req'ed + _test_equal(stk, "Node<a:2,o,l:1,v,c>[a:100,b:100]"); + } + + { + // Check that query option replaces orig.query + TestQuery q("OR(a,b)", "query.ONEAR/1(a,b)"); + QueryHandle& qh = q._qhandle; + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + // Expect l:1 == limit:1 o: ordered, v: Validity check of keywords needed, + // c: Completeness req'ed + _test_equal(stk, "Node<a:2,o,l:1,v,c>[a:100,b:100]"); + } + + { + // Check that query option replaces orig.query, and check that ANY works.. 
+ TestQuery q("OR(a,b,c)", "query.ANY(a,b)"); + QueryHandle& qh = q._qhandle; + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + // Expect l:1 == limit:1 o: ordered, v: Validity check of keywords needed, + // c: Completeness req'ed + _test_equal(stk, "Node<a:2>[a:100,b:100]"); + } +} + + +/************************************************************************* + * Test administration methods + *************************************************************************/ + +/** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ +bool MatchObjectTest::setUp() { + return true; +} + +/** + test_methods_["testCombined"] = + &MatchObjectTest::testCombined; + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ +void MatchObjectTest::tearDown() { +} + +/** + * Build up a map with all test methods + */ +void MatchObjectTest::init() { + test_methods_["testTerm"] = + &MatchObjectTest::testTerm; + test_methods_["testMatch"] = + &MatchObjectTest::testMatch; + test_methods_["testMatchAnnotated"] = + &MatchObjectTest::testMatchAnnotated; + test_methods_["testLangid"] = + &MatchObjectTest::testLangid; + test_methods_["testCombined"] = + &MatchObjectTest::testCombined; + test_methods_["testParams"] = + &MatchObjectTest::testParams; +} + +/************************************************************************* + * main entry points + *************************************************************************/ + + +void MatchObjectTest::Run(MethodContainer::iterator &itr) { + try { + if (setUp()) { + (this->*itr->second)(); + tearDown(); + } + } catch (...) 
{ + _fail("Got unknown exception in test method " + itr->first); + } +} + +void MatchObjectTest::Run(const char* method) { + MethodContainer::iterator pos(test_methods_.find(method)); + if (pos != test_methods_.end()) { + Run(pos); + } else { + std::cerr << "ERROR: No test method named \"" + << method << "\"" << std::endl; + _fail("No such method"); + } +} + +void MatchObjectTest::Run() { + for (MethodContainer::iterator itr(test_methods_.begin()); + itr != test_methods_.end(); + ++itr) + Run(itr); +} + +/* + * Parse runtime arguments before running. + * If the -m METHOD parameter is given, run only that method + */ +void MatchObjectTest::Run(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-m") == 0 && argc > i + 1) { + Run(argv[++i]); + return; + } + } + Run(); +} diff --git a/searchsummary/src/tests/juniper/matchobjectTest.h b/searchsummary/src/tests/juniper/matchobjectTest.h new file mode 100644 index 00000000000..5bfd29a371f --- /dev/null +++ b/searchsummary/src/tests/juniper/matchobjectTest.h @@ -0,0 +1,106 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * Author: Knut Omang + */ +#pragma once + +#include "testenv.h" +#include <vespa/fastlib/testsuite/test.h> +#include <map> + +/** + * The MatchObjectTest class holds + * the unit tests for the MatchObject class. + * + * @sa MatchObject + * @author Knut Omang + */ +class MatchObjectTest : public Test { + + /************************************************************************* + * Test methods + * + * This section contains boolean methods for testing each public method + * in the class ing tested + *************************************************************************/ + + /** + * Test of the Term method. + */ + void testTerm(); + + + /** + * Test of performance + */ + void testPerformance(); + + /** + * Test of the Match method. 
+ */ + void testMatch(); + + /** + * Test of the Match method on annotated buffers. + */ + void testMatchAnnotated(); + + /** + * Test of the the expansion based (langid) constructor + */ + void testLangid(); + void testCombined(); + + + /** Test parameter input via query handle options + */ + void testParams(); + + + /************************************************************************* + * Test administration methods + *************************************************************************/ + + /** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ + bool setUp(); + + /** + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ + void tearDown(); + + typedef void(MatchObjectTest::* tst_method_ptr) (); + typedef std::map<std::string, tst_method_ptr> MethodContainer; + MethodContainer test_methods_; + void init(); + +protected: + + /** + * Since we are running within Emacs, the default behavior of + * print_progress which includes backspace does not work. + * We'll use a single '.' instead. + */ + void print_progress() override { *m_osptr << '.' << std::flush; } +public: + + MatchObjectTest() : Test("MatchObject"), test_methods_() { init(); } + ~MatchObjectTest() {} + + /************************************************************************* + * main entry points + *************************************************************************/ + void Run(MethodContainer::iterator &itr); + void Run() override; + void Run(const char *method); + void Run(int argc, char* argv[]); +}; + + +// Local Variables: +// mode:c++ +// End: diff --git a/searchsummary/src/tests/juniper/matchobjectTestApp.cpp b/searchsummary/src/tests/juniper/matchobjectTestApp.cpp new file mode 100644 index 00000000000..8bdebfe7207 --- /dev/null +++ b/searchsummary/src/tests/juniper/matchobjectTestApp.cpp @@ -0,0 +1,42 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "matchobjectTest.h" +#include "testenv.h" +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/juniper/wildcard_match.h> +#include <iostream> + +namespace { +void test(const char * word, const char * pattern, bool expect) { + EXPECT_EQUAL(expect, fast::util::wildcard_match(word, pattern)); +} +} + +void +test_wildcard() +{ + test("a", "b", false); + test("b", "b", true); + test("abc", "def", false); + test("def", "def", true); + test("def", "d?f", true); + test("def", "d?d", false); + test("def", "??d", false); + test("def", "d??", true); + test("abcdef", "a*e", false); + test("abcdef", "a*f", true); + test("abcdef", "a?c*f", true); + test("abcdef", "a?b*f", false); + test("abcdef", "a*b*f", true); + test("abcdef", "abc*", true); + test("abcdef", "*def", true); +} + +int main(int argc, char **argv) { + test_wildcard(); + juniper::TestEnv te(argc, argv, TEST_PATH("./testclient.rc").c_str()); + MatchObjectTest test; + test.SetStream(&std::cout); + test.Run(argc, argv); + return (int)test.Report(); +} diff --git a/searchsummary/src/tests/juniper/mcandTest.cpp b/searchsummary/src/tests/juniper/mcandTest.cpp new file mode 100644 index 00000000000..5a465275a80 --- /dev/null +++ b/searchsummary/src/tests/juniper/mcandTest.cpp @@ -0,0 +1,647 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/* + * Author: Knut Omang + */ + +#include "mcandTest.h" + +#include <vespa/log/log.h> +LOG_SETUP(".mcandtest"); + +// Comment out cerr below to ignore unimplemented tests +#define NOTEST(name) \ + std::cerr << std::endl << __FILE__ << ':' << __LINE__ << ": " \ + << "No test for method '" << (name) << "'" << std::endl; + + +MatchCandidateTest::MatchCandidateTest() : + Test("MatchCandidate"), test_methods_() +{ init(); } + +/************************************************************************* + * Test methods + * + * This section contains boolean methods for testing each public method + * in the class being tested + *************************************************************************/ + +/** + * Test of the SetDocid method. + */ +void MatchCandidateTest::testSetDocid() { +// NOTEST("SetDocid"); +} + + +/** + * Test that the empty query is handled properly even for Analyse and + * GetTeaser/GetRelevancy/GetLog calls.. (Fastserver < 4.21 semantics) + */ +void MatchCandidateTest::testLog() { + TestQuery q(""); + std::string content("Here we go hepp and then some words away hoi some silly text here"); + + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res); // We get a result handle + _test(!res->_mo); // but it is empty + + juniper::Summary* sum = juniper::GetTeaser(res); + std::string s(sum->Text()); + _test_equal(s, std::string("")); + + long relevance = juniper::GetRelevancy(res); + _test_equal(relevance, PROXIMITYBOOST_NOCONSTRAINT_OFFSET); + + sum = juniper::GetLog(res); + s = sum->Text(); + _test_equal(s, std::string("")); + juniper::ReleaseResult(res); +} + + +/** + * Test of proximity metric = 0 + */ +void MatchCandidateTest::testDump() { + std::string content("Here we go hepp and then some words away hoi"); + + { + TestQuery q("NEAR/1(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != 
NULL); + long relevance = juniper::GetRelevancy(res); + // zero value since there are no hits and constraints are enabled.. + _test_equal(relevance, 0); + juniper::ReleaseResult(res); + } + + { + TestQuery q("OR(NEAR/1(hepp,hoi),bananas)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // Check that X_CONSTR propagates as intended + _test_equal(relevance, 0); + juniper::ReleaseResult(res); + } + + { + TestQuery q("PHRASE(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // constant value since there are no hits but this is + // also not a constrained search.. + _test_equal(relevance, PROXIMITYBOOST_NOCONSTRAINT_OFFSET); + juniper::ReleaseResult(res); + } + + { + TestQuery q("AND(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // Relevance may change, but nice to discover such changes.. + // The important is that we get a nonzero value here as a hit + _test_equal(relevance, 4470); + juniper::ReleaseResult(res); + } +} + + +/** + * Test of the order method. + */ +void MatchCandidateTest::testorder() { + TestQuery q("PHRASE(test,phrase)"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test phrase work"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. 
Scan calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 3); // 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() == 1); + juniper::ReleaseResult(res); +} + + +/** + * Test of the matches_limit method. + */ +void MatchCandidateTest::testMatches_limit() { + TestQuery q("OR(PHRASE(phrase,match),PHRASE(test,word))"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test word"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() == 2); // The first (complete) match and the second starting at "test" + + // Check if we get the correct teaser as well.. + juniper::Summary* sum = juniper::GetTeaser(res); + _test(strcmp(sum->Text(), + "This is a simple text where a <b>phrase</b> <b>match</b> can be found not" + " quite adjacent to a <b>test</b> <b>word</b>") == 0); + juniper::ReleaseResult(res); +} + + +/** + * Test of the accept method. + */ +void MatchCandidateTest::testAccept() { + TestQuery q("AND(simple,test)"); + + const char* content = "This is a simple test where we should get a perfect match"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. 
This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 2); // 2 overlapping candidate starting points + _test(m.QueryTerms() == 2); // 2 query terms + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() > 0); + + if (!ms.size()) { + juniper::ReleaseResult(res); + return; // No point in continuing.. + } + + MatchCandidate& mc = *(*(ms.begin())); + + _test(mc.elems() == 2); + _test(mc.startpos() == 10); + _test(mc.endpos() == 21); + _test(!mc.order()); // Unordered for AND op + _test(mc.ctxt_startpos() == 0); + + mc.make_keylist(); + _test(mc._klist.size() == 2); // Two occurrence elements in list + + // Just for the sake of it, verify that we get a proper teaser out of this also.. + juniper::Summary* sum = juniper::GetTeaser(res); + _test(strcmp(sum->Text(), + "This is a <b>simple</b> <b>test</b> where we should get a perfect match") == 0); + juniper::ReleaseResult(res); +} + + +/** + * Test of the rank method. + */ +void MatchCandidateTest::testRank() { +// NOTEST("rank"); +} + + +/** + * Test of simple nested query + */ +void MatchCandidateTest::testMake_keylist() { + TestQuery q("OR(AND(phrase,match),AND(test,phrase))"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test phrase"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test_equal(static_cast<size_t>(ms.size()), 6u); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the add_to_keylist method. 
+ */ +void MatchCandidateTest::testAdd_to_keylist() { + // Nested NEAR-test (triggered if nested NEAR with PHRASE) Ticket Dev Data Search 6109 + TestQuery q("NEAR/4(PHRASE(phr1,phr2),PHRASE(phr3,phr4))"); + + const char* content = "connect truende. phr1 phr2 www www www phr3 phr4 acuicola 8844"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + +// Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 4 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test_equal(static_cast<size_t>(ms.size()), 1u); // Single result + + // Bug triggered when result is fetched.. + juniper::Summary* sum = juniper::GetTeaser(res); + std::string s(sum->Text()); + _test_equal(s, + "connect truende. <b>phr1</b> <b>phr2</b> www www www <b>phr3</b>" + " <b>phr4</b> acuicola 8844"); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the length method. 
+ */ +void MatchCandidateTest::testLength() { + const char* content = "this simple text with adjacent words of a certain pattern must" + " be matched according to specific rules to be detailed in this test."; + size_t content_len = strlen(content); + + { + // Nested complex NEAR-test with double matches at same pos + TestQuery q("NEAR/4(pattern,NEAR/1(simple,with),NEAR/2(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, &q._qhandle, + content, content_len, + 0, 0, 0); + + juniper::Summary* sum = juniper::GetTeaser(res); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 1u); + + std::string s(sum->Text()); + _test_equal(s, + "this <b>simple</b> text <b>with</b> <b>adjacent</b> words of " + "a certain <b>pattern</b> must be matched according to specific" + " rules to be detailed in this test."); + juniper::ReleaseResult(res); + } + + { + // Nested complex NEAR-test with double matches at same pos should not yield hit with ONEAR + TestQuery q("ONEAR/4(pattern,NEAR/1(simple,with),NEAR/2(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle + ,content, content_len, + 0, 0, 0); + + res->Scan(); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 0u); + + juniper::ReleaseResult(res); + } + + { + // Likewise nested complex NEAR-test with double matches at same pos but just outside limit + // should not match: + TestQuery q("NEAR/4(pattern,NEAR/1(simple,with),NEAR/1(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, &q._qhandle, + content, content_len, + 0, 0, 0); + + res->Scan(); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 0u); + + juniper::ReleaseResult(res); + 
} +} + + +struct MyTokenProcessor : public ITokenProcessor +{ + Matcher &_m; + std::vector<size_t> _cands; + MyTokenProcessor(Matcher &m) : _m(m), _cands() {} + ~MyTokenProcessor() override; + void handle_token(Token &token) override { + _m.handle_token(token); + const match_sequence *ms = _m.GetWorkSet(); + _cands.push_back(ms[0].size()); + LOG(info, "match_sequence[0].size(%zu)", _cands.back()); + } + void handle_end(Token &token) override { + _m.handle_end(token); + } +}; + +MyTokenProcessor::~MyTokenProcessor() = default; + +/** + * Test that max number of match candidates can be controlled. + */ +void MatchCandidateTest::requireThatMaxNumberOfMatchCandidatesCanBeControlled() +{ + TestQuery q("PHRASE(re,re,re,re,foo,re,re,re,re,bar)"); + q._qhandle._max_match_candidates = 4; + + const char *content = "re re re re foo re re re re bar re re re re foo re re re re bar"; + size_t content_len = strlen(content); + + Result *res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Deflect tokens to my processor + Matcher &m = *res->_matcher; + MyTokenProcessor proc(m); + res->_tokenizer->SetSuccessor(&proc); + res->Scan(); + + _test_equal(proc._cands.size(), 20u); + for (size_t i = 0; i < proc._cands.size(); ++i) { + _test(proc._cands[i] <= 4u); + } + _test_equal(m.TotalHits(), 20); + match_candidate_set& mcs = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(mcs.size()), 2u); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the order method. + */ +void MatchCandidateTest::testOrder() { +// NOTEST("order"); +} + + +/** + * Test of the size method. + */ +void MatchCandidateTest::testSize() { +// NOTEST("size"); +} + + +/** + * Test of the endpos method. + */ +void MatchCandidateTest::testEndpos() { +// NOTEST("endpos"); +} + + +/** + * Test of the ctxt_startpos method. 
+ */ +void MatchCandidateTest::testCtxt_startpos() { +// NOTEST("ctxt_startpos"); +} + + +/** + * Test of the starttoken method. + */ +void MatchCandidateTest::testStarttoken() { +// NOTEST("starttoken"); +} + + +/** + * Test of the word_distance method. + */ +void MatchCandidateTest::testWord_distance() { +// NOTEST("word_distance"); +} + + +/** + * Test of the distance method. + */ +void MatchCandidateTest::testDistance() { +// NOTEST("distance"); +} + + +/** + * Test of the elem_store_sz method. + */ +void MatchCandidateTest::testElem_store_sz() { +// NOTEST("elem_store_sz"); +} + + +/** + * Test of the elems method. + */ +void MatchCandidateTest::testElems() { +// NOTEST("elems"); +} + + +/** + * Test of the distance method. + */ +void MatchCandidateTest::testDistance1() { +// NOTEST("distance"); +} + + +/** + * Test of the set_valid method. + */ +void MatchCandidateTest::testSet_valid() { +// NOTEST("set_valid"); +} + + +/************************************************************************* + * Test administration methods + *************************************************************************/ + +/** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ +bool MatchCandidateTest::setUp() { + return true; +} + +/** + * Tear down common stuff for all test methods. 
+ * This method is called immediately after each test method is called + */ +void MatchCandidateTest::tearDown() { +} + +/** + * Build up a map with all test methods + */ +void MatchCandidateTest::init() { + test_methods_["testSetDocid"] = + &MatchCandidateTest::testSetDocid; + test_methods_["testLog"] = + &MatchCandidateTest::testLog; + test_methods_["testDump"] = + &MatchCandidateTest::testDump; + test_methods_["testorder"] = + &MatchCandidateTest::testorder; + test_methods_["testMatches_limit"] = + &MatchCandidateTest::testMatches_limit; + test_methods_["testAccept"] = + &MatchCandidateTest::testAccept; + test_methods_["testRank"] = + &MatchCandidateTest::testRank; + test_methods_["testMake_keylist"] = + &MatchCandidateTest::testMake_keylist; + test_methods_["testAdd_to_keylist"] = + &MatchCandidateTest::testAdd_to_keylist; + test_methods_["testLength"] = + &MatchCandidateTest::testLength; + test_methods_["requireThatMaxNumberOfMatchCandidatesCanBeControlled"] = + &MatchCandidateTest::requireThatMaxNumberOfMatchCandidatesCanBeControlled; + test_methods_["testOrder"] = + &MatchCandidateTest::testOrder; + test_methods_["testSize"] = + &MatchCandidateTest::testSize; + test_methods_["testEndpos"] = + &MatchCandidateTest::testEndpos; + test_methods_["testCtxt_startpos"] = + &MatchCandidateTest::testCtxt_startpos; + test_methods_["testStarttoken"] = + &MatchCandidateTest::testStarttoken; + test_methods_["testWord_distance"] = + &MatchCandidateTest::testWord_distance; + test_methods_["testDistance"] = + &MatchCandidateTest::testDistance; + test_methods_["testElem_store_sz"] = + &MatchCandidateTest::testElem_store_sz; + test_methods_["testElems"] = + &MatchCandidateTest::testElems; + test_methods_["testDistance1"] = + &MatchCandidateTest::testDistance1; + test_methods_["testSet_valid"] = + &MatchCandidateTest::testSet_valid; +} + +/************************************************************************* + * main entry points + 
*************************************************************************/ + + +void MatchCandidateTest::Run(MethodContainer::iterator &itr) { + try { + if (setUp()) { + (this->*itr->second)(); + tearDown(); + } + } catch (...) { + _fail("Got unknown exception in test method " + itr->first); + } +} + +void MatchCandidateTest::Run(const char* method) { + MethodContainer::iterator pos(test_methods_.find(method)); + if (pos != test_methods_.end()) { + Run(pos); + } else { + std::cerr << "ERROR: No test method named \"" + << method << "\"" << std::endl; + _fail("No such method"); + } +} + +void MatchCandidateTest::Run() { + for (MethodContainer::iterator itr(test_methods_.begin()); + itr != test_methods_.end(); + ++itr) + Run(itr); +} + +/* + * Parse runtime arguments before running. + * If the -m METHOD parameter is given, run only that method + */ +void MatchCandidateTest::Run(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-m") == 0 && argc > i + 1) + { + Run(argv[++i]); + return; + } + } + Run(); +} diff --git a/searchsummary/src/tests/juniper/mcandTest.h b/searchsummary/src/tests/juniper/mcandTest.h new file mode 100644 index 00000000000..cdb01e91e3b --- /dev/null +++ b/searchsummary/src/tests/juniper/mcandTest.h @@ -0,0 +1,204 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * Author Knut Omang + */ +#pragma once + +#include <map> +#include <vespa/fastlib/testsuite/test.h> +#include "testenv.h" +#include <vespa/juniper/mcand.h> + +/** + * The MatchCandidateTest class holds + * the unit tests for the MatchCandidate class. 
+ * + * @sa MatchCandidate + * @author Knut Omang + */ +class MatchCandidateTest : public Test { + + /************************************************************************* + * Test methods + * + * This section contains methods for testing each public method + * in the class being tested + *************************************************************************/ + + /** + * Test of the SetDocid method. + */ + void testSetDocid(); + + + /** + * Test of the log method. + */ + void testLog(); + + + /** + * Test of the dump method. + */ + void testDump(); + + + /** + * Test of the order method. + */ + void testorder(); + + + /** + * Test of the matches_limit method. + */ + void testMatches_limit(); + + + /** + * Test of the accept method. + */ + void testAccept(); + + + /** + * Test of the rank method. + */ + void testRank(); + + + /** + * Test of the make_keylist method. + */ + void testMake_keylist(); + + + /** + * Test of the add_to_keylist method. + */ + void testAdd_to_keylist(); + + + /** + * Test of the length method. + */ + void testLength(); + + /** + * Test that the max number of match candidates can be controlled. + */ + void requireThatMaxNumberOfMatchCandidatesCanBeControlled(); + + /** + * Test of the order method. + */ + void testOrder(); + + + /** + * Test of the size method. + */ + void testSize(); + + + /** + * Test of the endpos method. + */ + void testEndpos(); + + + /** + * Test of the ctxt_startpos method. + */ + void testCtxt_startpos(); + + + /** + * Test of the starttoken method. + */ + void testStarttoken(); + + + /** + * Test of the word_distance method. + */ + void testWord_distance(); + + + /** + * Test of the distance method. + */ + void testDistance(); + + + /** + * Test of the elem_store_sz method. + */ + void testElem_store_sz(); + + + /** + * Test of the elems method. + */ + void testElems(); + + + /** + * Test of the distance method. + */ + void testDistance1(); + + + /** + * Test of the set_valid method. 
+ */ + void testSet_valid(); + + + /************************************************************************* + * Test administration methods + *************************************************************************/ + + /** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ + bool setUp(); + + /** + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ + void tearDown(); + + typedef void(MatchCandidateTest::* tst_method_ptr) (); + typedef std::map<std::string, tst_method_ptr> MethodContainer; + MethodContainer test_methods_; + void init(); +protected: + + /** + * Since we are running within Emacs, the default behavior of + * print_progress which includes backspace does not work. + * We'll use a single '.' instead. + */ + void print_progress() override { *m_osptr << '.' << std::flush; } + +public: + + MatchCandidateTest(); + ~MatchCandidateTest() {} + + /************************************************************************* + * main entry points + *************************************************************************/ + void Run(MethodContainer::iterator &itr); + void Run() override; + void Run(const char *method); + void Run(int argc, char* argv[]); +}; + + +// Local Variables: +// mode:c++ +// End: diff --git a/searchsummary/src/tests/juniper/mcandTestApp.cpp b/searchsummary/src/tests/juniper/mcandTestApp.cpp new file mode 100644 index 00000000000..7b1a15934d3 --- /dev/null +++ b/searchsummary/src/tests/juniper/mcandTestApp.cpp @@ -0,0 +1,12 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "mcandTest.h" +#include <vespa/vespalib/testkit/testapp.h> + +int main(int argc, char **argv) { + juniper::TestEnv te(argc, argv, TEST_PATH("./testclient.rc").c_str()); + MatchCandidateTest test; + test.SetStream(&std::cout); + test.Run(argc, argv); + return (int)test.Report(); +} diff --git a/searchsummary/src/tests/juniper/partialutf8.input.utf8 b/searchsummary/src/tests/juniper/partialutf8.input.utf8 new file mode 100644 index 00000000000..df25fbb8c60 --- /dev/null +++ b/searchsummary/src/tests/juniper/partialutf8.input.utf8 @@ -0,0 +1 @@ +補充拉∼看樣子你知道有哪些配件阿 襪子那些 總共有6個顏色還有保護套http://www.fnac.com.tw/searchresults.aspx?kind=1&txtsearch=ipod&productid=23550703這是法雅客的網站 可以參考===================================================這是apple網站上完整的配件淡 很多台灣買不到喔belkin 錄音裝置用 belkin voice recorder 錄音裝置,將您的語音筆記錄進 ipod;您可以用它錄製備忘錄、會議記錄、訪談過程等等,並將內容儲存在 ipod 上,還能加註時間和日期,以便日後查找。(請注意:本產品與 ipod mini 並不相容) ipod 線控裝置與耳機有了線控裝置、再加上一組額外的耳機,在路上聽 ipod 就更方便了。本產品可以搭配具有 dock 連接埠的 ipod、以及 ipod mini 使用。 belkin 讀卡機下次渡假的時候,您可以不必再幫數位相機帶一大堆記憶卡了。當您的記憶卡拍滿照片的時候,只要使用方便的 belkin media reader 讀卡機,就可以將數位照片直接存進 ipod。(請注意:本產品與 ipod mini 並不相容) belkin 電池組必須離開電腦好幾天嗎?您可以選購 belkin 公司出品的備用電池組,讓您可以使用 4 個標準的 3 號電池讓 ipod 連續播送音樂 20 小時。 ipod 攜帶套這個特製的攜帶套可以保護您的 ipod 不受外物碰撞,而且隨身攜帶更方便。您可以將它夾在腰帶上、手提袋上、甚至背包上。多國電源轉接套件有了這個配件,您就可以安心把 ipod 帶到全世界任何地方。它包含了 6 種不同形狀的電源插頭,適用於全世界各地的電源插座。 最適合在家使用的 ipod 配件 ipod dock 或 ipod mini dock為您家或辦公室再多買一個 ipod dock 轉接座。這個轉接座上有立體聲線路輸出,能連接您的立體聲揚聲器,讓 ipod 變成一組節省空間的立體音響。 dock 連接埠至 usb 2.0 + firewire 訊號線(pc 適用)windows 專用的這款 dock 至 usb 2.0 + firewire 訊號線讓 ipod 能透過 usb 2.0* 進行資料同步,並且以 firewire 介面充電。(請注意:本產品與 ipod mini 並不相容) jbl creature ii 揚聲器將您的 ipod 變成一部家用音響。透過簡單的即插即用設定、以及便利的觸控開關,creature 能將您的多媒體音效帶往另一個空間。新版 ipod 專用 navipod 紅外線遙控器這款遙控器有 5 個按鈕,另外有一個接收器直接插在 ipod 頂端;透過遙控器,您可以在房間的另外一端操作 ipod。ipod 立體音響連接套件想讓您的音樂資料庫和播放列表成為下一次派對的焦點嗎?蘋果的 apple stereo connection kit 立體音響連接套件,再加上 monster cable 訊號線,就是最好的答案。(本產品必須搭配具有 dock 連接埠的 ipod 機種使用)monster isplitter這款由 monster cable 出品的 ipod 迷你立體聲 y 形分岔接頭,能將 2 
組迷你立體聲耳機或揚聲器連接在一個接口上,讓您可以和朋友分享音樂。 給駕車奔馳一族的 ipod 玩家 griffin itrip fm 播放機現在,您的 ipod 可以透過您車上的 fm 收音機播放音樂了。 itrip fm transmitter 播放機可以搭配具有 dock 連接埠的新款 ipod 使用,而且只會耗費極為少量的 ipod 電力,也不需要安裝電池。 sony 錄音帶轉接器將 ipod 連接到您的汽車音響上;只要將轉接器插進汽車音響上的錄音座,您就可以立即享受 ipod 上的音樂。(必須搭配能橫向插入錄音帶的汽車音響?/p> belkin 車用點煙器轉接裝置可選購的 belkin 車用點煙器轉接裝置內含一組 3.5mm 訊號輸出線,可以直接插上汽車音響的訊號輸入插口,讓您在車上也可以欣賞 ipod 播放的音樂。icarplay wireless - fm 播放機monster icarplay wireless - fm transmitter 播放機,能在為 ipod 充電的同時,透過汽車上的 fm 收音機播放 ipod 上的音樂;必須搭配有 dock 連接埠的 ipod 使用。monster icase 旅行組為您的 ipod 和配件準備一套完整的儲存和保護盒。這套旅行組包括 monster icarcharger 充電器和 monster isplitter 迷你立體聲 y 形分岔接頭;可搭配有 dock 連接埠的 ipod 使用。 belkin tunedok 車用放置架belkin tunedok 讓您開車時輕鬆帶著 ipod 聽音樂。把您的 ipod 放在 tunedok 上,讓它的 air-grip 吸盤將 ipod 穩當固定在適當的位置,以便您隨手取用。可搭配所有機型的 ipod。
\ No newline at end of file diff --git a/searchsummary/src/tests/juniper/queryparserTest.cpp b/searchsummary/src/tests/juniper/queryparserTest.cpp new file mode 100644 index 00000000000..e8afbcc0cee --- /dev/null +++ b/searchsummary/src/tests/juniper/queryparserTest.cpp @@ -0,0 +1,246 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * Author Knut Omang + */ +#include "queryparserTest.h" +#include "fakerewriter.h" + + +// Comment out cerr below to ignore unimplemented tests +#define NOTEST(name) \ +std::cerr << std::endl << __FILE__ << ':' << __LINE__ << ": " \ + << "No test for method '" << (name) << "'" << std::endl; + +/************************************************************************* + * Test methods + * + * This section contains boolean methods for testing each public method + * in the class being tested + *************************************************************************/ + +/** + * Test of the UsefulIndex method. + */ +void QueryParserTest::testUsefulIndex() { +// NOTEST("UsefulIndex"); +} + + +/** + * Test of the Index method (also implicit test of integration with + * expander interface) + */ +void QueryParserTest::testIndex() { + FakeRewriter fexp; + // Add as rewriter for query and not for document + juniper::_Juniper->AddRewriter("ourindex", &fexp, true, false); + juniper::QueryParser p("AND(ourindex:cake,myindex:eat)"); + _test(p.ParseError() == 0); + if (p.ParseError()) return; + + juniper::QueryHandle qh(p, NULL, juniper::_Juniper->getModifier()); + std::string stk; + qh.MatchObj(0)->Query()->Dump(stk); + _test_equal(stk, "Node<a:2>[Node<a:4>[cake0:100,cake1:100,cake2:100,cake3:100],eat:100]"); + + std::string stk1; + qh.MatchObj(6)->Query()->Dump(stk1); + _test_equal(stk1, "Node<a:2>[cake:100,eat:100]"); + + // Then let's add a reducer rewriter (should not affect anything..) 
+ juniper::_Juniper->AddRewriter("myindex", &fexp, false, true); + std::string stk2; + qh.MatchObj(0)->Query()->Dump(stk2); + _test_equal(stk2, "Node<a:2>[Node<a:4>[cake0:100,cake1:100,cake2:100,cake3:100],eat:100]"); +} + + +/** + * Test of the Creator method. + */ +void QueryParserTest::testCreator() { +// NOTEST("Creator"); +} + + +/** + * Test of the Weight method. + */ +void QueryParserTest::testWeight() { + { + // Complex nested query (bug example from datasearch 4.0) + juniper::QueryParser p2("OR(ANDNOT(AND(a,b),c),OR(d,e))"); + _test(p2.ParseError() == 0); + + juniper::QueryHandle qh2(p2, NULL, juniper::_Juniper->getModifier()); + std::string stk2; + qh2.MatchObj(0)->Query()->Dump(stk2); + _test_equal(stk2, "Node<a:2>[Node<a:2>[a:100,b:100],Node<a:2>[d:100,e:100]]"); + } + { + // Another complex nested query (bug example from datasearch 4.0) + juniper::QueryParser p2("OR(ANDNOT(RANK(a,OR(b,c)),d),OR(e,f))"); + _test(p2.ParseError() == 0); + + juniper::QueryHandle qh2(p2, NULL, juniper::_Juniper->getModifier()); + std::string stk2; + qh2.MatchObj(0)->Query()->Dump(stk2); + _test_equal(stk2, "Node<a:2>[a:100,Node<a:2>[e:100,f:100]]"); + } +} + + +/** + * Test of the Traverse method. 
+ */ +void QueryParserTest::testTraverse() { + // simple OR query + juniper::QueryParser p1("OR(a,b,c)"); + _test(p1.ParseError() == 0); + + juniper::QueryHandle qh1(p1, NULL, juniper::_Juniper->getModifier()); + std::string stk1; + qh1.MatchObj(0)->Query()->Dump(stk1); + _test(strcmp(stk1.c_str(),"Node<a:3>[a:100,b:100,c:100]") == 0); + + { + // Complex query with phrases + juniper::QueryParser p2("OR(AND(xx,yy),PHRASE(junip*,proximity),PHRASE(data,search))"); + _test(p2.ParseError() == 0); + + juniper::QueryHandle qh2(p2, NULL, juniper::_Juniper->getModifier()); + std::string stk2; + qh2.MatchObj(0)->Query()->Dump(stk2); + _test(strcmp(stk2.c_str(), + "Node<a:3,v>[" + "Node<a:2>[xx:100,yy:100]," + "Node<a:2,o,l:0,e,v,c>[junip*:100,proximity:100]," + "Node<a:2,o,l:0,e,v,c>[data:100,search:100]]") == 0); + } + + { + // Triggering bug ticket 5690 Dev Data Search: + juniper::QueryParser p2("ANDNOT(ANDNOT(AND(cmsm,OR(cidus,ntus)," + "OR(jtft,jtct,jtin,jtfp)," + "OR(PHRASE(strategic,marketing)," + "PHRASE(marketing,strategy))),a))"); + _test(p2.ParseError() == 0); + + juniper::QueryHandle qh2(p2, NULL, juniper::_Juniper->getModifier()); + std::string stk2; + qh2.MatchObj(0)->Query()->Dump(stk2); + std::string s(stk2.c_str()); + _test_equal(s, + "Node<a:4,v>[cmsm:100,Node<a:2>[cidus:100,ntus:100]," + "Node<a:4>[jtft:100,jtct:100,jtin:100,jtfp:100]," + "Node<a:2,v>[Node<a:2,o,l:0,e,v,c>[strategic:100,marketing:100]," + "Node<a:2,o,l:0,e,v,c>[marketing:100,strategy:100]]]"); + } + + // Query with NEAR and WITHIN + juniper::QueryParser p3("OR(NEAR/1(linux,kernel),WITHIN/3(linus,torvalds))"); + _test(p3.ParseError() == 0); + + juniper::QueryHandle qh3(p3, NULL, juniper::_Juniper->getModifier()); + std::string stk3; + qh3.MatchObj(0)->Query()->Dump(stk3); + _test(strcmp(stk3.c_str(), + "Node<a:2,v>[" + "Node<a:2,l:1,v,c>[linux:100,kernel:100]," + "Node<a:2,o,l:3,v,c>[linus:100,torvalds:100]]") == 0); + + // Query with ONEAR + juniper::QueryParser 
p4("OR(ONEAR/3(linus,torvalds))"); + _test(p4.ParseError() == 0); + + juniper::QueryHandle qh4(p4, NULL, juniper::_Juniper->getModifier()); + std::string stk4; + qh4.MatchObj(0)->Query()->Dump(stk4); + _test(strcmp(stk4.c_str(), + "Node<a:2,o,l:3,v,c>[linus:100,torvalds:100]") == 0); +} + + +/************************************************************************* + * Test administration methods + *************************************************************************/ + +/** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ +bool QueryParserTest::setUp() { + return true; +} + +/** + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ +void QueryParserTest::tearDown() { +} + +/** + * Build up a map with all test methods + */ +void QueryParserTest::init() { + test_methods_["testUsefulIndex"] = + &QueryParserTest::testUsefulIndex; + test_methods_["testIndex"] = + &QueryParserTest::testIndex; + test_methods_["testCreator"] = + &QueryParserTest::testCreator; + test_methods_["testWeight"] = + &QueryParserTest::testWeight; + test_methods_["testTraverse"] = + &QueryParserTest::testTraverse; +} + +/************************************************************************* + * main entry points + *************************************************************************/ + + +void QueryParserTest::Run(MethodContainer::iterator &itr) { + try { + if (setUp()) { + (this->*itr->second)(); + tearDown(); + } + } catch (...) 
{ + _fail("Got unknown exception in test method " + itr->first); + } +} + +void QueryParserTest::Run(const char* method) { + MethodContainer::iterator pos(test_methods_.find(method)); + if (pos != test_methods_.end()) { + Run(pos); + } else { + std::cerr << "ERROR: No test method named \"" + << method << "\"" << std::endl; + _fail("No such method"); + } +} + +void QueryParserTest::Run() { + for (MethodContainer::iterator itr(test_methods_.begin()); + itr != test_methods_.end(); + ++itr) + Run(itr); +} + +/* + * Parse runtime arguments before running. + * If the -m METHOD parameter is given, run only that method + */ +void QueryParserTest::Run(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-m") == 0 && argc > i + 1) + { + Run(argv[++i]); + return; + } + } + Run(); +} diff --git a/searchsummary/src/tests/juniper/queryparserTest.h b/searchsummary/src/tests/juniper/queryparserTest.h new file mode 100644 index 00000000000..7dc4dda63fa --- /dev/null +++ b/searchsummary/src/tests/juniper/queryparserTest.h @@ -0,0 +1,106 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * Author Knut Omang + */ +#pragma once + +#include "testenv.h" +#include <vespa/juniper/queryparser.h> +#include <vespa/juniper/rewriter.h> +#include <vespa/fastlib/testsuite/test.h> +#include <map> + +/** + * The QueryParserTest class holds + * the unit tests for the QueryParser class. + * + * @sa QueryParser + * @author Knut Omang + */ +class QueryParserTest : public Test { + + /************************************************************************* + * Test methods + * + * This section contains methods for testing each public method + * in the class being tested + *************************************************************************/ + + /** + * Test of the UsefulIndex method. + */ + void testUsefulIndex(); + + + /** + * Test of the Index method. 
+ */ + void testIndex(); + + + /** + * Test of the Creator method. + */ + void testCreator(); + + + /** + * Test of the Weight method. + */ + void testWeight(); + + + /** + * Test of the Traverse method. + */ + void testTraverse(); + + + /************************************************************************* + * Test administration methods + *************************************************************************/ + + /** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ + bool setUp(); + + /** + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ + void tearDown(); + + typedef void(QueryParserTest::* tst_method_ptr) (); + typedef std::map<std::string, tst_method_ptr> MethodContainer; + MethodContainer test_methods_; + void init(); + +protected: + + /** + * Since we are running within Emacs, the default behavior of + * print_progress which includes backspace does not work. + * We'll use a single '.' instead. + */ + void print_progress() override { *m_osptr << '.' << std::flush; } + +public: + + QueryParserTest() : Test("QueryParser"), test_methods_() { init(); } + ~QueryParserTest() {} + + /************************************************************************* + * main entry points + *************************************************************************/ + void Run(MethodContainer::iterator &itr); + void Run() override; + void Run(const char *method); + void Run(int argc, char* argv[]); +}; + + +// Local Variables: +// mode:c++ +// End: diff --git a/searchsummary/src/tests/juniper/queryparserTestApp.cpp b/searchsummary/src/tests/juniper/queryparserTestApp.cpp new file mode 100644 index 00000000000..c34f0b77ae9 --- /dev/null +++ b/searchsummary/src/tests/juniper/queryparserTestApp.cpp @@ -0,0 +1,14 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "queryparserTest.h" +#include "testenv.h" +#include <vespa/vespalib/testkit/testapp.h> + + +int main(int argc, char **argv) { + juniper::TestEnv te(argc, argv, TEST_PATH("./testclient.rc").c_str()); + QueryParserTest test; + test.SetStream(&std::cout); + test.Run(argc, argv); + return (int)test.Report(); +} diff --git a/searchsummary/src/tests/juniper/queryvisitor_test.cpp b/searchsummary/src/tests/juniper/queryvisitor_test.cpp new file mode 100644 index 00000000000..0b99bf6583c --- /dev/null +++ b/searchsummary/src/tests/juniper/queryvisitor_test.cpp @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <memory> +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/juniper/queryhandle.h> +#include <vespa/juniper/queryvisitor.h> +#include <vespa/vespalib/stllike/string.h> + +using namespace juniper; + +class MyQuery : public juniper::IQuery +{ +private: + vespalib::string _term; + +public: + MyQuery(const vespalib::string &term) : _term(term) {} + + virtual bool Traverse(IQueryVisitor* v) const override { + v->VisitKeyword(nullptr, _term.c_str(), _term.size()); + return true; + } + virtual int Weight(const QueryItem*) const override { + return 0; + } + virtual ItemCreator Creator(const QueryItem*) const override { + return ItemCreator::CREA_ORIG; + } + virtual const char* Index(const QueryItem*, size_t*) const override { + return "my_index"; + } + virtual bool UsefulIndex(const QueryItem*) const override { + return true; + } +}; + +struct Fixture +{ + MyQuery query; + QueryModifier modifier; + QueryHandle handle; + QueryVisitor visitor; + Fixture(const vespalib::string &term) + : query(term), + modifier(), + handle(query, "", modifier), + visitor(query, &handle, modifier) + {} +}; + +TEST_F("require that terms are picked up by the query visitor", Fixture("my_term")) +{ + auto query = std::unique_ptr<QueryExpr>(f.visitor.GetQuery()); + ASSERT_TRUE(query != nullptr); 
+ QueryNode *node = query->AsNode(); + ASSERT_TRUE(node != nullptr); + EXPECT_EQUAL(1, node->_arity); + QueryTerm *term = node->_children[0]->AsTerm(); + ASSERT_TRUE(term != nullptr); + EXPECT_EQUAL("my_term", vespalib::string(term->term())); +} + +TEST_F("require that empty terms are ignored by the query visitor", Fixture("")) +{ + QueryExpr *query = f.visitor.GetQuery(); + ASSERT_TRUE(query == nullptr); +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchsummary/src/tests/juniper/testclient.rc b/searchsummary/src/tests/juniper/testclient.rc new file mode 100644 index 00000000000..d04262c364c --- /dev/null +++ b/searchsummary/src/tests/juniper/testclient.rc @@ -0,0 +1,69 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +## Dynamic teasers +## Some sensible default values +## This file is used by the testclient application by default, +## if run from this directory. +## Use the -f option to testclient to specify an alternate location + +# A string to be included before each hit in the generated summary +juniper.dynsum.highlight_on <b> + +# A string to be included after each hit in the generated summary +juniper.dynsum.highlight_off </b> + +# A string to be included to denote abbreviated/left out pieces of the +# original text in the generated summary +juniper.dynsum.continuation ... + +# whether or not Juniper should escape the 5 chars <>&"' +# auto means escape if any of the markup defs above starts with < +juniper.dynsum.escape_markup auto + +# Length of the generated summary in bytes. This is a hint to Juniper. +# The result may be slightly longer or shorter depending on the structure +# of the available document text and the submitted query. +juniper.dynsum.length 256 + +# The number of (possibly partial) set of keywords matching the query +# to try to include in the summary. 
The larger this value is +# set relative to the length parameter, the denser the keywords +# may appear in the summary. +juniper.dynsum.max_matches 3 + +# The maximal number of bytes of context to prepend and append to each +# of the selected query keyword hits. This parameter defines the max +# size a summary would become if there are few keyword hits (max_matches +# set low or document contained few matches of the keywords). +juniper.dynsum.surround_max 128 + +# The size of the sliding window used to determine if +# multiple query terms occur together. The larger the value, the more +# likely the system will find (and present in dynamic summary) complete +# matches containing all the search terms. The downside is a potential +# performance overhead of keeping candidates for matches longer during +# matching, and consequently updating more candidates that eventually +# get thrown away +juniper.matcher.winsize 600 + +# The minimal number of bytes in a query keyword for it to be subject +# to the simple Juniper stemming algorithm. Keywords that are shorter +# than or equal to this limit will only yield exact matches in the +# dynamic summaries. +juniper.stem.min_length 5 + +# The maximal number of bytes in a query keyword for it to be subject +# to the simple Juniper stemming algorithm +juniper.stem.max_extend 3 + +# A factor to multiply the internal Juniper metric with when producing +# proximity metric for a given field. A real/floating point value is accepted. +# Default value is 0.25 - scaling down by a factor 4. +# Note that the QRserver also supports a factor that is global to all proximity +# metric fields, and that is applied in addition when proximityboosting +# is enabled there. This parameter applies to Juniper version >= 2.0.4 only +# and is intended to be used on a per field basis. 
+juniper.proximity.factor 0.25 + +# debugging Juniper (intended for internal usage) +# (See juniperdebug.h for details about the various bits) +juniper.debug_mask 0x0 diff --git a/searchsummary/src/tests/juniper/testenv.cpp b/searchsummary/src/tests/juniper/testenv.cpp new file mode 100644 index 00000000000..769c24b829c --- /dev/null +++ b/searchsummary/src/tests/juniper/testenv.cpp @@ -0,0 +1,119 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* Setup and parameter parsing for static Juniper environment to reuse + * within test framework + */ + +#include "testenv.h" +#include <vespa/juniper/propreader.h> +#include <unistd.h> + +namespace juniper +{ + +bool color_highlight = false; +// Easy access in tests.. +Config* TestConfig; +Juniper * _Juniper; + + +TestEnv::TestEnv(int argc, char **argv, const char* propfile) : + _props(), _config(), _juniper(), _wordFolder() +{ + int c; + + while ((c = getopt(argc, argv, "d:hcm:")) != EOF) + { + switch (c) + { + case 'd': +#ifdef FASTOS_DEBUG + debug_level = strtol(optarg, NULL, 0); +#else + fprintf(stderr, "This version of Juniper compiled without debug\n"); +#endif + break; + case 'c': + color_highlight = true; + break; + case 'm': + // option handled by test framework + break; + case 'h': + default: + Usage(argv[0]); + return; + } + } + + int expected_args = 0; + + if (argc - optind < expected_args) + { + Usage(argv[0]); + return; + } + + _props.reset(new PropReader(propfile)); + + if (color_highlight) + { + _props->UpdateProperty("juniper.dynsum.highlight_on", "\\1b[1;31m"); + _props->UpdateProperty("juniper.dynsum.highlight_off", "\\1b[0m"); + } + + _juniper.reset(new Juniper(_props.get(), &_wordFolder)); + _Juniper = _juniper.get(); + _config = _juniper->CreateConfig(); + TestConfig = _config.get(); +} + +TestEnv::~TestEnv() +{ +} + +void TestEnv::Usage(char* s) +{ + fprintf(stderr, "Usage: %s [options]\n", s); + fprintf(stderr, "Available options:\n"); + 
fprintf(stderr, " -d<debugmask>: Turn on debugging\n"); + fprintf(stderr, " -h: This help\n"); +} + + +TestQuery::TestQuery(const char* qexp, const char* options) : + _qparser(qexp), + _qhandle(_qparser, options, _Juniper->getModifier()) +{ } + + +PropertyMap::PropertyMap() + : _map() +{ +} + + +PropertyMap::~PropertyMap() +{ +} + + +PropertyMap & +PropertyMap::set(const char *name, const char *value) +{ + _map[std::string(name)] = std::string(value); + return *this; +} + + +const char * +PropertyMap::GetProperty(const char* name, const char* def) +{ + std::map<std::string, std::string>::iterator res = _map.find(std::string(name)); + if (res != _map.end()) { + return res->second.c_str(); + } + return def; +} + + +} // end namespace juniper diff --git a/searchsummary/src/tests/juniper/testenv.h b/searchsummary/src/tests/juniper/testenv.h new file mode 100644 index 00000000000..a43f4a11bec --- /dev/null +++ b/searchsummary/src/tests/juniper/testenv.h @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +/* Include most of the stuff that we might need */ + +#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/fastlib/text/normwordfolder.h> +#include <vespa/juniper/query.h> +#include <vespa/juniper/juniperdebug.h> +#include <vespa/juniper/rpinterface.h> +#include <vespa/juniper/queryhandle.h> +#include <vespa/juniper/queryparser.h> +#include <vespa/juniper/queryvisitor.h> +#include <vespa/juniper/result.h> +#include <vespa/juniper/config.h> +#include <vespa/juniper/queryparser.h> +#include <vespa/juniper/matchobject.h> +#include <vespa/juniper/SummaryConfig.h> +#include <vespa/juniper/Matcher.h> +#include <vespa/juniper/mcand.h> +#include <vespa/juniper/propreader.h> +#include <vespa/juniper/specialtokenregistry.h> + +namespace juniper +{ + +class TestEnv +{ +public: + TestEnv(int argc, char **argv, const char* propfile); + virtual ~TestEnv(); + void Usage(char* s); +private: + std::unique_ptr<PropReader> _props; + std::unique_ptr<Config> _config; + std::unique_ptr<Juniper> _juniper; + Fast_NormalizeWordFolder _wordFolder; + TestEnv(const TestEnv&); + TestEnv& operator=(const TestEnv&); +}; + + +class TestQuery +{ +public: + TestQuery(const char* qexp, const char* options = NULL); + QueryParser _qparser; + QueryHandle _qhandle; +}; + + +class PropertyMap : public IJuniperProperties +{ +private: + std::map<std::string, std::string> _map; +public: + PropertyMap(); + ~PropertyMap(); + PropertyMap &set(const char *name, const char *value); + const char* GetProperty(const char* name, const char* def = NULL) override; +}; + + +extern Config* TestConfig; +extern Juniper * _Juniper; + +} // end namespace juniper + +typedef juniper::TestQuery TestQuery; + |