diff options
Diffstat (limited to 'juniper/src/test/mcandTest.cpp')
-rw-r--r-- | juniper/src/test/mcandTest.cpp | 659 |
1 files changed, 659 insertions, 0 deletions
diff --git a/juniper/src/test/mcandTest.cpp b/juniper/src/test/mcandTest.cpp new file mode 100644 index 00000000000..3da6b452ace --- /dev/null +++ b/juniper/src/test/mcandTest.cpp @@ -0,0 +1,659 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Implementation of the automated unit test class for the MatchCandidate + * class. + * + * @file mcandTest.cpp + * + * @author Knut Omang + * + * @date Created 27 Feb 2003 + * + * $Id$ + * + * <pre> + * Copyright (c) : 2003 Fast Search & Transfer ASA + * ALL RIGHTS RESERVED + * </pre> + ***************************************************************************/ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(""); +#include "mcandTest.h" +#include "testenv.h" + +// Comment out cerr below to ignore unimplemented tests +#define NOTEST(name) \ + std::cerr << std::endl << __FILE__ << ':' << __LINE__ << ": " \ + << "No test for method '" << (name) << "'" << std::endl; + + +MatchCandidateTest::MatchCandidateTest() : + Test("MatchCandidate"), test_methods_() +{ init(); } + +/************************************************************************* + * Test methods + * + * This section contains boolean methods for testing each public method + * in the class being tested + *************************************************************************/ + +/** + * Test of the SetDocid method. + */ +void MatchCandidateTest::testSetDocid() { +// NOTEST("SetDocid"); +} + + +/** + * Test that the empty query is handled properly even for Analyse and + * GetTeaser/GetRelevancy/GetLog calls.. (Fastserver < 4.21 semantics) + */ +void MatchCandidateTest::testLog() { + TestQuery q(""); + std::string content("Here we go hepp and then some words away hoi some silly text here"); + + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res); // We get a result handle + _test(!res->_mo); // but it is empty + + juniper::Summary* sum = juniper::GetTeaser(res); + std::string s(sum->Text()); + _test_equal(s, std::string("")); + + long relevance = juniper::GetRelevancy(res); + _test_equal(relevance, PROXIMITYBOOST_NOCONSTRAINT_OFFSET); + + sum = juniper::GetLog(res); + s = sum->Text(); + _test_equal(s, std::string("")); + juniper::ReleaseResult(res); +} + + +/** + * Test of proximity metric = 0 + */ +void MatchCandidateTest::testDump() { + std::string content("Here we go hepp and then some words away hoi"); + + { + TestQuery q("NEAR/1(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // zero value since there are no hits and constraints are enabled.. + _test_equal(relevance, 0); + juniper::ReleaseResult(res); + } + + { + TestQuery q("OR(NEAR/1(hepp,hoi),bananas)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // Check that X_CONSTR propagates as intended + _test_equal(relevance, 0); + juniper::ReleaseResult(res); + } + + { + TestQuery q("PHRASE(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // constant value since there are no hits but this is + // also not a constrained search.. + _test_equal(relevance, PROXIMITYBOOST_NOCONSTRAINT_OFFSET); + juniper::ReleaseResult(res); + } + + { + TestQuery q("AND(hepp,hoi)"); + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content.c_str(), content.size(), + 0, 0, 0); + _test(res != NULL); + long relevance = juniper::GetRelevancy(res); + // Relevance may change, but nice to discover such changes.. + // The important is that we get a nonzero value here as a hit + _test_equal(relevance, 4470); + juniper::ReleaseResult(res); + } +} + + +/** + * Test of the order method. + */ +void MatchCandidateTest::testorder() { + TestQuery q("PHRASE(test,phrase)"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test phrase work"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. Scan calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 3); // 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() == 1); + juniper::ReleaseResult(res); +} + + +/** + * Test of the matches_limit method. + */ +void MatchCandidateTest::testMatches_limit() { + TestQuery q("OR(PHRASE(phrase,match),PHRASE(test,word))"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test word"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() == 2); // The first (complete) match and the second starting at "test" + + // Check if we get the correct teaser as well.. + juniper::Summary* sum = juniper::GetTeaser(res); + _test(strcmp(sum->Text(), + "This is a simple text where a <b>phrase</b> <b>match</b> can be found not" + " quite adjacent to a <b>test</b> <b>word</b>") == 0); + juniper::ReleaseResult(res); +} + + +/** + * Test of the accept method. + */ +void MatchCandidateTest::testAccept() { + TestQuery q("AND(simple,test)"); + + const char* content = "This is a simple test where we should get a perfect match"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 2); // 2 overlapping candidate starting points + _test(m.QueryTerms() == 2); // 2 query terms + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test(ms.size() > 0); + + if (!ms.size()) { + juniper::ReleaseResult(res); + return; // No point in continuing.. + } + + MatchCandidate& mc = *(*(ms.begin())); + + _test(mc.elems() == 2); + _test(mc.startpos() == 10); + _test(mc.endpos() == 21); + _test(!mc.order()); // Unordered for AND op + _test(mc.ctxt_startpos() == 0); + + mc.make_keylist(); + _test(mc._klist.size() == 2); // Two occurrence elements in list + + // Just for the sake of it, verify that we get a proper teaser out of this also.. + juniper::Summary* sum = juniper::GetTeaser(res); + _test(strcmp(sum->Text(), + "This is a <b>simple</b> <b>test</b> where we should get a perfect match") == 0); + juniper::ReleaseResult(res); +} + + +/** + * Test of the rank method. + */ +void MatchCandidateTest::testRank() { +// NOTEST("rank"); +} + + +/** + * Test of simple nested query + */ +void MatchCandidateTest::testMake_keylist() { + TestQuery q("OR(AND(phrase,match),AND(test,phrase))"); + + const char* content = "This is a simple text where a phrase match can be found not" + " quite adjacent to a test phrase"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 3 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test_equal(static_cast<size_t>(ms.size()), 6u); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the add_to_keylist method. + */ +void MatchCandidateTest::testAdd_to_keylist() { + // Nested NEAR-test (triggered if nested NEAR with PHRASE) Ticket Dev Data Search 6109 + TestQuery q("NEAR/4(PHRASE(phr1,phr2),PHRASE(phr3,phr4))"); + + const char* content = "connect truende. phr1 phr2 www www www phr3 phr4 acuicola 8844"; + size_t content_len = strlen(content); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + +// Do the scanning manually. This calls accept several times + res->Scan(); + Matcher& m = *res->_matcher; + + _test(m.TotalHits() == 4);// 4 occurrences + + match_candidate_set& ms = m.OrderedMatchSet(); + + _test_equal(static_cast<size_t>(ms.size()), 1u); // Single result + + // Bug triggered when result is fetched.. + juniper::Summary* sum = juniper::GetTeaser(res); + std::string s(sum->Text()); + _test_equal(s, + "connect truende. <b>phr1</b> <b>phr2</b> www www www <b>phr3</b>" + " <b>phr4</b> acuicola 8844"); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the length method. + */ +void MatchCandidateTest::testLength() { + const char* content = "this simple text with adjacent words of a certain pattern must" + " be matched according to specific rules to be detailed in this test."; + size_t content_len = strlen(content); + + { + // Nested complex NEAR-test with double matches at same pos + TestQuery q("NEAR/4(pattern,NEAR/1(simple,with),NEAR/2(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, &q._qhandle, + content, content_len, + 0, 0, 0); + + juniper::Summary* sum = juniper::GetTeaser(res); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 1u); + + std::string s(sum->Text()); + _test_equal(s, + "this <b>simple</b> text <b>with</b> <b>adjacent</b> words of " + "a certain <b>pattern</b> must be matched according to specific" + " rules to be detailed in this test."); + juniper::ReleaseResult(res); + } + + { + // Nested complex NEAR-test with double matches at same pos should not yield hit with ONEAR + TestQuery q("ONEAR/4(pattern,NEAR/1(simple,with),NEAR/2(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, + &q._qhandle + ,content, content_len, + 0, 0, 0); + + res->Scan(); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 0u); + + juniper::ReleaseResult(res); + } + + { + // Likewise nested complex NEAR-test with double matches at same pos but just outside limit + // should not match: + TestQuery q("NEAR/4(pattern,NEAR/1(simple,with),NEAR/1(simple,adjacent))"); + + // Fetch a result descriptor: + Result* res = juniper::Analyse(juniper::TestConfig, &q._qhandle, + content, content_len, + 0, 0, 0); + + res->Scan(); + Matcher& m = *res->_matcher; + match_candidate_set& ms = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(ms.size()), 0u); + + juniper::ReleaseResult(res); + } +} + + +struct MyTokenProcessor : public ITokenProcessor +{ + Matcher &_m; + std::vector<size_t> _cands; + MyTokenProcessor(Matcher &m) : _m(m), _cands() {} + virtual void handle_token(Token &token) { + _m.handle_token(token); + const match_sequence *ms = _m.GetWorkSet(); + _cands.push_back(ms[0].size()); + LOG(info, "match_sequence[0].size(%zu)", _cands.back()); + } + virtual void handle_end(Token &token) { + _m.handle_end(token); + } +}; + + +/** + * Test that max number of match candidates can be controlled. + */ +void MatchCandidateTest::requireThatMaxNumberOfMatchCandidatesCanBeControlled() +{ + TestQuery q("PHRASE(re,re,re,re,foo,re,re,re,re,bar)"); + q._qhandle._max_match_candidates = 4; + + const char *content = "re re re re foo re re re re bar re re re re foo re re re re bar"; + size_t content_len = strlen(content); + + Result *res = juniper::Analyse(juniper::TestConfig, + &q._qhandle, + content, content_len, + 0, 0, 0); + _test(res != 0); + + // Deflect tokens to my processor + Matcher &m = *res->_matcher; + MyTokenProcessor proc(m); + res->_tokenizer->SetSuccessor(&proc); + res->Scan(); + + _test_equal(proc._cands.size(), 20u); + for (size_t i = 0; i < proc._cands.size(); ++i) { + _test(proc._cands[i] <= 4u); + } + _test_equal(m.TotalHits(), 20); + match_candidate_set& mcs = m.OrderedMatchSet(); + _test_equal(static_cast<size_t>(mcs.size()), 2u); + + juniper::ReleaseResult(res); +} + + +/** + * Test of the order method. + */ +void MatchCandidateTest::testOrder() { +// NOTEST("order"); +} + + +/** + * Test of the size method. + */ +void MatchCandidateTest::testSize() { +// NOTEST("size"); +} + + +/** + * Test of the endpos method. + */ +void MatchCandidateTest::testEndpos() { +// NOTEST("endpos"); +} + + +/** + * Test of the ctxt_startpos method. + */ +void MatchCandidateTest::testCtxt_startpos() { +// NOTEST("ctxt_startpos"); +} + + +/** + * Test of the starttoken method. + */ +void MatchCandidateTest::testStarttoken() { +// NOTEST("starttoken"); +} + + +/** + * Test of the word_distance method. + */ +void MatchCandidateTest::testWord_distance() { +// NOTEST("word_distance"); +} + + +/** + * Test of the distance method. + */ +void MatchCandidateTest::testDistance() { +// NOTEST("distance"); +} + + +/** + * Test of the elem_store_sz method. + */ +void MatchCandidateTest::testElem_store_sz() { +// NOTEST("elem_store_sz"); +} + + +/** + * Test of the elems method. + */ +void MatchCandidateTest::testElems() { +// NOTEST("elems"); +} + + +/** + * Test of the distance method. + */ +void MatchCandidateTest::testDistance1() { +// NOTEST("distance"); +} + + +/** + * Test of the set_valid method. + */ +void MatchCandidateTest::testSet_valid() { +// NOTEST("set_valid"); +} + + +/************************************************************************* + * Test administration methods + *************************************************************************/ + +/** + * Set up common stuff for all test methods. + * This method is called immediately before each test method is called + */ +bool MatchCandidateTest::setUp() { + return true; +} + +/** + * Tear down common stuff for all test methods. + * This method is called immediately after each test method is called + */ +void MatchCandidateTest::tearDown() { +} + +/** + * Build up a map with all test methods + */ +void MatchCandidateTest::init() { + test_methods_["testSetDocid"] = + &MatchCandidateTest::testSetDocid; + test_methods_["testLog"] = + &MatchCandidateTest::testLog; + test_methods_["testDump"] = + &MatchCandidateTest::testDump; + test_methods_["testorder"] = + &MatchCandidateTest::testorder; + test_methods_["testMatches_limit"] = + &MatchCandidateTest::testMatches_limit; + test_methods_["testAccept"] = + &MatchCandidateTest::testAccept; + test_methods_["testRank"] = + &MatchCandidateTest::testRank; + test_methods_["testMake_keylist"] = + &MatchCandidateTest::testMake_keylist; + test_methods_["testAdd_to_keylist"] = + &MatchCandidateTest::testAdd_to_keylist; + test_methods_["testLength"] = + &MatchCandidateTest::testLength; + test_methods_["requireThatMaxNumberOfMatchCandidatesCanBeControlled"] = + &MatchCandidateTest::requireThatMaxNumberOfMatchCandidatesCanBeControlled; + test_methods_["testOrder"] = + &MatchCandidateTest::testOrder; + test_methods_["testSize"] = + &MatchCandidateTest::testSize; + test_methods_["testEndpos"] = + &MatchCandidateTest::testEndpos; + test_methods_["testCtxt_startpos"] = + &MatchCandidateTest::testCtxt_startpos; + test_methods_["testStarttoken"] = + &MatchCandidateTest::testStarttoken; + test_methods_["testWord_distance"] = + &MatchCandidateTest::testWord_distance; + test_methods_["testDistance"] = + &MatchCandidateTest::testDistance; + test_methods_["testElem_store_sz"] = + &MatchCandidateTest::testElem_store_sz; + test_methods_["testElems"] = + &MatchCandidateTest::testElems; + test_methods_["testDistance1"] = + &MatchCandidateTest::testDistance1; + test_methods_["testSet_valid"] = + &MatchCandidateTest::testSet_valid; +} + +/************************************************************************* + * main entry points + *************************************************************************/ + + +void MatchCandidateTest::Run(MethodContainer::iterator &itr) { + try { + if (setUp()) { + (this->*itr->second)(); + tearDown(); + } + } catch (...) { + _fail("Got unknown exception in test method " + itr->first); + } +} + +void MatchCandidateTest::Run(const char* method) { + MethodContainer::iterator pos(test_methods_.find(method)); + if (pos != test_methods_.end()) { + Run(pos); + } else { + std::cerr << "ERROR: No test method named \"" + << method << "\"" << std::endl; + _fail("No such method"); + } +} + +void MatchCandidateTest::Run() { + for (MethodContainer::iterator itr(test_methods_.begin()); + itr != test_methods_.end(); + ++itr) + Run(itr); +} + +/* + * Parse runtime arguments before running. + * If the -m METHOD parameter is given, run only that method + */ +void MatchCandidateTest::Run(int argc, char* argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-m") == 0 && argc > i + 1) + { + Run(argv[++i]); + return; + } + } + Run(); +} |