diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-19 15:15:55 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-19 15:15:55 +0000 |
commit | 5ecf186af848c4a518d7268fcb452b264fba939a (patch) | |
tree | 9d9e01e78f321d24a221aa2f31cfa9a70b29a818 /searchsummary/src/vespa/juniper/result.cpp | |
parent | 3988acc5dd01647c479412d5815c37d6fa4c6a2b (diff) |
Fold juniper into searchsummary library.
Diffstat (limited to 'searchsummary/src/vespa/juniper/result.cpp')
-rw-r--r-- | searchsummary/src/vespa/juniper/result.cpp | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/searchsummary/src/vespa/juniper/result.cpp b/searchsummary/src/vespa/juniper/result.cpp new file mode 100644 index 00000000000..653e692e015 --- /dev/null +++ b/searchsummary/src/vespa/juniper/result.cpp @@ -0,0 +1,211 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#define _NEED_SUMMARY_CONFIG_IMPL 1 +#include "SummaryConfig.h" +#include "rpinterface.h" +#include "result.h" +#include "juniperparams.h" +#include "Matcher.h" +#include "config.h" +#include "appender.h" + +#include <vespa/log/log.h> +LOG_SETUP(".juniper.result"); + +namespace juniper { + +/** Actual implementation of Juniper generated summaries. */ +class SummaryImpl : public Summary +{ +public: + explicit SummaryImpl() : _text("") {} + explicit SummaryImpl(const std::string& t) : _text(t) {} + ~SummaryImpl() {} + const char* Text() const override { return _text.c_str(); } + size_t Length() const override { return _text.size(); } + std::string _text; +}; + + +Result::Result(Config* config, QueryHandle* qhandle, + const char* docsum, size_t docsum_len, uint32_t langid) : + _qhandle(qhandle), + _mo(qhandle->MatchObj(langid)), + _docsum(docsum), + _docsum_len(docsum_len), + _langid(langid), + _config(config), + _matcher(), + _tokenizer(), + _summaries(), + _scan_done(false), + _dynsum_len(-1), + _max_matches(-1), + _surround_max(-1), + _stem_min(0), + _stem_extend(0), + _winsize(0), + _winsize_fallback_multiplier(10.0), + _max_match_candidates(1000) +{ + if (!_mo) return; // The empty result.. + + MatcherParams& mp = _config->_matcherparams; + Fast_WordFolder* wordfolder = mp.WordFolder(); + + if (_qhandle->_stem_min < 0) + _stem_min = mp.StemMinLength(); + else + _stem_min = _qhandle->_stem_min; + + if (_qhandle->_stem_extend < 0) + _stem_extend = mp.StemMaxExtend(); + else + _stem_extend = _qhandle->_stem_extend; + + if (_qhandle->_winsize < 0) + _winsize = mp.MatchWindowSize(); + else + _winsize = _qhandle->_winsize; + + if (_qhandle->_winsize_fallback_multiplier < 0) + _winsize_fallback_multiplier = mp.MatchWindowSizeFallbackMultiplier(); + else + _winsize_fallback_multiplier = _qhandle->_winsize_fallback_multiplier; + + if (_qhandle->_max_match_candidates < 0) { + _max_match_candidates = mp.MaxMatchCandidates(); + } else { + _max_match_candidates = _qhandle->_max_match_candidates; + } + + /* Create the new pipeline */ + _tokenizer.reset(new JuniperTokenizer(wordfolder, NULL, 0, NULL)); + + _matcher.reset(new Matcher(this)); + _matcher->SetProximityFactor(mp.ProximityFactor()); + + _registry.reset(new SpecialTokenRegistry(_matcher->getQuery())); + + if (qhandle->_log_mask) + _matcher->set_log(qhandle->_log_mask); + + _tokenizer->SetSuccessor(_matcher.get()); + if (!_registry->getSpecialTokens().empty()) { + _tokenizer->setRegistry(_registry.get()); + } +} + +Result::~Result() +{ + delete_all(_summaries); +} + + +long Result::GetRelevancy() +{ + if (!_mo) return PROXIMITYBOOST_NOCONSTRAINT_OFFSET; + if (!_mo->Query()) return PROXIMITYBOOST_NOCONSTRAINT_OFFSET; + Scan(); + long retval = _matcher->GlobalRank(); + LOG(debug, "juniper::GetRelevancy(%lu)", retval); + return retval; +} + +Summary* Result::GetTeaser(const Config* alt_config) +{ + LOG(debug, "juniper::GetTeaser"); + const Config* cfg = (alt_config ? alt_config : _config); + const DocsumParams& dsp = cfg->_docsumparams; + if (_qhandle->_dynsum_len < 0) + _dynsum_len = dsp.Length(); + else + _dynsum_len = _qhandle->_dynsum_len; + SummaryImpl *sum = NULL; + // Avoid overhead when being called with an empty stack + if (_mo && _mo->Query()) { + Scan(); + if (_qhandle->_max_matches < 0) + _max_matches = dsp.MaxMatches(); + else + _max_matches = _qhandle->_max_matches; + if (_qhandle->_surround_max < 0) + _surround_max = dsp.SurroundMax(); + else + _surround_max = _qhandle->_surround_max; + + SummaryDesc* sdesc = + _matcher->CreateSummaryDesc(_dynsum_len, dsp.MinLength(), _max_matches, _surround_max); + + if (sdesc) { + size_t char_size; + sum = new SummaryImpl(BuildSummary(_docsum, _docsum_len, sdesc, cfg->_sumconf, char_size)); + DeleteSummaryDesc(sdesc); + } + } + + if (sum == NULL) { + sum = new SummaryImpl(); + } + + if (sum->_text.empty() && dsp.Fallback() == DocsumParams::FALLBACK_PREFIX) + { + std::vector<char> text; + Appender a(cfg->_sumconf); + ucs4_t buf[TOKEN_DSTLEN]; + const char *src = _docsum; + const char *src_end = _docsum + _docsum_len; + ucs4_t *dst = buf; + ucs4_t *dst_end = dst + TOKEN_DSTLEN; + Fast_WordFolder *folder = _config->_matcherparams.WordFolder(); + + text.reserve(_dynsum_len*2); + if (src_end - src <= _dynsum_len) { + a.append(text, src, src_end - src); + src = src_end; // ensure while loop not run + } + while (src < src_end) + { + const char *startpos; + size_t tokenLen; + const char *old_src = src; + size_t old_sum_len = text.size(); + src = folder->UCS4Tokenize(src, src_end, dst, dst_end, + startpos, tokenLen); + if (dst[0] == 0) { + a.append(text, old_src, src_end - old_src); + src = src_end; // ensure loop termination + } else { + a.append(text, old_src, src - old_src); + } + if (text.size() > (size_t) _dynsum_len) { + text.resize(old_sum_len); + text.insert(text.end(), cfg->_sumconf->dots().begin(), cfg->_sumconf->dots().end()); + break; + } + } + sum->_text = std::string(&text[0], text.size()); + } + _summaries.push_back(sum); + return sum; +} + + +Summary* Result::GetLog() +{ + // Avoid overhead when being called with an empty stack + Summary* sum = NULL; + if (_mo && _mo->Query()) + { + LOG(debug, "juniper::GetLog"); + Scan(); + sum = new SummaryImpl(_matcher->GetLog()); + } + else + sum = new SummaryImpl(); + _summaries.push_back(sum); + return sum; +} + + +} // end namespace juniper |