diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-05-15 00:40:43 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-15 00:40:43 +0200 |
commit | dacf557add1c6a3ffab036cdf2f7dfdf9750b22e (patch) | |
tree | 3a9dfff58b98898e2e28c0337925f4f04e5eaeb0 /streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp | |
parent | 2722ce9d1d1ec12d57ebd3833ce37b0958afb752 (diff) |
Revert "Collapse vsm into streamingvisitors"
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp')
-rw-r--r-- | streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp | 301 |
1 files changed, 0 insertions, 301 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp deleted file mode 100644 index e69999b160e..00000000000 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "fieldsearcher.h" -#include <vespa/vsm/vsm/fieldsearchspec.h> -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> - -#include <vespa/log/log.h> -LOG_SETUP(".vsm.searcher.fieldsearcher"); - -using search::byte; -using search::streaming::Query; -using search::streaming::QueryTerm; -using search::streaming::QueryTermList; -using search::v16qi; - -namespace vsm { - -class force -{ - public: - force() { FieldSearcher::init(); } -}; - -static force __forceInit; - -byte FieldSearcher::_foldLowCase[256]; -byte FieldSearcher::_wordChar[256]; - -FieldSearcherBase::FieldSearcherBase() : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) -{ -} - -FieldSearcherBase::FieldSearcherBase(const FieldSearcherBase & org) : - _qtl(), - _qtlFastBuffer(), - _qtlFastSize(0), - _qtlFast(nullptr) -{ - prepare(org._qtl); -} - -FieldSearcherBase::~FieldSearcherBase() -{ -} - -FieldSearcherBase & FieldSearcherBase::operator = (const FieldSearcherBase & org) -{ - if (this != &org) { - prepare(org._qtl); - } - return *this; -} - -void FieldSearcherBase::prepare(const QueryTermList & qtl) -{ - _qtl = qtl; - _qtlFastBuffer.resize(sizeof(*_qtlFast)*(_qtl.size()+1), 0x13); - _qtlFast = reinterpret_cast<v16qi *>(reinterpret_cast<unsigned long>(&_qtlFastBuffer[0]+15) & ~0xf); - _qtlFastSize = 0; - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - const QueryTerm & qt = **it; - memcpy(&_qtlFast[_qtlFastSize++], qt.getTerm(), std::min(size_t(16), qt.termLen())); - } -} - -FieldSearcher::FieldSearcher(const FieldIdT & fId, bool defaultPrefix) : - FieldSearcherBase(), - _field(fId), - _matchType(defaultPrefix ? PREFIX : REGULAR), - _maxFieldLength(0x100000), - _currentElementId(0), - _currentElementWeight(1), - _pureUsAsciiCount(0), - _pureUsAsciiFieldCount(0), - _anyUtf8Count(0), - _anyUtf8FieldCount(0), - _words(0), - _badUtf8Count(0), - _zeroCount(0) -{ - zeroStat(); -} - -FieldSearcher::~FieldSearcher() = default; - -bool FieldSearcher::search(const StorageDocument & doc) -{ - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field()); - fInfo.setHitOffset(qt.getHitList().size()); - } - onSearch(doc); - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - QueryTerm::FieldInfo & fInfo = qt.getFieldInfo(field()); - fInfo.setHitCount(qt.getHitList().size() - fInfo.getHitOffset()); - fInfo.setFieldLength(_words); - } - _words = 0; - return true; -} - -void FieldSearcher::prepare(QueryTermList & qtl, const SharedSearcherBuf & UNUSED_PARAM(buf)) -{ - FieldSearcherBase::prepare(qtl); - prepareFieldId(); -} - -size_t FieldSearcher::countWords(const FieldRef & f) -{ - size_t words = 0; - const char * n = f.data(); - const char * e = n + f.size(); - for( ; n < e; ++n) { - for (; isspace(*n) && (n<e); ++n); - const char * m = n; - for (; iswordchar(*n) && (n<e); ++n); - if (n > m) { - words++; - } - } - return words; -} - -void FieldSearcher::prepareFieldId() -{ - for(QueryTermList::iterator it=_qtl.begin(), mt=_qtl.end(); it != mt; it++) { - QueryTerm & qt = **it; - qt.resizeFieldId(field()); - } -} - -void FieldSearcher::addStat(const FieldSearcher & toAdd) -{ - _pureUsAsciiCount += toAdd._pureUsAsciiCount; - _pureUsAsciiFieldCount += toAdd._pureUsAsciiFieldCount; - _anyUtf8Count += toAdd._anyUtf8Count; - _anyUtf8FieldCount += toAdd._anyUtf8FieldCount; - _badUtf8Count += toAdd._badUtf8Count; - _zeroCount += toAdd._zeroCount; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] += toAdd._utf8Count[i]; } -} - -void FieldSearcher::zeroStat() -{ - _pureUsAsciiCount = 0; - _pureUsAsciiFieldCount = 0; - _anyUtf8Count = 0; - _anyUtf8FieldCount = 0; - _badUtf8Count = 0; - _zeroCount = 0; - for (size_t i=0; i<NELEMS(_utf8Count); i++) { _utf8Count[i] = 0; } -} - -void FieldSearcher::init() -{ - for (unsigned i = 0; i < NELEMS(_foldLowCase); i++) { - _foldLowCase[i] = 0; - _wordChar[i] = 0; - } - for (int i = 'A'; i <= 'Z'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i | 0x20; - } - for (int i = 'a'; i <= 'z'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i; - } - for (int i = '0'; i <= '9'; i++) { - _wordChar[i] = 0xFF; - _foldLowCase[i] = i; - } - for (int i = 0xC0; i <= 0xFF; i++) { - _wordChar[i] = 0xFF; - } - _wordChar[0xd7] = 0; - _wordChar[0xf7] = 0; - - if (1) /* _doAccentRemoval */ { - _foldLowCase[0xc0] = 'a'; - _foldLowCase[0xc1] = 'a'; - _foldLowCase[0xc2] = 'a'; - _foldLowCase[0xc3] = 'a'; // A tilde - _foldLowCase[0xc7] = 'c'; - _foldLowCase[0xc8] = 'e'; - _foldLowCase[0xc9] = 'e'; - _foldLowCase[0xca] = 'e'; - _foldLowCase[0xcb] = 'e'; - _foldLowCase[0xcc] = 'i'; // I grave - _foldLowCase[0xcd] = 'i'; - _foldLowCase[0xce] = 'i'; - _foldLowCase[0xcf] = 'i'; - _foldLowCase[0xd3] = 'o'; - _foldLowCase[0xd4] = 'o'; - _foldLowCase[0xda] = 'u'; - _foldLowCase[0xdb] = 'u'; - - _foldLowCase[0xe0] = 'a'; - _foldLowCase[0xe1] = 'a'; - _foldLowCase[0xe2] = 'a'; - _foldLowCase[0xe3] = 'a'; // a tilde - _foldLowCase[0xe7] = 'c'; - _foldLowCase[0xe8] = 'e'; - _foldLowCase[0xe9] = 'e'; - _foldLowCase[0xea] = 'e'; - _foldLowCase[0xeb] = 'e'; - _foldLowCase[0xec] = 'i'; // i grave - _foldLowCase[0xed] = 'i'; - _foldLowCase[0xee] = 'i'; - _foldLowCase[0xef] = 'i'; - _foldLowCase[0xf3] = 'o'; - _foldLowCase[0xf4] = 'o'; - _foldLowCase[0xfa] = 'u'; - _foldLowCase[0xfb] = 'u'; - } -} - -void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT & difm, const SharedSearcherBuf & searcherBuf, Query & query) -{ - QueryTermList qtl; - query.getLeafs(qtl); - vespalib::string tmp; - for (FieldIdTSearcherMap::iterator it = begin(), mt = end(); it != mt; it++) { - QueryTermList onlyInIndex; - FieldIdT fid = (*it)->field(); - for (QueryTermList::iterator qt = qtl.begin(), mqt = qtl.end(); qt != mqt; qt++) { - QueryTerm * q = *qt; - for (DocumentTypeIndexFieldMapT::const_iterator dt(difm.begin()), dmt(difm.end()); dt != dmt; dt++) { - const IndexFieldMapT & fim = dt->second; - IndexFieldMapT::const_iterator found = fim.find(FieldSearchSpecMap::stripNonFields(q->index())); - if (found != fim.end()) { - const FieldIdTList & index = found->second; - if ((find(index.begin(), index.end(), fid) != index.end()) && (find(onlyInIndex.begin(), onlyInIndex.end(), q) == onlyInIndex.end())) { - onlyInIndex.push_back(q); - } - } else { - LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", q->index().c_str()); - } - } - } - /// Should perhaps do a unique on onlyInIndex - (*it)->prepare(onlyInIndex, searcherBuf); - if (logger.wants(ns_log::Logger::spam)) { - char tmpBuf[16]; - sprintf(tmpBuf,"%d", fid); - tmp += tmpBuf; - tmp += ", "; - } - } - LOG(debug, "Will search in %s", tmp.c_str()); -} - -bool FieldSearcher::onSearch(const StorageDocument & doc) -{ - bool retval(true); - size_t fNo(field()); - const StorageDocument::SubDocument & sub = doc.getComplexField(fNo); - if (sub.getFieldValue() != nullptr) { - IteratorHandler ih(*this); - sub.getFieldValue()->iterateNested(sub.getRange(), ih); - } - return retval; -} - -void -FieldSearcher::IteratorHandler::onPrimitive(uint32_t, const Content & c) -{ - LOG(spam, "onPrimitive: field value '%s'", c.getValue().toString().c_str()); - _searcher.setCurrentWeight(c.getWeight()); - _searcher.setCurrentElementId(getArrayIndex()); - _searcher.onValue(c.getValue()); -} - -void -FieldSearcher::IteratorHandler::onCollectionStart(const Content & c) -{ - const document::FieldValue & fv = c.getValue(); - LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str()); - if (fv.isA(document::FieldValue::Type::ARRAY)) { - const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(fv); - LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size()); - } else if (fv.isA(document::FieldValue::Type::WSET)) { - const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(fv); - LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size()); - } -} - -void -FieldSearcher::IteratorHandler::onStructStart(const Content & c) -{ - LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str()); - _searcher.onStructValue(static_cast<const document::StructFieldValue &>(c.getValue())); -} - - -} |